[sankaku] add 'tags' option (#94)
This commit is contained in:
parent
173add6935
commit
269dc2bbd5
@ -617,19 +617,7 @@ Description A (comma-separated) list of post types to extract images, etc. from.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.3dbooru.tags
|
||||
----------------------
|
||||
extractor.e621.tags
|
||||
-------------------
|
||||
extractor.gelbooru.tags
|
||||
-----------------------
|
||||
extractor.konachan.tags
|
||||
-----------------------
|
||||
extractor.rule34.tags
|
||||
---------------------
|
||||
extractor.safebooru.tags
|
||||
------------------------
|
||||
extractor.yandere.tags
|
||||
extractor.[booru].tags
|
||||
----------------------
|
||||
=========== =====
|
||||
Type ``bool``
|
||||
|
@ -48,5 +48,12 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor,
|
||||
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"]
|
||||
test = [("https://idol.sankakucomplex.com/post/show/694215", {
|
||||
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
|
||||
"count": 1,
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_character": "shani_(the_witcher)",
|
||||
"tags_copyright": "the_witcher",
|
||||
"tags_idol": "lyumos",
|
||||
"tags_medium": "3:2_aspect_ratio cosplay",
|
||||
"tags_general": str,
|
||||
},
|
||||
})]
|
||||
|
@ -11,8 +11,10 @@
|
||||
from .common import SharedConfigExtractor, Message
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
import time
|
||||
import collections
|
||||
import random
|
||||
import time
|
||||
import re
|
||||
|
||||
|
||||
class SankakuExtractor(SharedConfigExtractor):
|
||||
@ -30,6 +32,7 @@ class SankakuExtractor(SharedConfigExtractor):
|
||||
self.logged_in = True
|
||||
self.start_page = 1
|
||||
self.start_post = 0
|
||||
self.extags = self.config("tags", False)
|
||||
self.wait_min = self.config("wait-min", 2.5)
|
||||
self.wait_max = self.config("wait-max", 5.0)
|
||||
if self.wait_max < self.wait_min:
|
||||
@ -81,7 +84,7 @@ class SankakuExtractor(SharedConfigExtractor):
|
||||
height, pos = extr(page, 'height=', '>', pos)
|
||||
file_url = extr(page, '<embed src="', '"', pos)[0]
|
||||
|
||||
return {
|
||||
data = {
|
||||
"id": text.parse_int(post_id),
|
||||
"md5": file_url.rpartition("/")[2].partition(".")[0],
|
||||
"tags": tags,
|
||||
@ -94,6 +97,17 @@ class SankakuExtractor(SharedConfigExtractor):
|
||||
"height": text.parse_int(height),
|
||||
}
|
||||
|
||||
if self.extags:
|
||||
tags = collections.defaultdict(list)
|
||||
tags_html = text.extract(page, '<ul id=tag-sidebar>', '</ul>')[0]
|
||||
pattern = re.compile(r'tag-type-([^>]+)><a href="/\?tags=([^"]+)')
|
||||
for tag_type, tag_name in pattern.findall(tags_html):
|
||||
tags[tag_type].append(text.unquote(tag_name))
|
||||
for key, value in tags.items():
|
||||
data["tags_" + key] = " ".join(value)
|
||||
|
||||
return data
|
||||
|
||||
def wait(self):
|
||||
"""Wait for a randomly chosen amount of seconds"""
|
||||
time.sleep(random.uniform(self.wait_min, self.wait_max))
|
||||
@ -261,7 +275,15 @@ class SankakuPostExtractor(SankakuExtractor):
|
||||
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"]
|
||||
test = [("https://chan.sankakucomplex.com/post/show/360451", {
|
||||
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
|
||||
"count": 1,
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "bonocho",
|
||||
"tags_copyright": "batman_(series) the_dark_knight",
|
||||
"tags_medium": "sketch copyright_name",
|
||||
"tags_studio": "dc_comics",
|
||||
"tags_character": str,
|
||||
"tags_general": str,
|
||||
},
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -22,6 +22,8 @@ TRAVIS_SKIP = {
|
||||
|
||||
# temporary issues, etc.
|
||||
BROKEN = {
|
||||
"8chan",
|
||||
"subapics",
|
||||
"whatisthisimnotgoodwithcomputers",
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user