[sankaku] add 'tags' option (#94)

This commit is contained in:
Mike Fährmann 2018-07-13 16:20:14 +02:00
parent 173add6935
commit 269dc2bbd5
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
4 changed files with 36 additions and 17 deletions

View File

@ -617,19 +617,7 @@ Description A (comma-separated) list of post types to extract images, etc. from.
=========== ===== =========== =====
extractor.3dbooru.tags extractor.[booru].tags
----------------------
extractor.e621.tags
-------------------
extractor.gelbooru.tags
-----------------------
extractor.konachan.tags
-----------------------
extractor.rule34.tags
---------------------
extractor.safebooru.tags
------------------------
extractor.yandere.tags
---------------------- ----------------------
=========== ===== =========== =====
Type ``bool`` Type ``bool``

View File

@ -48,5 +48,12 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor,
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"] pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://idol.sankakucomplex.com/post/show/694215", { test = [("https://idol.sankakucomplex.com/post/show/694215", {
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd", "content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"count": 1, "options": (("tags", True),),
"keyword": {
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_idol": "lyumos",
"tags_medium": "3:2_aspect_ratio cosplay",
"tags_general": str,
},
})] })]

View File

@ -11,8 +11,10 @@
from .common import SharedConfigExtractor, Message from .common import SharedConfigExtractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import time import collections
import random import random
import time
import re
class SankakuExtractor(SharedConfigExtractor): class SankakuExtractor(SharedConfigExtractor):
@ -30,6 +32,7 @@ class SankakuExtractor(SharedConfigExtractor):
self.logged_in = True self.logged_in = True
self.start_page = 1 self.start_page = 1
self.start_post = 0 self.start_post = 0
self.extags = self.config("tags", False)
self.wait_min = self.config("wait-min", 2.5) self.wait_min = self.config("wait-min", 2.5)
self.wait_max = self.config("wait-max", 5.0) self.wait_max = self.config("wait-max", 5.0)
if self.wait_max < self.wait_min: if self.wait_max < self.wait_min:
@ -81,7 +84,7 @@ class SankakuExtractor(SharedConfigExtractor):
height, pos = extr(page, 'height=', '>', pos) height, pos = extr(page, 'height=', '>', pos)
file_url = extr(page, '<embed src="', '"', pos)[0] file_url = extr(page, '<embed src="', '"', pos)[0]
return { data = {
"id": text.parse_int(post_id), "id": text.parse_int(post_id),
"md5": file_url.rpartition("/")[2].partition(".")[0], "md5": file_url.rpartition("/")[2].partition(".")[0],
"tags": tags, "tags": tags,
@ -94,6 +97,17 @@ class SankakuExtractor(SharedConfigExtractor):
"height": text.parse_int(height), "height": text.parse_int(height),
} }
if self.extags:
tags = collections.defaultdict(list)
tags_html = text.extract(page, '<ul id=tag-sidebar>', '</ul>')[0]
pattern = re.compile(r'tag-type-([^>]+)><a href="/\?tags=([^"]+)')
for tag_type, tag_name in pattern.findall(tags_html):
tags[tag_type].append(text.unquote(tag_name))
for key, value in tags.items():
data["tags_" + key] = " ".join(value)
return data
def wait(self): def wait(self):
"""Wait for a randomly chosen amount of seconds""" """Wait for a randomly chosen amount of seconds"""
time.sleep(random.uniform(self.wait_min, self.wait_max)) time.sleep(random.uniform(self.wait_min, self.wait_max))
@ -261,7 +275,15 @@ class SankakuPostExtractor(SankakuExtractor):
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"] pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://chan.sankakucomplex.com/post/show/360451", { test = [("https://chan.sankakucomplex.com/post/show/360451", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229", "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"count": 1, "options": (("tags", True),),
"keyword": {
"tags_artist": "bonocho",
"tags_copyright": "batman_(series) the_dark_knight",
"tags_medium": "sketch copyright_name",
"tags_studio": "dc_comics",
"tags_character": str,
"tags_general": str,
},
})] })]
def __init__(self, match): def __init__(self, match):

View File

@ -22,6 +22,8 @@ TRAVIS_SKIP = {
# temporary issues, etc. # temporary issues, etc.
BROKEN = { BROKEN = {
"8chan",
"subapics",
"whatisthisimnotgoodwithcomputers", "whatisthisimnotgoodwithcomputers",
} }