[sankaku] add 'tags' option (#94)
This commit is contained in:
parent
173add6935
commit
269dc2bbd5
@ -617,19 +617,7 @@ Description A (comma-separated) list of post types to extract images, etc. from.
|
|||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
extractor.3dbooru.tags
|
extractor.[booru].tags
|
||||||
----------------------
|
|
||||||
extractor.e621.tags
|
|
||||||
-------------------
|
|
||||||
extractor.gelbooru.tags
|
|
||||||
-----------------------
|
|
||||||
extractor.konachan.tags
|
|
||||||
-----------------------
|
|
||||||
extractor.rule34.tags
|
|
||||||
---------------------
|
|
||||||
extractor.safebooru.tags
|
|
||||||
------------------------
|
|
||||||
extractor.yandere.tags
|
|
||||||
----------------------
|
----------------------
|
||||||
=========== =====
|
=========== =====
|
||||||
Type ``bool``
|
Type ``bool``
|
||||||
|
@ -48,5 +48,12 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor,
|
|||||||
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"]
|
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"]
|
||||||
test = [("https://idol.sankakucomplex.com/post/show/694215", {
|
test = [("https://idol.sankakucomplex.com/post/show/694215", {
|
||||||
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
|
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
|
||||||
"count": 1,
|
"options": (("tags", True),),
|
||||||
|
"keyword": {
|
||||||
|
"tags_character": "shani_(the_witcher)",
|
||||||
|
"tags_copyright": "the_witcher",
|
||||||
|
"tags_idol": "lyumos",
|
||||||
|
"tags_medium": "3:2_aspect_ratio cosplay",
|
||||||
|
"tags_general": str,
|
||||||
|
},
|
||||||
})]
|
})]
|
||||||
|
@ -11,8 +11,10 @@
|
|||||||
from .common import SharedConfigExtractor, Message
|
from .common import SharedConfigExtractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import time
|
import collections
|
||||||
import random
|
import random
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
class SankakuExtractor(SharedConfigExtractor):
|
class SankakuExtractor(SharedConfigExtractor):
|
||||||
@ -30,6 +32,7 @@ class SankakuExtractor(SharedConfigExtractor):
|
|||||||
self.logged_in = True
|
self.logged_in = True
|
||||||
self.start_page = 1
|
self.start_page = 1
|
||||||
self.start_post = 0
|
self.start_post = 0
|
||||||
|
self.extags = self.config("tags", False)
|
||||||
self.wait_min = self.config("wait-min", 2.5)
|
self.wait_min = self.config("wait-min", 2.5)
|
||||||
self.wait_max = self.config("wait-max", 5.0)
|
self.wait_max = self.config("wait-max", 5.0)
|
||||||
if self.wait_max < self.wait_min:
|
if self.wait_max < self.wait_min:
|
||||||
@ -81,7 +84,7 @@ class SankakuExtractor(SharedConfigExtractor):
|
|||||||
height, pos = extr(page, 'height=', '>', pos)
|
height, pos = extr(page, 'height=', '>', pos)
|
||||||
file_url = extr(page, '<embed src="', '"', pos)[0]
|
file_url = extr(page, '<embed src="', '"', pos)[0]
|
||||||
|
|
||||||
return {
|
data = {
|
||||||
"id": text.parse_int(post_id),
|
"id": text.parse_int(post_id),
|
||||||
"md5": file_url.rpartition("/")[2].partition(".")[0],
|
"md5": file_url.rpartition("/")[2].partition(".")[0],
|
||||||
"tags": tags,
|
"tags": tags,
|
||||||
@ -94,6 +97,17 @@ class SankakuExtractor(SharedConfigExtractor):
|
|||||||
"height": text.parse_int(height),
|
"height": text.parse_int(height),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self.extags:
|
||||||
|
tags = collections.defaultdict(list)
|
||||||
|
tags_html = text.extract(page, '<ul id=tag-sidebar>', '</ul>')[0]
|
||||||
|
pattern = re.compile(r'tag-type-([^>]+)><a href="/\?tags=([^"]+)')
|
||||||
|
for tag_type, tag_name in pattern.findall(tags_html):
|
||||||
|
tags[tag_type].append(text.unquote(tag_name))
|
||||||
|
for key, value in tags.items():
|
||||||
|
data["tags_" + key] = " ".join(value)
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
def wait(self):
|
def wait(self):
|
||||||
"""Wait for a randomly chosen amount of seconds"""
|
"""Wait for a randomly chosen amount of seconds"""
|
||||||
time.sleep(random.uniform(self.wait_min, self.wait_max))
|
time.sleep(random.uniform(self.wait_min, self.wait_max))
|
||||||
@ -261,7 +275,15 @@ class SankakuPostExtractor(SankakuExtractor):
|
|||||||
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"]
|
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"]
|
||||||
test = [("https://chan.sankakucomplex.com/post/show/360451", {
|
test = [("https://chan.sankakucomplex.com/post/show/360451", {
|
||||||
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
|
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
|
||||||
"count": 1,
|
"options": (("tags", True),),
|
||||||
|
"keyword": {
|
||||||
|
"tags_artist": "bonocho",
|
||||||
|
"tags_copyright": "batman_(series) the_dark_knight",
|
||||||
|
"tags_medium": "sketch copyright_name",
|
||||||
|
"tags_studio": "dc_comics",
|
||||||
|
"tags_character": str,
|
||||||
|
"tags_general": str,
|
||||||
|
},
|
||||||
})]
|
})]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
|
@ -22,6 +22,8 @@ TRAVIS_SKIP = {
|
|||||||
|
|
||||||
# temporary issues, etc.
|
# temporary issues, etc.
|
||||||
BROKEN = {
|
BROKEN = {
|
||||||
|
"8chan",
|
||||||
|
"subapics",
|
||||||
"whatisthisimnotgoodwithcomputers",
|
"whatisthisimnotgoodwithcomputers",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user