[booru] add extractors for "Popular" images

This commit is contained in:
Mike Fährmann 2017-08-24 21:24:51 +02:00
parent f7cdfd4c25
commit 18e6ed1c7e
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 84 additions and 18 deletions

View File

@ -10,7 +10,7 @@ Site URL Capabilities
Archive of Sins https://archiveofsins.com/ Threads
Archived.Moe https://archived.moe/ Threads
Batoto https://bato.to/ Chapters, Manga Optional
Danbooru https://danbooru.donmai.us/ Pools, Posts, Tag-Searches
Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches
Desuarchive https://desuarchive.org/ Threads
DeviantArt https://www.deviantart.com/ |Collections, De-1|
Doki Reader https://kobato.hologfx.com/ Chapters, Manga
@ -72,7 +72,7 @@ Tumblr https://www.tumblr.com/ Images from Users, Post
Twitter https://twitter.com/ Tweets
Warosu https://warosu.org/ Threads
World Three http://www.slide.world-three.org/ Chapters, Manga
Yandere https://yande.re/ Pools, Posts, Tag-Searches
Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches
Chronos http://chronos.to/ individual Images
Coreimg http://coreimg.net/ individual Images
Fapat http://fapat.me/ individual Images
@ -99,4 +99,4 @@ Turboimagehost http://turboimagehost.com/ individual Images
.. |http://www.thes-0| replace:: http://www.thespectrum.net/manga_scans/
.. |Collections, De-1| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals
.. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
.. |Images from Use-3| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Individual Images
.. |Images from Use-3| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images

View File

@ -11,19 +11,17 @@
from .common import Extractor, Message
from .. import text
import xml.etree.ElementTree as ET
import json
import urllib.parse
import datetime
class BooruExtractor(Extractor):
"""Base class for all booru extractors"""
filename_fmt = "{category}_{id}_{md5}.{extension}"
info = {}
headers = {}
pagestart = 1
pagekey = "page"
api_url = ""
category = ""
def __init__(self):
Extractor.__init__(self)
@ -85,12 +83,8 @@ class JSONBooruExtractor(BooruExtractor):
def items_impl(self):
    """Yield the entries of consecutive JSON result pages

    Pages are requested from 'self.api_url' with 'self.params' until a
    page holds fewer entries than self.params["limit"].
    """
    self.update_page(reset=True)
    while True:
        # Fetch and decode every page exactly once; the object returned
        # by self.request() is expected to offer a .json() method.
        images = self.request(self.api_url, params=self.params).json()
        yield from images
        # A page shorter than the requested limit is the last one.
        if len(images) < self.params["limit"]:
            return
        self.update_page()
@ -143,3 +137,42 @@ class BooruPostExtractor(BooruExtractor):
BooruExtractor.__init__(self)
self.post = match.group(1)
self.params["tags"] = "id:" + self.post
class BooruPopularExtractor(BooruExtractor):
    """Extractor for popular images

    Job metadata consists of a "scale" and a "date" value, which
    'directory_fmt' uses to build the target directory.
    """
    directory_fmt = ["{category}", "popular", "{scale}", "{date}"]

    def __init__(self, match):
        """Store the scale from the URL and merge its query parameters

        match -- regex match object; group 1 is used as scale,
                 group 2 as raw query string
        """
        BooruExtractor.__init__(self)
        self.scale = match.group(1)
        # An optional query group is None for URLs without "?...";
        # normalise it to "" so parse_query() always receives a string.
        self.params.update(text.parse_query(match.group(2) or ""))

    def get_job_metadata(self, fmt="%Y-%m-%d"):
        """Return a dict with the "date" and "scale" of the listing

        fmt -- strftime/strptime format of the date value.
               NOTE: dates built from query parameters are always
               sliced/formatted as YYYY-MM-DD, so only the default
               format is consistent across all branches.

        For scale "week" the date is moved back to the Monday of its
        week; for scale "month" the last three characters ("-DD" with
        the default format) are cut off.
        """
        params = self.params

        # precedence: 'scale' query parameter > scale from URL > "day"
        if "scale" in params:
            scale = params["scale"]
        elif self.scale:
            scale = self.scale
            if scale.startswith("by_"):
                scale = scale[3:]
        else:
            scale = "day"

        if "date" in params:
            # only the first 10 characters (the date part) are used
            date = params["date"][:10]
        elif "year" in params:
            date = "{:>04}-{:>02}-{:>02}".format(
                params["year"],
                params.get("month", "01"),
                params.get("day", "01"))
        else:
            # timezone-aware replacement for the deprecated utcnow()
            today = datetime.datetime.now(datetime.timezone.utc)
            date = today.strftime(fmt)

        if scale == "week":
            dt = datetime.datetime.strptime(date, fmt)
            # weekday() is 0 on Mondays
            dt -= datetime.timedelta(days=dt.weekday())
            date = dt.strftime(fmt)
        elif scale == "month":
            date = date[:-3]

        return {"date": date, "scale": scale}

View File

@ -20,8 +20,8 @@ class DanbooruExtractor(booru.JSONBooruExtractor):
class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
    """Extractor for images from danbooru based on search-tags"""
    subcategory = "tag"
    # Single effective pattern: the dots of the host name are escaped so
    # they only match literally; group 1 captures the 'tags' value.
    pattern = [r"(?:https?://)?danbooru\.donmai\.us/posts"
               r"\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"]
    test = [("https://danbooru.donmai.us/posts?tags=bonocho", {
        "content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
    })]
@ -30,7 +30,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
    """Extractor for image-pools from danbooru"""
    subcategory = "pool"
    # Single effective pattern with literal (escaped) dots in the host
    # name; group 1 captures the numeric pool ID.
    pattern = [r"(?:https?://)?danbooru\.donmai\.us/pools/(\d+)"]
    test = [("https://danbooru.donmai.us/pools/7659", {
        "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
    })]
@ -39,7 +39,22 @@ class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor):
    """Extractor for single images from danbooru"""
    subcategory = "post"
    # Single effective pattern with literal (escaped) dots in the host
    # name; group 1 captures the numeric post ID.
    pattern = [r"(?:https?://)?danbooru\.donmai\.us/posts/(\d+)"]
    test = [("https://danbooru.donmai.us/posts/294929", {
        "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
    })]
class DanbooruPopularExtractor(DanbooruExtractor, booru.BooruPopularExtractor):
    """Extractor for popular images from danbooru"""
    subcategory = "popular"
    api_url = "https://danbooru.donmai.us/explore/posts/popular.json"
    # Group 1 is an intentionally empty capture "()"; group 2 holds the
    # optional query string (everything after "?" up to a "#").
    pattern = [
        r"(?:https?://)?danbooru\.donmai\.us/"
        r"explore/posts/popular()(?:\?([^#]*))?",
    ]
    test = [
        (
            "https://danbooru.donmai.us/explore/posts/popular"
            "?date=2017-07-17+14%3A13%3A05+-0400&scale=week",
            {"url": "2c1bafa62a587d881b709a8aea6549986fe4605b"},
        ),
        ("https://danbooru.donmai.us/explore/posts/popular", None),
    ]

View File

@ -29,7 +29,7 @@ class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
    """Extractor for image-pools from yande.re"""
    subcategory = "pool"
    # Single effective pattern: the dot in "yande.re" is escaped so it
    # only matches literally; group 1 captures the numeric pool ID.
    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"]
    test = [("https://yande.re/pool/show/318", {
        "content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
    })]
@ -38,7 +38,24 @@ class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
    """Extractor for single images from yande.re"""
    subcategory = "post"
    # Single effective pattern: the dot in "yande.re" is escaped so it
    # only matches literally; group 1 captures the numeric post ID.
    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"]
    test = [("https://yande.re/post/show/51824", {
        "content": "59201811c728096b2d95ce6896fd0009235fe683",
    })]
class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor):
    """Extractor for popular images from yande.re"""
    subcategory = "popular"
    # Group 1 captures the listing type ("by_day", "by_week", "by_month"
    # or "recent"); group 2 holds the optional query string.
    pattern = [
        r"(?:https?://)?(?:www\.)?yande\.re/post/popular_"
        r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?",
    ]
    test = [
        (
            "https://yande.re/post/popular_by_day?day=20&month=8&year=2017",
            {"url": "3fb32f7108d43d70681a38366443ec825d324108"},
        ),
        ("https://yande.re/post/popular_recent", None),
    ]

    @property
    def api_url(self):
        """JSON endpoint URL built from 'self.scale'"""
        pieces = ("https://yande.re/post/popular_", self.scale, ".json")
        return "".join(pieces)

View File

@ -65,6 +65,7 @@ SUBCATEGORY_MAP = {
"manga" : "Manga",
"me" : "pixiv.me Links",
"pinit" : "pin.it Links",
"popular": "Popular Images",
"search" : "Search Results",
"status" : "Images from Statuses",
"tag" : "Tag-Searches",