From 18e6ed1c7e23da4331a851ed5d95a9825b904165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 24 Aug 2017 21:24:51 +0200 Subject: [PATCH] [booru] add extractors for "Popular" images --- docs/supportedsites.rst | 6 ++-- gallery_dl/extractor/booru.py | 51 ++++++++++++++++++++++++++------ gallery_dl/extractor/danbooru.py | 23 +++++++++++--- gallery_dl/extractor/yandere.py | 21 +++++++++++-- scripts/build_supportedsites.py | 1 + 5 files changed, 84 insertions(+), 18 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 2e497042..2f0e62b7 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -10,7 +10,7 @@ Site URL Capabilities Archive of Sins https://archiveofsins.com/ Threads Archived.Moe https://archived.moe/ Threads Batoto https://bato.to/ Chapters, Manga Optional -Danbooru https://danbooru.donmai.us/ Pools, Posts, Tag-Searches +Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Desuarchive https://desuarchive.org/ Threads DeviantArt https://www.deviantart.com/ |Collections, De-1| Doki Reader https://kobato.hologfx.com/ Chapters, Manga @@ -72,7 +72,7 @@ Tumblr https://www.tumblr.com/ Images from Users, Post Twitter https://twitter.com/ Tweets Warosu https://warosu.org/ Threads World Three http://www.slide.world-three.org/ Chapters, Manga -Yandere https://yande.re/ Pools, Posts, Tag-Searches +Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches Chronos http://chronos.to/ individual Images Coreimg http://coreimg.net/ individual Images Fapat http://fapat.me/ individual Images @@ -99,4 +99,4 @@ Turboimagehost http://turboimagehost.com/ individual Images .. |http://www.thes-0| replace:: http://www.thespectrum.net/manga_scans/ .. |Collections, De-1| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals .. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results -.. |Images from Use-3| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Individual Images +.. |Images from Use-3| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index f8013684..96626be1 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -11,19 +11,17 @@ from .common import Extractor, Message from .. import text import xml.etree.ElementTree as ET -import json import urllib.parse +import datetime class BooruExtractor(Extractor): """Base class for all booru extractors""" filename_fmt = "{category}_{id}_{md5}.{extension}" - info = {} headers = {} pagestart = 1 pagekey = "page" api_url = "" - category = "" def __init__(self): Extractor.__init__(self) @@ -85,12 +83,8 @@ class JSONBooruExtractor(BooruExtractor): def items_impl(self): self.update_page(reset=True) while True: - images = json.loads( - self.request(self.api_url, params=self.params, - headers=self.headers).text - ) - for data in images: - yield data + images = self.request(self.api_url, params=self.params).json() + yield from images if len(images) < self.params["limit"]: return self.update_page() @@ -143,3 +137,42 @@ class BooruPostExtractor(BooruExtractor): BooruExtractor.__init__(self) self.post = match.group(1) self.params["tags"] = "id:" + self.post + + +class BooruPopularExtractor(BooruExtractor): + """Extractor for popular images""" + directory_fmt = ["{category}", "popular", "{scale}", "{date}"] + + def __init__(self, match): + BooruExtractor.__init__(self) + self.scale = match.group(1) + self.params.update(text.parse_query(match.group(2))) + + def get_job_metadata(self, fmt="%Y-%m-%d"): + if "scale" in self.params: + scale = self.params["scale"] + elif self.scale: + scale = self.scale + if scale.startswith("by_"): + scale = scale[3:] + else: + scale = "day" + + if "date" in self.params: + date = self.params["date"][:10] + elif "year" in self.params: + date = "{:>04}-{:>02}-{:>02}".format( + self.params["year"], + self.params.get("month", "01"), + self.params.get("day", "01")) + else: + date = datetime.datetime.utcnow().strftime(fmt) + + if scale == "week": + dt = datetime.datetime.strptime(date, fmt) + dt -= datetime.timedelta(days=dt.weekday()) + date = dt.strftime(fmt) + elif scale == "month": + date = date[:-3] + + return {"date": date, "scale": scale} diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 77dab15f..2ca3fe9e 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -20,8 +20,8 @@ class DanbooruExtractor(booru.JSONBooruExtractor): class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): """Extractor for images from danbooru based on search-tags""" subcategory = "tag" - pattern = [(r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts" - r"\?(?:utf8=%E2%9C%93&)?tags=([^&]+)")] + pattern = [r"(?:https?://)?danbooru\.donmai\.us/posts" + r"\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"] test = [("https://danbooru.donmai.us/posts?tags=bonocho", { "content": "b196fb9f1668109d7774a0a82efea3ffdda07746", })] @@ -30,7 +30,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor): """Extractor for image-pools from danbooru""" subcategory = "pool" - pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/pools/(\d+)"] + pattern = [r"(?:https?://)?danbooru\.donmai\.us/pools/(\d+)"] test = [("https://danbooru.donmai.us/pools/7659", { "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99", })] @@ -39,7 +39,22 @@ class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor): class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor): """Extractor for single images from danbooru""" subcategory = "post" - pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts/(\d+)"] + pattern = [r"(?:https?://)?danbooru\.donmai\.us/posts/(\d+)"] test = [("https://danbooru.donmai.us/posts/294929", { "content": "5e255713cbf0a8e0801dc423563c34d896bb9229", })] + + +class DanbooruPopularExtractor(DanbooruExtractor, booru.BooruPopularExtractor): + """Extractor for popular images from danbooru""" + subcategory = "popular" + pattern = [r"(?:https?://)?danbooru\.donmai\.us/" + r"explore/posts/popular()(?:\?([^#]*))?"] + test = [ + (("https://danbooru.donmai.us/explore/posts/popular" + "?date=2017-07-17+14%3A13%3A05+-0400&scale=week"), { + "url": "2c1bafa62a587d881b709a8aea6549986fe4605b", + }), + ("https://danbooru.donmai.us/explore/posts/popular", None), + ] + api_url = "https://danbooru.donmai.us/explore/posts/popular.json" diff --git a/gallery_dl/extractor/yandere.py b/gallery_dl/extractor/yandere.py index 81c6b118..beb905dc 100644 --- a/gallery_dl/extractor/yandere.py +++ b/gallery_dl/extractor/yandere.py @@ -29,7 +29,7 @@ class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor): class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor): """Extractor for image-pools from yande.re""" subcategory = "pool" - pattern = [r"(?:https?://)?(?:www\.)?yande.re/pool/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"] test = [("https://yande.re/pool/show/318", { "content": "2a35b9d6edecce11cc2918c6dce4de2198342b68", })] @@ -38,7 +38,24 @@ class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor): class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor): """Extractor for single images from yande.re""" subcategory = "post" - pattern = [r"(?:https?://)?(?:www\.)?yande.re/post/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"] test = [("https://yande.re/post/show/51824", { "content": "59201811c728096b2d95ce6896fd0009235fe683", })] + + +class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor): + """Extractor for popular images from yande.re""" + subcategory = "popular" + pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/popular_" + r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"] + test = [ + ("https://yande.re/post/popular_by_day?day=20&month=8&year=2017", { + "url": "3fb32f7108d43d70681a38366443ec825d324108", + }), + ("https://yande.re/post/popular_recent", None), + ] + + @property + def api_url(self): + return "https://yande.re/post/popular_" + self.scale + ".json" diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 96fc40da..5dcd4f3a 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -65,6 +65,7 @@ SUBCATEGORY_MAP = { "manga" : "Manga", "me" : "pixiv.me Links", "pinit" : "pin.it Links", + "popular": "Popular Images", "search" : "Search Results", "status" : "Images from Statuses", "tag" : "Tag-Searches",