From 4891f4a328ba5f4f5c2331fc0e6950c4a0fb1a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 15 May 2019 17:25:46 +0200 Subject: [PATCH] [hentainexus] add search extractor (#256) --- docs/supportedsites.rst | 2 +- gallery_dl/extractor/hentainexus.py | 39 ++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 0bcba729..014aeb8d 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -36,7 +36,7 @@ Hentai Foundry https://www.hentai-foundry.com/ |hentaifoundry-C| Hentai2Read https://hentai2read.com/ Chapters, Manga HentaiFox https://hentaifox.com/ Galleries, Search Results HentaiHere https://hentaihere.com/ Chapters, Manga -Hentainexus https://hentainexus.com/ Galleries +Hentainexus https://hentainexus.com/ Galleries, Search Results Hitomi.la https://hitomi.la/ Galleries Hypnohub https://hypnohub.net/ Pools, Popular Images, Posts, Tag-Searches Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py index a71134c2..e148943b 100644 --- a/gallery_dl/extractor/hentainexus.py +++ b/gallery_dl/extractor/hentainexus.py @@ -8,7 +8,7 @@ """Extractors for https://hentainexus.com/""" -from .common import GalleryExtractor +from .common import GalleryExtractor, Extractor, Message from .. import text, util import json @@ -58,3 +58,40 @@ class HentainexusGalleryExtractor(GalleryExtractor): base = extr('"', '"') return [(base + img, None) for img in json.loads(imgs)] + + +class HentainexusSearchExtractor(Extractor): + """Extractor for search results on hentainexus.com""" + category = "hentainexus" + subcategory = "search" + root = "https://hentainexus.com" + pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com" + r"(?:/page/\d+)?/?(?:\?(q=[^/?#]+))?$") + test = ( + ("https://hentainexus.com/?q=tag:%22heart+pupils%22%20tag:group", { + "pattern": HentainexusGalleryExtractor.pattern, + "count": ">= 50", + }), + ("https://hentainexus.com/page/3?q=tag:%22heart+pupils%22"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.params = text.parse_query(match.group(1)) + + def items(self): + yield Message.Version, 1 + params = self.params + path = "/" + + while path: + page = self.request(self.root + path, params=params).text + extr = text.extract_from(page) + + while True: + gallery_id = extr('