[hentaifoundry] split 'artist' into 'user'+'artist'

and some smaller changes ... 'user' is the name of the account an image is listed at and 'artist' is now the name of the account who created the image. For example "https://www.hentai-foundry.com/user/Tenpura/faves/pictures" - 'user': Tenpura - 'artist' of the only image: LewdBrush
2018-09-22 17:38:09 +02:00 · 2018-09-22 17:38:09 +02:00 · 1c95a0173f
commit 1c95a0173f
parent 55f5c87160
1 changed files with 44 additions and 42 deletions
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@ -15,19 +15,18 @@ from .. import text, util, exception
 class HentaifoundryExtractor(Extractor):
    """Base class for hentaifoundry extractors"""
    category = "hentaifoundry"
-    directory_fmt = ["{category}", "{artist}"]
+    directory_fmt = ["{category}", "{user}"]
    filename_fmt = "{category}_{index}_{title}.{extension}"
    archive_fmt = "{index}"
    root = "https://www.hentai-foundry.com"
    per_page = 25
-    def __init__(self, artist, needle=""):
+    def __init__(self, user="", page=1):
        Extractor.__init__(self)
-        self.artist = artist
+        self.url = ""
-        self.needle = needle
+        self.user = user
        self.artist_url = "{}/pictures/user/{}".format(self.root, artist)
        self.start_page = 1
        self.start_post = 0
        self.start_page = text.parse_int(page, 1)
    def items(self):
        data = self.get_job_metadata()
@ -48,18 +47,25 @@ class HentaifoundryExtractor(Extractor):
    def get_job_metadata(self):
        """Collect metadata for extractor-job"""
-        page = self.request(self.artist_url + "?enterAgree=1").text
+        self.request(self.root + "/?enterAgree=1")
-        needle = ' >{} ('.format(self.needle)
+        return {"user": self.user}
        count = text.parse_int(text.extract(page, needle, ')')[0])
        return {"artist": self.artist, "count": count}
    def get_image_pages(self):
-        """Yield urls all image pages of one artist"""
+        """Yield urls of all relevant image pages"""
        num = self.start_page
-    def get_image_metadata(self, url):
+        while True:
-        """Collect metadata for an image"""
+            page = self.request("{}/page/{}".format(self.url, num)).text
-        page = self.request(text.urljoin(self.root, url)).text
+            yield from text.extract_iter(page, 'thumbTitle"><a href="', '"')
-        index = url.rsplit("/", 2)[1]
+
            if 'class="pager"' not in page or 'class="last hidden"' in page:
                return
            num += 1
    def get_image_metadata(self, page_url):
        """Collect url and metadata from an image page"""
        page = self.request(text.urljoin(self.root, page_url)).text
        index = page_url.rsplit("/", 2)[1]
        title , pos = text.extract(page, '<title>', '</title>')
        width , pos = text.extract(page, 'width="', '"', pos)
        height, pos = text.extract(page, 'height="', '"', pos)
@ -110,28 +116,17 @@ class HentaifoundryExtractor(Extractor):
        url = self.root + "/site/filters"
        self.request(url, method="POST", data=data)
    def _pagination(self, url):
        num = self.start_page
        while True:
            page = self.request("{}/page/{}".format(url, num)).text
            yield from text.extract_iter(page, 'thumbTitle"><a href="', '"')
            if 'class="pager"' not in page or 'class="last hidden"' in page:
                return
            num += 1
 class HentaifoundryUserExtractor(HentaifoundryExtractor):
    """Extractor for all images of a hentai-foundry-user"""
    subcategory = "user"
    pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
-               r"/(?:pictures/user/([^/]+)(?:/(?:page/(\d+))?)?$"
+               r"/(?:pictures/user/([^/]+)(?:/page/(\d+))?/?$"
               r"|user/([^/]+)/profile)"]
    test = [
        ("https://www.hentai-foundry.com/pictures/user/Tenpura", {
            "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
-            "keyword": "98dc5e3856a38243ad4be3e428dc6a069243bc13",
+            "keyword": "d56e75566dc7dfe71d2ebd08c056a47f8832372d",
        }),
        ("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3", None),
        ("https://www.hentai-foundry.com/user/Tenpura/profile", None),
@ -139,34 +134,38 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
    def __init__(self, match):
        HentaifoundryExtractor.__init__(
-            self, match.group(1) or match.group(3), "Pictures")
+            self, match.group(1) or match.group(3), match.group(2))
-        self.start_page = text.parse_int(match.group(2), 1)
+        self.url = "{}/pictures/user/{}".format(self.root, self.user)
-    def get_image_pages(self):
+    def get_job_metadata(self):
-        return self._pagination(self.artist_url)
+        page = self.request(self.url + "?enterAgree=1").text
        count = text.extract(page, ">Pictures (", ")")[0]
        return {"user": self.user, "count": text.parse_int(count)}
 class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
    """Extractor for scrap images of a hentai-foundry-user"""
    subcategory = "scraps"
-    directory_fmt = ["{category}", "{artist}", "Scraps"]
+    directory_fmt = ["{category}", "{user}", "Scraps"]
    pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
-               r"/pictures/user/([^/]+)/scraps(?:/(?:page/(\d+))?)?$"]
+               r"/pictures/user/([^/]+)/scraps(?:/page/(\d+))?"]
    test = [
        ("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
            "url": "00a11e30b73ff2b00a1fba0014f08d49da0a68ec",
-            "keyword": "e294a72ab7be53a716eab92d8c97f82d6e76693c",
+            "keyword": "8c9a2ad4bf20247bcebb7aef3cfe7016f35da4a7",
        }),
        (("https://www.hentai-foundry.com"
          "/pictures/user/Evulchibi/scraps/page/3"), None),
    ]
    def __init__(self, match):
-        HentaifoundryExtractor.__init__(self, match.group(1), "Scraps")
+        HentaifoundryExtractor.__init__(self, match.group(1), match.group(2))
-        self.start_page = text.parse_int(match.group(2), 1)
+        self.url = "{}/pictures/user/{}/scraps".format(self.root, self.user)
-    def get_image_pages(self):
+    def get_job_metadata(self):
-        return self._pagination(self.artist_url + "/scraps")
+        page = self.request(self.url + "?enterAgree=1").text
        count = text.extract(page, ">Scraps (", ")")[0]
        return {"user": self.user, "count": text.parse_int(count)}
 class HentaifoundryImageExtractor(HentaifoundryExtractor):
@ -178,7 +177,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
        (("https://www.hentai-foundry.com"
          "/pictures/user/Tenpura/407501/shimakaze"), {
            "url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
-            "keyword": "e6ae60151ae3c17a22b3d61574ff5a883e577573",
+            "keyword": "aa64a4cfcd9c254ee143d9a3522195d11f8c1fb8",
            "content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
        }),
        ("https://www.hentai-foundry.com/pictures/user/Tenpura/340853/", {
@ -193,8 +192,11 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
        self.index = match.group(2)
    def items(self):
-        url, data = self.get_image_metadata(
+        post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format(
-            "{}/{}/?enterAgree=1".format(self.artist_url, self.index))
+            self.root, self.user, self.index)
        url, data = self.get_image_metadata(post_url)
        data["user"] = self.user
        yield Message.Version, 1
        yield Message.Directory, data
        yield Message.Url, url, data