[exhentai] fix search and favorite extraction

This removes basically all metadata from the queued results, but that can be compensated for with the right search query. Writing "parsers" for all 4 possible views that have been introduced in the latest changes is too much of a hassle ...
2019-03-28 16:22:02 +01:00 · 2019-03-28 16:22:02 +01:00 · 5398bfbd69
commit 5398bfbd69
parent 369eb66125
1 changed file with 8 additions and 43 deletions
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -339,53 +339,30 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):

    def __init__(self, match):
        ExhentaiExtractor.__init__(self, match)
-        self.params = text.parse_query(match.group(2) or "")
+        self.params = text.parse_query(match.group(2))
        self.params["page"] = text.parse_int(self.params.get("page"))
        self.search_url = self.root

    def items(self):
        self.login()
-        self.init()
        yield Message.Version, 1

        while True:
+            last = None
            page = self.request(self.search_url, params=self.params).text

-            for row in text.extract_iter(page, '<tr class="gtr', '</tr>'):
-                yield self._parse_row(row)
+            for gallery in ExhentaiGalleryExtractor.pattern.finditer(page):
+                url = gallery.group(0)
+                if url == last:
+                    continue
+                last = url
+                yield Message.Queue, url, {}

            if 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
                return
            self.params["page"] += 1
            self.wait()

-    def init(self):
-        pass
-
-    def _parse_row(self, row, extr=text.extract):
-        """Parse information of a single result row"""
-        gtype, pos = extr(row, ' alt="', '"')
-        date , pos = extr(row, 'nowrap">', '<', pos)
-        url  , pos = extr(row, ' class="it5"><a href="', '"', pos)
-        title, pos = extr(row, '>', '<', pos)
-        key , last = self._parse_last(row, pos)
-        parts = url.rsplit("/", 3)
-
-        return Message.Queue, url, {
-            "type": gtype,
-            "date": date,
-            "gallery_id": text.parse_int(parts[1]),
-            "gallery_token": parts[2],
-            "title": text.unescape(title),
-            "_extractor": ExhentaiGalleryExtractor,
-            key: last,
-        }
-
-    def _parse_last(self, row, pos):
-        """Parse the last column of a result row"""
-        return "uploader", text.remove_html(
-            text.extract(row, '<td class="itu">', '</td>', pos)[0])
-

 class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
    """Extractor for favorited exhentai galleries"""
@@ -400,15 +377,3 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
    def __init__(self, match):
        ExhentaiSearchExtractor.__init__(self, match)
        self.search_url = self.root + "/favorites.php"
-
-    def init(self):
-        # The first request to '/favorites.php' will return an empty list
-        # if the 's' cookie isn't set (maybe on some other conditions as well),
-        # so we make a "noop" request to get all the correct cookie values
-        # and to get a filled favorite list on the next one.
-        # TODO: proper cookie storage
-        self.request(self.url)
-        self.wait(1.5)
-
-    def _parse_last(self, row, pos):
-        return "date_favorited", text.extract(row, 'nowrap">', '<', pos)[0]