[exhentai] fix search and favorite extraction
removes basically all metadata, but that can be compensated for with the right search query. writing "parsers" for all 4 possible views that have been introduced in the latest changes is too much of a hassle ...
This commit is contained in:
parent
369eb66125
commit
5398bfbd69
@ -339,53 +339,30 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
ExhentaiExtractor.__init__(self, match)
|
||||
self.params = text.parse_query(match.group(2) or "")
|
||||
self.params = text.parse_query(match.group(2))
|
||||
self.params["page"] = text.parse_int(self.params.get("page"))
|
||||
self.search_url = self.root
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.init()
|
||||
yield Message.Version, 1
|
||||
|
||||
while True:
|
||||
last = None
|
||||
page = self.request(self.search_url, params=self.params).text
|
||||
|
||||
for row in text.extract_iter(page, '<tr class="gtr', '</tr>'):
|
||||
yield self._parse_row(row)
|
||||
for gallery in ExhentaiGalleryExtractor.pattern.finditer(page):
|
||||
url = gallery.group(0)
|
||||
if url == last:
|
||||
continue
|
||||
last = url
|
||||
yield Message.Queue, url, {}
|
||||
|
||||
if 'class="ptdd">><' in page or ">No hits found</p>" in page:
|
||||
return
|
||||
self.params["page"] += 1
|
||||
self.wait()
|
||||
|
||||
def init(self):
|
||||
pass
|
||||
|
||||
def _parse_row(self, row, extr=text.extract):
|
||||
"""Parse information of a single result row"""
|
||||
gtype, pos = extr(row, ' alt="', '"')
|
||||
date , pos = extr(row, 'nowrap">', '<', pos)
|
||||
url , pos = extr(row, ' class="it5"><a href="', '"', pos)
|
||||
title, pos = extr(row, '>', '<', pos)
|
||||
key , last = self._parse_last(row, pos)
|
||||
parts = url.rsplit("/", 3)
|
||||
|
||||
return Message.Queue, url, {
|
||||
"type": gtype,
|
||||
"date": date,
|
||||
"gallery_id": text.parse_int(parts[1]),
|
||||
"gallery_token": parts[2],
|
||||
"title": text.unescape(title),
|
||||
"_extractor": ExhentaiGalleryExtractor,
|
||||
key: last,
|
||||
}
|
||||
|
||||
def _parse_last(self, row, pos):
|
||||
"""Parse the last column of a result row"""
|
||||
return "uploader", text.remove_html(
|
||||
text.extract(row, '<td class="itu">', '</td>', pos)[0])
|
||||
|
||||
|
||||
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
|
||||
"""Extractor for favorited exhentai galleries"""
|
||||
@ -400,15 +377,3 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
|
||||
def __init__(self, match):
|
||||
ExhentaiSearchExtractor.__init__(self, match)
|
||||
self.search_url = self.root + "/favorites.php"
|
||||
|
||||
def init(self):
|
||||
# The first request to '/favorites.php' will return an empty list
|
||||
# if the 's' cookie isn't set (maybe on some other conditions as well),
|
||||
# so we make a "noop" request to get all the correct cookie values
|
||||
# and to get a filled favorite list on the next one.
|
||||
# TODO: proper cookie storage
|
||||
self.request(self.url)
|
||||
self.wait(1.5)
|
||||
|
||||
def _parse_last(self, row, pos):
|
||||
return "date_favorited", text.extract(row, 'nowrap">', '<', pos)[0]
|
||||
|
Loading…
x
Reference in New Issue
Block a user