[exhentai] update 'limits' check (#1487)

Only use 'limits' to set a custom upper bound.
Checking if the actual maximum gets exceeded is not necessary.
This commit is contained in:
Mike Fährmann 2021-04-22 22:41:14 +02:00
parent 141ca4ac0a
commit 9514cb8c12
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 31 additions and 43 deletions

View File

@ -923,15 +923,12 @@ Description
extractor.exhentai.limits extractor.exhentai.limits
------------------------- -------------------------
Type Type
``bool`` or ``integer`` ``integer``
Default Default
``true`` ``null``
Description Description
Check image download limits Sets a custom image download limit and
and stop extraction when they are exceeded. stops extraction when it gets exceeded.
If this value is an ``integer``, it gets used as the limit maximum
instead of the value listed on ``https://e-hentai.org/home.php``
extractor.exhentai.domain extractor.exhentai.domain

View File

@ -43,16 +43,14 @@ class ExhentaiExtractor(Extractor):
self.cookiedomain = "." + domain self.cookiedomain = "." + domain
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.limits = self.config("limits", True)
self.original = self.config("original", True) self.original = self.config("original", True)
if type(self.limits) is int: limits = self.config("limits", False)
self._limit_max = self.limits if limits and limits.__class__ is int:
self.limits = True self.limits = limits
self._remaining = 0
else: else:
self._limit_max = 0 self.limits = False
self._remaining = 0
self.session.headers["Referer"] = self.root + "/" self.session.headers["Referer"] = self.root + "/"
if version != "ex": if version != "ex":
@ -219,6 +217,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if "/fullimg.php" in url: if "/fullimg.php" in url:
data["extension"] = "" data["extension"] = ""
data["_http_validate"] = _validate_response data["_http_validate"] = _validate_response
else:
data["_http_validate"] = None
yield Message.Url, url, data yield Message.Url, url, data
def get_metadata(self, page): def get_metadata(self, page):
@ -358,6 +358,26 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"Continue with '%s/s/%s/%s-%s' as URL after resetting it.", "Continue with '%s/s/%s/%s-%s' as URL after resetting it.",
self.root, data["image_token"], self.gallery_id, data["num"]) self.root, data["image_token"], self.gallery_id, data["num"])
# Charge the cost of one image against the remaining download quota.
# `data` is read for its "num" and "cost" entries.
def _check_limits(self, data):
# Refresh the remaining count when it is zero and on every image whose
# number is a multiple of 25.
if not self._remaining or data["num"] % 25 == 0:
self._update_limits()
self._remaining -= data["cost"]
# Nothing left after charging this image: hand over to _report_limits()
# (not visible in this excerpt).
if self._remaining <= 0:
self._report_limits(data)
# Recompute self._remaining as self.limits minus the usage figure read
# from the e-hentai home page.
def _update_limits(self):
url = "https://e-hentai.org/home.php"
# Send only the session cookies set for self.cookiedomain, leaving out
# the one named "igneous".
cookies = {
cookie.name: cookie.value
for cookie in self.session.cookies
if cookie.domain == self.cookiedomain and cookie.name != "igneous"
}
page = self.request(url, cookies=cookies).text
# Presumably text.extract() returns the text between the first
# "<strong>"/"</strong>" pair as element 0 -- confirm against the helper.
current = text.extract(page, "<strong>", "</strong>")[0]
self.log.debug("Image Limits: %s/%s", current, self.limits)
# self.limits is used directly as the upper bound.
self._remaining = self.limits - text.parse_int(current)
def _gallery_page(self): def _gallery_page(self):
url = "{}/g/{}/{}/".format( url = "{}/g/{}/{}/".format(
self.root, self.gallery_id, self.gallery_token) self.root, self.gallery_id, self.gallery_token)
@ -381,35 +401,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.NotFoundError("image page") raise exception.NotFoundError("image page")
return page return page
# Charge the cost of one image against the remaining download quota and
# raise exception.StopExtraction once nothing is left.
# `data` is read for "num", "cost" and "image_token".
def _check_limits(self, data):
# Refresh the remaining count when it is zero and on every image whose
# number is a multiple of 25.
if not self._remaining or data["num"] % 25 == 0:
self._update_limits()
self._remaining -= data["cost"]
if self._remaining <= 0:
# Flag is set on the ExhentaiExtractor class, not on this instance.
ExhentaiExtractor.LIMIT = True
# URL built from the current image's token, gallery id and number;
# it is included in the stop message as the point to continue from.
url = "{}/s/{}/{}-{}".format(
self.root, data["image_token"], self.gallery_id, data["num"])
raise exception.StopExtraction(
"Image limit reached! Continue with '%s' "
"as URL after resetting it.", url)
# Recompute self._remaining as maximum minus current usage, both read
# from the e-hentai home page unless a custom maximum is configured.
def _update_limits(self):
url = "https://e-hentai.org/home.php"
# Send only the session cookies set for self.cookiedomain, leaving out
# the one named "igneous".
cookies = {
cookie.name: cookie.value
for cookie in self.session.cookies
if cookie.domain == self.cookiedomain and cookie.name != "igneous"
}
page = self.request(url, cookies=cookies).text
# Two consecutive "<strong>" elements are read as current and maximum.
# Presumably text.extract() returns (value, end position), so the second
# call resumes after the first match -- confirm against the helper.
current, pos = text.extract(page, "<strong>", "</strong>")
maximum, pos = text.extract(page, "<strong>", "</strong>", pos)
# A non-zero self._limit_max replaces the maximum read from the page.
if self._limit_max:
maximum = self._limit_max
self.log.debug("Image Limits: %s/%s", current, maximum)
self._remaining = text.parse_int(maximum) - text.parse_int(current)
@staticmethod @staticmethod
def _parse_image_info(url): def _parse_image_info(url):
for part in url.split("/")[4:]: for part in url.split("/")[4:]: