[kissmanga][readcomiconline] add 'captcha' option (#279)
to configure how to handle CAPTCHA page redirects: - either interactively wait for the user to solve the CAPTCHA - or raise StopExtraction like before
This commit is contained in:
parent
e30ada162d
commit
4465a3ea68
@ -593,6 +593,18 @@ Description Controls whether to choose the GIF or MP4 version of an animation.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.kissmanga.captcha
|
||||
---------------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"stop"``
|
||||
Description Controls how to handle redirects to CAPTCHA pages.
|
||||
|
||||
* ``"stop"``: Stop the current extractor run.
|
||||
* ``"wait"``: Ask the user to solve the CAPTCHA and wait.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.oauth.browser
|
||||
-----------------------
|
||||
=========== =====
|
||||
@ -646,6 +658,18 @@ Description Minimum and maximum wait time in seconds between HTTP requests
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.readcomiconline.captcha
|
||||
---------------------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"stop"``
|
||||
Description Controls how to handle redirects to CAPTCHA pages.
|
||||
|
||||
* ``"stop"``: Stop the current extractor run.
|
||||
* ``"wait"``: Ask the user to solve the CAPTCHA and wait.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.recursive.blacklist
|
||||
-----------------------------
|
||||
=========== =====
|
||||
|
@ -62,6 +62,10 @@
|
||||
{
|
||||
"mp4": true
|
||||
},
|
||||
"kissmanga":
|
||||
{
|
||||
"captcha": "stop"
|
||||
},
|
||||
"nijie":
|
||||
{
|
||||
"username": null,
|
||||
@ -82,6 +86,10 @@
|
||||
"wait-min": 3.0,
|
||||
"wait-max": 6.0
|
||||
},
|
||||
"readcomiconline":
|
||||
{
|
||||
"captcha": "stop"
|
||||
},
|
||||
"recursive":
|
||||
{
|
||||
"blacklist": ["directlink", "oauth", "recursive", "test"]
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
"""Extract manga-chapters and entire manga from https://kissmanga.com/"""
|
||||
|
||||
from .common import ChapterExtractor, MangaExtractor
|
||||
from .common import ChapterExtractor, MangaExtractor, Extractor
|
||||
from .. import text, aes, exception
|
||||
from ..cache import cache
|
||||
import hashlib
|
||||
@ -16,21 +16,35 @@ import ast
|
||||
import re
|
||||
|
||||
|
||||
class KissmangaBase():
|
||||
class RedirectMixin():
    """Detect and handle redirects to CAPTCHA pages

    Mixin for extractors whose site may redirect a request to an
    "/AreYouHuman" page. What happens on such a redirect is selected by
    the extractor's 'captcha' config option:

    - "wait": ask the user to solve the CAPTCHA in a browser, wait for
      ENTER, and then repeat the request
    - anything else (default "stop"): log an error and stop the
      current extractor run
    """

    def request(self, url):
        """Request 'url' and return the response once it is not a
        CAPTCHA redirect

        Raises exception.StopExtraction if a CAPTCHA redirect is hit and
        either the 'captcha' option is not "wait", or no user input can
        be read to confirm that the CAPTCHA has been solved.
        """
        while True:
            response = Extractor.request(self, url)

            # only a redirected response that ended up on the
            # CAPTCHA page needs special treatment
            if not response.history or "/AreYouHuman" not in response.url:
                return response

            if self.config("captcha", "stop") != "wait":
                self.log.error(
                    "Redirect to \n%s\nVisit this URL in your browser and "
                    "solve the CAPTCHA to continue", response.url)
                raise exception.StopExtraction()

            self.log.warning(
                "Redirect to \n%s\nVisit this URL in your browser, solve "
                "the CAPTCHA, and press ENTER to continue", response.url)
            try:
                input()
            except (EOFError, OSError):
                # Without a usable stdin there is nothing to wait on;
                # retrying right away would repeat the request in an
                # endless tight loop for as long as the redirect persists.
                self.log.error(
                    "Unable to wait for user input; "
                    "stopping the current extractor run")
                raise exception.StopExtraction() from None
|
||||
|
||||
|
||||
class KissmangaBase(RedirectMixin):
|
||||
"""Base class for kissmanga extractors"""
|
||||
category = "kissmanga"
|
||||
archive_fmt = "{chapter_id}_{page}"
|
||||
root = "https://kissmanga.com"
|
||||
|
||||
def request(self, url):
|
||||
response = super().request(url)
|
||||
if response.history and "/AreYouHuman" in response.url:
|
||||
self.log.error("Redirect to \n%s\n"
|
||||
"Visit this URL in your browser and solve "
|
||||
"the CAPTCHA to continue.", response.url)
|
||||
raise exception.StopExtraction()
|
||||
return response
|
||||
|
||||
@staticmethod
|
||||
def parse_chapter_string(data):
|
||||
"""Parse 'chapter_string' value contained in 'data'"""
|
||||
|
@ -9,11 +9,12 @@
|
||||
"""Extract comic-issues and entire comics from https://readcomiconline.to/"""
|
||||
|
||||
from .common import ChapterExtractor, MangaExtractor
|
||||
from .. import text, exception
|
||||
from .kissmanga import RedirectMixin
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
|
||||
class ReadcomiconlineBase():
|
||||
class ReadcomiconlineBase(RedirectMixin):
|
||||
"""Base class for readcomiconline extractors"""
|
||||
category = "readcomiconline"
|
||||
directory_fmt = ("{category}", "{comic}", "{issue:>03}")
|
||||
@ -21,15 +22,6 @@ class ReadcomiconlineBase():
|
||||
archive_fmt = "{issue_id}_{page}"
|
||||
root = "https://readcomiconline.to"
|
||||
|
||||
def request(self, url):
|
||||
response = super().request(url)
|
||||
if response.history and "/AreYouHuman" in response.url:
|
||||
self.log.error("Redirect to \n%s\n"
|
||||
"Visit this URL in your browser and solve "
|
||||
"the CAPTCHA to continue.", response.url)
|
||||
raise exception.StopExtraction()
|
||||
return response
|
||||
|
||||
|
||||
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
|
||||
"""Extractor for comic-issues from readcomiconline.to"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user