class RedirectMixin:
    """Mixin that notices redirects to CAPTCHA pages and reacts to them"""

    def request(self, url, **kwargs):
        """Send a request through Extractor.request() and return its response

        A response counts as a CAPTCHA redirect when it has a redirect
        history and its final URL contains "/AreYouHuman".  In that case
        the "captcha" option (default "stop") decides what happens:

        "wait"      -- log the URL, block until ENTER is pressed (or stdin
                       is unavailable), then send the request again
        other value -- raise exception.StopExtraction
        """
        response = Extractor.request(self, url, **kwargs)

        while response.history and "/AreYouHuman" in response.url:
            if self.config("captcha", "stop") != "wait":
                raise exception.StopExtraction(
                    "Redirect to \n%s\nVisit this URL in your browser and "
                    "solve the CAPTCHA to continue", response.url)

            self.log.warning(
                "Redirect to \n%s\nVisit this URL in your browser, solve "
                "the CAPTCHA, and press ENTER to continue", response.url)
            try:
                input()
            except (EOFError, OSError):
                # no usable stdin; go straight to the retry
                pass

            response = Extractor.request(self, url, **kwargs)

        return response


class KissmangaBase(RedirectMixin):
    """Common attributes and helpers shared by the kissmanga extractors"""
    category = "kissmanga"
    archive_fmt = "{chapter_id}_{page}"
    root = "https://kissmanga.com"

    @staticmethod
    def parse_chapter_string(data):
        """Split data['chapter_string'] into its components

        The unescaped string is stored back under 'chapter_string', and
        the keys 'volume', 'chapter', 'chapter_minor', and 'title' are
        set on 'data'.  Returns the same 'data' object.
        """
        chapter_string = text.unescape(data["chapter_string"])
        data["chapter_string"] = chapter_string

        patterns = (
            # optional volume, chapter number, optional minor number,
            # optional title
            (
                r"(?:[Vv]ol\.0*(\d+) )?"
                r"(?:[Cc]h\.)?0*(\d+)"
                r"(?:[.:]0*(\d+))?"
                r"(?: *[:-]? *(.+))?"
            ),
            # arbitrary leading text before the chapter number;
            # the empty group fills the 'volume' slot
            (
                r".+?(?: -)? ()"
                r"0*(\d+)(?:[Vv.]0*(\d+))?"
                r"(?: *[:-]? *(.+))?"
            ),
        )

        for pattern in patterns:
            match = re.match(pattern, chapter_string)
            if match:
                volume, chapter, minor, title = match.groups()
                break
        else:
            # nothing recognizable: keep the whole string as title
            volume, chapter, minor, title = 0, 0, "", chapter_string

        # a missing title and the literal "Read Online" both yield ""
        if not title or title == "Read Online":
            title = ""

        data["volume"] = text.parse_int(volume)
        data["chapter"] = text.parse_int(chapter)
        data["chapter_minor"] = ("." + minor) if minor else ""
        data["title"] = title
        return data