# -*- coding: utf-8 -*- # Copyright 2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for http://www.keenspot.com/""" from .common import Extractor, Message from .. import text class KeenspotComicExtractor(Extractor): """Extractor for webcomics from keenspot.com""" category = "keenspot" subcategory = "comic" directory_fmt = ("{category}", "{comic}") filename_fmt = "{filename}.{extension}" archive_fmt = "{comic}_{filename}" pattern = r"(?:https?://)?(?!www\.|forums\.)([^.]+)\.keenspot\.com" test = ( ("http://marksmen.keenspot.com/", { # link "range": "1-3", "url": "83bcf029103bf8bc865a1988afa4aaeb23709ba6", }), ("http://barkercomic.keenspot.com/", { # id "range": "1-3", "url": "c4080926db18d00bac641fdd708393b7d61379e6", }), ("http://crowscare.keenspot.com/", { # id v2 "range": "1-3", "url": "a00e66a133dd39005777317da90cef921466fcaa" }), ("http://supernovas.keenspot.com/", { # ks "range": "1-3", "url": "de21b12887ef31ff82edccbc09d112e3885c3aab" }), ) def __init__(self, match): Extractor.__init__(self, match) self._next = None self.comic = match.group(1) self.root = "http://" + self.comic + ".keenspot.com" def items(self): data = {"comic": self.comic} yield Message.Version, 1 yield Message.Directory, data url = self._first(self.request(self.root + "/").text) while url: if url[0] == "/": url = self.root + url page = self.request(url).text for img in text.extract_iter(page, 'class="ksc"', '>'): img = text.extract(img, 'src="', '"')[0] if img[0] == "/": img = self.root + img yield Message.Url, img, text.nameext_from_url(img, data) url = self._next(page) def _first(self, page): url = text.extract(page, '= 0: self._next = self._next_id return text.rextract(page, 'FIRST PAGE<') if pos >= 0: self._next = self._next_id return text.rextract(page, '= 0: self._next = self._next_ks return text.extract(page, 'href="', '"', pos)[0] self.log.error("Unrecognized page layout") return None @staticmethod def _next_link(page): return text.extract(page, '= 0 else None @staticmethod def _next_ks(page): pos = page.index('