# -*- coding: utf-8 -*- # Copyright 2014, 2015 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract manga pages from http://bato.to/""" from .common import AsynchronousExtractor, Message from .. import text, iso639_1 import re class BatotoExtractor(AsynchronousExtractor): category = "batoto" directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] test = [("http://bato.to/reader#df48fa98f7d41851", { "url": "ab0526091f65b8eda7a8866b937adbdb468d68b1", "keyword": "ef14d3230aa0872e8d9e4236ed9160755f78aeb3", })] url = "https://bato.to/areader" def __init__(self, match): AsynchronousExtractor.__init__(self) self.token = match.group(1) self.session.headers.update({ "X-Requested-With": "XMLHttpRequest", "Referer": "https://bato.to/reader", }) def items(self): params = { "id": self.token, "p": 1, "supress_webtoon": "t", } page = self.request(self.url, params=params).text data = self.get_job_metadata(page) yield Message.Version, 1 yield Message.Directory, data.copy() for i in range(int(data["count"])): next_url, image_url = self.get_page_urls(page) text.nameext_from_url(image_url, data) data["page"] = i+1 yield Message.Url, image_url, data.copy() if next_url: params["p"] += 1 page = self.request(self.url, params=params).text def get_job_metadata(self, page): """Collect metadata for extractor-job""" extr = text.extract _ , pos = extr(page, '', ' - ', pos) lang , pos = extr(page, '', '', pos) _ , pos = extr(page, '