2017-02-06 20:05:58 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# Copyright 2017 Mike Fährmann
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2017-04-09 11:37:21 +02:00
|
|
|
"""Extract manga-chapters from https://fascans.com/"""
|
2017-02-06 20:05:58 +01:00
|
|
|
|
2017-04-09 11:37:21 +02:00
|
|
|
from .common import Extractor, Message
|
2017-05-18 15:22:25 +02:00
|
|
|
from .. import text, util
|
2017-04-09 11:37:21 +02:00
|
|
|
import json
|
2017-02-06 20:05:58 +01:00
|
|
|
|
|
|
|
|
2017-04-09 11:37:21 +02:00
|
|
|
class FallenangelsChapterExtractor(Extractor):
    """Extractor for manga-chapters from fascans.com

    A chapter URL is matched by 'pattern'; its three capture groups are
    the subdomain ("manga" or "truyen"), the manga name as it appears in
    the URL, and the numeric chapter.
    """
    category = "fallenangels"
    subcategory = "chapter"
    directory_fmt = ["{category}", "{manga}", "{chapter:>03} - {title}"]
    filename_fmt = "{manga}_{chapter:>03}_{page:>03}.{extension}"
    pattern = [(r"(?:https?://)?(manga|truyen)\.fascans\.com/"
                r"manga/([^/]+)/(\d+)")]
    test = [
        ("https://manga.fascans.com/manga/chronos-ruler/20/1", {
            "url": "4604a7914566cc2da0ff789aa178e2d1c8c241e3",
            "keyword": "b2b9c7fd4696b9369d230c3069b5333b476f35d6",
        }),
        ("http://truyen.fascans.com/manga/hungry-marie/8", {
            "url": "1f923d9cb337d5e7bbf4323719881794a951c6ae",
            "keyword": "5520691dbaa26248bcd994e6c6a87bb39710f6c3",
        }),
    ]

    def __init__(self, match):
        """Store the pieces of the matched URL on the instance.

        'match' must expose exactly three groups (see 'pattern'):
        subdomain, manga name and chapter number, in that order.
        """
        Extractor.__init__(self)
        version, manga, chapter = match.groups()
        self.version = version
        self.manga = manga
        self.chapter = chapter

    def items(self):
        """Yield the messages describing this chapter and its images.

        Emits a Version message, then one Directory message carrying the
        chapter metadata, then one Url message per image.
        """
        # Always request page 1 of the chapter, regardless of the page
        # number (if any) present in the URL that was matched.
        chapter_url = "https://{}.fascans.com/manga/{}/{}/1".format(
            self.version, self.manga, self.chapter)
        page = self.request(chapter_url).text

        metadata = self.get_metadata(page)
        images = self.get_images(page)
        metadata["count"] = len(images)

        yield Message.Version, 1
        yield Message.Directory, metadata

        # The same metadata dict is updated and passed along for every
        # image; "page" is the 1-based position within the chapter.
        for number, image in enumerate(images, 1):
            metadata["page"] = number
            image_url = image["page_image"]
            yield (Message.Url, image_url,
                   text.nameext_from_url(image_url, metadata))

    def get_metadata(self, page):
        """Collect metadata for extractor-job

        Starts from the chapter number and language derived from the
        URL and hands that dict to text.extract_all() together with the
        delimiter pairs for the "manga" and "title" fields; the first
        element of its result is returned.
        """
        # The "truyen" subdomain is treated as Vietnamese, anything
        # else as English.
        if self.version == "truyen":
            lang = "vi"
        else:
            lang = "en"

        metadata = {
            "chapter": self.chapter,
            "lang": lang,
            "language": util.code_to_language(lang),
        }
        # (key, begin-marker, end-marker) for each value to pull out of
        # the page text.
        rules = (
            ("manga", 'name="description" content="', ' Chapter '),
            ("title", ': ', ' - Page 1'),
        )
        return text.extract_all(page, rules, values=metadata)[0]

    @staticmethod
    def get_images(page):
        """Return a list of all images in this chapter

        The text between "var pages = " and the next ";" in 'page' is
        decoded as JSON and returned unchanged.
        """
        # NOTE(review): presumably text.extract() yields None as its
        # first element when the marker is missing, in which case
        # json.loads() would raise TypeError -- confirm against 'text'.
        pages_json = text.extract(page, "var pages = ", ";")[0]
        return json.loads(pages_json)
|