128 lines
4.7 KiB
Python
Raw Normal View History

2015-12-08 22:29:34 +01:00
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
2015-12-08 22:29:34 +01:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters and entire manga from https://mangapark.me/"""
2015-12-08 22:29:34 +01:00
from .common import ChapterExtractor, MangaExtractor
from .. import text, util
from urllib.parse import urljoin
2015-12-09 00:07:18 +01:00
2017-02-01 00:53:19 +01:00
class MangaparkExtractor():
    """Base class for mangapark extractors"""
    category = "mangapark"
    root = "https://mangapark.me"

    @staticmethod
    def parse_chapter_path(path, data):
        """Get volume/chapter information from url-path of a chapter

        The first three "/"-separated components of 'path' are skipped;
        every following segment is interpreted by its first character:

            s<value>  ->  data["version"]
            v<value>  ->  data["volume"]
            c<value>  ->  data["chapter"] and data["chapter_minor"]
                          (split at the first ".")
            e<value>  ->  data["chapter_minor"] = "v" + <value>

        'data' is modified in place and nothing is returned.
        "volume" and "chapter_minor" are always reset to 0 and "" first;
        "version" and "chapter" are only written when a matching segment
        is present. Segments with any other prefix are ignored.
        """
        data["volume"], data["chapter_minor"] = 0, ""
        for part in path.split("/")[3:]:
            # Slice instead of indexing: an empty segment (trailing or
            # doubled slash) yields key == "" and is skipped, where
            # part[0] would raise IndexError.
            key, value = part[:1], part[1:]
            if key == "s":
                data["version"] = util.safe_int(value)
            elif key == "v":
                data["volume"] = util.safe_int(value)
            elif key == "c":
                chapter, dot, minor = value.partition(".")
                data["chapter"] = util.safe_int(chapter)
                # keep the separator so the minor part reads e.g. ".2"
                data["chapter_minor"] = dot + minor
            elif key == "e":
                data["chapter_minor"] = "v" + value
class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor):
    """Extractor for manga from mangapark.me"""
    pattern = [r"(?:https?://)?(?:www\.)?(mangapark\.me/manga/[^/]+)/?$"]
    test = [("https://mangapark.me/manga/aria", {
        "url": "4cb5606530b4eeacde7a4c9fd38296eb6ff46563",
        "keyword": "e87ab8e7ad2571bbe587881e7fd422e8f582f818",
    })]

    def chapters(self, page):
        """Collect all chapters listed on a manga overview page

        Returns a list of (chapter-url, metadata-dict) tuples in the
        order in which the chapter links appear in 'page'.
        Raises ValueError if the chapter-list container is not found.
        """
        chapter_list = []
        manga = text.extract(page, '<title>', ' Manga - Read ')[0]
        data = {
            "lang": "en",
            "language": "English",
            "manga": text.unescape(manga),
        }

        # start scanning at the chapter list
        pos = page.index('<div id="list" class="book-list">')
        while True:
            # an empty end-marker only tests for (and skips past) the
            # next chapter link; None means there are no more entries
            marker, pos = text.extract(
                page, '<a class="ch sts sts_', '', pos)
            if marker is None:
                break

            path, pos = text.extract(page, 'href="', '"', pos)
            title, pos = text.extract(page, '</a>', '</span>', pos)
            date, pos = text.extract(page, '<i>', '</i>', pos)
            count, pos = text.extract(page, '\tof ', ' ', pos)

            # 'data' is shared between iterations; every result
            # receives its own snapshot
            self.parse_chapter_path(path, data)
            data.update(
                title=title[3:].strip(),
                date=date,
                count=util.safe_int(count),
            )
            chapter_list.append((self.root + path, data.copy()))

        return chapter_list
2015-12-08 22:29:34 +01:00
class MangaparkChapterExtractor(MangaparkExtractor, ChapterExtractor):
    """Extractor for manga-chapters from mangapark.me"""
    pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.me(/manga/[^/]+"
                r"/s\d+(?:/v\d+)?/c\d+[^/]*(?:/e\d+)?)")]
    test = [
        ("https://mangapark.me/manga/gosu/s2/c55", {
            "count": 50,
            "keyword": "72ac1714b492b021a1fe26d9271ed132d51a930e",
        }),
        (("https://mangapark.me/manga/"
          "ad-astra-per-aspera-hata-kenjirou/s5/c1.2"), {
            "count": 40,
            "keyword": "f7f7fb1ca8b26a59a47d8ec60c5eaaf69a43a3f6",
        }),
        ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
            "count": 15,
            "keyword": "8d5d1608d4182495ea43ad665e25b755b6468be2",
        }),
    ]

    def __init__(self, match):
        """Store the chapter path captured by 'pattern' and build its URL"""
        self.path = match.group(1)
        ChapterExtractor.__init__(self, self.root + self.path + "?zoom=2")

    def get_metadata(self, page):
        """Build the metadata dict for the chapter shown on 'page'

        Combines the volume/chapter values parsed from the URL path
        with the fields extracted from the page content.
        """
        data = {"lang": "en", "language": "English"}
        self.parse_chapter_path(self.path, data)

        # (key, begin-marker, end-marker); results are collected into
        # 'data' through the values= argument
        rules = (
            ("manga_id"  , "var _manga_id = '", "'"),
            ("chapter_id", "var _book_id = '", "'"),
            ("manga"     , "<h2>", "</h2>"),
            ("title"     , "</a>", "<"),
            # no key: presumably only advances the search position
            (None        , 'target="_blank" href="', ''),
            ("count"     , 'page 1">1 / ', '<'),
        )
        text.extract_all(page, rules, values=data)

        # the last space-separated word of the heading is stored as "type"
        manga, _, manga_type = data["manga"].rpartition(" ")
        data["manga"] = manga
        data["type"] = manga_type
        data["manga"] = text.unescape(data["manga"])
        # keep only what follows the first ": " of the raw title
        data["title"] = data["title"].partition(": ")[2]
        data["count"] = util.safe_int(data["count"])
        return data

    def get_images(self, page):
        """Yield (image-url, metadata) pairs for all images on 'page'

        The metadata dict holds the 1-based "page" number and the
        "width"/"height" strings as extracted from the page source.
        """
        pos = 0
        page_number = 1
        while True:
            url, pos = text.extract(
                page, ' target="_blank" href="', '"', pos)
            if not url:
                return
            width, pos = text.extract(page, ' width="', '"', pos)
            height, pos = text.extract(page, ' _heighth="', '"', pos)
            image_data = {
                "page": page_number,
                "width": width,
                "height": height,
            }
            yield urljoin(self.root, url), image_data
            page_number += 1