simplify code by using a MangaExtractor base class
This commit is contained in:
parent
2974d782a3
commit
f226417420
@ -8,15 +8,16 @@
|
||||
|
||||
"""Extract manga chapters from https://bato.to/"""
|
||||
|
||||
from .common import Extractor, AsynchronousExtractor, Message
|
||||
from .common import MangaExtractor, AsynchronousExtractor, Message
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
import re
|
||||
|
||||
|
||||
class BatotoExtractor(Extractor):
|
||||
class BatotoExtractor():
|
||||
"""Base class for batoto extractors"""
|
||||
category = "batoto"
|
||||
scheme = "https"
|
||||
root = "https://bato.to"
|
||||
|
||||
def login(self):
|
||||
@ -56,34 +57,19 @@ class BatotoExtractor(Extractor):
|
||||
return {c: response.cookies[c] for c in ("member_id", "pass_hash")}
|
||||
|
||||
|
||||
class BatotoMangaExtractor(BatotoExtractor):
|
||||
"""Extractor for mangas from bato.to"""
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?bato\.to/comic/_/comics/.*-r\d+"]
|
||||
class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
|
||||
"""Extractor for manga from bato.to"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(bato\.to/comic/_/comics/.*-r\d+)"]
|
||||
test = [("http://bato.to/comic/_/comics/aria-r2007", {
|
||||
"url": "a38585b0339587666d772ee06f2a60abdbf42a97",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
BatotoExtractor.__init__(self)
|
||||
self.url = match.group(0)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
yield Message.Version, 1
|
||||
for chapter in self.get_chapters():
|
||||
yield Message.Queue, chapter
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
def chapters(self, page):
|
||||
# TODO: filter by language / translator
|
||||
needle = ('<td style="border-top:0;">\n '
|
||||
'<a href="http://bato.to/reader#')
|
||||
page = self.request(self.url).text
|
||||
return reversed([
|
||||
self.root + "/reader#" + mangahash
|
||||
for mangahash in text.extract_iter(page, needle, '"')
|
||||
])
|
||||
return [self.root + "/reader#" + mangahash
|
||||
for mangahash in text.extract_iter(page, needle, '"')]
|
||||
|
||||
|
||||
class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
|
||||
|
@ -81,6 +81,40 @@ class AsynchronousExtractor(Extractor):
|
||||
put(None)
|
||||
|
||||
|
||||
class MangaExtractor(Extractor):
    """Base class for extractors that enumerate the chapters of a manga

    items() requests the manga's overview page once and emits one
    Message.Queue entry per chapter url. Subclasses say how to find those
    urls by overriding either chapter_paths() (paths that get prefixed
    with 'root') or chapters() (complete urls).
    """
    subcategory = "manga"
    # used to build the url from the first match group in __init__()
    scheme = "http"
    # prefix that chapters() puts in front of every chapter path
    root = ""
    # emit chapters in the reverse of the order returned by chapters();
    # subclasses set this to False to keep the original order
    reverse = True

    def __init__(self, match, url=None):
        """Store the overview-page url

        Uses 'url' if it is given (and truthy); otherwise the url is built
        from 'scheme' and the first group of 'match'.
        """
        Extractor.__init__(self)
        self.url = url or self.scheme + "://" + match.group(1)

    def items(self):
        """Yield Message.Version, then a Message.Queue per chapter url"""
        self.login()
        page = self.request(self.url).text

        # Materialize the result: an overriding chapters() may return any
        # iterable (e.g. a generator), which has no reverse() method.
        chapters = list(self.chapters(page))
        if self.reverse:
            chapters.reverse()

        yield Message.Version, 1
        for chapter in chapters:
            yield Message.Queue, chapter

    def login(self):
        """Login and set necessary cookies"""

    def chapters(self, page):
        """Return a list of all chapter urls"""
        return [self.root + path for path in self.chapter_paths(page)]

    def chapter_paths(self, page):
        """Return an iterable of chapter paths found in 'page'

        The default implementation finds nothing; chapters() prefixes
        every returned path with 'root'.
        """
        return []
|
||||
|
||||
|
||||
def safe_request(session, url, method="GET", *args, **kwargs):
|
||||
tries = 0
|
||||
while True:
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
"""Base classes for extractors for FoOlSlide based sites"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, MangaExtractor, Message
|
||||
from .. import text, util
|
||||
import json
|
||||
|
||||
@ -96,24 +96,17 @@ class FoolslideChapterExtractor(Extractor):
|
||||
return json.loads(text.extract(page, needle, ";", pos)[0])
|
||||
|
||||
|
||||
class FoolslideMangaExtractor(Extractor):
|
||||
class FoolslideMangaExtractor(MangaExtractor):
|
||||
"""Base class for manga extractors for FoOlSlide based sites"""
|
||||
subcategory = "manga"
|
||||
scheme = "https"
|
||||
|
||||
def __init__(self, match, url=None):
|
||||
Extractor.__init__(self)
|
||||
self.url = url or self.scheme + "://" + match.group(1)
|
||||
def request(self, url):
|
||||
return MangaExtractor.request(
|
||||
self, url, encoding="utf-8", method="post", data={"adult": "true"}
|
||||
)
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for url in self.chapters():
|
||||
yield Message.Queue, url
|
||||
|
||||
def chapters(self):
|
||||
def chapters(self, page):
|
||||
"""Return a list of all chapter urls"""
|
||||
page = self.request(self.url, encoding="utf-8",
|
||||
method="post", data={"adult": "true"}).text
|
||||
return reversed(list(text.extract_iter(
|
||||
return list(text.extract_iter(
|
||||
page, '<div class="title"><a href="', '"'
|
||||
)))
|
||||
))
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015,2016 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -8,32 +8,21 @@
|
||||
|
||||
"""Extract images from http://www.hbrowse.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, MangaExtractor, Message
|
||||
from .. import text
|
||||
import json
|
||||
|
||||
|
||||
class HbrowseMangaExtractor(Extractor):
|
||||
"""Extractor for mangas from hbrowse.com"""
|
||||
class HbrowseMangaExtractor(MangaExtractor):
|
||||
"""Extractor for manga from hbrowse.com"""
|
||||
category = "hbrowse"
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/?$"]
|
||||
pattern = [r"(?:https?://)?((?:www\.)?hbrowse\.com/\d+)/?$"]
|
||||
reverse = False
|
||||
test = [("http://www.hbrowse.com/10363", {
|
||||
"url": "4d9def5df21c23f8c3d36de2076c189c02ea43bd",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.gid = match.group(1)
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for url in self.get_chapters():
|
||||
yield Message.Queue, url
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
page = self.request("http://www.hbrowse.com/" + self.gid).text
|
||||
def chapters(self, page):
|
||||
needle = '<td class="listMiddle">\n<a class="listLink" href="'
|
||||
return list(text.extract_iter(page, needle, '"'))
|
||||
|
||||
|
@ -1,23 +1,24 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 Mike Fährmann
|
||||
# Copyright 2016-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extract hentaimanga from https://hentai2read.com/"""
|
||||
"""Extract hentai-manga from https://hentai2read.com/"""
|
||||
|
||||
from .common import MangaExtractor
|
||||
from .. import text
|
||||
from . import hentaicdn
|
||||
import re
|
||||
import json
|
||||
|
||||
|
||||
class Hentai2readMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
|
||||
"""Extractor for mangas from hentai2read.com"""
|
||||
class Hentai2readMangaExtractor(MangaExtractor):
|
||||
"""Extractor for hmanga from hentai2read.com"""
|
||||
category = "hentai2read"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/?$"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(hentai2read\.com/[^/]+/?)$"]
|
||||
test = [
|
||||
("http://hentai2read.com/amazon_elixir/", {
|
||||
"url": "d1f87b71d3c97b49a478cdfb6ae96b2d9520ab78",
|
||||
@ -27,16 +28,11 @@ class Hentai2readMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
|
||||
})
|
||||
]
|
||||
|
||||
def __init__(self, match):
|
||||
hentaicdn.HentaicdnMangaExtractor.__init__(self)
|
||||
self.url_title = match.group(1)
|
||||
|
||||
def get_chapters(self):
|
||||
def chapters(self, page):
|
||||
page = text.extract(
|
||||
self.request("http://hentai2read.com/" + self.url_title).text,
|
||||
'<ul class="nav-chapters remove-margin-b">', '</ul>\n</div>'
|
||||
page, '<ul class="nav-chapters remove-margin-b">', '</ul>\n</div>'
|
||||
)[0]
|
||||
return text.extract_iter(page, '<li>\n<a href="', '"')
|
||||
return list(text.extract_iter(page, '<li>\n<a href="', '"'))
|
||||
|
||||
|
||||
class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 Mike Fährmann
|
||||
# Copyright 2016-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -13,19 +13,6 @@ from .. import text
|
||||
import json
|
||||
|
||||
|
||||
class HentaicdnMangaExtractor(Extractor):
|
||||
"""Base class for extractors for mangas"""
|
||||
subcategory = "manga"
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for chapter in reversed(list(self.get_chapters())):
|
||||
yield Message.Queue, chapter
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
|
||||
|
||||
class HentaicdnChapterExtractor(Extractor):
|
||||
"""Base class for extractors for a single manga chapter"""
|
||||
subcategory = "chapter"
|
||||
|
@ -6,17 +6,19 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extract hentaimanga from https://hentaihere.com/"""
|
||||
"""Extract hentai-manga from https://hentaihere.com/"""
|
||||
|
||||
from .common import MangaExtractor
|
||||
from .. import text
|
||||
from . import hentaicdn
|
||||
import re
|
||||
|
||||
|
||||
class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
|
||||
"""Extractor for mangas from hentaihere.com"""
|
||||
class HentaihereMangaExtractor(MangaExtractor):
|
||||
"""Extractor for hmanga from hentaihere.com"""
|
||||
category = "hentaihere"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/?$"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(hentaihere\.com/m/S\d+)/?$"]
|
||||
scheme = "https"
|
||||
test = [
|
||||
("https://hentaihere.com/m/S13812", {
|
||||
"url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
|
||||
@ -26,15 +28,10 @@ class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
|
||||
}),
|
||||
]
|
||||
|
||||
def __init__(self, match):
|
||||
hentaicdn.HentaicdnMangaExtractor.__init__(self)
|
||||
self.gid = match.group(1)
|
||||
|
||||
def get_chapters(self):
|
||||
return text.extract_iter(
|
||||
self.request("https://hentaihere.com/m/S" + self.gid).text,
|
||||
'<li class="sub-chp clearfix">\n<a href="', '"'
|
||||
)
|
||||
def chapters(self, page):
|
||||
return list(text.extract_iter(
|
||||
page, '<li class="sub-chp clearfix">\n<a href="', '"'
|
||||
))
|
||||
|
||||
|
||||
class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 Mike Fährmann
|
||||
# Copyright 2016-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -29,13 +29,9 @@ class KisscomicComicExtractor(KisscomicExtractor,
|
||||
"url": "8c180e2ec2492712b089ca091c54909cb0fe3d4a",
|
||||
})]
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
page = self.request(self.url).text
|
||||
def chapter_paths(self, page):
|
||||
pos = page.find('<div class="list-chapter mCustomScrollbar">')
|
||||
return reversed(list(
|
||||
text.extract_iter(page, '<li><a href="', '"', pos)
|
||||
))
|
||||
return text.extract_iter(page, '<li><a href="', '"', pos)
|
||||
|
||||
|
||||
class KisscomicIssueExtractor(KisscomicExtractor,
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
"""Extract manga-chapters and entire manga from http://kissmanga.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, MangaExtractor, Message
|
||||
from .. import text, cloudflare, aes
|
||||
from ..cache import cache
|
||||
import re
|
||||
@ -38,25 +38,15 @@ class KissmangaExtractor(Extractor):
|
||||
request = cloudflare.request_func
|
||||
|
||||
|
||||
class KissmangaMangaExtractor(KissmangaExtractor):
|
||||
"""Extractor for mangas from kissmanga.com"""
|
||||
subcategory = "manga"
|
||||
class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
|
||||
"""Extractor for manga from kissmanga.com"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/[^/]+/?$"]
|
||||
test = [("http://kissmanga.com/Manga/Dropout", {
|
||||
"url": "992befdd64e178fe5af67de53f8b510860d968ca",
|
||||
})]
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for chapter in self.get_chapters():
|
||||
yield Message.Queue, self.root + chapter
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
page = self.request(self.url).text
|
||||
return reversed(list(
|
||||
text.extract_iter(page, '<td>\n<a href="', '"')
|
||||
))
|
||||
def chapter_paths(self, page):
|
||||
return text.extract_iter(page, '<td>\n<a href="', '"')
|
||||
|
||||
|
||||
class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -8,36 +8,24 @@
|
||||
|
||||
"""Extract manga-chapters and entire manga from http://www.mangahere.co/"""
|
||||
|
||||
from .common import Extractor, AsynchronousExtractor, Message
|
||||
from .common import MangaExtractor, AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
|
||||
class MangahereMangaExtractor(Extractor):
|
||||
"""Extractor for mangas from mangahere.co"""
|
||||
class MangahereMangaExtractor(MangaExtractor):
|
||||
"""Extractor for manga from mangahere.co"""
|
||||
category = "mangahere"
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangahere\.co/manga/([^/]+)/?$"]
|
||||
pattern = [r"(?:https?://)?((?:www\.)?mangahere\.co/manga/[^/]+/?)$"]
|
||||
test = [("http://www.mangahere.co/manga/aria/", {
|
||||
"url": "77d96842292a6a341e8937816ed45cc09b538cf0",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.url = match.group(0) + "/"
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for chapter in self.get_chapters():
|
||||
yield Message.Queue, chapter
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
page = self.request(self.url).text
|
||||
return reversed(list(text.extract_iter(
|
||||
def chapters(self, page):
|
||||
return list(text.extract_iter(
|
||||
page, '<a class="color_0077" href="', '"',
|
||||
page.index('<div class="detail_list">')
|
||||
)))
|
||||
))
|
||||
|
||||
|
||||
class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -14,13 +14,12 @@ from .mangareader import MangareaderMangaExtractor, MangareaderChapterExtractor
|
||||
class MangapandaBase():
|
||||
"""Base class for mangapanda extractors"""
|
||||
category = "mangapanda"
|
||||
url_base = "http://www.mangapanda.com"
|
||||
root = "http://www.mangapanda.com"
|
||||
|
||||
|
||||
class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
|
||||
"""Extractor for mangas from mangapanda.com"""
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/]+)$"]
|
||||
"""Extractor for manga from mangapanda.com"""
|
||||
pattern = [r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/]+)$"]
|
||||
test = [("http://www.mangapanda.com/mushishi", {
|
||||
"url": "50a1ba730b85426b904da256c80f68ba6a8a2566",
|
||||
})]
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015, 2016 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -8,37 +8,23 @@
|
||||
|
||||
"""Extract manga-chapters and entire manga from http://mangapark.me/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, MangaExtractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
class MangaparkMangaExtractor(Extractor):
|
||||
"""Extractor for mangas from mangapark.me"""
|
||||
class MangaparkMangaExtractor(MangaExtractor):
|
||||
"""Extractor for manga from mangapark.me"""
|
||||
category = "mangapark"
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangapark\.me/manga/([^/]+)$"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(mangapark\.me/manga/[^/]+)$"]
|
||||
root = "http://mangapark.me"
|
||||
test = [("http://mangapark.me/manga/mushishi", {
|
||||
"url": "9902e342af71af19a5ac20fcd01950b165acf119",
|
||||
})]
|
||||
url_base = "http://mangapark.me"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.url_title = match.group(1)
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for chapter in self.get_chapters():
|
||||
yield Message.Queue, self.url_base + chapter
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
page = self.request(self.url_base + "/manga/" + self.url_title).text
|
||||
def chapter_paths(self, page):
|
||||
needle = '<a class="ch sts sts_1" target="_blank" href="'
|
||||
pos = page.index('<div id="list" class="book-list">')
|
||||
return reversed(list(
|
||||
text.extract_iter(page, needle, '"', pos)
|
||||
))
|
||||
return text.extract_iter(page, needle, '"', pos)
|
||||
|
||||
|
||||
class MangaparkChapterExtractor(Extractor):
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -8,7 +8,7 @@
|
||||
|
||||
"""Extract manga-chapters and entire manga from http://www.mangareader.net/"""
|
||||
|
||||
from .common import AsynchronousExtractor, Extractor, Message
|
||||
from .common import AsynchronousExtractor, MangaExtractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
@ -17,29 +17,21 @@ class MangareaderBase():
|
||||
category = "mangareader"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
url_base = "http://www.mangareader.net"
|
||||
root = "http://www.mangareader.net"
|
||||
|
||||
|
||||
class MangareaderMangaExtractor(MangareaderBase, Extractor):
|
||||
"""Extractor for mangas from mangareader.net"""
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/]+)$"]
|
||||
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
|
||||
"""Extractor for manga from mangareader.net"""
|
||||
pattern = [r"(?:https?://)?((?:www\.)?mangareader\.net/[^/]+)$"]
|
||||
reverse = False
|
||||
test = [("http://www.mangareader.net/mushishi", {
|
||||
"url": "249042420b67a07b32e7f6be4c7410b6d810b808",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.url_title = match.group(1)
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
url = self.url_base + self.url_title
|
||||
page = self.request(url).text
|
||||
needle = '<a href="' + self.url_title
|
||||
def chapter_paths(self, page):
|
||||
needle = '<div class="chico_manga"></div>\n<a href="'
|
||||
pos = page.index('<div id="readmangasum">')
|
||||
for chapter in text.extract_iter(page, needle, '"', pos):
|
||||
yield Message.Queue, url + chapter
|
||||
return text.extract_iter(page, needle, '"', pos)
|
||||
|
||||
|
||||
class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
@ -61,7 +53,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
self.part, self.url_title, self.chapter = match.groups()
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.url_base + self.part).text
|
||||
page = self.request(self.root + self.part).text
|
||||
data = self.get_job_metadata(page)
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
@ -75,7 +67,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self, chapter_page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
page = self.request(self.url_base + self.url_title).text
|
||||
page = self.request(self.root + self.url_title).text
|
||||
data = {
|
||||
"chapter": self.chapter,
|
||||
"lang": "en",
|
||||
@ -119,7 +111,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
width , pos = extr(page, '<img id="img" width="', '"', pos)
|
||||
height, pos = extr(page, ' height="', '"', pos)
|
||||
image, pos = extr(page, ' src="', '"', pos)
|
||||
return self.url_base + url, image, text.nameext_from_url(image, {
|
||||
return self.root + url, image, text.nameext_from_url(image, {
|
||||
"width": width,
|
||||
"height": height,
|
||||
})
|
||||
|
@ -30,12 +30,8 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
|
||||
"url": "c5a530538a30b176916e30cbe223a93d83cb2691",
|
||||
})]
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter urls"""
|
||||
page = self.request(self.url).text
|
||||
return reversed(list(
|
||||
text.extract_iter(page, ' <li><a href="', '"')
|
||||
))
|
||||
def chapter_paths(self, page):
|
||||
return text.extract_iter(page, ' <li><a href="', '"')
|
||||
|
||||
|
||||
class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor,
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -8,36 +8,25 @@
|
||||
|
||||
"""Extract manga pages from http://www.thespectrum.net/manga_scans/"""
|
||||
|
||||
from .common import Extractor, AsynchronousExtractor, Message
|
||||
from .common import MangaExtractor, AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
class SpectrumnexusMangaExtractor(Extractor):
|
||||
"""Extractor for mangas from thespectrum.net"""
|
||||
class SpectrumnexusMangaExtractor(MangaExtractor):
|
||||
"""Extractor for manga from thespectrum.net"""
|
||||
category = "spectrumnexus"
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?view\.thespectrum\.net/series/([^\.]+)\.html$"]
|
||||
pattern = [r"(?:https?://)?(view\.thespectrum\.net/series/[^.]+\.html)#?$"]
|
||||
reverse = False
|
||||
test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", {
|
||||
"url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb",
|
||||
})]
|
||||
url_base = "http://view.thespectrum.net/series/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.url = self.url_base + match.group(1) + ".html"
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for chapter in self.get_chapters():
|
||||
yield Message.Queue, self.url + "?ch=" + chapter.replace(" ", "+")
|
||||
|
||||
def get_chapters(self):
|
||||
"""Return a list of all chapter identifiers"""
|
||||
page = self.request(self.url).text
|
||||
page = text.extract(
|
||||
page, '<select class="selectchapter"', '</select>'
|
||||
)[0]
|
||||
return text.extract_iter(page, '<option value="', '"')
|
||||
def chapters(self, page):
|
||||
page = text.extract(page, 'class="selectchapter"', '</select>')[0]
|
||||
return [
|
||||
self.url + "?ch=" + chapter.replace(" ", "+")
|
||||
for chapter in text.extract_iter(page, '<option value="', '"')
|
||||
]
|
||||
|
||||
|
||||
class SpectrumnexusChapterExtractor(AsynchronousExtractor):
|
||||
|
Loading…
x
Reference in New Issue
Block a user