From 749fbbfa6cd2a3d2b2809fda426de91dbd5e7cf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 5 Mar 2018 18:37:21 +0100 Subject: [PATCH] [mangadex] add chapter- and manga-extractor --- CHANGELOG.md | 2 + docs/gallery-dl-example.conf | 136 ++++++++++++++++++++++++++++ docs/supportedsites.rst | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/mangadex.py | 148 +++++++++++++++++++++++++++++++ gallery_dl/util.py | 2 + gallery_dl/version.py | 2 +- test/test_extractors.py | 2 + 8 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 docs/gallery-dl-example.conf create mode 100644 gallery_dl/extractor/mangadex.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ab8da7e..0d56b61a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## Unreleased + ## 1.3.0 - 2018-03-02 - Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76)) - Added options to customize [archive ID formats](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorarchive-format) and [undefined replacement fields](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorkeywords-default) diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf new file mode 100644 index 00000000..6dc586be --- /dev/null +++ b/docs/gallery-dl-example.conf @@ -0,0 +1,136 @@ +{ + "base-directory": "/tmp/", + "netrc": false, + + "downloader": + { + "part": true, + "part-directory": null, + "http": + { + "rate": null, + "retries": 5, + "timeout": 30, + "verify": true + } + }, + "extractor": + { + "archive": null, + "proxy": null, + "skip": true, + "sleep": 0, + + "pixiv": + { + "user": + { + "directory": ["{category}", "{user[id]}"] + }, + "bookmark": + { + "directory": ["{category}", "my bookmarks"] + }, + "ugoira": true, + "username": null, + "password": null + }, + "batoto": + { + "username": null, + "password": null + }, + "exhentai": + { + 
"wait-min": 3, + "wait-max": 6, + "original": true, + "username": null, + "password": null, + "cookies": { + "igneous": null, + "s": null, + "yay": "louder" + } + }, + "nijie": + { + "username": null, + "password": null + }, + "sankaku": + { + "wait-min": 2, + "wait-max": 4, + "username": null, + "password": null + }, + "seiga": + { + "username": null, + "password": null + }, + "gelbooru": + { + "filename": "{category}_{id:>07}_{md5}.{extension}", + "api": true + }, + "reddit": + { + "refresh-token": null, + "comments": 500, + "morecomments": false, + "date-min": 0, + "date-max": 253402210800, + "date-format": "%Y-%m-%dT%H:%M:%S", + "id-min": "0", + "id-max": "ZIK0ZJ", + "recursion": 0 + }, + "flickr": + { + "access-token": null, + "access-token-secret": null, + "metadata": false, + "size-max": null + }, + "deviantart": + { + "refresh-token": null, + "flat": true, + "mature": true, + "original": true + }, + "gfycat": + { + "format": "mp4" + }, + "imgur": + { + "mp4": true + }, + "tumblr": + { + "posts": "photo", + "inline": false, + "reblogs": true, + "external": false + }, + "recursive": + { + "blacklist": ["directlink", "oauth", "recursive", "test"] + }, + "oauth": + { + "browser": true + } + }, + "output": + { + "mode": "auto", + "shorten": true, + "progress": true, + "logfile": null, + "unsupportedfile": null + } +} diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index dbbddcbe..b3180e78 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -47,6 +47,7 @@ Luscious https://luscious.net/ Albums Manga Fox http://fanfox.net/ Chapters Manga Here http://www.mangahere.co/ Chapters, Manga Manga Stream https://mangastream.com/ Chapters +Mangadex https://mangadex.org/ Chapters, Manga Mangapanda https://www.mangapanda.com/ Chapters, Manga MangaPark https://mangapark.me/ Chapters, Manga Mangareader https://www.mangareader.net/ Chapters, Manga diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 
18819abf..b1b97d00 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -51,6 +51,7 @@ modules = [
     "konachan",
     "loveisover",
     "luscious",
+    "mangadex",
     "mangafox",
     "mangahere",
     "mangapanda",
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
new file mode 100644
index 00000000..0b59b801
--- /dev/null
+++ b/gallery_dl/extractor/mangadex.py
@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract manga-chapters and entire manga from https://mangadex.org/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, util
+from urllib.parse import urljoin
+import json
+import re
+
+
+class MangadexExtractor():
+    """Base class for mangadex extractors"""
+    category = "mangadex"
+    root = "https://mangadex.org"
+
+
+class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
+    """Extractor for manga-chapters from mangadex.org"""
+    pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
+    test = [
+        ("https://mangadex.org/chapter/122094", {
+            "keyword": "b4c83fe41f125eae745c2e00d29e087cc4eb78df",
+            "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
+        }),
+        # oneshot
+        ("https://mangadex.org/chapter/138086", {
+            "count": 64,
+            "keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
+        }),
+    ]
+
+    def __init__(self, match):
+        self.chapter_id = match.group(1)
+        url = self.root + "/chapter/" + self.chapter_id
+        ChapterExtractor.__init__(self, url)
+
+    def get_metadata(self, page):
+        """Return a dict of chapter metadata parsed from the HTML in 'page'"""
+        info    , pos = text.extract(page, '="og:title" content="', '"')
+        manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
+        # the next two calls only advance 'pos'; their values are unused
+        _       , pos = text.extract(page, ' id="jump_group"', '', pos)
+        _       , pos = text.extract(page, ' selected ', '', pos)
+        # look for the language title starting 100 characters before 'pos'
+        language, ___ = text.extract(page, " title='", "'", pos-100)
+        group   , pos = text.extract(page, '>', '<', pos)
+
+        info = text.unescape(info)
+        match = re.match(
+            r"(?:(?:Vol\. (\d+) )?Ch\. (\d+)([^ ]*)|(.*)) "
+            r"\(([^)]+)\)",
+            info)
+
+        # str.rstrip() removes any trailing characters from the given set
+        # rather than a literal suffix; cut the exact suffix instead
+        chapter_string = info
+        suffix = " - MangaDex"
+        if chapter_string.endswith(suffix):
+            chapter_string = chapter_string[:-len(suffix)]
+
+        return {
+            "manga": match.group(5),
+            "manga_id": util.safe_int(manga_id),
+            "volume": util.safe_int(match.group(1)),
+            "chapter": util.safe_int(match.group(2)),
+            "chapter_minor": match.group(3) or "",
+            "chapter_id": util.safe_int(self.chapter_id),
+            "chapter_string": chapter_string,
+            "group": text.unescape(group),
+            "lang": util.language_to_code(language),
+            "language": language,
+        }
+
+    def get_images(self, page):
+        """Return a list of (image-URL, None) tuples built from 'page'"""
+        dataurl , pos = text.extract(page, "var dataurl = '", "'")
+        pagelist, pos = text.extract(page, "var page_array = [", "]", pos)
+        server  , pos = text.extract(page, "var server = '", "'", pos)
+
+        base = urljoin(self.root, server + dataurl + "/")
+
+        # rewrite the JavaScript array body so that json.loads() accepts it
+        return [
+            (base + filename, None)
+            for filename in json.loads(
+                "[" + pagelist.replace("'", '"').rstrip(",") + "]"
+            )
+        ]
+
+
+class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
+    """Extractor for manga from mangadex.org"""
+    pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
+    test = [
+        ("https://mangadex.org/manga/2946/souten-no-koumori", {
+            "url": "9e77934759828458d0424473922e41f348719472",
+            "keywords": {
+                "manga": "Souten no Koumori",
+                "manga_id": 2946,
+                "title": "Oneshot",
+                "volume": int,
+                "chapter": int,
+                "chapter_minor": str,
+                "chapter_id": int,
+                "group": str,
+                "contributor": str,
+                "date": str,
+                "views": int,
+                "lang": str,
+                "language": str,
+            },
+        }),
+    ]
+
+    def chapters(self, page):
+        """Return a list of (chapter-URL, metadata) tuples found in 'page'"""
+        results = []
+        extr = text.extract
+
+        manga = text.unescape(extr(
+            page, '"og:title" content="', '"')[0].rpartition(" (")[0])
+        manga_id = util.safe_int(extr(
+            page, '/images/manga/', '.')[0])
+
+        # NOTE(review): the row and cell delimiters below arrived as empty
+        # strings (the HTML tags appear to have been stripped in transit);
+        # "<tr id=" / "</tr>" and "<td>" / "</td>" are reconstructions --
+        # confirm them against the chapter table markup before merging
+        for info in text.extract_iter(page, "<tr id=", "</tr>"):
+            chid    , pos = extr(info, 'data-chapter-id="', '"')
+            chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
+            volume  , pos = extr(info, 'data-volume-num="', '"', pos)
+            title   , pos = extr(info, 'data-chapter-name="', '"', pos)
+            language, pos = extr(info, " title='", "'", pos)
+            group   , pos = extr(info, "<td>", "</td>", pos)
+            user    , pos = extr(info, "<td>", "</td>", pos)
+            views   , pos = extr(info, ">", "<", pos)
+            date    , pos = extr(info, ' datetime="', '"', pos)
+
+            chapter, sep, minor = chapter.partition(".")
+
+            results.append((self.root + "/chapter/" + chid, {
+                "manga": manga,
+                "manga_id": manga_id,
+                "title": text.unescape(title),
+                "volume": util.safe_int(volume),
+                "chapter": util.safe_int(chapter),
+                "chapter_minor": sep + minor,
+                "chapter_id": util.safe_int(chid),
+                "group": text.unescape(text.remove_html(group)),
+                "contributor": text.remove_html(user),
+                "views": util.safe_int(views),
+                "date": date,
+                "lang": util.language_to_code(language),
+                "language": language,
+            }))
+
+        return results
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 059d8468..3a5a7ab3 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -155,6 +155,8 @@ def language_to_code(lang, default=None):
 
 CODES = {
     "ar": "Arabic",
+    "bg": "Bulgarian",
+    "ca": "Catalan",
     "cs": "Czech",
     "da": "Danish",
     "de": "German",
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 6d3630a8..af9570e8 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.3.0"
+__version__ = "1.3.1-dev"
diff --git a/test/test_extractors.py b/test/test_extractors.py
index 80cd4f4e..28a90efa 100644
--- a/test/test_extractors.py
+++ b/test/test_extractors.py
@@ -18,7 +18,9 @@ SKIP = {
     "archivedmoe", "archiveofsins", "thebarchive",
 
     # temporary issues
+    "imgchili",
     "powermanga",
+    "pinterest",
 }
 
 