rewrite extractors to use config-module

2015-10-05 15:35:48 +02:00 · 2015-10-05 15:35:48 +02:00 · 3c13548f29
commit 3c13548f29
parent 608d3193a9
19 changed files with 54 additions and 56 deletions
--- a/gallery_dl/download.py
+++ b/gallery_dl/download.py
@@ -112,13 +112,11 @@ class DownloadJob():
        scheme = url[:pos] if pos != -1 else "http"
        if scheme == "https":
            scheme = "http"
-
        downloader = self.downloaders.get(scheme)
        if downloader is None:
            module = self.mngr.get_downloader_module(scheme)
            downloader = module.Downloader()
            self.downloaders[scheme] = downloader
-
        return downloader

    @staticmethod
@@ -148,7 +146,7 @@ class ExtractorFinder():
        if match:
            module = importlib.import_module(".extractor." + name, __package__)
            klass = getattr(module, module.info["extractor"])
-            return klass(match, {}), module.info
+            return klass(match), module.info
        else:
            print("no suitable extractor found")
            return None, None
@@ -158,9 +156,9 @@ class ExtractorFinder():
        for category in config.get(("extractor",)):
            patterns = config.get(("extractor", category, "pattern"), default=[])
            for pattern in patterns:
-                    match = re.match(pattern, url)
-                    if match:
-                        return category, match
+                match = re.match(pattern, url)
+                if match:
+                    return category, match
        for category, info in self.extractor_metadata():
            for pattern in info["pattern"]:
                match = re.match(pattern, url)
--- a/gallery_dl/extractor/3dbooru.py
+++ b/gallery_dl/extractor/3dbooru.py
@@ -22,8 +22,8 @@ info = {

 class ThreeDeeBooruExtractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "http://behoimi.org/post/index.json"
        self.headers = {
            "Referer": "http://behoimi.org/post/show/",
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -25,8 +25,8 @@ class FourChanExtractor(ChanExtractor):
    api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
    file_url = "https://i.4cdn.org/{board}/{tim}{ext}"

-    def __init__(self, match, config):
+    def __init__(self, match):
        ChanExtractor.__init__(
-            self, config, info["category"],
+            self, info["category"],
            match.group(1), match.group(2)
        )
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -25,8 +25,8 @@ class InfinityChanExtractor(ChanExtractor):
    api_url = "https://8ch.net/{board}/res/{thread}.json"
    file_url = "https://media.8ch.net/{board}/src/{tim}{ext}"

-    def __init__(self, match, config):
+    def __init__(self, match):
        ChanExtractor.__init__(
-            self, config, info["category"],
+            self, info["category"],
            match.group(1), match.group(2)
        )
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -27,8 +27,8 @@ class BatotoExtractor(AsynchronousExtractor):

    url_base = "http://bato.to/read/_/"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.chapter_id = match.group(1)

    def items(self):
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -19,8 +19,8 @@ class BooruExtractor(SequentialExtractor):

    api_url = ""

-    def __init__(self, match, config, info):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, match, info):
+        SequentialExtractor.__init__(self)
        self.info = info
        self.tags = text.unquote(match.group(1))
        self.page = "page"
--- a/gallery_dl/extractor/chan.py
+++ b/gallery_dl/extractor/chan.py
@@ -10,15 +10,14 @@

 from .common import SequentialExtractor, Message
 from .. import text
-import re

 class ChanExtractor(SequentialExtractor):

    api_url = ""
    file_url = ""

-    def __init__(self, config, category, board, thread):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, category, board, thread):
+        SequentialExtractor.__init__(self)
        self.metadata = {
            "category": category,
            "board": board,
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -12,7 +12,7 @@ import time
 import queue
 import requests
 import threading
-import html.parser
+from .. import config


 class Message():
@@ -47,15 +47,15 @@ class Extractor():

 class SequentialExtractor(Extractor):

-    def __init__(self, _):
+    def __init__(self):
        Extractor.__init__(self)


 class AsynchronousExtractor(Extractor):

-    def __init__(self, config):
+    def __init__(self):
        Extractor.__init__(self)
-        queue_size = int(config.get("general", "queue-size", fallback=5))
+        queue_size = int(config.get(("queue-size",), default=5))
        self.__queue = queue.Queue(maxsize=queue_size)
        self.__thread = threading.Thread(target=self.async_items, daemon=True)

--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -22,6 +22,6 @@ info = {

 class DanbooruExtractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "https://danbooru.donmai.us/posts.json"
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -23,6 +23,6 @@ info = {

 class E621Extractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "https://e621.net/post/index.json"
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -22,8 +22,8 @@ info = {

 class GelbooruExtractor(XMLBooruExtractor):

-    def __init__(self, match, config):
-        XMLBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        XMLBooruExtractor.__init__(self, match, info)
        self.api_url = "http://gelbooru.com/"
        self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}

--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -25,8 +25,8 @@ class ImagebamExtractor(AsynchronousExtractor):

    url_base = "http://www.imagebam.com"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.match = match
        self.num = 0
        self.metadata = {}
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@@ -26,8 +26,8 @@ class ImgboxExtractor(AsynchronousExtractor):

    url_base = "http://imgbox.com"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.key = match.group(1)
        self.metadata = {}

--- a/gallery_dl/extractor/imgchili.py
+++ b/gallery_dl/extractor/imgchili.py
@@ -24,8 +24,8 @@ info = {

 class ImgchiliExtractor(SequentialExtractor):

-    def __init__(self, match, config):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, match):
+        SequentialExtractor.__init__(self)
        self.match = match
        self.num = 0

--- a/gallery_dl/extractor/mangareader.py
+++ b/gallery_dl/extractor/mangareader.py
@@ -28,8 +28,8 @@ class MangaReaderExtractor(AsynchronousExtractor):

    url_base = "http://www.mangareader.net"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.part = match.group(1)

    def items(self):
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -9,7 +9,7 @@
 """Extract images from https://nijie.info/"""

 from .common import AsynchronousExtractor, Message
-from ..text  import filename_from_url
+from .. import config, text
 import re

 info = {
@@ -26,8 +26,8 @@ class NijieExtractor(AsynchronousExtractor):

    popup_url = "https://nijie.info/view_popup.php?id="

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.artist_id = match.group(1)
        self.artist_url = (
            "https://nijie.info/members_illust.php?id="
@@ -36,7 +36,9 @@ class NijieExtractor(AsynchronousExtractor):
        self.session.headers["Referer"] = self.artist_url
        self.session.cookies["R18"] = "1"
        self.session.cookies["nijie_referer"] = "nijie.info"
-        self.session.cookies.update(config["nijie-cookies"])
+        self.session.cookies.update(
+            config.get(("extractor", info["category"], "cookies"))
+        )

    def items(self):
        data = self.get_job_metadata()
@@ -56,19 +58,19 @@ class NijieExtractor(AsynchronousExtractor):

    def get_image_ids(self):
        """Collect all image-ids for a specific artist"""
-        text = self.request(self.artist_url).text
+        page = self.request(self.artist_url).text
        regex = r'<a href="/view\.php\?id=(\d+)"'
-        return [m.group(1) for m in re.finditer(regex, text)]
+        return [m.group(1) for m in re.finditer(regex, page)]

    def get_image_data(self, image_id):
        """Get URL and metadata for images specified by 'image_id'"""
-        text = self.request(self.popup_url + image_id).text
-        matches = re.findall('<img src="([^"]+)"', text)
+        page = self.request(self.popup_url + image_id).text
+        matches = re.findall('<img src="([^"]+)"', page)
        for index, url in enumerate(matches):
            yield "https:" + url, {
                "count": len(matches),
                "index": index,
                "image-id": image_id,
-                "name" : filename_from_url(url),
+                "name" : text.filename_from_url(url),
                "extension": url[url.rfind(".")+1:],
            }
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -9,7 +9,7 @@
 """Extract images and ugoira from http://www.pixiv.net/"""

 from .common import SequentialExtractor, Message
-from .. import text
+from .. import config, text
 import re
 import json

@@ -29,16 +29,15 @@ class PixivExtractor(SequentialExtractor):
    member_url = "http://www.pixiv.net/member_illust.php"
    illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"

-    def __init__(self, match, config):
-        SequentialExtractor.__init__(self, config)
-        self.config = config
+    def __init__(self, match):
+        SequentialExtractor.__init__(self)
        self.artist_id = match.group(1)
        self.api = PixivAPI(self.session)

    def items(self):
        self.api.login(
-            self.config.get("pixiv", "username"),
-            self.config.get("pixiv", "password"),
+            config.get(("extractor", "pixiv", "username")),
+            config.get(("extractor", "pixiv", "password")),
        )
        metadata = self.get_job_metadata()

--- a/gallery_dl/extractor/redhawkscans.py
+++ b/gallery_dl/extractor/redhawkscans.py
@@ -28,8 +28,8 @@ class RedHawkScansExtractor(SequentialExtractor):

    url_base = "https://manga.redhawkscans.com/reader/read/"

-    def __init__(self, match, config):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, match):
+        SequentialExtractor.__init__(self)
        self.part = match.group(1)

    def items(self):
--- a/gallery_dl/extractor/yandere.py
+++ b/gallery_dl/extractor/yandere.py
@@ -22,6 +22,6 @@ info = {

 class YandereExtractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "https://yande.re/post.json"