allow running a BaseExtractor for any URL

by prefixing it with '<base-category>:'.

For example:
  shopify:https://partakefoods.com/products/crunchy-cookie-variety-pack
  gelbooru_v01:https://5naf.booru.org/index.php?page=post&s=view&id=46963

Available base categories are: mastodon, shopify, moebooru, gelbooru_v01, gelbooru_v02, reactor, foolslide, foolfuuka, philomena
2021-12-14 22:58:38 +01:00 · 2021-12-14 22:58:38 +01:00 · ad30653b17
commit ad30653b17
parent 299bd2f1f5
1 changed file with 9 additions and 2 deletions
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -571,7 +571,11 @@ class BaseExtractor(Extractor):
        if not self.category:
            for index, group in enumerate(match.groups()):
                if group is not None:
-                    self.category, self.root = self.instances[index]
+                    if index:
+                        self.category, self.root = self.instances[index-1]
+                    else:
+                        self.root = group
+                        self.category = group.partition("://")[2]
                    break
        Extractor.__init__(self, match)

@@ -594,7 +598,10 @@ class BaseExtractor(Extractor):
                pattern = re.escape(root[root.index(":") + 3:])
            pattern_list.append(pattern + "()")

-        return r"(?:https?://)?(?:" + "|".join(pattern_list) + r")"
+        return (
+            r"(?:" + cls.basecategory + r":(https?://[^/?#]+)|"
+            r"(?:https?://)?(?:" + "|".join(pattern_list) + r"))"
+        )


 class HTTPSAdapter(HTTPAdapter):