allow running a BaseExtractor for any URL by prefixing it with '<base-category>:'

For example:
  shopify:https://partakefoods.com/products/crunchy-cookie-variety-pack
  gelbooru_v01:https://5naf.booru.org/index.php?page=post&s=view&id=46963

Available base categories are:
  mastodon, shopify, moebooru, gelbooru_v01, gelbooru_v02,
  reactor, foolslide, foolfuuka, philomena
This commit is contained in:
Mike Fährmann 2021-12-14 22:58:38 +01:00
parent 299bd2f1f5
commit ad30653b17
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@@ -571,7 +571,11 @@ class BaseExtractor(Extractor):
if not self.category:
for index, group in enumerate(match.groups()):
if group is not None:
self.category, self.root = self.instances[index]
if index:
self.category, self.root = self.instances[index-1]
else:
self.root = group
self.category = group.partition("://")[2]
break
Extractor.__init__(self, match)
@@ -594,7 +598,10 @@ class BaseExtractor(Extractor):
pattern = re.escape(root[root.index(":") + 3:])
pattern_list.append(pattern + "()")
return r"(?:https?://)?(?:" + "|".join(pattern_list) + r")"
return (
r"(?:" + cls.basecategory + r":(https?://[^/?#]+)|"
r"(?:https?://)?(?:" + "|".join(pattern_list) + r"))"
)
class HTTPSAdapter(HTTPAdapter):