implement extractor.add() and .add_module()

... as a public and non-hacky way to add (external) extractors to
gallery-dl's pool and make them available for extractor.find()
This commit is contained in:
Mike Fährmann 2018-02-02 00:01:41 +01:00
parent a34cebc253
commit 6a07e38366
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 23 additions and 11 deletions

View File

@ -103,6 +103,23 @@ def find(url):
return None
def add(klass):
    """Add 'klass' to the list of available extractors

    Every entry of 'klass.pattern' (an iterable of regular-expression
    strings) is compiled and stored in the module-level '_cache' as a
    (compiled pattern, klass) tuple.
    """
    # Iterate over the class's 'pattern' attribute, not the class object
    # itself: a plain class is not iterable, and add_module() registers
    # extractors through 'klass.pattern' as well.
    # All patterns are compiled before '_cache' is touched, so an invalid
    # regex does not leave the class half-registered.
    _cache.extend([
        (re.compile(pattern), klass)
        for pattern in klass.pattern
    ])
def add_module(module):
    """Register every extractor class found in 'module'

    Returns the list of newly created (compiled pattern, class) tuples,
    which is also appended to the module-level '_cache'.
    """
    compiled = []
    for klass in _get_classes(module):
        for pattern in klass.pattern:
            compiled.append((re.compile(pattern), klass))
    # Extend the cache only after all patterns have been compiled.
    _cache.extend(compiled)
    return compiled
def extractors():
"""Yield all available extractor classes"""
return sorted(
@ -139,14 +156,9 @@ def _list_patterns():
yield from _cache
for module_name in _module_iter:
module = importlib.import_module("."+module_name, __package__)
tuples = [
(re.compile(pattern), klass)
for klass in _get_classes(module)
for pattern in klass.pattern
]
_cache.extend(tuples)
yield from tuples
yield from add_module(
importlib.import_module("."+module_name, __package__)
)
def _get_classes(module):

View File

@ -26,6 +26,6 @@ class PowermangaMangaExtractor(foolslide.FoolslideMangaExtractor):
category = "powermanga"
pattern = foolslide.manga_pattern(r"read\.powermanga\.org")
test = [("https://read.powermanga.org/series/one_piece/", {
"url": "6ba226780a3c1c1f1cc5f4a4b96c18260f4ec0f3",
"keyword": "576109177b1bb59ab2f55450cc9ef4a31e28714c",
"url": "3b2037a9ffe30ea0da4e710a40863f0693f21afe",
"keyword": "e2a924b0924cba711e78b3585ad24a97dec70006",
})]

View File

@ -30,7 +30,7 @@ class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")]
test = [("http://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
"content": "a01768c6f86f32eb7ebbdeb87c30b0d9968d7f97",
"pattern": r"https?://b?img\.rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"pattern": r"https?://(.?img\.)?rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"count": 2,
})]