more extractor test-cases

This commit is contained in:
Mike Fährmann 2015-12-14 03:00:58 +01:00
parent a99fdb0d1e
commit ba99506c72
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
16 changed files with 136 additions and 2 deletions

View File

@ -23,13 +23,25 @@ class ThreeDeeBooruTagExtractor(ThreeDeeBooruExtractor, booru.BooruTagExtractor)
"""Extract images from 3dbooru based on search-tags"""
subcategory = "tag"
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+)"]
test = [("http://behoimi.org/post/index?tags=himekawa_azuru", {
"url": "6f6f485fb92629bc22a5df18b7cbb3ac13ae46b2",
"keyword": "df2f7b1fb0efb739eed55788de700a1b834d2896",
})]
class ThreeDeeBooruPoolExtractor(ThreeDeeBooruExtractor, booru.BooruPoolExtractor):
    """Extractor for the images of an image-pool on 3dbooru"""
    subcategory = "pool"
    # The single capture group is the numeric pool id taken from the URL.
    pattern = [
        r"(?:https?://)?(?:www\.)?behoimi\.org"
        r"/pool/show/(\d+)",
    ]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [
        (
            "http://behoimi.org/pool/show/27",
            {
                "url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
                "keyword": "ea76fd6cef4430bee403d080bf173af829a4390a",
            },
        ),
    ]
class ThreeDeeBooruPostExtractor(ThreeDeeBooruExtractor, booru.BooruPostExtractor):
    """Extractor for a single image (post) on 3dbooru"""
    subcategory = "post"
    # The single capture group is the numeric post id taken from the URL.
    pattern = [
        r"(?:https?://)?(?:www\.)?behoimi\.org"
        r"/post/show/(\d+)",
    ]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [
        (
            "http://behoimi.org/post/show/140852",
            {
                "url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
                "keyword": "ebde54ed04e1de7d1fd819728963f754b77a693e",
            },
        ),
    ]

View File

@ -21,7 +21,6 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"]
test = [("https://danbooru.donmai.us/posts?tags=heath_ledger", {
"url": "a261c33f117c7395f0eac54091075e67c8e66fca",
"keyword": "fc4685c98aedaf2383384d47af4f7bd257c40f32",
})]
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):

View File

@ -22,13 +22,25 @@ class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)",
]
test = [("https://e621.net/post/index/1/anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
"keyword": "573152cda6e193f57c9042fcfc561c45865ef0c2",
})]
class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
    """Extractor for the images of an image-pool on e621"""
    subcategory = "pool"
    # The single capture group is the numeric pool id taken from the URL.
    pattern = [
        r"(?:https?://)?(?:www\.)?e621\.net"
        r"/pool/show/(\d+)",
    ]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [
        (
            "https://e621.net/pool/show/73",
            {
                "url": "842f2fb065c7c339486a9b1d689020b8569888ed",
                "keyword": "c0c7cc1e7721607dc6a94e052664c14985b1d404",
            },
        ),
    ]
class E621PostExtractor(E621Extractor, booru.BooruPostExtractor):
    """Extractor for a single image (post) on e621"""
    subcategory = "post"
    # The single capture group is the numeric post id taken from the URL.
    pattern = [
        r"(?:https?://)?(?:www\.)?e621\.net"
        r"/post/show/(\d+)",
    ]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [
        (
            "https://e621.net/post/show/535",
            {
                "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
                "keyword": "f2309ce2bf1f7fb9403756a2789f71738ca71231",
            },
        ),
    ]

View File

@ -33,6 +33,10 @@ class GelbooruTagExtractor(GelbooruExtractor, booru.BooruTagExtractor):
"""Extract images from gelbooru based on search-tags"""
subcategory = "tag"
pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"]
test = [("http://gelbooru.com/index.php?page=post&s=list&tags=heath_ledger", {
"url": "907cd80f0c1635b96eeb4b970c19f5ad9ab93414",
"keyword": "71a22b7434d326a44580bc933221c2c50fed8d4a",
})]
# TODO: find out how to access pools via gelbooru-api
# class GelbooruPoolExtractor(GelbooruExtractor, booru.BooruPoolExtractor):
@ -44,3 +48,6 @@ class GelbooruPostExtractor(GelbooruExtractor, booru.BooruPostExtractor):
"""Extract single images from gelbooru"""
subcategory = "post"
pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=view&id=(\d+)"]
test = [("http://gelbooru.com/index.php?page=post&s=view&id=313638", {
"url": "9154c1edad734f0bacd2445c5b7540804b59f2ef",
})]

View File

@ -21,6 +21,10 @@ class HentaiFoundryUserExtractor(Extractor):
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)/?$",
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
]
test = [("http://www.hentai-foundry.com/pictures/user/Orzy", {
"url": "236ac02c8f081fee44ad2c2571bf74615633b91e",
"keyword": "f5f1aa78ecbe390fb117a0b599f771cd47df86c6",
})]
url_base = "http://www.hentai-foundry.com/pictures/user/"
def __init__(self, match):
@ -109,6 +113,10 @@ class HentaiFoundryImageExtractor(Extractor):
filename_fmt = "{category}_{index}_{title}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/"
r"([^/]+)/(\d+)/[^/]+")]
test = [("http://www.hentai-foundry.com/pictures/user/Orzy/76940/Youmu-Konpaku", {
"url": "50c267b2b2983b98b18fd0d2acbec8ce5ba64c77",
"keyword": "8c9b7054b78fb4f52982c3f21f3ba2a9fcdd5428",
})]
def __init__(self, match):
Extractor.__init__(self)

View File

@ -18,6 +18,10 @@ class HitomiExtractor(Extractor):
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}"
pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"]
test = [("http://hitomi.la/galleries/867789.html", {
"url": "23fd59894c3db65aec826aa5efb85f96d2384883",
"keyword": "80395a06b6ba24842c15121d142830bb467ae68b",
})]
def __init__(self, match):
Extractor.__init__(self)

View File

@ -19,13 +19,25 @@ class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
"""Extract images from konachan based on search-tags"""
subcategory = "tag"
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"]
test = [("http://konachan.com/post?tags=batman_(series)", {
"url": "3bc7d258f74854002028ae861f2977835a022454",
"keyword": "e5c5767a0d3968be5465b1d00817467bf9fac1b1",
})]
class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
    """Extractor for the images of an image-pool on konachan"""
    subcategory = "pool"
    # The single capture group is the numeric pool id taken from the URL.
    pattern = [
        r"(?:https?://)?(?:www\.)?konachan\.com"
        r"/pool/show/(\d+)",
    ]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [
        (
            "http://konachan.com/pool/show/5",
            {
                "url": "27f0b7bc60bb8961612005b53c8d46cf76272003",
                "keyword": "9d1eba1c4adbf751f4b5dac2f79eb4dbec1ca577",
            },
        ),
    ]
class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor):
    """Extractor for a single image (post) on konachan"""
    subcategory = "post"
    # The single capture group is the numeric post id taken from the URL.
    pattern = [
        r"(?:https?://)?(?:www\.)?konachan\.com"
        r"/post/show/(\d+)",
    ]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [
        (
            "http://konachan.com/post/show/141341",
            {
                "url": "3bc7d258f74854002028ae861f2977835a022454",
                "keyword": "df1ce9be720e335f68eca1a53d3df6cf727b6372",
            },
        ),
    ]

View File

@ -17,6 +17,9 @@ class MangaHereMangaExtractor(Extractor):
category = "mangahere"
subcategory = "manga"
pattern = [r"(?:https?://)?(?:www\.)?mangahere\.co/manga/([^/]+)/?$"]
test = [("http://www.mangahere.co/manga/aria/", {
"url": "77d96842292a6a341e8937816ed45cc09b538cf0",
})]
def __init__(self, match):
Extractor.__init__(self)
@ -44,6 +47,10 @@ class MangaHereChapterExtractor(AsynchronousExtractor):
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/"
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", {
"url": "c807532e919af7600fe0ef21fb89c5062637dd87",
"keyword": "f342e3df9fa39eb10cf7ba5ef3300df6ad77f332",
})]
url_fmt = "http://www.mangahere.co/manga/{}/{}.html"
def __init__(self, match):

View File

@ -17,6 +17,9 @@ class MangaMintMangaExtractor(Extractor):
category = "mangamint"
subcategory = "manga"
pattern = [r"(?:https?://)?(?:www\.)?mangamint\.com(/manga/[^\?]+-manga)"]
test = [("www.mangamint.com/manga/mushishi-manga", {
"url": "df7a1f4224d23e392ec09d4c7bbd4fbc873327d0",
})]
url_base = "https://www.mangamint.com"
def __init__(self, match):
@ -50,6 +53,10 @@ class MangaMintChapterExtractor(Extractor):
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?mangamint\.com/([^\?]+-(\d+))"]
test = [("http://www.mangamint.com/mushishi-1", {
"url": "eb1669d7043a17b79ccc92454c2676200628800c",
"keyword": "ca4ba6fa84367fd7c345879a17ebaad39b589da5",
})]
def __init__(self, match):
Extractor.__init__(self)

View File

@ -20,7 +20,9 @@ class MangaPandaMangaExtractor(MangaPandaBase, MangaReaderMangaExtractor):
"""Extract all manga-chapters from mangapanda"""
subcategory = "manga"
pattern = [r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/]+)$"]
test = [("http://www.mangapanda.com/mushishi", {
"url": "50a1ba730b85426b904da256c80f68ba6a8a2566",
})]
class MangaPandaChapterExtractor(MangaPandaBase, MangaReaderChapterExtractor):
"""Extract a single manga-chapter from mangapanda"""
@ -29,3 +31,7 @@ class MangaPandaChapterExtractor(MangaPandaBase, MangaReaderChapterExtractor):
r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
]
test = [("http://www.mangapanda.com/red-storm/2", {
"url": "4bf4ddf6c50105ec8a37675495ab80c46608275d",
"keyword": "dcb8d655e3f461738c821819bbb8d017bd916713",
})]

View File

@ -16,6 +16,9 @@ class MangaparkMangaExtractor(Extractor):
category = "mangapark"
subcategory = "manga"
pattern = [r"(?:https?://)?(?:www\.)?mangapark\.me/manga/([^/]+)$"]
test = [("http://mangapark.me/manga/mushishi", {
"url": "9902e342af71af19a5ac20fcd01950b165acf119",
})]
url_base = "http://mangapark.me"
def __init__(self, match):
@ -45,6 +48,16 @@ class MangaparkChapterExtractor(Extractor):
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.me/manga/"
r"([^/]+/s(\d+)(?:/v(\d+))?/c(\d+)(?:(\.\d+)|/e(\d+))?)")]
test = [
("http://mangapark.me/manga/ad-astra-per-aspera-hata-kenjirou/s1/c1.2/1", {
"url": "f325ce264df390c5ba9607c52a7e7b0829672404",
"keyword": "480a114319e42c561079ffe138afd67e22a74cd3",
}),
("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
"url": "8534c8286a18c4db47606f84a4df9f1a42bab291",
"keyword": "f96962442cdd5bc957603831c695159d974b7b93",
})
]
def __init__(self, match):
Extractor.__init__(self)

View File

@ -23,6 +23,9 @@ class MangaReaderMangaExtractor(MangaReaderBase, Extractor):
"""Extract all manga-chapters from mangareader"""
subcategory = "manga"
pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/]+)$"]
test = [("http://www.mangareader.net/mushishi", {
"url": "249042420b67a07b32e7f6be4c7410b6d810b808",
})]
def __init__(self, match):
Extractor.__init__(self)
@ -45,6 +48,10 @@ class MangaReaderChapterExtractor(MangaReaderBase, AsynchronousExtractor):
r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
]
test = [("http://www.mangareader.net/karate-shoukoushi-kohinata-minoru/11", {
"url": "84ffaab4c027ef9022695c53163c3aeabd07ca58",
"keyword": "0df7db81a44ef642922aab798c303d60e2b6802d",
})]
def __init__(self, match):
AsynchronousExtractor.__init__(self)

View File

@ -16,6 +16,9 @@ class MangaShareMangaExtractor(Extractor):
category = "mangashare"
subcategory = "manga"
pattern = [r"(?:https?://)?read\.mangashare\.com/[^/]+$"]
test = [("http://read.mangashare.com/Gantz", {
"url": "c3b9153d99200ddd2fae0194dad903ccb815e9e7",
})]
def __init__(self, match):
Extractor.__init__(self)
@ -41,6 +44,10 @@ class MangaShareChapterExtractor(AsynchronousExtractor):
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"]
test = [("http://read.mangashare.com/Gantz/chapter-331/page001.html", {
"url": "2980fb9548e809dea63d104bc514dcc33bdd9ef7",
"keyword": "4872a5645ab79cb9ecf363a5bf4cb9062fd61eef",
})]
url_fmt = "http://read.mangashare.com/{}/page{:>03}.html"
def __init__(self, match):

View File

@ -21,6 +21,10 @@ class PixivUserExtractor(Extractor):
directory_fmt = ["{category}", "{artist-id}-{artist-nick}"]
filename_fmt = "{category}_{artist-id}_{id}{num}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)"]
test = [("http://www.pixiv.net/member_illust.php?id=173530", {
"url": "8f2fc0437e2095ab750c4340a4eba33ec6269477",
"keyword": "315d6fc710cddfecbe0bc030ff04930537af0ce7",
})]
member_url = "http://www.pixiv.net/member_illust.php"
illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"
@ -136,6 +140,10 @@ class PixivWorkExtractor(PixivUserExtractor):
subcategory = "work"
pattern = [(r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php"
r"\?(?:[^&]+&)*illust_id=(\d+)")]
test = [("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=966412", {
"url": "efb622f065b0871e92195e7bee0b4d75bd687d8d",
"keyword": "abc22b8d70c67e9884fdec8d851b57a9d29d5890",
})]
def __init__(self, match):
PixivUserExtractor.__init__(self, match)
@ -156,6 +164,10 @@ class PixivFavoriteExtractor(PixivUserExtractor):
subcategory = "favorite"
directory_fmt = ["{category}", "bookmarks", "{artist-id}-{artist-nick}"]
pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/bookmark\.php\?id=(\d+)"]
test = [("http://www.pixiv.net/bookmark.php?id=173530", {
"url": "0110c5c2ee9612a0362e26f7481a8916b6f410fe",
"keyword": "ebf15d8fe9ce99bff61a3a6d98418d898141d9a0",
})]
def __init__(self, match):
PixivUserExtractor.__init__(self, match)
@ -169,6 +181,7 @@ class PixivBookmarkExtractor(PixivFavoriteExtractor):
"""Extract all favorites/bookmarks of your own account"""
subcategory = "bookmark"
pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/bookmark\.php()$"]
test = []
def __init__(self, match):
PixivFavoriteExtractor.__init__(self, match)

View File

@ -29,8 +29,16 @@ class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
"""Extract images from safebooru based on search-tags"""
subcategory = "tag"
pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"]
test = [("http://safebooru.org/index.php?page=post&s=list&tags=heath_ledger", {
"url": "72f17ad6f8254595b56f7e5dd1947d8b51b1ba9b",
"keyword": "79670e1de47e39352fe71f482ece003cdf8e4512",
})]
class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
    """Extractor for a single image (post) on safebooru"""
    subcategory = "post"
    # The single capture group is the numeric value of the "id" query
    # parameter; the "index.php" path component is optional.
    pattern = [
        r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
        r"\?page=post&s=view&id=(\d+)",
    ]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [
        (
            "http://safebooru.org/index.php?page=post&s=view&id=1169132",
            {
                "url": "bcb6047665729c7c9db243a27f41cbef9af1ecef",
                "keyword": "e2d9a87a66d89eb68d3e3420075c3be3c7ca530a",
            },
        ),
    ]

View File

@ -19,13 +19,25 @@ class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
"""Extract images from yandere based on search-tags"""
subcategory = "tag"
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"]
test = [("https://yande.re/post?tags=yuuki_itsuka", {
"url": "a6df238d4657736eaae9840a0b6a68fb290aa6d5",
"keyword": "7699bf0fd1dad622c8806f6193fb79f12d40c138",
})]
class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
    """Extract image-pools from yandere"""
    subcategory = "pool"
    # The single capture group is the numeric pool id taken from the URL.
    # The dot in the host name is escaped so that it only matches a literal
    # "." instead of any character (an unescaped "." would also accept
    # hosts such as "yandexre").
    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [("https://yande.re/pool/show/12", {
        "url": "07f32d2b70c3dc6b014597a49b9fa4e8c274989f",
        "keyword": "963e9dacc8f4dd5f0bc46f2f187e94b71e35d950",
    })]
class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
    """Extract single images from yandere"""
    subcategory = "post"
    # The single capture group is the numeric post id taken from the URL.
    # The dot in the host name is escaped so that it only matches a literal
    # "." instead of any character (an unescaped "." would also accept
    # hosts such as "yandexre").
    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"]
    # Test-case data as (input URL, expected results) pairs.
    # NOTE(review): the 40-hex-digit values look like checksums of the
    # extracted URLs / keywords -- confirm against the test runner.
    test = [("https://yande.re/post/show/298952", {
        "url": "ce0c3c29ee968b45db4e4ed5ad3fe8e7ecfb2e33",
        "keyword": "ddc1f3c071f4e87e80394982d35f384a12119ca6",
    })]