[mememuseum] add 'tag' and 'post' extractors (closes #2264)
This commit is contained in:
parent
e5f6af6e32
commit
79a461a2c1
@ -469,6 +469,12 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<td>Albums, Channels</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mememuseum</td>
|
||||
<td>https://meme.museum/</td>
|
||||
<td>Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>My Hentai Gallery</td>
|
||||
<td>https://myhentaigallery.com/</td>
|
||||
|
@ -81,6 +81,7 @@ modules = [
|
||||
"mangapark",
|
||||
"mangasee",
|
||||
"mangoxo",
|
||||
"mememuseum",
|
||||
"myhentaigallery",
|
||||
"myportfolio",
|
||||
"naver",
|
||||
|
120
gallery_dl/extractor/mememuseum.py
Normal file
120
gallery_dl/extractor/mememuseum.py
Normal file
@ -0,0 +1,120 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2022 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://meme.museum/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
class MememuseumExtractor(Extractor):
    """Common base for the meme.museum extractors"""
    basecategory = "booru"
    category = "mememuseum"
    filename_fmt = "{category}_{id}_{md5}.{extension}"
    archive_fmt = "{id}"
    root = "https://meme.museum"

    def items(self):
        """Yield a Directory and a Url message for each post from posts()"""
        shared = self.metadata()
        numeric_fields = ("id", "width", "height")

        for post in self.posts():
            file_url = post["file_url"]

            # these fields are passed through text.parse_int() in place
            for field in numeric_fields:
                post[field] = text.parse_int(post[field])
            post["tags"] = text.unquote(post["tags"])

            # values from metadata() override same-named post fields
            post.update(shared)

            yield Message.Directory, post
            # nameext_from_url() is only called once the consumer resumes
            # the generator after the Directory message
            yield Message.Url, file_url, text.nameext_from_url(file_url, post)

    def metadata(self):
        """Return metadata merged into every post (none by default)"""
        return ()

    def posts(self):
        """Return an iterable of post dicts (none by default)"""
        return ()
|
||||
|
||||
|
||||
class MememuseumTagExtractor(MememuseumExtractor):
    """Extractor for meme.museum images matching a tag search"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = r"(?:https?://)?meme\.museum/post/list/([^/?#]+)"
    test = ("https://meme.museum/post/list/animated/1", {
        "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
        "count": ">= 30"
    })
    # NOTE(review): not referenced anywhere in this class as shown
    per_page = 25

    def __init__(self, match):
        MememuseumExtractor.__init__(self, match)
        # first pattern group: the (URL-unquoted) tag part of the list URL
        self.tags = text.unquote(match.group(1))

    def metadata(self):
        """Return the search tags as 'search_tags'"""
        return {"search_tags": self.tags}

    def posts(self):
        """Yield one dict per post found on successive result pages

        Pages are requested starting at 1 until a page yields no
        '>Next<' marker.
        """
        page_number = 0

        while True:
            page_number += 1
            url = "{}/post/list/{}/{}".format(
                self.root, self.tags, page_number)
            extr = text.extract_from(self.request(url).text)

            # 'extr' is consumed sequentially, so the order of the
            # calls below must not change
            mime = extr("data-mime='", "'")
            while mime:
                post_id = extr("data-post-id='", "'")
                title = extr("title='", "'")
                tags, dimensions, size = title.split(" // ")
                md5 = extr("/_thumbs/", "/")
                width, _, height = dimensions.partition("x")

                # file extension is the part after the last '/' of the
                # mime type
                extension = mime.rpartition("/")[2]
                file_url = "{}/_images/{}/{}%20-%20{}.{}".format(
                    self.root, md5, post_id, text.quote(tags), extension)

                yield {
                    "file_url": file_url,
                    "id": post_id, "md5": md5, "tags": tags,
                    "width": width, "height": height,
                    # the last character of the size field is dropped
                    # before parsing
                    "size": text.parse_bytes(size[:-1]),
                }

                mime = extr("data-mime='", "'")

            if not extr(">Next<", ">"):
                return
|
||||
|
||||
|
||||
class MememuseumPostExtractor(MememuseumExtractor):
    """Extractor for a single meme.museum post"""
    subcategory = "post"
    pattern = r"(?:https?://)?meme\.museum/post/view/(\d+)"
    test = ("https://meme.museum/post/view/10243", {
        "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc4997"
                   r"1f78/10243%20-%20g%20beard%20open_source%20richard_stallm"
                   r"an%20stallman%20tagme%20text\.jpg",
        "keyword": "3c8009251480cf17248c08b2b194dc0c4d59580e",
        "content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
    })

    def __init__(self, match):
        MememuseumExtractor.__init__(self, match)
        # first pattern group: the numeric post ID from the URL
        self.post_id = match.group(1)

    def posts(self):
        """Return a 1-tuple holding the data dict for the requested post"""
        page_url = "{}/post/view/{}".format(self.root, self.post_id)
        extr = text.extract_from(self.request(page_url).text)

        # 'extr' is consumed sequentially, so these extractions have to
        # stay in exactly this order
        tags = extr(": ", "<")
        md5 = extr("/_thumbs/", "/")
        file_url = self.root + extr("id='main_image' src='", "'")
        # strip whichever quote character surrounds the attribute value
        width = extr("data-width=", " ").strip("'\"")
        height = extr("data-height=", " ").strip("'\"")

        post = {
            "id"      : self.post_id,
            "tags"    : tags,
            "md5"     : md5,
            "file_url": file_url,
            "width"   : width,
            "height"  : height,
            "size"    : 0,
        }
        return (post,)
|
Loading…
x
Reference in New Issue
Block a user