121 lines
4.0 KiB
Python
121 lines
4.0 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2022 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
"""Extractors for https://meme.museum/"""
|
|
|
|
from .common import Extractor, Message
|
|
from .. import text
|
|
|
|
|
|
class MememuseumExtractor(Extractor):
|
|
"""Base class for meme.museum extractors"""
|
|
basecategory = "booru"
|
|
category = "mememuseum"
|
|
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
|
archive_fmt = "{id}"
|
|
root = "https://meme.museum"
|
|
|
|
def items(self):
|
|
data = self.metadata()
|
|
|
|
for post in self.posts():
|
|
url = post["file_url"]
|
|
for key in ("id", "width", "height"):
|
|
post[key] = text.parse_int(post[key])
|
|
post["tags"] = text.unquote(post["tags"])
|
|
post.update(data)
|
|
yield Message.Directory, post
|
|
yield Message.Url, url, text.nameext_from_url(url, post)
|
|
|
|
def metadata(self):
|
|
"""Return general metadata"""
|
|
return ()
|
|
|
|
def posts(self):
|
|
"""Return an iterable containing data of all relevant posts"""
|
|
return ()
|
|
|
|
|
|
class MememuseumTagExtractor(MememuseumExtractor):
|
|
"""Extractor for images from meme.museum by search-tags"""
|
|
subcategory = "tag"
|
|
directory_fmt = ("{category}", "{search_tags}")
|
|
pattern = r"(?:https?://)?meme\.museum/post/list/([^/?#]+)"
|
|
test = ("https://meme.museum/post/list/animated/1", {
|
|
"pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
|
|
"count": ">= 30"
|
|
})
|
|
per_page = 25
|
|
|
|
def __init__(self, match):
|
|
MememuseumExtractor.__init__(self, match)
|
|
self.tags = text.unquote(match.group(1))
|
|
|
|
def metadata(self):
|
|
return {"search_tags": self.tags}
|
|
|
|
def posts(self):
|
|
pnum = 1
|
|
while True:
|
|
url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
|
|
extr = text.extract_from(self.request(url).text)
|
|
|
|
while True:
|
|
mime = extr("data-mime='", "'")
|
|
if not mime:
|
|
break
|
|
|
|
pid = extr("data-post-id='", "'")
|
|
tags, dimensions, size = extr("title='", "'").split(" // ")
|
|
md5 = extr("/_thumbs/", "/")
|
|
width, _, height = dimensions.partition("x")
|
|
|
|
yield {
|
|
"file_url": "{}/_images/{}/{}%20-%20{}.{}".format(
|
|
self.root, md5, pid, text.quote(tags),
|
|
mime.rpartition("/")[2]),
|
|
"id": pid, "md5": md5, "tags": tags,
|
|
"width": width, "height": height,
|
|
"size": text.parse_bytes(size[:-1]),
|
|
}
|
|
|
|
if not extr(">Next<", ">"):
|
|
return
|
|
pnum += 1
|
|
|
|
|
|
class MememuseumPostExtractor(MememuseumExtractor):
|
|
"""Extractor for single images from meme.museum"""
|
|
subcategory = "post"
|
|
pattern = r"(?:https?://)?meme\.museum/post/view/(\d+)"
|
|
test = ("https://meme.museum/post/view/10243", {
|
|
"pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc4997"
|
|
r"1f78/10243%20-%20g%20beard%20open_source%20richard_stallm"
|
|
r"an%20stallman%20tagme%20text\.jpg",
|
|
"keyword": "3c8009251480cf17248c08b2b194dc0c4d59580e",
|
|
"content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
|
|
})
|
|
|
|
def __init__(self, match):
|
|
MememuseumExtractor.__init__(self, match)
|
|
self.post_id = match.group(1)
|
|
|
|
def posts(self):
|
|
url = "{}/post/view/{}".format(self.root, self.post_id)
|
|
extr = text.extract_from(self.request(url).text)
|
|
|
|
return ({
|
|
"id" : self.post_id,
|
|
"tags" : extr(": ", "<"),
|
|
"md5" : extr("/_thumbs/", "/"),
|
|
"file_url": self.root + extr("id='main_image' src='", "'"),
|
|
"width" : extr("data-width=", " ").strip("'\""),
|
|
"height" : extr("data-height=", " ").strip("'\""),
|
|
"size" : 0,
|
|
},)
|