[tumblr] support '/blog/view' URLs (#2760)

This commit is contained in:
Mike Fährmann 2022-07-15 15:22:54 +02:00
parent 46f11a3118
commit a566e63cdf
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from https://www.tumblr.com/""" """Extractors for https://www.tumblr.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, oauth, exception from .. import text, oauth, exception
@ -35,7 +35,10 @@ POST_TYPES = frozenset((
BASE_PATTERN = ( BASE_PATTERN = (
r"(?:tumblr:(?:https?://)?([^/]+)|" r"(?:tumblr:(?:https?://)?([^/]+)|"
r"(?:https?://)?([\w-]+\.tumblr\.com))") r"(?:https?://)?"
r"(?:www\.tumblr\.com/blog/(?:view/)?([\w-]+)|"
r"([\w-]+\.tumblr\.com)))"
)
class TumblrExtractor(Extractor): class TumblrExtractor(Extractor):
@ -48,9 +51,14 @@ class TumblrExtractor(Extractor):
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.blog = match.group(1) or match.group(2)
self.api = TumblrAPI(self)
name = match.group(2)
if name:
self.blog = name + ".tumblr.com"
else:
self.blog = match.group(1) or match.group(3)
self.api = TumblrAPI(self)
self.types = self._setup_posttypes() self.types = self._setup_posttypes()
self.avatar = self.config("avatar", False) self.avatar = self.config("avatar", False)
self.inline = self.config("inline", True) self.inline = self.config("inline", True)
@ -232,6 +240,8 @@ class TumblrUserExtractor(TumblrExtractor):
("https://demo.tumblr.com/archive"), ("https://demo.tumblr.com/archive"),
("tumblr:http://www.b-authentique.com/"), ("tumblr:http://www.b-authentique.com/"),
("tumblr:www.b-authentique.com"), ("tumblr:www.b-authentique.com"),
("https://www.tumblr.com/blog/view/smarties-art"),
("https://www.tumblr.com/blog/smarties-art"),
) )
def posts(self): def posts(self):
@ -241,7 +251,7 @@ class TumblrUserExtractor(TumblrExtractor):
class TumblrPostExtractor(TumblrExtractor): class TumblrPostExtractor(TumblrExtractor):
"""Extractor for images from a single post on tumblr""" """Extractor for images from a single post on tumblr"""
subcategory = "post" subcategory = "post"
pattern = BASE_PATTERN + r"/(?:post|image)/(\d+)" pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)"
test = ( test = (
("http://demo.tumblr.com/post/459265350", { ("http://demo.tumblr.com/post/459265350", {
"pattern": (r"https://\d+\.media\.tumblr\.com" "pattern": (r"https://\d+\.media\.tumblr\.com"
@ -273,11 +283,12 @@ class TumblrPostExtractor(TumblrExtractor):
"exception": exception.NotFoundError, # HTML response (#297) "exception": exception.NotFoundError, # HTML response (#297)
}), }),
("http://demo.tumblr.com/image/459265350"), ("http://demo.tumblr.com/image/459265350"),
("https://www.tumblr.com/blog/view/smarties-art/686047436641353728"),
) )
def __init__(self, match): def __init__(self, match):
TumblrExtractor.__init__(self, match) TumblrExtractor.__init__(self, match)
self.post_id = match.group(3) self.post_id = match.group(4)
self.reblogs = True self.reblogs = True
self.date_min = 0 self.date_min = 0
@ -293,14 +304,18 @@ class TumblrTagExtractor(TumblrExtractor):
"""Extractor for images from a tumblr-user by tag""" """Extractor for images from a tumblr-user by tag"""
subcategory = "tag" subcategory = "tag"
pattern = BASE_PATTERN + r"/tagged/([^/?#]+)" pattern = BASE_PATTERN + r"/tagged/([^/?#]+)"
test = ("http://demo.tumblr.com/tagged/Times%20Square", { test = (
"pattern": (r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg"), ("http://demo.tumblr.com/tagged/Times%20Square", {
"count": 1, "pattern": r"https://\d+\.media\.tumblr\.com"
}) r"/tumblr_[^/_]+_1280.jpg",
"count": 1,
}),
("https://www.tumblr.com/blog/view/smarties-art/tagged/undertale"),
)
def __init__(self, match): def __init__(self, match):
TumblrExtractor.__init__(self, match) TumblrExtractor.__init__(self, match)
self.tag = text.unquote(match.group(3).replace("-", " ")) self.tag = text.unquote(match.group(4).replace("-", " "))
def posts(self): def posts(self):
return self.api.posts(self.blog, {"tag": self.tag}) return self.api.posts(self.blog, {"tag": self.tag})
@ -312,9 +327,12 @@ class TumblrLikesExtractor(TumblrExtractor):
directory_fmt = ("{category}", "{blog_name}", "likes") directory_fmt = ("{category}", "{blog_name}", "likes")
archive_fmt = "f_{blog[name]}_{id}_{num}" archive_fmt = "f_{blog[name]}_{id}_{num}"
pattern = BASE_PATTERN + r"/likes" pattern = BASE_PATTERN + r"/likes"
test = ("http://mikf123.tumblr.com/likes", { test = (
"count": 1, ("http://mikf123.tumblr.com/likes", {
}) "count": 1,
}),
("https://www.tumblr.com/blog/view/mikf123/likes"),
)
def posts(self): def posts(self):
return self.api.likes(self.blog) return self.api.likes(self.blog)