[reddit] add extractor for reddit-hosted images (closes #68)

This commit is contained in:
Mike Fährmann 2018-01-14 18:55:42 +01:00
parent f10ffc0839
commit cc0c2cca57
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 33 additions and 3 deletions

View File

@ -61,7 +61,7 @@ PowerManga https://powermanga.org/ Chapters, Manga
Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga
Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics
RebeccaBlackTech https://rbt.asia/ Threads
Reddit https://reddit.com/ Submissions, Subreddits Optional (OAuth)
Reddit https://reddit.com/ individual Images, Submissions, Subreddits Optional (OAuth)
Rule 34 https://rule34.xxx/ Posts, Tag-Searches
Safebooru https://safebooru.org/ Posts, Tag-Searches
Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional

View File

@ -27,7 +27,7 @@ class Extractor():
subcategory = ""
categorytransfer = False
directory_fmt = ["{category}"]
filename_fmt = "{filename}"
filename_fmt = "{name}.{extension}"
cookiedomain = ""
def __init__(self):

View File

@ -32,7 +32,8 @@ class RedditExtractor(Extractor):
depth = 0
yield Message.Version, 1
with extractor.blacklist(("reddit",) + util.SPECIAL_EXTRACTORS):
with extractor.blacklist(
util.SPECIAL_EXTRACTORS, [RedditSubredditExtractor]):
while True:
extra = []
for url in self._urls(submissions):
@ -101,6 +102,35 @@ class RedditSubmissionExtractor(RedditExtractor):
return (self.api.submission(self.submission_id),)
class RedditImageExtractor(Extractor):
    """Extractor for reddit-hosted images"""
    category = "reddit"
    subcategory = "image"
    # Direct links on i.redd.it / i.reddituploads.com: a single path
    # segment, optionally followed by a query string (no fragment).
    pattern = [r"(?:https?://)?i\.redd(?:\.it|ituploads\.com)"
               r"/[^/?&#]+(?:\?[^#]*)?"]
    test = [
        ("https://i.redd.it/upjtjcx2npzz.jpg", {
            "url": "0de614900feef103e580b632190458c0b62b641a",
            "content": "cc9a68cf286708d5ce23c68e79cd9cf7826db6a3",
        }),
        (("https://i.reddituploads.com/0f44f1b1fca2461f957c713d9592617d"
          "?fit=max&h=1536&w=1536&s=e96ce7846b3c8e1f921d2ce2671fb5e2"), {
            "url": "f24f25efcedaddeec802e46c60d77ef975dc52a5",
            "content": "d13c3b5f7e39b454fa21b33d7d6b0e0f07126849",
        }),
    ]

    def __init__(self, match):
        """Remember the matched URL; 'match' is the regex match object"""
        Extractor.__init__(self)
        # The complete matched text doubles as the download URL.
        self.url = match.group(0)

    def items(self):
        """Yield the version, directory and URL messages for the image"""
        # Metadata comes from the URL itself via text.nameext_from_url()
        # (presumably filename and extension -- confirm against 'text').
        metadata = text.nameext_from_url(self.url)
        messages = (
            (Message.Version, 1),
            (Message.Directory, metadata),
            (Message.Url, self.url, metadata),
        )
        for message in messages:
            yield message
class RedditAPI():
"""Minimal interface for the reddit API"""
CLIENT_ID = "6N9uN0krSDE-ig"