2015-10-11 16:22:38 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2019-02-08 13:45:40 +01:00
|
|
|
# Copyright 2015-2019 Mike Fährmann
|
2015-10-11 16:22:38 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2017-05-26 22:30:09 +02:00
|
|
|
"""Extract images from https://imgur.com/"""
|
2015-10-11 16:22:38 +02:00
|
|
|
|
|
|
|
from .common import Extractor, Message
|
2016-10-07 00:13:51 +02:00
|
|
|
from .. import text, exception
|
2015-10-11 16:22:38 +02:00
|
|
|
|
2017-02-01 00:53:19 +01:00
|
|
|
|
2019-09-19 15:54:26 +02:00
|
|
|
# Common URL prefix shared by the extractor patterns in this module:
# an optional http(s) scheme, an optional "www.", "i." or "m." subdomain,
# and the literal host "imgur.com". Each extractor appends its own path regex.
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com"
|
|
|
|
|
|
|
|
|
2017-05-26 22:30:09 +02:00
|
|
|
class ImgurExtractor(Extractor):
    """Base class for imgur extractors"""
    category = "imgur"
    root = "https://imgur.com"

    def __init__(self, match):
        """Set up API access and read the resource key from 'match'.

        The first capture group of the matched URL pattern is stored as
        'self.key'; the "mp4" config option (default: True) is stored as
        'self.mp4'.
        """
        Extractor.__init__(self, match)
        self.api = ImgurAPI(self)
        self.key = match.group(1)
        self.mp4 = self.config("mp4", True)

    def _prepare(self, image):
        """Clean up an image metadata dict in place and return its file URL.

        Advertisement keys are removed, a "date" entry is derived from
        "datetime", and 'text.nameext_from_url' is applied to the chosen URL
        (presumably filling in "filename" and "extension" -- see the test
        keywords of the image extractor).

        The returned URL is the "mp4" entry for animated images when the
        "mp4" option is enabled, and the "link" entry otherwise.
        """
        # Remove every ad key independently; a single try/except around
        # consecutive 'del' statements would stop at the first missing key
        # and leave the remaining ones in the metadata.
        for key in ("ad_url", "ad_type", "ad_config"):
            image.pop(key, None)

        if image["animated"] and self.mp4:
            url = image["mp4"]
        else:
            url = image["link"]

        image["date"] = text.parse_timestamp(image["datetime"])
        text.nameext_from_url(url, image)

        return url

    def _items_queue(self, items):
        """Yield queue messages delegating each item to another extractor.

        Every item gets an "_extractor" entry (album or image extractor,
        depending on its "is_album" flag) and is then queued by its "link".
        """
        album_ex = ImgurAlbumExtractor
        image_ex = ImgurImageExtractor

        yield Message.Version, 1
        for item in items:
            item["_extractor"] = album_ex if item["is_album"] else image_ex
            yield Message.Queue, item["link"], item
|
2019-09-19 15:54:26 +02:00
|
|
|
|
2017-05-26 22:30:09 +02:00
|
|
|
|
|
|
|
class ImgurImageExtractor(ImgurExtractor):
    """Extractor for individual images on imgur.com"""
    subcategory = "image"
    filename_fmt = "{category}_{id}{title:?_//}.{extension}"
    archive_fmt = "{id}"
    # Any non-gallery path whose first segment is a 7- or 5-character key,
    # optionally followed by one of the letters "sbtmlh" (the thumbnail
    # direct links in the tests below carry such a suffix) and a dot.
    pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?"
    test = (
        ("https://imgur.com/21yMxCS", {
            "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "account_id": None,
                "account_url": None,
                "animated": False,
                "bandwidth": int,
                "date": "type:datetime",
                "datetime": 1478787875,
                "description": None,
                "edited": "0",
                "extension": "png",
                "favorite": False,
                "filename": "21yMxCS",
                "has_sound": False,
                "height": 32,
                "id": "21yMxCS",
                "in_gallery": False,
                "in_most_viral": False,
                "is_ad": False,
                "link": "https://i.imgur.com/21yMxCS.png",
                "nsfw": False,
                "section": None,
                "size": 182,
                "tags": [],
                "title": "Test",
                "type": "image/png",
                "views": int,
                "vote": None,
                "width": 64,
            },
        }),
        ("http://imgur.com/0gybAXR", {  # gifv/mp4 video
            "url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7",
            "content": "a3c080e43f58f55243ab830569ba02309d59abfc",
        }),
        ("https://imgur.com/XFfsmuC", {  # missing title in API response (#467)
            "keyword": {"title": "Tears are a natural response to irritants"},
        }),
        ("https://imgur.com/HjoXJAd", {  # url ends with '.jpg?1'
            "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e",
        }),
        ("https://imgur.com/zzzzzzz", {  # not found
            "exception": exception.HttpError,
        }),
        ("https://www.imgur.com/21yMxCS"),  # www
        ("https://m.imgur.com/21yMxCS"),  # mobile
        ("https://imgur.com/zxaY6"),  # 5 character key
        ("https://i.imgur.com/21yMxCS.png"),  # direct link
        ("https://i.imgur.com/21yMxCSh.png"),  # direct link thumbnail
        ("https://i.imgur.com/zxaY6.gif"),  # direct link (short)
        ("https://i.imgur.com/zxaY6s.gif"),  # direct link (short; thumb)
    )

    def items(self):
        """Yield the version, directory and URL messages for one image.

        When the API metadata has no usable title, the image's web page is
        requested and the text of its <title> element up to the last " - "
        separator is used instead.
        """
        metadata = self.api.image(self.key)

        if not metadata["title"]:
            # Fall back to the HTML page title (see test case for #467)
            page_url = self.root + "/" + self.key
            html = self.request(page_url, fatal=False).text
            raw_title = text.extract(html, "<title>", "<")[0]
            if not raw_title:
                raw_title = ""
            before_suffix = raw_title.rpartition(" - ")[0]
            metadata["title"] = text.unescape(before_suffix.strip())

        file_url = self._prepare(metadata)

        yield Message.Version, 1
        yield Message.Directory, metadata
        yield Message.Url, file_url, metadata
|
|
|
|
|
|
|
|
|
|
|
|
class ImgurAlbumExtractor(ImgurExtractor):
    """Extractor for imgur albums"""
    subcategory = "album"
    directory_fmt = ("{category}", "{album[id]}{album[title]:? - //}")
    filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
    archive_fmt = "{album[id]}_{id}"
    # Album keys (7 or 5 characters) below "/a/" or "/t/unmuted/"
    pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})"
    test = (
        ("https://imgur.com/a/TcBmP", {
            "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
            "keyword": {
                "album": {
                    "account_id": None,
                    "account_url": None,
                    "cover": "693j2Kr",
                    "cover_edited": None,
                    "cover_height": 1400,
                    "cover_width": 951,
                    "date": "type:datetime",
                    "datetime": 1444387070,
                    "description": None,
                    "favorite": False,
                    "id": "TcBmP",
                    "images_count": 19,
                    "in_gallery": False,
                    "is_ad": False,
                    "is_album": True,
                    "layout": "blog",
                    "link": "https://imgur.com/a/TcBmP",
                    "nsfw": False,
                    "privacy": "hidden",
                    "section": None,
                    "title": "138",
                    "views": int,
                },
                "account_id": None,
                "account_url": None,
                "animated": bool,
                "bandwidth": int,
                "date": "type:datetime",
                "datetime": int,
                "description": None,
                "edited": "0",
                "favorite": False,
                "has_sound": False,
                "height": int,
                "id": str,
                "in_gallery": False,
                "is_ad": False,
                "link": r"re:https://i\.imgur\.com/\w+\.jpg",
                "nsfw": None,
                "num": int,
                "section": None,
                "size": int,
                "tags": list,
                "title": None,
                "type": "image/jpeg",
                "views": int,
                "vote": None,
                "width": int,
            },
        }),
        ("https://imgur.com/a/eD9CT", {  # large album
            "url": "de748c181a04d18bef1de9d4f4866ef0a06d632b",
        }),
        ("https://imgur.com/a/RhJXhVT/all", {  # 7 character album hash
            "url": "695ef0c950023362a0163ee5041796300db76674",
        }),
        ("https://imgur.com/t/unmuted/YMqBcua", {  # unmuted URL
            "url": "86b4747f8147cec7602f0214e267309af73a8655",
        }),
        ("https://imgur.com/a/TcBmQ", {
            "exception": exception.HttpError,
        }),
        ("https://www.imgur.com/a/TcBmP"),  # www
        ("https://m.imgur.com/a/TcBmP"),  # mobile
    )

    def items(self):
        """Yield a directory message for the album and a URL per image.

        The album metadata gets a "date" entry derived from "datetime" and
        loses its "images" list and "ad_config" entry; each image's metadata
        receives its 1-based position as "num" and the album as "album".
        """
        album = self.api.album(self.key)
        album["date"] = text.parse_timestamp(album["datetime"])

        # Detach the image list from the album metadata ("images" is
        # required, "ad_config" is dropped only if present)
        images = album.pop("images")
        album.pop("ad_config", None)

        yield Message.Version, 1
        yield Message.Directory, {"album": album, "count": len(images)}

        for position, image in enumerate(images, 1):
            file_url = self._prepare(image)
            image.update(num=position, album=album)
            yield Message.Url, file_url, image
|
2019-08-14 21:20:58 +02:00
|
|
|
|
|
|
|
|
|
|
|
class ImgurGalleryExtractor(ImgurExtractor):
    """Extractor for imgur galleries"""
    subcategory = "gallery"
    pattern = BASE_PATTERN + r"/gallery/(\w{7}|\w{5})"
    test = (
        ("https://imgur.com/gallery/zf2fIms", {  # non-album gallery (#380)
            "pattern": "https://imgur.com/zf2fIms",
        }),
        ("https://imgur.com/gallery/eD9CT", {
            "pattern": "https://imgur.com/a/eD9CT",
        }),
    )

    def items(self):
        """Queue the gallery key as either an album or a single image.

        A HEAD request to the album URL decides which one it is: a status
        code below 400 selects the album extractor, anything else falls back
        to the plain image URL and the image extractor.
        """
        album_url = self.root + "/a/" + self.key
        with self.request(album_url, method="HEAD", fatal=False) as response:
            is_album = response.status_code < 400

        if is_album:
            target_url = album_url
            target_extr = ImgurAlbumExtractor
        else:
            target_url = self.root + "/" + self.key
            target_extr = ImgurImageExtractor

        yield Message.Version, 1
        yield Message.Queue, target_url, {"_extractor": target_extr}
|
2019-09-17 22:58:18 +02:00
|
|
|
|
|
|
|
|
|
|
|
class ImgurUserExtractor(ImgurExtractor):
    """Extractor for all images posted by a user"""
    subcategory = "user"
    # User name, optionally followed by "/posts" or "/submitted"; anchored
    # with "$" so that other sub-pages of a user do not match here
    pattern = BASE_PATTERN + r"/user/([^/?&#]+)(?:/posts|/submitted)?/?$"
    test = (
        ("https://imgur.com/user/Miguenzo", {
            "range": "1-100",
            "count": 100,
            "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
        }),
        ("https://imgur.com/user/Miguenzo/posts"),
        ("https://imgur.com/user/Miguenzo/submitted"),
    )

    def items(self):
        """Return queue messages for every submission of the account"""
        submissions = self.api.account_submissions(self.key)
        return self._items_queue(submissions)
|
2019-09-17 22:58:18 +02:00
|
|
|
|
|
|
|
|
2019-09-19 15:54:26 +02:00
|
|
|
class ImgurFavoriteExtractor(ImgurExtractor):
    """Extractor for a user's favorites"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/user/([^/?&#]+)/favorites"
    test = ("https://imgur.com/user/Miguenzo/favorites", {
        "range": "1-100",
        "count": 100,
        "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
    })

    def items(self):
        """Return queue messages for every gallery favorite of the account"""
        favorites = self.api.account_favorites(self.key)
        return self._items_queue(favorites)
|
|
|
|
|
|
|
|
|
2019-12-02 22:34:34 +01:00
|
|
|
class ImgurSubredditExtractor(ImgurExtractor):
    """Extractor for a subreddit's imgur links"""
    subcategory = "subreddit"
    pattern = BASE_PATTERN + r"/r/([^/?&#]+)"
    test = ("https://imgur.com/r/pics", {
        "range": "1-100",
        "count": 100,
        "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
    })

    def items(self):
        """Return queue messages for every entry of the subreddit gallery"""
        entries = self.api.gallery_subreddit(self.key)
        return self._items_queue(entries)
|
|
|
|
|
|
|
|
|
2019-10-22 23:51:41 +02:00
|
|
|
class ImgurAPI():
    """Thin wrapper around the endpoints below https://api.imgur.com/3/

    All HTTP traffic goes through the 'request' method of the extractor
    passed to the constructor; every call sends a "Client-ID" Authorization
    header built from the extractor's "client-id" option.
    """

    def __init__(self, extractor):
        self.extractor = extractor
        client_id = extractor.config("client-id", "546c25a59c58ad7")
        self.headers = {"Authorization": "Client-ID " + client_id}

    def account_favorites(self, account):
        """Iterate over all gallery favorites of 'account'"""
        path = "account/{}/gallery_favorites".format(account)
        return self._pagination(path)

    def account_submissions(self, account):
        """Iterate over all submissions of 'account'"""
        path = "account/{}/submissions".format(account)
        return self._pagination(path)

    def gallery_subreddit(self, subreddit):
        """Iterate over all gallery entries of 'subreddit'"""
        path = "gallery/r/{}".format(subreddit)
        return self._pagination(path)

    def album(self, album_hash):
        """Return the "data" object of the album 'album_hash'"""
        return self._call("album/" + album_hash)

    def image(self, image_hash):
        """Return the "data" object of the image 'image_hash'"""
        return self._call("image/" + image_hash)

    def _call(self, endpoint):
        """Request 'endpoint' and return the "data" value of its JSON body"""
        url = "https://api.imgur.com/3/" + endpoint
        response = self.extractor.request(url, headers=self.headers)
        return response.json()["data"]

    def _pagination(self, endpoint):
        """Yield the entries of 'endpoint' page by page, starting at page 0.

        Iteration ends with the first page whose "data" value is empty.
        """
        page = 0
        batch = self._call("{}/{}".format(endpoint, page))
        while batch:
            yield from batch
            page += 1
            batch = self._call("{}/{}".format(endpoint, page))
|