2015-10-04 04:13:50 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2020-01-01 16:07:23 +01:00
|
|
|
# Copyright 2015-2020 Mike Fährmann
|
2015-10-04 04:13:50 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2017-04-20 13:20:41 +02:00
|
|
|
"""Extract images from https://www.deviantart.com/"""
|
2015-10-04 04:13:50 +02:00
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
from .common import Extractor, Message
|
2019-08-29 10:09:21 +02:00
|
|
|
from .. import text, util, exception
|
2017-07-12 09:47:01 +02:00
|
|
|
from ..cache import cache, memcache
|
2019-05-29 23:50:05 +02:00
|
|
|
import collections
|
2017-05-13 21:42:29 +02:00
|
|
|
import itertools
|
2018-10-10 21:19:45 +02:00
|
|
|
import mimetypes
|
2018-07-14 11:52:21 +02:00
|
|
|
import math
|
2019-05-29 23:50:05 +02:00
|
|
|
import time
|
2017-04-03 18:23:13 +02:00
|
|
|
import re
|
2015-10-04 04:13:50 +02:00
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
|
2018-06-28 20:14:18 +02:00
|
|
|
# Common URL prefix for user-specific DeviantArt pages.
# The user name is captured in group 1 for "www.deviantart.com/<user>"
# style URLs and in group 2 for "<user>.deviantart.com" style URLs.
BASE_PATTERN = (
    r"(?:https?://)?"
    r"(?:(?:www\.)?deviantart\.com/([\w-]+)"
    r"|(?!www\.)([\w-]+)\.deviantart\.com)"
)
|
|
|
|
|
|
|
|
|
2017-04-03 14:56:47 +02:00
|
|
|
class DeviantartExtractor(Extractor):
    """Base class for deviantart extractors using the OAuth API

    Subclasses provide deviations() and may extend prepare(); items()
    turns the returned Deviation-objects into extractor messages.
    """
    category = "deviantart"
    directory_fmt = ("{category}", "{username}")
    filename_fmt = "{category}_{index}_{title}.{extension}"
    cookiedomain = None
    root = "https://www.deviantart.com"

    def __init__(self, match=None):
        """Read config options and set up the API interface

        The user name is taken from the first or second match group,
        i.e. the two groups defined by BASE_PATTERN.
        """
        Extractor.__init__(self, match)
        self.offset = 0
        self.flat = self.config("flat", True)
        self.extra = self.config("extra", False)
        self.quality = self.config("quality", "100")
        self.original = self.config("original", True)
        self.user = match.group(1) or match.group(2)
        self.group = False
        self.api = DeviantartAPI(self)

        if self.quality:
            # stored in the form it is substituted into image URLs
            self.quality = "q_{}".format(self.quality)

        if self.original != "image":
            self._update_content = self._update_content_default
        else:
            # "image": only use the download if it is an image file
            self._update_content = self._update_content_image
            self.original = True

        # None for any "journals" value other than "html" or "text",
        # which disables journal output in items()
        self.commit_journal = {
            "html": self._commit_journal_html,
            "text": self._commit_journal_text,
        }.get(self.config("journals", "html"))

    def skip(self, num):
        """Remember 'num' items as skipped by adding them to the offset"""
        self.offset += num
        return num

    def items(self):
        """Yield messages for all deviations returned by deviations()"""
        if self.user:
            # an empty profile result marks the name as a group
            profile = self.api.user_profile(self.user)
            self.group = not profile
            if self.group:
                self.subcategory = "group-" + self.subcategory
                self.user = self.user.lower()
            else:
                self.user = profile["user"]["username"]

        yield Message.Version, 1
        for deviation in self.deviations():
            if isinstance(deviation, tuple):
                # (url, metadata) pairs get handed to other extractors
                url, data = deviation
                yield Message.Queue, url, data
                continue

            self.prepare(deviation)
            yield Message.Directory, deviation

            if "content" in deviation:
                content = deviation["content"]

                if self.original and deviation["is_downloadable"] and \
                        text.ext_from_url(content["src"]) != "gif":
                    self._update_content(deviation, content)

                if content["src"].startswith("https://images-wixmp-"):
                    if deviation["index"] <= 790677560:
                        # https://github.com/r888888888/danbooru/issues/4069
                        content["src"] = re.sub(
                            r"(/f/[^/]+/[^/]+)/v\d+/.*",
                            r"/intermediary\1", content["src"])
                    if self.quality:
                        content["src"] = re.sub(
                            r"q_\d+", self.quality, content["src"])

                yield self.commit(deviation, content)

            elif deviation["is_downloadable"]:
                content = self.api.deviation_download(deviation["deviationid"])
                yield self.commit(deviation, content)

            if "videos" in deviation:
                # pick the entry with the highest numeric quality value;
                # the last character of "quality" is dropped before parsing
                video = max(deviation["videos"],
                            key=lambda x: text.parse_int(x["quality"][:-1]))
                yield self.commit(deviation, video)

            if "flash" in deviation:
                yield self.commit(deviation, deviation["flash"])

            if "excerpt" in deviation and self.commit_journal:
                journal = self.api.deviation_content(deviation["deviationid"])
                yield self.commit_journal(deviation, journal)

            if self.extra:
                # queue sta.sh links found in the description
                for match in DeviantartStashExtractor.pattern.finditer(
                        deviation.get("description", "")):
                    deviation["_extractor"] = DeviantartStashExtractor
                    yield Message.Queue, match.group(0), deviation

    def deviations(self):
        """Return an iterable containing all relevant Deviation-objects"""

    def prepare(self, deviation):
        """Adjust the contents of a Deviation-object

        Adds 'index', 'username', '_username', 'da_category', 'date'
        and 'filename' and converts 'published_time' to an integer.
        """
        try:
            # the numeric index is the part after the last "-" in the URL
            deviation["index"] = text.parse_int(
                deviation["url"].rpartition("-")[2])
        except KeyError:
            deviation["index"] = 0

        if self.user:
            deviation["username"] = self.user
            deviation["_username"] = self.user.lower()
        else:
            deviation["username"] = deviation["author"]["username"]
            deviation["_username"] = deviation["username"].lower()

        deviation["da_category"] = deviation["category"]
        deviation["published_time"] = text.parse_int(
            deviation["published_time"])
        deviation["date"] = text.parse_timestamp(
            deviation["published_time"])

        # filename metadata
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        sub = re.compile(r"\W").sub
        deviation["filename"] = "".join((
            sub("_", deviation["title"].lower()), "_by_",
            sub("_", deviation["author"]["username"].lower()), "-d",
            util.bencode(deviation["index"], alphabet),
        ))

    @staticmethod
    def commit(deviation, target):
        """Return a Url message for 'target' and record it in 'deviation'

        A copy of 'target', extended with 'filename' and 'extension',
        is stored as deviation["target"].
        """
        url = target["src"]
        target = target.copy()
        target["filename"] = deviation["filename"]
        deviation["target"] = target
        deviation["extension"] = target["extension"] = text.ext_from_url(url)
        return Message.Url, url, deviation

    def _commit_journal_html(self, deviation, journal):
        """Return a Url message with the journal rendered as HTML"""
        title = text.escape(deviation["title"])
        url = deviation["url"]
        thumbs = deviation.get("thumbs") or deviation.get("files")
        html = journal["html"]
        shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else ""

        if "css" in journal:
            css, cls = journal["css"], "withskin"
        elif html.startswith("<style"):
            # split a leading <style> element off the HTML body
            css, _, html = html.partition("</style>")
            css = css.partition(">")[2]
            cls = "withskin"
        else:
            css, cls = "", "journal-green"

        if html.find('<div class="boxtop journaltop">', 0, 250) != -1:
            needle = '<div class="boxtop journaltop">'
            header = HEADER_CUSTOM_TEMPLATE.format(
                title=title, url=url, date=deviation["date"],
            )
        else:
            needle = '<div usr class="gr">'
            catlist = deviation["category_path"].split("/")
            # one breadcrumb link per level of the category path
            categories = " / ".join(
                ('<span class="crumb"><a href="{}/{}/"><span>{}</span></a>'
                 '</span>').format(self.root, cpath, cat.capitalize())
                for cat, cpath in zip(
                    catlist,
                    itertools.accumulate(catlist, lambda t, c: t + "/" + c)
                )
            )
            username = deviation["author"]["username"]
            urlname = deviation.get("username") or username.lower()
            header = HEADER_TEMPLATE.format(
                title=title,
                url=url,
                userurl="{}/{}/".format(self.root, urlname),
                username=username,
                date=deviation["date"],
                categories=categories,
            )

        if needle in html:
            html = html.replace(needle, header, 1)
        else:
            html = JOURNAL_TEMPLATE_HTML_EXTRA.format(header, html)

        html = JOURNAL_TEMPLATE_HTML.format(
            title=title, html=html, shadow=shadow, css=css, cls=cls)

        deviation["extension"] = "htm"
        return Message.Url, html, deviation

    @staticmethod
    def _commit_journal_text(deviation, journal):
        """Return a Url message with the journal rendered as plain text"""
        html = journal["html"]
        if html.startswith("<style"):
            html = html.partition("</style>")[2]

        # Drop everything from the last "<script" onwards. Without such
        # a tag rpartition() puts the whole text into 'tail', so fall
        # back to it instead of producing empty content.
        head, _, tail = html.rpartition("<script")
        content = "\n".join(
            text.unescape(text.remove_html(txt))
            for txt in (head or tail).split("<br />")
        )
        txt = JOURNAL_TEMPLATE_TEXT.format(
            title=deviation["title"],
            username=deviation["author"]["username"],
            date=deviation["date"],
            content=content,
        )

        deviation["extension"] = "txt"
        return Message.Url, txt, deviation

    @staticmethod
    def _find_folder(folders, name):
        """Return the first folder whose name matches the URL part 'name'

        Dashes in 'name' stand for runs of non-word characters;
        matching ignores case. Raises NotFoundError without a match.
        """
        # escape the individual parts, so regex metacharacters in 'name'
        # are matched literally and cannot produce an invalid pattern
        pattern = re.compile(
            r"(?i)\W*" +
            r"\W+".join(map(re.escape, name.split("-"))) +
            r"\W*$")
        for folder in folders:
            if pattern.match(folder["name"]):
                return folder
        raise exception.NotFoundError("folder")

    def _folder_urls(self, folders, category):
        """Return a list of (url, folder) tuples for all 'folders'"""
        url = "{}/{}/{}/0/".format(self.root, self.user, category)
        return [(url + folder["name"], folder) for folder in folders]

    def _update_content_default(self, deviation, content):
        """Update 'content' with the deviation's download data"""
        content.update(self.api.deviation_download(deviation["deviationid"]))

    def _update_content_image(self, deviation, content):
        """Update 'content' only if the download is an image file"""
        data = self.api.deviation_download(deviation["deviationid"])
        # guess the type from the URL without its query string
        url = data["src"].partition("?")[0]
        mtype = mimetypes.guess_type(url, False)[0]
        if mtype and mtype.startswith("image/"):
            content.update(data)
|
2018-10-10 21:19:45 +02:00
|
|
|
|
2017-05-10 16:45:45 +02:00
|
|
|
|
2019-11-22 23:20:21 +01:00
|
|
|
class DeviantartUserExtractor(DeviantartExtractor):
    """Extractor for an artist's user profile"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/?$"
    test = (
        ("https://www.deviantart.com/shimoda7", {
            "pattern": r"/shimoda7/gallery$",
        }),
        ("https://www.deviantart.com/shimoda7", {
            "options": (("include", "all"),),
            "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)$",
            "count": 4,
        }),
        "https://shimoda7.deviantart.com/",
    )

    def items(self):
        """Dispatch to the extractors for the sections of a profile"""
        prefix = "{}/{}/".format(self.root, self.user)
        sections = (
            (DeviantartGalleryExtractor, "gallery"),
            (DeviantartScrapsExtractor, "gallery/scraps"),
            (DeviantartJournalExtractor, "posts"),
            (DeviantartFavoriteExtractor, "favourites"),
        )
        extractors = tuple(
            (extr, prefix + path)
            for extr, path in sections
        )
        # only the gallery is selected by default
        return self._dispatch_extractors(extractors, ("gallery",))
|
2019-11-06 23:57:12 +01:00
|
|
|
|
|
|
|
|
2017-05-10 16:45:45 +02:00
|
|
|
class DeviantartGalleryExtractor(DeviantartExtractor):
    """Extractor for all deviations from an artist's gallery"""
    subcategory = "gallery"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
    test = (
        ("https://www.deviantart.com/shimoda7/gallery/", {
            "pattern": (
                r"https://(api-da\.wixmp\.com/_api/download/file"
                r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)"
            ),
            "count": ">= 30",
            "keyword": {
                "allows_comments": bool,
                "author": {
                    "type": "regular",
                    "usericon": str,
                    "userid": "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B",
                    "username": "shimoda7",
                },
                "category_path": str,
                "content": {
                    "filesize": int,
                    "height": int,
                    "src": str,
                    "transparency": bool,
                    "width": int,
                },
                "da_category": str,
                "date": "type:datetime",
                "deviationid": str,
                "?download_filesize": int,
                "extension": str,
                "index": int,
                "is_deleted": bool,
                "is_downloadable": bool,
                "is_favourited": bool,
                "is_mature": bool,
                "preview": {
                    "height": int,
                    "src": str,
                    "transparency": bool,
                    "width": int,
                },
                "published_time": int,
                "stats": {
                    "comments": int,
                    "favourites": int,
                },
                "target": dict,
                "thumbs": list,
                "title": str,
                "url": r"re:https://www.deviantart.com/shimoda7/art/[^/]+-\d+",
                "username": "shimoda7",
            },
        }),
        # group
        ("https://www.deviantart.com/yakuzafc/gallery", {
            "pattern": r"https://www.deviantart.com/yakuzafc/gallery/0/",
            "count": ">= 15",
        }),
        # 'folders' option (#276)
        ("https://www.deviantart.com/justatest235723/gallery", {
            "count": 3,
            "options": (("metadata", 1), ("folders", 1), ("original", 0)),
            "keyword": {
                "description": str,
                "folders": list,
                "is_watching": bool,
                "license": str,
                "tags": list,
            },
        }),
        ("https://www.deviantart.com/shimoda8/gallery/", {
            "exception": exception.NotFoundError,
        }),

        # URLs that only need to match the pattern
        "https://www.deviantart.com/shimoda7/gallery",
        "https://www.deviantart.com/shimoda7/gallery/all",
        "https://www.deviantart.com/shimoda7/gallery/?catpath=/",
        "https://shimoda7.deviantart.com/gallery/",
        "https://shimoda7.deviantart.com/gallery/all/",
        "https://shimoda7.deviantart.com/gallery/?catpath=/",
    )

    def deviations(self):
        """Return all gallery deviations or the gallery's folder URLs"""
        if not self.flat or self.group:
            # non-flat mode and groups: one queue entry per folder
            return self._folder_urls(
                self.api.gallery_folders(self.user), "gallery")
        return self.api.gallery_all(self.user, self.offset)
|
2016-11-06 10:44:50 +01:00
|
|
|
|
|
|
|
|
2017-07-03 21:57:10 +02:00
|
|
|
class DeviantartFolderExtractor(DeviantartExtractor):
    """Extractor for deviations inside an artist's gallery folder"""
    subcategory = "folder"
    directory_fmt = ("{category}", "{username}", "{folder[title]}")
    archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)"
    test = (
        # user
        ("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
            "count": 5,
            "options": (("original", False),),
        }),
        # group
        ("https://www.deviantart.com/yakuzafc/gallery/37412168/Crafts", {
            "count": ">= 4",
            "options": (("original", False),),
        }),
        "https://shimoda7.deviantart.com/gallery/722019/Miscellaneous",
        "https://yakuzafc.deviantart.com/gallery/37412168/Crafts",
    )

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # groups 3 and 4 are the folder ID and name from the URL
        self.folder_id, self.folder_name = match.group(3), match.group(4)
        # filled in by deviations() once the folder has been found
        self.folder = None

    def deviations(self):
        """Look up the folder by name and return its deviations"""
        found = self._find_folder(
            self.api.gallery_folders(self.user), self.folder_name)
        uuid = found["folderid"]
        self.folder = {
            "title": found["name"],
            "uuid" : uuid,
            "index": self.folder_id,
            "owner": self.user,
        }
        return self.api.gallery(self.user, uuid, self.offset)

    def prepare(self, deviation):
        """Add the folder metadata to a Deviation-object"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["folder"] = self.folder
|
|
|
|
|
|
|
|
|
2019-02-12 10:20:21 +01:00
|
|
|
class DeviantartStashExtractor(DeviantartExtractor):
    """Extractor for sta.sh-ed deviations"""
    subcategory = "stash"
    archive_fmt = "{index}.{extension}"
    pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
    test = (
        ("https://sta.sh/022c83odnaxc", {
            "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
            "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
            "count": 1,
        }),
        # multiple stash items
        ("https://sta.sh/21jf51j7pzl2", {
            "pattern": pattern,
            "count": 4,
        }),
        # downloadable, but no "content" field (#307)
        ("https://sta.sh/024t4coz16mi", {
            "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
            "count": 1,
        }),
        ("https://sta.sh/abcdefghijkl", {
            "exception": exception.HttpError,
        }),
    )

    # use the generic skip() instead of the offset-based one
    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.stash_id = match.group(1)
        # sta.sh URLs carry no user name
        self.user = None

    def deviations(self):
        """Return the stashed deviation or the URLs of all stash items"""
        stash_url = "https://sta.sh/" + self.stash_id
        page = self.request(stash_url).text

        deviation_id = text.extract(page, '//deviation/', '"')[0]
        if deviation_id:
            # a single deviation
            return (self.api.deviation(deviation_id),)

        # otherwise queue every link found in the stash body
        data = {"_extractor": DeviantartStashExtractor}
        body = text.extract(page, 'id="stash-body"', 'class="footer"')[0]
        results = []
        for link in text.extract_iter(body, '<a href="', '"'):
            results.append((link, data))
        return results
|
|
|
|
|
2018-12-26 18:50:55 +01:00
|
|
|
|
2017-04-20 13:20:41 +02:00
|
|
|
class DeviantartFavoriteExtractor(DeviantartExtractor):
    """Extractor for an artist's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "{username}", "Favourites")
    archive_fmt = "f_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
    test = (
        ("https://www.deviantart.com/h3813067/favourites/", {
            "options": (("metadata", True), ("flat", False)),  # issue #271
            "count": 1,
        }),
        ("https://www.deviantart.com/h3813067/favourites/", {
            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
        }),
        ("https://www.deviantart.com/h3813067/favourites/all"),
        ("https://www.deviantart.com/h3813067/favourites/?catpath=/"),
        ("https://h3813067.deviantart.com/favourites/"),
        ("https://h3813067.deviantart.com/favourites/all"),
        ("https://h3813067.deviantart.com/favourites/?catpath=/"),
    )

    def deviations(self):
        """Return all favorites or the URLs of all collection folders

        With the 'flat' option enabled, the deviations of all
        collection folders are chained into a single iterable.
        Otherwise a list of (url, folder) tuples is returned.
        """
        folders = self.api.collections_folders(self.user)
        if self.flat:
            deviations = itertools.chain.from_iterable(
                self.api.collections(self.user, folder["folderid"])
                for folder in folders
            )
            if self.offset:
                # skip() already reported these items as skipped,
                # so they must not be returned again
                deviations = itertools.islice(
                    deviations, self.offset, None)
            return deviations
        return self._folder_urls(folders, "favourites")
|
2017-07-10 18:14:40 +02:00
|
|
|
|
|
|
|
|
|
|
|
class DeviantartCollectionExtractor(DeviantartExtractor):
    """Extractor for a single favorite collection"""
    subcategory = "collection"
    directory_fmt = (
        "{category}", "{username}", "Favourites", "{collection[title]}",
    )
    archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)"
    test = (
        ("https://www.deviantart.com/pencilshadings"
         "/favourites/70595441/3D-Favorites", {
             "count": ">= 20",
             "options": (("original", False),),
         }),
        ("https://pencilshadings.deviantart.com"
         "/favourites/70595441/3D-Favorites"),
    )

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # groups 3 and 4 are the collection ID and name from the URL
        self.collection_id = match.group(3)
        self.collection_name = match.group(4)
        # filled in by deviations() once the collection has been found
        self.collection = None

    def deviations(self):
        """Look up the collection by name and return its deviations"""
        found = self._find_folder(
            self.api.collections_folders(self.user), self.collection_name)
        uuid = found["folderid"]
        self.collection = {
            "title": found["name"],
            "uuid" : uuid,
            "index": self.collection_id,
            "owner": self.user,
        }
        return self.api.collections(self.user, uuid, self.offset)

    def prepare(self, deviation):
        """Add the collection metadata to a Deviation-object"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["collection"] = self.collection
|
|
|
|
|
|
|
|
|
2017-05-10 17:21:33 +02:00
|
|
|
class DeviantartJournalExtractor(DeviantartExtractor):
    """Extractor for an artist's journals"""
    subcategory = "journal"
    directory_fmt = ("{category}", "{username}", "Journal")
    archive_fmt = "j_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
    test = (
        # default journal output
        ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
            "url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
        }),
        # plain text output
        ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
            "url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
            "options": (("journals", "text"),),
        }),
        # journal output disabled
        ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
            "count": 0,
            "options": (("journals", "none"),),
        }),
        "https://www.deviantart.com/shimoda7/posts/",
        "https://www.deviantart.com/shimoda7/journal/",
        "https://www.deviantart.com/shimoda7/journal/?catpath=/",
        "https://shimoda7.deviantart.com/journal/",
        "https://shimoda7.deviantart.com/journal/?catpath=/",
    )

    def deviations(self):
        """Return the user's journal entries, starting at the offset"""
        user, offset = self.user, self.offset
        return self.api.browse_user_journals(user, offset)
|
|
|
|
|
|
|
|
|
2018-05-08 18:10:50 +02:00
|
|
|
class DeviantartPopularExtractor(DeviantartExtractor):
    """Extractor for popular deviations"""
    subcategory = "popular"
    directory_fmt = (
        "{category}", "Popular", "{popular[range]}", "{popular[search]}",
    )
    archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
    pattern = (
        r"(?:https?://)?www\.deviantart\.com/(?:"
        r"search(?:/deviations)?"
        r"|(?:deviations/?)?\?order=(popular-[^/?&#]+)"
        r"|((?:[\w-]+/)*)(popular-[^/?&#]+)"
        r")/?(?:\?([^#]*))?"
    )
    test = (
        ("https://www.deviantart.com/?order=popular-all-time", {
            "options": (("original", False),),
            "range": "1-30",
            "count": 30,
        }),
        ("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
            "options": (("original", False),),
        }),
        "https://www.deviantart.com/search?q=tree",
        "https://www.deviantart.com/search/deviations?order=popular-1-week",
        "https://www.deviantart.com/artisan/popular-all-time/?q=tree",
    )

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.search_term = self.time_range = self.category_path = None
        # popular listings are not tied to a user
        self.user = ""

        order, path, suffix, querystr = match.groups()
        params = text.parse_query(querystr)
        # time range from the URL itself, else from an "order" parameter
        trange = order or suffix or params.get("order")

        if path:
            self.category_path = path.strip("/")

        if trange:
            prefix = "popular-"
            if trange.startswith(prefix):
                trange = trange[len(prefix):]
            else:
                trange = ""
            # e.g. "24-hours" -> "24hr", "all-time" -> "alltime"
            self.time_range = trange.replace("-", "").replace("hours", "hr")

        if params:
            self.search_term = params.get("q")

        self.popular = {
            "search": self.search_term or "",
            "range": trange or "24-hours",
            "path": self.category_path,
        }

    def deviations(self):
        """Return popular deviations for the selected search settings"""
        return self.api.browse_popular(
            self.search_term,
            self.time_range,
            self.category_path,
            self.offset,
        )

    def prepare(self, deviation):
        """Add the search settings to a Deviation-object"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["popular"] = self.popular
|
|
|
|
|
|
|
|
|
2019-08-30 18:47:06 +02:00
|
|
|
class DeviantartExtractorV2(DeviantartExtractor):
    """Base class for deviantart extractors using the NAPI"""
    cookiedomain = ".deviantart.com"
    cookienames = ("auth", "auth_secure", "userinfo")
    # class-wide flag so the missing-cookies warning is logged only once
    _warning = True

    def items(self):
        """Yield Version, Directory, Url and optional Queue messages

        Every object from deviations() is looked up through the API's
        'extended_fetch' call and converted by _extract().
        """
        if self.original and not self._check_cookies(self.cookienames):
            # without session cookies, original downloads get disabled
            self.original = False
            if self._warning:
                DeviantartExtractorV2._warning = False
                self.log.warning("No session cookies set: "
                                 "Disabling original file downloads.")

        yield Message.Version, 1
        for stub in self.deviations():
            fetch = self.api.deviation_extended_fetch
            deviation_id = stub["deviationId"]
            username = stub["author"]["username"]
            kind = "journal" if stub["isJournal"] else "art"
            data = fetch(deviation_id, username, kind)

            if "deviation" not in data:
                self.log.warning("Unable to fetch deviation ID %s",
                                 deviation_id)
                self.log.debug("Server response: %s", data)
                continue

            deviation = self._extract(data)
            if not deviation:
                continue

            yield Message.Directory, deviation
            yield Message.Url, deviation["target"]["src"], deviation

            if not self.extra:
                continue
            # queue every sta.sh link found in the description
            stash = DeviantartStashExtractor
            for found in stash.pattern.finditer(deviation["description"]):
                deviation["_extractor"] = stash
                yield Message.Queue, found.group(0), deviation

    def _extract(self, data):
        """Build the metadata dict and download target for one deviation

        Returns the updated 'deviation' object from 'data', or None for
        text content when 'commit_journal' is not set.
        """
        deviation = data["deviation"]
        extended = deviation.pop("extended")
        media = deviation.pop("media")

        # prepare deviation metadata
        deviation["description"] = extended.get("description", "")
        author = deviation["author"]["username"]
        deviation["username"] = author
        deviation["_username"] = author.lower()
        deviation["stats"] = stats = extended["stats"]
        stats["comments"] = data["comments"]["total"]
        deviation["index"] = deviation["deviationId"]
        deviation["tags"] = [
            tag["name"] for tag in extended.get("tags") or ()
        ]
        deviation["date"] = text.parse_datetime(deviation["publishedTime"])
        facets = []
        for key in ("typeFacet", "contentFacet", "categoryFacet"):
            if key in extended:
                facets.append(extended[key]["displayNameEn"])
        deviation["category_path"] = "/".join(facets)

        # extract download target
        target = media["types"][-1]
        src = None
        token = None

        if "textContent" in deviation:
            if not self.commit_journal:
                return None
            journal = deviation["textContent"]
            journal["html"] = journal["html"]["markup"]
            src = self.commit_journal(deviation, journal)[1]

        elif target["t"] == "gif":
            src = target["b"]
            token = media["token"][0]

        elif "download" in extended and self.original:
            target = extended["download"]
            src = target["url"]
            del target["url"]

        elif target["t"] == "video":
            # select largest video
            def video_quality(entry):
                # "720p" -> 720; a missing "q" yields parse_int("")
                return text.parse_int(entry.get("q", "")[:-1])

            target = max(media["types"], key=video_quality)
            src = target["b"]

        elif target["t"] == "flash":
            src = target["s"]
            if src.startswith("https://sandbox.deviantart.com"):
                # extract SWF file from "sandbox"
                page = self.request(src).text
                src = text.extract(page, 'id="sandboxembed" src="', '"')[0]

        else:
            src = media["baseUri"]
            if "token" in media:
                token = media["token"][0]

            if "c" in target:
                suffix = target["c"].replace(
                    "<prettyName>", media["prettyName"])
                src = src + "/" + suffix
            if src.startswith("https://images-wixmp-"):
                if deviation["index"] <= 790677560:
                    # https://github.com/r888888888/danbooru/issues/4069
                    src = re.sub(
                        r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src)
                if self.quality:
                    src = re.sub(r"q_\d+", self.quality, src)

        # filename and extension metadata
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        title = re.sub(r"\W", "_", deviation["title"].lower())
        artist = re.sub(
            r"\W", "_", deviation["author"]["username"].lower())
        code = util.bencode(deviation["index"], alphabet)
        deviation["filename"] = title + "_by_" + artist + "-d" + code
        if "extension" not in deviation:
            deviation["extension"] = text.ext_from_url(src)

        if token:
            src += "?token=" + token
        target["src"] = src
        deviation["target"] = target
        return deviation

    def _pagination(self, url, params, headers=None):
        """Yield all 'results' entries, following 'nextOffset' while
        the response reports 'hasMore'; updates 'params' in place"""
        more = True
        while more:
            page = self.request(url, params=params, headers=headers).json()
            yield from page["results"]

            more = page["hasMore"]
            if more:
                params["offset"] = page["nextOffset"]
|
|
|
|
|
2019-08-21 23:47:17 +02:00
|
|
|
|
|
|
|
class DeviantartDeviationExtractor(DeviantartExtractorV2):
    """Extractor for single deviations"""
    subcategory = "deviation"
    archive_fmt = "{index}.{extension}"
    pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)"
    test = (
        ("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852", {
            "options": (("original", 0),),
            #  "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
        }),
        ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
            "count": 0,
        }),
        ("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576", {
            #  "pattern": (r"https://www.deviantart.com/download/261986576"
            #              r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
                        r"/intermediary/f/[^/]+/[^.]+\.jpg"),
        }),
        # wixmp URL rewrite
        ("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466", {
            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
                        r"/intermediary/f/[^/]+/[^.]+\.jpg"),
        }),
        # wixmp URL rewrite v2 (#369)
        ("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104", {
            "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,",
        }),
        # non-download URL for GIFs (#242)
        ("https://www.deviantart.com/skatergators/art/COM-Moni-781571783", {
            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
                        r"/f/[^/]+/[^.]+\.gif\?token="),
        }),
        # external URLs from description (#302)
        ("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498", {
            "options": (("extra", 1), ("original", 0)),
            "pattern": r"https?://sta\.sh/\w+$",
            "range": "2-",
            "count": 4,
        }),
        # video
        ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
            "pattern": r"https://wixmp-.+wixmp.com/v/mp4/.+\.720p\.\w+.mp4",
            "keyword": {
                "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
                "extension": "mp4",
                "target": {
                    "d": 306,
                    "f": 19367585,
                    "h": 720,
                    "q": "720p",
                    "t": "video",
                    "w": 1364,
                    "src": str,
                },
            },
        }),
        # archive
        ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
            #  "pattern": r"https://.+deviantart.com/download/763300948/.*rar",
            "pattern": r"https://images-wixmp-\w+\.wixmp\.com/i/.*\.png",
        }),
        # swf
        ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
            "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf",
        }),
        # journal
        ("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
            "url": "f33f8127ab71819be7de849175b6d5f8b37bb629",
            "pattern": "text:<!DOCTYPE html>\n",
        }),
        # journal-like post with isJournal == False (#419)
        ("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", {
            "url": "1534d6ea0561247ab921d07505e57a9d663a833b",
            "pattern": "text:<!DOCTYPE html>\n",
        }),
        # old-style URLs
        "https://shimoda7.deviantart.com"
        "/art/For-the-sake-of-a-memory-10073852",
        "https://myria-moon.deviantart.com"
        "/art/Aime-Moi-part-en-vadrouille-261986576",
        "https://zzz.deviantart.com/art/zzz-1234567890",
    )

    # use the generic skip() of the common Extractor base class
    skip = Extractor.skip

    def __init__(self, match):
        """Store the deviation kind ('art' or 'journal') and its ID"""
        DeviantartExtractorV2.__init__(self, match)
        # groups 1 and 2 belong to BASE_PATTERN
        self.type, self.deviation_id = match.group(3, 4)

    def deviations(self):
        """Return a 1-tuple holding the fields items() reads"""
        stub = {
            "deviationId": self.deviation_id,
            "author"     : {"username": self.user},
            "isJournal"  : self.type == "journal",
        }
        return (stub,)
|
2019-08-21 23:47:17 +02:00
|
|
|
|
|
|
|
|
|
|
|
class DeviantartScrapsExtractor(DeviantartExtractorV2):
    """Extractor for an artist's scraps"""
    subcategory = "scraps"
    directory_fmt = ("{category}", "{username}", "Scraps")
    archive_fmt = "s_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
    test = (
        ("https://www.deviantart.com/shimoda7/gallery/scraps", {
            "count": 12,
        }),
        "https://www.deviantart.com/shimoda7/gallery/?catpath=scraps",
        "https://shimoda7.deviantart.com/gallery/?catpath=scraps",
    )

    def deviations(self):
        """Yield the 'deviation' object of every scraps gallery entry"""
        endpoint = "{}/_napi/da-user-profile/api/gallery/contents".format(
            self.root)
        query = {
            "username"     : self.user,
            "offset"       : self.offset,
            "limit"        : "24",
            "scraps_folder": "true",
        }
        referer = "{}/{}/gallery/scraps".format(self.root, self.user)

        entries = self._pagination(endpoint, query, {"Referer": referer})
        yield from (entry["deviation"] for entry in entries)
|
2019-08-21 23:47:17 +02:00
|
|
|
|
|
|
|
|
2019-12-27 21:27:39 +01:00
|
|
|
class DeviantartFollowingExtractor(DeviantartExtractorV2):
    """Extractor for the users listed in a profile's 'watching' module"""
    subcategory = "following"
    pattern = BASE_PATTERN + "/about#watching$"
    test = ("https://www.deviantart.com/shimoda7/about#watching", {
        "pattern": DeviantartUserExtractor.pattern,
        "range": "1-50",
        "count": 50,
    })

    def items(self):
        """Yield a Queue message with the profile URL of each user"""
        endpoint = "{}/_napi/da-user-profile/api/module/watching".format(
            self.root)
        query = {
            "username": self.user,
            "moduleid": self._module_id(self.user),
            "offset"  : "0",
            "limit"   : "24",
        }

        yield Message.Version, 1
        for user in self._pagination(endpoint, query):
            profile_url = "{}/{}".format(self.root, user["username"])
            yield Message.Queue, profile_url, user

    def _module_id(self, username):
        """Return the ID of the 'watching' module on a user's about page

        Raises NotFoundError if the page has no such module.
        """
        page = self.request("{}/{}/about".format(self.root, username)).text

        # the page embeds escaped JSON; locate the module's "type" entry
        marker = page.find('\\"type\\":\\"watching\\"')
        if marker < 0:
            raise exception.NotFoundError("module")

        # take the "id" value found by searching backwards from the marker
        module_id = text.rextract(page, '\\"id\\":', ',', marker)[0]
        return module_id.strip('" ')
|
2019-08-21 23:47:17 +02:00
|
|
|
|
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
class DeviantartAPI():
    """Minimal interface for the DeviantArt API

    Ref: https://www.deviantart.com/developers/http/v1/20160316
    """
    CLIENT_ID = "5388"
    CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
    # maximum number of deviation IDs sent per 'deviation/metadata' call
    METADATA_BATCH_SIZE = 50

    def __init__(self, extractor):
        """Read all API-related settings from 'extractor'"""
        self.extractor = extractor
        self.log = extractor.log
        self.headers = {}

        # 'delay' is an exponent: _call() sleeps 2**delay seconds,
        # and a negative value disables sleeping
        delay = extractor.config("wait-min", 0)
        self.delay = math.ceil(math.log2(delay)) if delay >= 1 else -1
        self.delay_min = max(2, self.delay)

        # the API parameter is the string "true" or "false"
        self.mature = extractor.config("mature", "true")
        if not isinstance(self.mature, str):
            self.mature = "true" if self.mature else "false"

        self.folders = extractor.config("folders", False)
        self.metadata = extractor.extra or extractor.config("metadata", False)

        self.refresh_token = extractor.config("refresh-token")
        self.client_id = extractor.config("client-id", self.CLIENT_ID)
        self.client_secret = extractor.config(
            "client-secret", self.CLIENT_SECRET)

        self.log.debug(
            "Using %s API credentials (client-id %s)",
            "default" if self.client_id == self.CLIENT_ID else "custom",
            self.client_id,
        )

    def browse_popular(self, query=None, timerange=None,
                       category_path=None, offset=0):
        """Yield popular deviations"""
        endpoint = "browse/popular"
        params = {"q": query, "offset": offset, "limit": 120,
                  "timerange": timerange, "category_path": category_path,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    def browse_user_journals(self, username, offset=0):
        """Yield all journal entries of a specific user"""
        endpoint = "browse/user/journals"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature, "featured": "false"}
        return self._pagination(endpoint, params)

    def collections(self, username, folder_id, offset=0):
        """Yield all Deviation-objects contained in a collection folder"""
        endpoint = "collections/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def collections_folders(self, username, offset=0):
        """Yield all collection folders of a specific user"""
        endpoint = "collections/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_folders(endpoint, params)

    def deviation(self, deviation_id):
        """Query and return info about a single Deviation"""
        endpoint = "deviation/" + deviation_id
        deviation = self._call(endpoint)
        if self.metadata:
            self._metadata((deviation,))
        if self.folders:
            self._folders((deviation,))
        return deviation

    def deviation_content(self, deviation_id):
        """Get extended content of a single Deviation"""
        endpoint = "deviation/content"
        params = {"deviationid": deviation_id}
        return self._call(endpoint, params)

    def deviation_download(self, deviation_id):
        """Get the original file download (if allowed)"""
        endpoint = "deviation/download/" + deviation_id
        params = {"mature_content": self.mature}
        return self._call(endpoint, params)

    def deviation_extended_fetch(self, deviation_id, user, kind):
        """Return the decoded 'extended_fetch' NAPI response

        Raises StopExtraction when the server answers with 404.
        """
        url = ("https://www.deviantart.com/_napi/da-browse/shared_api"
               "/deviation/extended_fetch")
        headers = {"Referer": "https://www.deviantart.com/"}
        params = {
            "deviationid"    : deviation_id,
            "username"       : user,
            "type"           : kind,
            "include_session": "false",
        }
        response = self.extractor.request(
            url, headers=headers, params=params, fatal=None)
        if response.status_code == 404:
            raise exception.StopExtraction(
                "Your account must use the Eclipse interface.")
        return response.json()

    def deviation_metadata(self, deviations):
        """Fetch deviation metadata for a set of deviations

        'deviations' must be a list or tuple.  IDs are sent in batches
        of at most METADATA_BATCH_SIZE, so a page larger than the
        endpoint's per-request limit (e.g. the 120 results requested by
        browse_popular) no longer ends up in one oversized request.
        The combined metadata list keeps the order of 'deviations'.
        """
        if not deviations:
            return []

        params = {"mature_content": self.mature}
        size = self.METADATA_BATCH_SIZE
        metadata = []
        for start in range(0, len(deviations), size):
            endpoint = "deviation/metadata?" + "&".join(
                "deviationids[{}]={}".format(num, deviation["deviationid"])
                for num, deviation in enumerate(
                    deviations[start:start + size])
            )
            metadata.extend(self._call(endpoint, params)["metadata"])
        return metadata

    def gallery(self, username, folder_id="", offset=0, extend=True):
        """Yield all Deviation-objects contained in a gallery folder"""
        endpoint = "gallery/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature, "mode": "newest"}
        return self._pagination(endpoint, params, extend)

    def gallery_all(self, username, offset=0):
        """Yield all Deviation-objects of a specific user"""
        endpoint = "gallery/all"
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def gallery_folders(self, username, offset=0):
        """Yield all gallery folders of a specific user"""
        endpoint = "gallery/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_folders(endpoint, params)

    @memcache(keyarg=1)
    def user_profile(self, username):
        """Get user profile information"""
        endpoint = "user/profile/" + username
        return self._call(endpoint, fatal=False)

    def authenticate(self, refresh_token):
        """Authenticate the application by requesting an access token"""
        self.headers["Authorization"] = self._authenticate_impl(refresh_token)

    @cache(maxage=3600, keyarg=1)
    def _authenticate_impl(self, refresh_token):
        """Actual authenticate implementation"""
        url = "https://www.deviantart.com/oauth2/token"
        if refresh_token:
            self.log.info("Refreshing private access token")
            data = {"grant_type": "refresh_token",
                    "refresh_token": _refresh_token_cache(refresh_token)}
        else:
            self.log.info("Requesting public access token")
            data = {"grant_type": "client_credentials"}

        auth = (self.client_id, self.client_secret)
        response = self.extractor.request(
            url, method="POST", data=data, auth=auth, fatal=False)
        data = response.json()

        if response.status_code != 200:
            self.log.debug("Server response: %s", data)
            raise exception.AuthenticationError('"{}" ({})'.format(
                data.get("error_description"), data.get("error")))
        if refresh_token:
            # remember the replacement refresh-token for the next run
            _refresh_token_cache.update(refresh_token, data["refresh_token"])
        return "Bearer " + data["access_token"]

    def _call(self, endpoint, params=None, fatal=True, public=True):
        """Call an API endpoint

        Returns the decoded response.  With 'fatal' disabled, None is
        returned for every error status except 429.  A 429 response
        increases the delay and repeats the request.
        """
        url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint
        while True:
            if self.delay >= 0:
                time.sleep(2 ** self.delay)

            self.authenticate(None if public else self.refresh_token)
            response = self.extractor.request(
                url, headers=self.headers, params=params, fatal=None)
            try:
                data = response.json()
            except ValueError:
                # the body is not JSON (e.g. an HTML error page);
                # continue with an empty object instead of crashing
                self.log.error("Unable to parse API response")
                data = {}
            status = response.status_code

            if 200 <= status < 400:
                if self.delay > self.delay_min:
                    self.delay -= 1
                return data
            if not fatal and status != 429:
                return None
            if data.get("error_description") == "User not found.":
                raise exception.NotFoundError("user or group")

            self.log.debug(response.text)
            msg = "API responded with {} {}".format(
                status, response.reason)
            if status == 429:
                self.delay += 1
                self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay)
            else:
                self.log.error(msg)
                return data

    def _pagination(self, endpoint, params, extend=True):
        """Yield the 'results' of all pages of an endpoint

        Updates params["offset"] in place.  With 'extend' enabled, a
        page shorter than the requested limit triggers a switch to the
        private access token (or a one-time warning if there is none),
        and metadata/folder info is added according to the settings.
        """
        public = warn = True
        while True:
            data = self._call(endpoint, params, public=public)
            if "results" not in data:
                self.log.error("Unexpected API response: %s", data)
                return

            if extend:
                if public and len(data["results"]) < params["limit"]:
                    if self.refresh_token:
                        # request the same page again with private access
                        self.log.debug("Switching to private access token")
                        public = False
                        continue
                    elif data["has_more"] and warn:
                        warn = False
                        self.log.warning(
                            "Private deviations detected! Run 'gallery-dl "
                            "oauth:deviantart' and follow the instructions to "
                            "be able to access them.")
                if self.metadata:
                    self._metadata(data["results"])
                if self.folders:
                    self._folders(data["results"])
            yield from data["results"]

            if not data["has_more"]:
                return
            params["offset"] = data["next_offset"]

    def _pagination_folders(self, endpoint, params):
        """Return all results of a folder endpoint as a list"""
        result = []
        result.extend(self._pagination(endpoint, params, False))
        return result

    def _metadata(self, deviations):
        """Add extended metadata to each deviation object"""
        for deviation, metadata in zip(
                deviations, self.deviation_metadata(deviations)):
            deviation.update(metadata)
            deviation["tags"] = [t["tag_name"] for t in deviation["tags"]]

    def _folders(self, deviations):
        """Add a list of all containing folders to each deviation object"""
        for deviation in deviations:
            deviation["folders"] = self._folders_map(
                deviation["author"]["username"])[deviation["deviationid"]]

    @memcache(keyarg=1)
    def _folders_map(self, username):
        """Generate a deviation_id -> folders mapping for 'username'"""
        self.log.info("Collecting folder information for '%s'", username)
        folders = self.gallery_folders(username)

        dmap = collections.defaultdict(list)
        if not folders:
            # no folder could be listed: every lookup yields an empty
            # list instead of failing on 'folders[0]' below
            return dmap

        # add parent names to folders, but ignore "Featured" as parent
        fmap = {}
        featured = folders[0]["folderid"]
        for folder in folders:
            if folder["parent"] and folder["parent"] != featured:
                folder["name"] = fmap[folder["parent"]] + "/" + folder["name"]
            fmap[folder["folderid"]] = folder["name"]

        # map deviationids to folder names
        for folder in folders:
            for deviation in self.gallery(
                    username, folder["folderid"], 0, False):
                dmap[deviation["deviationid"]].append(folder["name"])
        return dmap
|
|
|
|
|
2017-05-10 16:45:45 +02:00
|
|
|
|
2019-03-14 22:21:49 +01:00
|
|
|
@cache(maxage=10*365*24*3600, keyarg=0)
def _refresh_token_cache(original_token, new_token=None):
    """Return 'new_token' if given, otherwise 'original_token'

    The value is stored by the 'cache' decorator; presumably it is
    keyed on 'original_token' (keyarg=0) -- verify against 'cache'.
    """
    if new_token:
        return new_token
    return original_token
|
|
|
|
|
|
|
|
|
2017-05-13 21:42:29 +02:00
|
|
|
# HTML snippet: an <img> wrapped in a <span class="shadow">, followed by
# two line breaks. Named placeholders: {src}, {width}, {height}.
# NOTE(review): presumably filled in with str.format() - confirm at caller.
SHADOW_TEMPLATE = """
|
|
|
|
<span class="shadow">
|
|
|
|
<img src="{src}" class="smshadow" width="{width}" height="{height}">
|
|
|
|
</span>
|
|
|
|
<br><br>
|
|
|
|
"""
|
|
|
|
|
2017-05-13 15:34:20 +02:00
|
|
|
# HTML header block with title link, author link, date and category list.
# Named placeholders: {url}, {title}, {userurl}, {username}, {date},
# {categories}. The opening <div usr class="gr"> is not closed in here.
# NOTE(review): presumably filled in with str.format() - confirm at caller.
HEADER_TEMPLATE = """<div usr class="gr">
|
|
|
|
<div class="metadata">
|
|
|
|
<h2><a href="{url}">{title}</a></h2>
|
|
|
|
<ul>
|
|
|
|
<li class="author">
|
|
|
|
by <span class="name"><span class="username-with-symbol u">
|
|
|
|
<a class="u regular username" href="{userurl}">{username}</a>\
|
|
|
|
<span class="user-symbol regular"></span></span></span>,
|
|
|
|
<span>{date}</span>
|
|
|
|
</li>
|
|
|
|
<li class="category">
|
|
|
|
{categories}
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</div>
|
|
|
|
"""
|
|
|
|
|
2017-05-15 15:58:06 +02:00
|
|
|
# Alternative HTML header: journal icon, title link and a
# "Journal Entry: <date>" line. Named placeholders: {url}, {title}, {date}.
# The opening <div class='boxtop journaltop'> is not closed in here.
# NOTE(review): presumably filled in with str.format() - confirm at caller.
HEADER_CUSTOM_TEMPLATE = """<div class='boxtop journaltop'>
|
|
|
|
<h2>
|
2017-08-16 12:13:42 +02:00
|
|
|
<img src="https://st.deviantart.net/minish/gruzecontrol/icons/journal.gif\
|
2017-05-19 19:22:39 +02:00
|
|
|
?2" style="vertical-align:middle" alt=""/>
|
2017-05-15 15:58:06 +02:00
|
|
|
<a href="{url}">{title}</a>
|
|
|
|
</h2>
|
|
|
|
Journal Entry: <span>{date}</span>
|
|
|
|
"""
|
|
|
|
|
2018-07-16 18:14:41 +02:00
|
|
|
# Complete HTML document for a journal entry. The value starts with the
# literal prefix "text:" in front of the doctype. Named placeholders:
# {title}, {css}, {shadow}, {cls}, {html}. Stylesheets are referenced
# from st.deviantart.net.
# NOTE(review): presumably filled in with str.format(), with {shadow}
# taking a rendered SHADOW_TEMPLATE - confirm at caller.
JOURNAL_TEMPLATE_HTML = """text:<!DOCTYPE html>
|
2017-05-10 16:45:45 +02:00
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
|
|
<title>{title}</title>
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/deviantart-network_lc.css?3843780832">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/group_secrets_lc.css?3250492874">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/v6core_lc.css?4246581581">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/sidebar_lc.css?1490570941">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/writer_lc.css?3090682151">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/v6loggedin_lc.css?3001430805">
|
|
|
|
<style>{css}</style>
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
roses/cssmin/core.css?1488405371919" >
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
roses/cssmin/peeky.css?1487067424177" >
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
roses/cssmin/desktop.css?1491362542749" >
|
|
|
|
</head>
|
|
|
|
<body id="deviantART-v7" class="bubble no-apps loggedout w960 deviantart">
|
|
|
|
<div id="output">
|
|
|
|
<div class="dev-page-container bubbleview">
|
|
|
|
<div class="dev-page-view view-mode-normal">
|
|
|
|
<div class="dev-view-main-content">
|
|
|
|
<div class="dev-view-deviation">
|
2017-05-13 21:42:29 +02:00
|
|
|
{shadow}
|
2017-05-10 16:45:45 +02:00
|
|
|
<div class="journal-wrapper tt-a">
|
|
|
|
<div class="journal-wrapper2">
|
2017-05-13 15:34:20 +02:00
|
|
|
<div class="journal {cls} journalcontrol">
|
2017-05-10 16:45:45 +02:00
|
|
|
{html}
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</body>
|
|
|
|
</html>
|
|
|
|
"""
|
2018-07-16 18:14:41 +02:00
|
|
|
|
2019-08-30 18:47:06 +02:00
|
|
|
# HTML wrapper ("gr-box") with two positional '{}' placeholders: the
# first inside <div usr class="gr-top">, the second inside
# <div class="text">.
# NOTE(review): presumably receives a rendered header and the journal
# body, in that order, via str.format() - confirm at caller.
JOURNAL_TEMPLATE_HTML_EXTRA = """\
|
|
|
|
<div id="devskin0"><div class="negate-box-margin" style="">\
|
|
|
|
<div usr class="gr-box gr-genericbox"
|
|
|
|
><i usr class="gr1"><i></i></i
|
|
|
|
><i usr class="gr2"><i></i></i
|
|
|
|
><i usr class="gr3"><i></i></i
|
|
|
|
><div usr class="gr-top">
|
|
|
|
<i usr class="tri"></i>
|
|
|
|
{}
|
|
|
|
</div>
|
|
|
|
</div><div usr class="gr-body"><div usr class="gr">
|
|
|
|
<div class="grf-indent">
|
|
|
|
<div class="text">
|
|
|
|
{} </div>
|
|
|
|
</div>
|
|
|
|
</div></div>
|
|
|
|
<i usr class="gr3 gb"></i>
|
|
|
|
<i usr class="gr2 gb"></i>
|
|
|
|
<i usr class="gr1 gb gb1"></i> </div>
|
|
|
|
</div></div>"""
|
|
|
|
|
2018-07-16 18:14:41 +02:00
|
|
|
# Plain-text journal layout: title, a "by <username>, <date>" line and
# the content. The value starts with the literal prefix "text:".
# Named placeholders: {title}, {username}, {date}, {content}.
# NOTE(review): presumably filled in with str.format() - confirm at caller.
JOURNAL_TEMPLATE_TEXT = """text:{title}
|
|
|
|
by {username}, {date}
|
|
|
|
|
|
|
|
{content}
|
|
|
|
"""
|