[deviantart] add 'journals' option
This commit is contained in:
parent
00032b828c
commit
ff436692bf
@ -294,6 +294,19 @@ Description Select the directory structure created by the Gallery- and
|
|||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
|
extractor.deviantart.journals
|
||||||
|
-----------------------------
|
||||||
|
=========== =====
|
||||||
|
Type ``string``
|
||||||
|
Default ``"html"``
|
||||||
|
Description Selects the output format of journal entries.
|
||||||
|
|
||||||
|
- ``"html"``: HTML with (roughly) the same layout as on DeviantArt.
|
||||||
|
- ``"text"``: Plain text with image references and HTML tags removed.
|
||||||
|
- ``"none"``: Don't download journals.
|
||||||
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
extractor.deviantart.mature
|
extractor.deviantart.mature
|
||||||
---------------------------
|
---------------------------
|
||||||
=========== =====
|
=========== =====
|
||||||
@ -338,7 +351,7 @@ extractor.deviantart.wait-min
|
|||||||
=========== =====
|
=========== =====
|
||||||
Type ``int``
|
Type ``int``
|
||||||
Default ``0``
|
Default ``0``
|
||||||
Description Minimum wait time in seconds before any API request.
|
Description Minimum wait time in seconds before API requests.
|
||||||
|
|
||||||
Note: This value will internally be rounded up
|
Note: This value will internally be rounded up
|
||||||
to the next power of 2.
|
to the next power of 2.
|
||||||
|
@ -18,8 +18,10 @@
|
|||||||
{
|
{
|
||||||
"refresh-token": null,
|
"refresh-token": null,
|
||||||
"flat": true,
|
"flat": true,
|
||||||
|
"journals": "html",
|
||||||
"mature": true,
|
"mature": true,
|
||||||
"original": true
|
"original": true,
|
||||||
|
"wait-min": 0
|
||||||
},
|
},
|
||||||
"exhentai":
|
"exhentai":
|
||||||
{
|
{
|
||||||
|
@ -21,7 +21,7 @@ import re
|
|||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
r"(?:https?://)?(?:"
|
r"(?:https?://)?(?:"
|
||||||
r"(?:www\.)?deviantart\.com/([\w-]+)|"
|
r"(?:www\.)?deviantart\.com/([\w-]+)|"
|
||||||
r"(?!www\.)([\w-]+)\.deviantart\.com)"
|
r"([\w-]+)\.deviantart\.com)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -41,6 +41,11 @@ class DeviantartExtractor(Extractor):
|
|||||||
self.user = match.group(1) or match.group(2) if match else None
|
self.user = match.group(1) or match.group(2) if match else None
|
||||||
self.group = False
|
self.group = False
|
||||||
|
|
||||||
|
self.commit_journal = {
|
||||||
|
"html": self._commit_journal_html,
|
||||||
|
"text": self._commit_journal_text,
|
||||||
|
}.get(self.config("journals", "html"))
|
||||||
|
|
||||||
def skip(self, num):
|
def skip(self, num):
|
||||||
self.offset += num
|
self.offset += num
|
||||||
return num
|
return num
|
||||||
@ -77,7 +82,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
if "flash" in deviation:
|
if "flash" in deviation:
|
||||||
yield self.commit(deviation, deviation["flash"])
|
yield self.commit(deviation, deviation["flash"])
|
||||||
|
|
||||||
if "excerpt" in deviation:
|
if "excerpt" in deviation and self.commit_journal:
|
||||||
journal = self.api.deviation_content(deviation["deviationid"])
|
journal = self.api.deviation_content(deviation["deviationid"])
|
||||||
yield self.commit_journal(deviation, journal)
|
yield self.commit_journal(deviation, journal)
|
||||||
|
|
||||||
@ -94,7 +99,6 @@ class DeviantartExtractor(Extractor):
|
|||||||
deviation["index"] = deviation["url"].rpartition("-")[2]
|
deviation["index"] = deviation["url"].rpartition("-")[2]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
deviation["index"] = 0
|
deviation["index"] = 0
|
||||||
|
|
||||||
if self.user:
|
if self.user:
|
||||||
deviation["username"] = self.user
|
deviation["username"] = self.user
|
||||||
deviation["da_category"] = deviation["category"]
|
deviation["da_category"] = deviation["category"]
|
||||||
@ -108,7 +112,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
url = "https:" + url[5:]
|
url = "https:" + url[5:]
|
||||||
return Message.Url, url, deviation
|
return Message.Url, url, deviation
|
||||||
|
|
||||||
def commit_journal(self, deviation, journal):
|
def _commit_journal_html(self, deviation, journal):
|
||||||
title = text.escape(deviation["title"])
|
title = text.escape(deviation["title"])
|
||||||
url = deviation["url"]
|
url = deviation["url"]
|
||||||
thumbs = deviation["thumbs"]
|
thumbs = deviation["thumbs"]
|
||||||
@ -142,11 +146,11 @@ class DeviantartExtractor(Extractor):
|
|||||||
url=url,
|
url=url,
|
||||||
userurl="{}/{}/".format(self.root, deviation["username"]),
|
userurl="{}/{}/".format(self.root, deviation["username"]),
|
||||||
username=deviation["author"]["username"],
|
username=deviation["author"]["username"],
|
||||||
date=str(date),
|
date=date,
|
||||||
categories=categories,
|
categories=categories,
|
||||||
)
|
)
|
||||||
|
|
||||||
html = JOURNAL_TEMPLATE.format(
|
html = JOURNAL_TEMPLATE_HTML.format(
|
||||||
title=title,
|
title=title,
|
||||||
html=html.replace(needle, header, 1),
|
html=html.replace(needle, header, 1),
|
||||||
shadow=shadow,
|
shadow=shadow,
|
||||||
@ -157,6 +161,23 @@ class DeviantartExtractor(Extractor):
|
|||||||
deviation["extension"] = "htm"
|
deviation["extension"] = "htm"
|
||||||
return Message.Url, html, deviation
|
return Message.Url, html, deviation
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _commit_journal_text(deviation, journal):
|
||||||
|
date = datetime.datetime.utcfromtimestamp(deviation["published_time"])
|
||||||
|
content = "\n".join(
|
||||||
|
text.unescape(text.remove_html(txt))
|
||||||
|
for txt in journal["html"].rpartition("<script")[0].split("<br />")
|
||||||
|
)
|
||||||
|
txt = JOURNAL_TEMPLATE_TEXT.format(
|
||||||
|
title=deviation["title"],
|
||||||
|
username=deviation["author"]["username"],
|
||||||
|
date=date,
|
||||||
|
content=content,
|
||||||
|
)
|
||||||
|
|
||||||
|
deviation["extension"] = "txt"
|
||||||
|
return Message.Url, txt, deviation
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _find_folder(folders, name):
|
def _find_folder(folders, name):
|
||||||
pattern = r"[^\w]*" + name.replace("-", r"[^\w]+") + r"[^\w]*$"
|
pattern = r"[^\w]*" + name.replace("-", r"[^\w]+") + r"[^\w]*$"
|
||||||
@ -246,12 +267,12 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
|||||||
subcategory = "deviation"
|
subcategory = "deviation"
|
||||||
archive_fmt = "{index}.{extension}"
|
archive_fmt = "{index}.{extension}"
|
||||||
pattern = [BASE_PATTERN + r"/(?:art|journal)/[^/?&#]+-\d+",
|
pattern = [BASE_PATTERN + r"/(?:art|journal)/[^/?&#]+-\d+",
|
||||||
r"(?:https?://)?(sta\.sh/[a-z0-9]+)"]
|
r"(?:https?://)?sta\.sh/()()[a-z0-9]+"]
|
||||||
test = [
|
test = [
|
||||||
(("https://www.deviantart.com/shimoda7/art/"
|
(("https://www.deviantart.com/shimoda7/art/"
|
||||||
"For-the-sake-of-a-memory-10073852"), {
|
"For-the-sake-of-a-memory-10073852"), {
|
||||||
"url": "eef0c01b3808c535ea673e7b3654ab5209b910b7",
|
"url": "eef0c01b3808c535ea673e7b3654ab5209b910b7",
|
||||||
"keyword": "b7ed053c3fb54b93c90e5ff8ed9f7a11d47a9c74",
|
"keyword": "925217229da46aeb8ce282675dc8639fa20a892c",
|
||||||
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
|
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
|
||||||
}),
|
}),
|
||||||
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
|
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
|
||||||
@ -277,7 +298,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DeviantartExtractor.__init__(self)
|
DeviantartExtractor.__init__(self, match)
|
||||||
self.url = match.group(0)
|
self.url = match.group(0)
|
||||||
if not self.url.startswith("http"):
|
if not self.url.startswith("http"):
|
||||||
self.url = "https://" + self.url
|
self.url = "https://" + self.url
|
||||||
@ -310,10 +331,10 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
|
|||||||
def deviations(self):
|
def deviations(self):
|
||||||
folders = self.api.collections_folders(self.user)
|
folders = self.api.collections_folders(self.user)
|
||||||
if self.flat:
|
if self.flat:
|
||||||
return itertools.chain.from_iterable([
|
return itertools.chain.from_iterable(
|
||||||
self.api.collections(self.user, folder["folderid"])
|
self.api.collections(self.user, folder["folderid"])
|
||||||
for folder in folders
|
for folder in folders
|
||||||
])
|
)
|
||||||
else:
|
else:
|
||||||
return self._folder_urls(folders, "favourites")
|
return self._folder_urls(folders, "favourites")
|
||||||
|
|
||||||
@ -363,6 +384,14 @@ class DeviantartJournalExtractor(DeviantartExtractor):
|
|||||||
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
|
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
|
||||||
"keyword": "8d11b458f389188cc1f00d09694ce4e00c43efcc",
|
"keyword": "8d11b458f389188cc1f00d09694ce4e00c43efcc",
|
||||||
}),
|
}),
|
||||||
|
("https://www.deviantart.com/angrywhitewanker/journal/", {
|
||||||
|
"url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
|
||||||
|
"options": (("journals", "text"),),
|
||||||
|
}),
|
||||||
|
("https://www.deviantart.com/angrywhitewanker/journal/", {
|
||||||
|
"count": 0,
|
||||||
|
"options": (("journals", "none"),),
|
||||||
|
}),
|
||||||
("https://www.deviantart.com/shimoda7/journal/?catpath=/", None),
|
("https://www.deviantart.com/shimoda7/journal/?catpath=/", None),
|
||||||
("https://angrywhitewanker.deviantart.com/journal/", None),
|
("https://angrywhitewanker.deviantart.com/journal/", None),
|
||||||
("https://shimoda7.deviantart.com/journal/?catpath=/", None),
|
("https://shimoda7.deviantart.com/journal/?catpath=/", None),
|
||||||
@ -629,7 +658,7 @@ HEADER_CUSTOM_TEMPLATE = """<div class='boxtop journaltop'>
|
|||||||
Journal Entry: <span>{date}</span>
|
Journal Entry: <span>{date}</span>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
JOURNAL_TEMPLATE = """text:<!DOCTYPE html>
|
JOURNAL_TEMPLATE_HTML = """text:<!DOCTYPE html>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
@ -676,3 +705,9 @@ roses/cssmin/desktop.css?1491362542749" >
|
|||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
JOURNAL_TEMPLATE_TEXT = """text:{title}
|
||||||
|
by {username}, {date}
|
||||||
|
|
||||||
|
{content}
|
||||||
|
"""
|
||||||
|
@ -155,8 +155,8 @@ class ImgurAlbumExtractor(ImgurExtractor):
|
|||||||
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
|
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
|
||||||
"url": "695ef0c950023362a0163ee5041796300db76674",
|
"url": "695ef0c950023362a0163ee5041796300db76674",
|
||||||
}),
|
}),
|
||||||
("https://imgur.com/t/unmuted/FVyxO32", { # unmuted URL
|
("https://imgur.com/t/unmuted/YMqBcua", { # unmuted URL
|
||||||
"url": "1df12d96438ad9018ace7665dc893419ce9ec867",
|
"url": "86b4747f8147cec7602f0214e267309af73a8655",
|
||||||
}),
|
}),
|
||||||
("https://imgur.com/a/TcBmQ", {
|
("https://imgur.com/a/TcBmQ", {
|
||||||
"exception": exception.NotFoundError,
|
"exception": exception.NotFoundError,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user