f1ddbff0b5
This is enabled by default and will recursively go through all (sub)folders in an artist's gallery. The old method of using "Latest Updates" lists can be restored by disabling this option.
197 lines
6.8 KiB
Python
197 lines
6.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2020 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
"""Extractors for https://aryion.com/"""
|
|
|
|
from .common import Extractor, Message
|
|
from .. import text, util
|
|
|
|
|
|
# Common URL prefix shared by all extractor patterns in this module:
# optional scheme, optional "www.", then the "aryion.com/g4" site root.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
|
|
|
|
|
|
class AryionExtractor(Extractor):
    """Base class for aryion extractors

    Subclasses provide post IDs through posts(); items() turns every ID
    into either a downloadable file (Directory + Url messages) or, for
    folders, into queued child posts handled by AryionPostExtractor.
    """
    category = "aryion"
    directory_fmt = ("{category}", "{user!l}", "{path:J - }")
    filename_fmt = "{id} {title}.{extension}"
    archive_fmt = "{id}"
    root = "https://aryion.com"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the subclass pattern
        self.user = match.group(1)
        # descend into folders by default
        self.recursive = True

    def items(self):
        """Yield messages for every post ID returned by posts()"""
        for post_id in self.posts():
            post = self._parse_post(post_id)
            if post:
                # regular file
                yield Message.Directory, post
                yield Message.Url, post["url"], post
            elif post is False and self.recursive:
                # folder: queue each entry it contains as its own post;
                # a distinct loop variable avoids shadowing 'post_id'
                base = self.root + "/g4/view/"
                data = {"_extractor": AryionPostExtractor}
                for child_id in self._pagination(base + post_id):
                    yield Message.Queue, base + child_id, data

    def posts(self):
        """Yield relevant post IDs"""

    def _pagination(self, url):
        """Yield the post IDs of all thumbnails on 'url' and its next pages"""
        while True:
            page = self.request(url).text
            yield from text.extract_iter(
                page, "class='thumb' href='/g4/view/", "'")

            # NOTE(review): if the server HTML-escapes this label
            # ('Next &gt;&gt;'), the marker never matches and only the
            # first page is read -- confirm against a live page
            pos = page.find("Next >>")
            if pos < 0:
                return
            # follow the link belonging to the 'Next' label
            url = self.root + text.rextract(page, "href='", "'", pos)[0]

    def _parse_post(self, post_id):
        """Collect metadata for the post with the given ID

        Returns a metadata dict for a regular file, False if the ID
        refers to a folder, and None (after logging a warning) if the
        HEAD request for the file data fails with a status >= 400.
        Missing or malformed response headers no longer raise; the
        affected fields are left empty instead.
        """
        url = "{}/g4/data.php?id={}".format(self.root, post_id)
        with self.request(url, method="HEAD", fatal=False) as response:

            if response.status_code >= 400:
                self.log.warning(
                    "Unable to fetch post %s ('%s %s')",
                    post_id, response.status_code, response.reason)
                return None
            headers = response.headers

            # folder
            if headers.get("content-type") in (
                "application/x-folder",
                "application/x-comic-folder-nomerge",
            ):
                return False

            # get filename from 'Content-Disposition' header
            # (tolerate a missing header or a missing 'filename' part)
            cdis = headers.get("content-disposition") or ""
            fname, _, ext = (text.extract(
                cdis, 'filename="', '"')[0] or "").rpartition(".")
            if not fname:
                # no "." in the name: the whole value is the filename
                fname, ext = ext, fname

            # get file size from 'Content-Length' header
            clen = headers.get("content-length")

            # fix 'Last-Modified' header
            # (a well-formed HTTP date has ":" at index 22, between
            #  minutes and seconds; rebuild the tail if it does not)
            lmod = headers.get("last-modified")
            if lmod and len(lmod) > 22 and lmod[22] != ":":
                lmod = "{}:{} GMT".format(lmod[:22], lmod[22:24])

        post_url = "{}/g4/view/{}".format(self.root, post_id)
        extr = text.extract_from(self.request(post_url).text)

        title, _, artist = text.unescape(extr(
            "<title>g4 :: ", "<")).rpartition(" by ")
        # NOTE: the extr() calls below are assumed to consume the page
        # sequentially ('height' continues where 'width' stopped), so
        # the order of these keys must not be changed
        data = {
            "id"    : text.parse_int(post_id),
            "url"   : url,
            "user"  : self.user or artist,
            "title" : title,
            "artist": artist,
            "path"  : text.split_html(extr("cookiecrumb'>", '</span'))[4:-1:2],
            "date"  : extr("class='pretty-date' title='", "'"),
            "size"  : text.parse_int(clen),
            "views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
            "width" : text.parse_int(extr("Resolution</b>:", "x")),
            "height": text.parse_int(extr("", "<")),
            "comments" : text.parse_int(extr("Comments</b>:", "<")),
            "favorites": text.parse_int(extr("Favorites</b>:", "<")),
            "tags"     : text.split_html(extr("class='taglist'>", "</span>")),
            "description": text.unescape(text.remove_html(extr(
                "<p>", "</p>"), "", "")),
            "filename" : fname,
            "extension": ext,
            "_mtime"   : lmod,
        }

        # drop the two characters before the comma (presumably the day's
        # ordinal suffix, e.g. "16th" -> "16") so the value matches the
        # format string; -5 is presumably the site's UTC offset
        d1, _, d2 = data["date"].partition(",")
        data["date"] = text.parse_datetime(
            d1[:-2] + d2, "%b %d %Y %I:%M %p", -5)

        return data
|
|
|
|
|
|
class AryionGalleryExtractor(AryionExtractor):
    """Extractor for a user's gallery on eka's portal"""
    subcategory = "gallery"
    categorytransfer = True
    pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)"
    test = (
        ("https://aryion.com/g4/gallery/jameshoward", {
            "options": (("recursive", False),),
            "pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$",
            "range": "48-52",
            "count": 5,
        }),
        ("https://aryion.com/g4/user/jameshoward"),
        ("https://aryion.com/g4/latest.php?name=jameshoward"),
    )

    def __init__(self, match):
        AryionExtractor.__init__(self, match)
        # 'recursive' option: walk the gallery folders (default) or
        # fall back to the flat "latest" listing when disabled
        self.recursive = self.config("recursive", True)
        # number of leading posts to drop in non-recursive mode
        self.offset = 0

    def skip(self, num):
        """Register 'num' posts to be skipped; return how many will be

        Nothing is skipped in recursive mode (returns 0). Otherwise the
        whole amount is added to the offset applied in posts().
        """
        skipped = 0 if self.recursive else num
        self.offset += skipped
        return skipped

    def posts(self):
        """Return an iterator over this user's post IDs"""
        if not self.recursive:
            # flat listing, minus the posts registered through skip()
            url = "{}/g4/latest.php?name={}".format(self.root, self.user)
            return util.advance(self._pagination(url), self.offset)

        # top level of the gallery; folders are resolved by items()
        url = "{}/g4/gallery/{}".format(self.root, self.user)
        return self._pagination(url)
|
|
|
|
|
|
class AryionPostExtractor(AryionExtractor):
    """Extractor for individual posts on eka's portal"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/view/(\d+)"
    test = (
        ("https://aryion.com/g4/view/510079", {
            "url": "f233286fa5558c07ae500f7f2d5cb0799881450e",
            "keyword": {
                "artist"   : "jameshoward",
                "user"     : "jameshoward",
                "filename" : "jameshoward-510079-subscribestar_150",
                "extension": "jpg",
                "id"       : 510079,
                "width"    : 1665,
                "height"   : 1619,
                "size"     : 784239,
                "title"    : "I'm on subscribestar now too!",
                "description": r"re:Doesn't hurt to have a backup, right\?",
                "tags"     : ["Non-Vore", "subscribestar"],
                "date"     : "dt:2019-02-16 19:30:00",
                "path"     : [],
                "views"    : int,
                "favorites": int,
                "comments" : int,
                "_mtime"   : "Sat, 16 Feb 2019 19:30:34 GMT",
            },
        }),
        # folder (#694)
        ("https://aryion.com/g4/view/588928", {
            "pattern": pattern,
            "count": ">= 8",
        }),
    )

    def posts(self):
        """Return a 1-tuple holding the post ID taken from the URL"""
        # the base __init__ stored the pattern's capture group (here the
        # post ID) in 'self.user'; take it out and clear 'user' so that
        # the metadata falls back to the artist name
        post_id = self.user
        self.user = None
        return (post_id,)
|