[booru] split '_prepare_post()'
This commit is contained in:
parent
53222445d5
commit
e41e2be2f9
@ -13,6 +13,7 @@ from .. import text, util, exception
|
||||
|
||||
from xml.etree import ElementTree
|
||||
import collections
|
||||
import operator
|
||||
import re
|
||||
|
||||
|
||||
@ -25,19 +26,25 @@ class BooruExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
extended_tags = self.config("tags", False)
|
||||
data = self.metadata()
|
||||
tags = self.config("tags", False)
|
||||
|
||||
for post in self.posts():
|
||||
try:
|
||||
url = self._prepare_post(post, extended_tags)
|
||||
url = self._file_url(post)
|
||||
if url[0] == "/":
|
||||
url = self.root + url
|
||||
except (KeyError, TypeError):
|
||||
self.log.debug("Unable to fetch download URL for post %s "
|
||||
"(md5: %s)", post.get("id"), post.get("md5"))
|
||||
continue
|
||||
|
||||
if tags:
|
||||
self._extended_tags(post)
|
||||
self._prepare(post)
|
||||
post.update(data)
|
||||
text.nameext_from_url(url, post)
|
||||
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, url, post
|
||||
|
||||
@ -57,17 +64,14 @@ class BooruExtractor(Extractor):
|
||||
"""Return an iterable with post objects"""
|
||||
return ()
|
||||
|
||||
def _prepare_post(self, post, extended_tags=False):
|
||||
url = post["file_url"]
|
||||
if url[0] == "/":
|
||||
url = self.root + url
|
||||
if extended_tags:
|
||||
self._fetch_extended_tags(post)
|
||||
_file_url = operator.itemgetter("file_url")
|
||||
|
||||
@staticmethod
|
||||
def _prepare(post):
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
||||
return url
|
||||
|
||||
def _fetch_extended_tags(self, post, page=None):
|
||||
def _extended_tags(self, post, page=None):
|
||||
if not page:
|
||||
url = "{}/index.php?page=post&s=view&id={}".format(
|
||||
self.root, post["id"])
|
||||
|
@ -17,11 +17,12 @@ class GelbooruBase():
|
||||
category = "gelbooru"
|
||||
root = "https://gelbooru.com"
|
||||
|
||||
def _prepare_post(self, post, extended_tags=False):
|
||||
url = booru.BooruExtractor._prepare_post(self, post, extended_tags)
|
||||
if url.startswith("https://mp4.gelbooru.com/"):
|
||||
@staticmethod
|
||||
def _file_url(post):
|
||||
url = post["file_url"]
|
||||
if url.startswith(("https://mp4.gelbooru.com/", "https://video-cdn")):
|
||||
md5 = post["md5"]
|
||||
return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
|
||||
url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
|
||||
md5[0:2], md5[2:4], md5)
|
||||
return url
|
||||
|
||||
|
@ -23,14 +23,11 @@ class MoebooruExtractor(BooruExtractor):
|
||||
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||
page_start = 1
|
||||
|
||||
def _prepare_post(self, post, extended_tags=False):
|
||||
url = post["file_url"]
|
||||
if extended_tags:
|
||||
self._fetch_extended_tags(post)
|
||||
@staticmethod
|
||||
def _prepare(post):
|
||||
post["date"] = text.parse_timestamp(post["created_at"])
|
||||
return url
|
||||
|
||||
def _fetch_extended_tags(self, post):
|
||||
def _extended_tags(self, post):
|
||||
url = "{}/post/show/{}".format(self.root, post["id"])
|
||||
page = self.request(url).text
|
||||
html = text.extract(page, '<ul id="tag-', '</ul>')[0]
|
||||
|
@ -41,20 +41,21 @@ class SankakuExtractor(BooruExtractor):
|
||||
def skip(self, num):
|
||||
return 0
|
||||
|
||||
def _prepare_post(self, post, extended_tags=False):
|
||||
def _file_url(self, post):
|
||||
url = post["file_url"]
|
||||
if not url and self._warning:
|
||||
self.log.warning(
|
||||
"Login required to download 'contentious_content' posts")
|
||||
SankakuExtractor._warning = False
|
||||
if extended_tags:
|
||||
self._fetch_extended_tags(post)
|
||||
return url
|
||||
|
||||
@staticmethod
|
||||
def _prepare(post):
|
||||
post["created_at"] = post["created_at"]["s"]
|
||||
post["date"] = text.parse_timestamp(post["created_at"])
|
||||
post["tags"] = [tag["name"] for tag in post["tags"]]
|
||||
return url
|
||||
|
||||
def _fetch_extended_tags(self, post):
|
||||
def _extended_tags(self, post):
|
||||
tags = collections.defaultdict(list)
|
||||
types = self.TAG_TYPES
|
||||
for tag in post["tags"]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user