# -*- coding: utf-8 -*- # Copyright 2014-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract images from https://gelbooru.com/""" from .common import SharedConfigExtractor, Message from .. import text, util, exception import xml.etree.ElementTree as ET class GelbooruExtractor(SharedConfigExtractor): """Base class for gelbooru extractors""" basecategory = "booru" category = "gelbooru" filename_fmt = "{category}_{id}_{md5}.{extension}" api_url = "https://gelbooru.com/index.php?page=dapi&s=post&q=index" def __init__(self): SharedConfigExtractor.__init__(self) self.start_post = 0 self.use_api = self.config("api", True) if self.use_api: self.get_post_data = self.get_post_data_api def items(self): data = self.get_metadata() yield Message.Version, 1 yield Message.Directory, data for post in util.advance(self.get_posts(), self.start_post): if isinstance(post, str): post = self.get_post_data(post) for key in ("id", "width", "height", "score", "change"): post[key] = util.safe_int(post[key]) url = post["file_url"] post.update(data) yield Message.Url, url, text.nameext_from_url(url, post) def skip(self, num): self.start_post += num return num def get_metadata(self): """Return general metadata""" return {} def get_posts(self): """Return an iterable containing all relevant post objects""" def get_post_data(self, post_id): """Extract metadata of a single post""" page = self.request("https://gelbooru.com/index.php?page=post&s=view" "&id=" + post_id).text data = text.extract_all(page, ( (None , 'Id: ', '<'), ("created_at", '
  • Posted: ', '<'), ("width" , '
  • Size: ', 'x'), ("height" , '', '<'), ("source" , '
  • Source: Rating: ', '<'), (None , '
  • Score: ', ''), ("score" , '>', '<'), ("file_url" , '
  • Now Viewing: ", "")
        # NOTE(review): everything above this comment is residue of a
        # statement whose beginning is missing from this copy of the file;
        # the enclosing method and class headers are not visible either.
        # The code appears to belong to a pool-related extractor -- confirm
        # against a pristine copy.

        # Materialize the values found between 'id="p' and '"' in 'page'.
        # NOTE(review): presumably text.extract_iter() starts searching at
        # offset 'pos' -- confirm in text.
        self.posts = list(text.extract_iter(page, 'id="p', '"', pos))
        # A falsy name is reported as a missing pool.
        if not name:
            raise exception.NotFoundError("pool")

        return {
            "pool": util.safe_int(self.pool_id),
            "pool_name": text.unescape(name),
            "count": len(self.posts),
        }

    def get_posts(self):
        # Hand out the list stored in self.posts by the code above.
        return self.posts


class GelbooruPostExtractor(GelbooruExtractor):
    """Extractor for single images from gelbooru.com"""
    subcategory = "post"
    archive_fmt = "{id}"
    # URL pattern; its only capture group is the numeric post id.
    pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
               r"\?page=post&s=view&id=(\d+)"]
    # Sample URL paired with expected values ("content", "count").
    # NOTE(review): presumably consumed by the project's test suite --
    # confirm.
    test = [("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
        "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
        "count": 1,
    })]

    def __init__(self, match):
        """Store the post id taken from group 1 of 'match'

        NOTE(review): 'match' is presumably the result of matching a URL
        against 'pattern', which makes group 1 the digits of the post id --
        confirm at the call site.
        """
        GelbooruExtractor.__init__(self)
        self.post_id = match.group(1)

    def get_posts(self):
        """Return a one-element tuple containing the stored post id"""
        # GelbooruExtractor.items() passes str entries to get_post_data(),
        # so a str post id is resolved into post metadata there.
        return (self.post_id,)