# -*- coding: utf-8 -*- # Copyright 2014-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract images from https://gelbooru.com/""" from .common import SharedConfigExtractor, Message from .. import text, util, exception import xml.etree.ElementTree as ET class GelbooruExtractor(SharedConfigExtractor): """Base class for gelbooru extractors""" basecategory = "booru" category = "gelbooru" filename_fmt = "{category}_{id}_{md5}.{extension}" api_url = "https://gelbooru.com/index.php?page=dapi&s=post&q=index" def __init__(self): SharedConfigExtractor.__init__(self) self.start_post = 0 self.use_api = self.config("api", True) if self.use_api: self.get_post_data = self.get_post_data_api def items(self): data = self.get_metadata() yield Message.Version, 1 yield Message.Directory, data for post in util.advance(self.get_posts(), self.start_post): if isinstance(post, str): post = self.get_post_data(post) for key in ("id", "width", "height", "score", "change"): post[key] = util.safe_int(post[key]) url = post["file_url"] post.update(data) yield Message.Url, url, text.nameext_from_url(url, post) def skip(self, num): self.start_post += num return num def get_metadata(self): """Return general metadata""" return {} def get_posts(self): """Return an iterable containing all relevant post objects""" def get_post_data(self, post_id): """Extract metadata of a single post""" page = self.request("https://gelbooru.com/index.php?page=post&s=view" "&id=" + post_id).text data = text.extract_all(page, ( (None , 'Id: ', '<'), ("created_at", '