# -*- coding: utf-8 -*- # Copyright 2014-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract images from https://gelbooru.com/""" from .common import SharedConfigExtractor, Message from .. import text, util class GelbooruExtractor(SharedConfigExtractor): """Base class for gelbooru extractors""" basecategory = "booru" category = "gelbooru" filename_fmt = "{category}_{id}_{md5}.{extension}" def __init__(self): SharedConfigExtractor.__init__(self) self.start_post = 0 def items(self): yield Message.Version, 1 yield Message.Directory, self.get_metadata() for post_id in util.advance(self.get_posts(), self.start_post): data = self.get_post_data(post_id) url = data["file_url"] yield Message.Url, url, text.nameext_from_url(url, data) def skip(self, num): self.start_post += num return num def get_metadata(self): """Return general metadata""" return {} def get_posts(self): """Return an iterable containing all relevant post ids""" def get_post_data(self, post_id): """Extract metadata of a single post""" page = self.request("https://gelbooru.com/index.php?page=post&s=view" "&id=" + post_id).text data = text.extract_all(page, ( (None , 'Id: ', '<'), ("created_at", '