# -*- coding: utf-8 -*- # Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract images from https://nijie.info/""" from .common import AsynchronousExtractor, Message from .. import text, util, exception from ..cache import cache class NijieExtractor(AsynchronousExtractor): """Base class for nijie extractors""" category = "nijie" directory_fmt = ["{category}", "{user_id}"] filename_fmt = "{category}_{artist_id}_{image_id}_p{index:>02}.{extension}" archive_fmt = "{image_id}_{index}" cookiedomain = "nijie.info" root = "https://nijie.info" popup_url = "https://nijie.info/view_popup.php?id=" def __init__(self, match=None): AsynchronousExtractor.__init__(self) self.session.headers["Referer"] = self.root + "/" self.user_id = match.group(1) if match else None def items(self): self.login() data = self.get_job_metadata() yield Message.Version, 1 yield Message.Directory, data for image_id in self.get_image_ids(): for image_url, image_data in self.get_image_data(image_id): image_data.update(data) if not image_data["extension"]: image_data["extension"] = "jpg" yield Message.Url, image_url, image_data def get_job_metadata(self): """Collect metadata for extractor-job""" return {"user_id": util.safe_int(self.user_id)} def get_image_ids(self): """Collect all relevant image-ids""" def get_image_data(self, image_id): """Get URL and metadata for images specified by 'image_id'""" page = self.request(self.popup_url + image_id).text return self.extract_image_data(page, image_id) def extract_image_data(self, page, image_id): """Get URL and metadata for images from 'page'""" title, pos = text.extract( page, '= 18", }), ] def get_image_ids(self): return self._pagination("members_dojin") class NijieFavoriteExtractor(NijieExtractor): """Extractor for all favorites/bookmarks of a nijie-user""" subcategory = "favorite" 
# NOTE(review): this file has lost its line breaks and indentation.  The
# statements from here down to get_image_ids() are the remainder of the
# NijieFavoriteExtractor class body; its "class" header and "subcategory"
# attribute sit at the end of the previous physical line.  Re-indent them
# under that header once the line above has been restored.

# Format templates and URL pattern used for a user's favorites/bookmarks.
directory_fmt = ["{category}", "bookmarks", "{user_id}"]
archive_fmt = "f_{user_id}_{image_id}_{index}"
pattern = [
    (r"(?:https?://)?(?:www\.)?nijie\.info"
     r"/user_like_illust_view\.php\?id=(\d+)"),
]
test = [
    (
        "https://nijie.info/user_like_illust_view.php?id=44",
        {"count": ">= 16"},
    ),
]


def get_image_ids(self):
    """Return the result of _pagination() for 'user_like_illust_view'"""
    target = "user_like_illust_view"
    return self._pagination(target)


class NijieImageExtractor(NijieExtractor):
    """Extractor for one single work/image hosted on nijie.info"""
    subcategory = "image"
    # Accepts both the regular 'view.php' and the 'view_popup.php' URL form;
    # the captured group is the numeric image id.
    pattern = [
        r"(?:https?://)?(?:www\.)?nijie\.info"
        r"/view(?:_popup)?\.php\?id=(\d+)",
    ]
    test = [
        (
            "https://nijie.info/view.php?id=70720",
            {
                "url": "a10d4995645b5f260821e32c60a35f73546c2699",
                "keyword": "0728fc3bbef1e192abfd59f88f07921d3d336804",
                "content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
            },
        ),
        (
            "https://nijie.info/view.php?id=70724",
            {"exception": exception.NotFoundError},
        ),
        ("https://nijie.info/view_popup.php?id=70720", None),
    ]

    def __init__(self, match):
        """Remember the image id captured by 'pattern'"""
        # The base initializer is called without a match object on purpose:
        # 'user_id' is filled in later by get_job_metadata().
        NijieExtractor.__init__(self)
        self.page = ""
        self.image_id = match.group(1)

    def get_job_metadata(self):
        """Fetch the popup page, store it, and return the job metadata

        Raises exception.NotFoundError if the request is answered with a
        3xx status code (redirects are not followed).
        """
        popup = self.request(
            self.popup_url + self.image_id,
            allow_redirects=False,
            allow_empty=True,
        )
        status = popup.status_code
        if status >= 300 and status < 400:
            raise exception.NotFoundError("image")

        # Keep the page text so get_image_data() can reuse it instead of
        # issuing a second request.
        content = popup.text
        self.page = content
        self.user_id = self._userid_from_popup(content)
        return NijieExtractor.get_job_metadata(self)

    def get_image_ids(self):
        """Return a 1-tuple holding the id of the requested image"""
        only_id = self.image_id
        return (only_id,)

    def get_image_data(self, _):
        """Extract image data from the page stored by get_job_metadata()

        The argument is ignored; the stored image id is used instead.
        """
        page, image_id = self.page, self.image_id
        return self.extract_image_data(page, image_id)