class ImagehostImageExtractor(Extractor):
    """Shared base for extractors that fetch one image from an imagehost

    Subclasses adjust the request through the class attributes below and
    override get_info() to locate the image inside the fetched page.
    """
    subcategory = "image"
    https = False       # prefix the page URL with 'https://' when True
    method = "post"     # HTTP method handed to self.request()
    params = "simple"   # preset name: "simple", "complex", or anything else
    cookies = None      # passed through unchanged to self.request()

    def __init__(self, match):
        """Build the page URL and request parameters from a pattern match

        Group 1 of 'match' is the URL without its scheme, group 2 is the
        image token.
        """
        Extractor.__init__(self)
        scheme = "https://" if self.https else "http://"
        self.url = scheme + match.group(1)
        self.token = match.group(2)

        # Replace the class-level preset name with the actual form data.
        preset = self.params
        if preset == "simple":
            self.params = {"imgContinue": "Continue+to+image+...+"}
            return
        if preset == "complex":
            self.params = {
                "op": "view",
                "id": self.token,
                "pre": "1",
                "adb": "1",
                "next": "Continue+to+image+...+",
            }
            return

        # Any other preset value: send no form data and switch to GET.
        self.params = {}
        self.method = "get"

    def items(self):
        """Request the image page and yield version, directory and URL"""
        response = self.request(
            self.url,
            method=self.method,
            data=self.params,
            cookies=self.cookies,
        )
        image_url, name_source = self.get_info(response.text)
        metadata = text.nameext_from_url(name_source, {"token": self.token})

        # Upgrade plain-http image URLs when the host is marked as https.
        if self.https and image_url.startswith("http:"):
            image_url = "https:" + image_url[5:]

        yield Message.Version, 1
        yield Message.Directory, metadata
        yield Message.Url, image_url, metadata

    def get_info(self, page):
        """Return the image URL and the string the filename is taken from

        The base implementation only returns placeholder strings.
        """
        return "url", "filename"
"exception": exception.NotFoundError, }), ] https = True def get_info(self, page): url, pos = text.extract(page, '', '') url, pos = text.extract(page, 'src="', '"', pos) return url, url class ImageontimeImageExtractor(HosturimageImageExtractor): """Extractor for single images from imageontime.org""" category = "imageontime" pattern = [(r"(?:https?://)?((?:www\.)?imageontime\.org/" r"img-([a-z0-9]+)\.html)")] test = [] https = False class Img4everImageExtractor(HosturimageImageExtractor): """Extractor for single images from img4ever.net""" category = "img4ever" pattern = [(r"(?:https?://)?((?:www\.)?img4ever\.net/" r"img-([a-z0-9]+)\.html)")] test = [] https = True class ImguploadImageExtractor(HosturimageImageExtractor): """Extractor for single images from imgupload.yt""" category = "imgupload" pattern = [(r"(?:https?://)?((?:www\.)?imgupload\.yt/" r"img-([a-z0-9]+)\.html)")] test = [] https = True class ImgspotImageExtractor(ImagehostImageExtractor): """Extractor for single images from imgspot.org""" category = "imgspot" pattern = [r"(?:https?://)?((?:www\.)?imgspot\.org/img-([a-z0-9]+)\.html)"] https = False def get_info(self, page): url = text.extract(page, "', '') url , pos = text.extract(page, '.png", { "url": "c000618bddda42bd599a590b7972c7396d19d8fe", "keyword": "58905795a9cd3f17d5ff024fc4d63645795ba23c", "content": "0c8768055e4e20e7c7259608b67799171b691140", })] params = None def get_info(self, page): filename, pos = text.extract(page, '