[imgyt] add extractor

2016-08-03 17:09:15 +02:00 · 2016-08-03 17:09:15 +02:00 · 43210391ea
commit 43210391ea
parent b9be7f5907
3 changed files with 46 additions and 2 deletions
--- a/README.rst
+++ b/README.rst
@@ -41,7 +41,7 @@ Supported Sites
 * Manga:
    bato.to, kissmanga.com, mangahere.co, mangamint.com, mangapanda.com,
    mangapark.me, mangareader.net, mangashare.com, mangastream.com,
-    powermanga.org, thespectrum.net
+    powermanga.org, raw.senmanga.com, thespectrum.net
 * Hentai:
    exhentai.org, hbrowse.com, hentai2read.com, hentai-foundry.com, hitomi.la,
    luscious.net, nhentai.net
@@ -52,7 +52,7 @@ Supported Sites
 * Futaba Channel-like:
    4chan.org, 8ch.net
 * Image Hosts:
-    chronos.to, imagebam.com, imagetwist.com, imgbox.com, imgchili.net,
+    chronos.to, imagebam.com, imagetwist.com, imgbox.com, imgchili.net, img.yt,
    turboimagehost.com


--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -32,6 +32,7 @@ modules = [
    "imgchili",
    "imgth",
    "imgur",
+    "imgyt",
    "khinsider",
    "kissmanga",
    "konachan",
--- a/gallery_dl/extractor/imgyt.py
+++ b/gallery_dl/extractor/imgyt.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2016 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from http://img.yt/"""
+
+from .common import Extractor, Message
+from .. import text
+from os.path import splitext
+
+class ImgytImageExtractor(Extractor):
+    """Extractor for single images from img.yt"""
+    category = "imgyt"
+    directory_fmt = ["{category}"]
+    filename_fmt = "{filename}"
+    pattern = [r"(?:https?://)?(?:www\.)?img\.yt/img-([a-z0-9]+)\.html"]
+    test = [("http://img.yt/img-57a2050547b97.html", {
+        "url": "6801fac1ff8335bd27a1665ad27ad64cace2cd84",
+        "keyword": "a20aa2215a4a6d5f4605d6370a8d605b525fc4bc",
+        "content": "54592f2635674c25677c6872db3709d343cdf92f",
+    })]
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        self.token = match.group(1)  # first capture group of 'match'
+
+    def items(self):
+        """Yield the Version, Directory and Url messages for the image"""
+        meta = dict(category=self.category, token=self.token)
+        form = {"imgContinue": "Continue+to+image+...+"}  # sent as POST data
+        page_url = "".join(("https://img.yt/img-", self.token, ".html"))
+        page = self.request(page_url, method="post", data=form).text
+        url, pos = text.extract(page, "<img class='centred' src='", "'")
+        filename, _ = text.extract(page, " alt='", "'", pos)  # from alt text
+        text.nameext_from_url(filename + splitext(url)[1], meta)
+        url = "https:" + url[5:] if url.startswith("http:") else url
+        yield Message.Version, 1
+        yield Message.Directory, meta
+        yield Message.Url, url, meta