2014-10-31 23:39:59 +01:00

70 lines
2.4 KiB
Python

from .common import BasicExtractor
from ..util import unescape, safe_request
import time
import random
import json
class Extractor(BasicExtractor):
    """Extractor that yields the images of a single exhentai gallery.

    The first image page is scraped from HTML; all following pages are
    requested through the JSON API at ``api_url``, using the keys found on
    that first page.
    """

    api_url = "http://exhentai.org/api.php"
    # Filled with: gallery id, page number, image key, original file name.
    name_fmt = "{}_{:>04}_{}_{}"

    def __init__(self, match, config):
        """Initialize the extractor from a URL match and the configuration.

        match  -- match object; group(0) is used as the gallery URL and
                  group(1) must have the form "<gid>/<token>"
        config -- passed on to BasicExtractor; must also provide the
                  "exhentai-cookies" entry used to update the session cookies
        """
        BasicExtractor.__init__(self, config)
        self.url = match.group(0)
        self.gid, self.token = match.group(1).split("/")
        self.category = "exhentai"
        self.directory = self.gid
        # NOTE(review): self.session is presumably created by
        # BasicExtractor.__init__ -- it is not defined in this class.
        self.session.cookies.update(config["exhentai-cookies"])

    def images(self):
        """Yield a ``(url, filename)`` tuple for every image of the gallery.

        The URL of the original ("fullimg") file is preferred over the
        displayed image whenever a link to it is found.
        Between two API requests the generator sleeps for a random
        2 to 5 seconds.
        """
        e = self.extract

        # get gallery page
        text = self.request(self.url).text

        # get first image page
        url, pos = self.extract_all(text, "http://exhentai.org/s/", "-1")
        text = self.request(url).text

        # extract information
        # The 'pos' returned by each call is handed to the next one, so the
        # order of these statements must not be changed.
        _, pos = e(text, '<div id="i3"><a onclick="return load_image(', '')
        imgkey, pos = e(text, "'", "'", pos)
        url, pos = e(text, '<img id="img" src="', '"', pos)
        name, pos = e(text, '<div id="i4"><div>', ' :: ', pos)
        orgurl, pos = e(text, 'http://exhentai.org/fullimg.php', '"', pos)
        gid, pos = e(text, 'var gid=', ';', pos)
        startkey, pos = e(text, 'var startkey="', '";', pos)
        showkey, pos = e(text, 'var showkey="', '";', pos)

        # The search marker is not part of the extracted value, so the
        # fullimg prefix has to be put back in front of it.
        if orgurl:
            url = "http://exhentai.org/fullimg.php" + unescape(orgurl)
        yield url, self.name_fmt.format(self.gid, 1, startkey, name)

        # The loop below stops once a page links to its own image key, i.e.
        # that is how the last page is recognized. Apply the same test to the
        # first page, so that a gallery with a single image does not trigger
        # an API request for a second page that does not exist.
        if imgkey == startkey:
            return

        # use json-api for further pages
        request = {
            "method": "showpage",
            "gid": int(gid),
            "page": 2,
            "imgkey": imgkey,
            "showkey": showkey,
        }
        while True:
            # random delay between consecutive API requests
            time.sleep(random.uniform(2, 5))
            info = json.loads(safe_request(
                self.session, self.api_url,
                method="POST", data=json.dumps(request)
            ).text)

            # 'imgkey' read here is the key the current page links to
            # (the next page), not the key of the page just requested.
            imgkey, pos = e(info["i3"], "'", "'")
            url, _ = e(info["i3"], '<img id="img" src="', '"', pos)
            name, _ = e(info["i"], '<div>', ' :: ')
            orgurl, _ = e(info["i7"], '<a href="', '"')
            if orgurl:
                url = unescape(orgurl)
            yield url, self.name_fmt.format(
                gid, request["page"], request["imgkey"], name)

            # last page reached: it links back to itself
            if request["imgkey"] == imgkey:
                return
            request["imgkey"] = imgkey
            request["page"] += 1