48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
from .common import AsyncExtractor
|
|
|
|
class Extractor(AsyncExtractor):
    """Extractor for imagebam.com galleries.

    Constructed from a URL match object: the gallery title and key are
    resolved up front, then ``images`` walks the image pages one at a time.
    """

    def __init__(self, match, config):
        """Initialise the base extractor and derive category and directory.

        ``get_title`` issues the requests needed to find the gallery title
        and key; as a side effect it also sets ``self.url``, which
        ``images`` uses as its starting page.
        """
        AsyncExtractor.__init__(self, config)
        gallery_title, gallery_key = self.get_title(match)
        self.category = "imagebam"
        self.directory = " - ".join((gallery_title, gallery_key))

    def images(self):
        """Yield ``(image_url, filename)`` for each image page in turn.

        Starts at ``self.url`` and keeps following the page's blue button
        for as long as it is labelled "next image". Each filename is the
        running index (zero-padded to width three) followed by the part of
        the image url after its last slash.
        """
        # NOTE(review): self.extract is defined outside this file; it is
        # assumed to return (substring, position after the end marker).
        page_path = self.url
        index = 1
        while True:
            # fetch the current image page
            page = self.request("http://www.imagebam.com" + page_path).text

            # href of the blue button: candidate path of the next page
            page_path, pos = self.extract(
                page, "<a class='buttonblue' href='", "'")

            # the button only leads onward if it is labelled "next image"
            has_next = page.startswith("><span>next image", pos)

            # image url, searched for from the current position onward
            img_url, pos = self.extract(
                page, 'onclick="scale(this);" src="', '"', pos)

            # filename is whatever follows the last slash of the image url
            last_slash = img_url.rindex("/")
            name = img_url[last_slash + 1:]

            yield img_url, "{:>03}-{}".format(index, name)
            index += 1

            if not has_next:
                break

    def get_title(self, match):
        """Return ``(title, gallery_key)`` and set ``self.url``.

        When the first match group is ``"image"`` the matched url is
        fetched and the gallery key is taken from the gallery link found
        on that page; otherwise the key is the second match group. The
        gallery page is then fetched to read the title and the path of the
        first link to an imagebam page, which is stored in ``self.url``.
        """
        if match.group(1) != "image":
            gallery_key = match.group(2)
        else:
            # single-image url: find the owning gallery through its link
            image_page = self.request(match.group(0)).text
            gallery_url, _ = self.extract(
                image_page, "class='gallery_title'><a href='", "'")
            # second-to-last path component of the gallery link
            gallery_key = gallery_url.split("/")[-2]

        overview = self.request(
            "http://www.imagebam.com/gallery/" + gallery_key).text

        # skip ahead to the photos icon; only the position is needed
        _, pos = self.extract(overview, "<img src='/img/icons/photos.png'", "")
        title, pos = self.extract(overview, "'> ", " <", pos)
        self.url, pos = self.extract(
            overview, "<a href='http://www.imagebam.com", "'", pos)
        return title, gallery_key