From fa14ef17ea13210e0f0aae285478670923043b9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 11 Aug 2016 15:50:32 +0200 Subject: [PATCH] [imagefap] deal with long filenames --- gallery_dl/extractor/imagefap.py | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index a70a9fef..0a2a46bf 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -31,40 +31,44 @@ class ImagefapGalleryExtractor(Extractor): self.gid = match.group(1) def items(self): - imgurl_fmt = ("http://x.imagefapusercontent.com/u/{uploader}/" - "{gallery-id}/{image-id}/{filename}") - url = "http://www.imagefap.com/pictures/" + self.gid + "/?view=2" + url = "http://www.imagefap.com/pictures/" + self.gid + "/" page = self.request(url).text data = self.get_job_metadata(page) yield Message.Version, 1 yield Message.Directory, data - for image in self.get_images(page): + for url, image in self.get_images(int(data["count"])): data.update(image) - yield Message.Url, imgurl_fmt.format(**data), data + yield Message.Url, url, data def get_job_metadata(self, page): """Collect metadata for extractor-job""" - data = text.extract_all(page, ( + data, pos = text.extract_all(page, ( ("section" , 'Porn pics of ', ' (Page 1)'), ("uploader", '>Uploaded by ', ''), ("count" , ' 1 of ', ' pics"'), - ), values={"category": self.category, "gallery-id": self.gid})[0] + ), values={"category": self.category, "gallery-id": self.gid}) + self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0] data["title"] = text.unescape(data["title"]) return data - @staticmethod - def get_images(page): - """Collect image-metadata""" - pos = 0 + def get_images(self, count): + """Collect image-urls and -metadata""" num = 0 + url = "http://www.imagefap.com/photo/" + self.image_id + "/" + params = {"gid": self.gid, "idx": 0, "partial": "true"} while True: - imgid, pos = text.extract(page, '', '', pos) - num += 1 - yield text.nameext_from_url(name, {"image-id": imgid, "num": num}) + pos = 0 + page = self.request(url, params=params).text + for i in range(24): + imgurl, pos = text.extract(page, '= count: + return + params["idx"] += 24