diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 5cd0be75..528d3362 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -10,6 +10,7 @@ from .common import Extractor, Message from .. import text +import json class BehanceExtractor(Extractor): @@ -20,49 +21,52 @@ class BehanceExtractor(Extractor): def items(self): yield Message.Version, 1 for gallery in self.galleries(): - yield Message.Queue, gallery["url"], gallery + yield Message.Queue, gallery["url"], self._update(gallery) def galleries(self): """Return all relevant gallery URLs""" return () - def _pagination(self, url, key): - headers = {"X-Requested-With": "XMLHttpRequest"} - params = {} + @staticmethod + def _update(data): + # compress data to simple lists + data["fields"] = [field["name"] for field in data["fields"]] + data["owners"] = [owner["display_name"] for owner in data["owners"]] + if "tags" in data: + data["tags"] = [tag["title"] for tag in data["tags"]] - while True: - data = self.request(url, headers=headers, params=params).json() - yield from data[key] - if not data.get("offset"): - return - params["offset"] = data["offset"] + # backwards compatibility + data["gallery_id"] = data["id"] + data["title"] = data["name"] + data["user"] = ", ".join(data["owners"]) + + return data class BehanceGalleryExtractor(BehanceExtractor): """Extractor for image galleries from www.behance.net""" subcategory = "gallery" - directory_fmt = ["{category}", "{user}", "{gallery_id} {title}"] - filename_fmt = "{category}_{gallery_id}_{num:>02}.{extension}" - archive_fmt = "{gallery_id}_{num}" + directory_fmt = ["{category}", "{owners:J, }", "{id} {name}"] + filename_fmt = "{category}_{id}_{num:>02}.{extension}" + archive_fmt = "{id}_{num}" pattern = [r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"] test = [ ("https://www.behance.net/gallery/17386197/A-Short-Story", { "count": 2, - "url": "ebe032f78e8af98f9873f85eb77a1e49a3f8e648", + "url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2", "keyword": { - "title": 're:"Hi". A short story about the important things ', - "user": "Place Studio, Julio César Velazquez", + "id": 17386197, + "name": 're:"Hi". A short story about the important things ', + "owners": ["Place Studio", "Julio César Velazquez"], "fields": ["Animation", "Character Design", "Directing"], - "date": 1401810111, - "views": int, - "votes": int, - "comments": int, + "tags": list, + "module": dict, }, }), ("https://www.behance.net/gallery/21324767/Nevada-City", { "count": 6, - "url": "2b2a689d57f113617088eeab4dc81b884bf24410", - "keyword": {"user": "Alex Strohl"}, + "url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d", + "keyword": {"owners": ["Alex Strohl"]}, }), ] @@ -71,72 +75,53 @@ class BehanceGalleryExtractor(BehanceExtractor): self.gallery_id = match.group(1) def items(self): - url = "{}/gallery/{}/a".format(self.root, self.gallery_id) - page = self.request(url, cookies={"ilo0": "true"}).text - - data = self.get_metadata(page) - imgs = self.get_images(page) + data = self.get_gallery_data() + imgs = self.get_images(data) data["count"] = len(imgs) yield Message.Version, 1 yield Message.Directory, data - for data["num"], url in enumerate(imgs, 1): - yield Message.Url, url, text.nameext_from_url(url, data) + for data["num"], (url, module) in enumerate(imgs, 1): + data["module"] = module + data["extension"] = text.ext_from_url(url) + yield Message.Url, url, data - def get_metadata(self, page): - """Collect metadata for extractor-job""" - users, pos = text.extract( - page, 'class="project-owner-info ', 'class="project-owner-actions') - title, pos = text.extract( - page, 'project-title">', '', pos) - fields, pos = text.extract( - page, '