From 46f11a311887a14a9b9429f336ed92a7ee9aa875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 15 Jul 2022 12:38:30 +0200 Subject: [PATCH] [bunkr] fix extraction (#2732) move bunkr.is code to its own module --- docs/supportedsites.md | 12 ++--- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/bunkr.py | 89 ++++++++++++++++++++++++++++++++ gallery_dl/extractor/lolisafe.py | 33 +----------- 4 files changed, 97 insertions(+), 38 deletions(-) create mode 100644 gallery_dl/extractor/bunkr.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 62e115a2..c7fae6d2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -103,6 +103,12 @@ Consider all sites to be NSFW unless otherwise known. Blogs, Posts, Search Results + + Bunkr + https://bunkr.is/ + Albums + + Comic Vine https://comicvine.gamespot.com/ @@ -1261,12 +1267,6 @@ Consider all sites to be NSFW unless otherwise known. lolisafe and chibisafe - - Bunkr - https://app.bunkr.is/ - Albums - - ZzZz https://zz.ht/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index e273f843..70cebb37 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -25,6 +25,7 @@ modules = [ "bcy", "behance", "blogger", + "bunkr", "comicvine", "cyberdrop", "danbooru", diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py new file mode 100644 index 00000000..9904d0a1 --- /dev/null +++ b/gallery_dl/extractor/bunkr.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://bunkr.is/""" + +from .lolisafe import LolisafeAlbumExtractor +from .. 
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for bunkr.is albums"""
    category = "bunkr"
    root = "https://app.bunkr.is"
    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
    test = (
        ("https://app.bunkr.is/a/Lktg9Keq", {
            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "album_id": "Lktg9Keq",
                "album_name": 'test テスト "&>',
                "count": 1,
                "filename": 'test-テスト-"&>-QjgneIQv',
                "id": "QjgneIQv",
                "name": 'test-テスト-"&>',
                "num": int,
            },
        }),
        # mp4 (#2239)
        ("https://bunkr.is/a/ptRHaCn2", {
            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
        }),
        ("https://bunkr.to/a/Lktg9Keq"),
    )

    def fetch_album(self, album_id):
        """Return a '(files, metadata)' tuple for the album 'album_id'.

        Dispatches on the current root URL: roots containing "//app."
        go through _fetch_album_api(), every other root goes through
        _fetch_album_site().
        """
        if "//app." in self.root:
            return self._fetch_album_api(album_id)
        return self._fetch_album_site(album_id)

    def _fetch_album_api(self, album_id):
        """Fetch an album through LolisafeAlbumExtractor.fetch_album().

        The returned file objects are adjusted in place:
        - URLs ending in ".mp4" get their "//cdn.bunkr.is/" host replaced
          by "//media-files.bunkr.is/"
        - all other files get a "_fallback" URL tuple that points to
          "//cdn3." instead of "//cdn."
        """
        files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)

        for file in files:
            url = file["file"]
            if url.endswith(".mp4"):
                file["file"] = url.replace(
                    "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
            else:
                file["_fallback"] = (url.replace("//cdn.", "//cdn3.", 1),)

        return files, data

    def _fetch_album_site(self, album_id):
        """Fetch an album from the JSON embedded in its web page.

        Requests '<root>/a/<album_id>' and parses the content of the
        "__NEXT_DATA__" script element. If anything in that step fails,
        the error is logged at debug level, "bunkr" in self.root gets
        rewritten to "app.bunkr", and the result of _fetch_album_api()
        is returned instead.
        """
        # use the 'album_id' argument (not self.album_id), like the
        # other fetch methods do
        url = self.root + "/a/" + album_id

        try:
            data = json.loads(text.extract(
                self.request(url).text,
                'id="__NEXT_DATA__" type="application/json">', '<')[0])
            props = data["props"]["pageProps"]
            album = props["album"]
            files = props["files"]
        except Exception as exc:
            # deliberate best-effort: any failure (request, missing
            # marker, invalid JSON, unexpected structure) triggers the
            # fallback below instead of aborting the extraction
            self.log.debug(exc)
            self.root = self.root.replace("bunkr", "app.bunkr", 1)
            return self._fetch_album_api(album_id)

        media_root = "https://media-files.bunkr.is/"
        for file in files:
            name = file["name"]
            if name.endswith(".mp4"):
                file["file"] = media_root + name
            else:
                # every other file is addressed through its own "cdn" value
                file["file"] = file["cdn"] + "/" + name

        return files, {
            "album_id"   : album_id,
            "album_name" : text.unescape(album["name"]),
            "description": text.unescape(album["description"]),
            "count"      : len(files),
        }
== "mp4": - url = url.replace( - "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1) yield Message.Url, url, data def fetch_album(self, album_id):