From 231bcad6141120582fc1f5274fc956b394098562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 27 Jan 2021 23:43:14 +0100 Subject: [PATCH] [shopify] use BaseExtractor --- gallery_dl/extractor/shopify.py | 79 +++++++++++++-------------------- 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py index d65f3344..ba1ab08a 100644 --- a/gallery_dl/extractor/shopify.py +++ b/gallery_dl/extractor/shopify.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,28 +8,23 @@ """Extractors for Shopify instances""" -from .common import Extractor, Message, generate_extractors +from .common import BaseExtractor, Message from .. import text import re -class ShopifyExtractor(Extractor): +class ShopifyExtractor(BaseExtractor): """Base class for Shopify extractors""" basecategory = "shopify" filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}" archive_fmt = "{id}" def __init__(self, match): - Extractor.__init__(self, match) - self.item_url = self.root + match.group(1) - - def request(self, url, **kwargs): - kwargs["retries"] = float("inf") - return Extractor.request(self, url, **kwargs) + BaseExtractor.__init__(self, match) + self.item_url = self.root + match.group(match.lastindex) def items(self): data = self.metadata() - yield Message.Version, 1 yield Message.Directory, data headers = {"X-Requested-With": "XMLHttpRequest"} @@ -58,22 +53,34 @@ class ShopifyExtractor(Extractor): """Return an iterable with all relevant product URLs""" +BASE_PATTERN = ShopifyExtractor.update({ + "fashionnova": { + "root": "https://www.fashionnova.com", + "pattern": r"(?:www\.)?fashionnova\.com", + }, +}) + + class ShopifyCollectionExtractor(ShopifyExtractor): """Base class for collection extractors for Shopify based sites""" subcategory = "collection" directory_fmt = ("{category}", "{collection[title]}") - pattern_fmt = r"(/collections/[\w-]+)/?(?:\?([^#]+))?(?:$|#)" - - def __init__(self, match): - ShopifyExtractor.__init__(self, match) - self.params = match.group(2) + pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])" + test = ( + ("https://www.fashionnova.com/collections/mini-dresses", { + "range": "1-20", + "count": 20, + "archive": False, + }), + ("https://www.fashionnova.com/collections/mini-dresses/?page=1"), + ("https://www.fashionnova.com/collections/mini-dresses#1"), + ) def metadata(self): return self.request(self.item_url + ".json").json() def products(self): - params = text.parse_query(self.params) - params["page"] = text.parse_int(params.get("page"), 1) + params = {"page": 1} fetch = True last = None @@ -107,36 +114,14 @@ class ShopifyProductExtractor(ShopifyExtractor): """Base class for product extractors for Shopify based sites""" subcategory = "product" directory_fmt = ("{category}", "Products") - pattern_fmt = r"((?:/collections/[\w-]+)?/products/[\w-]+)" + pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)" + test = ( + ("https://www.fashionnova.com/products/essential-slide-red", { + "pattern": r"https?://cdn\d*\.shopify.com/", + "count": 3, + }), + ("https://www.fashionnova.com/collections/flats/products/name"), + ) def products(self): return (self.item_url,) - - -EXTRACTORS = { - "fashionnova": { - "root": "https://www.fashionnova.com", - "pattern": r"(?:www\.)?fashionnova\.com", - "test-product": ( - ("https://www.fashionnova.com/products/essential-slide-red", { - "pattern": r"https?://cdn\d*\.shopify.com/", - "count": 3, - }), - ("https://www.fashionnova.com/collections/flats/products/name"), - ), - "test-collection": ( - ("https://www.fashionnova.com/collections/mini-dresses", { - "range": "1-20", - "count": 20, - "archive": False, - }), - ("https://www.fashionnova.com/collections/mini-dresses/?page=1"), - ("https://www.fashionnova.com/collections/mini-dresses#1"), - ), - }, -} - -generate_extractors(EXTRACTORS, globals(), ( - ShopifyProductExtractor, - ShopifyCollectionExtractor, -))