[500px] match 'web.500px.com' subdomains

This commit is contained in:
Mike Fährmann 2020-04-26 22:16:21 +02:00
parent d3b3b30107
commit 38b6bd66b0
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@@ -12,6 +12,9 @@ from .common import Extractor, Message
from .. import text from .. import text
BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com"
class _500pxExtractor(Extractor): class _500pxExtractor(Extractor):
"""Base class for 500px extractors""" """Base class for 500px extractors"""
category = "500px" category = "500px"
@@ -86,13 +89,15 @@ class _500pxExtractor(Extractor):
class _500pxUserExtractor(_500pxExtractor): class _500pxUserExtractor(_500pxExtractor):
"""Extractor for photos from a user's photostream on 500px.com""" """Extractor for photos from a user's photostream on 500px.com"""
subcategory = "user" subcategory = "user"
pattern = (r"(?:https?://)?500px\.com" pattern = BASE_PATTERN + r"/(?!photo/)([^/?&#]+)/?(?:$|\?|#)"
r"/(?!photo/)([^/?&#]+)/?(?:$|\?|#)") test = (
test = ("https://500px.com/light_expression_photography", { ("https://500px.com/light_expression_photography", {
"pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2", "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2",
"range": "1-99", "range": "1-99",
"count": 99, "count": 99,
}) }),
("https://web.500px.com/light_expression_photography"),
)
def __init__(self, match): def __init__(self, match):
_500pxExtractor.__init__(self, match) _500pxExtractor.__init__(self, match)
@@ -120,8 +125,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
"""Extractor for photo galleries on 500px.com""" """Extractor for photo galleries on 500px.com"""
subcategory = "gallery" subcategory = "gallery"
directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}") directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
pattern = (r"(?:https?://)?500px\.com" pattern = BASE_PATTERN + r"/(?!photo/)([^/?&#]+)/galleries/([^/?&#]+)"
r"/(?!photo/)([^/?&#]+)/galleries/([^/?&#]+)")
test = ("https://500px.com/fashvamp/galleries/lera", { test = ("https://500px.com/fashvamp/galleries/lera", {
"url": "002dc81dee5b4a655f0e31ad8349e8903b296df6", "url": "002dc81dee5b4a655f0e31ad8349e8903b296df6",
"count": 3, "count": 3,
@@ -171,7 +175,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
class _500pxImageExtractor(_500pxExtractor): class _500pxImageExtractor(_500pxExtractor):
"""Extractor for individual images from 500px.com""" """Extractor for individual images from 500px.com"""
subcategory = "image" subcategory = "image"
pattern = r"(?:https?://)?500px\.com/photo/(\d+)" pattern = BASE_PATTERN + r"/photo/(\d+)"
test = ("https://500px.com/photo/222049255/queen-of-coasts", { test = ("https://500px.com/photo/222049255/queen-of-coasts", {
"url": "fbdf7df39325cae02f5688e9f92935b0e7113315", "url": "fbdf7df39325cae02f5688e9f92935b0e7113315",
"count": 1, "count": 1,