add 'text.ensure_http_scheme()'
This commit is contained in:
parent
4df2cadf60
commit
6294e2c540
@ -126,8 +126,9 @@ class DeviantartExtractor(Extractor):
|
||||
if self.extra:
|
||||
for match in DeviantartStashExtractor.pattern.finditer(
|
||||
deviation.get("description", "")):
|
||||
url = text.ensure_http_scheme(match.group(0))
|
||||
deviation["_extractor"] = DeviantartStashExtractor
|
||||
yield Message.Queue, match.group(0), deviation
|
||||
yield Message.Queue, url, deviation
|
||||
|
||||
def deviations(self):
|
||||
"""Return an iterable containing all relevant Deviation-objects"""
|
||||
|
@ -224,10 +224,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
|
||||
self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
|
||||
self.user, match.group(3))
|
||||
else:
|
||||
url = match.group(0)
|
||||
if not url.startswith("http"):
|
||||
url = "https://" + url
|
||||
self.post_url = url
|
||||
self.post_url = text.ensure_http_scheme(match.group(0))
|
||||
|
||||
def posts(self):
|
||||
return (self.post_url,)
|
||||
@ -414,6 +411,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
||||
@staticmethod
|
||||
def _extract_favorites(page):
|
||||
return [
|
||||
"https://" + user.rpartition('"')[2].lstrip("/:")
|
||||
text.ensure_http_scheme(user.rpartition('"')[2])
|
||||
for user in text.extract_iter(page, 'class="item-user', '"><img')
|
||||
]
|
||||
|
@ -98,8 +98,7 @@ class PatreonExtractor(Extractor):
|
||||
headers = {"Referer": self.root}
|
||||
|
||||
while url:
|
||||
if not url.startswith("http"):
|
||||
url = "https://" + url.lstrip("/:")
|
||||
url = text.ensure_http_scheme(url)
|
||||
posts = self.request(url, headers=headers).json()
|
||||
|
||||
if "included" in posts:
|
||||
|
@ -60,6 +60,13 @@ def split_html(txt, sep=None):
|
||||
return []
|
||||
|
||||
|
||||
def ensure_http_scheme(url, scheme="https://"):
    """Prepend 'scheme' to 'url' if it doesn't have one

    Falsy values ("" or None, for example) are returned unchanged.
    A URL that already starts with "http://" or "https://" (compared
    case-insensitively) is returned unchanged as well.

    For anything else, leading '/' and ':' characters are stripped
    from 'url' and 'scheme' is put in front of the remainder, so
    "//example.org" and "://example.org" both become
    "https://example.org" with the default 'scheme'.
    """
    if not url:
        return url

    # URI schemes are case-insensitive, so "HTTP://..." must be recognized
    # as already having a scheme instead of getting a second one prepended.
    if url.lower().startswith(("https://", "http://")):
        return url

    return scheme + url.lstrip("/:")
|
||||
|
||||
|
||||
def filename_from_url(url):
|
||||
"""Extract the last part of an URL to use as a filename"""
|
||||
try:
|
||||
|
@ -94,6 +94,33 @@ class TestText(unittest.TestCase):
|
||||
for value in INVALID:
|
||||
self.assertEqual(f(value), empty)
|
||||
|
||||
def test_ensure_http_scheme(self, f=text.ensure_http_scheme):
    """Check that a scheme is added only to URLs that lack one"""
    expected = "https://example.org/filename.ext"
    plain_http = "http://example.org/filename.ext"

    # empty input is returned as is
    self.assertEqual(f(""), "")

    # missing scheme, with and without leading '/' or ':' characters
    for prefix in ("", "/", "//", "://"):
        self.assertEqual(f(prefix + "example.org/filename.ext"), expected)

    # URLs that already have an http(s) scheme stay untouched
    for url in (expected, plain_http):
        self.assertEqual(f(url), url)

    # anything other than http(s) is not recognized as a scheme
    self.assertEqual(
        f("htp://example.org/filename.ext"),
        "https://htp://example.org/filename.ext",
    )

    # invalid arguments are returned unchanged
    for value in INVALID_ALT:
        self.assertEqual(f(value), value)
|
||||
|
||||
def test_filename_from_url(self, f=text.filename_from_url):
|
||||
result = "filename.ext"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user