add 'text.ensure_http_scheme()'
This commit is contained in:
parent
4df2cadf60
commit
6294e2c540
@ -126,8 +126,9 @@ class DeviantartExtractor(Extractor):
|
|||||||
if self.extra:
|
if self.extra:
|
||||||
for match in DeviantartStashExtractor.pattern.finditer(
|
for match in DeviantartStashExtractor.pattern.finditer(
|
||||||
deviation.get("description", "")):
|
deviation.get("description", "")):
|
||||||
|
url = text.ensure_http_scheme(match.group(0))
|
||||||
deviation["_extractor"] = DeviantartStashExtractor
|
deviation["_extractor"] = DeviantartStashExtractor
|
||||||
yield Message.Queue, match.group(0), deviation
|
yield Message.Queue, url, deviation
|
||||||
|
|
||||||
def deviations(self):
|
def deviations(self):
|
||||||
"""Return an iterable containing all relevant Deviation-objects"""
|
"""Return an iterable containing all relevant Deviation-objects"""
|
||||||
|
@ -224,10 +224,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
|
|||||||
self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
|
self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
|
||||||
self.user, match.group(3))
|
self.user, match.group(3))
|
||||||
else:
|
else:
|
||||||
url = match.group(0)
|
self.post_url = text.ensure_http_scheme(match.group(0))
|
||||||
if not url.startswith("http"):
|
|
||||||
url = "https://" + url
|
|
||||||
self.post_url = url
|
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self.post_url,)
|
return (self.post_url,)
|
||||||
@ -414,6 +411,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_favorites(page):
|
def _extract_favorites(page):
|
||||||
return [
|
return [
|
||||||
"https://" + user.rpartition('"')[2].lstrip("/:")
|
text.ensure_http_scheme(user.rpartition('"')[2])
|
||||||
for user in text.extract_iter(page, 'class="item-user', '"><img')
|
for user in text.extract_iter(page, 'class="item-user', '"><img')
|
||||||
]
|
]
|
||||||
|
@ -98,8 +98,7 @@ class PatreonExtractor(Extractor):
|
|||||||
headers = {"Referer": self.root}
|
headers = {"Referer": self.root}
|
||||||
|
|
||||||
while url:
|
while url:
|
||||||
if not url.startswith("http"):
|
url = text.ensure_http_scheme(url)
|
||||||
url = "https://" + url.lstrip("/:")
|
|
||||||
posts = self.request(url, headers=headers).json()
|
posts = self.request(url, headers=headers).json()
|
||||||
|
|
||||||
if "included" in posts:
|
if "included" in posts:
|
||||||
|
@ -60,6 +60,13 @@ def split_html(txt, sep=None):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_http_scheme(url, scheme="https://"):
    """Prepend 'scheme' to 'url' if it doesn't have one

    Falsy values (empty string, None, empty containers) are returned
    unchanged.  A URL that already starts with 'http://' or 'https://'
    (compared case-insensitively) is returned unchanged as well.
    Otherwise any leading '/' and ':' characters are stripped and
    'scheme' is put in front of the remainder.
    """
    if not url:
        return url

    known = ("https://", "http://")
    # Exact match first (common case); then a case-insensitive check on
    # the prefix only, since URI schemes are case-insensitive and e.g.
    # 'HTTP://example.org' must not end up as 'https://HTTP://example.org'.
    # 8 == len("https://"), the longest prefix that needs inspecting.
    if url.startswith(known) or url[:8].lower().startswith(known):
        return url

    return scheme + url.lstrip("/:")
|
||||||
|
|
||||||
|
|
||||||
def filename_from_url(url):
|
def filename_from_url(url):
|
||||||
"""Extract the last part of an URL to use as a filename"""
|
"""Extract the last part of an URL to use as a filename"""
|
||||||
try:
|
try:
|
||||||
|
@ -94,6 +94,33 @@ class TestText(unittest.TestCase):
|
|||||||
for value in INVALID:
|
for value in INVALID:
|
||||||
self.assertEqual(f(value), empty)
|
self.assertEqual(f(value), empty)
|
||||||
|
|
||||||
|
def test_ensure_http_scheme(self, f=text.ensure_http_scheme):
    """Check that 'ensure_http_scheme()' only prepends a missing scheme"""
    expected = "https://example.org/filename.ext"

    # standard usage: leading '/' and ':' characters are dropped
    self.assertEqual(f(""), "")
    for prefix in ("", "/", "//", "://"):
        self.assertEqual(f(prefix + "example.org/filename.ext"), expected)

    # no change: URL already carries an http(s) scheme
    for url in (expected, "http://example.org/filename.ext"):
        self.assertEqual(f(url), url)

    # anything that is not exactly http(s) counts as "no scheme"
    self.assertEqual(
        f("htp://example.org/filename.ext"),
        "https://htp://example.org/filename.ext",
    )

    # invalid arguments are handed back untouched
    for value in INVALID_ALT:
        self.assertEqual(f(value), value)
|
||||||
|
|
||||||
def test_filename_from_url(self, f=text.filename_from_url):
|
def test_filename_from_url(self, f=text.filename_from_url):
|
||||||
result = "filename.ext"
|
result = "filename.ext"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user