add 'encoding' argument for Extractor.request

This commit is contained in:
Mike Fährmann 2016-07-12 12:06:17 +02:00
parent 3c94d85409
commit 000df8d1fa
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
4 changed files with 10 additions and 16 deletions

View File

@ -32,13 +32,10 @@ class Extractor():
yield Message.Version, 1
return
def request(self, url, *args, **kwargs):
return safe_request(self.session, url, *args, **kwargs)
def enable_useragent(self):
self.session.headers["User-Agent"] = (
"Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0"
)
def request(self, url, encoding=None, *args, **kwargs):
    """Fetch 'url' through this extractor's session and return the response.

    The request itself is delegated to safe_request(), which receives
    the session, the URL and any additional arguments unchanged.

    'encoding', if given, is assigned to the response's 'encoding'
    attribute before the response is returned, so callers can simply
    chain '.text' (e.g. self.request(url, encoding="utf-8").text).
    If it is left as None, the response's own encoding is kept.

    Note that 'encoding' precedes '*args' in the signature: a second
    positional argument is therefore bound to 'encoding', not forwarded
    to safe_request().
    """
    response = safe_request(self.session, url, *args, **kwargs)
    # Only override when an encoding was explicitly requested.
    # Unconditionally assigning None would discard whatever encoding the
    # response already carries (presumably the one derived from the HTTP
    # headers by 'requests' -- confirm against safe_request's return type).
    if encoding is not None:
        response.encoding = encoding
    return response
class AsynchronousExtractor(Extractor):

View File

@ -36,9 +36,8 @@ class ImagebamExtractor(AsynchronousExtractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
response = self.request(self.url_base + "/gallery/" + self.gkey)
response.encoding = "utf-8"
page = response.text
url = self.url_base + "/gallery/" + self.gkey
page = self.request(url, encoding="utf-8").text
data = {
"category": self.category,
"gallery-key": self.gkey,

View File

@ -24,7 +24,7 @@ class KhinsiderExtractor(AsynchronousExtractor):
def items(self):
url = "http://downloads.khinsider.com/game-soundtracks/album/" + self.album
page = self.request(url).text
page = self.request(url, encoding="utf-8").text
data = self.get_job_metadata(page)
yield Message.Version, 1
yield Message.Directory, data
@ -44,8 +44,8 @@ class KhinsiderExtractor(AsynchronousExtractor):
def get_album_tracks(self, page):
pos = page.index("Download all songs at once:")
num = 0
for url in text.extract_iter(page, '<tr>\r\n\t\t<td><a href="', '"'):
page = self.request(url).text
for url in text.extract_iter(page, '<tr>\r\n\t\t<td><a href="', '"', pos):
page = self.request(url, encoding="utf-8").text
name, pos = text.extract(page, "Song name: <b>", "</b>")
url , pos = text.extract(page, '<p><a style="color: #21363f;" href="', '"', pos)
num += 1

View File

@ -55,9 +55,7 @@ class PowerMangaExtractor(Extractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
response = self.request(self.url_base + self.part)
response.encoding = "utf-8"
page = response.text
page = self.request(self.url_base + self.part, encoding="utf-8").text
_ , pos = text.extract(page, '<h1 class="tbtitle dnone">', '')
manga , pos = text.extract(page, 'title="', '"', pos)
chapter , pos = text.extract(page, '">', '</a>', pos)