Patches Requests if the Python version is below 3.6.0 to fix a quirk in httplib relating to UTF-8 headers. Also, tracks are now sanitized before tagging.
master
parent
370da98e21
commit
88107f7538
|
@ -39,3 +39,4 @@ nosetests.xml
|
|||
*.iml
|
||||
*.xml
|
||||
bandcamp_dl/asyncdownloader.py
|
||||
*.log
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from .bandcampjson import BandcampJSON
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import FeatureNotFound
|
||||
from datetime import datetime
|
||||
import requests
|
||||
import json
|
||||
|
||||
|
@ -26,13 +27,15 @@ class Bandcamp:
|
|||
self.generate_album_json()
|
||||
self.tracks = self.tralbum_data_json['trackinfo']
|
||||
|
||||
album_release = self.tralbum_data_json['album_release_date']
|
||||
|
||||
album = {
|
||||
"tracks": [],
|
||||
"title": self.embed_data_json['album_title'],
|
||||
"artist": self.embed_data_json['artist'],
|
||||
"full": False,
|
||||
"art": "",
|
||||
"date": self.tralbum_data_json['album_release_date']
|
||||
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
|
||||
}
|
||||
|
||||
for track in self.tracks:
|
||||
|
|
|
@ -51,7 +51,7 @@ from .bandcampdownloader import BandcampDownloader
|
|||
|
||||
|
||||
def main():
|
||||
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-03')
|
||||
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-05')
|
||||
bandcamp = Bandcamp()
|
||||
|
||||
basedir = arguments['--base-dir'] or os.getcwd()
|
||||
|
|
|
@ -6,6 +6,14 @@ from mutagen.id3._frames import TIT2
|
|||
from mutagen.easyid3 import EasyID3
|
||||
from slugify import slugify
|
||||
|
||||
if not sys.version_info[:2] == (3, 6):
|
||||
import mock
|
||||
from .utils import requests_patch
|
||||
|
||||
# DEBUG
|
||||
# import logging
|
||||
# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
|
||||
|
||||
|
||||
class BandcampDownloader:
|
||||
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
|
||||
|
@ -16,7 +24,7 @@ class BandcampDownloader:
|
|||
:param directory: download location
|
||||
:param overwrite: if True overwrite existing files
|
||||
"""
|
||||
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-02 (https://github.com/iheanyi/bandcamp-dl)'}
|
||||
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-05 (https://github.com/iheanyi/bandcamp-dl)'}
|
||||
self.session = requests.Session()
|
||||
|
||||
if type(urls) is str:
|
||||
|
@ -98,9 +106,13 @@ class BandcampDownloader:
|
|||
|
||||
while True:
|
||||
try:
|
||||
r = self.session.get(track['url'], headers=self.headers, stream=True)
|
||||
file_length = int(r.headers['content-length'])
|
||||
total = int(file_length/100)
|
||||
if not sys.version_info[:2] == (3, 6):
|
||||
with mock.patch('http.client.parse_headers', requests_patch.parse_headers):
|
||||
r = self.session.get(track['url'], headers=self.headers, stream=True)
|
||||
else:
|
||||
r = self.session.get(track['url'], headers=self.headers, stream=True)
|
||||
file_length = int(r.headers.get('content-length', 0))
|
||||
total = int(file_length / 100)
|
||||
# If file exists and is still a tmp file skip downloading and encode
|
||||
if os.path.exists(filepath):
|
||||
self.write_id3_tags(filepath, track_meta)
|
||||
|
@ -121,7 +133,10 @@ class BandcampDownloader:
|
|||
dl += len(data)
|
||||
f.write(data)
|
||||
done = int(50 * dl / file_length)
|
||||
sys.stdout.write("\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks, "=" * done, " " * (50 - done), filename[:-8]))
|
||||
sys.stdout.write(
|
||||
"\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks,
|
||||
"=" * done, " " * (50 - done),
|
||||
filename[:-8]))
|
||||
sys.stdout.flush()
|
||||
local_size = os.path.getsize(filepath)
|
||||
# if the local filesize before encoding doesn't match the remote filesize redownload
|
||||
|
@ -168,6 +183,7 @@ class BandcampDownloader:
|
|||
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
|
||||
|
||||
audio = MP3(filepath)
|
||||
audio.delete()
|
||||
audio["TIT2"] = TIT2(encoding=3, text=["title"])
|
||||
audio.save(filename=None, v1=2)
|
||||
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
import demjson
|
||||
import re
|
||||
|
||||
"""TODO
|
||||
|
||||
More in-depth error messages
|
||||
"""
|
||||
|
||||
class BandcampJSON:
|
||||
def __init__(self, body, var_name: str, js_data=None):
|
||||
|
|
|
@ -4,3 +4,4 @@ docopt==0.6.2
|
|||
mutagen==1.35.1
|
||||
requests==2.12.4
|
||||
unicode-slugify==0.1.3
|
||||
mock==2.0.0
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
try:
|
||||
import cchardet as chardet
|
||||
except ImportError:
|
||||
import chardet as chardet
|
||||
|
||||
import http.client
|
||||
import email.parser
|
||||
|
||||
|
||||
def parse_headers(fp, _class=http.client.HTTPMessage):
    """Parse only the RFC 2822 headers from a binary file pointer.

    The email parser wants to see strings rather than bytes, but a
    TextIOWrapper around the stream would buffer too many bytes, bytes
    which are later needed as bytes. So the header lines are read here,
    as bytes, and decoded in one go before being handed to the parser.

    Note: monkey-patched replacement for ``http.client.parse_headers``
    that tries to determine the header encoding with ``chardet`` instead
    of always assuming ISO-8859-1.

    :param fp: binary file-like object positioned at the first header line
    :param _class: message class the email parser should instantiate
    :return: the parsed headers as an instance of ``_class``
    :raises http.client.LineTooLong: if a header line exceeds
        ``http.client._MAXLINE`` bytes
    :raises http.client.HTTPException: if more than
        ``http.client._MAXHEADERS`` header lines are received
    """
    headers = []
    while True:
        # Ask for one byte more than the limit so an over-long line is detectable.
        line = fp.readline(http.client._MAXLINE + 1)
        if len(line) > http.client._MAXLINE:
            raise http.client.LineTooLong("header line")
        headers.append(line)
        if len(headers) > http.client._MAXHEADERS:
            # Must be qualified: only the ``http.client`` module is imported,
            # so a bare ``HTTPException`` would raise NameError here.
            raise http.client.HTTPException(
                "got more than {} headers".format(http.client._MAXHEADERS))
        # A blank line (or EOF) terminates the header section.
        if line in (b'\r\n', b'\n', b''):
            break

    raw = b''.join(headers)
    inferred = chardet.detect(raw) or {}
    encoding = inferred.get('encoding')
    # Guard against a missing/None confidence so the comparison below
    # cannot raise TypeError.
    confidence = inferred.get('confidence') or 0.0

    if encoding and confidence > 0.8:
        try:
            hstring = raw.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            # Unknown codec name or a wrong guess: fall back to the
            # encoding the stock http.client implementation uses.
            hstring = raw.decode('iso-8859-1')
    else:
        hstring = raw.decode('iso-8859-1')

    return email.parser.Parser(_class=_class).parsestr(hstring)
|
3
setup.py
3
setup.py
|
@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))
|
|||
|
||||
setup(
|
||||
name='bandcamp-downloader',
|
||||
version='0.0.7-03',
|
||||
version='0.0.7-05',
|
||||
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
|
||||
long_description=open('README.rst').read(),
|
||||
url='https://github.com/iheanyi/bandcamp-dl',
|
||||
|
@ -29,6 +29,7 @@ setup(
|
|||
'mutagen',
|
||||
'requests',
|
||||
'unicode-slugify',
|
||||
'mock',
|
||||
],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
|
|
Loading…
Reference in New Issue