Fixes #100 and possibly #99

Patches Requests if Python version is below 3.6.0 to fix a quirk in
httplib relating to utf-8 headers.

Also, tracks are now sanitized before tagging.
master
Anthony Forsberg 2017-01-28 06:12:21 -05:00
parent 370da98e21
commit 88107f7538
8 changed files with 76 additions and 8 deletions

1
.gitignore vendored
View File

@ -39,3 +39,4 @@ nosetests.xml
*.iml
*.xml
bandcamp_dl/asyncdownloader.py
*.log

View File

@ -1,6 +1,7 @@
from .bandcampjson import BandcampJSON
from bs4 import BeautifulSoup
from bs4 import FeatureNotFound
from datetime import datetime
import requests
import json
@ -26,13 +27,15 @@ class Bandcamp:
self.generate_album_json()
self.tracks = self.tralbum_data_json['trackinfo']
album_release = self.tralbum_data_json['album_release_date']
album = {
"tracks": [],
"title": self.embed_data_json['album_title'],
"artist": self.embed_data_json['artist'],
"full": False,
"art": "",
"date": self.tralbum_data_json['album_release_date']
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
}
for track in self.tracks:

View File

@ -51,7 +51,7 @@ from .bandcampdownloader import BandcampDownloader
def main():
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-03')
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-05')
bandcamp = Bandcamp()
basedir = arguments['--base-dir'] or os.getcwd()

View File

@ -6,6 +6,14 @@ from mutagen.id3._frames import TIT2
from mutagen.easyid3 import EasyID3
from slugify import slugify
if not sys.version_info[:2] == (3, 6):
import mock
from .utils import requests_patch
# DEBUG
# import logging
# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
class BandcampDownloader:
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
@ -16,7 +24,7 @@ class BandcampDownloader:
:param directory: download location
:param overwrite: if True overwrite existing files
"""
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-02 (https://github.com/iheanyi/bandcamp-dl)'}
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-05 (https://github.com/iheanyi/bandcamp-dl)'}
self.session = requests.Session()
if type(urls) is str:
@ -98,9 +106,13 @@ class BandcampDownloader:
while True:
try:
r = self.session.get(track['url'], headers=self.headers, stream=True)
file_length = int(r.headers['content-length'])
total = int(file_length/100)
if not sys.version_info[:2] == (3, 6):
with mock.patch('http.client.parse_headers', requests_patch.parse_headers):
r = self.session.get(track['url'], headers=self.headers, stream=True)
else:
r = self.session.get(track['url'], headers=self.headers, stream=True)
file_length = int(r.headers.get('content-length', 0))
total = int(file_length / 100)
# If file exists and is still a tmp file skip downloading and encode
if os.path.exists(filepath):
self.write_id3_tags(filepath, track_meta)
@ -121,7 +133,10 @@ class BandcampDownloader:
dl += len(data)
f.write(data)
done = int(50 * dl / file_length)
sys.stdout.write("\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks, "=" * done, " " * (50 - done), filename[:-8]))
sys.stdout.write(
"\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks,
"=" * done, " " * (50 - done),
filename[:-8]))
sys.stdout.flush()
local_size = os.path.getsize(filepath)
# if the local filesize before encoding doesn't match the remote filesize redownload
@ -168,6 +183,7 @@ class BandcampDownloader:
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
audio = MP3(filepath)
audio.delete()
audio["TIT2"] = TIT2(encoding=3, text=["title"])
audio.save(filename=None, v1=2)

View File

@ -1,6 +1,10 @@
import demjson
import re
"""TODO
More in-depth error messages
"""
class BandcampJSON:
def __init__(self, body, var_name: str, js_data=None):

View File

@ -4,3 +4,4 @@ docopt==0.6.2
mutagen==1.35.1
requests==2.12.4
unicode-slugify==0.1.3
mock==2.0.0

View File

@ -0,0 +1,42 @@
try:
import cchardet as chardet
except ImportError:
import chardet as chardet
import http.client
import email.parser
def parse_headers(fp, _class=http.client.HTTPMessage):
    """Parse only RFC 2822 headers from a file pointer.

    The email Parser wants to see strings rather than bytes, but a
    TextIOWrapper around the stream would buffer too many bytes, bytes
    which are later needed as bytes. So the header bytes are read here,
    as bytes, and decoded once before being handed to the email Parser.

    Note: Monkey-patched version that tries to determine the header
    encoding more intelligently. The raw header bytes are passed to
    ``chardet.detect``; the detected encoding is used when its reported
    confidence is above 0.8. Otherwise, or when decoding with the detected
    encoding fails, the bytes are decoded as ISO-8859-1.

    :param fp: binary file-like object to read header lines from
    :param _class: message class passed to ``email.parser.Parser``
    :return: the message produced by ``Parser(_class=_class).parsestr``
    :raises http.client.LineTooLong: a line is longer than ``_MAXLINE``
    :raises http.client.HTTPException: more than ``_MAXHEADERS`` lines read
    """
    headers = []
    while True:
        # Ask for one byte more than the limit so an over-long line is
        # detectable without reading it in full.
        line = fp.readline(http.client._MAXLINE + 1)
        if len(line) > http.client._MAXLINE:
            raise http.client.LineTooLong("header line")
        headers.append(line)
        if len(headers) > http.client._MAXHEADERS:
            # Must be qualified: only the ``http.client`` module is imported,
            # a bare ``HTTPException`` would be a NameError.
            raise http.client.HTTPException(
                "got more than {} headers".format(http.client._MAXHEADERS))
        # A blank line (or EOF) terminates the header section.
        if line in (b'\r\n', b'\n', b''):
            break

    hstring = b''.join(headers)
    inferred = chardet.detect(hstring) or {}
    encoding = inferred.get('encoding')
    # NOTE(review): some detector builds appear to report ``None`` as the
    # confidence when nothing is detected; treat that as zero - confirm.
    confidence = inferred.get('confidence') or 0
    if encoding and confidence > 0.8:
        try:
            text = hstring.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            # Unknown codec name or a wrong guess: use the byte-transparent
            # fallback instead of failing the whole request.
            text = hstring.decode('iso-8859-1')
    else:
        text = hstring.decode('iso-8859-1')
    return email.parser.Parser(_class=_class).parsestr(text)

View File

@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))
setup(
name='bandcamp-downloader',
version='0.0.7-03',
version='0.0.7-05',
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
long_description=open('README.rst').read(),
url='https://github.com/iheanyi/bandcamp-dl',
@ -29,6 +29,7 @@ setup(
'mutagen',
'requests',
'unicode-slugify',
'mock',
],
entry_points={
'console_scripts': [