Fixes #100 and possibly #99

Patches Requests when the Python version is below 3.6.0 to work around a quirk in httplib relating to UTF-8 headers. Tracks are now also sanitized before tagging.
2017-01-28 06:12:21 -05:00 · 2017-01-28 06:12:21 -05:00 · 88107f7538
parent 370da98e21
commit 88107f7538
8 changed files with 76 additions and 8 deletions
--- a/.gitignore
+++ b/.gitignore
@ -39,3 +39,4 @@ nosetests.xml
 *.iml
 *.xml
 bandcamp_dl/asyncdownloader.py
 *.log
--- a/bandcamp_dl/bandcamp.py
+++ b/bandcamp_dl/bandcamp.py
@ -1,6 +1,7 @@
 from .bandcampjson import BandcampJSON
 from bs4 import BeautifulSoup
 from bs4 import FeatureNotFound
 from datetime import datetime
 import requests
 import json
@ -26,13 +27,15 @@ class Bandcamp:
        self.generate_album_json()
        self.tracks = self.tralbum_data_json['trackinfo']
        album_release = self.tralbum_data_json['album_release_date']
        album = {
            "tracks": [],
            "title": self.embed_data_json['album_title'],
            "artist": self.embed_data_json['artist'],
            "full": False,
            "art": "",
-            "date": self.tralbum_data_json['album_release_date']
+            "date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
        }
        for track in self.tracks:
--- a/bandcamp_dl/bandcamp_dl.py
+++ b/bandcamp_dl/bandcamp_dl.py
@ -51,7 +51,7 @@ from .bandcampdownloader import BandcampDownloader
 def main():
-    arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-03')
+    arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-05')
    bandcamp = Bandcamp()
    basedir = arguments['--base-dir'] or os.getcwd()
--- a/bandcamp_dl/bandcampdownloader.py
+++ b/bandcamp_dl/bandcampdownloader.py
@ -6,6 +6,14 @@ from mutagen.id3._frames import TIT2
 from mutagen.easyid3 import EasyID3
 from slugify import slugify
 if not sys.version_info[:2] == (3, 6):
    import mock
    from .utils import requests_patch
 # DEBUG
 # import logging
 # logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
 class BandcampDownloader:
    def __init__(self, urls=None, template=None, directory=None, overwrite=False):
@ -16,7 +24,7 @@ class BandcampDownloader:
        :param directory: download location
        :param overwrite: if True overwrite existing files
        """
-        self.headers = {'user_agent': 'bandcamp-dl/0.0.7-02 (https://github.com/iheanyi/bandcamp-dl)'}
+        self.headers = {'user_agent': 'bandcamp-dl/0.0.7-05 (https://github.com/iheanyi/bandcamp-dl)'}
        self.session = requests.Session()
        if type(urls) is str:
@ -98,9 +106,13 @@ class BandcampDownloader:
            while True:
                try:
-                    r = self.session.get(track['url'], headers=self.headers, stream=True)
+                    if not sys.version_info[:2] == (3, 6):
-                    file_length = int(r.headers['content-length'])
+                        with mock.patch('http.client.parse_headers', requests_patch.parse_headers):
-                    total = int(file_length/100)
+                            r = self.session.get(track['url'], headers=self.headers, stream=True)
                    else:
                        r = self.session.get(track['url'], headers=self.headers, stream=True)
                    file_length = int(r.headers.get('content-length', 0))
                    total = int(file_length / 100)
                    # If file exists and is still a tmp file skip downloading and encode
                    if os.path.exists(filepath):
                        self.write_id3_tags(filepath, track_meta)
@ -121,7 +133,10 @@ class BandcampDownloader:
                                dl += len(data)
                                f.write(data)
                                done = int(50 * dl / file_length)
-                                sys.stdout.write("\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks, "=" * done, " " * (50 - done), filename[:-8]))
+                                sys.stdout.write(
                                    "\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks,
                                                                                 "=" * done, " " * (50 - done),
                                                                                 filename[:-8]))
                                sys.stdout.flush()
                    local_size = os.path.getsize(filepath)
                    # if the local filesize before encoding doesn't match the remote filesize redownload
@ -168,6 +183,7 @@ class BandcampDownloader:
        sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
        audio = MP3(filepath)
        audio.delete()
        audio["TIT2"] = TIT2(encoding=3, text=["title"])
        audio.save(filename=None, v1=2)
--- a/bandcamp_dl/bandcampjson.py
+++ b/bandcamp_dl/bandcampjson.py
@ -1,6 +1,10 @@
 import demjson
 import re
 """TODO
    More in-depth error messages
 """
 class BandcampJSON:
    def __init__(self, body, var_name: str, js_data=None):
--- a/bandcamp_dl/deps.txt
+++ b/bandcamp_dl/deps.txt
@ -4,3 +4,4 @@ docopt==0.6.2
 mutagen==1.35.1
 requests==2.12.4
 unicode-slugify==0.1.3
 mock==2.0.0
--- a/bandcamp_dl/utils/requests_patch.py
+++ b/bandcamp_dl/utils/requests_patch.py
@ -0,0 +1,42 @@
 try:
    import cchardet as chardet
 except ImportError:
    import chardet as chardet
 import http.client
 import email.parser
 def parse_headers(fp, _class=http.client.HTTPMessage):
    """Parse only the RFC 2822 headers from a binary file pointer.

    Monkey-patched variant of the header parser that tries to infer the
    header encoding with chardet, falling back to ISO-8859-1.

    The email Parser wants to see strings rather than bytes, but a
    TextIOWrapper around the stream would buffer too many bytes, bytes
    which are later needed as bytes. So the header bytes are read here,
    as bytes, and decoded before being handed to the email Parser.

    :param fp: binary file-like object positioned at the first header line
    :param _class: message class handed to ``email.parser.Parser``
    :return: the message produced by parsing the decoded header block
    :raises http.client.LineTooLong: if a header line exceeds ``_MAXLINE``
    :raises http.client.HTTPException: if more than ``_MAXHEADERS`` lines are read
    """
    headers = []
    while True:
        # Read one byte past the limit so an over-long line can be detected.
        line = fp.readline(http.client._MAXLINE + 1)
        if len(line) > http.client._MAXLINE:
            raise http.client.LineTooLong("header line")
        headers.append(line)
        if len(headers) > http.client._MAXHEADERS:
            # Must be qualified: only the http.client module itself is
            # imported, so a bare HTTPException would be a NameError.
            raise http.client.HTTPException(
                "got more than {} headers".format(http.client._MAXHEADERS))
        # A blank line (or EOF) terminates the header block.
        if line in (b'\r\n', b'\n', b''):
            break
    raw_headers = b''.join(headers)
    fallback_encoding = 'iso-8859-1'
    encoding = fallback_encoding
    inferred = chardet.detect(raw_headers)
    # Only trust the detector when it names an encoding with high confidence.
    if inferred and inferred['encoding'] and inferred['confidence'] > 0.8:
        encoding = inferred['encoding']
    try:
        hstring = raw_headers.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # Wrong guess or a codec name Python does not know: ISO-8859-1 maps
        # every byte, so this decode cannot fail.
        hstring = raw_headers.decode(fallback_encoding)
    return email.parser.Parser(_class=_class).parsestr(hstring)
--- a/setup.py
+++ b/setup.py
@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))
 setup(
    name='bandcamp-downloader',
-    version='0.0.7-03',
+    version='0.0.7-05',
    description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
    long_description=open('README.rst').read(),
    url='https://github.com/iheanyi/bandcamp-dl',
@ -29,6 +29,7 @@ setup(
        'mutagen',
        'requests',
        'unicode-slugify',
        'mock',
    ],
    entry_points={
        'console_scripts': [