Fixes #100 and possibly #99

Patches Requests when the Python version is below 3.6.0 to work around a quirk in httplib relating to UTF-8 headers. Tracks are also now sanitized before tagging.
2017-01-28 06:12:21 -05:00 · 2017-01-28 06:12:21 -05:00 · 88107f7538
parent 370da98e21
commit 88107f7538
8 changed files with 76 additions and 8 deletions
--- a/.gitignore
+++ b/.gitignore
@ -39,3 +39,4 @@ nosetests.xml
 *.iml
 *.xml
 bandcamp_dl/asyncdownloader.py
+*.log
--- a/bandcamp_dl/bandcamp.py
+++ b/bandcamp_dl/bandcamp.py
@ -1,6 +1,7 @@
 from .bandcampjson import BandcampJSON
 from bs4 import BeautifulSoup
 from bs4 import FeatureNotFound
+from datetime import datetime
 import requests
 import json

@ -26,13 +27,15 @@ class Bandcamp:
        self.generate_album_json()
        self.tracks = self.tralbum_data_json['trackinfo']

+        album_release = self.tralbum_data_json['album_release_date']
+
        album = {
            "tracks": [],
            "title": self.embed_data_json['album_title'],
            "artist": self.embed_data_json['artist'],
            "full": False,
            "art": "",
-            "date": self.tralbum_data_json['album_release_date']
+            "date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
        }

        for track in self.tracks:
--- a/bandcamp_dl/bandcamp_dl.py
+++ b/bandcamp_dl/bandcamp_dl.py
@ -51,7 +51,7 @@ from .bandcampdownloader import BandcampDownloader


 def main():
-    arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-03')
+    arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-05')
    bandcamp = Bandcamp()

    basedir = arguments['--base-dir'] or os.getcwd()
--- a/bandcamp_dl/bandcampdownloader.py
+++ b/bandcamp_dl/bandcampdownloader.py
@ -6,6 +6,14 @@ from mutagen.id3._frames import TIT2
 from mutagen.easyid3 import EasyID3
 from slugify import slugify

+if not sys.version_info[:2] == (3, 6):
+    import mock
+    from .utils import requests_patch
+
+# DEBUG
+# import logging
+# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
+

 class BandcampDownloader:
    def __init__(self, urls=None, template=None, directory=None, overwrite=False):
@ -16,7 +24,7 @@ class BandcampDownloader:
        :param directory: download location
        :param overwrite: if True overwrite existing files
        """
-        self.headers = {'user_agent': 'bandcamp-dl/0.0.7-02 (https://github.com/iheanyi/bandcamp-dl)'}
+        self.headers = {'user_agent': 'bandcamp-dl/0.0.7-05 (https://github.com/iheanyi/bandcamp-dl)'}
        self.session = requests.Session()

        if type(urls) is str:
@ -98,9 +106,13 @@ class BandcampDownloader:

            while True:
                try:
-                    r = self.session.get(track['url'], headers=self.headers, stream=True)
-                    file_length = int(r.headers['content-length'])
-                    total = int(file_length/100)
+                    if not sys.version_info[:2] == (3, 6):
+                        with mock.patch('http.client.parse_headers', requests_patch.parse_headers):
+                            r = self.session.get(track['url'], headers=self.headers, stream=True)
+                    else:
+                        r = self.session.get(track['url'], headers=self.headers, stream=True)
+                    file_length = int(r.headers.get('content-length', 0))
+                    total = int(file_length / 100)
                    # If file exists and is still a tmp file skip downloading and encode
                    if os.path.exists(filepath):
                        self.write_id3_tags(filepath, track_meta)
@ -121,7 +133,10 @@ class BandcampDownloader:
                                dl += len(data)
                                f.write(data)
                                done = int(50 * dl / file_length)
-                                sys.stdout.write("\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks, "=" * done, " " * (50 - done), filename[:-8]))
+                                sys.stdout.write(
+                                    "\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks,
+                                                                                 "=" * done, " " * (50 - done),
+                                                                                 filename[:-8]))
                                sys.stdout.flush()
                    local_size = os.path.getsize(filepath)
                    # if the local filesize before encoding doesn't match the remote filesize redownload
@ -168,6 +183,7 @@ class BandcampDownloader:
        sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))

        audio = MP3(filepath)
+        audio.delete()
        audio["TIT2"] = TIT2(encoding=3, text=["title"])
        audio.save(filename=None, v1=2)

--- a/bandcamp_dl/bandcampjson.py
+++ b/bandcamp_dl/bandcampjson.py
@ -1,6 +1,10 @@
 import demjson
 import re

+"""TODO
+
+    More in-depth error messages
+"""

 class BandcampJSON:
    def __init__(self, body, var_name: str, js_data=None):
--- a/bandcamp_dl/deps.txt
+++ b/bandcamp_dl/deps.txt
@ -4,3 +4,4 @@ docopt==0.6.2
 mutagen==1.35.1
 requests==2.12.4
 unicode-slugify==0.1.3
+mock==2.0.0
--- a/bandcamp_dl/utils/requests_patch.py
+++ b/bandcamp_dl/utils/requests_patch.py
@ -0,0 +1,42 @@
+try:
+    import cchardet as chardet
+except ImportError:
+    import chardet as chardet
+
+import http.client
+import email.parser
+
+
+def parse_headers(fp, _class=http.client.HTTPMessage):
+    """Parse only the RFC 2822 headers from a file pointer.
+
+    Stand-in for ``http.client.parse_headers``: the header lines are read
+    as bytes, decoded with the encoding chardet infers (ISO-8859-1 when its
+    confidence is 0.8 or lower) and handed to the email parser.
+
+    :param fp: file pointer whose readline() yields header lines as bytes
+    :param _class: message class passed to the email parser
+    :return: the parsed headers, as produced by the email parser
+    :raises http.client.LineTooLong: if a header line exceeds ``_MAXLINE``
+    :raises http.client.HTTPException: if more than ``_MAXHEADERS`` lines
+    """
+    headers = []
+    while True:
+        line = fp.readline(http.client._MAXLINE + 1)
+        if len(line) > http.client._MAXLINE:
+            raise http.client.LineTooLong("header line")
+        headers.append(line)
+        if len(headers) > http.client._MAXHEADERS:
+            raise http.client.HTTPException("got more than {} headers".format(http.client._MAXHEADERS))
+        if line in (b'\r\n', b'\n', b''):
+            break
+
+    hstring = b''.join(headers)
+    inferred = chardet.detect(hstring)
+    if inferred and inferred['confidence'] > 0.8:
+        # Trust the detected encoding only when chardet is confident in it.
+        hstring = hstring.decode(inferred['encoding'])
+    else:
+        hstring = hstring.decode('iso-8859-1')
+
+    return email.parser.Parser(_class=_class).parsestr(hstring)
--- a/setup.py
+++ b/setup.py
@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))

 setup(
    name='bandcamp-downloader',
-    version='0.0.7-03',
+    version='0.0.7-05',
    description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
    long_description=open('README.rst').read(),
    url='https://github.com/iheanyi/bandcamp-dl',
@ -29,6 +29,7 @@ setup(
        'mutagen',
        'requests',
        'unicode-slugify',
+        'mock',
    ],
    entry_points={
        'console_scripts': [