From 88107f7538ed2f66a4f392305ea6d84182579446 Mon Sep 17 00:00:00 2001
From: Anthony Forsberg <forsberganthony@yahoo.com>
Date: Sat, 28 Jan 2017 06:12:21 -0500
Subject: [PATCH] Fixes #100 and possibly #99

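Patches Requests when the Python version is below 3.6.0 to work around
a quirk in httplib's handling of UTF-8 headers: http.client.parse_headers
is replaced (via mock) for the duration of the track download request.

Tracks are now also sanitized before tagging: any existing tags are
deleted first.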
---
 .gitignore                          |  1 +
 bandcamp_dl/bandcamp.py             |  5 +++-
 bandcamp_dl/bandcamp_dl.py          |  2 +-
 bandcamp_dl/bandcampdownloader.py   | 26 ++++++++++++++----
 bandcamp_dl/bandcampjson.py         |  4 +++
 bandcamp_dl/deps.txt                |  1 +
 bandcamp_dl/utils/requests_patch.py | 42 +++++++++++++++++++++++++++++
 setup.py                            |  3 ++-
 8 files changed, 76 insertions(+), 8 deletions(-)
 create mode 100644 bandcamp_dl/utils/requests_patch.py

diff --git a/.gitignore b/.gitignore
index c5e9269..523a573 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,3 +39,4 @@ nosetests.xml
 *.iml
 *.xml
 bandcamp_dl/asyncdownloader.py
+*.log
diff --git a/bandcamp_dl/bandcamp.py b/bandcamp_dl/bandcamp.py
index 848b53e..4b7e4ed 100644
--- a/bandcamp_dl/bandcamp.py
+++ b/bandcamp_dl/bandcamp.py
@@ -1,6 +1,7 @@
 from .bandcampjson import BandcampJSON
 from bs4 import BeautifulSoup
 from bs4 import FeatureNotFound
+from datetime import datetime
 import requests
 import json
 
@@ -26,13 +27,15 @@ class Bandcamp:
         self.generate_album_json()
         self.tracks = self.tralbum_data_json['trackinfo']
 
+        album_release = self.tralbum_data_json['album_release_date']
+
         album = {
             "tracks": [],
             "title": self.embed_data_json['album_title'],
             "artist": self.embed_data_json['artist'],
             "full": False,
             "art": "",
-            "date": self.tralbum_data_json['album_release_date']
+            "date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
         }
 
         for track in self.tracks:
diff --git a/bandcamp_dl/bandcamp_dl.py b/bandcamp_dl/bandcamp_dl.py
index a29052a..b1f61f4 100755
--- a/bandcamp_dl/bandcamp_dl.py
+++ b/bandcamp_dl/bandcamp_dl.py
@@ -51,7 +51,7 @@ from .bandcampdownloader import BandcampDownloader
 
 
 def main():
-    arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-03')
+    arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-05')
     bandcamp = Bandcamp()
 
     basedir = arguments['--base-dir'] or os.getcwd()
diff --git a/bandcamp_dl/bandcampdownloader.py b/bandcamp_dl/bandcampdownloader.py
index 39f12fa..d7b7f20 100644
--- a/bandcamp_dl/bandcampdownloader.py
+++ b/bandcamp_dl/bandcampdownloader.py
@@ -6,6 +6,14 @@ from mutagen.id3._frames import TIT2
 from mutagen.easyid3 import EasyID3
 from slugify import slugify
 
+if not sys.version_info[:2] == (3, 6):
+    import mock
+    from .utils import requests_patch
+
+# DEBUG
+# import logging
+# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
+
 
 class BandcampDownloader:
     def __init__(self, urls=None, template=None, directory=None, overwrite=False):
@@ -16,7 +24,7 @@ class BandcampDownloader:
         :param directory: download location
         :param overwrite: if True overwrite existing files
         """
-        self.headers = {'user_agent': 'bandcamp-dl/0.0.7-02 (https://github.com/iheanyi/bandcamp-dl)'}
+        self.headers = {'user_agent': 'bandcamp-dl/0.0.7-05 (https://github.com/iheanyi/bandcamp-dl)'}
         self.session = requests.Session()
 
         if type(urls) is str:
@@ -98,9 +106,13 @@ class BandcampDownloader:
 
             while True:
                 try:
-                    r = self.session.get(track['url'], headers=self.headers, stream=True)
-                    file_length = int(r.headers['content-length'])
-                    total = int(file_length/100)
+                    if not sys.version_info[:2] == (3, 6):
+                        with mock.patch('http.client.parse_headers', requests_patch.parse_headers):
+                            r = self.session.get(track['url'], headers=self.headers, stream=True)
+                    else:
+                        r = self.session.get(track['url'], headers=self.headers, stream=True)
+                    file_length = int(r.headers.get('content-length', 0))
+                    total = int(file_length / 100)
                     # If file exists and is still a tmp file skip downloading and encode
                     if os.path.exists(filepath):
                         self.write_id3_tags(filepath, track_meta)
@@ -121,7 +133,10 @@ class BandcampDownloader:
                                 dl += len(data)
                                 f.write(data)
                                 done = int(50 * dl / file_length)
-                                sys.stdout.write("\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks, "=" * done, " " * (50 - done), filename[:-8]))
+                                sys.stdout.write(
+                                    "\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks,
+                                                                                 "=" * done, " " * (50 - done),
+                                                                                 filename[:-8]))
                                 sys.stdout.flush()
                     local_size = os.path.getsize(filepath)
                     # if the local filesize before encoding doesn't match the remote filesize redownload
@@ -168,6 +183,7 @@ class BandcampDownloader:
         sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
 
         audio = MP3(filepath)
+        audio.delete()
         audio["TIT2"] = TIT2(encoding=3, text=["title"])
         audio.save(filename=None, v1=2)
 
diff --git a/bandcamp_dl/bandcampjson.py b/bandcamp_dl/bandcampjson.py
index 095f6bd..65f211f 100644
--- a/bandcamp_dl/bandcampjson.py
+++ b/bandcamp_dl/bandcampjson.py
@@ -1,6 +1,10 @@
 import demjson
 import re
 
+"""TODO
+
+    More in-depth error messages
+"""
 
 class BandcampJSON:
     def __init__(self, body, var_name: str, js_data=None):
diff --git a/bandcamp_dl/deps.txt b/bandcamp_dl/deps.txt
index f3432fc..0350cbf 100644
--- a/bandcamp_dl/deps.txt
+++ b/bandcamp_dl/deps.txt
@@ -4,3 +4,4 @@ docopt==0.6.2
 mutagen==1.35.1
 requests==2.12.4
 unicode-slugify==0.1.3
+mock==2.0.0
diff --git a/bandcamp_dl/utils/requests_patch.py b/bandcamp_dl/utils/requests_patch.py
new file mode 100644
index 0000000..1fdf90a
--- /dev/null
+++ b/bandcamp_dl/utils/requests_patch.py
@@ -0,0 +1,42 @@
+try:
+    import cchardet as chardet
+except ImportError:
+    import chardet as chardet
+
+import http.client
+import email.parser
+
+
+def parse_headers(fp, _class=http.client.HTTPMessage):
+    """Parse only the RFC 2822 headers from a binary file pointer.
+
+    Intended as a monkey-patch for ``http.client.parse_headers``. The
+    email Parser wants str, but a TextIOWrapper around the stream would
+    buffer bytes that must later be read as bytes, so the header lines
+    are read here as bytes and decoded in one go. The charset is guessed
+    with chardet; iso-8859-1 is used when the guess is missing or weak.
+    Returns the parsed message, an instance of ``_class``.
+
+    Raises http.client.LineTooLong for an over-long header line and
+    http.client.HTTPException when there are too many header lines.
+    """
+    headers = []
+    while True:
+        line = fp.readline(http.client._MAXLINE + 1)
+        if len(line) > http.client._MAXLINE:
+            raise http.client.LineTooLong("header line")
+        headers.append(line)
+        if len(headers) > http.client._MAXHEADERS:
+            raise http.client.HTTPException("got more than {} headers".format(http.client._MAXHEADERS))
+        if line in (b'\r\n', b'\n', b''):
+            break
+
+    hstring = b''.join(headers)
+    inferred = chardet.detect(hstring)
+    if inferred and inferred['encoding'] and inferred['confidence'] > 0.8:
+        # Check the encoding first: detectors may report None for both fields.
+        hstring = hstring.decode(inferred['encoding'])
+    else:
+        hstring = hstring.decode('iso-8859-1')
+
+    return email.parser.Parser(_class=_class).parsestr(hstring)
diff --git a/setup.py b/setup.py
index 386db3f..2a50e2a 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))
 
 setup(
     name='bandcamp-downloader',
-    version='0.0.7-03',
+    version='0.0.7-05',
     description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
     long_description=open('README.rst').read(),
     url='https://github.com/iheanyi/bandcamp-dl',
@@ -29,6 +29,7 @@ setup(
         'mutagen',
         'requests',
         'unicode-slugify',
+        'mock',
     ],
     entry_points={
         'console_scripts': [