Patches Requests if the Python version is below 3.6.0 to fix a quirk in httplib relating to UTF-8 headers. Also, tracks are now sanitized before tagging.
master
parent
370da98e21
commit
88107f7538
|
@ -39,3 +39,4 @@ nosetests.xml
|
||||||
*.iml
|
*.iml
|
||||||
*.xml
|
*.xml
|
||||||
bandcamp_dl/asyncdownloader.py
|
bandcamp_dl/asyncdownloader.py
|
||||||
|
*.log
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from .bandcampjson import BandcampJSON
|
from .bandcampjson import BandcampJSON
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4 import FeatureNotFound
|
from bs4 import FeatureNotFound
|
||||||
|
from datetime import datetime
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
@ -26,13 +27,15 @@ class Bandcamp:
|
||||||
self.generate_album_json()
|
self.generate_album_json()
|
||||||
self.tracks = self.tralbum_data_json['trackinfo']
|
self.tracks = self.tralbum_data_json['trackinfo']
|
||||||
|
|
||||||
|
album_release = self.tralbum_data_json['album_release_date']
|
||||||
|
|
||||||
album = {
|
album = {
|
||||||
"tracks": [],
|
"tracks": [],
|
||||||
"title": self.embed_data_json['album_title'],
|
"title": self.embed_data_json['album_title'],
|
||||||
"artist": self.embed_data_json['artist'],
|
"artist": self.embed_data_json['artist'],
|
||||||
"full": False,
|
"full": False,
|
||||||
"art": "",
|
"art": "",
|
||||||
"date": self.tralbum_data_json['album_release_date']
|
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
|
||||||
}
|
}
|
||||||
|
|
||||||
for track in self.tracks:
|
for track in self.tracks:
|
||||||
|
|
|
@ -51,7 +51,7 @@ from .bandcampdownloader import BandcampDownloader
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-03')
|
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-05')
|
||||||
bandcamp = Bandcamp()
|
bandcamp = Bandcamp()
|
||||||
|
|
||||||
basedir = arguments['--base-dir'] or os.getcwd()
|
basedir = arguments['--base-dir'] or os.getcwd()
|
||||||
|
|
|
@ -6,6 +6,14 @@ from mutagen.id3._frames import TIT2
|
||||||
from mutagen.easyid3 import EasyID3
|
from mutagen.easyid3 import EasyID3
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
|
||||||
|
if not sys.version_info[:2] == (3, 6):
|
||||||
|
import mock
|
||||||
|
from .utils import requests_patch
|
||||||
|
|
||||||
|
# DEBUG
|
||||||
|
# import logging
|
||||||
|
# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
class BandcampDownloader:
|
class BandcampDownloader:
|
||||||
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
|
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
|
||||||
|
@ -16,7 +24,7 @@ class BandcampDownloader:
|
||||||
:param directory: download location
|
:param directory: download location
|
||||||
:param overwrite: if True overwrite existing files
|
:param overwrite: if True overwrite existing files
|
||||||
"""
|
"""
|
||||||
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-02 (https://github.com/iheanyi/bandcamp-dl)'}
|
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-05 (https://github.com/iheanyi/bandcamp-dl)'}
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
|
|
||||||
if type(urls) is str:
|
if type(urls) is str:
|
||||||
|
@ -98,9 +106,13 @@ class BandcampDownloader:
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
r = self.session.get(track['url'], headers=self.headers, stream=True)
|
if not sys.version_info[:2] == (3, 6):
|
||||||
file_length = int(r.headers['content-length'])
|
with mock.patch('http.client.parse_headers', requests_patch.parse_headers):
|
||||||
total = int(file_length/100)
|
r = self.session.get(track['url'], headers=self.headers, stream=True)
|
||||||
|
else:
|
||||||
|
r = self.session.get(track['url'], headers=self.headers, stream=True)
|
||||||
|
file_length = int(r.headers.get('content-length', 0))
|
||||||
|
total = int(file_length / 100)
|
||||||
# If file exists and is still a tmp file skip downloading and encode
|
# If file exists and is still a tmp file skip downloading and encode
|
||||||
if os.path.exists(filepath):
|
if os.path.exists(filepath):
|
||||||
self.write_id3_tags(filepath, track_meta)
|
self.write_id3_tags(filepath, track_meta)
|
||||||
|
@ -121,7 +133,10 @@ class BandcampDownloader:
|
||||||
dl += len(data)
|
dl += len(data)
|
||||||
f.write(data)
|
f.write(data)
|
||||||
done = int(50 * dl / file_length)
|
done = int(50 * dl / file_length)
|
||||||
sys.stdout.write("\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks, "=" * done, " " * (50 - done), filename[:-8]))
|
sys.stdout.write(
|
||||||
|
"\r({}/{}) [{}{}] :: Downloading: {}".format(self.track_num, self.num_tracks,
|
||||||
|
"=" * done, " " * (50 - done),
|
||||||
|
filename[:-8]))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
local_size = os.path.getsize(filepath)
|
local_size = os.path.getsize(filepath)
|
||||||
# if the local filesize before encoding doesn't match the remote filesize redownload
|
# if the local filesize before encoding doesn't match the remote filesize redownload
|
||||||
|
@ -168,6 +183,7 @@ class BandcampDownloader:
|
||||||
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
|
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
|
||||||
|
|
||||||
audio = MP3(filepath)
|
audio = MP3(filepath)
|
||||||
|
audio.delete()
|
||||||
audio["TIT2"] = TIT2(encoding=3, text=["title"])
|
audio["TIT2"] = TIT2(encoding=3, text=["title"])
|
||||||
audio.save(filename=None, v1=2)
|
audio.save(filename=None, v1=2)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
import demjson
|
import demjson
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
"""TODO
|
||||||
|
|
||||||
|
More in-depth error messages
|
||||||
|
"""
|
||||||
|
|
||||||
class BandcampJSON:
|
class BandcampJSON:
|
||||||
def __init__(self, body, var_name: str, js_data=None):
|
def __init__(self, body, var_name: str, js_data=None):
|
||||||
|
|
|
@ -4,3 +4,4 @@ docopt==0.6.2
|
||||||
mutagen==1.35.1
|
mutagen==1.35.1
|
||||||
requests==2.12.4
|
requests==2.12.4
|
||||||
unicode-slugify==0.1.3
|
unicode-slugify==0.1.3
|
||||||
|
mock==2.0.0
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
try:
|
||||||
|
import cchardet as chardet
|
||||||
|
except ImportError:
|
||||||
|
import chardet as chardet
|
||||||
|
|
||||||
|
import http.client
|
||||||
|
import email.parser
|
||||||
|
|
||||||
|
|
||||||
|
def parse_headers(fp, _class=http.client.HTTPMessage):
    """Parse only the RFC 2822 headers from a binary file pointer.

    The email parser wants to see strings rather than bytes, but a
    TextIOWrapper around the stream would buffer too many bytes -- bytes
    which are needed later as raw bytes.  So the header lines are read
    here as bytes, decoded in one go, and handed to the email parser.

    Note: this is a monkey-patch replacement for
    ``http.client.parse_headers`` that asks chardet for the header
    encoding and only falls back to ISO-8859-1 when detection is not
    confident (or the detected codec cannot decode the data).

    :param fp: binary file-like object positioned at the first header line
    :param _class: message class handed to ``email.parser.Parser``
    :return: the parsed message produced by ``Parser.parsestr``
    :raises http.client.LineTooLong: a header line is longer than
        ``http.client._MAXLINE``
    :raises http.client.HTTPException: more than
        ``http.client._MAXHEADERS`` lines were read
    """
    headers = []
    while True:
        # Read one byte more than the limit so an over-long line is detectable.
        line = fp.readline(http.client._MAXLINE + 1)
        if len(line) > http.client._MAXLINE:
            raise http.client.LineTooLong("header line")
        headers.append(line)
        if len(headers) > http.client._MAXHEADERS:
            # Only the ``http.client`` module is imported here, so the
            # exception has to be referenced through it.
            raise http.client.HTTPException(
                "got more than {} headers".format(http.client._MAXHEADERS))
        # A blank line (or EOF) terminates the header section.
        if line in (b'\r\n', b'\n', b''):
            break

    raw = b''.join(headers)
    inferred = chardet.detect(raw)

    # ISO-8859-1 maps every byte to a code point, so it can never fail.
    encoding = 'iso-8859-1'
    # NOTE(review): some detector builds presumably report None for both
    # 'encoding' and 'confidence' when detection fails -- guard both before
    # comparing against the threshold.
    if inferred and inferred.get('encoding'):
        if (inferred.get('confidence') or 0) > 0.8:
            encoding = inferred['encoding']

    try:
        hstring = raw.decode(encoding)
    except (LookupError, UnicodeDecodeError):
        # Unknown codec name or a wrong guess: use the safe default.
        hstring = raw.decode('iso-8859-1')

    return email.parser.Parser(_class=_class).parsestr(hstring)
|
3
setup.py
3
setup.py
|
@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='bandcamp-downloader',
|
name='bandcamp-downloader',
|
||||||
version='0.0.7-03',
|
version='0.0.7-05',
|
||||||
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
|
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
|
||||||
long_description=open('README.rst').read(),
|
long_description=open('README.rst').read(),
|
||||||
url='https://github.com/iheanyi/bandcamp-dl',
|
url='https://github.com/iheanyi/bandcamp-dl',
|
||||||
|
@ -29,6 +29,7 @@ setup(
|
||||||
'mutagen',
|
'mutagen',
|
||||||
'requests',
|
'requests',
|
||||||
'unicode-slugify',
|
'unicode-slugify',
|
||||||
|
'mock',
|
||||||
],
|
],
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
|
|
Loading…
Reference in New Issue