diff --git a/.gitignore b/.gitignore index 4eacb05..c5e9269 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ nosetests.xml .pydevproject *.iml *.xml +bandcamp_dl/asyncdownloader.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e7b37f1..cc3ecb3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,3 +17,9 @@ Version 0.0.6 - [Enhancement] Individual track downloads work now. - [Bugfix] Fixed imports, now working when installed via pip. - [Note] Last version to officially support Python 2.7.x + +Version 0.0.7 +------------- +- [Dependency] Slimit is no longer required +- [Dependency] Ply is no longer required +- [Dependency] demjson is now required diff --git a/README.rst b/README.rst index 4c82099..67a05ca 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ Description =========== bandcamp-dl is a small command-line app to download audio from -BandCamp.com. It requires the Python interpreter, version 2.7.x - 3.5.x and is +BandCamp.com. It requires the Python interpreter, version 3.5.x and is not platform specific. It is released to the public domain, which means you can modify it, redistribute it or use it how ever you like. @@ -209,14 +209,11 @@ related to bandcamp-dl, by all means, go ahead and report the bug. Dependencies ============ -- `BeautifulSoup `_ - - HTML Parsing +- `BeautifulSoup `_ - HTML Parsing +- `Demjson `_ - JavaScript dict to JSON conversion - `Mutagen `_ - ID3 Encoding -- `Requests `_ - for retriving - the HTML -- `Slimit `_ - Javascript parsing -- `Unicode-Slugify `_ - - A slug generator that turns strings into unicode slugs. +- `Requests `_ - for retrieving the HTML +- `Unicode-Slugify `_ - A slug generator that turns strings into unicode slugs. 
Copyright ========= diff --git a/bandcamp_dl/bandcamp.py b/bandcamp_dl/bandcamp.py index 608135e..192aaff 100644 --- a/bandcamp_dl/bandcamp.py +++ b/bandcamp_dl/bandcamp.py @@ -1,119 +1,120 @@ +from .bandcampjson import BandcampJSON from bs4 import BeautifulSoup +from bs4 import FeatureNotFound import requests -from .jsobj import read_js_object +import json class Bandcamp: - def parse(self, url, no_art=True): + def parse(self, url: str, art: bool=True) -> dict or None: + """ + Requests the page, cherry picks album info + + :param url: album/track url + :param art: if True download album art + :return: album metadata + """ try: r = requests.get(url) except requests.exceptions.MissingSchema: return None - self.no_art = no_art - - if r.status_code is not 200: - return None - try: self.soup = BeautifulSoup(r.text, "lxml") - except: + except FeatureNotFound: self.soup = BeautifulSoup(r.text, "html.parser") + self.generate_album_json() + self.tracks = self.tralbum_data_json['trackinfo'] + album = { "tracks": [], - "title": "", - "artist": "", + "title": self.embed_data_json['album_title'], + "artist": self.embed_data_json['artist'], "full": False, "art": "", - "date": "" + "date": self.tralbum_data_json['album_release_date'] } - album_meta = self.extract_album_meta_data(r) - - album['artist'] = album_meta['artist'] - album['title'] = album_meta['title'] - album['date'] = album_meta['date'] - - for track in album_meta['tracks']: - track = self.get_track_meta_data(track) + for track in self.tracks: + track = self.get_track_metadata(track) album['tracks'].append(track) - album['full'] = self.all_tracks_available(album) - if self.no_art: + album['full'] = self.all_tracks_available() + if art: album['art'] = self.get_album_art() return album - def all_tracks_available(self, album): - for track in album['tracks']: - if track['url'] is None: - return False + def all_tracks_available(self) -> bool: + """ + Verify that all tracks have a url + :return: True if all urls accounted 
for + """ + for track in self.tracks: + if track['file']['mp3-128'] is None: + return False return True - def is_basestring(self, obj): - if isinstance(obj, str) or isinstance(obj, bytes) or isinstance(obj, bytearray): - return True - return False + @staticmethod + def get_track_metadata(track: dict) -> dict: + """ + Extract individual track metadata - def get_track_meta_data(self, track): - new_track = {} - if not self.is_basestring(track['file']): - if 'mp3-128' in track['file']: - new_track['url'] = track['file']['mp3-128'] - else: - new_track['url'] = None + :param track: track dict + :return: track metadata dict + """ + track_metadata = { + "duration": track['duration'], + "track": str(track['track_num']), + "title": track['title'], + "url": None + } - new_track['duration'] = track['duration'] - new_track['track'] = track['track_num'] - new_track['title'] = track['title'] + if 'mp3-128' in track['file']: + track_metadata['url'] = "http:" + track['file']['mp3-128'] + return track_metadata - return new_track + def generate_album_json(self): + """ + Retrieve JavaScript dictionaries from page and generate JSON - def extract_album_meta_data(self, request): - album = {} + :return: True if successful + """ + try: + embed = BandcampJSON(self.soup, "EmbedData") + tralbum = BandcampJSON(self.soup, "TralbumData") - embedData = self.get_embed_string_block(request) + embed_data = embed.js_to_json() + tralbum_data = tralbum.js_to_json() - block = request.text.split("var TralbumData = ") - - stringBlock = block[1] - - stringBlock = stringBlock.split("};")[0] + "};" - stringBlock = read_js_object(u"var TralbumData = {}".format(stringBlock)) - - if 'album_title' not in embedData['EmbedData']: - album['title'] = "Unknown Album" - else: - album['title'] = embedData['EmbedData']['album_title'] - - album['artist'] = stringBlock['TralbumData']['artist'] - album['tracks'] = stringBlock['TralbumData']['trackinfo'] - - if stringBlock['TralbumData']['album_release_date'] == "null": - 
album['date'] = "" - else: - album['date'] = stringBlock['TralbumData']['album_release_date'].split()[2] - - return album + self.embed_data_json = json.loads(embed_data) + self.tralbum_data_json = json.loads(tralbum_data) + except Exception as e: + print(e) + return None + return True @staticmethod - def generate_album_url(artist, album): + def generate_album_url(artist: str, album: str) -> str: + """ + Generate an album url based on the artist and album name + + :param artist: artist name + :param album: album name + :return: album url as str + """ return "http://{0}.bandcamp.com/album/{1}".format(artist, album) - def get_album_art(self): + def get_album_art(self) -> str: + """ + Find and retrieve album art url from page + + :return: url as str + """ try: url = self.soup.find(id='tralbumArt').find_all('img')[0]['src'] return url - except: + except None: pass - - def get_embed_string_block(self, request): - embedBlock = request.text.split("var EmbedData = ") - - embedStringBlock = embedBlock[1] - embedStringBlock = embedStringBlock.split("};")[0] + "};" - embedStringBlock = read_js_object(u"var EmbedData = {}".format(embedStringBlock)) - - return embedStringBlock diff --git a/bandcamp_dl/bandcamp_dl.py b/bandcamp_dl/bandcamp_dl.py index 2f1d14d..cf6d311 100755 --- a/bandcamp_dl/bandcamp_dl.py +++ b/bandcamp_dl/bandcamp_dl.py @@ -49,7 +49,7 @@ from .bandcampdownloader import BandcampDownloader def main(): - arguments = docopt(__doc__, version='bandcamp-dl 0.0.6-01') + arguments = docopt(__doc__, version='bandcamp-dl 0.0.7') bandcamp = Bandcamp() if arguments['--artist'] and arguments['--album']: @@ -73,3 +73,4 @@ def main(): if __name__ == '__main__': main() + diff --git a/bandcamp_dl/bandcampdownloader.py b/bandcamp_dl/bandcampdownloader.py index f2be591..0c49bb8 100644 --- a/bandcamp_dl/bandcampdownloader.py +++ b/bandcamp_dl/bandcampdownloader.py @@ -9,6 +9,14 @@ from slugify import slugify class BandcampDownloader: def __init__(self, urls=None, template=None, 
directory=None, overwrite=False): + """ + Initialization function + + :param urls: list of urls + :param template: filename template + :param directory: download location + :param overwrite: if True overwrite existing files + """ if type(urls) is str: self.urls = [urls] @@ -17,11 +25,22 @@ class BandcampDownloader: self.directory = directory self.overwrite = overwrite - def start(self, album): + def start(self, album: dict): + """ + Start album download process + + :param album: album dict + """ print("Starting download process.") self.download_album(album) - def template_to_path(self, track): + def template_to_path(self, track: dict) -> str: + """ + Create valid filepath based on track metadata + + :param track: track metadata + :return: filepath + """ path = self.template path = path.replace("%{artist}", slugify(track['artist'])) path = path.replace("%{album}", slugify(track['album'])) @@ -31,14 +50,27 @@ class BandcampDownloader: return path - def create_directory(self, filename): + @staticmethod + def create_directory(filename: str) -> str: + """ + Create directory based on filename if it doesn't exist + + :param filename: full filename + :return: directory path + """ directory = os.path.dirname(filename) if not os.path.exists(directory): os.makedirs(directory) return directory - def download_album(self, album): + def download_album(self, album: dict) -> bool: + """ + Download all MP3 files in the album + + :param album: album dict + :return: True if successful + """ for track_index, track in enumerate(album['tracks']): track_meta = { "artist": album['artist'], @@ -53,30 +85,17 @@ class BandcampDownloader: filename = self.template_to_path(track_meta) dirname = self.create_directory(filename) - if not track.get('url'): + if not track['url']: print("Skipping track {0} - {1} as it is not available" .format(track['track'], track['title'])) continue try: track_url = track['url'] - # Check and see if HTTP is in the track_url - if 'http' not in track_url: - track_url 
= 'http:{}'.format(track_url) r = requests.get(track_url, stream=True) file_length = r.headers.get('content-length') - if not self.overwrite and os.path.isfile(filename): - file_size = os.path.getsize(filename) - 128 - if int(file_size) != int(file_length): - print(filename + " is incomplete, redownloading.") - os.remove(filename) - else: - print("Skipping track {0} - {1} as it's already downloaded, use --overwrite to overwrite existing files" - .format(track['track'], track['title'])) - continue - with open(filename, "wb") as f: print("Downloading: " + filename[:-4]) if file_length is None: @@ -106,7 +125,14 @@ class BandcampDownloader: return True - def write_id3_tags(self, filename, meta): + @staticmethod + def write_id3_tags(filename: str, meta: dict): + """ + Write metadata to the MP3 file + + :param filename: name of mp3 file + :param meta: dict of track metadata + """ print("\nEncoding . . .") audio = MP3(filename) diff --git a/bandcamp_dl/bandcampjson.py b/bandcamp_dl/bandcampjson.py new file mode 100644 index 0000000..c6be3f2 --- /dev/null +++ b/bandcamp_dl/bandcampjson.py @@ -0,0 +1,42 @@ +import demjson +import re + + +class BandcampJSON: + def __init__(self, body, var_name: str, js_data=None): + self.body = body + self.var_name = var_name + self.js_data = js_data + + def get_js(self) -> str: + """ + Get