diff --git a/.gitignore b/.gitignore index c5e9269..4eacb05 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,3 @@ nosetests.xml .pydevproject *.iml *.xml -bandcamp_dl/asyncdownloader.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cc3ecb3..e7b37f1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,9 +17,3 @@ Version 0.0.6 - [Enhancement] Individual track downloads work now. - [Bugfix] Fixed imports, now working when installed via pip. - [Note] Last version to officially support Python 2.7.x - -Version 0.0.7 -------------- -- [Dependency] Slimit is no longer required -- [Dependency] Ply is no longer required -- [Dependency] demjson is now required diff --git a/README.rst b/README.rst index 67a05ca..4c82099 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ Description =========== bandcamp-dl is a small command-line app to download audio from -BandCamp.com. It requires the Python interpreter, version 3.5.x and is +BandCamp.com. It requires the Python interpreter, version 2.7.x - 3.5.x and is not platform specific. It is released to the public domain, which means you can modify it, redistribute it or use it how ever you like. @@ -209,11 +209,14 @@ related to bandcamp-dl, by all means, go ahead and report the bug. Dependencies ============ -- `BeautifulSoup `_ - HTML Parsing -- `Demjson `_- JavaScript dict to JSON conversion +- `BeautifulSoup `_ - + HTML Parsing - `Mutagen `_ - ID3 Encoding -- `Requests `_ - for retriving the HTML -- `Unicode-Slugify `_ - A slug generator that turns strings into unicode slugs. +- `Requests `_ - for retriving + the HTML +- `Slimit `_ - Javascript parsing +- `Unicode-Slugify `_ - + A slug generator that turns strings into unicode slugs. Copyright ========= diff --git a/bandcamp_dl/bandcamp.py b/bandcamp_dl/bandcamp.py index 192aaff..608135e 100644 --- a/bandcamp_dl/bandcamp.py +++ b/bandcamp_dl/bandcamp.py @@ -1,120 +1,119 @@ -from .bandcampjson import BandcampJSON from bs4 import BeautifulSoup -from bs4 import FeatureNotFound import requests -import json +from .jsobj import read_js_object class Bandcamp: - def parse(self, url: str, art: bool=True) -> dict or None: - """ - Requests the page, cherry picks album info - - :param url: album/track url - :param art: if True download album art - :return: album metadata - """ + def parse(self, url, no_art=True): try: r = requests.get(url) except requests.exceptions.MissingSchema: return None + self.no_art = no_art + + if r.status_code is not 200: + return None + try: self.soup = BeautifulSoup(r.text, "lxml") - except FeatureNotFound: + except: self.soup = BeautifulSoup(r.text, "html.parser") - self.generate_album_json() - self.tracks = self.tralbum_data_json['trackinfo'] - album = { "tracks": [], - "title": self.embed_data_json['album_title'], - "artist": self.embed_data_json['artist'], + "title": "", + "artist": "", "full": False, "art": "", - "date": self.tralbum_data_json['album_release_date'] + "date": "" } - for track in self.tracks: - track = self.get_track_metadata(track) + album_meta = self.extract_album_meta_data(r) + + album['artist'] = album_meta['artist'] + album['title'] = album_meta['title'] + album['date'] = album_meta['date'] + + for track in album_meta['tracks']: + track = self.get_track_meta_data(track) album['tracks'].append(track) - album['full'] = self.all_tracks_available() - if art: + album['full'] = self.all_tracks_available(album) + if self.no_art: album['art'] = self.get_album_art() return album - def all_tracks_available(self) -> bool: - """ - Verify that all tracks have a url - - :return: True if all urls accounted for - """ - for track in self.tracks: - if track['file']['mp3-128'] is None: + def all_tracks_available(self, album): + for track in album['tracks']: + if track['url'] is None: return False + return True - @staticmethod - def get_track_metadata(track: dict) -> dict: - """ - Extract individual track metadata + def is_basestring(self, obj): + if isinstance(obj, str) or isinstance(obj, bytes) or isinstance(obj, bytearray): + return True + return False - :param track: track dict - :return: track metadata dict - """ - track_metadata = { - "duration": track['duration'], - "track": str(track['track_num']), - "title": track['title'], - "url": None - } + def get_track_meta_data(self, track): + new_track = {} + if not self.is_basestring(track['file']): + if 'mp3-128' in track['file']: + new_track['url'] = track['file']['mp3-128'] + else: + new_track['url'] = None - if 'mp3-128' in track['file']: - track_metadata['url'] = "http:" + track['file']['mp3-128'] - return track_metadata + new_track['duration'] = track['duration'] + new_track['track'] = track['track_num'] + new_track['title'] = track['title'] - def generate_album_json(self): - """ - Retrieve JavaScript dictionaries from page and generate JSON + return new_track - :return: True if successful - """ - try: - embed = BandcampJSON(self.soup, "EmbedData") - tralbum = BandcampJSON(self.soup, "TralbumData") + def extract_album_meta_data(self, request): + album = {} - embed_data = embed.js_to_json() - tralbum_data = tralbum.js_to_json() + embedData = self.get_embed_string_block(request) - self.embed_data_json = json.loads(embed_data) - self.tralbum_data_json = json.loads(tralbum_data) - except Exception as e: - print(e) - return None - return True + block = request.text.split("var TralbumData = ") + + stringBlock = block[1] + + stringBlock = stringBlock.split("};")[0] + "};" + stringBlock = read_js_object(u"var TralbumData = {}".format(stringBlock)) + + if 'album_title' not in embedData['EmbedData']: + album['title'] = "Unknown Album" + else: + album['title'] = embedData['EmbedData']['album_title'] + + album['artist'] = stringBlock['TralbumData']['artist'] + album['tracks'] = stringBlock['TralbumData']['trackinfo'] + + if stringBlock['TralbumData']['album_release_date'] == "null": + album['date'] = "" + else: + album['date'] = stringBlock['TralbumData']['album_release_date'].split()[2] + + return album @staticmethod - def generate_album_url(artist: str, album: str) -> str: - """ - Generate an album url based on the artist and album name - - :param artist: artist name - :param album: album name - :return: album url as str - """ + def generate_album_url(artist, album): return "http://{0}.bandcamp.com/album/{1}".format(artist, album) - def get_album_art(self) -> str: - """ - Find and retrieve album art url from page - - :return: url as str - """ + def get_album_art(self): try: url = self.soup.find(id='tralbumArt').find_all('img')[0]['src'] return url - except None: + except: pass + + def get_embed_string_block(self, request): + embedBlock = request.text.split("var EmbedData = ") + + embedStringBlock = embedBlock[1] + embedStringBlock = embedStringBlock.split("};")[0] + "};" + embedStringBlock = read_js_object(u"var EmbedData = {}".format(embedStringBlock)) + + return embedStringBlock diff --git a/bandcamp_dl/bandcamp_dl.py b/bandcamp_dl/bandcamp_dl.py index cf6d311..2f1d14d 100755 --- a/bandcamp_dl/bandcamp_dl.py +++ b/bandcamp_dl/bandcamp_dl.py @@ -49,7 +49,7 @@ from .bandcampdownloader import BandcampDownloader def main(): - arguments = docopt(__doc__, version='bandcamp-dl 0.0.7') + arguments = docopt(__doc__, version='bandcamp-dl 0.0.6-01') bandcamp = Bandcamp() if arguments['--artist'] and arguments['--album']: @@ -73,4 +73,3 @@ def main(): if __name__ == '__main__': main() - diff --git a/bandcamp_dl/bandcampdownloader.py b/bandcamp_dl/bandcampdownloader.py index 0c49bb8..f2be591 100644 --- a/bandcamp_dl/bandcampdownloader.py +++ b/bandcamp_dl/bandcampdownloader.py @@ -9,14 +9,6 @@ from slugify import slugify class BandcampDownloader: def __init__(self, urls=None, template=None, directory=None, overwrite=False): - """ - Initialization function - - :param urls: list of urls - :param template: filename template - :param directory: download location - :param overwrite: if True overwrite existing files - """ if type(urls) is str: self.urls = [urls] @@ -25,22 +17,11 @@ class BandcampDownloader: self.directory = directory self.overwrite = overwrite - def start(self, album: dict): - """ - Start album download process - - :param album: album dict - """ + def start(self, album): print("Starting download process.") self.download_album(album) - def template_to_path(self, track: dict) -> str: - """ - Create valid filepath based on track metadata - - :param track: track metadata - :return: filepath - """ + def template_to_path(self, track): path = self.template path = path.replace("%{artist}", slugify(track['artist'])) path = path.replace("%{album}", slugify(track['album'])) @@ -50,27 +31,14 @@ class BandcampDownloader: return path - @staticmethod - def create_directory(filename: str) -> str: - """ - Create directory based on filename if it doesn't exist - - :param filename: full filename - :return: directory path - """ + def create_directory(self, filename): directory = os.path.dirname(filename) if not os.path.exists(directory): os.makedirs(directory) return directory - def download_album(self, album: dict) -> bool: - """ - Download all MP3 files in the album - - :param album: album dict - :return: True if successful - """ + def download_album(self, album): for track_index, track in enumerate(album['tracks']): track_meta = { "artist": album['artist'], @@ -85,17 +53,30 @@ class BandcampDownloader: filename = self.template_to_path(track_meta) dirname = self.create_directory(filename) - if not track['url']: + if not track.get('url'): print("Skipping track {0} - {1} as it is not available" .format(track['track'], track['title'])) continue try: track_url = track['url'] + # Check and see if HTTP is in the track_url + if 'http' not in track_url: + track_url = 'http:{}'.format(track_url) r = requests.get(track_url, stream=True) file_length = r.headers.get('content-length') + if not self.overwrite and os.path.isfile(filename): + file_size = os.path.getsize(filename) - 128 + if int(file_size) != int(file_length): + print(filename + " is incomplete, redownloading.") + os.remove(filename) + else: + print("Skipping track {0} - {1} as it's already downloaded, use --overwrite to overwrite existing files" + .format(track['track'], track['title'])) + continue + with open(filename, "wb") as f: print("Downloading: " + filename[:-4]) if file_length is None: @@ -125,14 +106,7 @@ class BandcampDownloader: return True - @staticmethod - def write_id3_tags(filename: str, meta: dict): - """ - Write metadata to the MP3 file - - :param filename: name of mp3 file - :param meta: dict of track metadata - """ + def write_id3_tags(self, filename, meta): print("\nEncoding . . .") audio = MP3(filename) diff --git a/bandcamp_dl/bandcampjson.py b/bandcamp_dl/bandcampjson.py deleted file mode 100644 index c6be3f2..0000000 --- a/bandcamp_dl/bandcampjson.py +++ /dev/null @@ -1,42 +0,0 @@ -import demjson -import re - - -class BandcampJSON: - def __init__(self, body, var_name: str, js_data=None): - self.body = body - self.var_name = var_name - self.js_data = js_data - - def get_js(self) -> str: - """ - Get