From abf0dd261bef224383ad295e75c4b824ef9f53ac Mon Sep 17 00:00:00 2001 From: Anthony Forsberg Date: Thu, 12 Jan 2017 19:25:43 -0500 Subject: [PATCH] Merge 0.0.7 (#94) * Preliminary 0.0.7 changes Moved to a new album/track data parser using demjson. Slimit and Ply are no longer required. Some basic spelling corrections and consistency changes. Function Annotation, return types, and docstrings added. * Initial commit for the Issue Template * Fleshed out the issue template * Switched to rst (oops), reformatted accordingly * Update ISSUE_TEMPLATE.rst * Moved CONTRIBUTING to the hidden .github directory * No longer trips up on unavailable tracks in an album * Much more robust file integrity checking, session file support. Multi-step process in making sure files are downloaded and encoded properly. Bandcamp-dl will now attempt to search for a not.finished file and if it is found load that session's arguments and resume operation from where it left off. * Improve download status/progress messages Made the download progress and status messages neater, no more multiple progress bars and lines of status messages. * Final 0.0.7 changes Setup imports for distribution again. Reformatted docstrings. Clarified choices in partial download dialog. Updated changelog. Updated manifest. Updated readme. 
--- CONTRIBUTING.rst => .github/CONTRIBUTING.rst | 6 +- .github/ISSUE_TEMPLATE.rst | 12 ++ .gitignore | 1 + CHANGELOG.rst | 11 ++ MANIFEST.in | 2 +- README.rst | 60 ++----- bandcamp_dl/bandcamp.py | 153 +++++++++--------- bandcamp_dl/bandcamp_dl.py | 57 ++++--- bandcamp_dl/bandcampdownloader.py | 158 ++++++++++++------- bandcamp_dl/bandcampjson.py | 39 +++++ bandcamp_dl/deps.txt | 3 +- bandcamp_dl/jsobj.py | 81 ---------- requirements.txt | 5 +- setup.py | 6 +- 14 files changed, 305 insertions(+), 289 deletions(-) rename CONTRIBUTING.rst => .github/CONTRIBUTING.rst (83%) create mode 100644 .github/ISSUE_TEMPLATE.rst create mode 100644 bandcamp_dl/bandcampjson.py delete mode 100644 bandcamp_dl/jsobj.py diff --git a/CONTRIBUTING.rst b/.github/CONTRIBUTING.rst similarity index 83% rename from CONTRIBUTING.rst rename to .github/CONTRIBUTING.rst index 007bd42..ecf7054 100644 --- a/CONTRIBUTING.rst +++ b/.github/CONTRIBUTING.rst @@ -14,13 +14,15 @@ Workflow Please submit as many fixes for typos and grammar bloopers as you can! - Try to limit each pull request to *one* change only. - Once you've addressed review feedback, make sure to bump the pull request with a short note. - Maintainers don’t receive notifications when you push new commits. Code ---- -- Try to adhere to PEP8 as best you can (Yes some lines will simply be too long, its ok.) +- Try to adhere to PEP8 as best you can. 
+- Annotate functions +- Specify return types +- Add docstrings ***** diff --git a/.github/ISSUE_TEMPLATE.rst b/.github/ISSUE_TEMPLATE.rst new file mode 100644 index 0000000..829c31a --- /dev/null +++ b/.github/ISSUE_TEMPLATE.rst @@ -0,0 +1,12 @@ +**Python version:** + +**Bandcamp-dl version:** + +**Bancamp-dl options:** + +**url:** + +**options:** + +**Describe the issue:** +------------------------- diff --git a/.gitignore b/.gitignore index 4eacb05..c5e9269 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ nosetests.xml .pydevproject *.iml *.xml +bandcamp_dl/asyncdownloader.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e7b37f1..398699b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,3 +17,14 @@ Version 0.0.6 - [Enhancement] Individual track downloads work now. - [Bugfix] Fixed imports, now working when installed via pip. - [Note] Last version to officially support Python 2.7.x + +Version 0.0.7 +------------- +- [Enhancement] Will now resume if it finds a valid ``not.finished`` file. +- [Enhancement] Interrupting downloads is safe, they will resume on next run. +- [Enhancement] Interrupting encoding is safe, it will finish on next run. +- [Enhancement] CLI output is now much neater. +- [Bugfix] Partial albums (some previews disabled) will now download properly. +- [Dependency] Slimit is no longer required. +- [Dependency] Ply is no longer required. +- [Dependency] demjson is now required. 
diff --git a/MANIFEST.in b/MANIFEST.in index 3af7e42..33edf4d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include README.rst AUTHORS.rst CHANGELOG.rst LICENSE -exclude *.mp3 .gitignore yacctab.py lextab.py .travis.yml +exclude *.mp3 .gitignore .travis.yml setup.cfg global-exclude *.pyc global-exclude *.DS_STORE diff --git a/README.rst b/README.rst index ba4e515..e64d198 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,14 @@ Installation From PyPI --------- -pip install bandcamp-downloader +``pip install bandcamp-downloader`` + +From Wheel +---------- + +1. Download the wheel (``.whl``) from PyPI or the Releases page +2. ``cd`` to the directory containing the ``.whl`` file +2. ``pip install .whl`` From Source ----------- @@ -24,7 +31,7 @@ Description =========== bandcamp-dl is a small command-line app to download audio from -BandCamp.com. It requires the Python interpreter, version 2.7.12+ - 3.5.2+ and is +BandCamp.com. It requires the Python interpreter, version 3.5+ and is not platform specific. It is released to the public domain, which means you can modify it, redistribute it or use it how ever you like. @@ -77,9 +84,9 @@ The default template is: ``%{artist}/%{album}/%{track} - %{title}``. Bugs ==== -Bugs should be reported `here `_. Please include -the full output of the command when run with ``--verbose``. The output -(including the first lines) contain important debugging information. +Bugs should be reported `here `_. +Please include the full output of the command when run with ``--verbose``. +The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. @@ -88,38 +95,6 @@ For discussions, join us in `Discord `_. When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist): -Is the description of the issue itself sufficient? 
-================================================== - -We often get issue reports that we cannot really decipher. While in most -cases we eventually get the required information after asking back -multiple times, this poses an unnecessary drain on our resources. Many -contributors, including myself, are also not native speakers, so we may -misread some parts. - -So please elaborate on what feature you are requesting, or what bug you -want to be fixed. Make sure that it's obvious - -- What the problem is -- How it could be fixed -- How your proposed solution would look like - -If your report is shorter than two lines, it is almost certainly missing -some of these, which makes it hard for us to respond to it. We're often -too polite to close the issue outright, but the missing info makes -misinterpretation likely. As a commiter myself, I often get frustrated -by these issues, since the only possible way for me to move forward on -them is to ask for clarification over and over. - -For bug reports, this means that your report should contain the -*complete* output of bandcamp-dl when called with the ``-v`` flag. The -error message you get for (most) bugs even says so, but you would not -believe how many of our bug reports do not contain this information. - -Site support requests **must contain an example URL**. An example URL is -a URL you might want to download, like -``lifeformed.bandcamp.com/album/fastfall``. - Are you using the latest version? ================================= @@ -209,14 +184,11 @@ related to bandcamp-dl, by all means, go ahead and report the bug. Dependencies ============ -- `BeautifulSoup `_ - - HTML Parsing +- `BeautifulSoup `_ - HTML Parsing +- `Demjson `_- JavaScript dict to JSON conversion - `Mutagen `_ - ID3 Encoding -- `Requests `_ - for retriving - the HTML -- `Slimit `_ - Javascript parsing -- `Unicode-Slugify `_ - - A slug generator that turns strings into unicode slugs. 
+- `Requests `_ - for retriving the HTML +- `Unicode-Slugify `_ - A slug generator that turns strings into unicode slugs. Copyright ========= diff --git a/bandcamp_dl/bandcamp.py b/bandcamp_dl/bandcamp.py index 608135e..848b53e 100644 --- a/bandcamp_dl/bandcamp.py +++ b/bandcamp_dl/bandcamp.py @@ -1,119 +1,118 @@ +from .bandcampjson import BandcampJSON from bs4 import BeautifulSoup +from bs4 import FeatureNotFound import requests -from .jsobj import read_js_object +import json class Bandcamp: - def parse(self, url, no_art=True): + def parse(self, url: str, art: bool=True) -> dict or None: + """Requests the page, cherry picks album info + + :param url: album/track url + :param art: if True download album art + :return: album metadata + """ try: r = requests.get(url) except requests.exceptions.MissingSchema: return None - self.no_art = no_art - - if r.status_code is not 200: - return None - try: self.soup = BeautifulSoup(r.text, "lxml") - except: + except FeatureNotFound: self.soup = BeautifulSoup(r.text, "html.parser") + self.generate_album_json() + self.tracks = self.tralbum_data_json['trackinfo'] + album = { "tracks": [], - "title": "", - "artist": "", + "title": self.embed_data_json['album_title'], + "artist": self.embed_data_json['artist'], "full": False, "art": "", - "date": "" + "date": self.tralbum_data_json['album_release_date'] } - album_meta = self.extract_album_meta_data(r) + for track in self.tracks: + if track['file'] is not None: + track = self.get_track_metadata(track) + album['tracks'].append(track) - album['artist'] = album_meta['artist'] - album['title'] = album_meta['title'] - album['date'] = album_meta['date'] - - for track in album_meta['tracks']: - track = self.get_track_meta_data(track) - album['tracks'].append(track) - - album['full'] = self.all_tracks_available(album) - if self.no_art: + album['full'] = self.all_tracks_available() + if art: album['art'] = self.get_album_art() return album - def all_tracks_available(self, album): - for track 
in album['tracks']: - if track['url'] is None: - return False + # Possibly redundant now, we skip unavailable tracks. + def all_tracks_available(self) -> bool: + """Verify that all tracks have a url + :return: True if all urls accounted for + """ + for track in self.tracks: + if track['file'] is None: + return False return True - def is_basestring(self, obj): - if isinstance(obj, str) or isinstance(obj, bytes) or isinstance(obj, bytearray): - return True - return False + @staticmethod + def get_track_metadata(track: dict or None) -> dict: + """Extract individual track metadata - def get_track_meta_data(self, track): - new_track = {} - if not self.is_basestring(track['file']): - if 'mp3-128' in track['file']: - new_track['url'] = track['file']['mp3-128'] + :param track: track dict + :return: track metadata dict + """ + track_metadata = { + "duration": track['duration'], + "track": str(track['track_num']), + "title": track['title'], + "url": None + } + + if 'mp3-128' in track['file']: + track_metadata['url'] = "http:" + track['file']['mp3-128'] else: - new_track['url'] = None + track_metadata['url'] = None + return track_metadata - new_track['duration'] = track['duration'] - new_track['track'] = track['track_num'] - new_track['title'] = track['title'] + def generate_album_json(self): + """Retrieve JavaScript dictionaries from page and generate JSON - return new_track + :return: True if successful + """ + try: + embed = BandcampJSON(self.soup, "EmbedData") + tralbum = BandcampJSON(self.soup, "TralbumData") - def extract_album_meta_data(self, request): - album = {} + embed_data = embed.js_to_json() + tralbum_data = tralbum.js_to_json() - embedData = self.get_embed_string_block(request) - - block = request.text.split("var TralbumData = ") - - stringBlock = block[1] - - stringBlock = stringBlock.split("};")[0] + "};" - stringBlock = read_js_object(u"var TralbumData = {}".format(stringBlock)) - - if 'album_title' not in embedData['EmbedData']: - album['title'] = "Unknown 
Album" - else: - album['title'] = embedData['EmbedData']['album_title'] - - album['artist'] = stringBlock['TralbumData']['artist'] - album['tracks'] = stringBlock['TralbumData']['trackinfo'] - - if stringBlock['TralbumData']['album_release_date'] == "null": - album['date'] = "" - else: - album['date'] = stringBlock['TralbumData']['album_release_date'].split()[2] - - return album + self.embed_data_json = json.loads(embed_data) + self.tralbum_data_json = json.loads(tralbum_data) + except Exception as e: + print(e) + return None + return True @staticmethod - def generate_album_url(artist, album): + def generate_album_url(artist: str, album: str) -> str: + """Generate an album url based on the artist and album name + + :param artist: artist name + :param album: album name + :return: album url as str + """ return "http://{0}.bandcamp.com/album/{1}".format(artist, album) - def get_album_art(self): + def get_album_art(self) -> str: + """Find and retrieve album art url from page + + :return: url as str + """ try: url = self.soup.find(id='tralbumArt').find_all('img')[0]['src'] return url - except: + except None: pass - - def get_embed_string_block(self, request): - embedBlock = request.text.split("var EmbedData = ") - - embedStringBlock = embedBlock[1] - embedStringBlock = embedStringBlock.split("};")[0] + "};" - embedStringBlock = read_js_object(u"var EmbedData = {}".format(embedStringBlock)) - - return embedStringBlock diff --git a/bandcamp_dl/bandcamp_dl.py b/bandcamp_dl/bandcamp_dl.py index de39b0d..a008967 100755 --- a/bandcamp_dl/bandcamp_dl.py +++ b/bandcamp_dl/bandcamp_dl.py @@ -1,27 +1,28 @@ """bandcamp-dl Usage: - bandcamp-dl.py - bandcamp-dl.py [--template=