From 098ae8c6c7c91a8e46c932e299f46fec26b831e5 Mon Sep 17 00:00:00 2001 From: Anthony Forsberg Date: Fri, 17 Feb 2017 22:09:17 -0500 Subject: [PATCH] Refactored BandcampJSON, Added --group option, readability changes. BandcampJSON now returns a list of JSON strings, Album data, Embed data, and Page data and is only called once. Added a `--group` option to insert a group tag, currently this attempts to use the artist/album Label. Made some small readability changes for future work, adjusted the imports and filenames for the last time. --- bandcamp_dl/{bandcamp_dl.py => __main__.py} | 15 +++-- bandcamp_dl/bandcamp.py | 68 ++++++++++----------- bandcamp_dl/bandcampdownloader.py | 37 +++++++---- bandcamp_dl/bandcampjson.py | 59 +++++++++--------- setup.py | 6 +- 5 files changed, 102 insertions(+), 83 deletions(-) rename bandcamp_dl/{bandcamp_dl.py => __main__.py} (83%) mode change 100755 => 100644 diff --git a/bandcamp_dl/bandcamp_dl.py b/bandcamp_dl/__main__.py old mode 100755 new mode 100644 similarity index 83% rename from bandcamp_dl/bandcamp_dl.py rename to bandcamp_dl/__main__.py index 5867feb..54906f8 --- a/bandcamp_dl/bandcamp_dl.py +++ b/bandcamp_dl/__main__.py @@ -7,6 +7,8 @@ Usage: ( | --artist= --album=) [--overwrite] [--no-art] + [--embed-lyrics] + [--group] bandcamp-dl (-h | --help) bandcamp-dl (--version) @@ -21,6 +23,8 @@ Options: -f --full-album Download only if all tracks are available. -o --overwrite Overwrite tracks that already exist. Default is False. -n --no-art Skip grabbing album art + -e --embed-lyrics Embed track lyrics (If available) + -g --group Use album/track Label as iTunes grouping """ """ Coded by: @@ -45,13 +49,15 @@ Iheanyi: import os import ast + from docopt import docopt -from .bandcamp import Bandcamp -from .bandcampdownloader import BandcampDownloader + +from bandcamp_dl.bandcamp import Bandcamp +from bandcamp_dl.bandcampdownloader import BandcampDownloader def main(): - arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-06') + arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-09') bandcamp = Bandcamp() basedir = arguments['--base-dir'] or os.getcwd() @@ -81,7 +87,8 @@ def main(): elif arguments['--full-album'] and not album['full']: print("Full album not available. Skipping...") else: - bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite']) + bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'], + arguments['--embed-lyrics'], arguments['--group']) bandcamp_downloader.start(album) if __name__ == '__main__': diff --git a/bandcamp_dl/bandcamp.py b/bandcamp_dl/bandcamp.py index 06f8fc9..53af3ea 100644 --- a/bandcamp_dl/bandcamp.py +++ b/bandcamp_dl/bandcamp.py @@ -1,9 +1,11 @@ -from .bandcampjson import BandcampJSON +from datetime import datetime as dt +import json + +import requests from bs4 import BeautifulSoup from bs4 import FeatureNotFound -from datetime import datetime -import requests -import json + +from bandcamp_dl.bandcampjson import BandcampJSON class Bandcamp: @@ -15,34 +17,44 @@ class Bandcamp: :return: album metadata """ try: - r = requests.get(url) + response = requests.get(url) except requests.exceptions.MissingSchema: return None try: - self.soup = BeautifulSoup(r.text, "lxml") + self.soup = BeautifulSoup(response.text, "lxml") except FeatureNotFound: - self.soup = BeautifulSoup(r.text, "html.parser") + self.soup = BeautifulSoup(response.text, "html.parser") - self.generate_album_json() - self.tracks = self.tralbum_data_json['trackinfo'] + bandcamp_json = BandcampJSON(self.soup).generate() + album_json = json.loads(bandcamp_json[0]) + embed_json = json.loads(bandcamp_json[1]) + page_json = json.loads(bandcamp_json[2]) - album_release = self.tralbum_data_json['album_release_date'] + self.tracks = album_json['trackinfo'] + + album_release = album_json['album_release_date'] if album_release is None: - album_release = self.tralbum_data_json['current']['release_date'] + album_release = album_json['current']['release_date'] try: - album_title = self.embed_data_json['album_title'] + album_title = embed_json['album_title'] except KeyError: - album_title = self.tralbum_data_json['trackinfo'][0]['title'] + album_title = album_json['trackinfo'][0]['title'] + + try: + label = page_json['item_sellers']['{}'.format(album_json['current']['selling_band_id'])]['name'] + except KeyError: + label = None album = { "tracks": [], "title": album_title, - "artist": self.embed_data_json['artist'], + "artist": embed_json['artist'], + "label": label, "full": False, "art": "", - "date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y") + "date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year) } for track in self.tracks: @@ -56,7 +68,6 @@ class Bandcamp: return album - # Possibly redundant now, we skip unavailable tracks. def all_tracks_available(self) -> bool: """Verify that all tracks have a url @@ -85,27 +96,14 @@ class Bandcamp: track_metadata['url'] = "http:" + track['file']['mp3-128'] else: track_metadata['url'] = None + + if track['has_lyrics'] is not False: + if track['lyrics'] is None: + track['lyrics'] = "lyrics unavailable" + track_metadata['lyrics'] = track['lyrics'].replace('\\r\\n', '\n') + return track_metadata - def generate_album_json(self): - """Retrieve JavaScript dictionaries from page and generate JSON - - :return: True if successful - """ - try: - embed = BandcampJSON(self.soup, "EmbedData") - tralbum = BandcampJSON(self.soup, "TralbumData") - - embed_data = embed.js_to_json() - tralbum_data = tralbum.js_to_json() - - self.embed_data_json = json.loads(embed_data) - self.tralbum_data_json = json.loads(tralbum_data) - except Exception as e: - print(e) - return None - return True - @staticmethod def generate_album_url(artist: str, album: str) -> str: """Generate an album url based on the artist and album name diff --git a/bandcamp_dl/bandcampdownloader.py b/bandcamp_dl/bandcampdownloader.py index 03136b6..c152704 100644 --- a/bandcamp_dl/bandcampdownloader.py +++ b/bandcamp_dl/bandcampdownloader.py @@ -1,22 +1,20 @@ import os import sys + import requests -from mutagen.mp3 import MP3 +from mutagen.mp3 import MP3, EasyMP3 +from mutagen.id3._frames import TIT1 from mutagen.id3._frames import TIT2 -from mutagen.easyid3 import EasyID3 +from mutagen.id3._frames import USLT from slugify import slugify if not sys.version_info[:2] == (3, 6): import mock - from .utils import requests_patch - -# DEBUG -# import logging -# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO) + from bandcamp_dl.utils import requests_patch class BandcampDownloader: - def __init__(self, urls=None, template=None, directory=None, overwrite=False): + def __init__(self, urls=None, template=None, directory=None, overwrite=False, lyrics=None, grouping=None): """Initialize variables we will need throughout the Class :param urls: list of urls @@ -24,7 +22,7 @@ class BandcampDownloader: :param directory: download location :param overwrite: if True overwrite existing files """ - self.headers = {'user_agent': 'bandcamp-dl/0.0.7-06 (https://github.com/iheanyi/bandcamp-dl)'} + self.headers = {'user_agent': 'bandcamp-dl/0.0.7-09 (https://github.com/iheanyi/bandcamp-dl)'} self.session = requests.Session() if type(urls) is str: @@ -34,6 +32,8 @@ class BandcampDownloader: self.template = template self.directory = directory self.overwrite = overwrite + self.lyrics = lyrics + self.grouping = grouping def start(self, album: dict): """Start album download process @@ -91,12 +91,16 @@ class BandcampDownloader: for track_index, track in enumerate(album['tracks']): track_meta = { "artist": album['artist'], + "label": album['label'], "album": album['title'], "title": track['title'], "track": track['track'], "date": album['date'] } + if 'lyrics' in track.keys() and self.lyrics is not False: + track_meta['lyrics'] = track['lyrics'] + self.num_tracks = len(album['tracks']) self.track_num = track_index + 1 @@ -186,16 +190,23 @@ class BandcampDownloader: sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename)) audio = MP3(filepath) - audio.delete() + audio.tags = None audio["TIT2"] = TIT2(encoding=3, text=["title"]) audio.save(filename=None, v1=2) - audio = EasyID3(filepath) + audio = MP3(filepath) + if self.grouping and meta["label"]: + audio["TIT1"] = TIT1(encoding=3, text=meta["label"]) + if self.lyrics: + audio["USLT"] = USLT(encoding=3, lang='eng', desc='', text=meta['lyrics']) + audio.save() + + audio = EasyMP3(filepath) audio["tracknumber"] = meta['track'] - audio["title"] = meta['title'] + audio["title"] = meta["title"] audio["artist"] = meta['artist'] audio["album"] = meta['album'] - audio["date"] = meta['date'] + audio["date"] = meta["date"] audio.save() os.rename(filepath, filepath[:-4]) diff --git a/bandcamp_dl/bandcampjson.py b/bandcamp_dl/bandcampjson.py index 65f211f..fcc3050 100644 --- a/bandcamp_dl/bandcampjson.py +++ b/bandcamp_dl/bandcampjson.py @@ -1,44 +1,47 @@ -import demjson import re -"""TODO +import demjson - More in-depth error messages -""" class BandcampJSON: - def __init__(self, body, var_name: str, js_data=None): + def __init__(self, body): self.body = body - self.var_name = var_name - self.js_data = js_data - self.regex = re.compile(r"(?<=var\s" + var_name + "\s=\s).*?(?=};)", re.DOTALL) + self.targets = ['TralbumData', 'EmbedData', 'pagedata'] + self.json_data = [] - def get_js(self) -> str: - """Get