Refactored BandcampJSON, Added --group option, readability changes.

BandcampJSON now returns a list of JSON strings (Album data, Embed data,
and Page data) and is only called once.

Added a `--group` option to insert a group tag; currently this attempts
to use the artist/album Label.

Made some small readability changes for future work and adjusted the
imports and filenames for the last time.
master
Anthony Forsberg 2017-02-17 22:09:17 -05:00
parent 737fd8256e
commit 098ae8c6c7
5 changed files with 102 additions and 83 deletions

15
bandcamp_dl/bandcamp_dl.py → bandcamp_dl/__main__.py Executable file → Normal file
View File

@ -7,6 +7,8 @@ Usage:
(<url> | --artist=<artist> --album=<album>) (<url> | --artist=<artist> --album=<album>)
[--overwrite] [--overwrite]
[--no-art] [--no-art]
[--embed-lyrics]
[--group]
bandcamp-dl (-h | --help) bandcamp-dl (-h | --help)
bandcamp-dl (--version) bandcamp-dl (--version)
@ -21,6 +23,8 @@ Options:
-f --full-album Download only if all tracks are available. -f --full-album Download only if all tracks are available.
-o --overwrite Overwrite tracks that already exist. Default is False. -o --overwrite Overwrite tracks that already exist. Default is False.
-n --no-art Skip grabbing album art -n --no-art Skip grabbing album art
-e --embed-lyrics Embed track lyrics (If available)
-g --group Use album/track Label as iTunes grouping
""" """
""" """
Coded by: Coded by:
@ -45,13 +49,15 @@ Iheanyi:
import os import os
import ast import ast
from docopt import docopt from docopt import docopt
from .bandcamp import Bandcamp
from .bandcampdownloader import BandcampDownloader from bandcamp_dl.bandcamp import Bandcamp
from bandcamp_dl.bandcampdownloader import BandcampDownloader
def main(): def main():
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-06') arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-09')
bandcamp = Bandcamp() bandcamp = Bandcamp()
basedir = arguments['--base-dir'] or os.getcwd() basedir = arguments['--base-dir'] or os.getcwd()
@ -81,7 +87,8 @@ def main():
elif arguments['--full-album'] and not album['full']: elif arguments['--full-album'] and not album['full']:
print("Full album not available. Skipping...") print("Full album not available. Skipping...")
else: else:
bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite']) bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'],
arguments['--embed-lyrics'], arguments['--group'])
bandcamp_downloader.start(album) bandcamp_downloader.start(album)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1,9 +1,11 @@
from .bandcampjson import BandcampJSON from datetime import datetime as dt
import json
import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4 import FeatureNotFound from bs4 import FeatureNotFound
from datetime import datetime
import requests from bandcamp_dl.bandcampjson import BandcampJSON
import json
class Bandcamp: class Bandcamp:
@ -15,34 +17,44 @@ class Bandcamp:
:return: album metadata :return: album metadata
""" """
try: try:
r = requests.get(url) response = requests.get(url)
except requests.exceptions.MissingSchema: except requests.exceptions.MissingSchema:
return None return None
try: try:
self.soup = BeautifulSoup(r.text, "lxml") self.soup = BeautifulSoup(response.text, "lxml")
except FeatureNotFound: except FeatureNotFound:
self.soup = BeautifulSoup(r.text, "html.parser") self.soup = BeautifulSoup(response.text, "html.parser")
self.generate_album_json() bandcamp_json = BandcampJSON(self.soup).generate()
self.tracks = self.tralbum_data_json['trackinfo'] album_json = json.loads(bandcamp_json[0])
embed_json = json.loads(bandcamp_json[1])
page_json = json.loads(bandcamp_json[2])
album_release = self.tralbum_data_json['album_release_date'] self.tracks = album_json['trackinfo']
album_release = album_json['album_release_date']
if album_release is None: if album_release is None:
album_release = self.tralbum_data_json['current']['release_date'] album_release = album_json['current']['release_date']
try: try:
album_title = self.embed_data_json['album_title'] album_title = embed_json['album_title']
except KeyError: except KeyError:
album_title = self.tralbum_data_json['trackinfo'][0]['title'] album_title = album_json['trackinfo'][0]['title']
try:
label = page_json['item_sellers']['{}'.format(album_json['current']['selling_band_id'])]['name']
except KeyError:
label = None
album = { album = {
"tracks": [], "tracks": [],
"title": album_title, "title": album_title,
"artist": self.embed_data_json['artist'], "artist": embed_json['artist'],
"label": label,
"full": False, "full": False,
"art": "", "art": "",
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y") "date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year)
} }
for track in self.tracks: for track in self.tracks:
@ -56,7 +68,6 @@ class Bandcamp:
return album return album
# Possibly redundant now, we skip unavailable tracks.
def all_tracks_available(self) -> bool: def all_tracks_available(self) -> bool:
"""Verify that all tracks have a url """Verify that all tracks have a url
@ -85,27 +96,14 @@ class Bandcamp:
track_metadata['url'] = "http:" + track['file']['mp3-128'] track_metadata['url'] = "http:" + track['file']['mp3-128']
else: else:
track_metadata['url'] = None track_metadata['url'] = None
if track['has_lyrics'] is not False:
if track['lyrics'] is None:
track['lyrics'] = "lyrics unavailable"
track_metadata['lyrics'] = track['lyrics'].replace('\\r\\n', '\n')
return track_metadata return track_metadata
def generate_album_json(self):
"""Retrieve JavaScript dictionaries from page and generate JSON
:return: True if successful
"""
try:
embed = BandcampJSON(self.soup, "EmbedData")
tralbum = BandcampJSON(self.soup, "TralbumData")
embed_data = embed.js_to_json()
tralbum_data = tralbum.js_to_json()
self.embed_data_json = json.loads(embed_data)
self.tralbum_data_json = json.loads(tralbum_data)
except Exception as e:
print(e)
return None
return True
@staticmethod @staticmethod
def generate_album_url(artist: str, album: str) -> str: def generate_album_url(artist: str, album: str) -> str:
"""Generate an album url based on the artist and album name """Generate an album url based on the artist and album name

View File

@ -1,22 +1,20 @@
import os import os
import sys import sys
import requests import requests
from mutagen.mp3 import MP3 from mutagen.mp3 import MP3, EasyMP3
from mutagen.id3._frames import TIT1
from mutagen.id3._frames import TIT2 from mutagen.id3._frames import TIT2
from mutagen.easyid3 import EasyID3 from mutagen.id3._frames import USLT
from slugify import slugify from slugify import slugify
if not sys.version_info[:2] == (3, 6): if not sys.version_info[:2] == (3, 6):
import mock import mock
from .utils import requests_patch from bandcamp_dl.utils import requests_patch
# DEBUG
# import logging
# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
class BandcampDownloader: class BandcampDownloader:
def __init__(self, urls=None, template=None, directory=None, overwrite=False): def __init__(self, urls=None, template=None, directory=None, overwrite=False, lyrics=None, grouping=None):
"""Initialize variables we will need throughout the Class """Initialize variables we will need throughout the Class
:param urls: list of urls :param urls: list of urls
@ -24,7 +22,7 @@ class BandcampDownloader:
:param directory: download location :param directory: download location
:param overwrite: if True overwrite existing files :param overwrite: if True overwrite existing files
""" """
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-06 (https://github.com/iheanyi/bandcamp-dl)'} self.headers = {'user_agent': 'bandcamp-dl/0.0.7-09 (https://github.com/iheanyi/bandcamp-dl)'}
self.session = requests.Session() self.session = requests.Session()
if type(urls) is str: if type(urls) is str:
@ -34,6 +32,8 @@ class BandcampDownloader:
self.template = template self.template = template
self.directory = directory self.directory = directory
self.overwrite = overwrite self.overwrite = overwrite
self.lyrics = lyrics
self.grouping = grouping
def start(self, album: dict): def start(self, album: dict):
"""Start album download process """Start album download process
@ -91,12 +91,16 @@ class BandcampDownloader:
for track_index, track in enumerate(album['tracks']): for track_index, track in enumerate(album['tracks']):
track_meta = { track_meta = {
"artist": album['artist'], "artist": album['artist'],
"label": album['label'],
"album": album['title'], "album": album['title'],
"title": track['title'], "title": track['title'],
"track": track['track'], "track": track['track'],
"date": album['date'] "date": album['date']
} }
if 'lyrics' in track.keys() and self.lyrics is not False:
track_meta['lyrics'] = track['lyrics']
self.num_tracks = len(album['tracks']) self.num_tracks = len(album['tracks'])
self.track_num = track_index + 1 self.track_num = track_index + 1
@ -186,16 +190,23 @@ class BandcampDownloader:
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename)) sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
audio = MP3(filepath) audio = MP3(filepath)
audio.delete() audio.tags = None
audio["TIT2"] = TIT2(encoding=3, text=["title"]) audio["TIT2"] = TIT2(encoding=3, text=["title"])
audio.save(filename=None, v1=2) audio.save(filename=None, v1=2)
audio = EasyID3(filepath) audio = MP3(filepath)
if self.grouping and meta["label"]:
audio["TIT1"] = TIT1(encoding=3, text=meta["label"])
if self.lyrics:
audio["USLT"] = USLT(encoding=3, lang='eng', desc='', text=meta['lyrics'])
audio.save()
audio = EasyMP3(filepath)
audio["tracknumber"] = meta['track'] audio["tracknumber"] = meta['track']
audio["title"] = meta['title'] audio["title"] = meta["title"]
audio["artist"] = meta['artist'] audio["artist"] = meta['artist']
audio["album"] = meta['album'] audio["album"] = meta['album']
audio["date"] = meta['date'] audio["date"] = meta["date"]
audio.save() audio.save()
os.rename(filepath, filepath[:-4]) os.rename(filepath, filepath[:-4])

View File

@ -1,44 +1,47 @@
import demjson
import re import re
"""TODO import demjson
More in-depth error messages
"""
class BandcampJSON: class BandcampJSON:
def __init__(self, body, var_name: str, js_data=None): def __init__(self, body):
self.body = body self.body = body
self.var_name = var_name self.targets = ['TralbumData', 'EmbedData', 'pagedata']
self.js_data = js_data self.json_data = []
self.regex = re.compile(r"(?<=var\s" + var_name + "\s=\s).*?(?=};)", re.DOTALL)
def get_js(self) -> str: def generate(self) -> list:
"""Get <script> element containing the data we need and return the raw JS """Iterate through targets grabbing needed data"""
for target in self.targets:
if target[:4] == 'page':
self.get_pagedata()
else:
self.regex = re.compile(r"(?<=var\s" + target + "\s=\s).*?(?=};)", re.DOTALL)
self.target = target
self.js_to_json()
return self.json_data
:return js_data: Raw JS as str def get_pagedata(self):
""" """Grab bandcamp pagedata JSON"""
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.var_name)).string pagedata = self.body.find('div', {'id': 'pagedata'})['data-blob']
return self.js_data # Add pagedata to the list of JSON strings
self.json_data.append(pagedata)
def extract_data(self, js: str) -> str: def get_js(self):
"""Get <script> element containing the data we need and return the raw JS"""
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.target)).string
self.extract_data(self.js_data)
def extract_data(self, js: str):
"""Extract values from JS dictionary """Extract values from JS dictionary
:param js: Raw JS :param js: Raw JS
:return: Contents of dictionary as str
""" """
self.js_data = self.regex.search(js).group().replace('" + "', '') + "}" self.js_data = self.regex.search(js).group().replace('" + "', '') + "}"
return self.js_data
def js_to_json(self) -> str: def js_to_json(self):
"""Convert JavaScript dictionary to JSON """Convert JavaScript dictionary to JSON"""
self.get_js()
:return: JSON as str
"""
js = self.get_js()
data = self.extract_data(js)
# Decode with demjson first to reformat keys and lists # Decode with demjson first to reformat keys and lists
js_data = demjson.decode(data) decoded_js = demjson.decode(self.js_data)
# Encode to make valid JSON # Encode to make valid JSON, add to list of JSON strings
js_data = demjson.encode(js_data) self.json_data.append(demjson.encode(decoded_js))
return js_data

View File

@ -10,7 +10,7 @@ here = path.abspath(path.dirname(__file__))
setup( setup(
name='bandcamp-downloader', name='bandcamp-downloader',
version='0.0.7-08', version='0.0.7-09',
description='bandcamp-dl downloads albums and tracks from Bandcamp for you', description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
long_description=open('README.rst').read(), long_description=open('README.rst').read(),
url='https://github.com/iheanyi/bandcamp-dl', url='https://github.com/iheanyi/bandcamp-dl',
@ -28,7 +28,7 @@ setup(
'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.6',
], ],
keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'], keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'],
packages=find_packages(), packages=find_packages(exclude=['tests']),
install_requires=[ install_requires=[
'beautifulsoup4', 'beautifulsoup4',
'demjson', 'demjson',
@ -41,7 +41,7 @@ setup(
], ],
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
'bandcamp-dl=bandcamp_dl.bandcamp_dl:main', 'bandcamp-dl=bandcamp_dl.__main__:main',
], ],
}, },
) )