Refactored BandcampJSON, added --group option, readability changes.

BandcampJSON now returns a list of JSON strings (album data, embed data,
and page data) and is only called once.

Added a `--group` option to insert a group tag; currently this attempts
to use the artist/album label.

Made some small readability changes for future work, and adjusted the
imports and filenames for the last time.
master
Anthony Forsberg 2017-02-17 22:09:17 -05:00
parent 737fd8256e
commit 098ae8c6c7
5 changed files with 102 additions and 83 deletions

15
bandcamp_dl/bandcamp_dl.py → bandcamp_dl/__main__.py Executable file → Normal file
View File

@ -7,6 +7,8 @@ Usage:
(<url> | --artist=<artist> --album=<album>)
[--overwrite]
[--no-art]
[--embed-lyrics]
[--group]
bandcamp-dl (-h | --help)
bandcamp-dl (--version)
@ -21,6 +23,8 @@ Options:
-f --full-album Download only if all tracks are available.
-o --overwrite Overwrite tracks that already exist. Default is False.
-n --no-art Skip grabbing album art
-e --embed-lyrics Embed track lyrics (If available)
-g --group Use album/track Label as iTunes grouping
"""
"""
Coded by:
@ -45,13 +49,15 @@ Iheanyi:
import os
import ast
from docopt import docopt
from .bandcamp import Bandcamp
from .bandcampdownloader import BandcampDownloader
from bandcamp_dl.bandcamp import Bandcamp
from bandcamp_dl.bandcampdownloader import BandcampDownloader
def main():
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-06')
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-09')
bandcamp = Bandcamp()
basedir = arguments['--base-dir'] or os.getcwd()
@ -81,7 +87,8 @@ def main():
elif arguments['--full-album'] and not album['full']:
print("Full album not available. Skipping...")
else:
bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'])
bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'],
arguments['--embed-lyrics'], arguments['--group'])
bandcamp_downloader.start(album)
if __name__ == '__main__':

View File

@ -1,9 +1,11 @@
from .bandcampjson import BandcampJSON
from datetime import datetime as dt
import json
import requests
from bs4 import BeautifulSoup
from bs4 import FeatureNotFound
from datetime import datetime
import requests
import json
from bandcamp_dl.bandcampjson import BandcampJSON
class Bandcamp:
@ -15,34 +17,44 @@ class Bandcamp:
:return: album metadata
"""
try:
r = requests.get(url)
response = requests.get(url)
except requests.exceptions.MissingSchema:
return None
try:
self.soup = BeautifulSoup(r.text, "lxml")
self.soup = BeautifulSoup(response.text, "lxml")
except FeatureNotFound:
self.soup = BeautifulSoup(r.text, "html.parser")
self.soup = BeautifulSoup(response.text, "html.parser")
self.generate_album_json()
self.tracks = self.tralbum_data_json['trackinfo']
bandcamp_json = BandcampJSON(self.soup).generate()
album_json = json.loads(bandcamp_json[0])
embed_json = json.loads(bandcamp_json[1])
page_json = json.loads(bandcamp_json[2])
album_release = self.tralbum_data_json['album_release_date']
self.tracks = album_json['trackinfo']
album_release = album_json['album_release_date']
if album_release is None:
album_release = self.tralbum_data_json['current']['release_date']
album_release = album_json['current']['release_date']
try:
album_title = self.embed_data_json['album_title']
album_title = embed_json['album_title']
except KeyError:
album_title = self.tralbum_data_json['trackinfo'][0]['title']
album_title = album_json['trackinfo'][0]['title']
try:
label = page_json['item_sellers']['{}'.format(album_json['current']['selling_band_id'])]['name']
except KeyError:
label = None
album = {
"tracks": [],
"title": album_title,
"artist": self.embed_data_json['artist'],
"artist": embed_json['artist'],
"label": label,
"full": False,
"art": "",
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year)
}
for track in self.tracks:
@ -56,7 +68,6 @@ class Bandcamp:
return album
# Possibly redundant now, we skip unavailable tracks.
def all_tracks_available(self) -> bool:
"""Verify that all tracks have a url
@ -85,27 +96,14 @@ class Bandcamp:
track_metadata['url'] = "http:" + track['file']['mp3-128']
else:
track_metadata['url'] = None
if track['has_lyrics'] is not False:
if track['lyrics'] is None:
track['lyrics'] = "lyrics unavailable"
track_metadata['lyrics'] = track['lyrics'].replace('\\r\\n', '\n')
return track_metadata
def generate_album_json(self):
"""Retrieve JavaScript dictionaries from page and generate JSON
:return: True if successful
"""
try:
embed = BandcampJSON(self.soup, "EmbedData")
tralbum = BandcampJSON(self.soup, "TralbumData")
embed_data = embed.js_to_json()
tralbum_data = tralbum.js_to_json()
self.embed_data_json = json.loads(embed_data)
self.tralbum_data_json = json.loads(tralbum_data)
except Exception as e:
print(e)
return None
return True
@staticmethod
def generate_album_url(artist: str, album: str) -> str:
"""Generate an album url based on the artist and album name

View File

@ -1,22 +1,20 @@
import os
import sys
import requests
from mutagen.mp3 import MP3
from mutagen.mp3 import MP3, EasyMP3
from mutagen.id3._frames import TIT1
from mutagen.id3._frames import TIT2
from mutagen.easyid3 import EasyID3
from mutagen.id3._frames import USLT
from slugify import slugify
if not sys.version_info[:2] == (3, 6):
import mock
from .utils import requests_patch
# DEBUG
# import logging
# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
from bandcamp_dl.utils import requests_patch
class BandcampDownloader:
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
def __init__(self, urls=None, template=None, directory=None, overwrite=False, lyrics=None, grouping=None):
"""Initialize variables we will need throughout the Class
:param urls: list of urls
@ -24,7 +22,7 @@ class BandcampDownloader:
:param directory: download location
:param overwrite: if True overwrite existing files
"""
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-06 (https://github.com/iheanyi/bandcamp-dl)'}
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-09 (https://github.com/iheanyi/bandcamp-dl)'}
self.session = requests.Session()
if type(urls) is str:
@ -34,6 +32,8 @@ class BandcampDownloader:
self.template = template
self.directory = directory
self.overwrite = overwrite
self.lyrics = lyrics
self.grouping = grouping
def start(self, album: dict):
"""Start album download process
@ -91,12 +91,16 @@ class BandcampDownloader:
for track_index, track in enumerate(album['tracks']):
track_meta = {
"artist": album['artist'],
"label": album['label'],
"album": album['title'],
"title": track['title'],
"track": track['track'],
"date": album['date']
}
if 'lyrics' in track.keys() and self.lyrics is not False:
track_meta['lyrics'] = track['lyrics']
self.num_tracks = len(album['tracks'])
self.track_num = track_index + 1
@ -186,16 +190,23 @@ class BandcampDownloader:
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
audio = MP3(filepath)
audio.delete()
audio.tags = None
audio["TIT2"] = TIT2(encoding=3, text=["title"])
audio.save(filename=None, v1=2)
audio = EasyID3(filepath)
audio = MP3(filepath)
if self.grouping and meta["label"]:
audio["TIT1"] = TIT1(encoding=3, text=meta["label"])
if self.lyrics:
audio["USLT"] = USLT(encoding=3, lang='eng', desc='', text=meta['lyrics'])
audio.save()
audio = EasyMP3(filepath)
audio["tracknumber"] = meta['track']
audio["title"] = meta['title']
audio["title"] = meta["title"]
audio["artist"] = meta['artist']
audio["album"] = meta['album']
audio["date"] = meta['date']
audio["date"] = meta["date"]
audio.save()
os.rename(filepath, filepath[:-4])

View File

@ -1,44 +1,47 @@
import demjson
import re
"""TODO
import demjson
More in-depth error messages
"""
class BandcampJSON:
def __init__(self, body, var_name: str, js_data=None):
def __init__(self, body):
self.body = body
self.var_name = var_name
self.js_data = js_data
self.regex = re.compile(r"(?<=var\s" + var_name + "\s=\s).*?(?=};)", re.DOTALL)
self.targets = ['TralbumData', 'EmbedData', 'pagedata']
self.json_data = []
def get_js(self) -> str:
"""Get <script> element containing the data we need and return the raw JS
def generate(self) -> list:
"""Iterate through targets grabbing needed data"""
for target in self.targets:
if target[:4] == 'page':
self.get_pagedata()
else:
self.regex = re.compile(r"(?<=var\s" + target + "\s=\s).*?(?=};)", re.DOTALL)
self.target = target
self.js_to_json()
return self.json_data
:return js_data: Raw JS as str
"""
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.var_name)).string
return self.js_data
def get_pagedata(self):
"""Grab bandcamp pagedata JSON"""
pagedata = self.body.find('div', {'id': 'pagedata'})['data-blob']
# Add pagedata to the list of JSON strings
self.json_data.append(pagedata)
def extract_data(self, js: str) -> str:
def get_js(self):
"""Get <script> element containing the data we need and return the raw JS"""
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.target)).string
self.extract_data(self.js_data)
def extract_data(self, js: str):
"""Extract values from JS dictionary
:param js: Raw JS
:return: Contents of dictionary as str
"""
self.js_data = self.regex.search(js).group().replace('" + "', '') + "}"
return self.js_data
def js_to_json(self) -> str:
"""Convert JavaScript dictionary to JSON
:return: JSON as str
"""
js = self.get_js()
data = self.extract_data(js)
def js_to_json(self):
"""Convert JavaScript dictionary to JSON"""
self.get_js()
# Decode with demjson first to reformat keys and lists
js_data = demjson.decode(data)
# Encode to make valid JSON
js_data = demjson.encode(js_data)
return js_data
decoded_js = demjson.decode(self.js_data)
# Encode to make valid JSON, add to list of JSON strings
self.json_data.append(demjson.encode(decoded_js))

View File

@ -10,7 +10,7 @@ here = path.abspath(path.dirname(__file__))
setup(
name='bandcamp-downloader',
version='0.0.7-08',
version='0.0.7-09',
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
long_description=open('README.rst').read(),
url='https://github.com/iheanyi/bandcamp-dl',
@ -28,7 +28,7 @@ setup(
'Programming Language :: Python :: 3.6',
],
keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'],
packages=find_packages(),
packages=find_packages(exclude=['tests']),
install_requires=[
'beautifulsoup4',
'demjson',
@ -41,7 +41,7 @@ setup(
],
entry_points={
'console_scripts': [
'bandcamp-dl=bandcamp_dl.bandcamp_dl:main',
'bandcamp-dl=bandcamp_dl.__main__:main',
],
},
)