Refactored BandcampJSON, added --group option, readability changes.
BandcampJSON now returns a list of JSON strings (album data, embed data, and page data) and is only called once. Added a `--group` option to insert a grouping tag; currently this attempts to use the artist/album label. Made some small readability changes for future work, and adjusted the imports and filenames for the last time.
master
parent
737fd8256e
commit
098ae8c6c7
|
@ -7,6 +7,8 @@ Usage:
|
||||||
(<url> | --artist=<artist> --album=<album>)
|
(<url> | --artist=<artist> --album=<album>)
|
||||||
[--overwrite]
|
[--overwrite]
|
||||||
[--no-art]
|
[--no-art]
|
||||||
|
[--embed-lyrics]
|
||||||
|
[--group]
|
||||||
bandcamp-dl (-h | --help)
|
bandcamp-dl (-h | --help)
|
||||||
bandcamp-dl (--version)
|
bandcamp-dl (--version)
|
||||||
|
|
||||||
|
@ -21,6 +23,8 @@ Options:
|
||||||
-f --full-album Download only if all tracks are available.
|
-f --full-album Download only if all tracks are available.
|
||||||
-o --overwrite Overwrite tracks that already exist. Default is False.
|
-o --overwrite Overwrite tracks that already exist. Default is False.
|
||||||
-n --no-art Skip grabbing album art
|
-n --no-art Skip grabbing album art
|
||||||
|
-e --embed-lyrics Embed track lyrics (If available)
|
||||||
|
-g --group Use album/track Label as iTunes grouping
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
Coded by:
|
Coded by:
|
||||||
|
@ -45,13 +49,15 @@ Iheanyi:
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import ast
|
import ast
|
||||||
|
|
||||||
from docopt import docopt
|
from docopt import docopt
|
||||||
from .bandcamp import Bandcamp
|
|
||||||
from .bandcampdownloader import BandcampDownloader
|
from bandcamp_dl.bandcamp import Bandcamp
|
||||||
|
from bandcamp_dl.bandcampdownloader import BandcampDownloader
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-06')
|
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-09')
|
||||||
bandcamp = Bandcamp()
|
bandcamp = Bandcamp()
|
||||||
|
|
||||||
basedir = arguments['--base-dir'] or os.getcwd()
|
basedir = arguments['--base-dir'] or os.getcwd()
|
||||||
|
@ -81,7 +87,8 @@ def main():
|
||||||
elif arguments['--full-album'] and not album['full']:
|
elif arguments['--full-album'] and not album['full']:
|
||||||
print("Full album not available. Skipping...")
|
print("Full album not available. Skipping...")
|
||||||
else:
|
else:
|
||||||
bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'])
|
bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'],
|
||||||
|
arguments['--embed-lyrics'], arguments['--group'])
|
||||||
bandcamp_downloader.start(album)
|
bandcamp_downloader.start(album)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
|
@ -1,9 +1,11 @@
|
||||||
from .bandcampjson import BandcampJSON
|
from datetime import datetime as dt
|
||||||
|
import json
|
||||||
|
|
||||||
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4 import FeatureNotFound
|
from bs4 import FeatureNotFound
|
||||||
from datetime import datetime
|
|
||||||
import requests
|
from bandcamp_dl.bandcampjson import BandcampJSON
|
||||||
import json
|
|
||||||
|
|
||||||
|
|
||||||
class Bandcamp:
|
class Bandcamp:
|
||||||
|
@ -15,34 +17,44 @@ class Bandcamp:
|
||||||
:return: album metadata
|
:return: album metadata
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
r = requests.get(url)
|
response = requests.get(url)
|
||||||
except requests.exceptions.MissingSchema:
|
except requests.exceptions.MissingSchema:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.soup = BeautifulSoup(r.text, "lxml")
|
self.soup = BeautifulSoup(response.text, "lxml")
|
||||||
except FeatureNotFound:
|
except FeatureNotFound:
|
||||||
self.soup = BeautifulSoup(r.text, "html.parser")
|
self.soup = BeautifulSoup(response.text, "html.parser")
|
||||||
|
|
||||||
self.generate_album_json()
|
bandcamp_json = BandcampJSON(self.soup).generate()
|
||||||
self.tracks = self.tralbum_data_json['trackinfo']
|
album_json = json.loads(bandcamp_json[0])
|
||||||
|
embed_json = json.loads(bandcamp_json[1])
|
||||||
|
page_json = json.loads(bandcamp_json[2])
|
||||||
|
|
||||||
album_release = self.tralbum_data_json['album_release_date']
|
self.tracks = album_json['trackinfo']
|
||||||
|
|
||||||
|
album_release = album_json['album_release_date']
|
||||||
if album_release is None:
|
if album_release is None:
|
||||||
album_release = self.tralbum_data_json['current']['release_date']
|
album_release = album_json['current']['release_date']
|
||||||
|
|
||||||
try:
|
try:
|
||||||
album_title = self.embed_data_json['album_title']
|
album_title = embed_json['album_title']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
album_title = self.tralbum_data_json['trackinfo'][0]['title']
|
album_title = album_json['trackinfo'][0]['title']
|
||||||
|
|
||||||
|
try:
|
||||||
|
label = page_json['item_sellers']['{}'.format(album_json['current']['selling_band_id'])]['name']
|
||||||
|
except KeyError:
|
||||||
|
label = None
|
||||||
|
|
||||||
album = {
|
album = {
|
||||||
"tracks": [],
|
"tracks": [],
|
||||||
"title": album_title,
|
"title": album_title,
|
||||||
"artist": self.embed_data_json['artist'],
|
"artist": embed_json['artist'],
|
||||||
|
"label": label,
|
||||||
"full": False,
|
"full": False,
|
||||||
"art": "",
|
"art": "",
|
||||||
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
|
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year)
|
||||||
}
|
}
|
||||||
|
|
||||||
for track in self.tracks:
|
for track in self.tracks:
|
||||||
|
@ -56,7 +68,6 @@ class Bandcamp:
|
||||||
|
|
||||||
return album
|
return album
|
||||||
|
|
||||||
# Possibly redundant now, we skip unavailable tracks.
|
|
||||||
def all_tracks_available(self) -> bool:
|
def all_tracks_available(self) -> bool:
|
||||||
"""Verify that all tracks have a url
|
"""Verify that all tracks have a url
|
||||||
|
|
||||||
|
@ -85,27 +96,14 @@ class Bandcamp:
|
||||||
track_metadata['url'] = "http:" + track['file']['mp3-128']
|
track_metadata['url'] = "http:" + track['file']['mp3-128']
|
||||||
else:
|
else:
|
||||||
track_metadata['url'] = None
|
track_metadata['url'] = None
|
||||||
|
|
||||||
|
if track['has_lyrics'] is not False:
|
||||||
|
if track['lyrics'] is None:
|
||||||
|
track['lyrics'] = "lyrics unavailable"
|
||||||
|
track_metadata['lyrics'] = track['lyrics'].replace('\\r\\n', '\n')
|
||||||
|
|
||||||
return track_metadata
|
return track_metadata
|
||||||
|
|
||||||
def generate_album_json(self):
|
|
||||||
"""Retrieve JavaScript dictionaries from page and generate JSON
|
|
||||||
|
|
||||||
:return: True if successful
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
embed = BandcampJSON(self.soup, "EmbedData")
|
|
||||||
tralbum = BandcampJSON(self.soup, "TralbumData")
|
|
||||||
|
|
||||||
embed_data = embed.js_to_json()
|
|
||||||
tralbum_data = tralbum.js_to_json()
|
|
||||||
|
|
||||||
self.embed_data_json = json.loads(embed_data)
|
|
||||||
self.tralbum_data_json = json.loads(tralbum_data)
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
return None
|
|
||||||
return True
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def generate_album_url(artist: str, album: str) -> str:
|
def generate_album_url(artist: str, album: str) -> str:
|
||||||
"""Generate an album url based on the artist and album name
|
"""Generate an album url based on the artist and album name
|
||||||
|
|
|
@ -1,22 +1,20 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from mutagen.mp3 import MP3
|
from mutagen.mp3 import MP3, EasyMP3
|
||||||
|
from mutagen.id3._frames import TIT1
|
||||||
from mutagen.id3._frames import TIT2
|
from mutagen.id3._frames import TIT2
|
||||||
from mutagen.easyid3 import EasyID3
|
from mutagen.id3._frames import USLT
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
|
||||||
if not sys.version_info[:2] == (3, 6):
|
if not sys.version_info[:2] == (3, 6):
|
||||||
import mock
|
import mock
|
||||||
from .utils import requests_patch
|
from bandcamp_dl.utils import requests_patch
|
||||||
|
|
||||||
# DEBUG
|
|
||||||
# import logging
|
|
||||||
# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
|
|
||||||
|
|
||||||
|
|
||||||
class BandcampDownloader:
|
class BandcampDownloader:
|
||||||
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
|
def __init__(self, urls=None, template=None, directory=None, overwrite=False, lyrics=None, grouping=None):
|
||||||
"""Initialize variables we will need throughout the Class
|
"""Initialize variables we will need throughout the Class
|
||||||
|
|
||||||
:param urls: list of urls
|
:param urls: list of urls
|
||||||
|
@ -24,7 +22,7 @@ class BandcampDownloader:
|
||||||
:param directory: download location
|
:param directory: download location
|
||||||
:param overwrite: if True overwrite existing files
|
:param overwrite: if True overwrite existing files
|
||||||
"""
|
"""
|
||||||
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-06 (https://github.com/iheanyi/bandcamp-dl)'}
|
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-09 (https://github.com/iheanyi/bandcamp-dl)'}
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
|
|
||||||
if type(urls) is str:
|
if type(urls) is str:
|
||||||
|
@ -34,6 +32,8 @@ class BandcampDownloader:
|
||||||
self.template = template
|
self.template = template
|
||||||
self.directory = directory
|
self.directory = directory
|
||||||
self.overwrite = overwrite
|
self.overwrite = overwrite
|
||||||
|
self.lyrics = lyrics
|
||||||
|
self.grouping = grouping
|
||||||
|
|
||||||
def start(self, album: dict):
|
def start(self, album: dict):
|
||||||
"""Start album download process
|
"""Start album download process
|
||||||
|
@ -91,12 +91,16 @@ class BandcampDownloader:
|
||||||
for track_index, track in enumerate(album['tracks']):
|
for track_index, track in enumerate(album['tracks']):
|
||||||
track_meta = {
|
track_meta = {
|
||||||
"artist": album['artist'],
|
"artist": album['artist'],
|
||||||
|
"label": album['label'],
|
||||||
"album": album['title'],
|
"album": album['title'],
|
||||||
"title": track['title'],
|
"title": track['title'],
|
||||||
"track": track['track'],
|
"track": track['track'],
|
||||||
"date": album['date']
|
"date": album['date']
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if 'lyrics' in track.keys() and self.lyrics is not False:
|
||||||
|
track_meta['lyrics'] = track['lyrics']
|
||||||
|
|
||||||
self.num_tracks = len(album['tracks'])
|
self.num_tracks = len(album['tracks'])
|
||||||
self.track_num = track_index + 1
|
self.track_num = track_index + 1
|
||||||
|
|
||||||
|
@ -186,16 +190,23 @@ class BandcampDownloader:
|
||||||
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
|
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
|
||||||
|
|
||||||
audio = MP3(filepath)
|
audio = MP3(filepath)
|
||||||
audio.delete()
|
audio.tags = None
|
||||||
audio["TIT2"] = TIT2(encoding=3, text=["title"])
|
audio["TIT2"] = TIT2(encoding=3, text=["title"])
|
||||||
audio.save(filename=None, v1=2)
|
audio.save(filename=None, v1=2)
|
||||||
|
|
||||||
audio = EasyID3(filepath)
|
audio = MP3(filepath)
|
||||||
|
if self.grouping and meta["label"]:
|
||||||
|
audio["TIT1"] = TIT1(encoding=3, text=meta["label"])
|
||||||
|
if self.lyrics:
|
||||||
|
audio["USLT"] = USLT(encoding=3, lang='eng', desc='', text=meta['lyrics'])
|
||||||
|
audio.save()
|
||||||
|
|
||||||
|
audio = EasyMP3(filepath)
|
||||||
audio["tracknumber"] = meta['track']
|
audio["tracknumber"] = meta['track']
|
||||||
audio["title"] = meta['title']
|
audio["title"] = meta["title"]
|
||||||
audio["artist"] = meta['artist']
|
audio["artist"] = meta['artist']
|
||||||
audio["album"] = meta['album']
|
audio["album"] = meta['album']
|
||||||
audio["date"] = meta['date']
|
audio["date"] = meta["date"]
|
||||||
audio.save()
|
audio.save()
|
||||||
|
|
||||||
os.rename(filepath, filepath[:-4])
|
os.rename(filepath, filepath[:-4])
|
||||||
|
|
|
@ -1,44 +1,47 @@
|
||||||
import demjson
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
"""TODO
|
import demjson
|
||||||
|
|
||||||
More in-depth error messages
|
|
||||||
"""
|
|
||||||
|
|
||||||
class BandcampJSON:
|
class BandcampJSON:
|
||||||
def __init__(self, body, var_name: str, js_data=None):
|
def __init__(self, body):
|
||||||
self.body = body
|
self.body = body
|
||||||
self.var_name = var_name
|
self.targets = ['TralbumData', 'EmbedData', 'pagedata']
|
||||||
self.js_data = js_data
|
self.json_data = []
|
||||||
self.regex = re.compile(r"(?<=var\s" + var_name + "\s=\s).*?(?=};)", re.DOTALL)
|
|
||||||
|
|
||||||
def get_js(self) -> str:
|
def generate(self) -> list:
|
||||||
"""Get <script> element containing the data we need and return the raw JS
|
"""Iterate through targets grabbing needed data"""
|
||||||
|
for target in self.targets:
|
||||||
|
if target[:4] == 'page':
|
||||||
|
self.get_pagedata()
|
||||||
|
else:
|
||||||
|
self.regex = re.compile(r"(?<=var\s" + target + "\s=\s).*?(?=};)", re.DOTALL)
|
||||||
|
self.target = target
|
||||||
|
self.js_to_json()
|
||||||
|
return self.json_data
|
||||||
|
|
||||||
:return js_data: Raw JS as str
|
def get_pagedata(self):
|
||||||
"""
|
"""Grab bandcamp pagedata JSON"""
|
||||||
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.var_name)).string
|
pagedata = self.body.find('div', {'id': 'pagedata'})['data-blob']
|
||||||
return self.js_data
|
# Add pagedata to the list of JSON strings
|
||||||
|
self.json_data.append(pagedata)
|
||||||
|
|
||||||
def extract_data(self, js: str) -> str:
|
def get_js(self):
|
||||||
|
"""Get <script> element containing the data we need and return the raw JS"""
|
||||||
|
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.target)).string
|
||||||
|
self.extract_data(self.js_data)
|
||||||
|
|
||||||
|
def extract_data(self, js: str):
|
||||||
"""Extract values from JS dictionary
|
"""Extract values from JS dictionary
|
||||||
|
|
||||||
:param js: Raw JS
|
:param js: Raw JS
|
||||||
:return: Contents of dictionary as str
|
|
||||||
"""
|
"""
|
||||||
self.js_data = self.regex.search(js).group().replace('" + "', '') + "}"
|
self.js_data = self.regex.search(js).group().replace('" + "', '') + "}"
|
||||||
return self.js_data
|
|
||||||
|
|
||||||
def js_to_json(self) -> str:
|
def js_to_json(self):
|
||||||
"""Convert JavaScript dictionary to JSON
|
"""Convert JavaScript dictionary to JSON"""
|
||||||
|
self.get_js()
|
||||||
:return: JSON as str
|
|
||||||
"""
|
|
||||||
js = self.get_js()
|
|
||||||
data = self.extract_data(js)
|
|
||||||
# Decode with demjson first to reformat keys and lists
|
# Decode with demjson first to reformat keys and lists
|
||||||
js_data = demjson.decode(data)
|
decoded_js = demjson.decode(self.js_data)
|
||||||
# Encode to make valid JSON
|
# Encode to make valid JSON, add to list of JSON strings
|
||||||
js_data = demjson.encode(js_data)
|
self.json_data.append(demjson.encode(decoded_js))
|
||||||
return js_data
|
|
||||||
|
|
6
setup.py
6
setup.py
|
@ -10,7 +10,7 @@ here = path.abspath(path.dirname(__file__))
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='bandcamp-downloader',
|
name='bandcamp-downloader',
|
||||||
version='0.0.7-08',
|
version='0.0.7-09',
|
||||||
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
|
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
|
||||||
long_description=open('README.rst').read(),
|
long_description=open('README.rst').read(),
|
||||||
url='https://github.com/iheanyi/bandcamp-dl',
|
url='https://github.com/iheanyi/bandcamp-dl',
|
||||||
|
@ -28,7 +28,7 @@ setup(
|
||||||
'Programming Language :: Python :: 3.6',
|
'Programming Language :: Python :: 3.6',
|
||||||
],
|
],
|
||||||
keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'],
|
keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'],
|
||||||
packages=find_packages(),
|
packages=find_packages(exclude=['tests']),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'beautifulsoup4',
|
'beautifulsoup4',
|
||||||
'demjson',
|
'demjson',
|
||||||
|
@ -41,7 +41,7 @@ setup(
|
||||||
],
|
],
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'bandcamp-dl=bandcamp_dl.bandcamp_dl:main',
|
'bandcamp-dl=bandcamp_dl.__main__:main',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue