Refactored BandcampJSON, added --group option, readability changes.
BandcampJSON now returns a list of JSON strings (album data, embed data, and page data) and is only called once. Added a `--group` option to insert a grouping tag; currently this attempts to use the artist/album label. Made some small readability changes for future work, and adjusted the imports and filenames for the last time.
master
parent
737fd8256e
commit
098ae8c6c7
|
@ -7,6 +7,8 @@ Usage:
|
|||
(<url> | --artist=<artist> --album=<album>)
|
||||
[--overwrite]
|
||||
[--no-art]
|
||||
[--embed-lyrics]
|
||||
[--group]
|
||||
bandcamp-dl (-h | --help)
|
||||
bandcamp-dl (--version)
|
||||
|
||||
|
@ -21,6 +23,8 @@ Options:
|
|||
-f --full-album Download only if all tracks are available.
|
||||
-o --overwrite Overwrite tracks that already exist. Default is False.
|
||||
-n --no-art Skip grabbing album art
|
||||
-e --embed-lyrics Embed track lyrics (If available)
|
||||
-g --group Use album/track Label as iTunes grouping
|
||||
"""
|
||||
"""
|
||||
Coded by:
|
||||
|
@ -45,13 +49,15 @@ Iheanyi:
|
|||
|
||||
import os
|
||||
import ast
|
||||
|
||||
from docopt import docopt
|
||||
from .bandcamp import Bandcamp
|
||||
from .bandcampdownloader import BandcampDownloader
|
||||
|
||||
from bandcamp_dl.bandcamp import Bandcamp
|
||||
from bandcamp_dl.bandcampdownloader import BandcampDownloader
|
||||
|
||||
|
||||
def main():
|
||||
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-06')
|
||||
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7-09')
|
||||
bandcamp = Bandcamp()
|
||||
|
||||
basedir = arguments['--base-dir'] or os.getcwd()
|
||||
|
@ -81,7 +87,8 @@ def main():
|
|||
elif arguments['--full-album'] and not album['full']:
|
||||
print("Full album not available. Skipping...")
|
||||
else:
|
||||
bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'])
|
||||
bandcamp_downloader = BandcampDownloader(url, arguments['--template'], basedir, arguments['--overwrite'],
|
||||
arguments['--embed-lyrics'], arguments['--group'])
|
||||
bandcamp_downloader.start(album)
|
||||
|
||||
if __name__ == '__main__':
|
|
@ -1,9 +1,11 @@
|
|||
from .bandcampjson import BandcampJSON
|
||||
from datetime import datetime as dt
|
||||
import json
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import FeatureNotFound
|
||||
from datetime import datetime
|
||||
import requests
|
||||
import json
|
||||
|
||||
from bandcamp_dl.bandcampjson import BandcampJSON
|
||||
|
||||
|
||||
class Bandcamp:
|
||||
|
@ -15,34 +17,44 @@ class Bandcamp:
|
|||
:return: album metadata
|
||||
"""
|
||||
try:
|
||||
r = requests.get(url)
|
||||
response = requests.get(url)
|
||||
except requests.exceptions.MissingSchema:
|
||||
return None
|
||||
|
||||
try:
|
||||
self.soup = BeautifulSoup(r.text, "lxml")
|
||||
self.soup = BeautifulSoup(response.text, "lxml")
|
||||
except FeatureNotFound:
|
||||
self.soup = BeautifulSoup(r.text, "html.parser")
|
||||
self.soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
self.generate_album_json()
|
||||
self.tracks = self.tralbum_data_json['trackinfo']
|
||||
bandcamp_json = BandcampJSON(self.soup).generate()
|
||||
album_json = json.loads(bandcamp_json[0])
|
||||
embed_json = json.loads(bandcamp_json[1])
|
||||
page_json = json.loads(bandcamp_json[2])
|
||||
|
||||
album_release = self.tralbum_data_json['album_release_date']
|
||||
self.tracks = album_json['trackinfo']
|
||||
|
||||
album_release = album_json['album_release_date']
|
||||
if album_release is None:
|
||||
album_release = self.tralbum_data_json['current']['release_date']
|
||||
album_release = album_json['current']['release_date']
|
||||
|
||||
try:
|
||||
album_title = self.embed_data_json['album_title']
|
||||
album_title = embed_json['album_title']
|
||||
except KeyError:
|
||||
album_title = self.tralbum_data_json['trackinfo'][0]['title']
|
||||
album_title = album_json['trackinfo'][0]['title']
|
||||
|
||||
try:
|
||||
label = page_json['item_sellers']['{}'.format(album_json['current']['selling_band_id'])]['name']
|
||||
except KeyError:
|
||||
label = None
|
||||
|
||||
album = {
|
||||
"tracks": [],
|
||||
"title": album_title,
|
||||
"artist": self.embed_data_json['artist'],
|
||||
"artist": embed_json['artist'],
|
||||
"label": label,
|
||||
"full": False,
|
||||
"art": "",
|
||||
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
|
||||
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year)
|
||||
}
|
||||
|
||||
for track in self.tracks:
|
||||
|
@ -56,7 +68,6 @@ class Bandcamp:
|
|||
|
||||
return album
|
||||
|
||||
# Possibly redundant now, we skip unavailable tracks.
|
||||
def all_tracks_available(self) -> bool:
|
||||
"""Verify that all tracks have a url
|
||||
|
||||
|
@ -85,27 +96,14 @@ class Bandcamp:
|
|||
track_metadata['url'] = "http:" + track['file']['mp3-128']
|
||||
else:
|
||||
track_metadata['url'] = None
|
||||
|
||||
if track['has_lyrics'] is not False:
|
||||
if track['lyrics'] is None:
|
||||
track['lyrics'] = "lyrics unavailable"
|
||||
track_metadata['lyrics'] = track['lyrics'].replace('\\r\\n', '\n')
|
||||
|
||||
return track_metadata
|
||||
|
||||
def generate_album_json(self):
|
||||
"""Retrieve JavaScript dictionaries from page and generate JSON
|
||||
|
||||
:return: True if successful
|
||||
"""
|
||||
try:
|
||||
embed = BandcampJSON(self.soup, "EmbedData")
|
||||
tralbum = BandcampJSON(self.soup, "TralbumData")
|
||||
|
||||
embed_data = embed.js_to_json()
|
||||
tralbum_data = tralbum.js_to_json()
|
||||
|
||||
self.embed_data_json = json.loads(embed_data)
|
||||
self.tralbum_data_json = json.loads(tralbum_data)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return None
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def generate_album_url(artist: str, album: str) -> str:
|
||||
"""Generate an album url based on the artist and album name
|
||||
|
|
|
@ -1,22 +1,20 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
import requests
|
||||
from mutagen.mp3 import MP3
|
||||
from mutagen.mp3 import MP3, EasyMP3
|
||||
from mutagen.id3._frames import TIT1
|
||||
from mutagen.id3._frames import TIT2
|
||||
from mutagen.easyid3 import EasyID3
|
||||
from mutagen.id3._frames import USLT
|
||||
from slugify import slugify
|
||||
|
||||
if not sys.version_info[:2] == (3, 6):
|
||||
import mock
|
||||
from .utils import requests_patch
|
||||
|
||||
# DEBUG
|
||||
# import logging
|
||||
# logging.basicConfig(filename='bandcamp-dl.log', level=logging.INFO)
|
||||
from bandcamp_dl.utils import requests_patch
|
||||
|
||||
|
||||
class BandcampDownloader:
|
||||
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
|
||||
def __init__(self, urls=None, template=None, directory=None, overwrite=False, lyrics=None, grouping=None):
|
||||
"""Initialize variables we will need throughout the Class
|
||||
|
||||
:param urls: list of urls
|
||||
|
@ -24,7 +22,7 @@ class BandcampDownloader:
|
|||
:param directory: download location
|
||||
:param overwrite: if True overwrite existing files
|
||||
"""
|
||||
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-06 (https://github.com/iheanyi/bandcamp-dl)'}
|
||||
self.headers = {'user_agent': 'bandcamp-dl/0.0.7-09 (https://github.com/iheanyi/bandcamp-dl)'}
|
||||
self.session = requests.Session()
|
||||
|
||||
if type(urls) is str:
|
||||
|
@ -34,6 +32,8 @@ class BandcampDownloader:
|
|||
self.template = template
|
||||
self.directory = directory
|
||||
self.overwrite = overwrite
|
||||
self.lyrics = lyrics
|
||||
self.grouping = grouping
|
||||
|
||||
def start(self, album: dict):
|
||||
"""Start album download process
|
||||
|
@ -91,12 +91,16 @@ class BandcampDownloader:
|
|||
for track_index, track in enumerate(album['tracks']):
|
||||
track_meta = {
|
||||
"artist": album['artist'],
|
||||
"label": album['label'],
|
||||
"album": album['title'],
|
||||
"title": track['title'],
|
||||
"track": track['track'],
|
||||
"date": album['date']
|
||||
}
|
||||
|
||||
if 'lyrics' in track.keys() and self.lyrics is not False:
|
||||
track_meta['lyrics'] = track['lyrics']
|
||||
|
||||
self.num_tracks = len(album['tracks'])
|
||||
self.track_num = track_index + 1
|
||||
|
||||
|
@ -186,16 +190,23 @@ class BandcampDownloader:
|
|||
sys.stdout.write("\r({}/{}) [{}] :: Encoding: {}".format(self.track_num, self.num_tracks, "=" * 50, filename))
|
||||
|
||||
audio = MP3(filepath)
|
||||
audio.delete()
|
||||
audio.tags = None
|
||||
audio["TIT2"] = TIT2(encoding=3, text=["title"])
|
||||
audio.save(filename=None, v1=2)
|
||||
|
||||
audio = EasyID3(filepath)
|
||||
audio = MP3(filepath)
|
||||
if self.grouping and meta["label"]:
|
||||
audio["TIT1"] = TIT1(encoding=3, text=meta["label"])
|
||||
if self.lyrics:
|
||||
audio["USLT"] = USLT(encoding=3, lang='eng', desc='', text=meta['lyrics'])
|
||||
audio.save()
|
||||
|
||||
audio = EasyMP3(filepath)
|
||||
audio["tracknumber"] = meta['track']
|
||||
audio["title"] = meta['title']
|
||||
audio["title"] = meta["title"]
|
||||
audio["artist"] = meta['artist']
|
||||
audio["album"] = meta['album']
|
||||
audio["date"] = meta['date']
|
||||
audio["date"] = meta["date"]
|
||||
audio.save()
|
||||
|
||||
os.rename(filepath, filepath[:-4])
|
||||
|
|
|
@ -1,44 +1,47 @@
|
|||
import demjson
|
||||
import re
|
||||
|
||||
"""TODO
|
||||
import demjson
|
||||
|
||||
More in-depth error messages
|
||||
"""
|
||||
|
||||
class BandcampJSON:
|
||||
def __init__(self, body, var_name: str, js_data=None):
|
||||
def __init__(self, body):
|
||||
self.body = body
|
||||
self.var_name = var_name
|
||||
self.js_data = js_data
|
||||
self.regex = re.compile(r"(?<=var\s" + var_name + "\s=\s).*?(?=};)", re.DOTALL)
|
||||
self.targets = ['TralbumData', 'EmbedData', 'pagedata']
|
||||
self.json_data = []
|
||||
|
||||
def get_js(self) -> str:
|
||||
"""Get <script> element containing the data we need and return the raw JS
|
||||
def generate(self) -> list:
|
||||
"""Iterate through targets grabbing needed data"""
|
||||
for target in self.targets:
|
||||
if target[:4] == 'page':
|
||||
self.get_pagedata()
|
||||
else:
|
||||
self.regex = re.compile(r"(?<=var\s" + target + "\s=\s).*?(?=};)", re.DOTALL)
|
||||
self.target = target
|
||||
self.js_to_json()
|
||||
return self.json_data
|
||||
|
||||
:return js_data: Raw JS as str
|
||||
"""
|
||||
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.var_name)).string
|
||||
return self.js_data
|
||||
def get_pagedata(self):
|
||||
"""Grab bandcamp pagedata JSON"""
|
||||
pagedata = self.body.find('div', {'id': 'pagedata'})['data-blob']
|
||||
# Add pagedata to the list of JSON strings
|
||||
self.json_data.append(pagedata)
|
||||
|
||||
def extract_data(self, js: str) -> str:
|
||||
def get_js(self):
|
||||
"""Get <script> element containing the data we need and return the raw JS"""
|
||||
self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.target)).string
|
||||
self.extract_data(self.js_data)
|
||||
|
||||
def extract_data(self, js: str):
|
||||
"""Extract values from JS dictionary
|
||||
|
||||
:param js: Raw JS
|
||||
:return: Contents of dictionary as str
|
||||
"""
|
||||
self.js_data = self.regex.search(js).group().replace('" + "', '') + "}"
|
||||
return self.js_data
|
||||
|
||||
def js_to_json(self) -> str:
|
||||
"""Convert JavaScript dictionary to JSON
|
||||
|
||||
:return: JSON as str
|
||||
"""
|
||||
js = self.get_js()
|
||||
data = self.extract_data(js)
|
||||
def js_to_json(self):
|
||||
"""Convert JavaScript dictionary to JSON"""
|
||||
self.get_js()
|
||||
# Decode with demjson first to reformat keys and lists
|
||||
js_data = demjson.decode(data)
|
||||
# Encode to make valid JSON
|
||||
js_data = demjson.encode(js_data)
|
||||
return js_data
|
||||
decoded_js = demjson.decode(self.js_data)
|
||||
# Encode to make valid JSON, add to list of JSON strings
|
||||
self.json_data.append(demjson.encode(decoded_js))
|
||||
|
|
6
setup.py
6
setup.py
|
@ -10,7 +10,7 @@ here = path.abspath(path.dirname(__file__))
|
|||
|
||||
setup(
|
||||
name='bandcamp-downloader',
|
||||
version='0.0.7-08',
|
||||
version='0.0.7-09',
|
||||
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
|
||||
long_description=open('README.rst').read(),
|
||||
url='https://github.com/iheanyi/bandcamp-dl',
|
||||
|
@ -28,7 +28,7 @@ setup(
|
|||
'Programming Language :: Python :: 3.6',
|
||||
],
|
||||
keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'],
|
||||
packages=find_packages(),
|
||||
packages=find_packages(exclude=['tests']),
|
||||
install_requires=[
|
||||
'beautifulsoup4',
|
||||
'demjson',
|
||||
|
@ -41,7 +41,7 @@ setup(
|
|||
],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'bandcamp-dl=bandcamp_dl.bandcamp_dl:main',
|
||||
'bandcamp-dl=bandcamp_dl.__main__:main',
|
||||
],
|
||||
},
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue