Revert "Preliminary 0.0.7 changes"

This reverts commit 8cc97905a7.
master
Anthony Forsberg 2017-01-04 14:08:56 -05:00
parent 8cc97905a7
commit d8ce58e66d
11 changed files with 194 additions and 183 deletions

1
.gitignore vendored
View File

@ -38,4 +38,3 @@ nosetests.xml
.pydevproject
*.iml
*.xml
bandcamp_dl/asyncdownloader.py

View File

@ -17,9 +17,3 @@ Version 0.0.6
- [Enhancement] Individual track downloads work now.
- [Bugfix] Fixed imports, now working when installed via pip.
- [Note] Last version to officially support Python 2.7.x
Version 0.0.7
-------------
- [Dependency] Slimit is no longer required
- [Dependency] Ply is no longer required
- [Dependency] demjson is now required

View File

@ -24,7 +24,7 @@ Description
===========
bandcamp-dl is a small command-line app to download audio from
BandCamp.com. It requires the Python interpreter, version 3.5.x and is
BandCamp.com. It requires the Python interpreter, version 2.7.x - 3.5.x and is
not platform specific. It is released to the public domain, which means
you can modify it, redistribute it or use it however you like.
@ -209,11 +209,14 @@ related to bandcamp-dl, by all means, go ahead and report the bug.
Dependencies
============
- `BeautifulSoup <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
- `Demjson <https://pypi.python.org/pypi/demjson>`_ - JavaScript dict to JSON conversion
- `BeautifulSoup <https://pypi.python.org/pypi/beautifulsoup4>`_ -
HTML Parsing
- `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving the HTML
- `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving
the HTML
- `Slimit <https://pypi.python.org/pypi/slimit>`_ - Javascript parsing
- `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ -
A slug generator that turns strings into unicode slugs.
Copyright
=========

View File

@ -1,120 +1,119 @@
from .bandcampjson import BandcampJSON
from bs4 import BeautifulSoup
from bs4 import FeatureNotFound
import requests
import json
from .jsobj import read_js_object
class Bandcamp:
def parse(self, url: str, art: bool=True) -> dict or None:
"""
Requests the page, cherry picks album info
:param url: album/track url
:param art: if True download album art
:return: album metadata
"""
def parse(self, url, no_art=True):
try:
r = requests.get(url)
except requests.exceptions.MissingSchema:
return None
self.no_art = no_art
if r.status_code is not 200:
return None
try:
self.soup = BeautifulSoup(r.text, "lxml")
except FeatureNotFound:
except:
self.soup = BeautifulSoup(r.text, "html.parser")
self.generate_album_json()
self.tracks = self.tralbum_data_json['trackinfo']
album = {
"tracks": [],
"title": self.embed_data_json['album_title'],
"artist": self.embed_data_json['artist'],
"title": "",
"artist": "",
"full": False,
"art": "",
"date": self.tralbum_data_json['album_release_date']
"date": ""
}
for track in self.tracks:
track = self.get_track_metadata(track)
album_meta = self.extract_album_meta_data(r)
album['artist'] = album_meta['artist']
album['title'] = album_meta['title']
album['date'] = album_meta['date']
for track in album_meta['tracks']:
track = self.get_track_meta_data(track)
album['tracks'].append(track)
album['full'] = self.all_tracks_available()
if art:
album['full'] = self.all_tracks_available(album)
if self.no_art:
album['art'] = self.get_album_art()
return album
def all_tracks_available(self) -> bool:
"""
Verify that all tracks have a url
:return: True if all urls accounted for
"""
for track in self.tracks:
if track['file']['mp3-128'] is None:
def all_tracks_available(self, album):
for track in album['tracks']:
if track['url'] is None:
return False
return True
@staticmethod
def get_track_metadata(track: dict) -> dict:
"""
Extract individual track metadata
def is_basestring(self, obj):
    """
    Tell whether obj is a string-like value

    :param obj: value to test
    :return: True if obj is a str, bytes or bytearray instance, else False
    """
    # isinstance accepts a tuple of types, so one call covers all three.
    return isinstance(obj, (str, bytes, bytearray))
:param track: track dict
:return: track metadata dict
"""
track_metadata = {
"duration": track['duration'],
"track": str(track['track_num']),
"title": track['title'],
"url": None
}
def get_track_meta_data(self, track):
new_track = {}
if not self.is_basestring(track['file']):
if 'mp3-128' in track['file']:
new_track['url'] = track['file']['mp3-128']
else:
new_track['url'] = None
if 'mp3-128' in track['file']:
track_metadata['url'] = "http:" + track['file']['mp3-128']
return track_metadata
new_track['duration'] = track['duration']
new_track['track'] = track['track_num']
new_track['title'] = track['title']
def generate_album_json(self):
"""
Retrieve JavaScript dictionaries from page and generate JSON
return new_track
:return: True if successful
"""
try:
embed = BandcampJSON(self.soup, "EmbedData")
tralbum = BandcampJSON(self.soup, "TralbumData")
def extract_album_meta_data(self, request):
album = {}
embed_data = embed.js_to_json()
tralbum_data = tralbum.js_to_json()
embedData = self.get_embed_string_block(request)
self.embed_data_json = json.loads(embed_data)
self.tralbum_data_json = json.loads(tralbum_data)
except Exception as e:
print(e)
return None
return True
block = request.text.split("var TralbumData = ")
stringBlock = block[1]
stringBlock = stringBlock.split("};")[0] + "};"
stringBlock = read_js_object(u"var TralbumData = {}".format(stringBlock))
if 'album_title' not in embedData['EmbedData']:
album['title'] = "Unknown Album"
else:
album['title'] = embedData['EmbedData']['album_title']
album['artist'] = stringBlock['TralbumData']['artist']
album['tracks'] = stringBlock['TralbumData']['trackinfo']
if stringBlock['TralbumData']['album_release_date'] == "null":
album['date'] = ""
else:
album['date'] = stringBlock['TralbumData']['album_release_date'].split()[2]
return album
@staticmethod
def generate_album_url(artist: str, album: str) -> str:
"""
Generate an album url based on the artist and album name
:param artist: artist name
:param album: album name
:return: album url as str
"""
def generate_album_url(artist, album):
return "http://{0}.bandcamp.com/album/{1}".format(artist, album)
def get_album_art(self) -> str:
"""
Find and retrieve album art url from page
:return: url as str
"""
def get_album_art(self):
try:
url = self.soup.find(id='tralbumArt').find_all('img')[0]['src']
return url
except None:
except:
pass
def get_embed_string_block(self, request):
    """
    Cut the EmbedData object literal out of the page source and parse it

    :param request: object whose ``text`` attribute holds the page source
    :return: whatever read_js_object returns for the ``var EmbedData = {...};`` statement
    """
    marker = "var EmbedData = "
    # Segment right after the first occurrence of the marker
    # (raises IndexError when the marker is absent).
    after_marker = request.text.split(marker)[1]
    # Keep everything up to the first "};" and put that terminator back.
    literal = after_marker.split("};")[0] + "};"
    return read_js_object(u"var EmbedData = {}".format(literal))

View File

@ -49,7 +49,7 @@ from .bandcampdownloader import BandcampDownloader
def main():
arguments = docopt(__doc__, version='bandcamp-dl 0.0.7')
arguments = docopt(__doc__, version='bandcamp-dl 0.0.6-01')
bandcamp = Bandcamp()
if arguments['--artist'] and arguments['--album']:
@ -73,4 +73,3 @@ def main():
if __name__ == '__main__':
main()

View File

@ -9,14 +9,6 @@ from slugify import slugify
class BandcampDownloader:
def __init__(self, urls=None, template=None, directory=None, overwrite=False):
"""
Initialization function
:param urls: list of urls
:param template: filename template
:param directory: download location
:param overwrite: if True overwrite existing files
"""
if type(urls) is str:
self.urls = [urls]
@ -25,22 +17,11 @@ class BandcampDownloader:
self.directory = directory
self.overwrite = overwrite
def start(self, album: dict):
"""
Start album download process
:param album: album dict
"""
def start(self, album):
print("Starting download process.")
self.download_album(album)
def template_to_path(self, track: dict) -> str:
"""
Create valid filepath based on track metadata
:param track: track metadata
:return: filepath
"""
def template_to_path(self, track):
path = self.template
path = path.replace("%{artist}", slugify(track['artist']))
path = path.replace("%{album}", slugify(track['album']))
@ -50,27 +31,14 @@ class BandcampDownloader:
return path
@staticmethod
def create_directory(filename: str) -> str:
"""
Create directory based on filename if it doesn't exist
:param filename: full filename
:return: directory path
"""
def create_directory(self, filename):
directory = os.path.dirname(filename)
if not os.path.exists(directory):
os.makedirs(directory)
return directory
def download_album(self, album: dict) -> bool:
"""
Download all MP3 files in the album
:param album: album dict
:return: True if successful
"""
def download_album(self, album):
for track_index, track in enumerate(album['tracks']):
track_meta = {
"artist": album['artist'],
@ -85,17 +53,30 @@ class BandcampDownloader:
filename = self.template_to_path(track_meta)
dirname = self.create_directory(filename)
if not track['url']:
if not track.get('url'):
print("Skipping track {0} - {1} as it is not available"
.format(track['track'], track['title']))
continue
try:
track_url = track['url']
# Check and see if HTTP is in the track_url
if 'http' not in track_url:
track_url = 'http:{}'.format(track_url)
r = requests.get(track_url, stream=True)
file_length = r.headers.get('content-length')
if not self.overwrite and os.path.isfile(filename):
file_size = os.path.getsize(filename) - 128
if int(file_size) != int(file_length):
print(filename + " is incomplete, redownloading.")
os.remove(filename)
else:
print("Skipping track {0} - {1} as it's already downloaded, use --overwrite to overwrite existing files"
.format(track['track'], track['title']))
continue
with open(filename, "wb") as f:
print("Downloading: " + filename[:-4])
if file_length is None:
@ -125,14 +106,7 @@ class BandcampDownloader:
return True
@staticmethod
def write_id3_tags(filename: str, meta: dict):
"""
Write metadata to the MP3 file
:param filename: name of mp3 file
:param meta: dict of track metadata
"""
def write_id3_tags(self, filename, meta):
print("\nEncoding . . .")
audio = MP3(filename)

View File

@ -1,42 +0,0 @@
import demjson
import re
class BandcampJSON:
    """
    Locate a JavaScript ``var <name> = ...;`` assignment in a parsed page and
    convert the assigned literal to a JSON string
    """

    def __init__(self, body, var_name: str, js_data=None):
        """
        Store the page and the name of the variable to extract

        :param body: parsed page; its ``find`` method is called with
            BeautifulSoup-style arguments (presumably a BeautifulSoup document)
        :param var_name: name of the JavaScript variable to look for
        :param js_data: optional initial value for the cached JS text
        """
        self.body = body
        self.var_name = var_name
        self.js_data = js_data

    def get_js(self) -> str:
        """
        Get <script> element containing the data we need and return the raw JS

        :return js_data: Raw JS as str
        :raises ValueError: if no inline <script> mentions the variable
        """
        # {"src": False} restricts the search to scripts without a src attribute.
        # The name is escaped so it is matched literally, not as a regex.
        script = self.body.find("script", {"src": False},
                                text=re.compile(re.escape(self.var_name)))
        if script is None:
            raise ValueError("No inline <script> containing '{}' was found".format(self.var_name))
        self.js_data = script.string
        return self.js_data

    def extract_data(self, js: str) -> str:
        """
        Extract values from JS dictionary

        :param js: Raw JS
        :return: Contents of dictionary as str
        :raises ValueError: if ``var <var_name> = `` does not occur in js
        """
        # Raw strings keep "\s" a regex escape rather than an invalid string
        # escape; re.escape keeps the look-behind fixed-width and literal.
        pattern = r"(?<=var\s" + re.escape(self.var_name) + r"\s=\s)[^;]*"
        match = re.search(pattern, js)
        if match is None:
            raise ValueError("Variable '{}' was not found in the script".format(self.var_name))
        # The capture stops at the first ';'. Adjacent string literals joined
        # with '" + "' are merged by dropping the join.
        self.js_data = match.group().replace('" + "', '')
        return self.js_data

    def js_to_json(self) -> str:
        """
        Convert JavaScript dictionary to JSON

        :return: JSON as str
        :raises ValueError: if the script or the variable cannot be found
        """
        js = self.get_js()
        data = self.extract_data(js)
        # Decode with demjson first to reformat keys and lists
        js_data = demjson.decode(data)
        # Encode to make valid JSON
        js_data = demjson.encode(js_data)
        return js_data

View File

@ -1,6 +1,7 @@
beautifulsoup4==4.5.1
demjson==2.2.4
docopt==0.6.2
mutagen==1.35.1
ply==3.9
requests==2.12.4
slimit==0.8.1
unicode-slugify==0.1.3

81
bandcamp_dl/jsobj.py Normal file
View File

@ -0,0 +1,81 @@
"""
Simple JavaScript/ECMAScript object literal reader
Only supports object literals wrapped in `var x = ...;` statements, so you
might want to do read_js_object('var x = %s;' % literal) if it's in another format.
Requires the slimit <https://github.com/rspivak/slimit> library for parsing.
Basic constant folding on strings and numbers is done, e.g. "hi " + "there!" reduces to "hi there!",
and 1+1 reduces to 2.
Copyright (c) 2013 darkf
Licensed under the terms of the WTFPL:
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.
"""
from slimit.parser import Parser
import slimit.ast as ast
def read_js_object(code):
    """
    Parse JavaScript ``var`` statements into a dict keyed by variable name

    Objects become dicts, arrays become lists, and string literals lose their
    surrounding quotes. Number, Identifier, Boolean and Null nodes are
    returned as the node's ``value`` attribute, unchanged.

    :param code: JavaScript source made up of ``var name = literal;`` statements
    :return: dict mapping each variable name to its converted value
    :raises ValueError: for a statement that is not a ``var`` statement, for an
        operator other than ``+``, or for ``+`` on anything other than two
        string literals or two number literals
    :raises Exception: for any other node type that is not handled
    """
    parser = Parser()

    def unquote(literal):
        """Remove exactly one pair of matching quotes around a string literal."""
        # Stripping every quote character from both ends would also eat quotes
        # that belong to the text, e.g. the apostrophe in "Runnin'".
        if len(literal) >= 2 and literal[0] == literal[-1] and literal[0] in ('"', "'"):
            return literal[1:-1]
        return literal

    def visit(node):
        if isinstance(node, ast.Program):
            variables = {}
            for child in node:
                if not isinstance(child, ast.VarStatement):
                    raise ValueError("All statements should be var statements")
                key, val = visit(child)
                variables[key] = val
            return variables

        if isinstance(node, ast.VarStatement):
            # Only the first child of the statement is read.
            return visit(node.children()[0])

        if isinstance(node, ast.VarDecl):
            return visit(node.identifier), visit(node.initializer)

        if isinstance(node, ast.Object):
            members = {}
            for prop in node:
                key = visit(prop.left)
                value = visit(prop.right)
                members[key] = value
            return members

        if isinstance(node, ast.BinOp):
            # simple constant folding
            if node.op != '+':
                raise ValueError("Cannot do operator '{}'".format(node.op))
            both_strings = isinstance(node.left, ast.String) and isinstance(node.right, ast.String)
            both_numbers = isinstance(node.left, ast.Number) and isinstance(node.right, ast.Number)
            if not (both_strings or both_numbers):
                raise ValueError("Cannot + on anything other than two literals")
            # NOTE(review): if slimit keeps Number values as source text, this
            # concatenates ("1" + "1" -> "11") instead of adding -- confirm
            # before relying on numeric folding.
            return visit(node.left) + visit(node.right)

        if isinstance(node, ast.String):
            return unquote(node.value)

        if isinstance(node, ast.Array):
            return [visit(item) for item in node]

        if isinstance(node, (ast.Number, ast.Identifier, ast.Boolean, ast.Null)):
            return node.value

        raise Exception("Unhandled node: {}".format(node))

    return visit(parser.parse(code))
if __name__ == "__main__":
    # Demo: parse two sample var statements and print the resulting dict.
    sample = """var foo = {x: 10, y: "hi " + "there!"};
var bar = {derp: ["herp", "it", "up", "forever"]};"""
    print(read_js_object(sample))

View File

@ -1,8 +1,9 @@
--index-url https://pypi.python.org/simple/
beautifulsoup4==4.5.1
demjson==2.2.4
docopt==0.6.2
mutagen==1.35.1
ply==3.9
requests==2.12.4
unicode-slugify==0.1.3
slimit==0.8.1
unicode-slugify==0.1.3

View File

@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))
setup(
name='bandcamp-downloader',
version='0.0.7',
version='0.0.6-01',
description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
long_description=open('README.rst').read(),
url='https://github.com/iheanyi/bandcamp-dl',
@ -18,16 +18,18 @@ setup(
'Intended Audience :: End Users/Desktop',
'Topic :: Multimedia :: Sound/Audio',
'License :: Public Domain',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.5',
],
keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'],
packages=find_packages(),
install_requires=[
'beautifulsoup4',
'demjson',
'docopt',
'mutagen',
'ply',
'requests',
'slimit',
'unicode-slugify',
],
entry_points={