diff --git a/bandcamp-dl.py b/bandcamp-dl.py index 138c71b..99e1909 100644 --- a/bandcamp-dl.py +++ b/bandcamp-dl.py @@ -2,12 +2,15 @@ from mutagen.mp3 import MP3 from mutagen.id3 import ID3, TIT2 from mutagen.easyid3 import EasyID3 from shutil import make_archive +import re +import json from bs4 import BeautifulSoup import requests import os import sys import urllib +import jsobj def parse_file(url): @@ -25,81 +28,61 @@ def parse_file(url): embedBlock = r.text.split("var EmbedData = ") embedStringBlock = embedBlock[1] - embedStringBlock = embedStringBlock.split("};")[0] + "}" + + embedStringBlock = embedStringBlock.split("};")[0] + "};" + embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock)) + + print embedStringBlock + + #embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock) + #embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock) + #embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock) + + #embedStringBlock = embedStringBlock.replace(r'http\":', 'http:') - sys.exit() - embedStringBlock = embedStringBlock.strip().replace(" ", "") + #print embedStringBlock + #currData = json.loads(embedStringBlock) + #print currData - tralbum_param = "tralbum_param" - name = "name" - value = "value" - swf_base_url = "swf_base_url" - album_title = "album_title" - art_url = "art_url" - lg_art_url = "lg_art_url" - numtracks = "numtracks" - title = "title" - artist = "artist" - linkback = "linkback" - embedData = eval(embedStringBlock) + print embedStringBlock - artistName = embedData['artist'] + + embedData = embedStringBlock + + artistName = embedData['EmbedData']['artist'] if "name" in embedData: fileType = "track" - trackName = embedData['name'] + trackName = embedData['EmbedData']['name'] else: fileType = "album" - albumTitle = embedData['album_title'] + albumTitle = embedData['EmbedData']['album_title'] block = r.text.split("var TralbumData = ") #print block[0] stringBlock = block[1] - stringBlock = 
stringBlock.split("};")[0] + "}" + stringBlock = stringBlock.split("};")[0] + "};" + stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock)) #print stringBlock - stringArray = stringBlock.split("\n") - del stringArray[1:4] + #sys.exit() + + #stringArray = stringBlock.split("\n") + #del stringArray[1:4] #print stringArray - stringBlock = "".join(stringArray).strip().replace(" ", "") + #stringBlock = "".join(stringArray).strip().replace(" ", "") - null = None - current = "current" - is_preorder = "is_preorder" - album_is_preorder = "album_is_preorder" - album_release_date = "album_release_date" - album_url = "album_url" - preorder_count = "preorder_count" - hasAudio = "hasAudio" - artThumbURL = "artThumbURL" - artFullsizeUrl = "artFullsizeUrl" - trackinfo = "trackinfo" - playing_from = "playing_from" - featured_track_id = "featured_track_id" - initial_track_num = "initial_track_num" - defaultPrice = "defaultPrice" - freeDownloadPage = "freeDownloadPage" - packages = "packages" - maxPrice = "maxPrice" - minPrice = "minPrice" - FREE = "FREE" - PAID = "PAID" - artist = "artist" - item_type = "item_type" - id = "id" - true = True - false = False - #print unicode(stringBlock.strip()) - data = eval(stringBlock.strip()) - artistName = data['artist'] + data = stringBlock + + artistName = data['TralbumData']['artist'] firstLetter = artistName[0] @@ -125,7 +108,7 @@ def parse_file(url): os.makedirs("files/" + firstLetter + "/" + artistName) - tracks = data['trackinfo'] + tracks = data['TralbumData']['trackinfo'] albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "") @@ -195,20 +178,6 @@ def parse_results(text): result = Result(itemURL, artistName, albumTitle, itemType) results.append(result) - #print itemType, albumTitle, artistName, itemURL - -def printTags(): - for r in results: - print r.artist, r.title, r.type, r.url, r.type - - -def getTags(path): - - myfile = mpeg.Mpeg('inthecity.mp3') - print myfile.artist - print myfile.album - 
print myfile.original - diff --git a/jsobj.py b/jsobj.py new file mode 100755 index 0000000..68fe07a --- /dev/null +++ b/jsobj.py @@ -0,0 +1,76 @@ +""" Simple JavaScript/ECMAScript object literal reader + Only supports object literals wrapped in `var x = ...;` statements, so you + might want to do read_js_object('var x = %s;' % literal) if it's in another format. + + Requires the slimit library for parsing. + + Basic constant folding on strings and numbers is done, e.g. "hi " + "there!" reduces to "hi there!", + and 1+1 reduces to 2. + + Copyright (c) 2013 darkf + Licensed under the terms of the WTFPL: + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. 
from slimit.parser import Parser
from slimit.visitors.nodevisitor import ASTVisitor
import slimit.ast as ast


def _unquote(token):
    """Return a string-literal token without its surrounding quote pair.

    Exactly one matching pair of delimiters is removed, so quote
    characters that belong to the content (e.g. "'quoted'") survive.
    Escape sequences are left exactly as they appear in the token.
    """
    if len(token) >= 2 and token[0] == token[-1] and token[0] in ('"', "'"):
        return token[1:-1]
    return token


def _to_number(token):
    """Convert a numeric-literal token to an int (decimal or 0x hex) or a float."""
    if isinstance(token, (int, float)):
        # Already numeric; nothing to convert.
        return token
    text = token.strip()
    if text[:2].lower() == "0x":
        return int(text, 16)
    try:
        return int(text)
    except ValueError:
        # Fractions and exponents ("1.5", "1e3", ".5") are not ints.
        return float(text)


def _fold_kind(node):
    """Return ast.String or ast.Number if *node* can be constant-folded.

    A node is foldable when it is a literal of that type, or a `+`
    expression whose two operands are foldable to the same type.
    Returns None for everything else.
    """
    for literal_type in (ast.String, ast.Number):
        if isinstance(node, literal_type):
            return literal_type
    if isinstance(node, ast.BinOp) and node.op == "+":
        left_kind = _fold_kind(node.left)
        if left_kind is not None and left_kind is _fold_kind(node.right):
            return left_kind
    return None


def read_js_object(code):
    """Read JavaScript `var name = literal;` statements into a dict.

    Args:
        code: JavaScript source consisting only of `var` statements.

    Returns:
        A dict mapping each declared variable name to the Python value of
        its initializer: objects become dicts, arrays become lists,
        strings lose their quotes, numbers become int/float, `true` /
        `false` become bool and `null` becomes None.  A declaration
        without an initializer maps to None.  Bare identifiers used as
        values are returned as their name.

    Raises:
        ValueError: if a statement is not a `var` statement, or the
            initializer contains an expression that cannot be folded to a
            constant.  Unhandled node types previously raised a plain
            Exception; ValueError is a subclass, so `except Exception`
            callers are unaffected.
    """
    def visit(node):
        if isinstance(node, ast.Object):
            result = {}
            for prop in node:
                if isinstance(prop.left, ast.Number):
                    # JS property names are strings, so a numeric key
                    # keeps its source text instead of becoming a number.
                    key = prop.left.value
                else:
                    key = visit(prop.left)
                result[key] = visit(prop.right)
            return result
        if isinstance(node, ast.BinOp):
            # Simple constant folding: "a" + "b" + "c", 1 + 2 + 3.
            if node.op != "+":
                raise ValueError("Cannot do operator '%s'" % node.op)
            if _fold_kind(node) is None:
                raise ValueError("Cannot + on anything other than two literals")
            return visit(node.left) + visit(node.right)
        if isinstance(node, ast.UnaryOp):
            # Signed literals such as -1 arrive as an operator applied to
            # a Number node rather than as a Number.
            if node.op in ("-", "+") and isinstance(node.value, ast.Number):
                number = _to_number(node.value.value)
                return -number if node.op == "-" else number
            raise ValueError("Cannot do operator '%s'" % node.op)
        if isinstance(node, ast.String):
            return _unquote(node.value)
        if isinstance(node, ast.Array):
            return [visit(item) for item in node]
        # NOTE(review): slimit literal nodes are expected to carry their
        # value as source text ('10', 'true', 'null'); the conversions
        # below also tolerate already-converted values.
        if isinstance(node, ast.Number):
            return _to_number(node.value)
        if isinstance(node, ast.Boolean):
            return node.value in ("true", True)
        if isinstance(node, ast.Null):
            return None
        if isinstance(node, ast.Identifier):
            return node.value
        raise ValueError("Unhandled node: %r" % node)

    declarations = {}
    for statement in Parser().parse(code):
        if not isinstance(statement, ast.VarStatement):
            raise ValueError("All statements should be var statements")
        # A single statement may declare several names: var a = 1, b = 2;
        for decl in statement.children():
            name = visit(decl.identifier)
            if decl.initializer is None:
                declarations[name] = None
            else:
                declarations[name] = visit(decl.initializer)
    return declarations


if __name__ == "__main__":
    # Smoke test.  print(...) with one argument is valid on Python 2 and 3.
    print(read_js_object("""var foo = {x: 10, y: "hi " + "there!"};
                            var bar = {derp: ["herp", "it", "up", "forever"]};"""))
a/py-js-object-parser-master/py-js-object-parser-master/README.md b/py-js-object-parser-master/py-js-object-parser-master/README.md new file mode 100755 index 0000000..d92954a --- /dev/null +++ b/py-js-object-parser-master/py-js-object-parser-master/README.md @@ -0,0 +1,30 @@ +Simple JavaScript/ECMAScript object literal reader + +Only supports object literals wrapped in `var x = ...;` statements, so you + might want to do `read_js_object('var x = %s;' % literal)` if it's in another format. + +Basic constant folding on strings and numbers is done, e.g. "hi " + "there!" reduces to "hi there!", +and 1+1 reduces to 2. + +**Dependencies** + +Requires the [slimit](http://github.com/rspivak/slimit) library for parsing. + +**License** + +Copyright (c) 2013 darkf + +Licensed under the terms of the WTFPL: + + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. \ No newline at end of file diff --git a/py-js-object-parser-master/py-js-object-parser-master/jsobj.py b/py-js-object-parser-master/py-js-object-parser-master/jsobj.py new file mode 100755 index 0000000..68fe07a --- /dev/null +++ b/py-js-object-parser-master/py-js-object-parser-master/jsobj.py @@ -0,0 +1,76 @@ +""" Simple JavaScript/ECMAScript object literal reader + Only supports object literals wrapped in `var x = ...;` statements, so you + might want to do read_js_object('var x = %s;' % literal) if it's in another format. + + Requires the slimit library for parsing. + + Basic constant folding on strings and numbers is done, e.g. "hi " + "there!" reduces to "hi there!", + and 1+1 reduces to 2. 
# Copyright (c) 2013 darkf
# Licensed under the terms of the WTFPL:
#
#            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
#                    Version 2, December 2004
#
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
#
#            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
#   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
#
#  0. You just DO WHAT THE FUCK YOU WANT TO.

from slimit.parser import Parser
from slimit.visitors.nodevisitor import ASTVisitor
import slimit.ast as ast


def _unquote(token):
    """Return a string-literal token without its surrounding quote pair.

    Exactly one matching pair of delimiters is removed, so quote
    characters that belong to the content (e.g. "'quoted'") survive.
    Escape sequences are left exactly as they appear in the token.
    """
    if len(token) >= 2 and token[0] == token[-1] and token[0] in ('"', "'"):
        return token[1:-1]
    return token


def _to_number(token):
    """Convert a numeric-literal token to an int (decimal or 0x hex) or a float."""
    if isinstance(token, (int, float)):
        # Already numeric; nothing to convert.
        return token
    text = token.strip()
    if text[:2].lower() == "0x":
        return int(text, 16)
    try:
        return int(text)
    except ValueError:
        # Fractions and exponents ("1.5", "1e3", ".5") are not ints.
        return float(text)


def _fold_kind(node):
    """Return ast.String or ast.Number if *node* can be constant-folded.

    A node is foldable when it is a literal of that type, or a `+`
    expression whose two operands are foldable to the same type.
    Returns None for everything else.
    """
    for literal_type in (ast.String, ast.Number):
        if isinstance(node, literal_type):
            return literal_type
    if isinstance(node, ast.BinOp) and node.op == "+":
        left_kind = _fold_kind(node.left)
        if left_kind is not None and left_kind is _fold_kind(node.right):
            return left_kind
    return None


def read_js_object(code):
    """Read JavaScript `var name = literal;` statements into a dict.

    Args:
        code: JavaScript source consisting only of `var` statements.

    Returns:
        A dict mapping each declared variable name to the Python value of
        its initializer: objects become dicts, arrays become lists,
        strings lose their quotes, numbers become int/float, `true` /
        `false` become bool and `null` becomes None.  A declaration
        without an initializer maps to None.  Bare identifiers used as
        values are returned as their name.

    Raises:
        ValueError: if a statement is not a `var` statement, or the
            initializer contains an expression that cannot be folded to a
            constant.  Unhandled node types previously raised a plain
            Exception; ValueError is a subclass, so `except Exception`
            callers are unaffected.
    """
    def visit(node):
        if isinstance(node, ast.Object):
            result = {}
            for prop in node:
                if isinstance(prop.left, ast.Number):
                    # JS property names are strings, so a numeric key
                    # keeps its source text instead of becoming a number.
                    key = prop.left.value
                else:
                    key = visit(prop.left)
                result[key] = visit(prop.right)
            return result
        if isinstance(node, ast.BinOp):
            # Simple constant folding: "a" + "b" + "c", 1 + 2 + 3.
            if node.op != "+":
                raise ValueError("Cannot do operator '%s'" % node.op)
            if _fold_kind(node) is None:
                raise ValueError("Cannot + on anything other than two literals")
            return visit(node.left) + visit(node.right)
        if isinstance(node, ast.UnaryOp):
            # Signed literals such as -1 arrive as an operator applied to
            # a Number node rather than as a Number.
            if node.op in ("-", "+") and isinstance(node.value, ast.Number):
                number = _to_number(node.value.value)
                return -number if node.op == "-" else number
            raise ValueError("Cannot do operator '%s'" % node.op)
        if isinstance(node, ast.String):
            return _unquote(node.value)
        if isinstance(node, ast.Array):
            return [visit(item) for item in node]
        # NOTE(review): slimit literal nodes are expected to carry their
        # value as source text ('10', 'true', 'null'); the conversions
        # below also tolerate already-converted values.
        if isinstance(node, ast.Number):
            return _to_number(node.value)
        if isinstance(node, ast.Boolean):
            return node.value in ("true", True)
        if isinstance(node, ast.Null):
            return None
        if isinstance(node, ast.Identifier):
            return node.value
        raise ValueError("Unhandled node: %r" % node)

    declarations = {}
    for statement in Parser().parse(code):
        if not isinstance(statement, ast.VarStatement):
            raise ValueError("All statements should be var statements")
        # A single statement may declare several names: var a = 1, b = 2;
        for decl in statement.children():
            name = visit(decl.identifier)
            if decl.initializer is None:
                declarations[name] = None
            else:
                declarations[name] = visit(decl.initializer)
    return declarations


if __name__ == "__main__":
    # Smoke test.  print(...) with one argument is valid on Python 2 and 3.
    print(read_js_object("""var foo = {x: 10, y: "hi " + "there!"};
                            var bar = {derp: ["herp", "it", "up", "forever"]};"""))