Added support for unicode, fixed downloading

The program should no longer throw an error when trying to parse an album with unicode, instead of doing arguments you simply need to paste the url now. The program is still a bit slow downloading, will fix later.
2014-03-08 03:07:34 -05:00 · 2014-03-08 03:07:34 -05:00 · cd360880df
parent a18755ab63
commit cd360880df
1 changed files with 60 additions and 152 deletions
--- a/bandcamp-dl.py
+++ b/bandcamp-dl.py
@ -7,194 +7,102 @@ Feel free to use this in any way you wish. I made this just for fun.
 Shout out to darkf for writing a helper function for parsing the JavaScript! """
 import unicodedata
 import os
 import urllib
 from mutagen.mp3 import MP3
-from mutagen.id3 import ID3, TIT2
+from mutagen.id3 import TIT2
 from mutagen.easyid3 import EasyID3
 from shutil import make_archive
 import re
 import json
 from bs4 import BeautifulSoup
 import requests
 import os
 import sys
-import urllib
+
 import jsobj
 def parse_file(url):
    print "Starting the parsing for: " + url
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
 	print "Starting the parsing for: " + url
 	r = requests.get(url)
 	soup = BeautifulSoup(r.text)
 	if "album" in url:
 		songType = "album"
 	else:
 		songType = "track"
-	albumTitle = soup.head.title.text
+    embedBlock = r.text.split("var EmbedData = ")
 	embedBlock = r.text.split("var EmbedData = ")
-	embedStringBlock = embedBlock[1]
+    embedStringBlock = embedBlock[1]
    embedStringBlock = unicodedata.normalize('NFKD', embedStringBlock).encode('ascii', 'ignore')
    embedStringBlock = embedStringBlock.split("};")[0] + "};"
    embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
 	embedStringBlock = embedStringBlock.split("};")[0] + "};"
 	embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
 	#print embedStringBlock
-	#embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock)
+    embedData = embedStringBlock
 	#embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock)
 	#embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock)
 	#embedStringBlock = embedStringBlock.replace(r'http\":', 'http:')
    albumTitle = embedData['EmbedData']['album_title']
-	#print embedStringBlock
+    block = r.text.split("var TralbumData = ")
-	#currData = json.loads(embedStringBlock)
+    #print block[0]
 	#print currData
    stringBlock = block[1]
    stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore')
    stringBlock = stringBlock.split("};")[0] + "};"
    stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
 	#print embedStringBlock
    data = stringBlock
-	embedData = embedStringBlock
+    artistName = data['TralbumData']['artist']
-	artistName = embedData['EmbedData']['artist']
+    firstLetter = artistName[0]
-	if "name" in embedData:
+    if not firstLetter.isalpha:
-		fileType = "track"
+        firstLetter = "0"
-		trackName = embedData['EmbedData']['name']
+    else:
-	else:
+        firstLetter = firstLetter.capitalize()
 		fileType = "album"
-	albumTitle = embedData['EmbedData']['album_title']
+    if not os.path.exists("files"):
        os.makedirs("files")
 	block = r.text.split("var TralbumData = ")
 	#print block[0]
-	stringBlock = block[1]
+    if not os.path.exists("files/" + firstLetter):
        if (firstLetter.isalpha):
            os.makedirs("files/" + firstLetter)
-	stringBlock = stringBlock.split("};")[0] + "};"
+    if not os.path.exists("files/" + firstLetter + "/" + artistName):
-	stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
+        os.makedirs("files/" + firstLetter + "/" + artistName)
 	#print stringBlock
-	#sys.exit()
+    tracks = data['TralbumData']['trackinfo']
-	#stringArray = stringBlock.split("\n")
+    albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "")
 	#del stringArray[1:4]
 	#print stringArray
-	#stringBlock = "".join(stringArray).strip().replace("    ", "")
+    albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
    if not os.path.exists("files/zips"):
        os.makedirs("files/zips")
    if not os.path.exists(albumPath):
        os.makedirs(albumPath)
    for each in tracks:
        songTitle = each['title'].replace(" ", "").replace(".", "")
        songURL = each['file']['mp3-128']
-	data = stringBlock
+        print "Now Downloading: " + each['title'], each['file']['mp3-128']
        urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")
-	artistName = data['TralbumData']['artist']
+        print "Encoding . . . "
        audio = MP3(albumPath + "/" + songTitle + ".mp3")
        audio["TIT2"] = TIT2(encoding=3, text=["title"])
        audio.save()
        audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
        audio["title"] = each['title']
        audio["artist"] = artistName
        audio["album"] = albumTitle
        audio.save()
        print "Done downloading " + songTitle
 	firstLetter = artistName[0]
-	if not firstLetter.isalpha:
+url = raw_input("Please enter the url of the album or song you wish to download: ")
-		firstLetter = "0"
+parse_file(url)
 	else:
 		firstLetter = firstLetter.capitalize()
 	if not os.path.exists("files"):
 		os.makedirs("files")
 	letterDirectory = "files/" + firstLetter
 	if not os.path.exists("files/" + firstLetter):
 		if(firstLetter.isalpha):
 			os.makedirs("files/" + firstLetter)
 	if not os.path.exists("files/" + firstLetter + "/" + artistName):
 		os.makedirs("files/" + firstLetter + "/" + artistName)
 	tracks = data['TralbumData']['trackinfo']	
 	albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "")
 	albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
 	if not os.path.exists("files/zips"):
 			os.makedirs("files/zips")
 	if not os.path.exists(albumPath):
 		os.makedirs(albumPath)
 	for each in tracks:
 		songTitle = each['title'].replace(" ", "").replace(".", "")
 		songURL = each['file']['mp3-128']
 		track_num = each['track_num']
 		print "Now Downloading: " +  each['title'], each['file']['mp3-128']
 		urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")
 		print "Encoding . . . "
 		audio = MP3(albumPath + "/" + songTitle + ".mp3")
 		audio["TIT2"]=TIT2(encoding=3, text=["title"])
 		audio.save()
 		audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
 		audio["title"] = each['title']
 		audio["artist"] = artistName
 		audio["album"] = albumTitle
 		#audio["tracknumber"] = track_num
 		audio.save()
 		#audiofile.tag.save()
 		print "Done downloading " + songTitle
 	# if(len(tracks) > 1):
 	# 	if not os.path.isfile("files/zips/" + albumTitle.replace(" ", "") + ".zip"):
 	# 		make_archive("files/zips/" + albumTitle.replace(" ", ""), 'zip', albumPath)
 	# 	else:
 	# 		print "Already have a zipfile of this junts, serve that up!"
 		#zip = zipfile.ZipFile("files/zips/" + albumTitle.replace(" ", "") + ".zip", 'w')
 		#zipdir(albumPath + "/", zip)
 		#zip.close
 def parse_results(text):
 	soup = BeautifulSoup(text)
 	items = soup.findAll("li", "searchresult")
 	for item in items:
 		typeText = item.find(class_="itemtype").text.strip()
 		albumTitle = item.find(class_="heading").text.strip()
 		artistName = item.find(class_="subhead").text.strip()
 		artistName = artistName.replace("by ", "")
 		itemURL = item.find(class_="itemurl").text.strip()
 		if "track" in itemURL:
 			itemType = "track"
 		else:
 			itemType = "album"
 		result = Result(itemURL, artistName, albumTitle, itemType)
 		results.append(result)
 url = sys.argv[1]
 if(len(sys.argv) != 2):
 	print "usage: bandcamp-dl.py <url to download>"
 	sys.exit()
 parse_file(url)