Merge pull request #3 from Codeusa/master

Faster downloads, Progress notification
2014-03-20 18:50:58 -04:00 · 2014-03-20 18:50:58 -04:00 · f05ef93077
parent a18755ab63 4eb1b7a753
commit f05ef93077
2 changed files with 80 additions and 151 deletions
--- a/4
+++ b/4
@ -1,6 +1,6 @@
-Usage: Run script with the URL you want to parse as the argument . . . 
+Usage: Run the script bandcamp-dl.py from command line
-python bandcamp-dl.py <URL>
+paste the url of the album or song you wish to download
 Dependencies:
--- a/bandcamp-dl.py
+++ b/bandcamp-dl.py
@ -7,194 +7,123 @@ Feel free to use this in any way you wish. I made this just for fun.
 Shout out to darkf for writing a helper function for parsing the JavaScript! """
 import unicodedata
 import os
 import urllib2
 from mutagen.mp3 import MP3
-from mutagen.id3 import ID3, TIT2
+from mutagen.id3 import TIT2
 from mutagen.easyid3 import EasyID3
 from shutil import make_archive
 import re
 import json
 from bs4 import BeautifulSoup
 import requests
 import os
 import sys
-import urllib
+
 import jsobj
 def parse_file(url):
    print "Starting the parsing for: " + url
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
 	print "Starting the parsing for: " + url
 	r = requests.get(url)
 	soup = BeautifulSoup(r.text)
 	if "album" in url:
 		songType = "album"
 	else:
 		songType = "track"
-	albumTitle = soup.head.title.text
+    embedBlock = r.text.split("var EmbedData = ")
 	embedBlock = r.text.split("var EmbedData = ")
-	embedStringBlock = embedBlock[1]
+    embedStringBlock = embedBlock[1]
    embedStringBlock = unicodedata.normalize('NFKD', embedStringBlock).encode('ascii', 'ignore')
    embedStringBlock = embedStringBlock.split("};")[0] + "};"
    embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
 	embedStringBlock = embedStringBlock.split("};")[0] + "};"
 	embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
 	#print embedStringBlock
-	#embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock)
+    embedData = embedStringBlock
 	#embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock)
 	#embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock)
 	#embedStringBlock = embedStringBlock.replace(r'http\":', 'http:')
    albumTitle = embedData['EmbedData']['album_title']
-	#print embedStringBlock
+    block = r.text.split("var TralbumData = ")
-	#currData = json.loads(embedStringBlock)
+    #print block[0]
 	#print currData
    stringBlock = block[1]
    stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore')
    stringBlock = stringBlock.split("};")[0] + "};"
    stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
 	#print embedStringBlock
    data = stringBlock
-	embedData = embedStringBlock
+    artistName = data['TralbumData']['artist']
-	artistName = embedData['EmbedData']['artist']
+    firstLetter = artistName[0]
-	if "name" in embedData:
+    if not firstLetter.isalpha:
-		fileType = "track"
+        firstLetter = "0"
-		trackName = embedData['EmbedData']['name']
+    else:
-	else:
+        firstLetter = firstLetter.capitalize()
 		fileType = "album"
-	albumTitle = embedData['EmbedData']['album_title']
+    if not os.path.exists("files"):
        os.makedirs("files")
 	block = r.text.split("var TralbumData = ")
 	#print block[0]
-	stringBlock = block[1]
+    if not os.path.exists("files/" + firstLetter):
        if (firstLetter.isalpha):
            os.makedirs("files/" + firstLetter)
-	stringBlock = stringBlock.split("};")[0] + "};"
+    if not os.path.exists("files/" + firstLetter + "/" + artistName):
-	stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
+        os.makedirs("files/" + firstLetter + "/" + artistName)
 	#print stringBlock
-	#sys.exit()
+    tracks = data['TralbumData']['trackinfo']
-	#stringArray = stringBlock.split("\n")
+    albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "")
 	#del stringArray[1:4]
 	#print stringArray
-	#stringBlock = "".join(stringArray).strip().replace("    ", "")
+    albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
    if not os.path.exists("files/zips"):
        os.makedirs("files/zips")
    if not os.path.exists(albumPath):
        os.makedirs(albumPath)
    for each in tracks:
        songTitle = each['title'].replace(" ", "").replace(".", "")
        songURL = each['file']['mp3-128']
-	data = stringBlock
+        print "Now Downloading: " + each['title'], each['file']['mp3-128']
-	artistName = data['TralbumData']['artist']
+        req = urllib2.Request(songURL, headers={'User-Agent': "Magic Browser"})
        u = urllib2.urlopen(req)
        f = open(albumPath+'/' + each['title']+'.mp3', 'wb')
        meta = u.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        file_size_dl = 0
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            file_size_dl += len(buffer)
            f.write(buffer)
            p = float(file_size_dl) / file_size
            status = r"[{1:.2%}]".format(file_size_dl, p)
            status = status + chr(8) * (len(status) + 1)
            sys.stdout.write("Download progress: %s%%   \r" % (status))
            sys.stdout.flush()
 	firstLetter = artistName[0]
-	if not firstLetter.isalpha:
+        f.close()
-		firstLetter = "0"
+        print "Encoding . . . "
-	else:
+        audio = MP3(albumPath + '/' + each['title'] + '.mp3')
-		firstLetter = firstLetter.capitalize()
+        audio["TIT2"] = TIT2(encoding=3, text=["title"])
        audio.save()
        audio = EasyID3(albumPath + '/' + each['title'] + '.mp3')
        audio["title"] = each['title']
        audio["artist"] = artistName
        audio["album"] = albumTitle
        audio.save()
        print "Done downloading " + songTitle
 	if not os.path.exists("files"):
 		os.makedirs("files")
-
+url = raw_input("Please enter the url of the album or song you wish to download: ")
-	letterDirectory = "files/" + firstLetter
+parse_file(url)
 	if not os.path.exists("files/" + firstLetter):
 		if(firstLetter.isalpha):
 			os.makedirs("files/" + firstLetter)
 	if not os.path.exists("files/" + firstLetter + "/" + artistName):
 		os.makedirs("files/" + firstLetter + "/" + artistName)
 	tracks = data['TralbumData']['trackinfo']	
 	albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "")
 	albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
 	if not os.path.exists("files/zips"):
 			os.makedirs("files/zips")
 	if not os.path.exists(albumPath):
 		os.makedirs(albumPath)
 	for each in tracks:
 		songTitle = each['title'].replace(" ", "").replace(".", "")
 		songURL = each['file']['mp3-128']
 		track_num = each['track_num']
 		print "Now Downloading: " +  each['title'], each['file']['mp3-128']
 		urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")
 		print "Encoding . . . "
 		audio = MP3(albumPath + "/" + songTitle + ".mp3")
 		audio["TIT2"]=TIT2(encoding=3, text=["title"])
 		audio.save()
 		audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
 		audio["title"] = each['title']
 		audio["artist"] = artistName
 		audio["album"] = albumTitle
 		#audio["tracknumber"] = track_num
 		audio.save()
 		#audiofile.tag.save()
 		print "Done downloading " + songTitle
 	# if(len(tracks) > 1):
 	# 	if not os.path.isfile("files/zips/" + albumTitle.replace(" ", "") + ".zip"):
 	# 		make_archive("files/zips/" + albumTitle.replace(" ", ""), 'zip', albumPath)
 	# 	else:
 	# 		print "Already have a zipfile of this junts, serve that up!"
 		#zip = zipfile.ZipFile("files/zips/" + albumTitle.replace(" ", "") + ".zip", 'w')
 		#zipdir(albumPath + "/", zip)
 		#zip.close
 def parse_results(text):
 	soup = BeautifulSoup(text)
 	items = soup.findAll("li", "searchresult")
 	for item in items:
 		typeText = item.find(class_="itemtype").text.strip()
 		albumTitle = item.find(class_="heading").text.strip()
 		artistName = item.find(class_="subhead").text.strip()
 		artistName = artistName.replace("by ", "")
 		itemURL = item.find(class_="itemurl").text.strip()
 		if "track" in itemURL:
 			itemType = "track"
 		else:
 			itemType = "album"
 		result = Result(itemURL, artistName, albumTitle, itemType)
 		results.append(result)
 url = sys.argv[1]
 if(len(sys.argv) != 2):
 	print "usage: bandcamp-dl.py <url to download>"
 	sys.exit()
 parse_file(url)