Merge pull request #3 from Codeusa/master

Faster downloads, Progress notification
2014-03-20 18:50:58 -04:00 · 2014-03-20 18:50:58 -04:00 · f05ef93077
parent a18755ab63 4eb1b7a753
commit f05ef93077
2 changed files with 80 additions and 151 deletions
--- a/4
+++ b/4
@@ -1,6 +1,6 @@
-Usage: Run script with the URL you want to parse as the argument . . . 
+Usage: Run the script bandcamp-dl.py from command line

-python bandcamp-dl.py <URL>
+paste the url of the album or song you wish to download

 Dependencies:

--- a/bandcamp-dl.py
+++ b/bandcamp-dl.py
@@ -7,194 +7,123 @@ Feel free to use this in any way you wish. I made this just for fun.

 Shout out to darkf for writing a helper function for parsing the JavaScript! """

+import unicodedata
+import os
+import urllib2

 from mutagen.mp3 import MP3
-from mutagen.id3 import ID3, TIT2
+from mutagen.id3 import TIT2
 from mutagen.easyid3 import EasyID3
-from shutil import make_archive
-import re
-import json
 from bs4 import BeautifulSoup
-
 import requests
-import os
 import sys
-import urllib
+
 import jsobj

+
 def parse_file(url):
+    print "Starting the parsing for: " + url
+    r = requests.get(url)
+    soup = BeautifulSoup(r.text)

-	print "Starting the parsing for: " + url
-	r = requests.get(url)
-	soup = BeautifulSoup(r.text)

-	if "album" in url:
-		songType = "album"
-	else:
-		songType = "track"

-	albumTitle = soup.head.title.text
-	
-	embedBlock = r.text.split("var EmbedData = ")
+    embedBlock = r.text.split("var EmbedData = ")

-	embedStringBlock = embedBlock[1]
+    embedStringBlock = embedBlock[1]
+    embedStringBlock = unicodedata.normalize('NFKD', embedStringBlock).encode('ascii', 'ignore')
+    embedStringBlock = embedStringBlock.split("};")[0] + "};"
+    embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))

-	embedStringBlock = embedStringBlock.split("};")[0] + "};"
-	embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))

-	#print embedStringBlock

-	#embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock)
-	#embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock)
-	#embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock)
+    embedData = embedStringBlock

-	#embedStringBlock = embedStringBlock.replace(r'http\":', 'http:')

+    albumTitle = embedData['EmbedData']['album_title']

-	#print embedStringBlock
-	#currData = json.loads(embedStringBlock)
-	#print currData
+    block = r.text.split("var TralbumData = ")
+    #print block[0]

+    stringBlock = block[1]
+    stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore')
+    stringBlock = stringBlock.split("};")[0] + "};"
+    stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))

-	#print embedStringBlock

+    data = stringBlock

-	embedData = embedStringBlock
+    artistName = data['TralbumData']['artist']

-	artistName = embedData['EmbedData']['artist']
+    firstLetter = artistName[0]

-	if "name" in embedData:
-		fileType = "track"
-		trackName = embedData['EmbedData']['name']
-	else:
-		fileType = "album"
+    if not firstLetter.isalpha:
+        firstLetter = "0"
+    else:
+        firstLetter = firstLetter.capitalize()

-	albumTitle = embedData['EmbedData']['album_title']
+    if not os.path.exists("files"):
+        os.makedirs("files")

-	block = r.text.split("var TralbumData = ")
-	#print block[0]

-	stringBlock = block[1]
+    if not os.path.exists("files/" + firstLetter):
+        if (firstLetter.isalpha):
+            os.makedirs("files/" + firstLetter)

-	stringBlock = stringBlock.split("};")[0] + "};"
-	stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
-	#print stringBlock
+    if not os.path.exists("files/" + firstLetter + "/" + artistName):
+        os.makedirs("files/" + firstLetter + "/" + artistName)

-	#sys.exit()
+    tracks = data['TralbumData']['trackinfo']

-	#stringArray = stringBlock.split("\n")
-	#del stringArray[1:4]
-	#print stringArray
+    albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "")

-	#stringBlock = "".join(stringArray).strip().replace("    ", "")
+    albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
+    if not os.path.exists("files/zips"):
+        os.makedirs("files/zips")

+    if not os.path.exists(albumPath):
+        os.makedirs(albumPath)

+    for each in tracks:
+        songTitle = each['title'].replace(" ", "").replace(".", "")
+        songURL = each['file']['mp3-128']

-	data = stringBlock
+        print "Now Downloading: " + each['title'], each['file']['mp3-128']

-	artistName = data['TralbumData']['artist']
+        req = urllib2.Request(songURL, headers={'User-Agent': "Magic Browser"})
+        u = urllib2.urlopen(req)
+        f = open(albumPath+'/' + each['title']+'.mp3', 'wb')
+        meta = u.info()
+        file_size = int(meta.getheaders("Content-Length")[0])
+        file_size_dl = 0
+        block_sz = 8192
+        while True:
+            buffer = u.read(block_sz)
+            if not buffer:
+                break

+            file_size_dl += len(buffer)
+            f.write(buffer)
+            p = float(file_size_dl) / file_size
+            status = r"[{1:.2%}]".format(file_size_dl, p)
+            status = status + chr(8) * (len(status) + 1)
+            sys.stdout.write("Download progress: %s%%   \r" % (status))
+            sys.stdout.flush()

-	firstLetter = artistName[0]

-	if not firstLetter.isalpha:
-		firstLetter = "0"
-	else:
-		firstLetter = firstLetter.capitalize()
+        f.close()
+        print "Encoding . . . "
+        audio = MP3(albumPath + '/' + each['title'] + '.mp3')
+        audio["TIT2"] = TIT2(encoding=3, text=["title"])
+        audio.save()
+        audio = EasyID3(albumPath + '/' + each['title'] + '.mp3')
+        audio["title"] = each['title']
+        audio["artist"] = artistName
+        audio["album"] = albumTitle
+        audio.save()

+        print "Done downloading " + songTitle

-	if not os.path.exists("files"):
-		os.makedirs("files")

-
-	letterDirectory = "files/" + firstLetter
-
-	if not os.path.exists("files/" + firstLetter):
-		if(firstLetter.isalpha):
-			os.makedirs("files/" + firstLetter)
-
-
-	if not os.path.exists("files/" + firstLetter + "/" + artistName):
-		os.makedirs("files/" + firstLetter + "/" + artistName)
-
-
-	tracks = data['TralbumData']['trackinfo']	
-
-	albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "")
-
-	albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
-	if not os.path.exists("files/zips"):
-			os.makedirs("files/zips")
-
-	if not os.path.exists(albumPath):
-		os.makedirs(albumPath)
-
-	for each in tracks:
-		songTitle = each['title'].replace(" ", "").replace(".", "")
-		songURL = each['file']['mp3-128']
-		track_num = each['track_num']
-
-		print "Now Downloading: " +  each['title'], each['file']['mp3-128']
-		urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")
-
-
-
-		print "Encoding . . . "
-		audio = MP3(albumPath + "/" + songTitle + ".mp3")
-		audio["TIT2"]=TIT2(encoding=3, text=["title"])
-		audio.save()
-		audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
-		audio["title"] = each['title']
-		audio["artist"] = artistName
-		audio["album"] = albumTitle
-		#audio["tracknumber"] = track_num
-		audio.save()
-
-		#audiofile.tag.save()
-		print "Done downloading " + songTitle
-
-		
-
-
-	# if(len(tracks) > 1):
-	# 	if not os.path.isfile("files/zips/" + albumTitle.replace(" ", "") + ".zip"):
-	# 		make_archive("files/zips/" + albumTitle.replace(" ", ""), 'zip', albumPath)
-	# 	else:
-	# 		print "Already have a zipfile of this junts, serve that up!"
-		#zip = zipfile.ZipFile("files/zips/" + albumTitle.replace(" ", "") + ".zip", 'w')
-		#zipdir(albumPath + "/", zip)
-		#zip.close
-
-def parse_results(text):
-	soup = BeautifulSoup(text)
-
-	items = soup.findAll("li", "searchresult")
-
-	for item in items:
-		typeText = item.find(class_="itemtype").text.strip()
-		albumTitle = item.find(class_="heading").text.strip()
-
-		artistName = item.find(class_="subhead").text.strip()
-
-		artistName = artistName.replace("by ", "")
-
-		itemURL = item.find(class_="itemurl").text.strip()
-
-		if "track" in itemURL:
-			itemType = "track"
-		else:
-			itemType = "album"
-
-
-		result = Result(itemURL, artistName, albumTitle, itemType)
-		results.append(result)
-
-
-
-
-url = sys.argv[1]
-if(len(sys.argv) != 2):
-	print "usage: bandcamp-dl.py <url to download>"
-	sys.exit()
-
-parse_file(url)
+url = raw_input("Please enter the url of the album or song you wish to download: ")
+parse_file(url)