Merge pull request #3 from Codeusa/master

Faster downloads, Progress notification
master
Iheanyi Ekechukwu 2014-03-20 18:50:58 -04:00
commit f05ef93077
2 changed files with 80 additions and 151 deletions

4
README
View File

@ -1,6 +1,6 @@
Usage: Run script with the URL you want to parse as the argument . . . Usage: Run the script bandcamp-dl.py from command line
python bandcamp-dl.py <URL> paste the url of the album or song you wish to download
Dependencies: Dependencies:

View File

@ -7,194 +7,123 @@ Feel free to use this in any way you wish. I made this just for fun.
Shout out to darkf for writing a helper function for parsing the JavaScript! """ Shout out to darkf for writing a helper function for parsing the JavaScript! """
import unicodedata
import os
import urllib2
from mutagen.mp3 import MP3 from mutagen.mp3 import MP3
from mutagen.id3 import ID3, TIT2 from mutagen.id3 import TIT2
from mutagen.easyid3 import EasyID3 from mutagen.easyid3 import EasyID3
from shutil import make_archive
import re
import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import requests import requests
import os
import sys import sys
import urllib
import jsobj import jsobj
def parse_file(url): def parse_file(url):
print "Starting the parsing for: " + url
r = requests.get(url)
soup = BeautifulSoup(r.text)
print "Starting the parsing for: " + url
r = requests.get(url)
soup = BeautifulSoup(r.text)
if "album" in url:
songType = "album"
else:
songType = "track"
albumTitle = soup.head.title.text embedBlock = r.text.split("var EmbedData = ")
embedBlock = r.text.split("var EmbedData = ")
embedStringBlock = embedBlock[1] embedStringBlock = embedBlock[1]
embedStringBlock = unicodedata.normalize('NFKD', embedStringBlock).encode('ascii', 'ignore')
embedStringBlock = embedStringBlock.split("};")[0] + "};"
embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
embedStringBlock = embedStringBlock.split("};")[0] + "};"
embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
#print embedStringBlock
#embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock) embedData = embedStringBlock
#embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock)
#embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock)
#embedStringBlock = embedStringBlock.replace(r'http\":', 'http:')
albumTitle = embedData['EmbedData']['album_title']
#print embedStringBlock block = r.text.split("var TralbumData = ")
#currData = json.loads(embedStringBlock) #print block[0]
#print currData
stringBlock = block[1]
stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore')
stringBlock = stringBlock.split("};")[0] + "};"
stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
#print embedStringBlock
data = stringBlock
embedData = embedStringBlock artistName = data['TralbumData']['artist']
artistName = embedData['EmbedData']['artist'] firstLetter = artistName[0]
if "name" in embedData: if not firstLetter.isalpha:
fileType = "track" firstLetter = "0"
trackName = embedData['EmbedData']['name'] else:
else: firstLetter = firstLetter.capitalize()
fileType = "album"
albumTitle = embedData['EmbedData']['album_title'] if not os.path.exists("files"):
os.makedirs("files")
block = r.text.split("var TralbumData = ")
#print block[0]
stringBlock = block[1] if not os.path.exists("files/" + firstLetter):
if (firstLetter.isalpha):
os.makedirs("files/" + firstLetter)
stringBlock = stringBlock.split("};")[0] + "};" if not os.path.exists("files/" + firstLetter + "/" + artistName):
stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock)) os.makedirs("files/" + firstLetter + "/" + artistName)
#print stringBlock
#sys.exit() tracks = data['TralbumData']['trackinfo']
#stringArray = stringBlock.split("\n") albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "")
#del stringArray[1:4]
#print stringArray
#stringBlock = "".join(stringArray).strip().replace(" ", "") albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
if not os.path.exists("files/zips"):
os.makedirs("files/zips")
if not os.path.exists(albumPath):
os.makedirs(albumPath)
for each in tracks:
songTitle = each['title'].replace(" ", "").replace(".", "")
songURL = each['file']['mp3-128']
data = stringBlock print "Now Downloading: " + each['title'], each['file']['mp3-128']
artistName = data['TralbumData']['artist'] req = urllib2.Request(songURL, headers={'User-Agent': "Magic Browser"})
u = urllib2.urlopen(req)
f = open(albumPath+'/' + each['title']+'.mp3', 'wb')
meta = u.info()
file_size = int(meta.getheaders("Content-Length")[0])
file_size_dl = 0
block_sz = 8192
while True:
buffer = u.read(block_sz)
if not buffer:
break
file_size_dl += len(buffer)
f.write(buffer)
p = float(file_size_dl) / file_size
status = r"[{1:.2%}]".format(file_size_dl, p)
status = status + chr(8) * (len(status) + 1)
sys.stdout.write("Download progress: %s%% \r" % (status))
sys.stdout.flush()
firstLetter = artistName[0]
if not firstLetter.isalpha: f.close()
firstLetter = "0" print "Encoding . . . "
else: audio = MP3(albumPath + '/' + each['title'] + '.mp3')
firstLetter = firstLetter.capitalize() audio["TIT2"] = TIT2(encoding=3, text=["title"])
audio.save()
audio = EasyID3(albumPath + '/' + each['title'] + '.mp3')
audio["title"] = each['title']
audio["artist"] = artistName
audio["album"] = albumTitle
audio.save()
print "Done downloading " + songTitle
if not os.path.exists("files"):
os.makedirs("files")
url = raw_input("Please enter the url of the album or song you wish to download: ")
letterDirectory = "files/" + firstLetter parse_file(url)
if not os.path.exists("files/" + firstLetter):
if(firstLetter.isalpha):
os.makedirs("files/" + firstLetter)
if not os.path.exists("files/" + firstLetter + "/" + artistName):
os.makedirs("files/" + firstLetter + "/" + artistName)
tracks = data['TralbumData']['trackinfo']
albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "")
albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
if not os.path.exists("files/zips"):
os.makedirs("files/zips")
if not os.path.exists(albumPath):
os.makedirs(albumPath)
for each in tracks:
songTitle = each['title'].replace(" ", "").replace(".", "")
songURL = each['file']['mp3-128']
track_num = each['track_num']
print "Now Downloading: " + each['title'], each['file']['mp3-128']
urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")
print "Encoding . . . "
audio = MP3(albumPath + "/" + songTitle + ".mp3")
audio["TIT2"]=TIT2(encoding=3, text=["title"])
audio.save()
audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
audio["title"] = each['title']
audio["artist"] = artistName
audio["album"] = albumTitle
#audio["tracknumber"] = track_num
audio.save()
#audiofile.tag.save()
print "Done downloading " + songTitle
# if(len(tracks) > 1):
# if not os.path.isfile("files/zips/" + albumTitle.replace(" ", "") + ".zip"):
# make_archive("files/zips/" + albumTitle.replace(" ", ""), 'zip', albumPath)
# else:
# print "Already have a zipfile of this junts, serve that up!"
#zip = zipfile.ZipFile("files/zips/" + albumTitle.replace(" ", "") + ".zip", 'w')
#zipdir(albumPath + "/", zip)
#zip.close
def parse_results(text):
    """Collect search hits from a results page into the ``results`` list.

    Every ``<li class="searchresult">`` element found in ``text`` is turned
    into ``Result(url, artist, title, type)`` and appended to ``results``.

    NOTE(review): ``Result`` and ``results`` are not defined in the visible
    part of this file -- presumably a module-level class and list; confirm.

    text -- HTML markup of the search results page.

    Returns None; the parsed entries are delivered through ``results``.
    """
    byPrefix = "by "
    soup = BeautifulSoup(text)
    for item in soup.find_all("li", "searchresult"):
        albumTitle = item.find(class_="heading").text.strip()
        artistName = item.find(class_="subhead").text.strip()
        # Strip only the leading "by " label. A blanket replace() would also
        # eat the same three characters inside the name itself
        # ("by Baby Blue" -> "BaBlue").
        if artistName.startswith(byPrefix):
            artistName = artistName[len(byPrefix):]
        itemURL = item.find(class_="itemurl").text.strip()
        # The item kind is derived from the URL: anything that is not a
        # track URL is treated as an album.
        itemType = "track" if "track" in itemURL else "album"
        results.append(Result(itemURL, artistName, albumTitle, itemType))
# Script entry point: expects exactly one argument, the URL to download.
# The argument count is validated BEFORE sys.argv[1] is read; indexing first
# raises IndexError when no URL is given, so the usage line was never shown.
if len(sys.argv) != 2:
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print("usage: bandcamp-dl.py <url to download>")
    sys.exit()
url = sys.argv[1]
parse_file(url)