diff --git a/README b/README index 1e22563..87cdf11 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ -Usage: Run script with the URL you want to parse as the argument . . . +Usage: Run the script bandcamp-dl.py from command line -python bandcamp-dl.py +paste the url of the album or song you wish to download Dependencies: diff --git a/bandcamp-dl.py b/bandcamp-dl.py index b1368d2..da15bd2 100644 --- a/bandcamp-dl.py +++ b/bandcamp-dl.py @@ -7,194 +7,123 @@ Feel free to use this in any way you wish. I made this just for fun. Shout out to darkf for writing a helper function for parsing the JavaScript! """ +import unicodedata +import os +import urllib2 from mutagen.mp3 import MP3 -from mutagen.id3 import ID3, TIT2 +from mutagen.id3 import TIT2 from mutagen.easyid3 import EasyID3 -from shutil import make_archive -import re -import json from bs4 import BeautifulSoup - import requests -import os import sys -import urllib + import jsobj + def parse_file(url): + print "Starting the parsing for: " + url + r = requests.get(url) + soup = BeautifulSoup(r.text) - print "Starting the parsing for: " + url - r = requests.get(url) - soup = BeautifulSoup(r.text) - if "album" in url: - songType = "album" - else: - songType = "track" - albumTitle = soup.head.title.text - - embedBlock = r.text.split("var EmbedData = ") + embedBlock = r.text.split("var EmbedData = ") - embedStringBlock = embedBlock[1] + embedStringBlock = embedBlock[1] + embedStringBlock = unicodedata.normalize('NFKD', embedStringBlock).encode('ascii', 'ignore') + embedStringBlock = embedStringBlock.split("};")[0] + "};" + embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock)) - embedStringBlock = embedStringBlock.split("};")[0] + "};" - embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock)) - #print embedStringBlock - #embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock) - #embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock) - #embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock) + embedData = embedStringBlock - #embedStringBlock = embedStringBlock.replace(r'http\":', 'http:') + albumTitle = embedData['EmbedData']['album_title'] - #print embedStringBlock - #currData = json.loads(embedStringBlock) - #print currData + block = r.text.split("var TralbumData = ") + #print block[0] + stringBlock = block[1] + stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore') + stringBlock = stringBlock.split("};")[0] + "};" + stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock)) - #print embedStringBlock + data = stringBlock - embedData = embedStringBlock + artistName = data['TralbumData']['artist'] - artistName = embedData['EmbedData']['artist'] + firstLetter = artistName[0] - if "name" in embedData: - fileType = "track" - trackName = embedData['EmbedData']['name'] - else: - fileType = "album" + if not firstLetter.isalpha: + firstLetter = "0" + else: + firstLetter = firstLetter.capitalize() - albumTitle = embedData['EmbedData']['album_title'] + if not os.path.exists("files"): + os.makedirs("files") - block = r.text.split("var TralbumData = ") - #print block[0] - stringBlock = block[1] + if not os.path.exists("files/" + firstLetter): + if (firstLetter.isalpha): + os.makedirs("files/" + firstLetter) - stringBlock = stringBlock.split("};")[0] + "};" - stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock)) - #print stringBlock + if not os.path.exists("files/" + firstLetter + "/" + artistName): + os.makedirs("files/" + firstLetter + "/" + artistName) - #sys.exit() + tracks = data['TralbumData']['trackinfo'] - #stringArray = stringBlock.split("\n") - #del stringArray[1:4] - #print stringArray + albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "") - #stringBlock = "".join(stringArray).strip().replace(" ", "") + albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath + if not os.path.exists("files/zips"): + os.makedirs("files/zips") + if not os.path.exists(albumPath): + os.makedirs(albumPath) + for each in tracks: + songTitle = each['title'].replace(" ", "").replace(".", "") + songURL = each['file']['mp3-128'] - data = stringBlock + print "Now Downloading: " + each['title'], each['file']['mp3-128'] - artistName = data['TralbumData']['artist'] + req = urllib2.Request(songURL, headers={'User-Agent': "Magic Browser"}) + u = urllib2.urlopen(req) + f = open(albumPath+'/' + each['title']+'.mp3', 'wb') + meta = u.info() + file_size = int(meta.getheaders("Content-Length")[0]) + file_size_dl = 0 + block_sz = 8192 + while True: + buffer = u.read(block_sz) + if not buffer: + break + file_size_dl += len(buffer) + f.write(buffer) + p = float(file_size_dl) / file_size + status = r"[{1:.2%}]".format(file_size_dl, p) + status = status + chr(8) * (len(status) + 1) + sys.stdout.write("Download progress: %s%% \r" % (status)) + sys.stdout.flush() - firstLetter = artistName[0] - if not firstLetter.isalpha: - firstLetter = "0" - else: - firstLetter = firstLetter.capitalize() + f.close() + print "Encoding . . . " + audio = MP3(albumPath + '/' + each['title'] + '.mp3') + audio["TIT2"] = TIT2(encoding=3, text=["title"]) + audio.save() + audio = EasyID3(albumPath + '/' + each['title'] + '.mp3') + audio["title"] = each['title'] + audio["artist"] = artistName + audio["album"] = albumTitle + audio.save() + print "Done downloading " + songTitle - if not os.path.exists("files"): - os.makedirs("files") - - letterDirectory = "files/" + firstLetter - - if not os.path.exists("files/" + firstLetter): - if(firstLetter.isalpha): - os.makedirs("files/" + firstLetter) - - - if not os.path.exists("files/" + firstLetter + "/" + artistName): - os.makedirs("files/" + firstLetter + "/" + artistName) - - - tracks = data['TralbumData']['trackinfo'] - - albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "") - - albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath - if not os.path.exists("files/zips"): - os.makedirs("files/zips") - - if not os.path.exists(albumPath): - os.makedirs(albumPath) - - for each in tracks: - songTitle = each['title'].replace(" ", "").replace(".", "") - songURL = each['file']['mp3-128'] - track_num = each['track_num'] - - print "Now Downloading: " + each['title'], each['file']['mp3-128'] - urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3") - - - - print "Encoding . . . " - audio = MP3(albumPath + "/" + songTitle + ".mp3") - audio["TIT2"]=TIT2(encoding=3, text=["title"]) - audio.save() - audio = EasyID3(albumPath + "/" + songTitle + ".mp3") - audio["title"] = each['title'] - audio["artist"] = artistName - audio["album"] = albumTitle - #audio["tracknumber"] = track_num - audio.save() - - #audiofile.tag.save() - print "Done downloading " + songTitle - - - - - # if(len(tracks) > 1): - # if not os.path.isfile("files/zips/" + albumTitle.replace(" ", "") + ".zip"): - # make_archive("files/zips/" + albumTitle.replace(" ", ""), 'zip', albumPath) - # else: - # print "Already have a zipfile of this junts, serve that up!" - #zip = zipfile.ZipFile("files/zips/" + albumTitle.replace(" ", "") + ".zip", 'w') - #zipdir(albumPath + "/", zip) - #zip.close - -def parse_results(text): - soup = BeautifulSoup(text) - - items = soup.findAll("li", "searchresult") - - for item in items: - typeText = item.find(class_="itemtype").text.strip() - albumTitle = item.find(class_="heading").text.strip() - - artistName = item.find(class_="subhead").text.strip() - - artistName = artistName.replace("by ", "") - - itemURL = item.find(class_="itemurl").text.strip() - - if "track" in itemURL: - itemType = "track" - else: - itemType = "album" - - - result = Result(itemURL, artistName, albumTitle, itemType) - results.append(result) - - - - -url = sys.argv[1] -if(len(sys.argv) != 2): - print "usage: bandcamp-dl.py " - sys.exit() - -parse_file(url) \ No newline at end of file +url = raw_input("Please enter the url of the album or song you wish to download: ") +parse_file(url)