Added support for unicode, fixed downloading
The program should no longer throw an error when trying to parse an album whose data contains Unicode characters. Instead of passing the URL as a command-line argument, you now simply paste the URL when prompted. Downloading is still a bit slow; this will be fixed later.
master
parent
a18755ab63
commit
cd360880df
212
bandcamp-dl.py
212
bandcamp-dl.py
|
@ -7,194 +7,102 @@ Feel free to use this in any way you wish. I made this just for fun.
|
||||||
|
|
||||||
Shout out to darkf for writing a helper function for parsing the JavaScript! """
|
Shout out to darkf for writing a helper function for parsing the JavaScript! """
|
||||||
|
|
||||||
|
import unicodedata
|
||||||
|
import os
|
||||||
|
import urllib
|
||||||
|
|
||||||
from mutagen.mp3 import MP3
|
from mutagen.mp3 import MP3
|
||||||
from mutagen.id3 import ID3, TIT2
|
from mutagen.id3 import TIT2
|
||||||
from mutagen.easyid3 import EasyID3
|
from mutagen.easyid3 import EasyID3
|
||||||
from shutil import make_archive
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import urllib
|
|
||||||
import jsobj
|
import jsobj
|
||||||
|
|
||||||
|
|
||||||
def parse_file(url):
|
def parse_file(url):
|
||||||
|
print "Starting the parsing for: " + url
|
||||||
|
r = requests.get(url)
|
||||||
|
soup = BeautifulSoup(r.text)
|
||||||
|
|
||||||
print "Starting the parsing for: " + url
|
|
||||||
r = requests.get(url)
|
|
||||||
soup = BeautifulSoup(r.text)
|
|
||||||
|
|
||||||
if "album" in url:
|
|
||||||
songType = "album"
|
|
||||||
else:
|
|
||||||
songType = "track"
|
|
||||||
|
|
||||||
albumTitle = soup.head.title.text
|
embedBlock = r.text.split("var EmbedData = ")
|
||||||
|
|
||||||
embedBlock = r.text.split("var EmbedData = ")
|
|
||||||
|
|
||||||
embedStringBlock = embedBlock[1]
|
embedStringBlock = embedBlock[1]
|
||||||
|
embedStringBlock = unicodedata.normalize('NFKD', embedStringBlock).encode('ascii', 'ignore')
|
||||||
|
embedStringBlock = embedStringBlock.split("};")[0] + "};"
|
||||||
|
embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
|
||||||
|
|
||||||
embedStringBlock = embedStringBlock.split("};")[0] + "};"
|
|
||||||
embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))
|
|
||||||
|
|
||||||
#print embedStringBlock
|
|
||||||
|
|
||||||
#embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock)
|
embedData = embedStringBlock
|
||||||
#embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock)
|
|
||||||
#embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock)
|
|
||||||
|
|
||||||
#embedStringBlock = embedStringBlock.replace(r'http\":', 'http:')
|
|
||||||
|
|
||||||
|
albumTitle = embedData['EmbedData']['album_title']
|
||||||
|
|
||||||
#print embedStringBlock
|
block = r.text.split("var TralbumData = ")
|
||||||
#currData = json.loads(embedStringBlock)
|
#print block[0]
|
||||||
#print currData
|
|
||||||
|
|
||||||
|
stringBlock = block[1]
|
||||||
|
stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore')
|
||||||
|
stringBlock = stringBlock.split("};")[0] + "};"
|
||||||
|
stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
|
||||||
|
|
||||||
#print embedStringBlock
|
|
||||||
|
|
||||||
|
data = stringBlock
|
||||||
|
|
||||||
embedData = embedStringBlock
|
artistName = data['TralbumData']['artist']
|
||||||
|
|
||||||
artistName = embedData['EmbedData']['artist']
|
firstLetter = artistName[0]
|
||||||
|
|
||||||
if "name" in embedData:
|
if not firstLetter.isalpha:
|
||||||
fileType = "track"
|
firstLetter = "0"
|
||||||
trackName = embedData['EmbedData']['name']
|
else:
|
||||||
else:
|
firstLetter = firstLetter.capitalize()
|
||||||
fileType = "album"
|
|
||||||
|
|
||||||
albumTitle = embedData['EmbedData']['album_title']
|
if not os.path.exists("files"):
|
||||||
|
os.makedirs("files")
|
||||||
|
|
||||||
block = r.text.split("var TralbumData = ")
|
|
||||||
#print block[0]
|
|
||||||
|
|
||||||
stringBlock = block[1]
|
if not os.path.exists("files/" + firstLetter):
|
||||||
|
if (firstLetter.isalpha):
|
||||||
|
os.makedirs("files/" + firstLetter)
|
||||||
|
|
||||||
stringBlock = stringBlock.split("};")[0] + "};"
|
if not os.path.exists("files/" + firstLetter + "/" + artistName):
|
||||||
stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
|
os.makedirs("files/" + firstLetter + "/" + artistName)
|
||||||
#print stringBlock
|
|
||||||
|
|
||||||
#sys.exit()
|
tracks = data['TralbumData']['trackinfo']
|
||||||
|
|
||||||
#stringArray = stringBlock.split("\n")
|
albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "")
|
||||||
#del stringArray[1:4]
|
|
||||||
#print stringArray
|
|
||||||
|
|
||||||
#stringBlock = "".join(stringArray).strip().replace(" ", "")
|
albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
|
||||||
|
if not os.path.exists("files/zips"):
|
||||||
|
os.makedirs("files/zips")
|
||||||
|
|
||||||
|
if not os.path.exists(albumPath):
|
||||||
|
os.makedirs(albumPath)
|
||||||
|
|
||||||
|
for each in tracks:
|
||||||
|
songTitle = each['title'].replace(" ", "").replace(".", "")
|
||||||
|
songURL = each['file']['mp3-128']
|
||||||
|
|
||||||
data = stringBlock
|
print "Now Downloading: " + each['title'], each['file']['mp3-128']
|
||||||
|
urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")
|
||||||
|
|
||||||
artistName = data['TralbumData']['artist']
|
print "Encoding . . . "
|
||||||
|
audio = MP3(albumPath + "/" + songTitle + ".mp3")
|
||||||
|
audio["TIT2"] = TIT2(encoding=3, text=["title"])
|
||||||
|
audio.save()
|
||||||
|
audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
|
||||||
|
audio["title"] = each['title']
|
||||||
|
audio["artist"] = artistName
|
||||||
|
audio["album"] = albumTitle
|
||||||
|
audio.save()
|
||||||
|
|
||||||
|
print "Done downloading " + songTitle
|
||||||
|
|
||||||
firstLetter = artistName[0]
|
|
||||||
|
|
||||||
if not firstLetter.isalpha:
|
url = raw_input("Please enter the url of the album or song you wish to download: ")
|
||||||
firstLetter = "0"
|
parse_file(url)
|
||||||
else:
|
|
||||||
firstLetter = firstLetter.capitalize()
|
|
||||||
|
|
||||||
|
|
||||||
if not os.path.exists("files"):
|
|
||||||
os.makedirs("files")
|
|
||||||
|
|
||||||
|
|
||||||
letterDirectory = "files/" + firstLetter
|
|
||||||
|
|
||||||
if not os.path.exists("files/" + firstLetter):
|
|
||||||
if(firstLetter.isalpha):
|
|
||||||
os.makedirs("files/" + firstLetter)
|
|
||||||
|
|
||||||
|
|
||||||
if not os.path.exists("files/" + firstLetter + "/" + artistName):
|
|
||||||
os.makedirs("files/" + firstLetter + "/" + artistName)
|
|
||||||
|
|
||||||
|
|
||||||
tracks = data['TralbumData']['trackinfo']
|
|
||||||
|
|
||||||
albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "")
|
|
||||||
|
|
||||||
albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
|
|
||||||
if not os.path.exists("files/zips"):
|
|
||||||
os.makedirs("files/zips")
|
|
||||||
|
|
||||||
if not os.path.exists(albumPath):
|
|
||||||
os.makedirs(albumPath)
|
|
||||||
|
|
||||||
for each in tracks:
|
|
||||||
songTitle = each['title'].replace(" ", "").replace(".", "")
|
|
||||||
songURL = each['file']['mp3-128']
|
|
||||||
track_num = each['track_num']
|
|
||||||
|
|
||||||
print "Now Downloading: " + each['title'], each['file']['mp3-128']
|
|
||||||
urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print "Encoding . . . "
|
|
||||||
audio = MP3(albumPath + "/" + songTitle + ".mp3")
|
|
||||||
audio["TIT2"]=TIT2(encoding=3, text=["title"])
|
|
||||||
audio.save()
|
|
||||||
audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
|
|
||||||
audio["title"] = each['title']
|
|
||||||
audio["artist"] = artistName
|
|
||||||
audio["album"] = albumTitle
|
|
||||||
#audio["tracknumber"] = track_num
|
|
||||||
audio.save()
|
|
||||||
|
|
||||||
#audiofile.tag.save()
|
|
||||||
print "Done downloading " + songTitle
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# if(len(tracks) > 1):
|
|
||||||
# if not os.path.isfile("files/zips/" + albumTitle.replace(" ", "") + ".zip"):
|
|
||||||
# make_archive("files/zips/" + albumTitle.replace(" ", ""), 'zip', albumPath)
|
|
||||||
# else:
|
|
||||||
# print "Already have a zipfile of this junts, serve that up!"
|
|
||||||
#zip = zipfile.ZipFile("files/zips/" + albumTitle.replace(" ", "") + ".zip", 'w')
|
|
||||||
#zipdir(albumPath + "/", zip)
|
|
||||||
#zip.close
|
|
||||||
|
|
||||||
def parse_results(text):
|
|
||||||
soup = BeautifulSoup(text)
|
|
||||||
|
|
||||||
items = soup.findAll("li", "searchresult")
|
|
||||||
|
|
||||||
for item in items:
|
|
||||||
typeText = item.find(class_="itemtype").text.strip()
|
|
||||||
albumTitle = item.find(class_="heading").text.strip()
|
|
||||||
|
|
||||||
artistName = item.find(class_="subhead").text.strip()
|
|
||||||
|
|
||||||
artistName = artistName.replace("by ", "")
|
|
||||||
|
|
||||||
itemURL = item.find(class_="itemurl").text.strip()
|
|
||||||
|
|
||||||
if "track" in itemURL:
|
|
||||||
itemType = "track"
|
|
||||||
else:
|
|
||||||
itemType = "album"
|
|
||||||
|
|
||||||
|
|
||||||
result = Result(itemURL, artistName, albumTitle, itemType)
|
|
||||||
results.append(result)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
url = sys.argv[1]
|
|
||||||
if(len(sys.argv) != 2):
|
|
||||||
print "usage: bandcamp-dl.py <url to download>"
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
parse_file(url)
|
|
||||||
|
|
Loading…
Reference in New Issue