Major refactor

* Moved code blocks into functions for better modularity
master
Simon W. Jackson 2014-05-11 19:25:29 +02:00
parent ee9e1502f2
commit df590ef1fb
1 changed file with 97 additions and 76 deletions

View File

@@ -20,109 +20,130 @@ import sys
import jsobj
#####################################
# NOTE(review): `parse_file` is defined again further down in this file; when
# two module-level `def`s share a name, the later one replaces the earlier.
# Announces the URL, fetches it with requests.get and parses the response text
# with BeautifulSoup.
def parse_file(url):
print "Starting the parsing for: " + url
r = requests.get(url)
soup = BeautifulSoup(r.text)
# Root directory under which create_directories builds the "zips" folder and
# the per-artist/per-album folders.
# NOTE(review): hard-coded, user-specific absolute path.
DOWNLOAD_DIR = "/Users/simonwjackson/music"
#####################################
# NOTE(review): presumably relies on a response object `r` bound elsewhere;
# this split repeats the first step of get_embed_string_block -- confirm
# whether this line is still needed.
embedBlock = r.text.split("var EmbedData = ")
def get_embed_string_block(request):
    """Parse the ``var EmbedData = {...};`` JavaScript literal out of a page.

    The response text is cut at the first ``"var EmbedData = "`` marker, the
    remainder is NFKD-normalised and reduced to ASCII (characters that cannot
    be represented are dropped), and everything up to the first ``"};"`` is
    handed to ``jsobj.read_js_object``.

    Args:
        request: Response-like object exposing the page source as ``.text``.

    Returns:
        Whatever ``jsobj.read_js_object`` produces for the literal; callers
        in this file index it with ``['EmbedData']``.

    Raises:
        IndexError: If the marker does not occur in ``request.text``.
    """
    after_marker = request.text.split("var EmbedData = ")[1]
    ascii_source = unicodedata.normalize('NFKD', after_marker).encode('ascii', 'ignore')
    # Keep only the object literal itself, re-attaching the terminator that
    # split() consumed.
    js_literal = ascii_source.split("};")[0] + "};"
    return jsobj.read_js_object("var EmbedData = %s" % str(js_literal))
# NOTE(review): in the visible source `embedStringBlock` is only assigned
# inside get_embed_string_block -- confirm this module-level alias is still
# reachable and needed.
embedData = embedStringBlock
def sanatize_text(text, space=False, slash=False, period=False):
    """Return *text* with spaces, slashes and periods removed.

    Each flag *keeps* its character when true; by default all three
    characters are stripped.

    Args:
        text: String to clean.
        space: When true, leave ``" "`` in place.
        slash: When true, leave ``"/"`` in place.
        period: When true, leave ``"."`` in place.

    Returns:
        The cleaned string.
    """
    # (character, keep?) pairs, processed in the order space, slash, period.
    removable = ((" ", space), ("/", slash), (".", period))
    result = text
    for char, keep in removable:
        if not keep:
            result = result.replace(char, "")
    return result
def download_track(track, url, title, album_path, artist, album):
print "Now Downloading: " + track['title'], track['file']['mp3-128']
req = urllib2.Request(url, headers={'User-Agent': "Magic Browser"})
u = urllib2.urlopen(req)
f = open(album_path + '/' + track['title'] + '.mp3', 'wb')
meta = u.info()
file_size = int(meta.getheaders("Content-Length")[0])
file_size_dl = 0
block_sz = 8192
while True:
buffer = u.read(block_sz)
if not buffer:
break
file_size_dl += len(buffer)
f.write(buffer)
p = float(file_size_dl) / file_size
status = r"[{1:.2%}]".format(file_size_dl, p)
status = status + chr(8) * (len(status) + 1)
sys.stdout.write("Download progress: %s%% \r" % (status))
sys.stdout.flush()
f.close()
print "Done downloading " + title
write_id3_tags(track, title, album_path, artist, album)
# NOTE(review): reads the module-level name `embedData`;
# extract_album_meta_data obtains the same 'album_title' value itself --
# confirm this global is still needed.
albumTitle = embedData['EmbedData']['album_title']
def write_id3_tags(track, title, album_path, artist, album):
print "Encoding . . . "
block = r.text.split("var TralbumData = ")
#print block[0]
audio = MP3(album_path + '/' + track['title'] + '.mp3')
audio["TIT2"] = TIT2(encoding=3, text=["title"])
audio.save()
audio = EasyID3(album_path + '/' + track['title'] + '.mp3')
audio["title"] = track['title']
audio["artist"] = artist
audio["album"] = album
audio.save()
print "Done encoding . . . "
def create_directories(artist, album):
    """Make sure the download tree for one album exists and return its path.

    Creates, when missing and in this order: ``DOWNLOAD_DIR``,
    ``DOWNLOAD_DIR/zips`` and ``DOWNLOAD_DIR/<artist>/<sanitized album>``.

    Args:
        artist: Artist name, used verbatim as a path component.
        album: Album title; passed through sanatize_text (default flags)
            before being used as a path component.

    Returns:
        The album directory path as a string.
    """
    target = "/".join([DOWNLOAD_DIR, artist, sanatize_text(album)])
    for folder in (DOWNLOAD_DIR, DOWNLOAD_DIR + "/zips", target):
        if not os.path.exists(folder):
            os.makedirs(folder)
    return target
def extract_album_meta_data(request):
    """Collect the album title, artist and track list from a fetched page.

    The title comes from the ``EmbedData`` object (via
    get_embed_string_block); artist and tracks come from the
    ``var TralbumData = {...};`` object, which is extracted the same way:
    cut at the marker, NFKD-normalise, reduce to ASCII, keep everything up
    to the first ``"};"`` and parse it with ``jsobj.read_js_object``.

    This function only reads metadata; directories are created by
    create_directories.

    Args:
        request: Response-like object exposing the page source as ``.text``.

    Returns:
        dict with the keys ``'title'``, ``'artist'`` and ``'tracks'``.

    Raises:
        IndexError: If either marker is missing from ``request.text``.
    """
    embed_data = get_embed_string_block(request)

    raw = request.text.split("var TralbumData = ")[1]
    raw = unicodedata.normalize('NFKD', raw).encode('ascii', 'ignore')
    # Re-attach the terminator that split() consumed.
    raw = raw.split("};")[0] + "};"
    tralbum = jsobj.read_js_object("var TralbumData = %s" % str(raw))['TralbumData']

    return {
        'title': embed_data['EmbedData']['album_title'],
        'artist': tralbum['artist'],
        'tracks': tralbum['trackinfo'],
    }
# Creates the "files/<firstLetter>" directory when it does not exist yet.
# NOTE(review): `firstLetter.isalpha` is referenced without being called, so
# the inner condition is always true -- confirm whether `isalpha()` was
# intended.
if not os.path.exists("files/" + firstLetter):
if (firstLetter.isalpha):
os.makedirs("files/" + firstLetter)
def parse_file(url):
print "Starting the parsing for: " + url
if not os.path.exists("files/" + firstLetter + "/" + artistName):
os.makedirs("files/" + firstLetter + "/" + artistName)
r = requests.get(url)
soup = BeautifulSoup(r.text)
tracks = data['TralbumData']['trackinfo']
album = extract_album_meta_data(r)
album['path'] = create_directories(album['artist'], album['title'])
albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "")
for track in album['tracks']:
title = sanatize_text(track['title'], space=True)
url = track['file']['mp3-128']
albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
if not os.path.exists("files/zips"):
os.makedirs("files/zips")
if not os.path.exists(albumPath):
os.makedirs(albumPath)
for each in tracks:
songTitle = each['title'].replace(" ", "").replace(".", "")
songURL = each['file']['mp3-128']
print "Now Downloading: " + each['title'], each['file']['mp3-128']
req = urllib2.Request(songURL, headers={'User-Agent': "Magic Browser"})
u = urllib2.urlopen(req)
f = open(albumPath+'/' + each['title']+'.mp3', 'wb')
meta = u.info()
file_size = int(meta.getheaders("Content-Length")[0])
file_size_dl = 0
block_sz = 8192
while True:
buffer = u.read(block_sz)
if not buffer:
break
file_size_dl += len(buffer)
f.write(buffer)
p = float(file_size_dl) / file_size
status = r"[{1:.2%}]".format(file_size_dl, p)
status = status + chr(8) * (len(status) + 1)
sys.stdout.write("Download progress: %s%% \r" % (status))
sys.stdout.flush()
f.close()
print "Encoding . . . "
audio = MP3(albumPath + '/' + each['title'] + '.mp3')
audio["TIT2"] = TIT2(encoding=3, text=["title"])
audio.save()
audio = EasyID3(albumPath + '/' + each['title'] + '.mp3')
audio["title"] = each['title']
audio["artist"] = artistName
audio["album"] = albumTitle
audio.save()
print "Done downloading " + songTitle
download_track(track, url, title, album['path'], album['artist'], album['title'])
url = raw_input("Please enter the url of the album or song you wish to download: ")