bandcamp-dl/bandcamp_dl/bandcamp.py

122 lines
3.5 KiB
Python
Raw Normal View History

from .bandcampjson import BandcampJSON
from bs4 import BeautifulSoup
from bs4 import FeatureNotFound
from datetime import datetime
import requests
import json
class Bandcamp:
def parse(self, url: str, art: bool=True) -> dict or None:
"""Requests the page, cherry picks album info
:param url: album/track url
:param art: if True download album art
:return: album metadata
"""
2014-06-07 21:14:56 -07:00
try:
r = requests.get(url)
except requests.exceptions.MissingSchema:
return None
try:
self.soup = BeautifulSoup(r.text, "lxml")
except FeatureNotFound:
self.soup = BeautifulSoup(r.text, "html.parser")
self.generate_album_json()
self.tracks = self.tralbum_data_json['trackinfo']
album_release = self.tralbum_data_json['album_release_date']
album = {
"tracks": [],
"title": self.embed_data_json['album_title'],
"artist": self.embed_data_json['artist'],
"full": False,
2014-06-02 01:51:27 -07:00
"art": "",
"date": datetime.strptime(album_release, "%d %b %Y %X %Z").strftime("%m%d%Y")
}
for track in self.tracks:
if track['file'] is not None:
track = self.get_track_metadata(track)
album['tracks'].append(track)
album['full'] = self.all_tracks_available()
if art:
album['art'] = self.get_album_art()
return album
# Possibly redundant now, we skip unavailable tracks.
def all_tracks_available(self) -> bool:
"""Verify that all tracks have a url
:return: True if all urls accounted for
"""
for track in self.tracks:
if track['file'] is None:
return False
return True
@staticmethod
def get_track_metadata(track: dict or None) -> dict:
"""Extract individual track metadata
:param track: track dict
:return: track metadata dict
"""
track_metadata = {
"duration": track['duration'],
"track": str(track['track_num']),
"title": track['title'],
"url": None
}
if 'mp3-128' in track['file']:
track_metadata['url'] = "http:" + track['file']['mp3-128']
else:
track_metadata['url'] = None
return track_metadata
def generate_album_json(self):
"""Retrieve JavaScript dictionaries from page and generate JSON
:return: True if successful
"""
try:
embed = BandcampJSON(self.soup, "EmbedData")
tralbum = BandcampJSON(self.soup, "TralbumData")
embed_data = embed.js_to_json()
tralbum_data = tralbum.js_to_json()
self.embed_data_json = json.loads(embed_data)
self.tralbum_data_json = json.loads(tralbum_data)
except Exception as e:
print(e)
return None
return True
@staticmethod
def generate_album_url(artist: str, album: str) -> str:
"""Generate an album url based on the artist and album name
:param artist: artist name
:param album: album name
:return: album url as str
"""
return "http://{0}.bandcamp.com/album/{1}".format(artist, album)
def get_album_art(self) -> str:
"""Find and retrieve album art url from page
:return: url as str
"""
try:
url = self.soup.find(id='tralbumArt').find_all('img')[0]['src']
return url
except None:
pass