Lyrics embedding update

Fixed lyrics embedding: it now works even if the lyrics were left out of the track data, because we now insert them ourselves by grabbing them from the track's individual page.

This of course slows things down when the option is used, so be aware.
master
Anthony Forsberg 2017-05-24 15:29:04 -04:00
parent 5d9327195a
commit 2f8b87713c
5 changed files with 41 additions and 35 deletions

View File

@ -49,18 +49,10 @@ Details
::
Usage:
bandcamp-dl [url]
bandcamp-dl [--template=<template>] [--base-dir=<dir>]
[--full-album]
(<url> | --artist=<artist> --album=<album>)
[--overwrite]
[--no-art]
[--embed-lyrics]
[--group]
[--embed-art]
[--no-slugify]
bandcamp-dl (-h | --help)
bandcamp-dl (--version)
bandcamp-dl [options] [URL]
Arguments:
URL Bandcamp album/track URL
Options
-------
@ -68,20 +60,21 @@ Options
::
Options:
-h --help Show this screen.
-v --version Show version.
-a --artist=<artist> The artist's slug (from the URL)
-b --album=<album> The album's slug (from the URL)
-t --template=<template> Output filename template.
[default: %{artist}/%{album}/%{track} - %{title}]
-d --base-dir=<dir> Base location of which all files are downloaded.
-f --full-album Download only if all tracks are available.
-o --overwrite Overwrite tracks that already exist. Default is False.
-n --no-art Skip grabbing album art
-e --embed-lyrics Embed track lyrics (If available)
-g --group Use album/track Label as iTunes grouping
-r --embed-art Embed album art (If available)
-n --no-slugify Disable slugification of track, album, and artist names.
-h --help Show this screen.
-v --version Show version.
--artist=<artist> The artist's slug (from the URL)
--track=<track> The track's slug (from the URL)
--album=<album> The album's slug (from the URL)
--template=<template> Output filename template.
[default: %{artist}/%{album}/%{track} - %{title}]
--base-dir=<dir> Base location to which all files are downloaded.
-f --full-album Download only if all tracks are available.
-o --overwrite Overwrite tracks that already exist. Default is False.
-n --no-art Skip grabbing album art
-e --embed-lyrics Embed track lyrics (If available)
-g --group Use album/track Label as iTunes grouping
-r --embed-art Embed album art (If available)
-y --no-slugify Disable slugification of track, album, and artist names.
Filename Template
-----------------

View File

@ -79,9 +79,9 @@ def main():
url = arguments['URL']
if arguments['--no-art']:
album = bandcamp.parse(url, False)
album = bandcamp.parse(url, False, arguments['--embed-lyrics'])
else:
album = bandcamp.parse(url)
album = bandcamp.parse(url, True, arguments['--embed-lyrics'])
if arguments['--full-album'] and not album['full']:
print("Full album not available. Skipping...")

View File

@ -13,11 +13,12 @@ class Bandcamp:
def __init__(self):
self.headers = {'User-Agent': 'bandcamp-dl/{} (https://github.com/iheanyi/bandcamp-dl)'.format(__version__)}
def parse(self, url: str, art: bool=True) -> dict or None:
def parse(self, url: str, art: bool=True, lyrics: bool=False) -> dict or None:
"""Requests the page, cherry picks album info
:param url: album/track url
:param art: if True download album art
:param lyrics: if True fetch track lyrics
:return: album metadata
"""
try:
@ -61,7 +62,10 @@ class Bandcamp:
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year)
}
artist_url = album_json['url'].rpartition('/album/')[0]
for track in self.tracks:
if lyrics:
track['lyrics'] = self.get_track_lyrics("{}{}#lyrics".format(artist_url, track['title_link']))
if track['file'] is not None:
track = self.get_track_metadata(track)
album['tracks'].append(track)
@ -72,6 +76,18 @@ class Bandcamp:
return album
def get_track_lyrics(self, track_url):
    """Requests a track page and extracts its lyrics

    :param track_url: url of the individual track page
    :return: text of the ``lyricsText`` div, or "lyrics unavailable" when
        the page has no such div
    """
    response = requests.get(track_url, headers=self.headers)
    markup = response.text

    # Prefer the lxml parser; if BeautifulSoup reports it as missing,
    # parse the same markup with the built-in html.parser instead.
    try:
        soup = BeautifulSoup(markup, "lxml")
    except FeatureNotFound:
        soup = BeautifulSoup(markup, "html.parser")

    lyrics_div = soup.find("div", {"class": "lyricsText"})
    return lyrics_div.text if lyrics_div else "lyrics unavailable"
def all_tracks_available(self) -> bool:
"""Verify that all tracks have a url

View File

@ -108,12 +108,11 @@ class BandcampDownloader:
"album": album['title'],
"title": track['title'],
"track": track['track'],
# TODO: Find out why the 'lyrics' key seems to vanish.
"lyrics": track.get('lyrics', "lyrics unavailable"),
"date": album['date']
}
if 'lyrics' in track and self.embed_lyrics:
track_meta['lyrics'] = track['lyrics']
self.num_tracks = len(album['tracks'])
self.track_num = track_index + 1
@ -218,7 +217,7 @@ class BandcampDownloader:
if self.grouping and 'label' in meta:
audio["TIT1"] = TIT1(encoding=3, text=meta["label"])
if self.embed_lyrics and 'lyrics' in meta:
if self.embed_lyrics:
audio["USLT"] = USLT(encoding=3, lang='eng', desc='', text=meta['lyrics'])
if self.embed_art:

View File

@ -9,8 +9,6 @@ class BandcampJSON:
self.targets = ['TralbumData', 'EmbedData', 'pagedata']
self.json_data = []
# Lyrics can be found by appending #lyrics to the /track/ url
def generate(self) -> list:
"""Iterate through targets grabbing needed data"""
for target in self.targets: