Exposed slugify args

Enhanced flexibility when naming output files:
Example: `--template="%{artist}/%{album}/%{track}-%{title}" --space-char="_"`

This would produce "scene"-style filenames.

Also added is the ability to:
 - Retain upper case letters
 - Leave spaces intact
 - Convert characters to ASCII (北京 -> bei-jing)
 - Allow additional characters like `()[];` etc
master
AnthonyF 2021-01-10 15:03:53 -05:00
parent 0b9ce91621
commit f3e91a7d4d
10 changed files with 175 additions and 25 deletions

2
.gitignore vendored
View File

@ -45,3 +45,5 @@ bandcamp_dl/asyncdownloader.py
*.log
bandcamp_dl/__init__\.py
venv/

View File

@ -79,6 +79,13 @@ Options
-g --group Use album/track Label as iTunes grouping.
-r --embed-art Embed album art (If available)
-y --no-slugify Disable slugification of track, album, and artist names.
-c --ok-chars=<chars> Specify allowed chars in slugify.
[default: -_~]
-s --space-char=<char> Specify the char to use in place of spaces.
[default: -]
-a --ascii-only Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
-k --keep-spaces Retain whitespace in filenames
-u --keep-upper Retain uppercase letters in filenames
Filename Template
-----------------
@ -170,9 +177,9 @@ Dependencies
- `BeautifulSoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
- `Demjson <https://pypi.python.org/pypi/demjson>`_- JavaScript dict to JSON conversion
- `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retriving the HTML
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving the HTML
- `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
- `Chardet <https://pypi.python.org/pypi/chardet>`_ - Charecter encoding detection
- `Chardet <https://pypi.python.org/pypi/chardet>`_ - Character encoding detection
- `Docopt <https://pypi.python.org/pypi/docopt>`_ - CLI help
- `Six <https://pypi.python.org/pypi/six>`_ - Python 2-3 compatibility
- `Unidecode <https://pypi.python.org/pypi/unidecode>`_ - ASCII representation of Unicode text

View File

@ -23,6 +23,14 @@ Options:
-g --group Use album/track Label as iTunes grouping.
-r --embed-art Embed album art (If available)
-y --no-slugify Disable slugification of track, album, and artist names.
-c --ok-chars=<chars> Specify allowed chars in slugify.
[default: -_~]
-s --space-char=<char> Specify the char to use in place of spaces.
[default: -]
-a --ascii-only Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
-k --keep-spaces Retain whitespace in filenames
-u --keep-upper Retain uppercase letters in filenames
"""
"""
Coded by:
@ -87,7 +95,7 @@ def main():
for url in urls:
logging.debug("\n\tURL: {}".format(url))
# url is now a list of URLs. So lets make an albumList and append each parsed album to it.
albumList = [];
albumList = []
for url in urls:
albumList.append(bandcamp.parse(url, not arguments['--no-art'], arguments['--embed-lyrics'], arguments['--debug']))
@ -97,15 +105,17 @@ def main():
for album in albumList:
if arguments['--full-album'] and not album['full']:
print("Full album not available. Skipping ", album['title'], " ...")
albumList.remove(album) #Remove not-full albums BUT continue with the rest of the albums.
albumList.remove(album) # Remove not-full albums BUT continue with the rest of the albums.
if arguments['URL'] or arguments['--artist']:
logging.debug("Preparing download process..")
for album in albumList:
bandcamp_downloader = BandcampDownloader(arguments['--template'], basedir, arguments['--overwrite'],
arguments['--embed-lyrics'], arguments['--group'],
arguments['--embed-art'], arguments['--no-slugify'],
arguments['--debug'], album['url'])
arguments['--embed-lyrics'], arguments['--group'],
arguments['--embed-art'], arguments['--no-slugify'],
arguments['--ok-chars'], arguments['--space-char'],
arguments['--ascii-only'], arguments['--keep-spaces'],
arguments['--keep-upper'], arguments['--debug'], album['url'])
logging.debug("Initiating download process..")
bandcamp_downloader.start(album)
# Add a newline to stop prompt mangling

View File

@ -68,7 +68,7 @@ class Bandcamp:
"full": False,
"art": "",
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year),
"url":url
"url": url
}
artist_url = page_json['url'].rpartition('/album/')[0]
@ -84,7 +84,8 @@ class Bandcamp:
album['art'] = self.get_album_art()
logging.debug(" Album generated..")
print("ALBUM URL:", album["url"])
logging.debug(" Album URL: {}".format(album['url']))
return album
def get_track_lyrics(self, track_url):

View File

@ -8,7 +8,7 @@ from mutagen.id3._frames import TIT1
from mutagen.id3._frames import TIT2
from mutagen.id3._frames import USLT
from mutagen.id3._frames import APIC
from slugify import slugify
from bandcamp_dl.utils.unicode_slugify import slugify
if not sys.version_info[:2] == (3, 6):
import mock
@ -20,7 +20,8 @@ from bandcamp_dl.utils.clean_print import print_clean
class BandcampDownloader:
def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, debugging, urls=None):
def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, ok_chars,
space_char, ascii_only, keep_space, keep_upper, debugging, urls=None):
"""Initialize variables we will need throughout the Class
:param urls: list of urls
@ -42,6 +43,11 @@ class BandcampDownloader:
self.embed_art = embed_art
self.embed_lyrics = embed_lyrics
self.no_slugify = no_slugify
self.ok_chars = ok_chars
self.space_char = space_char
self.ascii_only = ascii_only
self.keep_space = keep_space
self.keep_upper = keep_upper
self.debugging = debugging
def start(self, album: dict):
@ -63,23 +69,33 @@ class BandcampDownloader:
else:
self.download_album(album)
def template_to_path(self, track: dict) -> str:
def template_to_path(self, track: dict, ascii_only, ok_chars, space_char, keep_space, keep_upper) -> str:
"""Create valid filepath based on template
:param track: track metadata
:param ok_chars: optional chars to allow
:param ascii_only: allow only ascii chars in filename
:param keep_space: retain whitespace in filename
:param keep_upper: retain uppercase chars in filename
:param space_char: char to use in place of spaces
:return: filepath
"""
logging.debug(" Generating filepath/trackname..")
path = self.template
def slugify_preset(content):
slugged = slugify(content, ok=ok_chars, only_ascii=ascii_only, spaces=keep_space, lower=not keep_upper,
space_replacement=space_char)
return slugged
if self.no_slugify:
path = path.replace("%{artist}", track['artist'])
path = path.replace("%{album}", track['album'])
path = path.replace("%{title}", track['title'])
else:
path = path.replace("%{artist}", slugify(track['artist']))
path = path.replace("%{album}", slugify(track['album']))
path = path.replace("%{title}", slugify(track['title']))
path = path.replace("%{artist}", slugify_preset(track['artist']))
path = path.replace("%{album}", slugify_preset(track['album']))
path = path.replace("%{title}", slugify_preset(track['title']))
if track['track'] == "None":
path = path.replace("%{track}", "Single")
@ -128,7 +144,7 @@ class BandcampDownloader:
self.num_tracks = len(album['tracks'])
self.track_num = track_index + 1
filepath = self.template_to_path(track_meta) + ".tmp"
filepath = self.template_to_path(track_meta, self.ascii_only, self.ok_chars, self.space_char, self.keep_space, self.keep_upper) + ".tmp"
filename = filepath.rsplit('/', 1)[1]
dirname = self.create_directory(filepath)

View File

@ -1,4 +1,3 @@
import re
import logging
import demjson
@ -37,11 +36,11 @@ class BandcampJSON:
js_data = self.js_to_json(script)
self.json_data.append(js_data)
def js_to_json(self, js_data):
@staticmethod
def js_to_json(js_data):
"""Convert JavaScript dictionary to JSON"""
logging.debug(" Converting JS to JSON..")
# Decode with demjson first to reformat keys and lists
decoded_js = demjson.decode(js_data)
# Encode to make valid JSON, add to list of JSON strings
return demjson.encode(decoded_js)

View File

@ -0,0 +1,27 @@
Copyright (c) 2011, Mozilla Foundation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of unicode-slugify nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,88 @@
# -*- coding: utf-8
from __future__ import unicode_literals
import re
import six
import unicodedata
from unidecode import unidecode
def smart_text(s, encoding='utf-8', errors='strict'):
    """
    Coerce ``s`` to a text (unicode) string.

    :param s: Value to convert. Text is returned unchanged, byte strings are
              decoded, anything else is converted with ``six.text_type``.
    :param encoding: Codec used when decoding byte strings.
                     Default is 'utf-8'
    :param errors: Error handler passed to the decoder.
                   Default is 'strict'
    :return: Text representation of ``s``
    """
    if isinstance(s, six.text_type):
        return s

    # ``bytes`` is ``str`` on Python 2, so this single branch decodes byte
    # strings on both major versions with the requested codec instead of
    # silently falling back to the implicit ASCII codec.
    if isinstance(s, bytes):
        return six.text_type(s, encoding, errors)

    if six.PY3 or hasattr(s, '__unicode__'):
        return six.text_type(s)

    # Python 2 object without ``__unicode__``: go through its byte form.
    return six.text_type(bytes(s), encoding, errors)
def _sanitize(text, ok):
rv = []
for c in text:
cat = unicodedata.category(c)[0]
if cat in 'LN' or c in ok:
rv.append(c)
elif cat == 'Z': # space
rv.append(' ')
return ''.join(rv).strip()
# Extra characters outside of alphanumerics that we'll allow.
SLUG_OK = '-_~'


def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False, space_replacement='-'):
    """
    Creates a unicode slug for given string with several options.

    L and N signify letter/number.
    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table

    :param s: Your unicode string.
    :param ok: Extra characters outside of alphanumerics to be allowed.
               Default is '-_~'
    :param lower: Lower the output string.
                  Default is True
    :param spaces: True allows spaces, False replaces a space with the "space_replacement" param
    :param only_ascii: True to replace non-ASCII unicode characters with
                       their ASCII representations.
    :param space_replacement: Char used to replace spaces if "spaces" is False.
                              Default is dash ("-") or first char in ok if dash not allowed
    :type s: String
    :type ok: String
    :type lower: Bool
    :type spaces: Bool
    :type only_ascii: Bool
    :type space_replacement: String
    :raises ValueError: If "only_ascii" is True and "ok" is a byte string
                        that cannot be decoded as ASCII.
    :return: Slugified unicode string
    """
    if only_ascii and ok != SLUG_OK and hasattr(ok, 'decode'):
        try:
            ok.decode('ascii')
        except UnicodeError:
            # UnicodeError covers both the encode error (Python 2 unicode
            # objects) and the decode error (byte strings).
            raise ValueError(('You can not use "only_ascii=True" with '
                              'a non ascii available chars in "ok" ("%s" given)') % ok)

    new = _sanitize(unicodedata.normalize('NFKC', smart_text(s)), ok)
    if only_ascii:
        # Transliteration can introduce characters that are not allowed,
        # so the result is filtered a second time.
        new = _sanitize(smart_text(unidecode(new)), ok)
    if not spaces:
        if space_replacement and space_replacement not in ok:
            space_replacement = ok[0] if ok else ''
        # Escape the replacement so chars such as "^", "]" or "\" are taken
        # literally inside the character class. The callable replacement
        # stops re.sub from treating backslashes as template escapes.
        separator_run = r'[%s\s]+' % re.escape(space_replacement)
        new = re.sub(separator_run, lambda _match: space_replacement, new)
    if lower:
        new = new.lower()
    return new

View File

@ -1,10 +1,10 @@
--index-url https://pypi.python.org/simple/
beautifulsoup4==4.6.0
beautifulsoup4==4.9.3
demjson==2.2.4
docopt==0.6.2
mutagen==1.38
requests==2.18.4
mutagen==1.45.1
requests==2.25.1
unicode-slugify==0.1.3
mock==2.0.0
chardet==3.0.4
mock==4.0.3
chardet==4.0.0

View File

@ -3,7 +3,7 @@ from codecs import open
from os import path
import sys
appversion = "0.0.9-01"
appversion = "0.0.10"
here = path.abspath(path.dirname(__file__))