Exposed slugify args

Enhanced flexibility when naming output files. Example: `--template="%{artist}/%{album}/%{track}-%{title}" --space-char="_"` would produce "scene"-style filenames. Also added is the ability to: - Retain uppercase letters - Leave spaces intact - Convert characters to ASCII (北京 -> bei-jing) - Allow additional characters such as `()[];` etc.
2021-01-10 15:03:53 -05:00 · 2021-01-10 15:03:53 -05:00 · f3e91a7d4d
parent 0b9ce91621
commit f3e91a7d4d
10 changed files with 175 additions and 25 deletions
--- a/.gitignore
+++ b/.gitignore
@ -45,3 +45,5 @@ bandcamp_dl/asyncdownloader.py
 *.log
 bandcamp_dl/__init__\.py
 venv/
--- a/README.rst
+++ b/README.rst
@ -79,6 +79,13 @@ Options
        -g --group              Use album/track Label as iTunes grouping.
        -r --embed-art          Embed album art (If available)
        -y --no-slugify         Disable slugification of track, album, and artist names.
        -c --ok-chars=<chars>   Specify allowed chars in slugify.
                                [default: -_~]
        -s --space-char=<char>  Specify the char to use in place of spaces.
                                [default: -]
        -a --ascii-only         Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
        -k --keep-spaces        Retain whitespace in filenames
        -u --keep-upper         Retain uppercase letters in filenames
 Filename Template
 -----------------
@ -170,9 +177,9 @@ Dependencies
 -  `BeautifulSoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
 -  `Demjson <https://pypi.python.org/pypi/demjson>`_- JavaScript dict to JSON conversion
 -  `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
-  `Requests <https://pypi.python.org/pypi/requests>`_ - for retriving the HTML
+-  `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving the HTML
 -  `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
-  `Chardet <https://pypi.python.org/pypi/chardet>`_ - Charecter encoding detection
+-  `Chardet <https://pypi.python.org/pypi/chardet>`_ - Character encoding detection
 -  `Docopt <https://pypi.python.org/pypi/docopt>`_ - CLI help
 -  `Six <https://pypi.python.org/pypi/six>`_ - Python 2-3 compatibility
 -  `Unidecode <https://pypi.python.org/pypi/unidecode>`_ - ASCII representation of Unicode text
--- a/bandcamp_dl/main.py
+++ b/bandcamp_dl/main.py
@ -23,6 +23,14 @@ Options:
    -g --group              Use album/track Label as iTunes grouping.
    -r --embed-art          Embed album art (If available)
    -y --no-slugify         Disable slugification of track, album, and artist names.
    -c --ok-chars=<chars>   Specify allowed chars in slugify.
                            [default: -_~]
    -s --space-char=<char>  Specify the char to use in place of spaces.
                            [default: -]
    -a --ascii-only         Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
    -k --keep-spaces        Retain whitespace in filenames
    -u --keep-upper         Retain uppercase letters in filenames
 """
 """
 Coded by:
@ -87,7 +95,7 @@ def main():
    for url in urls:
        logging.debug("\n\tURL: {}".format(url))
    # url is now a list of URLs. So lets make an albumList and append each parsed album to it.
-    albumList = [];
+    albumList = []
    for url in urls:
        albumList.append(bandcamp.parse(url, not arguments['--no-art'], arguments['--embed-lyrics'], arguments['--debug']))
@ -97,15 +105,17 @@ def main():
    for album in albumList:
        if arguments['--full-album'] and not album['full']:
            print("Full album not available. Skipping ", album['title'], " ...")
-            albumList.remove(album) #Remove not-full albums BUT continue with the rest of the albums.
+            albumList.remove(album)  # Remove not-full albums BUT continue with the rest of the albums.
    if arguments['URL'] or arguments['--artist']:
        logging.debug("Preparing download process..")
        for album in albumList:
            bandcamp_downloader = BandcampDownloader(arguments['--template'], basedir, arguments['--overwrite'],
-                                                 arguments['--embed-lyrics'], arguments['--group'],
+                                                     arguments['--embed-lyrics'], arguments['--group'],
-                                                 arguments['--embed-art'], arguments['--no-slugify'],
+                                                     arguments['--embed-art'], arguments['--no-slugify'],
-                                                 arguments['--debug'], album['url'])
+                                                     arguments['--ok-chars'], arguments['--space-char'],
                                                     arguments['--ascii-only'], arguments['--keep-spaces'],
                                                     arguments['--keep-upper'], arguments['--debug'], album['url'])
            logging.debug("Initiating download process..")
            bandcamp_downloader.start(album)
            # Add a newline to stop prompt mangling
--- a/bandcamp_dl/bandcamp.py
+++ b/bandcamp_dl/bandcamp.py
@ -68,7 +68,7 @@ class Bandcamp:
            "full": False,
            "art": "",
            "date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year),
-            "url":url
+            "url": url
        }
        artist_url = page_json['url'].rpartition('/album/')[0]
@ -84,7 +84,8 @@ class Bandcamp:
            album['art'] = self.get_album_art()
        logging.debug(" Album generated..")
-        print("ALBUM URL:", album["url"])
+        logging.debug(" Album URL: {}".format(album['url']))
        return album
    def get_track_lyrics(self, track_url):
--- a/bandcamp_dl/bandcampdownloader.py
+++ b/bandcamp_dl/bandcampdownloader.py
@ -8,7 +8,7 @@ from mutagen.id3._frames import TIT1
 from mutagen.id3._frames import TIT2
 from mutagen.id3._frames import USLT
 from mutagen.id3._frames import APIC
-from slugify import slugify
+from bandcamp_dl.utils.unicode_slugify import slugify
 if not sys.version_info[:2] == (3, 6):
    import mock
@ -20,7 +20,8 @@ from bandcamp_dl.utils.clean_print import print_clean
 class BandcampDownloader:
-    def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, debugging, urls=None):
+    def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, ok_chars,
                 space_char, ascii_only, keep_space, keep_upper, debugging, urls=None):
        """Initialize variables we will need throughout the Class
        :param urls: list of urls
@ -42,6 +43,11 @@ class BandcampDownloader:
        self.embed_art = embed_art
        self.embed_lyrics = embed_lyrics
        self.no_slugify = no_slugify
        self.ok_chars = ok_chars
        self.space_char = space_char
        self.ascii_only = ascii_only
        self.keep_space = keep_space
        self.keep_upper = keep_upper
        self.debugging = debugging
    def start(self, album: dict):
@ -63,23 +69,33 @@ class BandcampDownloader:
        else:
            self.download_album(album)
-    def template_to_path(self, track: dict) -> str:
+    def template_to_path(self, track: dict, ascii_only, ok_chars, space_char, keep_space, keep_upper) -> str:
        """Create valid filepath based on template
        :param track: track metadata
        :param ok_chars: optional chars to allow
        :param ascii_only: allow only ascii chars in filename
        :param keep_space: retain whitespace in filename
        :param keep_upper: retain uppercase chars in filename
        :param space_char: char to use in place of spaces
        :return: filepath
        """
        logging.debug(" Generating filepath/trackname..")
        path = self.template
        def slugify_preset(content):
            slugged = slugify(content, ok=ok_chars, only_ascii=ascii_only, spaces=keep_space, lower=not keep_upper,
                              space_replacement=space_char)
            return slugged
        if self.no_slugify:
            path = path.replace("%{artist}", track['artist'])
            path = path.replace("%{album}", track['album'])
            path = path.replace("%{title}", track['title'])
        else:
-            path = path.replace("%{artist}", slugify(track['artist']))
+            path = path.replace("%{artist}", slugify_preset(track['artist']))
-            path = path.replace("%{album}", slugify(track['album']))
+            path = path.replace("%{album}", slugify_preset(track['album']))
-            path = path.replace("%{title}", slugify(track['title']))
+            path = path.replace("%{title}", slugify_preset(track['title']))
        if track['track'] == "None":
            path = path.replace("%{track}", "Single")
@ -128,7 +144,7 @@ class BandcampDownloader:
            self.num_tracks = len(album['tracks'])
            self.track_num = track_index + 1
-            filepath = self.template_to_path(track_meta) + ".tmp"
+            filepath = self.template_to_path(track_meta, self.ascii_only, self.ok_chars, self.space_char, self.keep_space, self.keep_upper) + ".tmp"
            filename = filepath.rsplit('/', 1)[1]
            dirname = self.create_directory(filepath)
--- a/bandcamp_dl/bandcampjson.py
+++ b/bandcamp_dl/bandcampjson.py
@ -1,4 +1,3 @@
 import re
 import logging
 import demjson
@ -37,11 +36,11 @@ class BandcampJSON:
            js_data = self.js_to_json(script)
            self.json_data.append(js_data)
-    def js_to_json(self, js_data):
+    @staticmethod
    def js_to_json(js_data):
        """Convert JavaScript dictionary to JSON"""
        logging.debug(" Converting JS to JSON..")
        # Decode with demjson first to reformat keys and lists
        decoded_js = demjson.decode(js_data)
        # Encode to make valid JSON, add to list of JSON strings
        return demjson.encode(decoded_js)
--- a/bandcamp_dl/utils/LICENSE-Unicode-Slugify
+++ b/bandcamp_dl/utils/LICENSE-Unicode-Slugify
@ -0,0 +1,27 @@
 Copyright (c) 2011, Mozilla Foundation
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
    1. Redistributions of source code must retain the above copyright notice,
       this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.
    3. Neither the name of unicode-slugify nor the names of its contributors
       may be used to endorse or promote products derived from this software
       without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/bandcamp_dl/utils/unicode_slugify.py
+++ b/bandcamp_dl/utils/unicode_slugify.py
@ -0,0 +1,88 @@
 # -*- coding: utf-8
 from __future__ import unicode_literals
 import re
 import six
 import unicodedata
 from unidecode import unidecode
 def smart_text(s, encoding='utf-8', errors='strict'):
 	"""
 	Coerce ``s`` to a text string (``six.text_type``).
 	Text input is returned unchanged. On Python 3, ``bytes`` input is decoded
 	with ``encoding``/``errors`` and any other object is converted with
 	``six.text_type(s)``. On Python 2, objects exposing ``__unicode__`` are
 	converted with ``six.text_type(s)``; everything else is passed through
 	``bytes()`` and decoded with ``encoding``/``errors``.
 	:param s: Value to convert.
 	:param encoding: Codec used when decoding byte input. Default is 'utf-8'
 	:param errors: Codec error handling scheme. Default is 'strict'
 	:return: Text representation of ``s``
 	"""
 	to_text = six.text_type
 	if isinstance(s, to_text):
 		return s
 	if isinstance(s, six.string_types):
 		# A string type that is not text (presumably only a Python 2 byte
 		# ``str``); converted without an explicit encoding.
 		return to_text(s)
 	if six.PY3:
 		if isinstance(s, bytes):
 			return to_text(s, encoding, errors)
 		return to_text(s)
 	if hasattr(s, '__unicode__'):
 		return to_text(s)
 	return to_text(bytes(s), encoding, errors)
 def _sanitize(text, ok):
 	rv = []
 	for c in text:
 		cat = unicodedata.category(c)[0]
 		if cat in 'LN' or c in ok:
 			rv.append(c)
 		elif cat == 'Z':  # space
 			rv.append(' ')
 	return ''.join(rv).strip()
 # Default set of extra characters (besides Unicode letters and numbers) that
 # slugify() keeps in its output; used as the default for its "ok" parameter.
 SLUG_OK = '-_~'
 def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False, space_replacement='-'):
 	"""
 	Creates a unicode slug for given string with several options.
 	L and N signify letter/number.
 	http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table
 	:param s: Your unicode string.
 	:param ok: Extra characters outside of alphanumerics to be allowed.
 				Default is '-_~'
 	:param lower: Lower the output string.
 					Default is True
 	:param spaces: True allows spaces, False replaces a space with the "space_replacement" param
 	:param only_ascii: True to replace non-ASCII unicode characters with
 						their ASCII representations.
 	:param space_replacement: Char used to replace spaces if "spaces" is False.
 								Default is dash ("-") or first char in ok if dash not allowed
 	:type s: String
 	:type ok: String
 	:type lower: Bool
 	:type spaces: Bool
 	:type only_ascii: Bool
 	:type space_replacement: String
 	:return: Slugified unicode string
 	:raises ValueError: If "only_ascii" is True and a custom "ok" that exposes
 						a ``decode`` method contains non-ASCII characters
 	"""
 	# The ASCII check only runs for "ok" values that expose ``decode``.
 	if only_ascii and ok != SLUG_OK and hasattr(ok, 'decode'):
 		try:
 			ok.decode('ascii')
 		except UnicodeError:
 			# UnicodeError covers both UnicodeEncodeError and UnicodeDecodeError,
 			# so byte strings with non-ASCII content are reported the same way.
 			raise ValueError(('You can not use "only_ascii=True" with '
 									'a non ascii available chars in "ok" ("%s" given)') % ok)
 	new = _sanitize(unicodedata.normalize('NFKC', smart_text(s)), ok)
 	if only_ascii:
 		# Transliteration may introduce characters that are not allowed, so
 		# the result is sanitized a second time.
 		new = _sanitize(smart_text(unidecode(new)), ok)
 	if not spaces:
 		if space_replacement and space_replacement not in ok:
 			space_replacement = ok[0] if ok else ''
 		# Escape the replacement before placing it in the character class so
 		# that values such as "^", "]" or "\" are matched literally.
 		separator_run = r'[%s\s]+' % re.escape(space_replacement)
 		# A callable replacement is inserted verbatim; a plain string would be
 		# parsed as a template and fail on backslashes.
 		new = re.sub(separator_run, lambda _match: space_replacement, new)
 	if lower:
 		new = new.lower()
 	return new
--- a/requirements.txt
+++ b/requirements.txt
@ -1,10 +1,10 @@
 --index-url https://pypi.python.org/simple/
-beautifulsoup4==4.6.0
+beautifulsoup4==4.9.3
 demjson==2.2.4
 docopt==0.6.2
-mutagen==1.38
+mutagen==1.45.1
-requests==2.18.4
+requests==2.25.1
 unicode-slugify==0.1.3
-mock==2.0.0
+mock==4.0.3
-chardet==3.0.4
+chardet==4.0.0
--- a/setup.py
+++ b/setup.py
@ -3,7 +3,7 @@ from codecs import open
 from os import path
 import sys
-appversion = "0.0.9-01"
+appversion = "0.0.10"
 here = path.abspath(path.dirname(__file__))