From f3e91a7d4dbc73b9b6d687f49b37379f8d93068d Mon Sep 17 00:00:00 2001
From: AnthonyF <xxanthonykanexx@gmail.com>
Date: Sun, 10 Jan 2021 15:03:53 -0500
Subject: [PATCH] Exposed slugify args
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Expose slugify's arguments to give more flexibility when naming output files.
Example: `--template="%{artist}/%{album}/%{track}-%{title}" --space-char="_"`

This produces "scene"-style filenames.

Also added is the ability to:
 - Retain upper case letters
 - Leave spaces intact
 - Convert characters to ASCII (北京 -> bei-jing)
 - Allow additional characters like `()[];` etc
---
 .gitignore                                |  2 +
 README.rst                                | 11 ++-
 bandcamp_dl/__main__.py                   | 20 ++++--
 bandcamp_dl/bandcamp.py                   |  5 +-
 bandcamp_dl/bandcampdownloader.py         | 30 ++++++--
 bandcamp_dl/bandcampjson.py               |  5 +-
 bandcamp_dl/utils/LICENSE-Unicode-Slugify | 27 +++++++
 bandcamp_dl/utils/unicode_slugify.py      | 88 +++++++++++++++++++++++
 requirements.txt                          | 10 +--
 setup.py                                  |  2 +-
 10 files changed, 175 insertions(+), 25 deletions(-)
 create mode 100644 bandcamp_dl/utils/LICENSE-Unicode-Slugify
 create mode 100644 bandcamp_dl/utils/unicode_slugify.py
diff --git a/.gitignore b/.gitignore
index b83a140..9641cd4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,3 +45,5 @@ bandcamp_dl/asyncdownloader.py
 *.log
 
 bandcamp_dl/__init__\.py
+
+venv/
diff --git a/README.rst b/README.rst
index df436bb..e8fdcbc 100644
--- a/README.rst
+++ b/README.rst
@@ -79,6 +79,13 @@ Options
         -g --group              Use album/track Label as iTunes grouping.
         -r --embed-art          Embed album art (If available)
         -y --no-slugify         Disable slugification of track, album, and artist names.
+        -c --ok-chars=<chars>   Specify allowed chars in slugify.
+                                [default: -_~]
+        -s --space-char=<char>  Specify the char to use in place of spaces.
+                                [default: -]
+        -a --ascii-only         Transliterate non-ASCII chars to ASCII (北京 (capital of China) -> bei-jing-capital-of-china)
+        -k --keep-spaces        Retain whitespace in filenames
+        -u --keep-upper         Retain uppercase letters in filenames
 
 Filename Template
 -----------------
@@ -170,9 +177,9 @@ Dependencies
 -  `BeautifulSoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
 -  `Demjson <https://pypi.python.org/pypi/demjson>`_- JavaScript dict to JSON conversion
 -  `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
--  `Requests <https://pypi.python.org/pypi/requests>`_ - for retriving the HTML
+-  `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving the HTML
 -  `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
--  `Chardet <https://pypi.python.org/pypi/chardet>`_ - Charecter encoding detection
+-  `Chardet <https://pypi.python.org/pypi/chardet>`_ - Character encoding detection
 -  `Docopt <https://pypi.python.org/pypi/docopt>`_ - CLI help
 -  `Six <https://pypi.python.org/pypi/six>`_ - Python 2-3 compatibility
 -  `Unidecode <https://pypi.python.org/pypi/unidecode>`_ - ASCII representation of Unicode text
diff --git a/bandcamp_dl/__main__.py b/bandcamp_dl/__main__.py
index f99e3c0..e0051b9 100644
--- a/bandcamp_dl/__main__.py
+++ b/bandcamp_dl/__main__.py
@@ -23,6 +23,14 @@ Options:
     -g --group              Use album/track Label as iTunes grouping.
     -r --embed-art          Embed album art (If available)
     -y --no-slugify         Disable slugification of track, album, and artist names.
+    -c --ok-chars=<chars>   Specify allowed chars in slugify.
+                            [default: -_~]
+    -s --space-char=<char>  Specify the char to use in place of spaces.
+                            [default: -]
+    -a --ascii-only         Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
+    -k --keep-spaces        Retain whitespace in filenames
+    -u --keep-upper         Retain uppercase letters in filenames
+
 """
 """
 Coded by:
@@ -87,7 +95,7 @@ def main():
     for url in urls:
         logging.debug("\n\tURL: {}".format(url))
     # url is now a list of URLs. So lets make an albumList and append each parsed album to it.
-    albumList = [];
+    albumList = []
     for url in urls:
         albumList.append(bandcamp.parse(url, not arguments['--no-art'], arguments['--embed-lyrics'], arguments['--debug']))
     
@@ -97,15 +105,17 @@ def main():
     for album in albumList:
         if arguments['--full-album'] and not album['full']:
             print("Full album not available. Skipping ", album['title'], " ...")
-            albumList.remove(album) #Remove not-full albums BUT continue with the rest of the albums.
+            albumList.remove(album)  # Remove not-full albums BUT continue with the rest of the albums.
 
     if arguments['URL'] or arguments['--artist']:
         logging.debug("Preparing download process..")
         for album in albumList:
             bandcamp_downloader = BandcampDownloader(arguments['--template'], basedir, arguments['--overwrite'],
-                                                 arguments['--embed-lyrics'], arguments['--group'],
-                                                 arguments['--embed-art'], arguments['--no-slugify'],
-                                                 arguments['--debug'], album['url'])
+                                                     arguments['--embed-lyrics'], arguments['--group'],
+                                                     arguments['--embed-art'], arguments['--no-slugify'],
+                                                     arguments['--ok-chars'], arguments['--space-char'],
+                                                     arguments['--ascii-only'], arguments['--keep-spaces'],
+                                                     arguments['--keep-upper'], arguments['--debug'], album['url'])
             logging.debug("Initiating download process..")
             bandcamp_downloader.start(album)
             # Add a newline to stop prompt mangling
diff --git a/bandcamp_dl/bandcamp.py b/bandcamp_dl/bandcamp.py
index 65a8133..b1713a3 100644
--- a/bandcamp_dl/bandcamp.py
+++ b/bandcamp_dl/bandcamp.py
@@ -68,7 +68,7 @@ class Bandcamp:
             "full": False,
             "art": "",
             "date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year),
-            "url":url
+            "url": url
         }
 
         artist_url = page_json['url'].rpartition('/album/')[0]
@@ -84,7 +84,8 @@ class Bandcamp:
             album['art'] = self.get_album_art()
 
         logging.debug(" Album generated..")
-        print("ALBUM URL:", album["url"])
+        logging.debug(" Album URL: {}".format(album['url']))
+
         return album
 
     def get_track_lyrics(self, track_url):
diff --git a/bandcamp_dl/bandcampdownloader.py b/bandcamp_dl/bandcampdownloader.py
index ebc0c4d..b618c8c 100644
--- a/bandcamp_dl/bandcampdownloader.py
+++ b/bandcamp_dl/bandcampdownloader.py
@@ -8,7 +8,7 @@ from mutagen.id3._frames import TIT1
 from mutagen.id3._frames import TIT2
 from mutagen.id3._frames import USLT
 from mutagen.id3._frames import APIC
-from slugify import slugify
+from bandcamp_dl.utils.unicode_slugify import slugify
 
 if not sys.version_info[:2] == (3, 6):
     import mock
@@ -20,7 +20,8 @@ from bandcamp_dl.utils.clean_print import print_clean
 
 
 class BandcampDownloader:
-    def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, debugging, urls=None):
+    def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, ok_chars,
+                 space_char, ascii_only, keep_space, keep_upper, debugging, urls=None):
         """Initialize variables we will need throughout the Class
 
         :param urls: list of urls
@@ -42,6 +43,11 @@ class BandcampDownloader:
         self.embed_art = embed_art
         self.embed_lyrics = embed_lyrics
         self.no_slugify = no_slugify
+        self.ok_chars = ok_chars
+        self.space_char = space_char
+        self.ascii_only = ascii_only
+        self.keep_space = keep_space
+        self.keep_upper = keep_upper
         self.debugging = debugging
 
     def start(self, album: dict):
@@ -63,23 +69,33 @@ class BandcampDownloader:
         else:
             self.download_album(album)
 
-    def template_to_path(self, track: dict) -> str:
+    def template_to_path(self, track: dict, ascii_only, ok_chars, space_char, keep_space, keep_upper) -> str:
         """Create valid filepath based on template
 
         :param track: track metadata
+        :param ok_chars: optional chars to allow
+        :param ascii_only: allow only ascii chars in filename
+        :param keep_space: retain whitespace in filename
+        :param keep_upper: retain uppercase chars in filename
+        :param space_char: char to use in place of spaces
         :return: filepath
         """
         logging.debug(" Generating filepath/trackname..")
         path = self.template
 
+        def slugify_preset(content):
+            slugged = slugify(content, ok=ok_chars, only_ascii=ascii_only, spaces=keep_space, lower=not keep_upper,
+                              space_replacement=space_char)
+            return slugged
+
         if self.no_slugify:
             path = path.replace("%{artist}", track['artist'])
             path = path.replace("%{album}", track['album'])
             path = path.replace("%{title}", track['title'])
         else:
-            path = path.replace("%{artist}", slugify(track['artist']))
-            path = path.replace("%{album}", slugify(track['album']))
-            path = path.replace("%{title}", slugify(track['title']))
+            path = path.replace("%{artist}", slugify_preset(track['artist']))
+            path = path.replace("%{album}", slugify_preset(track['album']))
+            path = path.replace("%{title}", slugify_preset(track['title']))
 
         if track['track'] == "None":
             path = path.replace("%{track}", "Single")
@@ -128,7 +144,7 @@ class BandcampDownloader:
             self.num_tracks = len(album['tracks'])
             self.track_num = track_index + 1
 
-            filepath = self.template_to_path(track_meta) + ".tmp"
+            filepath = self.template_to_path(track_meta, self.ascii_only, self.ok_chars, self.space_char, self.keep_space, self.keep_upper) + ".tmp"
             filename = filepath.rsplit('/', 1)[1]
             dirname = self.create_directory(filepath)
 
diff --git a/bandcamp_dl/bandcampjson.py b/bandcamp_dl/bandcampjson.py
index 65b1887..425d83d 100644
--- a/bandcamp_dl/bandcampjson.py
+++ b/bandcamp_dl/bandcampjson.py
@@ -1,4 +1,3 @@
-import re
 import logging
 
 import demjson
@@ -37,11 +36,11 @@ class BandcampJSON:
             js_data = self.js_to_json(script)
             self.json_data.append(js_data)
 
-    def js_to_json(self, js_data):
+    @staticmethod
+    def js_to_json(js_data):
         """Convert JavaScript dictionary to JSON"""
         logging.debug(" Converting JS to JSON..")
         # Decode with demjson first to reformat keys and lists
         decoded_js = demjson.decode(js_data)
         # Encode to make valid JSON, add to list of JSON strings
         return demjson.encode(decoded_js)
-        
diff --git a/bandcamp_dl/utils/LICENSE-Unicode-Slugify b/bandcamp_dl/utils/LICENSE-Unicode-Slugify
new file mode 100644
index 0000000..4a9af50
--- /dev/null
+++ b/bandcamp_dl/utils/LICENSE-Unicode-Slugify
@@ -0,0 +1,27 @@
+Copyright (c) 2011, Mozilla Foundation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    3. Neither the name of unicode-slugify nor the names of its contributors
+       may be used to endorse or promote products derived from this software
+       without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/bandcamp_dl/utils/unicode_slugify.py b/bandcamp_dl/utils/unicode_slugify.py
new file mode 100644
index 0000000..a0307f3
--- /dev/null
+++ b/bandcamp_dl/utils/unicode_slugify.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import six
+import unicodedata
+from unidecode import unidecode
+
+
+def smart_text(s, encoding='utf-8', errors='strict'):
+	if isinstance(s, six.text_type):
+		return s
+
+	if not isinstance(s, six.string_types):
+		if six.PY3:
+			if isinstance(s, bytes):
+				s = six.text_type(s, encoding, errors)
+			else:
+				s = six.text_type(s)
+		elif hasattr(s, '__unicode__'):
+			s = six.text_type(s)
+		else:
+			s = six.text_type(bytes(s), encoding, errors)
+	else:
+		s = six.text_type(s)
+	return s
+
+
+def _sanitize(text, ok):
+	rv = []
+	for c in text:
+		cat = unicodedata.category(c)[0]
+		if cat in 'LN' or c in ok:
+			rv.append(c)
+		elif cat == 'Z':  # space
+			rv.append(' ')
+	return ''.join(rv).strip()
+
+
+# Extra characters outside of alphanumerics that we'll allow.
+SLUG_OK = '-_~'
+
+
+def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False, space_replacement='-'):
+	"""
+	Creates a unicode slug for given string with several options.
+
+	L and N signify letter/number.
+	http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table
+
+	:param s: Your unicode string.
+	:param ok: Extra characters outside of alphanumerics to be allowed.
+				Default is '-_~'
+	:param lower: Lower the output string.
+					Default is True
+	:param spaces: True allows spaces, False replaces a space with the "space_replacement" param
+	:param only_ascii: True to replace non-ASCII unicode characters with
+						their ASCII representations.
+	:param space_replacement: Char used to replace spaces if "spaces" is False.
+								Default is dash ("-") or first char in ok if dash not allowed
+	:type s: String
+	:type ok: String
+	:type lower: Bool
+	:type spaces: Bool
+	:type only_ascii: Bool
+	:type space_replacement: String
+	:return: Slugified unicode string
+
+	"""
+
+	if only_ascii and ok != SLUG_OK and hasattr(ok, 'decode'):
+		try:
+			ok.decode('ascii')
+		except UnicodeEncodeError:
+			raise ValueError(('You can not use "only_ascii=True" with '
+									'a non ascii available chars in "ok" ("%s" given)') % ok)
+
+	new = _sanitize(unicodedata.normalize('NFKC', smart_text(s)), ok)
+	if only_ascii:
+		new = _sanitize(smart_text(unidecode(new)), ok)
+	if not spaces:
+		if space_replacement and space_replacement not in ok:
+			space_replacement = ok[0] if ok else ''
+		new = re.sub('[%s\s]+' % space_replacement, space_replacement, new)
+	if lower:
+		new = new.lower()
+
+	return new
diff --git a/requirements.txt b/requirements.txt
index 4cf6eb5..7b1e169 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 --index-url https://pypi.python.org/simple/
 
-beautifulsoup4==4.6.0
+beautifulsoup4==4.9.3
 demjson==2.2.4
 docopt==0.6.2
-mutagen==1.38
-requests==2.18.4
+mutagen==1.45.1
+requests==2.25.1
 unicode-slugify==0.1.3
-mock==2.0.0
-chardet==3.0.4
+mock==4.0.3
+chardet==4.0.0
diff --git a/setup.py b/setup.py
index 87dbc81..c7f311e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from codecs import open
 from os import path
 import sys
 
-appversion = "0.0.9-01"
+appversion = "0.0.10"
 
 here = path.abspath(path.dirname(__file__))