Exposed slugify args
Enhanced flexibility when naming output files. Example: `--template="%{artist}/%{album}/%{track}-%{title}" --space-char="_"` would produce "scene"-style filenames. Also added is the ability to: - Retain uppercase letters - Leave spaces intact - Convert characters to ASCII (北京 -> beijing) - Allow additional characters like `()[];` etc.
master
parent
0b9ce91621
commit
f3e91a7d4d
|
@ -45,3 +45,5 @@ bandcamp_dl/asyncdownloader.py
|
|||
*.log
|
||||
|
||||
bandcamp_dl/__init__\.py
|
||||
|
||||
venv/
|
||||
|
|
11
README.rst
11
README.rst
|
@ -79,6 +79,13 @@ Options
|
|||
-g --group Use album/track Label as iTunes grouping.
|
||||
-r --embed-art Embed album art (If available)
|
||||
-y --no-slugify Disable slugification of track, album, and artist names.
|
||||
-c --ok-chars=<chars> Specify allowed chars in slugify.
|
||||
[default: -_~]
|
||||
-s --space-char=<char> Specify the char to use in place of spaces.
|
||||
[default: -]
|
||||
-a --ascii-only Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
|
||||
-k --keep-spaces Retain whitespace in filenames
|
||||
-u --keep-upper Retain uppercase letters in filenames
|
||||
|
||||
Filename Template
|
||||
-----------------
|
||||
|
@ -170,9 +177,9 @@ Dependencies
|
|||
- `BeautifulSoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
|
||||
- `Demjson <https://pypi.python.org/pypi/demjson>`_ - JavaScript dict to JSON conversion
|
||||
- `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
|
||||
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retriving the HTML
|
||||
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving the HTML
|
||||
- `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
|
||||
- `Chardet <https://pypi.python.org/pypi/chardet>`_ - Charecter encoding detection
|
||||
- `Chardet <https://pypi.python.org/pypi/chardet>`_ - Character encoding detection
|
||||
- `Docopt <https://pypi.python.org/pypi/docopt>`_ - CLI help
|
||||
- `Six <https://pypi.python.org/pypi/six>`_ - Python 2-3 compatibility
|
||||
- `Unidecode <https://pypi.python.org/pypi/unidecode>`_ - ASCII representation of Unicode text
|
||||
|
|
|
@ -23,6 +23,14 @@ Options:
|
|||
-g --group Use album/track Label as iTunes grouping.
|
||||
-r --embed-art Embed album art (If available)
|
||||
-y --no-slugify Disable slugification of track, album, and artist names.
|
||||
-c --ok-chars=<chars> Specify allowed chars in slugify.
|
||||
[default: -_~]
|
||||
-s --space-char=<char> Specify the char to use in place of spaces.
|
||||
[default: -]
|
||||
-a --ascii-only Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
|
||||
-k --keep-spaces Retain whitespace in filenames
|
||||
-u --keep-upper Retain uppercase letters in filenames
|
||||
|
||||
"""
|
||||
"""
|
||||
Coded by:
|
||||
|
@ -87,7 +95,7 @@ def main():
|
|||
for url in urls:
|
||||
logging.debug("\n\tURL: {}".format(url))
|
||||
# url is now a list of URLs. So lets make an albumList and append each parsed album to it.
|
||||
albumList = [];
|
||||
albumList = []
|
||||
for url in urls:
|
||||
albumList.append(bandcamp.parse(url, not arguments['--no-art'], arguments['--embed-lyrics'], arguments['--debug']))
|
||||
|
||||
|
@ -97,7 +105,7 @@ def main():
|
|||
for album in albumList:
|
||||
if arguments['--full-album'] and not album['full']:
|
||||
print("Full album not available. Skipping ", album['title'], " ...")
|
||||
albumList.remove(album) #Remove not-full albums BUT continue with the rest of the albums.
|
||||
albumList.remove(album) # Remove not-full albums BUT continue with the rest of the albums.
|
||||
|
||||
if arguments['URL'] or arguments['--artist']:
|
||||
logging.debug("Preparing download process..")
|
||||
|
@ -105,7 +113,9 @@ def main():
|
|||
bandcamp_downloader = BandcampDownloader(arguments['--template'], basedir, arguments['--overwrite'],
|
||||
arguments['--embed-lyrics'], arguments['--group'],
|
||||
arguments['--embed-art'], arguments['--no-slugify'],
|
||||
arguments['--debug'], album['url'])
|
||||
arguments['--ok-chars'], arguments['--space-char'],
|
||||
arguments['--ascii-only'], arguments['--keep-spaces'],
|
||||
arguments['--keep-upper'], arguments['--debug'], album['url'])
|
||||
logging.debug("Initiating download process..")
|
||||
bandcamp_downloader.start(album)
|
||||
# Add a newline to stop prompt mangling
|
||||
|
|
|
@ -68,7 +68,7 @@ class Bandcamp:
|
|||
"full": False,
|
||||
"art": "",
|
||||
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year),
|
||||
"url":url
|
||||
"url": url
|
||||
}
|
||||
|
||||
artist_url = page_json['url'].rpartition('/album/')[0]
|
||||
|
@ -84,7 +84,8 @@ class Bandcamp:
|
|||
album['art'] = self.get_album_art()
|
||||
|
||||
logging.debug(" Album generated..")
|
||||
print("ALBUM URL:", album["url"])
|
||||
logging.debug(" Album URL: {}".format(album['url']))
|
||||
|
||||
return album
|
||||
|
||||
def get_track_lyrics(self, track_url):
|
||||
|
|
|
@ -8,7 +8,7 @@ from mutagen.id3._frames import TIT1
|
|||
from mutagen.id3._frames import TIT2
|
||||
from mutagen.id3._frames import USLT
|
||||
from mutagen.id3._frames import APIC
|
||||
from slugify import slugify
|
||||
from bandcamp_dl.utils.unicode_slugify import slugify
|
||||
|
||||
if not sys.version_info[:2] == (3, 6):
|
||||
import mock
|
||||
|
@ -20,7 +20,8 @@ from bandcamp_dl.utils.clean_print import print_clean
|
|||
|
||||
|
||||
class BandcampDownloader:
|
||||
def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, debugging, urls=None):
|
||||
def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, ok_chars,
|
||||
space_char, ascii_only, keep_space, keep_upper, debugging, urls=None):
|
||||
"""Initialize variables we will need throughout the Class
|
||||
|
||||
:param urls: list of urls
|
||||
|
@ -42,6 +43,11 @@ class BandcampDownloader:
|
|||
self.embed_art = embed_art
|
||||
self.embed_lyrics = embed_lyrics
|
||||
self.no_slugify = no_slugify
|
||||
self.ok_chars = ok_chars
|
||||
self.space_char = space_char
|
||||
self.ascii_only = ascii_only
|
||||
self.keep_space = keep_space
|
||||
self.keep_upper = keep_upper
|
||||
self.debugging = debugging
|
||||
|
||||
def start(self, album: dict):
|
||||
|
@ -63,23 +69,33 @@ class BandcampDownloader:
|
|||
else:
|
||||
self.download_album(album)
|
||||
|
||||
def template_to_path(self, track: dict) -> str:
|
||||
def template_to_path(self, track: dict, ascii_only, ok_chars, space_char, keep_space, keep_upper) -> str:
|
||||
"""Create valid filepath based on template
|
||||
|
||||
:param track: track metadata
|
||||
:param ok_chars: optional chars to allow
|
||||
:param ascii_only: allow only ascii chars in filename
|
||||
:param keep_space: retain whitespace in filename
|
||||
:param keep_upper: retain uppercase chars in filename
|
||||
:param space_char: char to use in place of spaces
|
||||
:return: filepath
|
||||
"""
|
||||
logging.debug(" Generating filepath/trackname..")
|
||||
path = self.template
|
||||
|
||||
def slugify_preset(content):
|
||||
slugged = slugify(content, ok=ok_chars, only_ascii=ascii_only, spaces=keep_space, lower=not keep_upper,
|
||||
space_replacement=space_char)
|
||||
return slugged
|
||||
|
||||
if self.no_slugify:
|
||||
path = path.replace("%{artist}", track['artist'])
|
||||
path = path.replace("%{album}", track['album'])
|
||||
path = path.replace("%{title}", track['title'])
|
||||
else:
|
||||
path = path.replace("%{artist}", slugify(track['artist']))
|
||||
path = path.replace("%{album}", slugify(track['album']))
|
||||
path = path.replace("%{title}", slugify(track['title']))
|
||||
path = path.replace("%{artist}", slugify_preset(track['artist']))
|
||||
path = path.replace("%{album}", slugify_preset(track['album']))
|
||||
path = path.replace("%{title}", slugify_preset(track['title']))
|
||||
|
||||
if track['track'] == "None":
|
||||
path = path.replace("%{track}", "Single")
|
||||
|
@ -128,7 +144,7 @@ class BandcampDownloader:
|
|||
self.num_tracks = len(album['tracks'])
|
||||
self.track_num = track_index + 1
|
||||
|
||||
filepath = self.template_to_path(track_meta) + ".tmp"
|
||||
filepath = self.template_to_path(track_meta, self.ascii_only, self.ok_chars, self.space_char, self.keep_space, self.keep_upper) + ".tmp"
|
||||
filename = filepath.rsplit('/', 1)[1]
|
||||
dirname = self.create_directory(filepath)
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import re
|
||||
import logging
|
||||
|
||||
import demjson
|
||||
|
@ -37,11 +36,11 @@ class BandcampJSON:
|
|||
js_data = self.js_to_json(script)
|
||||
self.json_data.append(js_data)
|
||||
|
||||
def js_to_json(self, js_data):
|
||||
@staticmethod
def js_to_json(js_data):
    """Convert a JavaScript dictionary into a JSON string.

    :param js_data: JavaScript dictionary source to convert
    :return: the value produced by ``demjson.encode`` for the decoded data
    """
    logging.debug(" Converting JS to JSON..")
    # demjson decodes first (reformatting keys and lists); re-encoding the
    # decoded value is what yields valid JSON.
    return demjson.encode(demjson.decode(js_data))
|
||||
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
Copyright (c) 2011, Mozilla Foundation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of unicode-slugify nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,88 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import six
|
||||
import unicodedata
|
||||
from unidecode import unidecode
|
||||
|
||||
|
||||
def smart_text(s, encoding='utf-8', errors='strict'):
    """Return ``s`` as a text (unicode) string.

    :param s: value to convert
    :param encoding: codec used when ``s`` has to be decoded from bytes
    :param errors: error handling scheme passed along with ``encoding``
    :return: ``s`` itself when it is already text, otherwise its text form
    """
    # Already text: hand it back untouched.
    if isinstance(s, six.text_type):
        return s

    # Any other string type is converted directly.
    if isinstance(s, six.string_types):
        return six.text_type(s)

    if six.PY3:
        # Only bytes are decoded; every other object uses its plain text form.
        if isinstance(s, bytes):
            return six.text_type(s, encoding, errors)
        return six.text_type(s)

    # Python 2: prefer the object's own __unicode__ when it provides one,
    # otherwise decode its byte representation.
    if hasattr(s, '__unicode__'):
        return six.text_type(s)
    return six.text_type(bytes(s), encoding, errors)
|
||||
|
||||
|
||||
def _sanitize(text, ok):
|
||||
rv = []
|
||||
for c in text:
|
||||
cat = unicodedata.category(c)[0]
|
||||
if cat in 'LN' or c in ok:
|
||||
rv.append(c)
|
||||
elif cat == 'Z': # space
|
||||
rv.append(' ')
|
||||
return ''.join(rv).strip()
|
||||
|
||||
|
||||
# Extra characters outside of alphanumerics that we'll allow.
|
||||
SLUG_OK = '-_~'
|
||||
|
||||
|
||||
def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False, space_replacement='-'):
    """
    Creates a unicode slug for given string with several options.

    L and N signify letter/number.
    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table

    :param s: Your unicode string.
    :param ok: Extra characters outside of alphanumerics to be allowed.
               Default is '-_~'
    :param lower: Lower the output string.
                  Default is True
    :param spaces: True allows spaces, False replaces a space with the "space_replacement" param
    :param only_ascii: True to replace non-ASCII unicode characters with
                       their ASCII representations.
    :param space_replacement: Char used to replace spaces if "spaces" is False.
                              Default is dash ("-") or first char in ok if dash not allowed
    :type s: String
    :type ok: String
    :type lower: Bool
    :type spaces: Bool
    :type only_ascii: Bool
    :type space_replacement: String
    :return: Slugified unicode string
    :raises ValueError: if "only_ascii" is True and "ok" has a ``decode``
                        method but cannot be decoded as ASCII

    """

    if only_ascii and ok != SLUG_OK and hasattr(ok, 'decode'):
        try:
            ok.decode('ascii')
        except UnicodeError:
            # Catch the common base class: decoding a byte string raises
            # UnicodeDecodeError, while Python 2 unicode objects raise
            # UnicodeEncodeError from the implicit encode step.
            raise ValueError(('You can not use "only_ascii=True" with '
                              'a non ascii available chars in "ok" ("%s" given)') % ok)

    new = _sanitize(unicodedata.normalize('NFKC', smart_text(s)), ok)
    if only_ascii:
        new = _sanitize(smart_text(unidecode(new)), ok)
    if not spaces:
        if space_replacement and space_replacement not in ok:
            space_replacement = ok[0] if ok else ''
        # Escape the replacement before placing it in the character class so
        # that chars such as "^", "]" or "\" are matched literally instead of
        # changing the meaning of (or breaking) the pattern.
        collapse = r'[%s\s]+' % re.escape(space_replacement)
        # A callable replacement is inserted verbatim; a plain string would be
        # parsed as a template and choke on backslashes.
        new = re.sub(collapse, lambda _match: space_replacement, new)
    if lower:
        new = new.lower()

    return new
|
|
@ -1,10 +1,10 @@
|
|||
--index-url https://pypi.python.org/simple/
|
||||
|
||||
beautifulsoup4==4.6.0
|
||||
beautifulsoup4==4.9.3
|
||||
demjson==2.2.4
|
||||
docopt==0.6.2
|
||||
mutagen==1.38
|
||||
requests==2.18.4
|
||||
mutagen==1.45.1
|
||||
requests==2.25.1
|
||||
unicode-slugify==0.1.3
|
||||
mock==2.0.0
|
||||
chardet==3.0.4
|
||||
mock==4.0.3
|
||||
chardet==4.0.0
|
||||
|
|
Loading…
Reference in New Issue