Exposed slugify args
Enhanced flexibility when naming output files. Example: `--template="%{artist}/%{album}/%{track}-%{title}" --space-char="_"` would produce "scene"-style filenames. Also added is the ability to: - Retain uppercase letters - Leave spaces intact - Convert characters to ASCII (北京 -> bei-jing) - Allow additional characters such as `()[];`, etc.
master
parent
0b9ce91621
commit
f3e91a7d4d
|
@ -45,3 +45,5 @@ bandcamp_dl/asyncdownloader.py
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
bandcamp_dl/__init__\.py
|
bandcamp_dl/__init__\.py
|
||||||
|
|
||||||
|
venv/
|
||||||
|
|
11
README.rst
11
README.rst
|
@ -79,6 +79,13 @@ Options
|
||||||
-g --group Use album/track Label as iTunes grouping.
|
-g --group Use album/track Label as iTunes grouping.
|
||||||
-r --embed-art Embed album art (If available)
|
-r --embed-art Embed album art (If available)
|
||||||
-y --no-slugify Disable slugification of track, album, and artist names.
|
-y --no-slugify Disable slugification of track, album, and artist names.
|
||||||
|
-c --ok-chars=<chars> Specify allowed chars in slugify.
|
||||||
|
[default: -_~]
|
||||||
|
-s --space-char=<char> Specify the char to use in place of spaces.
|
||||||
|
[default: -]
|
||||||
|
-a --ascii-only Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
|
||||||
|
-k --keep-spaces Retain whitespace in filenames
|
||||||
|
-u --keep-upper Retain uppercase letters in filenames
|
||||||
|
|
||||||
Filename Template
|
Filename Template
|
||||||
-----------------
|
-----------------
|
||||||
|
@ -170,9 +177,9 @@ Dependencies
|
||||||
- `BeautifulSoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
|
- `BeautifulSoup4 <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
|
||||||
- `Demjson <https://pypi.python.org/pypi/demjson>`_- JavaScript dict to JSON conversion
|
- `Demjson <https://pypi.python.org/pypi/demjson>`_- JavaScript dict to JSON conversion
|
||||||
- `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
|
- `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
|
||||||
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retriving the HTML
|
- `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving the HTML
|
||||||
- `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
|
- `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
|
||||||
- `Chardet <https://pypi.python.org/pypi/chardet>`_ - Charecter encoding detection
|
- `Chardet <https://pypi.python.org/pypi/chardet>`_ - Character encoding detection
|
||||||
- `Docopt <https://pypi.python.org/pypi/docopt>`_ - CLI help
|
- `Docopt <https://pypi.python.org/pypi/docopt>`_ - CLI help
|
||||||
- `Six <https://pypi.python.org/pypi/six>`_ - Python 2-3 compatibility
|
- `Six <https://pypi.python.org/pypi/six>`_ - Python 2-3 compatibility
|
||||||
- `Unidecode <https://pypi.python.org/pypi/unidecode>`_ - ASCII representation of Unicode text
|
- `Unidecode <https://pypi.python.org/pypi/unidecode>`_ - ASCII representation of Unicode text
|
||||||
|
|
|
@ -23,6 +23,14 @@ Options:
|
||||||
-g --group Use album/track Label as iTunes grouping.
|
-g --group Use album/track Label as iTunes grouping.
|
||||||
-r --embed-art Embed album art (If available)
|
-r --embed-art Embed album art (If available)
|
||||||
-y --no-slugify Disable slugification of track, album, and artist names.
|
-y --no-slugify Disable slugification of track, album, and artist names.
|
||||||
|
-c --ok-chars=<chars> Specify allowed chars in slugify.
|
||||||
|
[default: -_~]
|
||||||
|
-s --space-char=<char> Specify the char to use in place of spaces.
|
||||||
|
[default: -]
|
||||||
|
-a --ascii-only Only allow ASCII chars (北京 (capital of china) -> bei-jing-capital-of-china)
|
||||||
|
-k --keep-spaces Retain whitespace in filenames
|
||||||
|
-u --keep-upper Retain uppercase letters in filenames
|
||||||
|
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
Coded by:
|
Coded by:
|
||||||
|
@ -87,7 +95,7 @@ def main():
|
||||||
for url in urls:
|
for url in urls:
|
||||||
logging.debug("\n\tURL: {}".format(url))
|
logging.debug("\n\tURL: {}".format(url))
|
||||||
# url is now a list of URLs. So lets make an albumList and append each parsed album to it.
|
# url is now a list of URLs. So lets make an albumList and append each parsed album to it.
|
||||||
albumList = [];
|
albumList = []
|
||||||
for url in urls:
|
for url in urls:
|
||||||
albumList.append(bandcamp.parse(url, not arguments['--no-art'], arguments['--embed-lyrics'], arguments['--debug']))
|
albumList.append(bandcamp.parse(url, not arguments['--no-art'], arguments['--embed-lyrics'], arguments['--debug']))
|
||||||
|
|
||||||
|
@ -97,15 +105,17 @@ def main():
|
||||||
for album in albumList:
|
for album in albumList:
|
||||||
if arguments['--full-album'] and not album['full']:
|
if arguments['--full-album'] and not album['full']:
|
||||||
print("Full album not available. Skipping ", album['title'], " ...")
|
print("Full album not available. Skipping ", album['title'], " ...")
|
||||||
albumList.remove(album) #Remove not-full albums BUT continue with the rest of the albums.
|
albumList.remove(album) # Remove not-full albums BUT continue with the rest of the albums.
|
||||||
|
|
||||||
if arguments['URL'] or arguments['--artist']:
|
if arguments['URL'] or arguments['--artist']:
|
||||||
logging.debug("Preparing download process..")
|
logging.debug("Preparing download process..")
|
||||||
for album in albumList:
|
for album in albumList:
|
||||||
bandcamp_downloader = BandcampDownloader(arguments['--template'], basedir, arguments['--overwrite'],
|
bandcamp_downloader = BandcampDownloader(arguments['--template'], basedir, arguments['--overwrite'],
|
||||||
arguments['--embed-lyrics'], arguments['--group'],
|
arguments['--embed-lyrics'], arguments['--group'],
|
||||||
arguments['--embed-art'], arguments['--no-slugify'],
|
arguments['--embed-art'], arguments['--no-slugify'],
|
||||||
arguments['--debug'], album['url'])
|
arguments['--ok-chars'], arguments['--space-char'],
|
||||||
|
arguments['--ascii-only'], arguments['--keep-spaces'],
|
||||||
|
arguments['--keep-upper'], arguments['--debug'], album['url'])
|
||||||
logging.debug("Initiating download process..")
|
logging.debug("Initiating download process..")
|
||||||
bandcamp_downloader.start(album)
|
bandcamp_downloader.start(album)
|
||||||
# Add a newline to stop prompt mangling
|
# Add a newline to stop prompt mangling
|
||||||
|
|
|
@ -68,7 +68,7 @@ class Bandcamp:
|
||||||
"full": False,
|
"full": False,
|
||||||
"art": "",
|
"art": "",
|
||||||
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year),
|
"date": str(dt.strptime(album_release, "%d %b %Y %H:%M:%S GMT").year),
|
||||||
"url":url
|
"url": url
|
||||||
}
|
}
|
||||||
|
|
||||||
artist_url = page_json['url'].rpartition('/album/')[0]
|
artist_url = page_json['url'].rpartition('/album/')[0]
|
||||||
|
@ -84,7 +84,8 @@ class Bandcamp:
|
||||||
album['art'] = self.get_album_art()
|
album['art'] = self.get_album_art()
|
||||||
|
|
||||||
logging.debug(" Album generated..")
|
logging.debug(" Album generated..")
|
||||||
print("ALBUM URL:", album["url"])
|
logging.debug(" Album URL: {}".format(album['url']))
|
||||||
|
|
||||||
return album
|
return album
|
||||||
|
|
||||||
def get_track_lyrics(self, track_url):
|
def get_track_lyrics(self, track_url):
|
||||||
|
|
|
@ -8,7 +8,7 @@ from mutagen.id3._frames import TIT1
|
||||||
from mutagen.id3._frames import TIT2
|
from mutagen.id3._frames import TIT2
|
||||||
from mutagen.id3._frames import USLT
|
from mutagen.id3._frames import USLT
|
||||||
from mutagen.id3._frames import APIC
|
from mutagen.id3._frames import APIC
|
||||||
from slugify import slugify
|
from bandcamp_dl.utils.unicode_slugify import slugify
|
||||||
|
|
||||||
if not sys.version_info[:2] == (3, 6):
|
if not sys.version_info[:2] == (3, 6):
|
||||||
import mock
|
import mock
|
||||||
|
@ -20,7 +20,8 @@ from bandcamp_dl.utils.clean_print import print_clean
|
||||||
|
|
||||||
|
|
||||||
class BandcampDownloader:
|
class BandcampDownloader:
|
||||||
def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, debugging, urls=None):
|
def __init__(self, template, directory, overwrite, embed_lyrics, grouping, embed_art, no_slugify, ok_chars,
|
||||||
|
space_char, ascii_only, keep_space, keep_upper, debugging, urls=None):
|
||||||
"""Initialize variables we will need throughout the Class
|
"""Initialize variables we will need throughout the Class
|
||||||
|
|
||||||
:param urls: list of urls
|
:param urls: list of urls
|
||||||
|
@ -42,6 +43,11 @@ class BandcampDownloader:
|
||||||
self.embed_art = embed_art
|
self.embed_art = embed_art
|
||||||
self.embed_lyrics = embed_lyrics
|
self.embed_lyrics = embed_lyrics
|
||||||
self.no_slugify = no_slugify
|
self.no_slugify = no_slugify
|
||||||
|
self.ok_chars = ok_chars
|
||||||
|
self.space_char = space_char
|
||||||
|
self.ascii_only = ascii_only
|
||||||
|
self.keep_space = keep_space
|
||||||
|
self.keep_upper = keep_upper
|
||||||
self.debugging = debugging
|
self.debugging = debugging
|
||||||
|
|
||||||
def start(self, album: dict):
|
def start(self, album: dict):
|
||||||
|
@ -63,23 +69,33 @@ class BandcampDownloader:
|
||||||
else:
|
else:
|
||||||
self.download_album(album)
|
self.download_album(album)
|
||||||
|
|
||||||
def template_to_path(self, track: dict) -> str:
|
def template_to_path(self, track: dict, ascii_only, ok_chars, space_char, keep_space, keep_upper) -> str:
|
||||||
"""Create valid filepath based on template
|
"""Create valid filepath based on template
|
||||||
|
|
||||||
:param track: track metadata
|
:param track: track metadata
|
||||||
|
:param ok_chars: optional chars to allow
|
||||||
|
:param ascii_only: allow only ascii chars in filename
|
||||||
|
:param keep_space: retain whitespace in filename
|
||||||
|
:param keep_upper: retain uppercase chars in filename
|
||||||
|
:param space_char: char to use in place of spaces
|
||||||
:return: filepath
|
:return: filepath
|
||||||
"""
|
"""
|
||||||
logging.debug(" Generating filepath/trackname..")
|
logging.debug(" Generating filepath/trackname..")
|
||||||
path = self.template
|
path = self.template
|
||||||
|
|
||||||
|
def slugify_preset(content):
|
||||||
|
slugged = slugify(content, ok=ok_chars, only_ascii=ascii_only, spaces=keep_space, lower=not keep_upper,
|
||||||
|
space_replacement=space_char)
|
||||||
|
return slugged
|
||||||
|
|
||||||
if self.no_slugify:
|
if self.no_slugify:
|
||||||
path = path.replace("%{artist}", track['artist'])
|
path = path.replace("%{artist}", track['artist'])
|
||||||
path = path.replace("%{album}", track['album'])
|
path = path.replace("%{album}", track['album'])
|
||||||
path = path.replace("%{title}", track['title'])
|
path = path.replace("%{title}", track['title'])
|
||||||
else:
|
else:
|
||||||
path = path.replace("%{artist}", slugify(track['artist']))
|
path = path.replace("%{artist}", slugify_preset(track['artist']))
|
||||||
path = path.replace("%{album}", slugify(track['album']))
|
path = path.replace("%{album}", slugify_preset(track['album']))
|
||||||
path = path.replace("%{title}", slugify(track['title']))
|
path = path.replace("%{title}", slugify_preset(track['title']))
|
||||||
|
|
||||||
if track['track'] == "None":
|
if track['track'] == "None":
|
||||||
path = path.replace("%{track}", "Single")
|
path = path.replace("%{track}", "Single")
|
||||||
|
@ -128,7 +144,7 @@ class BandcampDownloader:
|
||||||
self.num_tracks = len(album['tracks'])
|
self.num_tracks = len(album['tracks'])
|
||||||
self.track_num = track_index + 1
|
self.track_num = track_index + 1
|
||||||
|
|
||||||
filepath = self.template_to_path(track_meta) + ".tmp"
|
filepath = self.template_to_path(track_meta, self.ascii_only, self.ok_chars, self.space_char, self.keep_space, self.keep_upper) + ".tmp"
|
||||||
filename = filepath.rsplit('/', 1)[1]
|
filename = filepath.rsplit('/', 1)[1]
|
||||||
dirname = self.create_directory(filepath)
|
dirname = self.create_directory(filepath)
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import re
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
import demjson
|
import demjson
|
||||||
|
@ -37,11 +36,11 @@ class BandcampJSON:
|
||||||
js_data = self.js_to_json(script)
|
js_data = self.js_to_json(script)
|
||||||
self.json_data.append(js_data)
|
self.json_data.append(js_data)
|
||||||
|
|
||||||
def js_to_json(self, js_data):
|
@staticmethod
|
||||||
|
def js_to_json(js_data):
|
||||||
"""Convert JavaScript dictionary to JSON"""
|
"""Convert JavaScript dictionary to JSON"""
|
||||||
logging.debug(" Converting JS to JSON..")
|
logging.debug(" Converting JS to JSON..")
|
||||||
# Decode with demjson first to reformat keys and lists
|
# Decode with demjson first to reformat keys and lists
|
||||||
decoded_js = demjson.decode(js_data)
|
decoded_js = demjson.decode(js_data)
|
||||||
# Encode to make valid JSON, add to list of JSON strings
|
# Encode to make valid JSON, add to list of JSON strings
|
||||||
return demjson.encode(decoded_js)
|
return demjson.encode(decoded_js)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
Copyright (c) 2011, Mozilla Foundation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
3. Neither the name of unicode-slugify nor the names of its contributors
|
||||||
|
may be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,88 @@
|
||||||
|
# -*- coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import six
|
||||||
|
import unicodedata
|
||||||
|
from unidecode import unidecode
|
||||||
|
|
||||||
|
|
||||||
|
def smart_text(s, encoding='utf-8', errors='strict'):
    """Coerce ``s`` to a text (unicode) string.

    :param s: Value to convert; text is returned unchanged.
    :param encoding: Codec used when ``s`` has to be decoded from bytes.
    :param errors: Error handling scheme handed to the decoder.
    :return: ``s`` as ``six.text_type``
    """
    to_text = six.text_type

    # Already text: nothing to do.
    if isinstance(s, to_text):
        return s

    # A string type that is not text (a Python 2 byte string).
    if isinstance(s, six.string_types):
        return to_text(s)

    if six.PY3:
        # Only bytes need an explicit decode; anything else is stringified.
        if isinstance(s, bytes):
            return to_text(s, encoding, errors)
        return to_text(s)

    # Python 2, non-string object.
    if hasattr(s, '__unicode__'):
        return to_text(s)
    return to_text(bytes(s), encoding, errors)
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize(text, ok):
|
||||||
|
rv = []
|
||||||
|
for c in text:
|
||||||
|
cat = unicodedata.category(c)[0]
|
||||||
|
if cat in 'LN' or c in ok:
|
||||||
|
rv.append(c)
|
||||||
|
elif cat == 'Z': # space
|
||||||
|
rv.append(' ')
|
||||||
|
return ''.join(rv).strip()
|
||||||
|
|
||||||
|
|
||||||
|
# Extra characters outside of alphanumerics that we'll allow.
SLUG_OK = '-_~'


def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False, space_replacement='-'):
    """
    Creates a unicode slug for given string with several options.

    L and N signify letter/number.
    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table

    :param s: Your unicode string.
    :param ok: Extra characters outside of alphanumerics to be allowed.
               Default is '-_~'
    :param lower: Lower the output string.
                  Default is True
    :param spaces: True allows spaces, False replaces a space with the "space_replacement" param
    :param only_ascii: True to replace non-ASCII unicode characters with
                       their ASCII representations.
    :param space_replacement: Char used to replace spaces if "spaces" is False.
                              Default is dash ("-") or first char in ok if dash not allowed
    :type s: String
    :type ok: String
    :type lower: Bool
    :type spaces: Bool
    :type only_ascii: Bool
    :type space_replacement: String
    :return: Slugified unicode string
    :raises ValueError: if "only_ascii" is True and "ok" contains non-ASCII characters

    """

    if only_ascii and ok != SLUG_OK:
        # Validate by encoding the text form of "ok". Probing for a
        # ``.decode`` attribute would skip the check on Python 3 text.
        # UnicodeError covers both the encode and the decode failure.
        try:
            smart_text(ok).encode('ascii')
        except UnicodeError:
            raise ValueError(('You can not use "only_ascii=True" with '
                              'a non ascii available chars in "ok" ("%s" given)') % (ok,))

    new = _sanitize(unicodedata.normalize('NFKC', smart_text(s)), ok)
    if only_ascii:
        # unidecode may introduce characters that are not allowed, so the
        # transliterated text is filtered a second time.
        new = _sanitize(smart_text(unidecode(new)), ok)
    if not spaces:
        if space_replacement and space_replacement not in ok:
            space_replacement = ok[0] if ok else ''
        # The replacement is user supplied: escape it so characters such as
        # "^", "\" or "]" are taken literally inside the character class.
        separator_run = r'[%s\s]+' % re.escape(space_replacement)
        # A callable keeps re.sub from interpreting backslashes in the
        # replacement as template escapes.
        new = re.sub(separator_run, lambda _match: space_replacement, new)
    if lower:
        new = new.lower()

    return new
|
|
@ -1,10 +1,10 @@
|
||||||
--index-url https://pypi.python.org/simple/
|
--index-url https://pypi.python.org/simple/
|
||||||
|
|
||||||
beautifulsoup4==4.6.0
|
beautifulsoup4==4.9.3
|
||||||
demjson==2.2.4
|
demjson==2.2.4
|
||||||
docopt==0.6.2
|
docopt==0.6.2
|
||||||
mutagen==1.38
|
mutagen==1.45.1
|
||||||
requests==2.18.4
|
requests==2.25.1
|
||||||
unicode-slugify==0.1.3
|
unicode-slugify==0.1.3
|
||||||
mock==2.0.0
|
mock==4.0.3
|
||||||
chardet==3.0.4
|
chardet==4.0.0
|
||||||
|
|
Loading…
Reference in New Issue