89 lines
2.4 KiB
Python
89 lines
2.4 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
from __future__ import unicode_literals
|
||
|
|
||
|
import re
|
||
|
import six
|
||
|
import unicodedata
|
||
|
from unidecode import unidecode
|
||
|
|
||
|
|
||
|
def smart_text(s, encoding='utf-8', errors='strict'):
    """
    Coerce ``s`` to a text (unicode) string.

    Text input is returned unchanged, byte strings are decoded using
    ``encoding`` and ``errors``, and any other object is converted through
    its text representation.

    :param s: Object to convert.
    :param encoding: Codec used to decode byte strings.
                     Default is 'utf-8'
    :param errors: Error handler passed to the decoder.
                   Default is 'strict'
    :type encoding: String
    :type errors: String
    :return: Text version of ``s``
    :raises UnicodeDecodeError: If a byte string cannot be decoded with
                                ``encoding`` and ``errors`` is 'strict'.
    """
    if isinstance(s, six.text_type):
        return s

    if isinstance(s, six.string_types):
        # Only a Python 2 byte ``str`` gets here (on Python 3 every string
        # type is already text). Decode it explicitly so ``encoding`` and
        # ``errors`` are honoured instead of the implicit ASCII codec.
        return s.decode(encoding, errors)

    if six.PY3:
        if isinstance(s, bytes):
            return six.text_type(s, encoding, errors)
        return six.text_type(s)

    # Python 2, non-string object: prefer its own unicode conversion,
    # otherwise decode its byte representation.
    if hasattr(s, '__unicode__'):
        return six.text_type(s)
    return six.text_type(bytes(s), encoding, errors)
|
||
|
|
||
|
|
||
|
def _sanitize(text, ok):
|
||
|
rv = []
|
||
|
for c in text:
|
||
|
cat = unicodedata.category(c)[0]
|
||
|
if cat in 'LN' or c in ok:
|
||
|
rv.append(c)
|
||
|
elif cat == 'Z': # space
|
||
|
rv.append(' ')
|
||
|
return ''.join(rv).strip()
|
||
|
|
||
|
|
||
|
# Extra characters outside of alphanumerics that we'll allow.
|
||
|
# Used as the default for slugify()'s ``ok`` argument. slugify() keeps its
# ``space_replacement`` only when that string is found in ``ok``; the default
# '-' replacement is kept because it appears here.
SLUG_OK = '-_~'
|
||
|
|
||
|
|
||
|
def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False, space_replacement='-'):
    """
    Creates a unicode slug for given string with several options.

    L and N signify letter/number.
    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table

    :param s: Your unicode string.
    :param ok: Extra characters outside of alphanumerics to be allowed.
               Default is '-_~'
    :param lower: Lower the output string.
                  Default is True
    :param spaces: True allows spaces, False replaces a space with the "space_replacement" param
    :param only_ascii: True to replace non-ASCII unicode characters with
                       their ASCII representations.
    :param space_replacement: Char used to replace spaces if "spaces" is False.
                              Default is dash ("-") or first char in ok if dash not allowed
    :type s: String
    :type ok: String
    :type lower: Bool
    :type spaces: Bool
    :type only_ascii: Bool
    :type space_replacement: String
    :return: Slugified unicode string
    :raises ValueError: If "only_ascii" is True and "ok" contains non-ASCII
                        characters.

    """

    if only_ascii and ok != SLUG_OK and isinstance(ok, (six.text_type, bytes)):
        # Validate both text and byte strings. UnicodeError is the common
        # base of the encode and decode errors raised below.
        try:
            if isinstance(ok, bytes):
                ok.decode('ascii')
            else:
                ok.encode('ascii')
        except UnicodeError:
            # Decode leniently so building the message cannot itself fail.
            shown = smart_text(ok, errors='replace')
            raise ValueError(('You can not use "only_ascii=True" with '
                              'a non ascii available chars in "ok" ("%s" given)') % shown)

    new = _sanitize(unicodedata.normalize('NFKC', smart_text(s)), ok)
    if only_ascii:
        # Transliteration can introduce new punctuation, so filter again.
        new = _sanitize(smart_text(unidecode(new)), ok)
    if not spaces:
        if space_replacement and space_replacement not in ok:
            space_replacement = ok[0] if ok else ''
        # Escape the replacement so characters such as '^', ']' or '\' are
        # taken literally inside the character class.
        pattern = r'[%s\s]+' % re.escape(space_replacement)
        # A function replacement inserts the string verbatim, without
        # re.sub() interpreting backslashes in it.
        new = re.sub(pattern, lambda match: space_replacement, new)
    if lower:
        new = new.lower()

    return new
|