implement a way to shorten filenames with east-asian characters
(#1377) Setting 'output.shorten' to "eaw" (East-Asian Width) uses a slower algorithm that also considers characters with a width > 1.
This commit is contained in:
parent
2ff2974353
commit
bd845303ad
@ -2531,6 +2531,9 @@ Description
|
||||
Controls whether the output strings should be shortened to fit
|
||||
on one console line.
|
||||
|
||||
Set this option to ``"eaw"`` to also work with east-asian characters
|
||||
with a display width greater than 1.
|
||||
|
||||
|
||||
output.skip
|
||||
-----------
|
||||
|
@ -10,6 +10,7 @@ import os
|
||||
import sys
|
||||
import shutil
|
||||
import logging
|
||||
import unicodedata
|
||||
from . import config, util
|
||||
|
||||
|
||||
@ -270,9 +271,14 @@ class PipeOutput(NullOutput):
|
||||
class TerminalOutput(NullOutput):
|
||||
|
||||
def __init__(self):
|
||||
self.short = config.get(("output",), "shorten", True)
|
||||
if self.short:
|
||||
self.width = shutil.get_terminal_size().columns - OFFSET
|
||||
shorten = config.get(("output",), "shorten", True)
|
||||
if shorten:
|
||||
func = shorten_string_eaw if shorten == "eaw" else shorten_string
|
||||
limit = shutil.get_terminal_size().columns - OFFSET
|
||||
sep = CHAR_ELLIPSIES
|
||||
self.shorten = lambda txt: func(txt, limit, sep)
|
||||
else:
|
||||
self.shorten = util.identity
|
||||
|
||||
def start(self, path):
|
||||
print(self.shorten(" " + path), end="", flush=True)
|
||||
@ -283,17 +289,6 @@ class TerminalOutput(NullOutput):
|
||||
def success(self, path, tries):
|
||||
print("\r", self.shorten(CHAR_SUCCESS + path), sep="")
|
||||
|
||||
def shorten(self, txt):
|
||||
"""Reduce the length of 'txt' to the width of the terminal"""
|
||||
if self.short and len(txt) > self.width:
|
||||
hwidth = self.width // 2 - OFFSET
|
||||
return "".join((
|
||||
txt[:hwidth-1],
|
||||
CHAR_ELLIPSIES,
|
||||
txt[-hwidth-(self.width % 2):]
|
||||
))
|
||||
return txt
|
||||
|
||||
|
||||
class ColorOutput(TerminalOutput):
|
||||
|
||||
@ -307,6 +302,56 @@ class ColorOutput(TerminalOutput):
|
||||
print("\r\033[1;32m", self.shorten(path), "\033[0m", sep="")
|
||||
|
||||
|
||||
class EAWCache(dict):
    """Cache mapping a single character to its display width (1 or 2)"""

    def __missing__(self, key):
        # East-Asian-Width classes 'W' (wide) and 'F' (fullwidth) take up
        # two columns; every other class is treated as one column.
        # The result is stored so each character is only looked up once.
        width = self[key] = \
            2 if unicodedata.east_asian_width(key) in "WF" else 1
        return width


def shorten_string(txt, limit, sep="…"):
    """Limit width of 'txt'; assume all characters have a width of 1

    Returns 'txt' unchanged when it has at most 'limit' characters.
    Otherwise the middle of 'txt' is replaced by 'sep' so that the result
    has exactly 'limit' characters.  When 'limit' leaves no room for any
    character of 'txt', only (a prefix of) 'sep' is returned, so the
    result is never longer than 'limit'.
    """
    if len(txt) <= limit:
        return txt

    budget = limit - len(sep)
    if budget <= 0:
        # A tail length of 0 would make 'txt[-0:]' the whole string,
        # so this case must not reach the slicing below.
        return sep[:max(limit, 0)]

    # The tail gets the extra character when 'budget' is odd.
    return txt[:budget // 2] + sep + txt[-((budget+1) // 2):]


def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
    """Limit width of 'txt'; check for east-asian characters with width > 1

    Works like shorten_string(), but measures 'txt' and 'sep' in display
    columns, using 'cache' to look up the width of each character.
    The default 'cache' is deliberately shared between calls.
    The result never occupies more than 'limit' columns.
    """
    char_widths = [cache[c] for c in txt]
    text_width = sum(char_widths)

    if text_width <= limit:
        # no shortening required
        return txt

    sep_widths = [cache[c] for c in sep]
    budget = limit - sum(sep_widths)

    if budget <= 0:
        # no room for any character of 'txt';
        # keep as much of 'sep' as fits into 'limit' columns
        room = limit
        keep = 0
        for width in sep_widths:
            if width > room:
                break
            room -= width
            keep += 1
        return sep[:keep]

    if text_width == len(txt):
        # all characters have a width of 1
        return txt[:budget // 2] + sep + txt[-((budget+1) // 2):]

    # wide characters
    # Take characters from the start while they fit into the left half.
    head = 0
    room = budget // 2
    for width in char_widths:
        if width > room:
            break
        room -= width
        head += 1

    # Columns left unused on the left side are handed to the right side.
    # Head and tail together never exceed 'budget' < 'text_width',
    # so the two parts cannot overlap.
    tail = 0
    room += (budget+1) // 2
    for width in reversed(char_widths):
        if width > room:
            break
        room -= width
        tail += 1

    # Slice from an absolute index: 'txt[-tail:]' would return the whole
    # string when 'tail' is 0.
    return txt[:head] + sep + txt[len(txt) - tail:]
|
||||
|
||||
|
||||
if util.WINDOWS:
|
||||
ANSI = os.environ.get("TERM") == "ANSI"
|
||||
OFFSET = 1
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
TESTS_CORE=(cache config cookies downloader extractor job oauth postprocessor text util)
|
||||
TESTS_CORE=(cache config cookies downloader extractor job oauth output postprocessor text util)
|
||||
TESTS_RESULTS=(results)
|
||||
|
||||
|
||||
|
156
test/test_output.py
Normal file
156
test/test_output.py
Normal file
@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from gallery_dl import output # noqa E402
|
||||
|
||||
|
||||
class TestShorten(unittest.TestCase):
    """Tests for output.shorten_string() (all characters have width 1)"""

    def test_shorten_noop(self, f=output.shorten_string):
        # strings that already fit are returned unchanged
        for text in ("", "foobar"):
            self.assertEqual(f(text, 10), text)

    def test_shorten(self, f=output.shorten_string):
        s = "01234567890123456789"  # string of length 20
        cases = (
            (30, s),
            (25, s),
            (20, s),
            (19, "012345678…123456789"),
            (18, "01234567…123456789"),
            (17, "01234567…23456789"),
            (16, "0123456…23456789"),
            (15, "0123456…3456789"),
            (14, "012345…3456789"),
            (13, "012345…456789"),
            (12, "01234…456789"),
            (11, "01234…56789"),
            (10, "0123…56789"),
            (9, "0123…6789"),
            (3, "0…9"),
            (2, "…9"),
        )
        for limit, expected in cases:
            self.assertEqual(f(s, limit), expected)

    def test_shorten_separator(self, f=output.shorten_string):
        s = "01234567890123456789"  # string of length 20
        cases = (
            (20, "|---|", s),
            (19, "|---|", "0123456|---|3456789"),
            (15, "|---|", "01234|---|56789"),
            (10, "|---|", "01|---|789"),

            (19, "...", "01234567...23456789"),
            (19, "..", "01234567..123456789"),
            (19, ".", "012345678.123456789"),
            (19, "", "0123456780123456789"),
        )
        for limit, sep, expected in cases:
            self.assertEqual(f(s, limit, sep), expected)
|
||||
|
||||
|
||||
class TestShortenEAW(unittest.TestCase):
    """Tests for output.shorten_string_eaw() (characters of width 1 and 2)"""

    def _check(self, f, s, cases):
        """Assert f(s, limit) == expected for every (limit, expected) pair"""
        for limit, expected in cases:
            self.assertEqual(f(s, limit), expected)

    def _check_sep(self, f, s, cases):
        """Assert f(s, limit, sep) == expected for every case triple"""
        for limit, sep, expected in cases:
            self.assertEqual(f(s, limit, sep), expected)

    def test_shorten_eaw_noop(self, f=output.shorten_string_eaw):
        # strings that already fit are returned unchanged
        for text in ("", "foobar"):
            self.assertEqual(f(text, 10), text)

    def test_shorten_eaw(self, f=output.shorten_string_eaw):
        s = "01234567890123456789"  # 20 ascii characters
        self._check(f, s, (
            (30, s),
            (25, s),
            (20, s),
            (19, "012345678…123456789"),
            (18, "01234567…123456789"),
            (17, "01234567…23456789"),
            (16, "0123456…23456789"),
            (15, "0123456…3456789"),
            (14, "012345…3456789"),
            (13, "012345…456789"),
            (12, "01234…456789"),
            (11, "01234…56789"),
            (10, "0123…56789"),
            (9, "0123…6789"),
            (3, "0…9"),
            (2, "…9"),
        ))

    def test_shorten_eaw_wide(self, f=output.shorten_string_eaw):
        s = "幻想郷幻想郷幻想郷幻想郷"  # 12 wide characters
        self._check(f, s, (
            (30, s),
            (25, s),
            (20, "幻想郷幻…想郷幻想郷"),
            (19, "幻想郷幻…想郷幻想郷"),
            (18, "幻想郷幻…郷幻想郷"),
            (17, "幻想郷幻…郷幻想郷"),
            (16, "幻想郷…郷幻想郷"),
            (15, "幻想郷…郷幻想郷"),
            (14, "幻想郷…幻想郷"),
            (13, "幻想郷…幻想郷"),
            (12, "幻想…幻想郷"),
            (11, "幻想…幻想郷"),
            (10, "幻想…想郷"),
            (9, "幻想…想郷"),
            (3, "…郷"),
        ))

    def test_shorten_eaw_mix(self, f=output.shorten_string_eaw):
        s = "幻-想-郷##幻-想-郷##幻-想-郷"  # mixed characters
        self._check(f, s, (
            (28, s),
            (25, "幻-想-郷##幻…郷##幻-想-郷"),

            (20, "幻-想-郷#…##幻-想-郷"),
            (19, "幻-想-郷#…#幻-想-郷"),
            (18, "幻-想-郷…#幻-想-郷"),
            (17, "幻-想-郷…幻-想-郷"),
            (16, "幻-想-…#幻-想-郷"),
            (15, "幻-想-…幻-想-郷"),
            (14, "幻-想-…-想-郷"),
            (13, "幻-想-…-想-郷"),
            (12, "幻-想…-想-郷"),
            (11, "幻-想…想-郷"),
            (10, "幻-…-想-郷"),
            (9, "幻-…想-郷"),
            (3, "…郷"),
        ))

    def test_shorten_eaw_separator(self, f=output.shorten_string_eaw):
        s = "01234567890123456789"  # 20 ascii characters
        self._check_sep(f, s, (
            (20, "|---|", s),
            (19, "|---|", "0123456|---|3456789"),
            (15, "|---|", "01234|---|56789"),
            (10, "|---|", "01|---|789"),

            (19, "...", "01234567...23456789"),
            (19, "..", "01234567..123456789"),
            (19, ".", "012345678.123456789"),
            (19, "", "0123456780123456789"),
        ))

    def test_shorten_eaw_separator_wide(self, f=output.shorten_string_eaw):
        s = "幻想郷幻想郷幻想郷幻想郷"  # 12 wide characters
        self._check_sep(f, s, (
            (24, "|---|", s),
            (19, "|---|", "幻想郷|---|郷幻想郷"),
            (15, "|---|", "幻想|---|幻想郷"),
            (10, "|---|", "幻|---|郷"),

            (19, "...", "幻想郷幻...郷幻想郷"),
            (19, "..", "幻想郷幻..郷幻想郷"),
            (19, ".", "幻想郷幻.想郷幻想郷"),
            (19, "", "幻想郷幻想郷幻想郷"),
        ))

    def test_shorten_eaw_separator_mix_(self, f=output.shorten_string_eaw):
        s = "幻-想-郷##幻-想-郷##幻-想-郷"  # mixed characters
        self._check_sep(f, s, (
            (30, "|---|", s),
            (19, "|---|", "幻-想-|---|幻-想-郷"),
            (15, "|---|", "幻-想|---|想-郷"),
            (10, "|---|", "幻|---|-郷"),

            (19, "...", "幻-想-郷...幻-想-郷"),
            (19, "..", "幻-想-郷..#幻-想-郷"),
            (19, ".", "幻-想-郷#.#幻-想-郷"),
            (19, "", "幻-想-郷###幻-想-郷"),
        ))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Loading…
x
Reference in New Issue
Block a user