utils: fix suboptimal elipsis substitution by utils_strv_shorten_file_list()

Because utils_strv_find_lcs() didn't consisider path component boundaries
it have have found substrings that are longest in itself but not so ideal
when utils_strv_shorten_file_list() applied path boundaries.

utils_strv_find_lcs() now can optionally restrict the substring between
delimiters (i.e. dir separators). In that mode it will find the longest
substring that is also sorrounded by the delimiters (there may be more
delimiters inside the string).

The unit test that demonstrated the deficient is fixed since
now the expected substitution takes place.
This commit is contained in:
Thomas Martitz 2019-08-16 14:58:09 +02:00
parent 64a32db5e1
commit 30a486d624
3 changed files with 98 additions and 34 deletions

View File

@ -2086,7 +2086,7 @@ gchar *utils_strv_find_common_prefix(gchar **strv, gssize strv_len)
* @return The common prefix that is part of all strings. * @return The common prefix that is part of all strings.
*/ */
GEANY_EXPORT_SYMBOL GEANY_EXPORT_SYMBOL
gchar *utils_strv_find_lcs(gchar **strv, gssize strv_len) gchar *utils_strv_find_lcs(gchar **strv, gssize strv_len, const gchar *delim)
{ {
gchar *first, *_sub, *sub; gchar *first, *_sub, *sub;
gsize num; gsize num;
@ -2112,8 +2112,18 @@ gchar *utils_strv_find_lcs(gchar **strv, gssize strv_len)
/* No point in continuing if the remainder is too short */ /* No point in continuing if the remainder is too short */
if (max > chars_left) if (max > chars_left)
break; break;
/* If delimiters are given, we only need to compare substrings which start and
* end with one of them, so skip any non-delim chars at front ... */
if (NZV(delim) && (strchr(delim, _sub[0]) == NULL))
continue;
for (n_chars = 1; n_chars <= chars_left; n_chars++) for (n_chars = 1; n_chars <= chars_left; n_chars++)
{ {
if (NZV(delim))
{ /* ... and advance to the next delim char at the end, if any */
if (!_sub[n_chars] || strchr(delim, _sub[n_chars]) == NULL)
continue;
n_chars += 1;
}
g_strlcpy(sub, _sub, n_chars+1); g_strlcpy(sub, _sub, n_chars+1);
found = 1; found = 1;
for (gsize i = 1; i < num; i++) for (gsize i = 1; i < num; i++)
@ -2199,7 +2209,7 @@ gchar **utils_strv_shorten_file_list(gchar **file_names, gssize file_names_len)
} }
lcs_len = 0; lcs_len = 0;
substring = (i == num) ? utils_strv_find_lcs(names, num) : NULL; substring = (i == num) ? utils_strv_find_lcs(names, num, G_DIR_SEPARATOR_S"/") : NULL;
if (NZV(substring)) if (NZV(substring))
{ {
/* Strip leading component. */ /* Strip leading component. */

View File

@ -302,7 +302,7 @@ gchar **utils_strv_join(gchar **first, gchar **second) G_GNUC_WARN_UNUSED_RESULT
gchar *utils_strv_find_common_prefix(gchar **strv, gssize strv_len) G_GNUC_WARN_UNUSED_RESULT; gchar *utils_strv_find_common_prefix(gchar **strv, gssize strv_len) G_GNUC_WARN_UNUSED_RESULT;
gchar *utils_strv_find_lcs(gchar **strv, gssize strv_len) G_GNUC_WARN_UNUSED_RESULT; gchar *utils_strv_find_lcs(gchar **strv, gssize strv_len, const gchar *delim) G_GNUC_WARN_UNUSED_RESULT;
gchar **utils_strv_shorten_file_list(gchar **file_names, gssize file_names_len); gchar **utils_strv_shorten_file_list(gchar **file_names, gssize file_names_len);

View File

@ -106,51 +106,86 @@ static void test_utils_strv_find_common_prefix(void)
g_strfreev(data); g_strfreev(data);
} }
#define DIR_SEP "\\/"
void test_utils_strv_find_lcs(void) void test_utils_strv_find_lcs(void)
{ {
gchar **data, *s; gchar **data, *s;
s = utils_strv_find_lcs(NULL, 0); s = utils_strv_find_lcs(NULL, 0, "");
g_assert_null(s); g_assert_null(s);
data = utils_strv_new("", NULL); data = utils_strv_new("", NULL);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, ""); g_assert_cmpstr(s, ==, "");
g_free(s); g_free(s);
g_strfreev(data); g_strfreev(data);
data = utils_strv_new("1", "2", "3", NULL); data = utils_strv_new("1", "2", "3", NULL);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "");
g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, ""); g_assert_cmpstr(s, ==, "");
g_free(s); g_free(s);
g_strfreev(data); g_strfreev(data);
data = utils_strv_new("abc", "amn", "axy", NULL); data = utils_strv_new("abc", "amn", "axy", NULL);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "a"); g_assert_cmpstr(s, ==, "a");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "");
g_free(s);
g_strfreev(data);
data = utils_strv_new("bca", "mna", "xya", NULL);
s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "a");
g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "");
g_free(s);
g_strfreev(data); g_strfreev(data);
data = utils_strv_new("abc", "", "axy", NULL); data = utils_strv_new("abc", "", "axy", NULL);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "");
g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "");
g_free(s);
g_strfreev(data);
data = utils_strv_new("a123b", "b123c", "c123d", NULL);
s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "123");
g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, ""); g_assert_cmpstr(s, ==, "");
g_free(s); g_free(s);
g_strfreev(data); g_strfreev(data);
data = utils_strv_new("22", "23", "33", NULL); data = utils_strv_new("22", "23", "33", NULL);
s = utils_strv_find_lcs(data, 1); s = utils_strv_find_lcs(data, 1, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "22"); g_assert_cmpstr(s, ==, "22");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, 2); s = utils_strv_find_lcs(data, 2, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "2"); g_assert_cmpstr(s, ==, "2");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, 3); s = utils_strv_find_lcs(data, 3, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, ""); g_assert_cmpstr(s, ==, "");
g_free(s); g_free(s);
@ -163,48 +198,77 @@ void test_utils_strv_find_lcs(void)
"/home/user/src/geany/src/main.c", "/home/user/src/geany/src/main.c",
"/home/user/src/geany-plugins/addons/src/addons.c", "/home/user/src/geany-plugins/addons/src/addons.c",
NULL); NULL);
s = utils_strv_find_lcs(data, 4); s = utils_strv_find_lcs(data, 4, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/home/user/src/geany/src/s"); g_assert_cmpstr(s, ==, "/home/user/src/geany/src/s");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, 5); s = utils_strv_find_lcs(data, 4, DIR_SEP);
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/home/user/src/geany/src/"); g_assert_cmpstr(s, ==, "/home/user/src/geany/src/");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, 5, "");
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/home/user/src/geany/src/");
g_free(s);
s = utils_strv_find_lcs(data, 5, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/home/user/src/geany/src/");
g_free(s);
s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/home/user/src/geany"); g_assert_cmpstr(s, ==, "/home/user/src/geany");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/home/user/src/");
g_free(s);
g_strfreev(data); g_strfreev(data);
data = utils_strv_new("/src/a/app-1.2.3/src/lib/module/source.c", data = utils_strv_new("/src/a/app-1.2.3/src/lib/module/source.c",
"/src/b/app-2.2.3/src/module/source.c", NULL); "/src/b/app-2.2.3/src/module/source.c", NULL);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/module/source.c"); g_assert_cmpstr(s, ==, "/module/source.c");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/module/");
g_free(s);
g_strfreev(data);
data = utils_strv_new("/src/a/app-1.2.3/src/lib/module\\source.c",
"/src/b/app-2.2.3/src/module\\source.c", NULL);
s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/module\\source.c");
g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/module\\");
g_free(s);
g_strfreev(data); g_strfreev(data);
data = utils_strv_new("/src/a/app-1.2.3/src/lib/module/", data = utils_strv_new("/src/a/app-1.2.3/src/lib/module/",
"/src/b/app-2.2.3/src/module/", NULL); "/src/b/app-2.2.3/src/module/", NULL);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, ".2.3/src/"); g_assert_cmpstr(s, ==, ".2.3/src/");
g_free(s); g_free(s);
g_strfreev(data); s = utils_strv_find_lcs(data, -1, DIR_SEP);
data = utils_strv_new("a123b", "b123c", "c123d", NULL);
s = utils_strv_find_lcs(data, -1);
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "123"); g_assert_cmpstr(s, ==, "/module/");
g_free(s); g_free(s);
g_strfreev(data); g_strfreev(data);
data = utils_strv_new("/usr/local/bin/geany", "/usr/bin/geany", "/home/user/src/geany/src/geany", NULL); data = utils_strv_new("/usr/local/bin/geany", "/usr/bin/geany", "/home/user/src/geany/src/geany", NULL);
s = utils_strv_find_lcs(data, -1); s = utils_strv_find_lcs(data, -1, "");
g_assert_nonnull(s); g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "/geany"); g_assert_cmpstr(s, ==, "/geany");
g_free(s); g_free(s);
s = utils_strv_find_lcs(data, -1, DIR_SEP);
g_assert_nonnull(s);
g_assert_cmpstr(s, ==, "");
g_free(s);
g_strfreev(data); g_strfreev(data);
} }
@ -333,21 +397,11 @@ void test_utils_strv_shorten_file_list(void)
g_strfreev(result); g_strfreev(result);
g_strfreev(data); g_strfreev(data);
/* This case shows a weakness. It would be better to elipsize "module". Instead,
* nothing is elipsized as the code determines ".2.3/src/" to be the
* longest common substring. Then it reduces it to directory components, so "src" remains.
* This is shorter than the threshold of 5 chars and consequently not elipsized
*
* Plain utils_strv_find_lcs() actually finds /module/source.c,
* but utils_strv_shorten_file_list() calls it without the file name part, so
* ".2.3/src/" is longer than "/module/". This is illustrated in the test
* test_utils_strv_find_lcs()
*/
data = utils_strv_new("/src/a/app-1.2.3/src/lib/module/source.c", data = utils_strv_new("/src/a/app-1.2.3/src/lib/module/source.c",
"/src/b/app-2.2.3/src/module/source.c", NULL); "/src/b/app-2.2.3/src/module/source.c", NULL);
result = utils_strv_shorten_file_list(data, -1); result = utils_strv_shorten_file_list(data, -1);
expected = utils_strv_new("a/app-1.2.3/src/lib/module/source.c", expected = utils_strv_new("a/app-1.2.3/src/lib/.../source.c",
"b/app-2.2.3/src/module/source.c", NULL); "b/app-2.2.3/src/.../source.c", NULL);
g_assert_true(strv_eq(result, expected)); g_assert_true(strv_eq(result, expected));
g_strfreev(expected); g_strfreev(expected);
g_strfreev(result); g_strfreev(result);