Add a "prefix" search for non-scope autocompletion

The main reason for separating tm_workspace_find() into two parts is that
when matching only the prefix, the search may produce too many results:
we would need to go through all of them, return them and at the end
discard most of them.

For instance, when considering the Linux kernel project with 2300000 tags
and when autocompletion is set to be invoked after typing a single character,
we get on average something like 100000 results (tag_num/alphabet_size).
But from these 100000 results, we keep only the first 30, which we display
in the popup, and discard the rest, which means going through the list of
the 100000 tags and comparing them for no reason.

Thanks to using binary search for the start and the end of the sequence of
matching tags (added in a separate patch), we can get the start of the
sequence and the length of the sequence very quickly without going through
it.

For the prefix search we can limit the number of tags we are interested
in and go through at most this number of returned tags (to be precise,
times two, because we need to go both through the workspace array and
global tags array and remove the extras only after sorting the two).

It would be possible to combine both tm_workspace_find() and
tm_workspace_find_prefix() into a single function but the result is a bit
hard to read because some of the logic is used only in tm_workspace_find()
and some only in tm_workspace_find_prefix() so even though there is some
code duplication, I believe it's easier to understand this way.
This commit is contained in:
Jiří Techet 2016-01-15 15:46:00 +01:00
parent 5b4c6f96b2
commit ad77ee15da
3 changed files with 77 additions and 32 deletions

View File

@ -1903,7 +1903,6 @@ static gchar *find_calltip(const gchar *word, GeanyFiletype *ft)
GPtrArray *tags;
const TMTagType arg_types = tm_tag_function_t | tm_tag_prototype_t |
tm_tag_method_t | tm_tag_macro_with_arg_t;
TMTagAttrType *attrs = NULL;
TMTag *tag;
GString *str = NULL;
guint i;
@ -1911,7 +1910,7 @@ static gchar *find_calltip(const gchar *word, GeanyFiletype *ft)
g_return_val_if_fail(ft && word && *word, NULL);
/* use all types in case language uses wrong tag type e.g. python "members" instead of "methods" */
tags = tm_workspace_find(word, NULL, tm_tag_max_t, attrs, FALSE, ft->lang);
tags = tm_workspace_find(word, NULL, tm_tag_max_t, NULL, ft->lang);
if (tags->len == 0)
{
g_ptr_array_free(tags, TRUE);
@ -1925,8 +1924,7 @@ static gchar *find_calltip(const gchar *word, GeanyFiletype *ft)
{
g_ptr_array_free(tags, TRUE);
/* user typed e.g. 'new Classname(' so lookup D constructor Classname::this() */
tags = tm_workspace_find("this", tag->name,
arg_types, attrs, FALSE, ft->lang);
tags = tm_workspace_find("this", tag->name, arg_types, NULL, ft->lang);
if (tags->len == 0)
{
g_ptr_array_free(tags, TRUE);
@ -2108,7 +2106,6 @@ autocomplete_html(ScintillaObject *sci, const gchar *root, gsize rootlen)
static gboolean
autocomplete_tags(GeanyEditor *editor, const gchar *root, gsize rootlen)
{
TMTagAttrType attrs[] = { tm_tag_attr_name_t, 0 };
GPtrArray *tags;
GeanyDocument *doc;
gboolean found;
@ -2117,7 +2114,7 @@ autocomplete_tags(GeanyEditor *editor, const gchar *root, gsize rootlen)
doc = editor->document;
tags = tm_workspace_find(root, NULL, tm_tag_max_t, attrs, TRUE, doc->file_type->lang);
tags = tm_workspace_find_prefix(root, doc->file_type->lang, editor_prefs.autocompletion_max_entries);
found = tags->len > 0;
if (found)
show_tags_list(editor, tags, rootlen);

View File

@ -700,28 +700,26 @@ static gboolean langs_compatible(langType lang, langType other)
}
static guint fill_find_tags_array(GPtrArray *dst, const GPtrArray *src,
const char *name, const char *scope, TMTagType type, gboolean partial, langType lang)
static void fill_find_tags_array(GPtrArray *dst, const GPtrArray *src,
const char *name, const char *scope, TMTagType type, langType lang)
{
TMTag **matches;
guint tagIter;
guint tagCount;
TMTag **tag;
guint i, num;
if (!src || !dst || !name || !*name)
return 0;
return;
matches = tm_tags_find(src, name, partial, &tagCount);
for (tagIter = 0; tagIter < tagCount; ++tagIter)
tag = tm_tags_find(src, name, FALSE, &num);
for (i = 0; i < num; ++i)
{
if ((type & (*matches)->type) &&
langs_compatible(lang, (*matches)->lang) &&
(!scope || g_strcmp0((*matches)->scope, scope) == 0))
g_ptr_array_add(dst, *matches);
matches++;
if ((type & (*tag)->type) &&
langs_compatible(lang, (*tag)->lang) &&
(!scope || g_strcmp0((*tag)->scope, scope) == 0))
{
g_ptr_array_add(dst, *tag);
}
tag++;
}
return dst->len;
}
@ -730,18 +728,17 @@ static guint fill_find_tags_array(GPtrArray *dst, const GPtrArray *src,
@param scope The scope name of the tag to find, or NULL.
@param type The tag types to return (TMTagType). Can be a bitmask.
@param attrs The attributes to sort and dedup on (0 terminated integer array).
@param partial Whether partial match is allowed.
@param lang Specifies the language(see the table in parsers.h) of the tags to be found,
-1 for all
@return Array of matching tags.
*/
GPtrArray *tm_workspace_find(const char *name, const char *scope, TMTagType type,
TMTagAttrType *attrs, gboolean partial, langType lang)
TMTagAttrType *attrs, langType lang)
{
GPtrArray *tags = g_ptr_array_new();
fill_find_tags_array(tags, theWorkspace->tags_array, name, scope, type, partial, lang);
fill_find_tags_array(tags, theWorkspace->global_tags, name, scope, type, partial, lang);
fill_find_tags_array(tags, theWorkspace->tags_array, name, scope, type, lang);
fill_find_tags_array(tags, theWorkspace->global_tags, name, scope, type, lang);
if (attrs)
tm_tags_sort(tags, attrs, TRUE, FALSE);
@ -750,6 +747,56 @@ GPtrArray *tm_workspace_find(const char *name, const char *scope, TMTagType type
}
/* Appends to dst at most max_num of the tags that tm_tags_find() reports as
 * partial (prefix) matches of name in src.
 * A match is skipped when its language is not compatible with lang, when it is
 * an anonymous tag, or when its name equals the name of the tag appended just
 * before it (presumably src is sorted by name, so this removes duplicates -
 * confirm against tm_tags_find()).
 * Nothing is done when src, dst or name is NULL, or when name is empty. */
static void fill_find_tags_array_prefix(GPtrArray *dst, const GPtrArray *src,
	const char *name, langType lang, guint max_num)
{
	TMTag **matches;
	TMTag *prev = NULL;
	guint n_matches, idx;
	guint n_added = 0;

	if (!src || !dst || !name || !*name)
		return;

	matches = tm_tags_find(src, name, TRUE, &n_matches);

	/* stop as soon as enough tags were collected so the whole (possibly huge)
	 * sequence of matches does not have to be walked */
	for (idx = 0; idx < n_matches && n_added < max_num; idx++)
	{
		TMTag *cur = matches[idx];

		if (!langs_compatible(lang, cur->lang))
			continue;
		if (tm_tag_is_anon(cur))
			continue;
		/* same name as the previously appended tag - keep only the first one */
		if (prev && g_strcmp0(prev->name, cur->name) == 0)
			continue;

		g_ptr_array_add(dst, cur);
		prev = cur;
		n_added++;
	}
}
/* Looks up tags starting with the given prefix in both the workspace tags and
 the global tags. The returned array is sorted by tag name and every name
 appears in it at most once.
 @param prefix The prefix of the tag to find.
 @param lang Specifies the language(see the table in parsers.h) of the tags to be found,
 -1 for all.
 @param max_num The maximum number of tags to return.
 @return Newly created array of matching tags sorted by their name.
*/
GPtrArray *tm_workspace_find_prefix(const char *prefix, langType lang, guint max_num)
{
	TMTagAttrType sort_attrs[] = { tm_tag_attr_name_t, 0 };
	GPtrArray *found = g_ptr_array_new();

	/* each source contributes up to max_num tags, so up to 2 * max_num tags
	 * may be collected here */
	fill_find_tags_array_prefix(found, theWorkspace->tags_array, prefix, lang, max_num);
	fill_find_tags_array_prefix(found, theWorkspace->global_tags, prefix, lang, max_num);

	/* merge the two partial results by name; NOTE(review): the TRUE argument
	 * is presumably the "dedup" flag of tm_tags_sort() - confirm */
	tm_tags_sort(found, sort_attrs, TRUE, FALSE);

	/* drop the extras only now that both sources are merged and sorted */
	if (max_num < found->len)
		found->len = max_num;

	return found;
}
/* Gets all members of type_tag; search them inside the all array.
* The namespace parameter determines whether we are performing the "namespace"
* search (user has typed something like "A::" where A is a type) or "scope" search
@ -839,11 +886,11 @@ find_scope_members (const GPtrArray *tags_array, const char *name, langType lang
* anon_struct_* and searching for it in the whole workspace returns
* too many (wrong) results. */
fill_find_tags_array(type_tags, tag->file->tags_array, type_name,
NULL, types, FALSE, lang);
NULL, types, lang);
typedef_struct = type_tags->len > 0;
}
if (type_tags->len == 0)
fill_find_tags_array(type_tags, tags_array, type_name, NULL, types, FALSE, lang);
fill_find_tags_array(type_tags, tags_array, type_name, NULL, types, lang);
tag = NULL;
for (j = 0; j < type_tags->len; j++)
@ -918,8 +965,7 @@ static gboolean member_at_method_scope(const GPtrArray *tags, const gchar *metho
GPtrArray *cls_tags = g_ptr_array_new();
/* check whether the class exists */
fill_find_tags_array(cls_tags, src, cls, cls_scope, TM_TYPE_WITH_MEMBERS,
FALSE, lang);
fill_find_tags_array(cls_tags, src, cls, cls_scope, TM_TYPE_WITH_MEMBERS, lang);
ret = cls_tags->len > 0;
g_ptr_array_free(cls_tags, TRUE);
}
@ -999,7 +1045,7 @@ tm_workspace_find_scope_members (TMSourceFile *source_file, const char *name,
tag_type = function_types;
/* tags corresponding to the variable/type name */
tags = tm_workspace_find(name, NULL, tag_type, NULL, FALSE, lang);
tags = tm_workspace_find(name, NULL, tag_type, NULL, lang);
/* Start searching inside the source file, continue with workspace tags and
* end with global tags. This way we find the "closest" tag to the current

View File

@ -56,7 +56,9 @@ gboolean tm_workspace_create_global_tags(const char *pre_process, const char **i
int includes_count, const char *tags_file, int lang);
GPtrArray *tm_workspace_find(const char *name, const char *scope, TMTagType type,
TMTagAttrType *attrs, gboolean partial, langType lang);
TMTagAttrType *attrs, langType lang);
GPtrArray *tm_workspace_find_prefix(const char *prefix, langType lang, guint max_num);
GPtrArray *tm_workspace_find_scope_members (TMSourceFile *source_file, const char *name,
gboolean function, gboolean member, const gchar *current_scope);