Add linear tag remove path for cases where not many files are open

When tested with 200000 LOC python file (created by making many copies of scripts/create_py_tags.py), the tm_tags_remove_file_tags() function takes about 50% of the CPU time when only this file is open. After adding the linear path to tm_tags_remove_file_tags() it takes just about 2%. See the comment in the patch for more details.
2015-06-14 17:52:24 +02:00 · 2015-06-14 17:52:24 +02:00 · e26c9ba2ce
commit e26c9ba2ce
parent 935c389f28
1 changed files with 51 additions and 25 deletions
--- a/tagmanager/src/tm_tag.c
+++ b/tagmanager/src/tm_tag.c
@ -828,37 +828,63 @@ gboolean tm_tags_sort(GPtrArray *tags_array, TMTagAttrType *sort_attributes,
 void tm_tags_remove_file_tags(TMSourceFile *source_file, GPtrArray *tags_array)
 {
 	guint i;
-	GPtrArray *to_delete = g_ptr_array_sized_new(source_file->tags_array->len);
-	
-	for (i = 0; i < source_file->tags_array->len; i++)
+
+	/* Now we choose between an algorithm with complexity O(tags_array->len) and
+	 * O(source_file->tags_array->len * log(tags_array->len)). The latter algorithm
+	 * is better when tags_array contains many times more tags than
+	 * source_file->tags_array so instead of trying to find the removed tags
+	 * linearly, binary search is used. The constant 20 is more or less random
+	 * but seems to work well. It's exact value isn't so critical because it's
+	 * the extremes where the difference is the biggest: when
+	 * source_file->tags_array->len == tags_array->len (single file open) and
+	 * source_file->tags_array->len << tags_array->len (the number of tags
+	 * from the file is a small fraction of all tags).
+	 */
+	if (source_file->tags_array->len != 0 &&
+		tags_array->len / source_file->tags_array->len < 20)
 	{
-		guint j;
-		guint tag_count;
-		TMTag **found;
-		TMTag *tag = source_file->tags_array->pdata[i];
-		
-		found = tm_tags_find(tags_array, tag->name, FALSE, TRUE, &tag_count);
-		
-		for (j = 0; j < tag_count; j++)
+		for (i = 0; i < tags_array->len; i++)
 		{
-			if (*found != NULL && (*found)->file == source_file)
-			{
-				/* we cannot set the pointer to NULL now because the search wouldn't work */
-				g_ptr_array_add(to_delete, found);
-				/* no break - if there are multiple tags of the same name, we would 
-				 * always find the first instance and wouldn't remove others; duplicates
-				 * in the to_delete list aren't a problem */
-			}
-			found++;
+			TMTag *tag = tags_array->pdata[i];
+
+			if (tag->file == source_file)
+				tags_array->pdata[i] = NULL;
 		}
 	}
-
-	for (i = 0; i < to_delete->len; i++)
+	else
 	{
-		TMTag **tag = to_delete->pdata[i];
-		*tag = NULL;
+		GPtrArray *to_delete = g_ptr_array_sized_new(source_file->tags_array->len);
+
+		for (i = 0; i < source_file->tags_array->len; i++)
+		{
+			guint j;
+			guint tag_count;
+			TMTag **found;
+			TMTag *tag = source_file->tags_array->pdata[i];
+
+			found = tm_tags_find(tags_array, tag->name, FALSE, TRUE, &tag_count);
+
+			for (j = 0; j < tag_count; j++)
+			{
+				if (*found != NULL && (*found)->file == source_file)
+				{
+					/* we cannot set the pointer to NULL now because the search wouldn't work */
+					g_ptr_array_add(to_delete, found);
+					/* no break - if there are multiple tags of the same name, we would
+					 * always find the first instance and wouldn't remove others; duplicates
+					 * in the to_delete list aren't a problem */
+				}
+				found++;
+			}
+		}
+
+		for (i = 0; i < to_delete->len; i++)
+		{
+			TMTag **tag = to_delete->pdata[i];
+			*tag = NULL;
+		}
+		g_ptr_array_free(to_delete, TRUE);
 	}
-	g_ptr_array_free(to_delete, TRUE);

 	tm_tags_prune(tags_array);
 }