python: optimize skipEverything()

Most of the time the current position is not the start of a string, which means all 10 strncasecmp() calls are executed for every character of the input. This is very expensive: before this patch, this function alone takes 55% of the parser time. When comparing character by character (and skipping further comparisons if the first character doesn't match), this function takes only 11% of the parser time, so the performance of the parser nearly doubles. In addition, check for the "rb" prefix, which is possible in Python 3. Ported from universal-ctags.
2015-06-25 22:10:32 +02:00 · 2015-06-25 22:10:32 +02:00 · 6781ab30c5
commit 6781ab30c5
parent f427a3a6e1
1 changed files with 20 additions and 13 deletions
--- a/tagmanager/ctags/python.c
+++ b/tagmanager/ctags/python.c
@ -244,20 +244,27 @@ static const char *skipEverything (const char *cp)
 			match = 1;

 		/* these checks find unicode, binary (Python 3) and raw strings */
-		if (!match && (
-			!strncasecmp(cp, "u'", 2) || !strncasecmp(cp, "u\"", 2) ||
-			!strncasecmp(cp, "r'", 2) || !strncasecmp(cp, "r\"", 2) ||
-			!strncasecmp(cp, "b'", 2) || !strncasecmp(cp, "b\"", 2)))
+		if (!match)
+		{
+			boolean r_first = (*cp == 'r' || *cp == 'R');
+
+			/* "r" | "R" | "u" | "U" | "b" | "B" */
+			if (r_first || *cp == 'u' || *cp == 'U' ||  *cp == 'b' || *cp == 'B')
+			{
+				unsigned int i = 1;
+
+				/*  r_first -> "rb" | "rB" | "Rb" | "RB"
+				   !r_first -> "ur" | "UR" | "Ur" | "uR" | "br" | "Br" | "bR" | "BR" */
+				if (( r_first && (cp[i] == 'b' || cp[i] == 'B')) ||
+					(!r_first && (cp[i] == 'r' || cp[i] == 'R')))
+					i++;
+
+				if (cp[i] == '\'' || cp[i] == '"')
 				{
 					match = 1;
-			cp += 1;
+					cp += i;
+				}
 			}
-		if (!match && (
-			!strncasecmp(cp, "ur'", 3) || !strncasecmp(cp, "ur\"", 3) ||
-			!strncasecmp(cp, "br'", 3) || !strncasecmp(cp, "br\"", 3)))
-		{
-			match = 1;
-			cp += 2;
 		}
 		if (match)
 		{