python: optimize skipEverything()

Most of the time there is no start of a string at the current position, which
means all 10 strncasecmp() calls are executed for every character of the input.
This is very expensive: before this patch, this function alone takes 55% of the parser time.
When comparing by character (and avoiding further comparison if the first
character doesn't match), this function takes only 11% of the parser time
so the performance of the parser nearly doubles.

In addition, check for the "rb" prefix, which is valid in Python 3.

Ported from universal-ctags.
This commit is contained in:
Jiří Techet 2015-06-25 22:10:32 +02:00
parent f427a3a6e1
commit 6781ab30c5

View File

@@ -244,20 +244,27 @@ static const char *skipEverything (const char *cp)
match = 1;
/* these checks find unicode, binary (Python 3) and raw strings */
if (!match && (
!strncasecmp(cp, "u'", 2) || !strncasecmp(cp, "u\"", 2) ||
!strncasecmp(cp, "r'", 2) || !strncasecmp(cp, "r\"", 2) ||
!strncasecmp(cp, "b'", 2) || !strncasecmp(cp, "b\"", 2)))
if (!match)
{
match = 1;
cp += 1;
}
if (!match && (
!strncasecmp(cp, "ur'", 3) || !strncasecmp(cp, "ur\"", 3) ||
!strncasecmp(cp, "br'", 3) || !strncasecmp(cp, "br\"", 3)))
{
match = 1;
cp += 2;
boolean r_first = (*cp == 'r' || *cp == 'R');
/* "r" | "R" | "u" | "U" | "b" | "B" */
if (r_first || *cp == 'u' || *cp == 'U' || *cp == 'b' || *cp == 'B')
{
unsigned int i = 1;
/* r_first -> "rb" | "rB" | "Rb" | "RB"
!r_first -> "ur" | "UR" | "Ur" | "uR" | "br" | "Br" | "bR" | "BR" */
if (( r_first && (cp[i] == 'b' || cp[i] == 'B')) ||
(!r_first && (cp[i] == 'r' || cp[i] == 'R')))
i++;
if (cp[i] == '\'' || cp[i] == '"')
{
match = 1;
cp += i;
}
}
}
if (match)
{