Do not use regular expressions for special sequences

This commit is contained in:
Yevgen Muntyan 2006-04-26 13:24:57 -05:00
parent 7ea6d2f457
commit dd5dbf1b5f
2 changed files with 544 additions and 44 deletions

View File

@ -24,6 +24,11 @@
/*
 * Single-byte character classification helpers.
 *
 * Every macro may evaluate its argument more than once, so do not pass
 * expressions with side effects (e.g. *p++).
 */

/* True when the value, truncated to guint8, is below 128. */
#define CHAR_IS_ASCII(ch__) ((guint8) (ch__) < 128)
/* '0'..'9' */
#define CHAR_IS_DIGIT(c__) ((c__) >= '0' && (c__) <= '9')
/* '0'..'7' */
#define CHAR_IS_OCTAL(c__) ((c__) >= '0' && (c__) <= '7')
/* '0'..'9', 'A'..'F', 'a'..'f' */
#define CHAR_IS_HEX(c__) (((c__) >= '0' && (c__) <= '9') || ((c__) >= 'A' && (c__) <= 'F') || ((c__) >= 'a' && (c__) <= 'f'))
/* Underscore or an ASCII alphanumeric character. */
#define CHAR_IS_WORD(c__) ((c__) == '_' || g_ascii_isalnum (c__))
/* Space or horizontal tab only. */
#define CHAR_IS_SPACE(c__) ((c__) == ' ' || (c__) == '\t')
/* Maps 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
#define ASCII_TOLOWER(c__) \
(g_ascii_isupper (c__) ? (c__) - 'A' + 'a' : (c__))

View File

@ -843,8 +843,26 @@ moo_rule_include_new (MooContext *ctx)
/* Special sequences
*/
#if 0
#define ISDIGIT(c__) (c__ >= '0' && c__ <= '9')
/*
 * find_digit:
 * Scans [string, limit] for the first digit that starts a number, i.e. a
 * digit that sits at line_start or whose preceding character is not a word
 * character.
 *
 * Returns a pointer to that digit, or NULL if there is none in the range.
 */
inline static char *
find_digit (char *string,
            char *limit,
            char *line_start)
{
    while (string <= limit)
    {
        /* Advance to the next digit. */
        while (string <= limit && !CHAR_IS_DIGIT (*string))
            string++;

        if (string > limit)
            return NULL;

        if (string == line_start || !CHAR_IS_WORD (string[-1]))
            return string;

        /* The digit is glued to a preceding word character.  Step over
         * this run of digits so the outer loop makes progress; without
         * this the scan would spin on the same character forever. */
        while (string <= limit && CHAR_IS_DIGIT (*string))
            string++;
    }

    return NULL;
}
static MooRule*
rule_int_match (MooRule *rule,
@ -852,48 +870,138 @@ rule_int_match (MooRule *rule,
MatchResult *result,
MatchFlags flags)
{
guint i;
char *limit = data->limit;
char *start = data->start;
if (flags & MATCH_START_ONLY)
limit = start;
while (start <= limit)
{
if (ISDIGIT(data->start[0]))
start = find_digit (start, limit, data->line_string);
if (!start)
return NULL;
for (i = 1; CHAR_IS_DIGIT (start[i]); ++i) ;
if (!CHAR_IS_WORD (start[i]))
{
guint i;
for (i = 1; ISDIGIT(data->start[i]); ++i) ;
result->match_start = data->start;
result->match_end = result->match_start + i;
result->match_start = start;
result->match_end = start + i;
result->match_len = i;
result->match_offset = -1;
return rule;
}
start = start + i;
}
return NULL;
}
/*
 * moo_rule_int_new:
 * Builds a rule whose matcher is rule_int_match and whose description
 * is "INT".  Yields NULL (via g_return_val_if_fail) when rule_new does.
 */
MooRule*
moo_rule_int_new (MooRuleFlags  flags,
                  const char   *style)
{
    MooRule *rule;

    rule = rule_new (flags, style, rule_int_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);

    /* Label the rule with a freshly duplicated description string. */
    rule->description = g_strdup ("INT");

    return rule;
}
/*
 * rule_float_match:
 * Hand-written matcher for floating point literals of the forms
 * "<digits>.<digits>", "<digits>." and ".<digits>".
 *
 * The candidate must not be glued to a preceding word character and must
 * not be followed by a word character.  With MATCH_START_ONLY the search
 * range is collapsed so that only a literal beginning at data->start is
 * considered.
 *
 * On success fills in *result and returns rule; otherwise returns NULL.
 *
 * NOTE(review): the digit scans read start[i] without checking limit, so
 * the text is presumably NUL-terminated -- confirm against the caller.
 */
static MooRule*
rule_float_match (MooRule     *rule,
                  MatchData   *data,
                  MatchResult *result,
                  MatchFlags   flags)
{
    guint i;
    char *limit = data->limit;
    char *start = data->start;

    if (flags & MATCH_START_ONLY)
        limit = start;

    while (start <= limit)
    {
        /* Find the next character that could begin a float. */
        while (start <= limit && !CHAR_IS_DIGIT (*start) && *start != '.')
            start++;

        if (start > limit)
            return NULL;

        if (*start == '.')
        {
            /* A dot right after a digit belongs to a number that was
             * already rejected; skip the dot and the digits after it. */
            if (start > data->line_string && CHAR_IS_DIGIT (start[-1]))
            {
                do start++;
                while (start <= limit && CHAR_IS_DIGIT (*start));
                continue;
            }

            /* A lone dot is not a number. */
            if (!CHAR_IS_DIGIT (start[1]))
            {
                start++;
                continue;
            }

            for (i = 2; CHAR_IS_DIGIT (start[i]); ++i) ;

            if (CHAR_IS_WORD (start[i]))
            {
                start = start + i;
                continue;
            }

            result->match_start = start;
            result->match_end = start + i;
            result->match_len = i;
            result->match_offset = -1;
            return rule;
        }
        else
        {
            /* Digits glued to a preceding word character are skipped. */
            if (start > data->line_string && CHAR_IS_WORD (start[-1]))
            {
                do start++;
                while (start <= limit && CHAR_IS_DIGIT (*start));
                continue;
            }

            /* Integer part; it must be followed by a dot. */
            for (i = 1; CHAR_IS_DIGIT (start[i]); ++i) ;

            if (start[i] != '.')
            {
                start = start + i;
                continue;
            }

            /* Optional fractional digits after the dot. */
            for (i = i + 1; CHAR_IS_DIGIT (start[i]); ++i) ;

            if (CHAR_IS_WORD (start[i]))
            {
                start = start + i;
                continue;
            }

            result->match_start = start;
            result->match_end = start + i;
            result->match_len = i;
            result->match_offset = -1;
            return rule;
        }
    }

    return NULL;
}
#endif
#define PATTERN_INT "[0-9]*"
#define PATTERN_FLOAT "[0-9]*\\.[0-9]*"
#define PATTERN_OCTAL "0[0-7]+"
#define PATTERN_HEX "0x[0-9A-Fa-f]+"
#define PATTERN_ESC_CHAR "\\\\([abefnrtv\"'?\\\\]|0[0-7]*|x[0-9A-Fa-f])"
@ -902,60 +1010,446 @@ rule_int_match (MooRule *rule,
#define PATTERN_WHITESPACE "\\s+"
/*
 * moo_rule_int_new:
 * Regex-backed constructor: hands PATTERN_INT to moo_rule_regex_new and
 * returns whatever that call returns.
 *
 * NOTE(review): a rule_new-based moo_rule_int_new also appears earlier in
 * this listing -- confirm which definition is meant to be live.
 */
MooRule*
moo_rule_int_new (MooRuleFlags  flags,
                  const char   *style)
{
    MooRule *regex_rule = moo_rule_regex_new (PATTERN_INT, TRUE, 0, 0, flags, style);

    return regex_rule;
}
/*
 * moo_rule_float_new:
 * Creates a rule that uses the hand-written rule_float_match matcher and
 * is described as "FLOAT".
 *
 * Returns the new rule, or NULL (via g_return_val_if_fail) when rule_new
 * returns NULL.
 */
MooRule*
moo_rule_float_new (MooRuleFlags  flags,
                    const char   *style)
{
    MooRule *rule = rule_new (flags, style, rule_float_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("FLOAT");
    return rule;
}
/*
 * rule_octal_match:
 * Looks for an octal literal: a '0' followed by at least one octal digit,
 * not preceded by a word character and not followed by one.
 *
 * MATCH_START_ONLY narrows the search to a literal starting at data->start.
 * Returns rule with *result filled in on success, NULL otherwise.
 */
static MooRule*
rule_octal_match (MooRule     *rule,
                  MatchData   *data,
                  MatchResult *result,
                  MatchFlags   flags)
{
    char *pos = data->start;
    char *last = (flags & MATCH_START_ONLY) ? data->start : data->limit;

    while (pos <= last)
    {
        guint len;

        /* Move to the next digit, if any remains in range. */
        for ( ; pos <= last; pos++)
            if (CHAR_IS_DIGIT (*pos))
                break;

        if (pos > last)
            break;

        /* Not a candidate: glued to a word, or does not start with '0'.
         * Step over the whole digit run. */
        if ((pos != data->line_string && CHAR_IS_WORD (pos[-1])) ||
            *pos != '0')
        {
            for ( ; pos <= last && CHAR_IS_DIGIT (*pos); pos++) ;
            continue;
        }

        len = 1;
        while (CHAR_IS_OCTAL (pos[len]))
            len++;

        if (len >= 2 && !CHAR_IS_WORD (pos[len]))
        {
            result->match_start = pos;
            result->match_end = pos + len;
            result->match_len = len;
            result->match_offset = -1;
            return rule;
        }

        pos += len;
    }

    return NULL;
}
/*
 * moo_rule_octal_new:
 * Creates a rule that uses the hand-written rule_octal_match matcher and
 * is described as "OCTAL".
 *
 * Returns the new rule, or NULL (via g_return_val_if_fail) when rule_new
 * returns NULL.
 */
MooRule*
moo_rule_octal_new (MooRuleFlags  flags,
                    const char   *style)
{
    MooRule *rule = rule_new (flags, style, rule_octal_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("OCTAL");
    return rule;
}
/*
 * rule_hex_match:
 * Looks for a hexadecimal literal: "0x" or "0X" followed by at least one
 * hex digit, not preceded by a word character and not followed by one.
 *
 * MATCH_START_ONLY narrows the search to a literal starting at data->start.
 * Returns rule with *result filled in on success, NULL otherwise.
 */
static MooRule*
rule_hex_match (MooRule     *rule,
                MatchData   *data,
                MatchResult *result,
                MatchFlags   flags)
{
    guint i;
    char *limit = data->limit;
    char *start = data->start;

    if (flags & MATCH_START_ONLY)
        limit = start;

    while (start <= limit)
    {
        while (start <= limit && *start != '0')
            start++;

        if (start > limit)
            return NULL;

        if ((start != data->line_string && CHAR_IS_WORD (start[-1])) ||
            (start[1] != 'x' && start[1] != 'X'))
        {
            /* Advance by one only: start[1] may be the end of the text,
             * and a following '0' is rejected anyway because it is
             * preceded by this '0'. */
            start++;
            continue;
        }

        for (i = 2; CHAR_IS_HEX (start[i]); ++i) ;

        /* i == 2 means "0x" with no digits, which is not a literal. */
        if (CHAR_IS_WORD (start[i]) || i < 3)
        {
            start = start + i;
            continue;
        }

        result->match_start = start;
        result->match_end = start + i;
        result->match_len = i;
        result->match_offset = -1;
        return rule;
    }

    return NULL;
}
/*
 * moo_rule_hex_new:
 * Creates a rule that uses the hand-written rule_hex_match matcher and is
 * described as "HEX".
 *
 * Returns the new rule, or NULL (via g_return_val_if_fail) when rule_new
 * returns NULL.
 */
MooRule*
moo_rule_hex_new (MooRuleFlags  flags,
                  const char   *style)
{
    MooRule *rule = rule_new (flags, style, rule_hex_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("HEX");
    return rule;
}
/*
 * rule_escaped_char_match:
 * Looks for a backslash escape sequence:
 *   - backslash + one of  \ a b e f n r t v " ' ?   (two characters)
 *   - backslash + '0' + any number of octal digits
 *   - backslash + 'x'/'X' + at least one hex digit
 *
 * MATCH_START_ONLY narrows the search to a sequence starting at data->start.
 * Returns rule with *result filled in on success, NULL otherwise.
 */
static MooRule*
rule_escaped_char_match (MooRule     *rule,
                         MatchData   *data,
                         MatchResult *result,
                         MatchFlags   flags)
{
    guint i;
    char *limit = data->limit;
    char *start = data->start;

    if (flags & MATCH_START_ONLY)
        limit = start;

    while (start <= limit)
    {
        while (start <= limit && *start != '\\')
            start++;

        if (start > limit)
            return NULL;

        switch (start[1])
        {
            case '\\':
            case 'a':
            case 'b':
            case 'e':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case '\"':
            case '\'':
            case '?':
                result->match_start = start;
                result->match_end = start + 2;
                result->match_len = 2;
                result->match_offset = -1;
                return rule;

            case '0':
                for (i = 2; CHAR_IS_OCTAL (start[i]); ++i) ;
                result->match_start = start;
                result->match_end = start + i;
                result->match_len = i;
                result->match_offset = -1;
                return rule;

            case 'x':
            case 'X':
                for (i = 2; CHAR_IS_HEX (start[i]); ++i) ;
                /* "\x" with no hex digit is not an escape sequence;
                 * fall out of the switch and keep scanning. */
                if (i == 2)
                    break;
                result->match_start = start;
                result->match_end = start + i;
                result->match_len = i;
                result->match_offset = -1;
                return rule;

            default:
                break;
        }

        /* Backslash did not begin a recognised sequence. */
        start++;
    }

    return NULL;
}
/*
 * moo_rule_escaped_char_new:
 * Creates a rule that uses the hand-written rule_escaped_char_match
 * matcher and is described as "ESCAPED CHAR".
 *
 * Returns the new rule, or NULL (via g_return_val_if_fail) when rule_new
 * returns NULL.
 */
MooRule*
moo_rule_escaped_char_new (MooRuleFlags  flags,
                           const char   *style)
{
    MooRule *rule = rule_new (flags, style, rule_escaped_char_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("ESCAPED CHAR");
    return rule;
}
/*
 * rule_c_char_match:
 * Looks for a C character literal:
 *   - 'c'                         (any single character between quotes)
 *   - '\e' where e is one of  \ a b e f n r t v " ' ?
 *   - '\0' followed by octal digits, then the closing quote
 *   - '\x' or '\X' followed by hex digits, then the closing quote
 *
 * MATCH_START_ONLY narrows the search to a literal starting at data->start.
 * Returns rule with *result filled in on success, NULL otherwise.
 *
 * NOTE(review): relies on the text being NUL-terminated (start[1] is read
 * at the last position in range) -- confirm against the caller.
 */
static MooRule*
rule_c_char_match (MooRule     *rule,
                   MatchData   *data,
                   MatchResult *result,
                   MatchFlags   flags)
{
    guint i;
    char *limit = data->limit;
    char *start = data->start;

    if (flags & MATCH_START_ONLY)
        limit = start;

    while (start <= limit)
    {
        while (start <= limit && *start != '\'')
            start++;

        if (start > limit)
            return NULL;

        if (start[1] != '\\')
        {
            /* Do not look at start[2] when start[1] is the terminator.
             * On failure advance by one only, so that a quote sitting at
             * start[1] is still tried as the start of a literal. */
            if (start[1] == '\0' || start[2] != '\'')
            {
                start++;
                continue;
            }

            result->match_start = start;
            result->match_end = start + 3;
            result->match_len = 3;
            result->match_offset = -1;
            return rule;
        }

        /* Every failed case below leaves the switch and advances by one;
         * the scan at the top of the loop then finds the next quote. */
        switch (start[2])
        {
            case '\\':
            case 'a':
            case 'b':
            case 'e':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case '\"':
            case '\'':
            case '?':
                if (start[3] != '\'')
                    break;
                result->match_start = start;
                result->match_end = start + 4;
                result->match_len = 4;
                result->match_offset = -1;
                return rule;

            case '0':
                for (i = 3; CHAR_IS_OCTAL (start[i]); ++i) ;
                if (start[i] != '\'')
                    break;
                result->match_start = start;
                result->match_end = start + i + 1;
                result->match_len = i + 1;
                result->match_offset = -1;
                return rule;

            case 'x':
            case 'X':
                for (i = 3; CHAR_IS_HEX (start[i]); ++i) ;
                if (start[i] != '\'')
                    break;
                result->match_start = start;
                result->match_end = start + i + 1;
                result->match_len = i + 1;
                result->match_offset = -1;
                return rule;

            default:
                break;
        }

        start++;
    }

    return NULL;
}
/*
 * moo_rule_c_char_new:
 * Creates a rule that uses the hand-written rule_c_char_match matcher and
 * is described as "C CHAR".
 *
 * Returns the new rule, or NULL (via g_return_val_if_fail) when rule_new
 * returns NULL.
 */
MooRule*
moo_rule_c_char_new (MooRuleFlags  flags,
                     const char   *style)
{
    MooRule *rule = rule_new (flags, style, rule_c_char_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("C CHAR");
    return rule;
}
/*
 * rule_whitespace_match:
 * Matches a run of spaces and tabs that begins exactly at data->start.
 * No forward search is done and the flags argument is not consulted.
 *
 * Returns rule with *result describing the run, or NULL when the first
 * character is neither a space nor a tab.
 */
static MooRule*
rule_whitespace_match (MooRule     *rule,
                       MatchData   *data,
                       MatchResult *result,
                       G_GNUC_UNUSED MatchFlags flags)
{
    char *begin = data->start;
    guint run = 0;

    if (CHAR_IS_SPACE (*begin))
    {
        /* Count the first blank plus every blank that follows it. */
        do
            run++;
        while (CHAR_IS_SPACE (begin[run]));

        result->match_start = begin;
        result->match_end = begin + run;
        result->match_len = run;
        result->match_offset = -1;
        return rule;
    }

    return NULL;
}
/*
 * moo_rule_whitespace_new:
 * Creates a rule that uses the hand-written rule_whitespace_match matcher
 * and is described as "WHITESPACE".
 *
 * Returns the new rule, or NULL (via g_return_val_if_fail) when rule_new
 * returns NULL.
 */
MooRule*
moo_rule_whitespace_new (MooRuleFlags  flags,
                         const char   *style)
{
    MooRule *rule = rule_new (flags, style, rule_whitespace_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("WHITESPACE");
    return rule;
}
/*
 * rule_identifier_match:
 * Looks for an identifier: a maximal run of word characters that does not
 * begin with a digit and is not glued to a preceding word character.
 *
 * MATCH_START_ONLY narrows the search to an identifier starting at
 * data->start.  Returns rule with *result filled in on success, NULL
 * otherwise.
 */
static MooRule*
rule_identifier_match (MooRule     *rule,
                       MatchData   *data,
                       MatchResult *result,
                       MatchFlags   flags)
{
    char *pos = data->start;
    char *last = (flags & MATCH_START_ONLY) ? data->start : data->limit;

    while (pos <= last)
    {
        guint len;

        /* Move to the next word character, if any remains in range. */
        for ( ; pos <= last; pos++)
            if (CHAR_IS_WORD (*pos))
                break;

        if (pos > last)
            break;

        /* A word starting with a digit, or the tail of an earlier word,
         * is not an identifier: step over the rest of it. */
        if ((pos != data->line_string && CHAR_IS_WORD (pos[-1])) ||
            CHAR_IS_DIGIT (*pos))
        {
            for ( ; pos <= last && CHAR_IS_WORD (*pos); pos++) ;
            continue;
        }

        len = 1;
        while (CHAR_IS_WORD (pos[len]))
            len++;

        result->match_start = pos;
        result->match_end = pos + len;
        result->match_len = len;
        result->match_offset = -1;
        return rule;
    }

    return NULL;
}
/*
 * moo_rule_identifier_new:
 * Creates a rule that uses the hand-written rule_identifier_match matcher
 * and is described as "IDENTIFIER".
 *
 * Returns the new rule, or NULL (via g_return_val_if_fail) when rule_new
 * returns NULL.
 */
MooRule*
moo_rule_identifier_new (MooRuleFlags  flags,
                         const char   *style)
{
    MooRule *rule = rule_new (flags, style, rule_identifier_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("IDENTIFIER");
    return rule;
}
/*
 * rule_line_continue_match:
 * Matches a backslash that is the last character of the line
 * (data->line_string[data->line_string_len - 1]).
 *
 * The backslash must lie at or before the search limit; with
 * MATCH_START_ONLY that limit is data->start, so the backslash has to be
 * at (or before) the current start position.
 *
 * Returns rule with *result describing the single backslash, or NULL.
 */
static MooRule*
rule_line_continue_match (MooRule     *rule,
                          MatchData   *data,
                          MatchResult *result,
                          MatchFlags   flags)
{
    char *limit = data->limit;
    /* Initialised up front: it is read below when MATCH_START_ONLY is set. */
    char *start = data->start;

    if (flags & MATCH_START_ONLY)
        limit = start;

    g_assert (data->line_string_len >= 0);

    if (data->line_string_len && data->line_string[data->line_string_len - 1] == '\\')
    {
        start = data->line_string + data->line_string_len - 1;

        if (start > limit)
            return NULL;

        result->match_start = start;
        result->match_end = start + 1;
        result->match_len = 1;
        result->match_offset = -1;
        return rule;
    }

    return NULL;
}
@ -963,8 +1457,9 @@ MooRule*
moo_rule_line_continue_new (MooRuleFlags  flags,
                            const char   *style)
{
    /* Built on the hand-written rule_line_continue_match matcher; rule is
     * declared exactly once. */
    MooRule *rule = rule_new (flags, style, rule_line_continue_match, NULL);
    g_return_val_if_fail (rule != NULL, NULL);
    rule->description = g_strdup ("LINE_CONTINUE");
    /* Flag copied as-is from the original constructor. */
    rule->include_eol = TRUE;
    return rule;
}