From dd5dbf1b5fff2e3504b31a587bfb1665ecd8c2b1 Mon Sep 17 00:00:00 2001 From: Yevgen Muntyan <17531749+muntyan@users.noreply.github.com> Date: Wed, 26 Apr 2006 13:24:57 -0500 Subject: [PATCH] Do not use regular expressions for special sequences --- moo/mooedit/moolang-aux.h | 5 + moo/mooedit/moolang-rules.c | 583 +++++++++++++++++++++++++++++++++--- 2 files changed, 544 insertions(+), 44 deletions(-) diff --git a/moo/mooedit/moolang-aux.h b/moo/mooedit/moolang-aux.h index e5e4c1fc..96894aca 100644 --- a/moo/mooedit/moolang-aux.h +++ b/moo/mooedit/moolang-aux.h @@ -24,6 +24,11 @@ #define CHAR_IS_ASCII(ch__) ((guint8) ch__ < 128) +#define CHAR_IS_DIGIT(c__) ((c__) >= '0' && (c__) <= '9') +#define CHAR_IS_OCTAL(c__) ((c__) >= '0' && (c__) <= '7') +#define CHAR_IS_HEX(c__) (((c__) >= '0' && (c__) <= '9') || ((c__) >= 'A' && (c__) <= 'F') || ((c__) >= 'a' && (c__) <= 'f')) +#define CHAR_IS_WORD(c__) ((c__) == '_' || g_ascii_isalnum (c__)) +#define CHAR_IS_SPACE(c__) ((c__) == ' ' || (c__) == '\t') #define ASCII_TOLOWER(c__) \ (g_ascii_isupper (c__) ? (c__) - 'A' + 'a' : (c__)) diff --git a/moo/mooedit/moolang-rules.c b/moo/mooedit/moolang-rules.c index eabe5c6b..48149c37 100644 --- a/moo/mooedit/moolang-rules.c +++ b/moo/mooedit/moolang-rules.c @@ -843,8 +843,26 @@ moo_rule_include_new (MooContext *ctx) /* Special sequences */ -#if 0 -#define ISDIGIT(c__) (c__ >= '0' && c__ <= '9') +inline static char * +find_digit (char *string, + char *limit, + char *line_start) +{ + while (TRUE) + { + while (string <= limit && !CHAR_IS_DIGIT (*string)) + string++; + + if (string > limit) + return NULL; + + if (string == line_start || !CHAR_IS_WORD (string[-1])) + return string; + } + + return NULL; +} + static MooRule* rule_int_match (MooRule *rule, @@ -852,48 +870,138 @@ rule_int_match (MooRule *rule, MatchResult *result, MatchFlags flags) { + guint i; + char *limit = data->limit; + char *start = data->start; + if (flags & MATCH_START_ONLY) + limit = start; + + while (start <= limit) { - if (ISDIGIT(data->start[0])) + start = find_digit (start, limit, data->line_string); + + if (!start) + return NULL; + + for (i = 1; CHAR_IS_DIGIT (start[i]); ++i) ; + + if (!CHAR_IS_WORD (start[i])) { - guint i; - for (i = 1; ISDIGIT(data->start[i]); ++i) ; - result->match_start = data->start; - result->match_end = result->match_start + i; + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; + } + + start = start + i; + } + + return NULL; +} + + +MooRule* +moo_rule_int_new (MooRuleFlags flags, + const char *style) +{ + MooRule *rule = rule_new (flags, style, rule_int_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + + rule->description = g_strdup ("INT"); + + return rule; +} + + +static MooRule* +rule_float_match (MooRule *rule, + MatchData *data, + MatchResult *result, + MatchFlags flags) +{ + guint i; + char *limit = data->limit; + char *start = data->start; + + if (flags & MATCH_START_ONLY) + limit = start; + + while (start <= limit) + { + while (start <= limit && !CHAR_IS_DIGIT (*start) && *start != '.') + start++; + + if (start > limit) + return NULL; + + if (*start == '.') + { + if (start > data->line_string && CHAR_IS_DIGIT (start[-1])) + { + do start++; + while (start <= limit && CHAR_IS_DIGIT (*start)); + continue; + } + + if (!CHAR_IS_DIGIT (start[1])) + { + start++; + continue; + } + + for (i = 2; CHAR_IS_DIGIT (start[i]); ++i) ; + + if (CHAR_IS_WORD (start[i])) + { + start = start + i; + continue; + } + + result->match_start = start; + result->match_end = start + i; result->match_len = i; result->match_offset = -1; return rule; } else { - return NULL; + if (start > data->line_string && CHAR_IS_WORD (start[-1])) + { + do start++; + while (start <= limit && CHAR_IS_DIGIT (*start)); + continue; + } + + for (i = 1; CHAR_IS_DIGIT (start[i]); ++i) ; + + if (start[i] != '.') + { + start = start + i; + continue; + } + + for (i = i + 1; CHAR_IS_DIGIT (start[i]); ++i) ; + + if (CHAR_IS_WORD (start[i])) + { + start = start + i; + continue; + } + + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; } } - else - { - guint i; - for (i = 0; data->start[i] && !ISDIGIT(data->start[i]); ++i) ; - - if (!data->start[i]) - return NULL; - - result->match_start = data->start + i; - - for ( ; ISDIGIT(data->start[i]); ++i) ; - - result->match_end = result->match_start + i; - result->match_len = result->match_end - result->match_start; - result->match_offset = -1; - - return rule; - } + return NULL; } -#endif -#define PATTERN_INT "[0-9]*" -#define PATTERN_FLOAT "[0-9]*\\.[0-9]*" #define PATTERN_OCTAL "0[0-7]+" #define PATTERN_HEX "0x[0-9A-Fa-f]+" #define PATTERN_ESC_CHAR "\\\\([abefnrtv\"'?\\\\]|0[0-7]*|x[0-9A-Fa-f])" @@ -902,60 +1010,446 @@ rule_int_match (MooRule *rule, #define PATTERN_WHITESPACE "\\s+" -MooRule* -moo_rule_int_new (MooRuleFlags flags, - const char *style) -{ - return moo_rule_regex_new (PATTERN_INT, TRUE, 0, 0, flags, style); -} - MooRule* moo_rule_float_new (MooRuleFlags flags, const char *style) { - return moo_rule_regex_new (PATTERN_FLOAT, TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_float_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + + rule->description = g_strdup ("FLOAT"); + + return rule; } + +static MooRule* +rule_octal_match (MooRule *rule, + MatchData *data, + MatchResult *result, + MatchFlags flags) +{ + guint i; + char *limit = data->limit; + char *start = data->start; + + if (flags & MATCH_START_ONLY) + limit = start; + + while (start <= limit) + { + while (start <= limit && !CHAR_IS_DIGIT (*start)) + start++; + + if (start > limit) + return NULL; + + if ((start != data->line_string && CHAR_IS_WORD (start[-1])) || + *start != '0') + { + while (start <= limit && CHAR_IS_DIGIT (*start)) + start++; + continue; + } + + for (i = 1; CHAR_IS_OCTAL (start[i]); ++i) ; + + if (CHAR_IS_WORD (start[i]) || i < 2) + { + start = start + i; + continue; + } + + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; + } + + return NULL; +} + + MooRule* moo_rule_octal_new (MooRuleFlags flags, const char *style) { - return moo_rule_regex_new (PATTERN_OCTAL, TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_octal_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + + rule->description = g_strdup ("OCTAL"); + + return rule; } + +static MooRule* +rule_hex_match (MooRule *rule, + MatchData *data, + MatchResult *result, + MatchFlags flags) +{ + guint i; + char *limit = data->limit; + char *start = data->start; + + if (flags & MATCH_START_ONLY) + limit = start; + + while (start <= limit) + { + while (start <= limit && *start != '0') + start++; + + if (start > limit) + return NULL; + + if ((start != data->line_string && CHAR_IS_WORD (start[-1])) || + (start[1] != 'x' && start[1] != 'X')) + { + start += 2; + continue; + } + + for (i = 2; CHAR_IS_HEX (start[i]); ++i) ; + + if (CHAR_IS_WORD (start[i]) || i < 2) + { + start = start + i; + continue; + } + + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; + } + + return NULL; +} + + MooRule* moo_rule_hex_new (MooRuleFlags flags, const char *style) { - return moo_rule_regex_new (PATTERN_HEX, TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_hex_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + + rule->description = g_strdup ("HEX"); + + return rule; } + +static MooRule* +rule_escaped_char_match (MooRule *rule, + MatchData *data, + MatchResult *result, + MatchFlags flags) +{ + guint i; + char *limit = data->limit; + char *start = data->start; + + if (flags & MATCH_START_ONLY) + limit = start; + + while (start <= limit) + { + while (start <= limit && *start != '\\') + start++; + + if (start > limit) + return NULL; + + switch (start[1]) + { + case '\\': + case 'a': + case 'b': + case 'e': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case '\"': + case '\'': + case '?': + result->match_start = start; + result->match_end = start + 2; + result->match_len = 2; + result->match_offset = -1; + return rule; + + case '0': + for (i = 2; CHAR_IS_OCTAL (start[i]); ++i) ; + + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; + + case 'x': + case 'X': + for (i = 2; CHAR_IS_HEX (start[i]); ++i) ; + + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; + } + + start++; + } + + return NULL; +} + + MooRule* moo_rule_escaped_char_new (MooRuleFlags flags, const char *style) { - return moo_rule_regex_new (PATTERN_ESC_CHAR, TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_escaped_char_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + rule->description = g_strdup ("ESCAPED CHAR"); + return rule; } + +static MooRule* +rule_c_char_match (MooRule *rule, + MatchData *data, + MatchResult *result, + MatchFlags flags) +{ + guint i; + char *limit = data->limit; + char *start = data->start; + + if (flags & MATCH_START_ONLY) + limit = start; + + while (start <= limit) + { + while (start <= limit && *start != '\'') + start++; + + if (start > limit) + return NULL; + + if (start[1] != '\\') + { + if (start[2] != '\'') + { + start = start + 2; + continue; + } + + result->match_start = start; + result->match_end = start + 3; + result->match_len = 3; + result->match_offset = -1; + return rule; + } + + switch (start[2]) + { + case '\\': + case 'a': + case 'b': + case 'e': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case '\"': + case '\'': + case '?': + if (start[3] != '\'') + { + start = start + 3; + continue; + } + + result->match_start = start; + result->match_end = start + 4; + result->match_len = 4; + result->match_offset = -1; + return rule; + + case '0': + for (i = 3; CHAR_IS_OCTAL (start[i]); ++i) ; + + if (start[i] != '\'') + { + start = start + i; + continue; + } + + result->match_start = start; + result->match_end = start + i + 1; + result->match_len = i + 1; + result->match_offset = -1; + return rule; + + case 'x': + case 'X': + for (i = 3; CHAR_IS_HEX (start[i]); ++i) ; + + if (start[i] != '\'') + { + start = start + i; + continue; + } + + result->match_start = start; + result->match_end = start + i + 1; + result->match_len = i + 1; + result->match_offset = -1; + return rule; + } + + start++; + } + + return NULL; +} + + MooRule* moo_rule_c_char_new (MooRuleFlags flags, const char *style) { - return moo_rule_regex_new (PATTERN_C_CHAR, TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_c_char_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + rule->description = g_strdup ("C CHAR"); + return rule; } + +static MooRule* +rule_whitespace_match (MooRule *rule, + MatchData *data, + MatchResult *result, + G_GNUC_UNUSED MatchFlags flags) +{ + guint i; + char *start = data->start; + + if (!CHAR_IS_SPACE (*start)) + return NULL; + + for (i = 1; CHAR_IS_SPACE (start[i]); ++i) ; + + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; +} + + MooRule* moo_rule_whitespace_new (MooRuleFlags flags, const char *style) { - return moo_rule_regex_new (PATTERN_WHITESPACE, TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_whitespace_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + rule->description = g_strdup ("WHITESPACE"); + return rule; } + +static MooRule* +rule_identifier_match (MooRule *rule, + MatchData *data, + MatchResult *result, + MatchFlags flags) +{ + guint i; + char *limit = data->limit; + char *start = data->start; + + if (flags & MATCH_START_ONLY) + limit = start; + + while (start <= limit) + { + while (start <= limit && !CHAR_IS_WORD (*start)) + start++; + + if (start > limit) + return NULL; + + if ((start != data->line_string && CHAR_IS_WORD (start[-1])) || + CHAR_IS_DIGIT (*start)) + { + while (start <= limit && CHAR_IS_WORD (*start)) + start++; + continue; + } + + for (i = 1; CHAR_IS_WORD (start[i]); ++i) ; + + result->match_start = start; + result->match_end = start + i; + result->match_len = i; + result->match_offset = -1; + return rule; + } + + return NULL; +} + + MooRule* moo_rule_identifier_new (MooRuleFlags flags, const char *style) { - return moo_rule_regex_new (PATTERN_IDENTIFIER, TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_identifier_match, NULL); + g_return_val_if_fail (rule != NULL, NULL); + rule->description = g_strdup ("WHITESPACE"); + return rule; +} + + +static MooRule* +rule_line_continue_match (MooRule *rule, + MatchData *data, + MatchResult *result, + MatchFlags flags) +{ + char *limit = data->limit; + char *start; + + if (flags & MATCH_START_ONLY) + limit = start; + + g_assert (data->line_string_len >= 0); + + if (data->line_string_len && data->line_string[data->line_string_len - 1] == '\\') + { + start = data->line_string + data->line_string_len - 1; + + if (start > limit) + return NULL; + + result->match_start = start; + result->match_end = start + 1; + result->match_len = 1; + result->match_offset = -1; + return rule; + } + + return NULL; } @@ -963,8 +1457,9 @@ MooRule* moo_rule_line_continue_new (MooRuleFlags flags, const char *style) { - MooRule *rule = moo_rule_regex_new ("\\\\$", TRUE, 0, 0, flags, style); + MooRule *rule = rule_new (flags, style, rule_line_continue_match, NULL); g_return_val_if_fail (rule != NULL, NULL); + rule->description = g_strdup ("LINE_CONTINUE"); rule->include_eol = TRUE; return rule; }