Added back the word-boundary check to the keyword rule, but it is now checked in code rather than with \b in the regular expression

This commit is contained in:
Yevgen Muntyan 2006-04-29 02:49:59 -05:00
parent a699621d65
commit eda83c059a
4 changed files with 133 additions and 110 deletions

View File

@ -2074,6 +2074,7 @@ rule_keywords_xml_create_rule (RuleKeywordsXML *xml,
return _moo_rule_keywords_new (kw_xml->words, return _moo_rule_keywords_new (kw_xml->words,
rule_xml_get_flags (xml), rule_xml_get_flags (xml),
kw_xml->prefix, kw_xml->suffix, kw_xml->prefix, kw_xml->suffix,
kw_xml->word_boundary,
rule_xml_get_style (xml)); rule_xml_get_style (xml));
} }

View File

@ -103,11 +103,11 @@ typedef MooRuleMatchFlags MatchFlags;
#define MooRule2Char MooRuleAscii2Char #define MooRule2Char MooRuleAscii2Char
#define MooRuleAnyChar MooRuleAsciiAnyChar #define MooRuleAnyChar MooRuleAsciiAnyChar
typedef MooRule* (*MatchFunc) (MooRule *self, typedef MooRule* (*MatchFunc) (MooRule *self,
MooRuleMatchData *data, const MooRuleMatchData *data,
MooRuleMatchResult *result, MooRuleMatchResult *result,
MooRuleMatchFlags flags); MooRuleMatchFlags flags);
typedef void (*DestroyFunc) (MooRule *self); typedef void (*DestroyFunc) (MooRule *self);
static MooRule *rule_new (MooRuleFlags flags, static MooRule *rule_new (MooRuleFlags flags,
@ -116,13 +116,13 @@ static MooRule *rule_new (MooRuleFlags flags,
DestroyFunc destroy_func); DestroyFunc destroy_func);
static void child_rules_match (MooRuleArray *array, static void child_rules_match (MooRuleArray *array,
MatchData *data, MatchData *data,
MatchResult *result); MatchResult *result);
static MooRule *rules_match_real (MooRuleArray *array, static MooRule *rules_match_real (MooRuleArray *array,
MatchData *data, MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags); MatchFlags flags);
void void
@ -196,10 +196,10 @@ _moo_match_data_destroy (MatchData *data)
static MooRule* static MooRule*
rules_match_real (MooRuleArray *array, rules_match_real (MooRuleArray *array,
MatchData *data, MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
MooRule *matched = NULL; MooRule *matched = NULL;
@ -335,9 +335,9 @@ rules_match_real (MooRuleArray *array,
static void static void
child_rules_match (MooRuleArray *array, child_rules_match (MooRuleArray *array,
MatchData *data, MatchData *data,
MatchResult *result) MatchResult *result)
{ {
MatchResult tmp; MatchResult tmp;
MooRule *matched; MooRule *matched;
@ -473,10 +473,10 @@ _moo_rule_set_end_switch (MooRule *rule,
*/ */
static MooRule* static MooRule*
rule_string_match (MooRule *rule, rule_string_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
/* TODO: limit */ /* TODO: limit */
@ -564,7 +564,7 @@ _moo_rule_string_new (const char *string,
static MooRule* static MooRule*
rule_regex_match (MooRule *rule, rule_regex_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
@ -572,34 +572,53 @@ rule_regex_match (MooRule *rule,
/* XXX line start and stuff */ /* XXX line start and stuff */
int n_matches, start_pos, end_pos; int n_matches, start_pos, end_pos;
EggRegexMatchFlags regex_flags = 0; EggRegexMatchFlags regex_flags = 0;
char *start = data->start;
egg_regex_clear (rule->regex.regex);
if (flags & MATCH_START_ONLY) if (flags & MATCH_START_ONLY)
regex_flags |= EGG_REGEX_MATCH_ANCHORED; regex_flags |= EGG_REGEX_MATCH_ANCHORED;
n_matches = egg_regex_match_extended (rule->regex.regex, while (start <= data->limit)
data->line_string, {
data->line_string_len, egg_regex_clear (rule->regex.regex);
data->start - data->line_string,
regex_flags);
if (n_matches < 1) n_matches = egg_regex_match_extended (rule->regex.regex,
return NULL; data->line_string,
data->line_string_len,
start - data->line_string,
regex_flags);
egg_regex_fetch_pos (rule->regex.regex, data->line_string, 0, if (n_matches < 1)
&start_pos, &end_pos); return NULL;
if (data->line_string + start_pos > data->limit) egg_regex_fetch_pos (rule->regex.regex, data->line_string, 0,
return NULL; &start_pos, &end_pos);
result->match_start = data->line_string + start_pos; if (data->line_string + start_pos > data->limit)
result->match_end = data->line_string + end_pos; return NULL;
result->match_len = -1; result->match_start = data->line_string + start_pos;
result->match_offset = -1; result->match_end = data->line_string + end_pos;
result->match_len = -1;
result->match_offset = -1;
return rule; if (rule->regex.left_word_bndry && result->match_start > data->line_string &&
CHAR_IS_WORD (result->match_start[0]) && CHAR_IS_WORD (result->match_start[-1]))
{
start = result->match_start + 1;
continue;
}
if (rule->regex.right_word_bndry && result->match_end > data->line_string &&
CHAR_IS_WORD (result->match_end[0]) && CHAR_IS_WORD (result->match_end[-1]))
{
start = result->match_start + 1;
continue;
}
return rule;
}
return NULL;
} }
@ -673,10 +692,10 @@ _moo_rule_regex_new (const char *pattern,
*/ */
static MooRule* static MooRule*
rule_char_match (MooRule *rule, rule_char_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
result->match_start = NULL; result->match_start = NULL;
@ -713,10 +732,10 @@ rule_char_match (MooRule *rule,
static MooRule* static MooRule*
rule_2char_match (MooRule *rule, rule_2char_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
result->match_start = NULL; result->match_start = NULL;
@ -803,10 +822,10 @@ _moo_rule_2char_new (char ch1,
*/ */
static MooRule* static MooRule*
rule_any_char_match (MooRule *rule, rule_any_char_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
@ -904,6 +923,7 @@ _moo_rule_keywords_new (GSList *words,
MooRuleFlags flags, MooRuleFlags flags,
const char *prefix, const char *prefix,
const char *suffix, const char *suffix,
gboolean word_boundary,
const char *style) const char *style)
{ {
GSList *l; GSList *l;
@ -936,6 +956,13 @@ _moo_rule_keywords_new (GSList *words,
suffix ? suffix : ""); suffix ? suffix : "");
rule = _moo_rule_regex_new (pattern->str, TRUE, 0, 0, flags, style); rule = _moo_rule_regex_new (pattern->str, TRUE, 0, 0, flags, style);
g_return_val_if_fail (rule != NULL, NULL);
if (word_boundary)
{
rule->regex.left_word_bndry = TRUE;
rule->regex.right_word_bndry = TRUE;
}
out: out:
g_string_free (pattern, TRUE); g_string_free (pattern, TRUE);
@ -948,12 +975,13 @@ out:
*/ */
static MooRule* static MooRule*
rule_include_match (MooRule *rule, rule_include_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
return rules_match_real (rule->incl.ctx->rules, data, result, flags); return rules_match_real (rule->incl.ctx->rules,
(MatchData*) data, result, flags);
} }
@ -980,10 +1008,10 @@ _moo_rule_include_new (MooContext *ctx)
*/ */
static MooRule* static MooRule*
rule_int_match (MooRule *rule, rule_int_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
char *limit = data->limit; char *limit = data->limit;
@ -1027,10 +1055,10 @@ _moo_rule_int_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_float_match (MooRule *rule, rule_float_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
char *limit = data->limit; char *limit = data->limit;
@ -1116,10 +1144,10 @@ _moo_rule_float_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_octal_match (MooRule *rule, rule_octal_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
char *limit = data->limit; char *limit = data->limit;
@ -1169,10 +1197,10 @@ _moo_rule_octal_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_hex_match (MooRule *rule, rule_hex_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
char *limit = data->limit; char *limit = data->limit;
@ -1222,10 +1250,10 @@ _moo_rule_hex_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_escaped_char_match (MooRule *rule, rule_escaped_char_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
char *limit = data->limit; char *limit = data->limit;
@ -1301,10 +1329,10 @@ _moo_rule_escaped_char_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_c_char_match (MooRule *rule, rule_c_char_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
char *limit = data->limit; char *limit = data->limit;
@ -1323,17 +1351,8 @@ rule_c_char_match (MooRule *rule,
if (start[1] != '\\') if (start[1] != '\\')
{ {
if (start[2] != '\'') start++;
{ continue;
start = start + 2;
continue;
}
result->match_start = start;
result->match_end = start + 3;
result->match_len = 3;
result->match_offset = -1;
return rule;
} }
switch (start[2]) switch (start[2])
@ -1413,9 +1432,9 @@ _moo_rule_c_char_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_whitespace_match (MooRule *rule, rule_whitespace_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
G_GNUC_UNUSED MatchFlags flags) G_GNUC_UNUSED MatchFlags flags)
{ {
guint i; guint i;
@ -1446,10 +1465,10 @@ _moo_rule_whitespace_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_identifier_match (MooRule *rule, rule_identifier_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
guint i; guint i;
char *limit = data->limit; char *limit = data->limit;
@ -1491,10 +1510,10 @@ _moo_rule_identifier_new (MooRuleFlags flags,
static MooRule* static MooRule*
rule_line_continue_match (MooRule *rule, rule_line_continue_match (MooRule *rule,
MatchData *data, const MatchData *data,
MatchResult *result, MatchResult *result,
MatchFlags flags) MatchFlags flags)
{ {
char *limit = data->limit; char *limit = data->limit;
char *start; char *start;

View File

@ -75,6 +75,7 @@ MooRule *_moo_rule_keywords_new (GSList *words,
MooRuleFlags flags, MooRuleFlags flags,
const char *prefix, const char *prefix,
const char *suffix, const char *suffix,
gboolean word_boundary,
const char *style); const char *style);
MooRule *_moo_rule_zero_new (MooRuleFlags flags); MooRule *_moo_rule_zero_new (MooRuleFlags flags);
MooRule *_moo_rule_include_new (MooContext *context); MooRule *_moo_rule_include_new (MooContext *context);

View File

@ -128,6 +128,8 @@ typedef struct {
typedef struct { typedef struct {
gpointer regex; /* EggRegex* */ gpointer regex; /* EggRegex* */
guint non_empty : 1; guint non_empty : 1;
guint left_word_bndry : 1;
guint right_word_bndry : 1;
} MooRuleRegex; } MooRuleRegex;
typedef struct { typedef struct {
@ -151,11 +153,11 @@ typedef struct {
struct _MooRule struct _MooRule
{ {
MooRule* (*match) (MooRule *self, MooRule* (*match) (MooRule *self,
MooRuleMatchData *data, const MooRuleMatchData *data,
MooRuleMatchResult *result, MooRuleMatchResult *result,
MooRuleMatchFlags flags); MooRuleMatchFlags flags);
void (*destroy) (MooRule *self); void (*destroy) (MooRule *self);
char *description; char *description;
char *style; char *style;