Added back the word-boundary check to the keyword rule, but it is now checked without \b in the regular expression
This commit is contained in:
parent
a699621d65
commit
eda83c059a
@ -2074,6 +2074,7 @@ rule_keywords_xml_create_rule (RuleKeywordsXML *xml,
|
|||||||
return _moo_rule_keywords_new (kw_xml->words,
|
return _moo_rule_keywords_new (kw_xml->words,
|
||||||
rule_xml_get_flags (xml),
|
rule_xml_get_flags (xml),
|
||||||
kw_xml->prefix, kw_xml->suffix,
|
kw_xml->prefix, kw_xml->suffix,
|
||||||
|
kw_xml->word_boundary,
|
||||||
rule_xml_get_style (xml));
|
rule_xml_get_style (xml));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,11 +103,11 @@ typedef MooRuleMatchFlags MatchFlags;
|
|||||||
#define MooRule2Char MooRuleAscii2Char
|
#define MooRule2Char MooRuleAscii2Char
|
||||||
#define MooRuleAnyChar MooRuleAsciiAnyChar
|
#define MooRuleAnyChar MooRuleAsciiAnyChar
|
||||||
|
|
||||||
typedef MooRule* (*MatchFunc) (MooRule *self,
|
typedef MooRule* (*MatchFunc) (MooRule *self,
|
||||||
MooRuleMatchData *data,
|
const MooRuleMatchData *data,
|
||||||
MooRuleMatchResult *result,
|
MooRuleMatchResult *result,
|
||||||
MooRuleMatchFlags flags);
|
MooRuleMatchFlags flags);
|
||||||
typedef void (*DestroyFunc) (MooRule *self);
|
typedef void (*DestroyFunc) (MooRule *self);
|
||||||
|
|
||||||
|
|
||||||
static MooRule *rule_new (MooRuleFlags flags,
|
static MooRule *rule_new (MooRuleFlags flags,
|
||||||
@ -116,13 +116,13 @@ static MooRule *rule_new (MooRuleFlags flags,
|
|||||||
DestroyFunc destroy_func);
|
DestroyFunc destroy_func);
|
||||||
|
|
||||||
|
|
||||||
static void child_rules_match (MooRuleArray *array,
|
static void child_rules_match (MooRuleArray *array,
|
||||||
MatchData *data,
|
MatchData *data,
|
||||||
MatchResult *result);
|
MatchResult *result);
|
||||||
static MooRule *rules_match_real (MooRuleArray *array,
|
static MooRule *rules_match_real (MooRuleArray *array,
|
||||||
MatchData *data,
|
MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags);
|
MatchFlags flags);
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -196,10 +196,10 @@ _moo_match_data_destroy (MatchData *data)
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rules_match_real (MooRuleArray *array,
|
rules_match_real (MooRuleArray *array,
|
||||||
MatchData *data,
|
MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
MooRule *matched = NULL;
|
MooRule *matched = NULL;
|
||||||
@ -335,9 +335,9 @@ rules_match_real (MooRuleArray *array,
|
|||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
child_rules_match (MooRuleArray *array,
|
child_rules_match (MooRuleArray *array,
|
||||||
MatchData *data,
|
MatchData *data,
|
||||||
MatchResult *result)
|
MatchResult *result)
|
||||||
{
|
{
|
||||||
MatchResult tmp;
|
MatchResult tmp;
|
||||||
MooRule *matched;
|
MooRule *matched;
|
||||||
@ -473,10 +473,10 @@ _moo_rule_set_end_switch (MooRule *rule,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_string_match (MooRule *rule,
|
rule_string_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
/* TODO: limit */
|
/* TODO: limit */
|
||||||
|
|
||||||
@ -564,7 +564,7 @@ _moo_rule_string_new (const char *string,
|
|||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_regex_match (MooRule *rule,
|
rule_regex_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
@ -572,34 +572,53 @@ rule_regex_match (MooRule *rule,
|
|||||||
/* XXX line start and stuff */
|
/* XXX line start and stuff */
|
||||||
int n_matches, start_pos, end_pos;
|
int n_matches, start_pos, end_pos;
|
||||||
EggRegexMatchFlags regex_flags = 0;
|
EggRegexMatchFlags regex_flags = 0;
|
||||||
|
char *start = data->start;
|
||||||
egg_regex_clear (rule->regex.regex);
|
|
||||||
|
|
||||||
if (flags & MATCH_START_ONLY)
|
if (flags & MATCH_START_ONLY)
|
||||||
regex_flags |= EGG_REGEX_MATCH_ANCHORED;
|
regex_flags |= EGG_REGEX_MATCH_ANCHORED;
|
||||||
|
|
||||||
n_matches = egg_regex_match_extended (rule->regex.regex,
|
while (start <= data->limit)
|
||||||
data->line_string,
|
{
|
||||||
data->line_string_len,
|
egg_regex_clear (rule->regex.regex);
|
||||||
data->start - data->line_string,
|
|
||||||
regex_flags);
|
|
||||||
|
|
||||||
if (n_matches < 1)
|
n_matches = egg_regex_match_extended (rule->regex.regex,
|
||||||
return NULL;
|
data->line_string,
|
||||||
|
data->line_string_len,
|
||||||
|
start - data->line_string,
|
||||||
|
regex_flags);
|
||||||
|
|
||||||
egg_regex_fetch_pos (rule->regex.regex, data->line_string, 0,
|
if (n_matches < 1)
|
||||||
&start_pos, &end_pos);
|
return NULL;
|
||||||
|
|
||||||
if (data->line_string + start_pos > data->limit)
|
egg_regex_fetch_pos (rule->regex.regex, data->line_string, 0,
|
||||||
return NULL;
|
&start_pos, &end_pos);
|
||||||
|
|
||||||
result->match_start = data->line_string + start_pos;
|
if (data->line_string + start_pos > data->limit)
|
||||||
result->match_end = data->line_string + end_pos;
|
return NULL;
|
||||||
|
|
||||||
result->match_len = -1;
|
result->match_start = data->line_string + start_pos;
|
||||||
result->match_offset = -1;
|
result->match_end = data->line_string + end_pos;
|
||||||
|
result->match_len = -1;
|
||||||
|
result->match_offset = -1;
|
||||||
|
|
||||||
return rule;
|
if (rule->regex.left_word_bndry && result->match_start > data->line_string &&
|
||||||
|
CHAR_IS_WORD (result->match_start[0]) && CHAR_IS_WORD (result->match_start[-1]))
|
||||||
|
{
|
||||||
|
start = result->match_start + 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rule->regex.right_word_bndry && result->match_end > data->line_string &&
|
||||||
|
CHAR_IS_WORD (result->match_end[0]) && CHAR_IS_WORD (result->match_end[-1]))
|
||||||
|
{
|
||||||
|
start = result->match_start + 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return rule;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -673,10 +692,10 @@ _moo_rule_regex_new (const char *pattern,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_char_match (MooRule *rule,
|
rule_char_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
result->match_start = NULL;
|
result->match_start = NULL;
|
||||||
|
|
||||||
@ -713,10 +732,10 @@ rule_char_match (MooRule *rule,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_2char_match (MooRule *rule,
|
rule_2char_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
result->match_start = NULL;
|
result->match_start = NULL;
|
||||||
|
|
||||||
@ -803,10 +822,10 @@ _moo_rule_2char_new (char ch1,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_any_char_match (MooRule *rule,
|
rule_any_char_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
|
|
||||||
@ -904,6 +923,7 @@ _moo_rule_keywords_new (GSList *words,
|
|||||||
MooRuleFlags flags,
|
MooRuleFlags flags,
|
||||||
const char *prefix,
|
const char *prefix,
|
||||||
const char *suffix,
|
const char *suffix,
|
||||||
|
gboolean word_boundary,
|
||||||
const char *style)
|
const char *style)
|
||||||
{
|
{
|
||||||
GSList *l;
|
GSList *l;
|
||||||
@ -936,6 +956,13 @@ _moo_rule_keywords_new (GSList *words,
|
|||||||
suffix ? suffix : "");
|
suffix ? suffix : "");
|
||||||
|
|
||||||
rule = _moo_rule_regex_new (pattern->str, TRUE, 0, 0, flags, style);
|
rule = _moo_rule_regex_new (pattern->str, TRUE, 0, 0, flags, style);
|
||||||
|
g_return_val_if_fail (rule != NULL, NULL);
|
||||||
|
|
||||||
|
if (word_boundary)
|
||||||
|
{
|
||||||
|
rule->regex.left_word_bndry = TRUE;
|
||||||
|
rule->regex.right_word_bndry = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
g_string_free (pattern, TRUE);
|
g_string_free (pattern, TRUE);
|
||||||
@ -948,12 +975,13 @@ out:
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_include_match (MooRule *rule,
|
rule_include_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
return rules_match_real (rule->incl.ctx->rules, data, result, flags);
|
return rules_match_real (rule->incl.ctx->rules,
|
||||||
|
(MatchData*) data, result, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -980,10 +1008,10 @@ _moo_rule_include_new (MooContext *ctx)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_int_match (MooRule *rule,
|
rule_int_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
@ -1027,10 +1055,10 @@ _moo_rule_int_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_float_match (MooRule *rule,
|
rule_float_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
@ -1116,10 +1144,10 @@ _moo_rule_float_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_octal_match (MooRule *rule,
|
rule_octal_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
@ -1169,10 +1197,10 @@ _moo_rule_octal_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_hex_match (MooRule *rule,
|
rule_hex_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
@ -1222,10 +1250,10 @@ _moo_rule_hex_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_escaped_char_match (MooRule *rule,
|
rule_escaped_char_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
@ -1301,10 +1329,10 @@ _moo_rule_escaped_char_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_c_char_match (MooRule *rule,
|
rule_c_char_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
@ -1323,17 +1351,8 @@ rule_c_char_match (MooRule *rule,
|
|||||||
|
|
||||||
if (start[1] != '\\')
|
if (start[1] != '\\')
|
||||||
{
|
{
|
||||||
if (start[2] != '\'')
|
start++;
|
||||||
{
|
continue;
|
||||||
start = start + 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
result->match_start = start;
|
|
||||||
result->match_end = start + 3;
|
|
||||||
result->match_len = 3;
|
|
||||||
result->match_offset = -1;
|
|
||||||
return rule;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (start[2])
|
switch (start[2])
|
||||||
@ -1413,9 +1432,9 @@ _moo_rule_c_char_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_whitespace_match (MooRule *rule,
|
rule_whitespace_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
G_GNUC_UNUSED MatchFlags flags)
|
G_GNUC_UNUSED MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
@ -1446,10 +1465,10 @@ _moo_rule_whitespace_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_identifier_match (MooRule *rule,
|
rule_identifier_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
guint i;
|
guint i;
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
@ -1491,10 +1510,10 @@ _moo_rule_identifier_new (MooRuleFlags flags,
|
|||||||
|
|
||||||
|
|
||||||
static MooRule*
|
static MooRule*
|
||||||
rule_line_continue_match (MooRule *rule,
|
rule_line_continue_match (MooRule *rule,
|
||||||
MatchData *data,
|
const MatchData *data,
|
||||||
MatchResult *result,
|
MatchResult *result,
|
||||||
MatchFlags flags)
|
MatchFlags flags)
|
||||||
{
|
{
|
||||||
char *limit = data->limit;
|
char *limit = data->limit;
|
||||||
char *start;
|
char *start;
|
||||||
|
@ -75,6 +75,7 @@ MooRule *_moo_rule_keywords_new (GSList *words,
|
|||||||
MooRuleFlags flags,
|
MooRuleFlags flags,
|
||||||
const char *prefix,
|
const char *prefix,
|
||||||
const char *suffix,
|
const char *suffix,
|
||||||
|
gboolean word_boundary,
|
||||||
const char *style);
|
const char *style);
|
||||||
MooRule *_moo_rule_zero_new (MooRuleFlags flags);
|
MooRule *_moo_rule_zero_new (MooRuleFlags flags);
|
||||||
MooRule *_moo_rule_include_new (MooContext *context);
|
MooRule *_moo_rule_include_new (MooContext *context);
|
||||||
|
@ -128,6 +128,8 @@ typedef struct {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
gpointer regex; /* EggRegex* */
|
gpointer regex; /* EggRegex* */
|
||||||
guint non_empty : 1;
|
guint non_empty : 1;
|
||||||
|
guint left_word_bndry : 1;
|
||||||
|
guint right_word_bndry : 1;
|
||||||
} MooRuleRegex;
|
} MooRuleRegex;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -151,11 +153,11 @@ typedef struct {
|
|||||||
|
|
||||||
struct _MooRule
|
struct _MooRule
|
||||||
{
|
{
|
||||||
MooRule* (*match) (MooRule *self,
|
MooRule* (*match) (MooRule *self,
|
||||||
MooRuleMatchData *data,
|
const MooRuleMatchData *data,
|
||||||
MooRuleMatchResult *result,
|
MooRuleMatchResult *result,
|
||||||
MooRuleMatchFlags flags);
|
MooRuleMatchFlags flags);
|
||||||
void (*destroy) (MooRule *self);
|
void (*destroy) (MooRule *self);
|
||||||
|
|
||||||
char *description;
|
char *description;
|
||||||
char *style;
|
char *style;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user