2005-06-22 11:20:32 -07:00
|
|
|
/* EggRegex -- regular expression API wrapper around PCRE.
|
|
|
|
* Copyright (C) 1999, 2000 Scott Wimer
|
|
|
|
* Copyright (C) 2004 Matthias Clasen <mclasen@redhat.com>
|
|
|
|
*
|
|
|
|
* This is basically an ease-of-use wrapper around the functionality of
|
|
|
|
* PCRE.
|
|
|
|
*
|
|
|
|
* With this library, we are, hopefully, drastically reducing the code
|
|
|
|
* complexity necessary by making use of a more complex and detailed
|
|
|
|
* data structure to store the regex info. I am hoping to have a regex
|
|
|
|
* interface that is almost as easy to use as Perl's. <fingers crossed>
|
|
|
|
*
|
|
|
|
* Author: Scott Wimer <scottw@cylant.com>
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
2005-07-25 05:25:35 -07:00
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
2005-06-22 11:20:32 -07:00
|
|
|
*
|
|
|
|
* This library is free software, you can distribute it or modify it
|
|
|
|
* under any of the following terms:
|
|
|
|
* 1) The GNU General Public License (GPL)
|
|
|
|
* 2) The GNU Library General Public License (LGPL)
|
|
|
|
* 3) The Perl Artistic license (Artistic)
|
|
|
|
* 4) The BSD license (BSD)
|
|
|
|
*
|
|
|
|
* In short, you can use this library in any code you desire, so long as
|
|
|
|
* the Copyright notice above remains intact. If you do make changes to
|
2005-07-25 05:25:35 -07:00
|
|
|
* it, I would appreciate that you let me know so I can improve this
|
2005-06-22 11:20:32 -07:00
|
|
|
* library for everybody, but I'm not gonna force you to.
|
2005-07-25 05:25:35 -07:00
|
|
|
*
|
2005-06-22 11:20:32 -07:00
|
|
|
* Please note that this library is just a wrapper around Philip Hazel's
|
|
|
|
* PCRE library. Please see the file 'LICENSE' in your PCRE distribution.
|
|
|
|
* And, if you live in England, please send him a pint of good beer, his
|
|
|
|
* library is great.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
|
|
* Changed by Muntyan
|
|
|
|
*
|
|
|
|
* 04/24/2005: added refcounting
|
|
|
|
* 04/30/2005: added egg_regex_eval_replacement and egg_regex_check_replacement
|
|
|
|
* 05/31/2005: changed expand_escape: \0 means whole match
|
2005-07-25 05:25:35 -07:00
|
|
|
* 07/25/2005: silent gcc
|
2005-10-13 07:08:18 -07:00
|
|
|
* 10/03/2005: removed #include "config.h", removed odd 'break' after 'goto' to
|
|
|
|
* avoid warning
|
2005-06-22 11:20:32 -07:00
|
|
|
*
|
|
|
|
* mooutils/eggregex.c
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "eggregex.h"
|
|
|
|
#include <glib.h>
|
|
|
|
#include "pcre/pcre.h"
|
|
|
|
|
|
|
|
/* FIXME when this is in glib */
|
|
|
|
#define _(s) s
|
|
|
|
|
|
|
|
struct _EggRegex
|
|
|
|
{
|
|
|
|
guint ref_count;
|
|
|
|
gchar *pattern; /* the pattern */
|
|
|
|
pcre *regex; /* compiled form of the pattern */
|
|
|
|
pcre_extra *extra; /* data stored when egg_regex_optimize() is used */
|
|
|
|
gint matches; /* number of matching sub patterns */
|
|
|
|
gint pos; /* position in the string where last match left off */
|
|
|
|
gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */
|
|
|
|
gint n_offsets; /* number of offsets */
|
|
|
|
EggRegexCompileFlags compile_opts; /* options used at compile time on the pattern */
|
|
|
|
EggRegexMatchFlags match_opts; /* options used at match time on the regex */
|
|
|
|
gint string_len; /* length of the string last used against */
|
|
|
|
GSList *delims; /* delimiter sub strings from split next */
|
|
|
|
};
|
|
|
|
|
|
|
|
GQuark
|
|
|
|
egg_regex_error_quark (void)
|
|
|
|
{
|
|
|
|
static GQuark error_quark = 0;
|
|
|
|
|
|
|
|
if (error_quark == 0)
|
|
|
|
error_quark = g_quark_from_static_string ("g-regex-error-quark");
|
|
|
|
|
|
|
|
return error_quark;
|
|
|
|
}
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
/**
|
2005-06-22 11:20:32 -07:00
|
|
|
* egg_regex_new:
|
|
|
|
* @pattern: the regular expression
|
|
|
|
* @compile_options: compile options for the regular expression
|
|
|
|
* @match_options: match options for the regular expression
|
|
|
|
* @error: return location for a #GError
|
2005-07-25 05:25:35 -07:00
|
|
|
*
|
2005-06-22 11:20:32 -07:00
|
|
|
* Compiles the regular expression to an internal form, and does the initial
|
2005-07-25 05:25:35 -07:00
|
|
|
* setup of the #EggRegex structure.
|
|
|
|
*
|
2005-06-22 11:20:32 -07:00
|
|
|
* Returns: a #EggRegex structure
|
|
|
|
*/
|
|
|
|
EggRegex *
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_new (const gchar *pattern,
|
2005-06-22 11:20:32 -07:00
|
|
|
EggRegexCompileFlags compile_options,
|
|
|
|
EggRegexMatchFlags match_options,
|
|
|
|
GError **error)
|
|
|
|
{
|
|
|
|
EggRegex *regex = g_new0 (EggRegex, 1);
|
|
|
|
const gchar *errmsg;
|
|
|
|
gint erroffset;
|
|
|
|
gint capture_count;
|
|
|
|
|
|
|
|
regex->ref_count = 1;
|
2005-07-25 05:25:35 -07:00
|
|
|
|
2005-06-22 11:20:32 -07:00
|
|
|
/* preset the parts of gregex that need to be set, regardless of the
|
|
|
|
* type of match that will be checked */
|
|
|
|
regex->pattern = g_strdup (pattern);
|
|
|
|
regex->extra = NULL;
|
|
|
|
regex->pos = 0;
|
|
|
|
regex->string_len = -1; /* not set yet */
|
|
|
|
|
|
|
|
/* set the options */
|
|
|
|
regex->compile_opts = compile_options | PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
|
|
|
|
regex->match_opts = match_options | PCRE_NO_UTF8_CHECK;
|
|
|
|
|
|
|
|
/* compile the pattern */
|
|
|
|
regex->regex = _pcre_compile (pattern, regex->compile_opts,
|
|
|
|
&errmsg, &erroffset, NULL);
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
/* if the compilation failed, set the error member and return
|
2005-06-22 11:20:32 -07:00
|
|
|
* immediately */
|
|
|
|
if (regex->regex == NULL)
|
|
|
|
{
|
2005-07-25 05:25:35 -07:00
|
|
|
GError *tmp_error = g_error_new (EGG_REGEX_ERROR,
|
2005-06-22 11:20:32 -07:00
|
|
|
EGG_REGEX_ERROR_COMPILE,
|
|
|
|
_("Error while compiling regular "
|
|
|
|
"expression %s at char %d: %s"),
|
|
|
|
pattern, erroffset, errmsg);
|
|
|
|
g_propagate_error (error, tmp_error);
|
|
|
|
|
|
|
|
return regex;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* otherwise, find out how many sub patterns exist in this pattern,
|
|
|
|
* and setup the offsets array and n_offsets accordingly */
|
2005-07-25 05:25:35 -07:00
|
|
|
_pcre_fullinfo (regex->regex, regex->extra,
|
2005-06-22 11:20:32 -07:00
|
|
|
PCRE_INFO_CAPTURECOUNT, &capture_count);
|
|
|
|
regex->n_offsets = (capture_count + 1) * 3;
|
|
|
|
regex->offsets = g_new0 (gint, regex->n_offsets);
|
|
|
|
|
|
|
|
return regex;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_free:
|
|
|
|
* @regex: a #EggRegex structure from egg_regex_new()
|
|
|
|
*
|
|
|
|
* Frees all the memory associated with the regex structure.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
egg_regex_unref (EggRegex *regex)
|
|
|
|
{
|
|
|
|
if (--regex->ref_count)
|
|
|
|
return;
|
|
|
|
|
|
|
|
g_free (regex->pattern);
|
|
|
|
g_slist_free (regex->delims);
|
|
|
|
g_free (regex->offsets);
|
|
|
|
if (regex->regex != NULL)
|
|
|
|
g_free (regex->regex);
|
|
|
|
if (regex->extra != NULL)
|
|
|
|
g_free (regex->extra);
|
|
|
|
g_free (regex);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
egg_regex_ref (EggRegex *regex)
|
|
|
|
{
|
|
|
|
++regex->ref_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
egg_regex_free (EggRegex *regex)
|
|
|
|
{
|
|
|
|
egg_regex_unref (regex);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* FIXME */
|
|
|
|
const gchar *
|
|
|
|
egg_regex_get_pattern (EggRegex *regex)
|
|
|
|
{
|
|
|
|
return regex == NULL ? NULL : regex->pattern;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_clear:
|
|
|
|
* @regex: a #EggRegex structure
|
|
|
|
*
|
|
|
|
* Clears out the members of @regex that are holding information about the
|
|
|
|
* last set of matches for this pattern. egg_regex_clear() needs to be
|
|
|
|
* called between uses of egg_regex_match() or egg_regex_match_next() against
|
2005-07-25 05:25:35 -07:00
|
|
|
* new target strings.
|
2005-06-22 11:20:32 -07:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
egg_regex_clear (EggRegex *regex)
|
|
|
|
{
|
|
|
|
regex->matches = -1;
|
|
|
|
regex->string_len = -1;
|
|
|
|
regex->pos = 0;
|
|
|
|
|
|
|
|
/* if the pattern was used with egg_regex_split_next(), it may have
|
|
|
|
* delimiter offsets stored. Free up those guys as well. */
|
|
|
|
if (regex->delims != NULL)
|
|
|
|
g_slist_free (regex->delims);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_optimize:
|
|
|
|
* @regex: a #EggRegex structure
|
|
|
|
* @error: return location for a #GError
|
|
|
|
*
|
|
|
|
* If the pattern will be used many times, then it may be worth the
|
|
|
|
* effort to optimize it to improve the speed of matches.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
egg_regex_optimize (EggRegex *regex,
|
|
|
|
GError **error)
|
|
|
|
{
|
|
|
|
const gchar *errmsg;
|
|
|
|
|
|
|
|
regex->extra = _pcre_study (regex->regex, 0, &errmsg);
|
|
|
|
|
|
|
|
if (errmsg)
|
|
|
|
{
|
|
|
|
GError *tmp_error = g_error_new (EGG_REGEX_ERROR,
|
2005-07-25 05:25:35 -07:00
|
|
|
EGG_REGEX_ERROR_OPTIMIZE,
|
2005-06-22 11:20:32 -07:00
|
|
|
_("Error while optimizing "
|
|
|
|
"regular expression %s: %s"),
|
|
|
|
regex->pattern,
|
|
|
|
errmsg);
|
|
|
|
g_propagate_error (error, tmp_error);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_match:
|
|
|
|
* @regex: a #EggRegex structure from egg_regex_new()
|
|
|
|
* @string: the string to scan for matches
|
|
|
|
* @string_len: the length of @string, or -1 to use strlen()
|
|
|
|
* @match_options: match options
|
|
|
|
*
|
|
|
|
* Scans for a match in string for the pattern in @regex. The starting index
|
|
|
|
* of the match goes into the pos member of the @regex struct. The indexes
|
|
|
|
* of the full match, and all matches get stored off in the offsets array.
|
|
|
|
*
|
2005-07-25 05:25:35 -07:00
|
|
|
* The @match_options are combined with the match options specified when the
|
2005-06-22 11:20:32 -07:00
|
|
|
* @regex structure was created, letting you have more flexibility in reusing
|
|
|
|
* #EggRegex structures.
|
|
|
|
*
|
|
|
|
* Returns: Number of matched substrings + 1, or 1 if the pattern has no
|
|
|
|
* substrings in it. Returns #GREGEX_NOMATCH if the pattern
|
|
|
|
* did not match.
|
|
|
|
*/
|
2005-07-25 05:25:35 -07:00
|
|
|
gint
|
|
|
|
egg_regex_match (EggRegex *regex,
|
|
|
|
const gchar *string,
|
2005-06-22 11:20:32 -07:00
|
|
|
gssize string_len,
|
|
|
|
EggRegexMatchFlags match_options)
|
|
|
|
{
|
|
|
|
if (string_len < 0)
|
|
|
|
string_len = strlen (string);
|
|
|
|
|
|
|
|
regex->string_len = string_len;
|
|
|
|
|
|
|
|
/* perform the match */
|
2005-07-25 05:25:35 -07:00
|
|
|
regex->matches = _pcre_exec (regex->regex, regex->extra,
|
2005-06-22 11:20:32 -07:00
|
|
|
string, regex->string_len, 0,
|
|
|
|
regex->match_opts | match_options,
|
|
|
|
regex->offsets, regex->n_offsets);
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
/* if the regex matched, set regex->pos to the character past the
|
2005-06-22 11:20:32 -07:00
|
|
|
* end of the match.
|
|
|
|
*/
|
|
|
|
if (regex->matches > 0)
|
|
|
|
regex->pos = regex->offsets[1];
|
|
|
|
|
|
|
|
return regex->matches; /* return what pcre_exec() returned */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* FIXME:
|
|
|
|
* - egg_regex_match should call this.
|
|
|
|
* - egg_regex_match_next cannot be used after this.
|
|
|
|
* - document this function.
|
|
|
|
*/
|
2005-07-25 05:25:35 -07:00
|
|
|
gint
|
|
|
|
egg_regex_match_extended (EggRegex *regex,
|
|
|
|
const gchar *string,
|
2005-06-22 11:20:32 -07:00
|
|
|
gssize string_len,
|
|
|
|
gint string_index,
|
|
|
|
EggRegexMatchFlags match_options)
|
|
|
|
{
|
|
|
|
if (string_len < 0)
|
|
|
|
string_len = strlen (string);
|
|
|
|
|
|
|
|
regex->string_len = string_len;
|
|
|
|
|
|
|
|
/* perform the match */
|
2005-07-25 05:25:35 -07:00
|
|
|
regex->matches = _pcre_exec (regex->regex, regex->extra,
|
2005-06-22 11:20:32 -07:00
|
|
|
string, regex->string_len, string_index,
|
|
|
|
regex->match_opts | match_options,
|
|
|
|
regex->offsets, regex->n_offsets);
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
/* if the regex matched, set regex->pos to the character past the
|
2005-06-22 11:20:32 -07:00
|
|
|
* end of the match.
|
|
|
|
*/
|
|
|
|
if (regex->matches > 0)
|
|
|
|
regex->pos = regex->offsets[1];
|
|
|
|
|
|
|
|
return regex->matches; /* return what pcre_exec() returned */
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_match_next:
|
2005-07-25 05:25:35 -07:00
|
|
|
* @regex: a #EggRegex structure
|
2005-06-22 11:20:32 -07:00
|
|
|
* @string: the string to scan for matches
|
|
|
|
* @string_len: the length of @string, or -1 to use strlen()
|
|
|
|
* @match_options: the match options
|
|
|
|
*
|
2005-07-25 05:25:35 -07:00
|
|
|
* Scans for the next match in @string of the pattern in @regex. The starting
|
|
|
|
* index of the match goes into the pos member of the @regex struct. The
|
|
|
|
* indexes of the full match, and all matches get stored off in the offsets
|
2005-06-22 11:20:32 -07:00
|
|
|
* array. The match options are ored with the match options set when
|
|
|
|
* the @regex was created.
|
|
|
|
*
|
|
|
|
* You have to call egg_regex_clear() to reuse the same pattern on a new string.
|
|
|
|
* This is especially true for use with egg_regex_match_next().
|
|
|
|
*
|
|
|
|
* Returns: Number of matched substrings + 1, or 1 if the pattern has no
|
|
|
|
* substrings in it. Returns #GREGEX_NOMATCH if the pattern
|
|
|
|
* did not match.
|
|
|
|
*/
|
2005-07-25 05:25:35 -07:00
|
|
|
gint
|
|
|
|
egg_regex_match_next (EggRegex *regex,
|
|
|
|
const gchar *string,
|
2005-06-22 11:20:32 -07:00
|
|
|
gssize string_len,
|
|
|
|
EggRegexMatchFlags match_options)
|
|
|
|
{
|
|
|
|
/* if this regex hasn't been used on this string before, then we
|
|
|
|
* need to calculate the length of the string, and set pos to the
|
2005-07-25 05:25:35 -07:00
|
|
|
* start of it.
|
|
|
|
* Knowing if this regex has been used on this string is a bit of
|
2005-06-22 11:20:32 -07:00
|
|
|
* a challenge. For now, we require the user to call egg_regex_clear()
|
|
|
|
* in between usages on a new string. Not perfect, but not such a
|
|
|
|
* bad solution either.
|
|
|
|
*/
|
|
|
|
if (regex->string_len == -1)
|
|
|
|
{
|
|
|
|
if (string_len < 0)
|
|
|
|
string_len = strlen (string);
|
2005-07-25 05:25:35 -07:00
|
|
|
|
2005-06-22 11:20:32 -07:00
|
|
|
regex->string_len = string_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* perform the match */
|
|
|
|
regex->matches = _pcre_exec (regex->regex, regex->extra,
|
2005-07-25 05:25:35 -07:00
|
|
|
string + regex->pos,
|
2005-06-22 11:20:32 -07:00
|
|
|
regex->string_len - regex->pos,
|
|
|
|
0, regex->match_opts | match_options,
|
|
|
|
regex->offsets, regex->n_offsets);
|
|
|
|
|
|
|
|
/* if the regex matched, adjust the offsets array to take into account
|
|
|
|
* the fact that the string they're out of is shorter than the string
|
|
|
|
* that the caller passed us, by regex->pos to be exact.
|
|
|
|
* Then, update regex->pos to take into account the new starting point.
|
|
|
|
*/
|
|
|
|
if (regex->matches > 0)
|
|
|
|
{
|
|
|
|
gint i, pieces;
|
|
|
|
pieces = (regex->matches * 2) - 1;
|
|
|
|
|
|
|
|
for (i = 0; i <= pieces; i++)
|
|
|
|
regex->offsets[i] += regex->pos;
|
|
|
|
|
|
|
|
regex->pos = regex->offsets[1];
|
|
|
|
}
|
|
|
|
|
|
|
|
return regex->matches;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_fetch:
|
|
|
|
* @regex: #EggRegex structure used in last match
|
|
|
|
* @string: the string on which the last match was made
|
|
|
|
* @match_num: number of the sub expression
|
|
|
|
*
|
|
|
|
* Retrieves the text matching the @match_num<!-- -->'th capturing parentheses.
|
|
|
|
* 0 is the full text of the match, 1 is the first paren set, 2 the second,
|
|
|
|
* and so on.
|
|
|
|
*
|
|
|
|
* Returns: The matched substring. You have to free it yourself.
|
|
|
|
*/
|
|
|
|
gchar *
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_fetch (EggRegex *regex,
|
2005-06-22 11:20:32 -07:00
|
|
|
const gchar *string,
|
|
|
|
gint match_num)
|
|
|
|
{
|
|
|
|
gchar *match;
|
|
|
|
|
|
|
|
/* make sure the sub expression number they're requesting is less than
|
|
|
|
* the total number of sub expressions that were matched. */
|
|
|
|
if (match_num >= regex->matches)
|
|
|
|
return NULL;
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
_pcre_get_substring (string, regex->offsets, regex->matches,
|
2005-06-22 11:20:32 -07:00
|
|
|
match_num, (const char **)&match);
|
|
|
|
|
|
|
|
return match;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_fetch_pos:
|
|
|
|
* @regex: #EggRegex structure used in last match
|
|
|
|
* @string: the string on which the last match was made
|
|
|
|
* @match_num: number of the sub expression
|
|
|
|
* @start_pos: pointer to location where to store the start position
|
|
|
|
* @end_pos: pointer to location where to store the end position
|
|
|
|
*
|
|
|
|
* Retrieves the position of the @match_num<!-- -->'th capturing parentheses.
|
|
|
|
* 0 is the full text of the match, 1 is the first paren set, 2 the second,
|
|
|
|
* and so on.
|
|
|
|
*/
|
|
|
|
void
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_fetch_pos (EggRegex *regex,
|
|
|
|
G_GNUC_UNUSED const gchar *string,
|
2005-06-22 11:20:32 -07:00
|
|
|
gint match_num,
|
|
|
|
gint *start_pos,
|
|
|
|
gint *end_pos)
|
|
|
|
{
|
|
|
|
/* make sure the sub expression number they're requesting is less than
|
|
|
|
* the total number of sub expressions that were matched. */
|
|
|
|
if (match_num >= regex->matches)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (start_pos)
|
|
|
|
*start_pos = regex->offsets[2 * match_num];
|
|
|
|
|
|
|
|
if (end_pos)
|
|
|
|
*end_pos = regex->offsets[2 * match_num + 1];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_fetch_named:
|
|
|
|
* @regex: #EggRegex structure used in last match
|
|
|
|
* @string: the string on which the last match was made
|
|
|
|
* @name: name of the subexpression
|
|
|
|
*
|
|
|
|
* Retrieves the text matching the capturing parentheses named @name.
|
|
|
|
*
|
|
|
|
* Returns: The matched substring. You have to free it yourself.
|
|
|
|
*/
|
|
|
|
gchar *
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_fetch_named (EggRegex *regex,
|
2005-06-22 11:20:32 -07:00
|
|
|
const gchar *string,
|
|
|
|
const gchar *name)
|
|
|
|
{
|
|
|
|
gchar *match;
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
_pcre_get_named_substring (regex->regex,
|
|
|
|
string, regex->offsets, regex->matches,
|
2005-06-22 11:20:32 -07:00
|
|
|
name, (const char **)&match);
|
|
|
|
|
|
|
|
return match;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_fetch_all:
|
|
|
|
* @regex: a #EggRegex structure
|
|
|
|
* @string: the string on which the last match was made
|
|
|
|
*
|
2005-07-25 05:25:35 -07:00
|
|
|
* Bundles up pointers to each of the matching substrings from a match
|
2005-06-22 11:20:32 -07:00
|
|
|
* and stores then in an array of gchar pointers.
|
|
|
|
*
|
|
|
|
* Returns: a %NULL-terminated array of gchar * pointers. It must be freed using
|
|
|
|
* g_strfreev(). If the memory can't be allocated, returns %NULL.
|
|
|
|
*/
|
|
|
|
gchar **
|
|
|
|
egg_regex_fetch_all (EggRegex *regex,
|
|
|
|
const gchar *string)
|
|
|
|
{
|
|
|
|
gchar **listptr = NULL; /* the list pcre_get_substring_list() will fill */
|
|
|
|
gchar **result;
|
|
|
|
|
|
|
|
if (regex->matches < 0)
|
|
|
|
return NULL;
|
2005-07-25 05:25:35 -07:00
|
|
|
|
|
|
|
_pcre_get_substring_list (string, regex->offsets,
|
2005-06-22 11:20:32 -07:00
|
|
|
regex->matches, (const char ***)&listptr);
|
|
|
|
|
|
|
|
if (listptr)
|
|
|
|
{
|
|
|
|
/* PCRE returns a single block of memory which
|
|
|
|
* isn't suitable for g_strfreev().
|
|
|
|
*/
|
|
|
|
result = g_strdupv (listptr);
|
|
|
|
g_free (listptr);
|
|
|
|
}
|
2005-07-25 05:25:35 -07:00
|
|
|
else
|
2005-06-22 11:20:32 -07:00
|
|
|
result = NULL;
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_split:
|
|
|
|
* @regex: a #EggRegex structure
|
|
|
|
* @string: the string to split with the pattern
|
|
|
|
* @string_len: the length of @string, or -1 to use strlen()
|
|
|
|
* @match_options: match time option flags
|
2005-07-25 05:25:35 -07:00
|
|
|
* @max_pieces: maximum number of pieces to split the string into,
|
2005-06-22 11:20:32 -07:00
|
|
|
* or 0 for no limit
|
|
|
|
*
|
2005-07-25 05:25:35 -07:00
|
|
|
* Breaks the string on the pattern, and returns an array of the pieces.
|
2005-06-22 11:20:32 -07:00
|
|
|
*
|
|
|
|
* Returns: a %NULL-terminated gchar ** array. Free it using g_strfreev().
|
|
|
|
**/
|
|
|
|
gchar **
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_split (EggRegex *regex,
|
|
|
|
const gchar *string,
|
2005-06-22 11:20:32 -07:00
|
|
|
gssize string_len,
|
|
|
|
EggRegexMatchFlags match_options,
|
|
|
|
gint max_pieces)
|
|
|
|
{
|
|
|
|
gchar **string_list; /* The array of char **s worked on */
|
|
|
|
gint pos;
|
|
|
|
gint match_ret;
|
|
|
|
gint pieces;
|
|
|
|
gint start_pos;
|
|
|
|
gchar *piece;
|
|
|
|
GList *list, *last;
|
|
|
|
|
|
|
|
start_pos = 0;
|
|
|
|
pieces = 0;
|
|
|
|
list = NULL;
|
|
|
|
while (TRUE)
|
|
|
|
{
|
|
|
|
match_ret = egg_regex_match_next (regex, string, string_len, match_options);
|
|
|
|
if ((match_ret > 0) && ((max_pieces == 0) || (pieces < max_pieces)))
|
|
|
|
{
|
|
|
|
piece = g_strndup (string + start_pos, regex->offsets[0] - start_pos);
|
|
|
|
list = g_list_prepend (list, piece);
|
|
|
|
|
|
|
|
/* if there were substrings, these need to get added to the
|
|
|
|
* list as well */
|
|
|
|
if (match_ret > 1)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 1; i < match_ret; i++)
|
|
|
|
list = g_list_prepend (list, egg_regex_fetch (regex, string, i));
|
|
|
|
}
|
|
|
|
|
|
|
|
start_pos = regex->pos; /* move start_pos to end of match */
|
|
|
|
pieces++;
|
|
|
|
}
|
|
|
|
else /* if there was no match, copy to end of string, and break */
|
|
|
|
{
|
|
|
|
piece = g_strndup (string + start_pos, regex->string_len - start_pos);
|
|
|
|
list = g_list_prepend (list, piece);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
string_list = (gchar **) g_malloc (sizeof (gchar *) * (g_list_length (list) + 1));
|
|
|
|
pos = 0;
|
|
|
|
for (last = g_list_last (list); last; last = last->prev)
|
|
|
|
string_list[pos++] = last->data;
|
|
|
|
string_list[pos] = 0;
|
|
|
|
|
|
|
|
g_list_free (list);
|
|
|
|
return string_list;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_split_next:
|
|
|
|
* @pattern: gchar pointer to the pattern
|
|
|
|
* @string: the string to split on pattern
|
|
|
|
* @string_len: the length of @string, or -1 to use strlen()
|
|
|
|
* @match_options: match time options for the regex
|
|
|
|
*
|
2005-07-25 05:25:35 -07:00
|
|
|
* egg_regex_split_next() breaks the string on pattern, and returns the
|
|
|
|
* pieces, one per call. If the pattern contains capturing parentheses,
|
2005-06-22 11:20:32 -07:00
|
|
|
* then the text for each of the substrings will also be returned.
|
2005-07-25 05:25:35 -07:00
|
|
|
* If the pattern does not match anywhere in the string, then the whole
|
2005-06-22 11:20:32 -07:00
|
|
|
* string is returned as the first piece.
|
|
|
|
*
|
|
|
|
* Returns: a gchar * to the next piece of the string
|
|
|
|
*/
|
|
|
|
gchar *
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_split_next (EggRegex *regex,
|
|
|
|
const gchar *string,
|
|
|
|
gssize string_len,
|
2005-06-22 11:20:32 -07:00
|
|
|
EggRegexMatchFlags match_options)
|
|
|
|
{
|
|
|
|
gint start_pos = regex->pos;
|
|
|
|
gchar *piece = NULL;
|
|
|
|
gint match_ret;
|
|
|
|
|
|
|
|
/* if there are delimiter substrings stored, return those one at a
|
2005-07-25 05:25:35 -07:00
|
|
|
* time.
|
2005-06-22 11:20:32 -07:00
|
|
|
*/
|
|
|
|
if (regex->delims != NULL)
|
|
|
|
{
|
|
|
|
piece = regex->delims->data;
|
|
|
|
regex->delims = g_slist_remove (regex->delims, piece);
|
|
|
|
return piece;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* otherwise...
|
|
|
|
* use egg_regex_match_next() to find the next occurance of the pattern
|
|
|
|
* in the string. We use start_pos to keep track of where the stuff
|
|
|
|
* up to the current match starts. Copy that piece of the string off
|
|
|
|
* and append it to the buffer using strncpy. We have to NUL term the
|
|
|
|
* piece we copied off before returning it.
|
|
|
|
*/
|
|
|
|
match_ret = egg_regex_match_next (regex, string, string_len, match_options);
|
|
|
|
if (match_ret > 0)
|
|
|
|
{
|
|
|
|
piece = g_strndup (string + start_pos, regex->offsets[0] - start_pos);
|
|
|
|
|
|
|
|
/* if there were substrings, these need to get added to the
|
|
|
|
* list of delims */
|
|
|
|
if (match_ret > 1)
|
|
|
|
{
|
|
|
|
gint i;
|
|
|
|
for (i = 1; i < match_ret; i++)
|
|
|
|
regex->delims = g_slist_append (regex->delims,
|
|
|
|
egg_regex_fetch (regex, string, i));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else /* if there was no match, copy to end of string */
|
|
|
|
piece = g_strndup (string + start_pos, regex->string_len - start_pos);
|
|
|
|
|
|
|
|
return piece;
|
|
|
|
}
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
#if 0
/* Compiled out.  Appends the literal text passed in @data to @result and
 * returns FALSE. */
static gboolean
copy_replacement (G_GNUC_UNUSED EggRegex *regex,
                  G_GNUC_UNUSED const gchar *string,
                  GString     *result,
                  gpointer     data)
{
  gchar *literal = (gchar *)data;

  g_string_append (result, literal);

  return FALSE;
}
#endif
|
2005-06-22 11:20:32 -07:00
|
|
|
|
|
|
|
/* Kinds of items a replacement text is broken into. */
enum
{
  REPL_TYPE_STRING,              /* literal text, kept in InterpolationData.text */
  REPL_TYPE_CHARACTER,           /* single character, kept in InterpolationData.c */
  REPL_TYPE_SYMBOLIC_REFERENCE,  /* named sub expression; the name is in .text */
  REPL_TYPE_NUMERIC_REFERENCE    /* numbered sub expression; the number is in .num */
};
|
2005-06-22 11:20:32 -07:00
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
typedef struct
|
2005-06-22 11:20:32 -07:00
|
|
|
{
|
2005-07-25 05:25:35 -07:00
|
|
|
gchar *text;
|
|
|
|
gint type;
|
2005-06-22 11:20:32 -07:00
|
|
|
gint num;
|
|
|
|
gchar c;
|
|
|
|
} InterpolationData;
|
|
|
|
|
|
|
|
static void
|
|
|
|
free_interpolation_data (InterpolationData *data)
|
|
|
|
{
|
|
|
|
g_free (data->text);
|
|
|
|
g_free (data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const gchar *
|
|
|
|
expand_escape (const gchar *replacement,
|
2005-07-25 05:25:35 -07:00
|
|
|
const gchar *p,
|
2005-06-22 11:20:32 -07:00
|
|
|
InterpolationData *data,
|
|
|
|
GError **error)
|
|
|
|
{
|
|
|
|
const gchar *q, *r;
|
|
|
|
gint x, d, h, i;
|
2005-07-25 05:25:35 -07:00
|
|
|
const gchar *error_detail;
|
2005-06-22 11:20:32 -07:00
|
|
|
gint base = 0;
|
|
|
|
GError *tmp_error = NULL;
|
|
|
|
|
|
|
|
p++;
|
|
|
|
switch (*p)
|
|
|
|
{
|
|
|
|
case 't':
|
|
|
|
p++;
|
|
|
|
data->c = '\t';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case 'n':
|
|
|
|
p++;
|
|
|
|
data->c = '\n';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case 'v':
|
|
|
|
p++;
|
|
|
|
data->c = '\v';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
p++;
|
|
|
|
data->c = '\r';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
p++;
|
|
|
|
data->c = '\f';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case 'a':
|
|
|
|
p++;
|
|
|
|
data->c = '\a';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
p++;
|
|
|
|
data->c = '\b';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case '\\':
|
|
|
|
p++;
|
|
|
|
data->c = '\\';
|
|
|
|
data->type = REPL_TYPE_CHARACTER;
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
p++;
|
|
|
|
x = 0;
|
|
|
|
if (*p == '{')
|
|
|
|
{
|
|
|
|
p++;
|
2005-07-25 05:25:35 -07:00
|
|
|
do
|
2005-06-22 11:20:32 -07:00
|
|
|
{
|
|
|
|
h = g_ascii_xdigit_value (*p);
|
|
|
|
if (h < 0)
|
|
|
|
{
|
|
|
|
error_detail = _("hexadecimal digit or '}' expected");
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
x = x * 16 + h;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
while (*p != '}');
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (i = 0; i < 2; i++)
|
|
|
|
{
|
|
|
|
h = g_ascii_xdigit_value (*p);
|
|
|
|
if (h < 0)
|
|
|
|
{
|
|
|
|
error_detail = _("hexadecimal digit expected");
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
x = x * 16 + h;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
data->type = REPL_TYPE_STRING;
|
|
|
|
data->text = g_new0 (gchar, 8);
|
|
|
|
g_unichar_to_utf8 (x, data->text);
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
case 'u':
|
|
|
|
case 'L':
|
|
|
|
case 'U':
|
|
|
|
case 'E':
|
|
|
|
case 'Q':
|
|
|
|
case 'G':
|
|
|
|
error_detail = _("escape sequence not allowed");
|
|
|
|
goto error;
|
|
|
|
case 'g':
|
|
|
|
p++;
|
|
|
|
if (*p != '<')
|
|
|
|
{
|
|
|
|
error_detail = _("missing '<' in symbolic reference");
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
q = p + 1;
|
2005-07-25 05:25:35 -07:00
|
|
|
do
|
2005-06-22 11:20:32 -07:00
|
|
|
{
|
|
|
|
p++;
|
|
|
|
if (!*p)
|
|
|
|
{
|
|
|
|
error_detail = _("unfinished symbolic reference");
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
while (*p != '>');
|
|
|
|
if (p - q == 0)
|
|
|
|
{
|
|
|
|
error_detail = _("zero-length symbolic reference");
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
if (g_ascii_isdigit (*q))
|
|
|
|
{
|
|
|
|
x = 0;
|
2005-07-25 05:25:35 -07:00
|
|
|
do
|
2005-06-22 11:20:32 -07:00
|
|
|
{
|
|
|
|
h = g_ascii_digit_value (*q);
|
|
|
|
if (h < 0)
|
|
|
|
{
|
|
|
|
error_detail = _("digit expected");
|
|
|
|
p = q;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
x = x * 10 + h;
|
|
|
|
q++;
|
|
|
|
}
|
|
|
|
while (q != p);
|
|
|
|
data->num = x;
|
|
|
|
data->type = REPL_TYPE_NUMERIC_REFERENCE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
r = q;
|
2005-07-25 05:25:35 -07:00
|
|
|
do
|
2005-06-22 11:20:32 -07:00
|
|
|
{
|
|
|
|
if (!g_ascii_isalnum (*r))
|
|
|
|
{
|
|
|
|
error_detail = _("illegal symbolic reference");
|
|
|
|
p = r;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
r++;
|
|
|
|
}
|
|
|
|
while (r != p);
|
|
|
|
data->text = g_strndup (q, p - q);
|
|
|
|
data->type = REPL_TYPE_SYMBOLIC_REFERENCE;
|
|
|
|
}
|
|
|
|
p++;
|
|
|
|
break;
|
|
|
|
case '0':
|
|
|
|
base = 8;
|
|
|
|
case '1':
|
|
|
|
case '2':
|
|
|
|
case '3':
|
|
|
|
case '4':
|
|
|
|
case '5':
|
|
|
|
case '6':
|
|
|
|
case '7':
|
|
|
|
case '8':
|
|
|
|
case '9':
|
|
|
|
x = 0;
|
|
|
|
d = 0;
|
|
|
|
for (i = 0; i < 3; i++)
|
|
|
|
{
|
|
|
|
h = g_ascii_digit_value (*p);
|
2005-07-25 05:25:35 -07:00
|
|
|
if (h < 0)
|
2005-06-22 11:20:32 -07:00
|
|
|
break;
|
|
|
|
if (h > 7)
|
|
|
|
{
|
|
|
|
if (base == 8)
|
|
|
|
break;
|
2005-07-25 05:25:35 -07:00
|
|
|
else
|
2005-06-22 11:20:32 -07:00
|
|
|
base = 10;
|
|
|
|
}
|
|
|
|
if (i == 2 && base == 10)
|
|
|
|
break;
|
|
|
|
x = x * 8 + h;
|
|
|
|
d = d * 10 + h;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
/* added by muntyan - \0 means whole match */
|
|
|
|
if (base == 8 && x == 0 && i == 1)
|
|
|
|
{
|
|
|
|
data->type = REPL_TYPE_NUMERIC_REFERENCE;
|
|
|
|
data->num = 0;
|
|
|
|
}
|
|
|
|
/* end */
|
|
|
|
else if (base == 8 || i == 3)
|
|
|
|
{
|
|
|
|
data->type = REPL_TYPE_STRING;
|
|
|
|
data->text = g_new0 (gchar, 8);
|
|
|
|
g_unichar_to_utf8 (x, data->text);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
data->type = REPL_TYPE_NUMERIC_REFERENCE;
|
|
|
|
data->num = d;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0:
|
|
|
|
error_detail = _("stray final '\\'");
|
|
|
|
goto error;
|
|
|
|
default:
|
|
|
|
data->type = REPL_TYPE_STRING;
|
|
|
|
data->text = g_new0 (gchar, 8);
|
|
|
|
g_unichar_to_utf8 (g_utf8_get_char (p), data->text);
|
|
|
|
p = g_utf8_next_char (p);
|
|
|
|
}
|
|
|
|
|
|
|
|
return p;
|
|
|
|
|
|
|
|
error:
|
2005-07-25 05:25:35 -07:00
|
|
|
tmp_error = g_error_new (EGG_REGEX_ERROR,
|
2005-06-22 11:20:32 -07:00
|
|
|
EGG_REGEX_ERROR_REPLACE,
|
|
|
|
_("Error while parsing replacement "
|
|
|
|
"text \"%s\" at char %d: %s"),
|
2005-07-25 05:25:35 -07:00
|
|
|
replacement,
|
2005-06-22 11:20:32 -07:00
|
|
|
p - replacement,
|
|
|
|
error_detail);
|
|
|
|
g_propagate_error (error, tmp_error);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static GList *
|
|
|
|
split_replacement (const gchar *replacement,
|
|
|
|
GError **error)
|
|
|
|
{
|
|
|
|
GList *list = NULL;
|
|
|
|
InterpolationData *data;
|
|
|
|
const gchar *p, *start;
|
2005-07-25 05:25:35 -07:00
|
|
|
|
|
|
|
start = p = replacement;
|
2005-06-22 11:20:32 -07:00
|
|
|
while (*p)
|
|
|
|
{
|
|
|
|
if (*p == '\\')
|
|
|
|
{
|
|
|
|
data = g_new0 (InterpolationData, 1);
|
|
|
|
start = p = expand_escape (replacement, p, data, error);
|
|
|
|
if (*error)
|
|
|
|
{
|
|
|
|
g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
|
|
|
|
g_list_free (list);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
list = g_list_prepend (list, data);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
p++;
|
|
|
|
if (*p == '\\' || *p == '\0')
|
|
|
|
{
|
|
|
|
if (p - start > 0)
|
|
|
|
{
|
|
|
|
data = g_new0 (InterpolationData, 1);
|
|
|
|
data->text = g_strndup (start, p - start);
|
|
|
|
data->type = REPL_TYPE_STRING;
|
|
|
|
list = g_list_prepend (list, data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return g_list_reverse (list);
|
|
|
|
}
|
|
|
|
|
|
|
|
static gboolean
|
|
|
|
interpolate_replacement (EggRegex *regex,
|
|
|
|
const gchar *string,
|
|
|
|
GString *result,
|
|
|
|
gpointer data)
|
|
|
|
{
|
|
|
|
GList *list;
|
|
|
|
InterpolationData *idata;
|
|
|
|
gchar *match;
|
|
|
|
|
|
|
|
for (list = data; list; list = list->next)
|
|
|
|
{
|
|
|
|
idata = list->data;
|
|
|
|
switch (idata->type)
|
|
|
|
{
|
|
|
|
case REPL_TYPE_STRING:
|
|
|
|
g_string_append (result, idata->text);
|
|
|
|
break;
|
|
|
|
case REPL_TYPE_CHARACTER:
|
|
|
|
g_string_append_c (result, idata->c);
|
|
|
|
break;
|
|
|
|
case REPL_TYPE_NUMERIC_REFERENCE:
|
|
|
|
match = egg_regex_fetch (regex, string, idata->num);
|
2005-07-25 05:25:35 -07:00
|
|
|
if (match)
|
2005-06-22 11:20:32 -07:00
|
|
|
{
|
|
|
|
g_string_append (result, match);
|
|
|
|
g_free (match);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case REPL_TYPE_SYMBOLIC_REFERENCE:
|
|
|
|
match = egg_regex_fetch_named (regex, string, idata->text);
|
2005-07-25 05:25:35 -07:00
|
|
|
if (match)
|
2005-06-22 11:20:32 -07:00
|
|
|
{
|
|
|
|
g_string_append (result, match);
|
|
|
|
g_free (match);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-25 05:25:35 -07:00
|
|
|
return FALSE;
|
2005-06-22 11:20:32 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_replace:
|
|
|
|
* @regex: a #EggRegex structure
|
|
|
|
* @string: the string to perform matches against
|
|
|
|
* @string_len: the length of @string, or -1 to use strlen()
|
|
|
|
* @replacement: text to replace each match with
|
|
|
|
* @match_options: options for the match
|
|
|
|
*
|
2005-07-25 05:25:35 -07:00
|
|
|
* Replaces all occurances of the pattern in @regex with the
|
|
|
|
* replacement text. Backreferences of the form '\number' or '\g<number>'
|
|
|
|
* in the replacement text are interpolated by the number-th captured
|
2005-06-22 11:20:32 -07:00
|
|
|
* subexpression of the match, '\g<name>' refers to the captured subexpression
|
2005-07-25 05:25:35 -07:00
|
|
|
* with the given name. '\0' refers to the complete match. To include a
|
2005-06-22 11:20:32 -07:00
|
|
|
* literal '\' in the replacement, write '\\'.
|
|
|
|
*
|
|
|
|
* Returns: a newly allocated string containing the replacements.
|
|
|
|
*/
|
|
|
|
gchar *
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_replace (EggRegex *regex,
|
|
|
|
const gchar *string,
|
2005-06-22 11:20:32 -07:00
|
|
|
gssize string_len,
|
|
|
|
const gchar *replacement,
|
|
|
|
EggRegexMatchFlags match_options,
|
|
|
|
GError **error)
|
|
|
|
{
|
|
|
|
gchar *result;
|
|
|
|
GList *list;
|
|
|
|
|
|
|
|
list = split_replacement (replacement, error);
|
2005-07-25 05:25:35 -07:00
|
|
|
result = egg_regex_replace_eval (regex,
|
2005-06-22 11:20:32 -07:00
|
|
|
string, string_len,
|
|
|
|
interpolate_replacement,
|
|
|
|
(gpointer)list,
|
|
|
|
match_options);
|
|
|
|
g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
|
|
|
|
g_list_free (list);
|
2005-07-25 05:25:35 -07:00
|
|
|
|
2005-06-22 11:20:32 -07:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_replace_eval:
|
|
|
|
* @gregex: a #EggRegex structure
|
|
|
|
* @string: string to perform matches against
|
|
|
|
* @string_len: the length of @string, or -1 to use strlen()
|
|
|
|
* @eval: a function to call for each match
|
|
|
|
* @match_options: Options for the match
|
|
|
|
*
|
|
|
|
* Replaces occurances of the pattern in regex with
|
|
|
|
* the output of @eval for that occurance.
|
|
|
|
*
|
|
|
|
* Returns: a newly allocated string containing the replacements.
|
|
|
|
*/
|
|
|
|
gchar *
|
2005-07-25 05:25:35 -07:00
|
|
|
egg_regex_replace_eval (EggRegex *regex,
|
2005-06-22 11:20:32 -07:00
|
|
|
const gchar *string,
|
|
|
|
gssize string_len,
|
|
|
|
EggRegexEvalCallback eval,
|
2005-07-25 05:25:35 -07:00
|
|
|
gpointer user_data,
|
2005-06-22 11:20:32 -07:00
|
|
|
EggRegexMatchFlags match_options)
|
|
|
|
{
|
|
|
|
GString *result;
|
|
|
|
gint str_pos = 0;
|
|
|
|
gboolean done = FALSE;
|
|
|
|
|
|
|
|
if (string_len < 0)
|
|
|
|
string_len = strlen (string);
|
|
|
|
|
|
|
|
/* clear out the regex for reuse, just in case */
|
|
|
|
egg_regex_clear (regex);
|
|
|
|
|
|
|
|
result = g_string_sized_new (string_len);
|
|
|
|
|
|
|
|
/* run down the string making matches. */
|
|
|
|
while (egg_regex_match_next (regex, string, string_len, match_options) > 0 && !done)
|
|
|
|
{
|
2005-07-25 05:25:35 -07:00
|
|
|
g_string_append_len (result,
|
|
|
|
string + str_pos,
|
2005-06-22 11:20:32 -07:00
|
|
|
regex->offsets[0] - str_pos);
|
|
|
|
done = (*eval) (regex, string, result, user_data);
|
|
|
|
str_pos = regex->offsets[1];
|
|
|
|
}
|
2005-07-25 05:25:35 -07:00
|
|
|
|
2005-06-22 11:20:32 -07:00
|
|
|
g_string_append_len (result, string + str_pos, string_len - str_pos);
|
|
|
|
|
|
|
|
return g_string_free (result, FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_eval_replacement:
|
|
|
|
* @gregex: a #EggRegex structure
|
|
|
|
* @string: the string on which the last match was made
|
|
|
|
* @replacement: replacement string
|
|
|
|
* @error: location to store error
|
|
|
|
*
|
|
|
|
* Evaluates replacement after successful match.
|
|
|
|
*
|
|
|
|
* Returns: a newly allocated string containing the replacement.
|
|
|
|
*/
|
|
|
|
gchar *
|
|
|
|
egg_regex_eval_replacement (EggRegex *regex,
|
|
|
|
const gchar *string,
|
|
|
|
const gchar *replacement,
|
|
|
|
GError **error)
|
|
|
|
{
|
|
|
|
GString *result;
|
|
|
|
GList *list;
|
|
|
|
|
|
|
|
list = split_replacement (replacement, error);
|
|
|
|
|
|
|
|
if (!list) return NULL;
|
|
|
|
|
|
|
|
result = g_string_new (NULL);
|
|
|
|
interpolate_replacement (regex, string, result, list);
|
|
|
|
g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
|
|
|
|
g_list_free (list);
|
|
|
|
|
|
|
|
return g_string_free (result, FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* egg_regex_check_replacement:
|
|
|
|
* @replacement: replacement string
|
|
|
|
* @error: location to store error
|
|
|
|
*/
|
|
|
|
gboolean
|
|
|
|
egg_regex_check_replacement (const gchar *replacement,
|
|
|
|
GError **error)
|
|
|
|
{
|
|
|
|
GList *list;
|
|
|
|
|
|
|
|
list = split_replacement (replacement, error);
|
|
|
|
|
|
|
|
if (!list) return FALSE;
|
|
|
|
|
|
|
|
g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
|
|
|
|
g_list_free (list);
|
|
|
|
return TRUE;
|
|
|
|
}
|