medit/moo/mooutils/eggregex.c

/* EggRegex -- regular expression API wrapper around PCRE.
 * Copyright (C) 1999, 2000 Scott Wimer
 * Copyright (C) 2004 Matthias Clasen <mclasen@redhat.com>
 *
 * This is basically an ease-of-use wrapper around the functionality of
 * PCRE.
 *
 * With this library, we are, hopefully, drastically reducing the code
 * complexity necessary by making use of a more complex and detailed
 * data structure to store the regex info.  I am hoping to have a regex
 * interface that is almost as easy to use as Perl's.  <fingers crossed>
 *
 * Author: Scott Wimer <scottw@cylant.com>
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * This library is free software, you can distribute it or modify it
 * under any of the following terms:
 *  1) The GNU General Public License (GPL)
 *  2) The GNU Library General Public License (LGPL)
 *  3) The Perl Artistic license (Artistic)
 *  4) The BSD license (BSD)
 *
 * In short, you can use this library in any code you desire, so long as
 * the Copyright notice above remains intact.  If you do make changes to
 * it, I would appreciate that you let me know so I can improve this
 * library for everybody, but I'm not gonna force you to.
 *
 * Please note that this library is just a wrapper around Philip Hazel's
 * PCRE library.  Please see the file 'LICENSE' in your PCRE distribution.
 * And, if you live in England, please send him a pint of good beer, his
 * library is great.
 *
 */

/*****************************************************************************
 * Changed by Muntyan
 *
 * 04/24/2005: added refcounting
 * 04/30/2005: added egg_regex_eval_replacement and egg_regex_check_replacement
 * 05/31/2005: changed expand_escape: \0 means whole match
 * 07/25/2005: silent gcc
 * 10/03/2005: removed #include "config.h", removed odd 'break' after 'goto' to
 *             avoid warning
 *
 * mooutils/eggregex.c
 *****************************************************************************/

#include <stdlib.h>
#include <string.h>

#include "eggregex.h"
#include <glib.h>
#include "pcre/pcre.h"

/* FIXME when this is in glib */
#define _(s) s

/* Private state of a compiled regular expression plus the bookkeeping for
 * the most recent match performed with it. */
struct _EggRegex
{
  guint ref_count;      /* reference count; set to 1 by egg_regex_new() */
  gchar *pattern;       /* copy of the pattern text */
  pcre *regex;		/* compiled form of the pattern; NULL if compilation failed */
  pcre_extra *extra;	/* data stored when egg_regex_optimize() is used */
  gint matches;		/* value returned by the last _pcre_exec() call; -1 after egg_regex_clear() */
  gint pos;		/* end of the last successful match; the next egg_regex_match_next() starts here */
  gint *offsets;	/* array of start/end offset pairs: 0,1 ; 2,3 ; 4,5 etc */
  gint n_offsets;	/* number of ints in offsets: (capture count + 1) * 3 */
  EggRegexCompileFlags compile_opts;	/* options used at compile time on the pattern */
  EggRegexMatchFlags match_opts;	/* options used at match time on the regex */
  gint string_len;	/* length of the string last used against; -1 when not set */
  GSList *delims;	/* captured delimiter substrings queued by egg_regex_split_next() */
};

GQuark
egg_regex_error_quark (void)
{
  static GQuark error_quark = 0;

  if (error_quark == 0)
    error_quark = g_quark_from_static_string ("g-regex-error-quark");

  return error_quark;
}

/**
 * egg_regex_new:
 * @pattern: the regular expression
 * @compile_options: compile options for the regular expression
 * @match_options: match options for the regular expression
 * @error: return location for a #GError
 *
 * Compiles @pattern and sets up a new #EggRegex with a reference count
 * of one.  PCRE_UTF8 and PCRE_NO_UTF8_CHECK are always added to the compile
 * options, and PCRE_NO_UTF8_CHECK to the match options.
 *
 * If compilation fails, @error is set and the structure is still returned,
 * but without a compiled pattern and without an offsets array; check @error
 * before using the result for matching.
 *
 * Returns: a #EggRegex structure
 */
EggRegex *
egg_regex_new (const gchar          *pattern,
               EggRegexCompileFlags  compile_options,
               EggRegexMatchFlags    match_options,
               GError              **error)
{
  EggRegex *regex;
  const gchar *compile_msg;
  gint compile_offset;
  gint n_groups;

  regex = g_new0 (EggRegex, 1);

  /* state that does not depend on whether the pattern compiles */
  regex->ref_count = 1;
  regex->pattern = g_strdup (pattern);
  regex->string_len = -1;	/* no subject string seen yet */
  regex->pos = 0;
  regex->extra = NULL;
  regex->compile_opts = compile_options | PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
  regex->match_opts = match_options | PCRE_NO_UTF8_CHECK;

  regex->regex = _pcre_compile (pattern, regex->compile_opts,
                                &compile_msg, &compile_offset, NULL);

  if (regex->regex != NULL)
    {
      /* size the offsets array from the number of capturing groups */
      _pcre_fullinfo (regex->regex, regex->extra,
                      PCRE_INFO_CAPTURECOUNT, &n_groups);
      regex->n_offsets = (n_groups + 1) * 3;
      regex->offsets = g_new0 (gint, regex->n_offsets);
    }
  else
    {
      /* report the failure; the partially set up structure is returned */
      GError *compile_error = g_error_new (EGG_REGEX_ERROR,
                                           EGG_REGEX_ERROR_COMPILE,
                                           _("Error while compiling regular "
                                             "expression %s at char %d: %s"),
                                           pattern, compile_offset, compile_msg);
      g_propagate_error (error, compile_error);
    }

  return regex;
}


/**
 * egg_regex_unref:
 * @regex: a #EggRegex structure from egg_regex_new()
 *
 * Drops one reference to @regex.  When the last reference is dropped, all
 * the memory associated with the regex structure is freed, including any
 * delimiter substrings still queued by egg_regex_split_next().
 */
void
egg_regex_unref (EggRegex *regex)
{
  g_return_if_fail (regex != NULL);
  g_return_if_fail (regex->ref_count > 0);

  if (--regex->ref_count)
    return;

  /* Queued delimiter strings that were never handed out are still owned
   * by the regex; they are released with g_free(), as this file does for
   * every other string obtained from egg_regex_fetch(). */
  g_slist_foreach (regex->delims, (GFunc) g_free, NULL);
  g_slist_free (regex->delims);

  g_free (regex->pattern);
  g_free (regex->offsets);
  g_free (regex->regex);	/* g_free() accepts NULL */
  g_free (regex->extra);
  g_free (regex);
}

/* Takes an additional reference on @regex; release it with
 * egg_regex_unref(). */
void
egg_regex_ref (EggRegex *regex)
{
  regex->ref_count += 1;
}

/* Alias for egg_regex_unref(): drops one reference, it does not
 * unconditionally destroy @regex. */
void
egg_regex_free (EggRegex *regex)
{
  egg_regex_unref (regex);
}


/* FIXME */
/* Returns the pattern text stored in @regex (owned by the regex, do not
 * free), or NULL when @regex is NULL. */
const gchar *
egg_regex_get_pattern (EggRegex *regex)
{
  if (regex == NULL)
    return NULL;

  return regex->pattern;
}

/**
 * egg_regex_clear:
 * @regex: a #EggRegex structure
 *
 * Clears out the members of @regex that are holding information about the
 * last set of matches for this pattern.  egg_regex_clear() needs to be
 * called between uses of egg_regex_match() or egg_regex_match_next() against
 * new target strings.
 *
 * Any delimiter substrings still queued by egg_regex_split_next() are
 * discarded.
 */
void
egg_regex_clear (EggRegex *regex)
{
  regex->matches = -1;
  regex->string_len = -1;
  regex->pos = 0;

  /* If the pattern was used with egg_regex_split_next(), it may have
   * delimiter substrings queued.  Free the strings and the list, and reset
   * the pointer so that later calls do not touch freed list nodes. */
  if (regex->delims != NULL)
    {
      g_slist_foreach (regex->delims, (GFunc) g_free, NULL);
      g_slist_free (regex->delims);
      regex->delims = NULL;
    }
}

/**
 * egg_regex_optimize:
 * @regex: a #EggRegex structure
 * @error: return location for a #GError
 *
 * If the pattern will be used many times, then it may be worth the
 * effort to optimize it to improve the speed of matches.
 *
 * Calling this more than once replaces the previously stored study data.
 * If studying fails, @error is set.
 */
void
egg_regex_optimize (EggRegex  *regex,
                    GError   **error)
{
  const gchar *errmsg = NULL;
  pcre_extra *previous = regex->extra;

  regex->extra = _pcre_study (regex->regex, 0, &errmsg);

  /* release study data from an earlier call instead of leaking it; it is
   * freed the same way egg_regex_unref() frees it */
  g_free (previous);

  if (errmsg)
    {
      GError *tmp_error = g_error_new (EGG_REGEX_ERROR,
                                       EGG_REGEX_ERROR_OPTIMIZE,
                                       _("Error while optimizing "
                                         "regular expression %s: %s"),
                                       regex->pattern,
                                       errmsg);
      g_propagate_error (error, tmp_error);
    }
}

/**
 * egg_regex_match:
 * @regex: a #EggRegex structure from egg_regex_new()
 * @string: the string to scan for matches
 * @string_len: the length of @string, or -1 to use strlen()
 * @match_options:  match options
 *
 * Scans @string from its beginning for a match of the pattern in @regex.
 * The offsets of the full match and of every sub pattern are stored in the
 * offsets array of @regex; on success the pos member is set to the end of
 * the full match.
 *
 * The @match_options are combined with the match options specified when the
 * @regex structure was created, letting you have more flexibility in reusing
 * #EggRegex structures.
 *
 * Returns:  the value returned by pcre_exec(): a positive count (number of
 *           matched substrings + 1) on success, a non-positive value
 *           otherwise.
 */
gint
egg_regex_match (EggRegex           *regex,
                 const gchar        *string,
                 gssize              string_len,
                 EggRegexMatchFlags  match_options)
{
  gint n_found;

  regex->string_len = (string_len >= 0) ? string_len : (gssize) strlen (string);

  n_found = _pcre_exec (regex->regex, regex->extra,
                        string, regex->string_len, 0,
                        regex->match_opts | match_options,
                        regex->offsets, regex->n_offsets);
  regex->matches = n_found;

  /* remember where the match ended */
  if (n_found > 0)
    regex->pos = regex->offsets[1];

  return n_found;
}

/* FIXME:
 * - egg_regex_match should call this.
 * - egg_regex_match_next cannot be used after this.
 * - document this function.
 *
 * Same as egg_regex_match(), except that the scan starts at byte offset
 * string_index of string instead of at its beginning.  The stored offsets
 * are whatever _pcre_exec() reports for the given string.
 */
gint
egg_regex_match_extended (EggRegex           *regex,
                          const gchar        *string,
                          gssize              string_len,
                          gint                string_index,
                          EggRegexMatchFlags  match_options)
{
  gint n_found;

  regex->string_len = (string_len >= 0) ? string_len : (gssize) strlen (string);

  n_found = _pcre_exec (regex->regex, regex->extra,
                        string, regex->string_len, string_index,
                        regex->match_opts | match_options,
                        regex->offsets, regex->n_offsets);
  regex->matches = n_found;

  /* remember where the match ended */
  if (n_found > 0)
    regex->pos = regex->offsets[1];

  return n_found;
}

/**
 * egg_regex_match_next:
 * @regex: a #EggRegex structure
 * @string: the string to scan for matches
 * @string_len: the length of @string, or -1 to use strlen()
 * @match_options: the match options
 *
 * Scans for the next match in @string of the pattern in @regex, starting
 * where the previous match ended (the pos member of @regex).  The offsets
 * of the full match and of all sub patterns are stored in the offsets array,
 * relative to the start of @string, and pos is moved to the end of the new
 * match.  Sub patterns that did not take part in the match keep the offset
 * -1.  The match options are ored with the match options set when the
 * @regex was created.
 *
 * You have to call egg_regex_clear() to reuse the same pattern on a new string.
 * This is especially true for use with egg_regex_match_next().
 *
 * Returns:  the value returned by pcre_exec(): a positive count (number of
 *           matched substrings + 1) on success, a non-positive value
 *           otherwise.
 */
gint
egg_regex_match_next (EggRegex           *regex,
                      const gchar        *string,
                      gssize              string_len,
                      EggRegexMatchFlags  match_options)
{
  /* If this regex hasn't been used on this string before, we need to
   * record the length of the string.  There is no reliable way to detect
   * a new string, so the caller is required to call egg_regex_clear()
   * between strings; that resets string_len to -1 and pos to 0.
   */
  if (regex->string_len == -1)
    {
      if (string_len < 0)
        string_len = strlen (string);

      regex->string_len = string_len;
    }

  /* perform the match on the part of the string after the last match */
  regex->matches = _pcre_exec (regex->regex, regex->extra,
                               string + regex->pos,
                               regex->string_len - regex->pos,
                               0, regex->match_opts | match_options,
                               regex->offsets, regex->n_offsets);

  /* The offsets reported by PCRE are relative to string + pos; shift them
   * so they are relative to the string the caller passed.  Offsets of
   * unset sub patterns are reported as -1 and must stay -1, otherwise they
   * would turn into pos - 1 and look like real positions.
   * Then move pos to the end of this match.
   */
  if (regex->matches > 0)
    {
      gint i;
      gint n_used = regex->matches * 2;

      for (i = 0; i < n_used; i++)
        {
          if (regex->offsets[i] >= 0)
            regex->offsets[i] += regex->pos;
        }

      regex->pos = regex->offsets[1];
    }

  return regex->matches;
}


/**
 * egg_regex_fetch:
 * @regex: #EggRegex structure used in last match
 * @string: the string on which the last match was made
 * @match_num: number of the sub expression
 *
 * Retrieves the text matching the @match_num<!-- -->'th capturing parentheses.
 * 0 is the full text of the match, 1 is the first paren set, 2 the second,
 * and so on.
 *
 * Returns: The matched substring.  You have to free it yourself.  %NULL is
 * returned if @match_num is negative, is not smaller than the number of
 * substrings of the last match, or if the substring cannot be extracted.
 */
gchar *
egg_regex_fetch (EggRegex    *regex,
                 const gchar *string,
                 gint         match_num)
{
  const char *match = NULL;

  /* make sure the sub expression number they're requesting is valid:
   * not negative and less than the number of sub expressions matched. */
  if (match_num < 0 || match_num >= regex->matches)
    return NULL;

  /* on failure PCRE returns a negative code and leaves the output
   * pointer untouched, so never hand that back to the caller */
  if (_pcre_get_substring (string, regex->offsets, regex->matches,
                           match_num, &match) < 0)
    return NULL;

  return (gchar *) match;
}

/**
 * egg_regex_fetch_pos:
 * @regex: #EggRegex structure used in last match
 * @string: the string on which the last match was made
 * @match_num: number of the sub expression
 * @start_pos: pointer to location where to store the start position
 * @end_pos: pointer to location where to store the end position
 *
 * Retrieves the position of the @match_num<!-- -->'th capturing parentheses.
 * 0 is the full text of the match, 1 is the first paren set, 2 the second,
 * and so on.
 *
 * @start_pos and @end_pos may each be %NULL.  If @match_num is negative or
 * not smaller than the number of substrings of the last match, nothing is
 * stored.
 */
void
egg_regex_fetch_pos (EggRegex      *regex,
                     G_GNUC_UNUSED const gchar *string,
                     gint         match_num,
                     gint        *start_pos,
                     gint        *end_pos)
{
  /* make sure the sub expression number they're requesting is valid;
   * a negative number would index before the start of the offsets array. */
  if (match_num < 0 || match_num >= regex->matches)
    return;

  if (start_pos)
    *start_pos = regex->offsets[2 * match_num];

  if (end_pos)
    *end_pos = regex->offsets[2 * match_num + 1];
}

/**
 * egg_regex_fetch_named:
 * @regex: #EggRegex structure used in last match
 * @string: the string on which the last match was made
 * @name: name of the subexpression
 *
 * Retrieves the text matching the capturing parentheses named @name.
 *
 * Returns: The matched substring.  You have to free it yourself.  %NULL is
 * returned if the substring cannot be extracted, for example because the
 * pattern has no sub expression called @name.
 */
gchar *
egg_regex_fetch_named (EggRegex    *regex,
                       const gchar *string,
                       const gchar *name)
{
  const char *match = NULL;

  /* on failure PCRE returns a negative code and leaves the output
   * pointer untouched; callers test the result against NULL and free it,
   * so an uninitialized pointer must never escape */
  if (_pcre_get_named_substring (regex->regex,
                                 string, regex->offsets, regex->matches,
                                 name, &match) < 0)
    return NULL;

  return (gchar *) match;
}

/**
 * egg_regex_fetch_all:
 * @regex: a #EggRegex structure
 * @string: the string on which the last match was made
 *
 * Collects copies of all the substrings of the last match into a
 * %NULL-terminated array of strings.
 *
 * Returns: a %NULL-terminated array of gchar * pointers. It must be freed using
 * g_strfreev(). Returns %NULL if the stored match count is negative or if
 * PCRE did not produce a list.
 */
gchar **
egg_regex_fetch_all (EggRegex    *regex,
                     const gchar *string)
{
  gchar **pcre_list = NULL;	/* filled in by pcre_get_substring_list() */
  gchar **copy;

  if (regex->matches < 0)
    return NULL;

  _pcre_get_substring_list (string, regex->offsets,
                            regex->matches, (const char ***)&pcre_list);

  if (pcre_list == NULL)
    return NULL;

  /* PCRE hands back one single block of memory, which g_strfreev()
   * cannot release, so give the caller a deep copy instead. */
  copy = g_strdupv (pcre_list);
  g_free (pcre_list);

  return copy;
}


/**
 * egg_regex_split:
 * @regex:  a #EggRegex structure
 * @string:  the string to split with the pattern
 * @string_len: the length of @string, or -1 to use strlen()
 * @match_options:  match time option flags
 * @max_pieces:  maximum number of matches to split on,
 *    or 0 for no limit
 *
 * Breaks the string on the pattern, and returns an array of the pieces.
 * The text captured by sub patterns of each delimiter match is inserted
 * after the piece preceding that match.  The remainder of the string after
 * the last used match is always the final element.
 *
 * Matching is done with egg_regex_match_next(), so it continues from the
 * state stored in @regex; this function does not call egg_regex_clear().
 *
 * NOTE(review): a pattern that can match the empty string looks like it
 * never advances the stored position, so this loop may not terminate for
 * such patterns -- confirm.
 *
 * Returns: a %NULL-terminated gchar ** array. Free it using g_strfreev().
 **/
gchar **
egg_regex_split (EggRegex           *regex,
                 const gchar        *string,
                 gssize              string_len,
                 EggRegexMatchFlags  match_options,
                 gint                max_pieces)
{
  GList *parts = NULL;		/* collected strings, newest first */
  GList *node;
  gchar **result;
  gint n_splits = 0;		/* delimiter matches consumed so far */
  gint piece_start = 0;		/* where the current piece begins */
  gint n_found;
  gint idx;

  for (;;)
    {
      n_found = egg_regex_match_next (regex, string, string_len, match_options);

      /* no further delimiter, or limit reached: the rest is the last piece */
      if (n_found <= 0 || (max_pieces != 0 && n_splits >= max_pieces))
        {
          parts = g_list_prepend (parts,
                                  g_strndup (string + piece_start,
                                             regex->string_len - piece_start));
          break;
        }

      /* text in front of the delimiter */
      parts = g_list_prepend (parts,
                              g_strndup (string + piece_start,
                                         regex->offsets[0] - piece_start));

      /* captured parts of the delimiter itself */
      for (idx = 1; idx < n_found; idx++)
        parts = g_list_prepend (parts, egg_regex_fetch (regex, string, idx));

      piece_start = regex->pos;	/* continue after the delimiter */
      n_splits++;
    }

  /* move the strings, oldest first, into a NULL-terminated vector */
  parts = g_list_reverse (parts);
  result = g_new (gchar *, g_list_length (parts) + 1);
  idx = 0;
  for (node = parts; node != NULL; node = node->next)
    result[idx++] = node->data;
  result[idx] = NULL;

  g_list_free (parts);
  return result;
}


/**
 * egg_regex_split_next:
 * @regex:  a #EggRegex structure
 * @string:  the string to split on pattern
 * @string_len: the length of @string, or -1 to use strlen()
 * @match_options:  match time options for the regex
 *
 * egg_regex_split_next() breaks the string on pattern, and returns the
 * pieces, one per call.  If the pattern contains capturing parentheses,
 * then the text for each of the substrings will also be returned, one per
 * call, before the next piece.
 * If the pattern does not match anywhere in the string, then the whole
 * string is returned as the first piece.
 *
 * Returns:  a newly allocated gchar * holding the next piece of the string
 */
gchar *
egg_regex_split_next (EggRegex           *regex,
                      const gchar        *string,
                      gssize              string_len,
                      EggRegexMatchFlags  match_options)
{
  gint piece_start = regex->pos;
  gchar *next_piece;
  gint n_found;
  gint idx;

  /* captured delimiter substrings from the previous match are handed
   * out first, one per call, in the order they were queued */
  if (regex->delims != NULL)
    {
      next_piece = regex->delims->data;
      regex->delims = g_slist_delete_link (regex->delims, regex->delims);
      return next_piece;
    }

  /* otherwise look for the next occurrence of the pattern; the piece is
   * the text between the end of the previous match and its start */
  n_found = egg_regex_match_next (regex, string, string_len, match_options);

  if (n_found <= 0)
    {
      /* no match: the rest of the string is the piece */
      return g_strndup (string + piece_start,
                        regex->string_len - piece_start);
    }

  next_piece = g_strndup (string + piece_start,
                          regex->offsets[0] - piece_start);

  /* queue the captured substrings for the following calls */
  for (idx = 1; idx < n_found; idx++)
    regex->delims = g_slist_append (regex->delims,
                                    egg_regex_fetch (regex, string, idx));

  return next_piece;
}

/* Compiled out by the #if 0 below.  As written, this callback would append
 * data (treated as a string) to result and return FALSE. */
#if 0
static gboolean
copy_replacement (G_GNUC_UNUSED EggRegex *regex,
                  G_GNUC_UNUSED const gchar *string,
		  GString     *result,
	          gpointer     data)
{
  g_string_append (result, (gchar *)data);

  return FALSE;
}
#endif

/* Kinds of pieces a parsed replacement string is made of; stored in the
 * type member of InterpolationData. */
enum
{
  REPL_TYPE_STRING,             /* literal text, held in text */
  REPL_TYPE_CHARACTER,          /* a single character, held in c */
  REPL_TYPE_SYMBOLIC_REFERENCE, /* reference to a named group; the name is in text */
  REPL_TYPE_NUMERIC_REFERENCE   /* reference to a numbered group; the number is in num */
};

/* One piece of a parsed replacement string.  Which members are meaningful
 * depends on type (one of the REPL_TYPE_* values). */
typedef struct
{
  gchar *text;  /* allocated literal text or group name; may be NULL; freed by free_interpolation_data() */
  gint   type;  /* one of the REPL_TYPE_* values */
  gint   num;   /* group number for REPL_TYPE_NUMERIC_REFERENCE */
  gchar  c;     /* character for REPL_TYPE_CHARACTER */
} InterpolationData;

/* Frees one InterpolationData together with its text.  Also used as a
 * GFunc (via a cast) to free every element of a parsed replacement list. */
static void
free_interpolation_data (InterpolationData *data)
{
  g_free (data->text);
  g_free (data);
}

/* Parses one backslash escape of a replacement string.
 *
 * replacement: the whole replacement text, used only for error messages
 * p:           points at the backslash that starts the escape
 * data:        zero-initialized piece to fill in (type plus c, num or text)
 * error:       return location for a GError, may be NULL
 *
 * Recognized escapes:
 *   \t \n \v \r \f \a \b \\   the corresponding single character
 *   \xHH, \x{H...}            character with the given hexadecimal code
 *   \g<number>, \g<name>      numbered / named group reference
 *   \0                        on its own: the whole match
 *   \0..., \ooo               a leading 0, or three octal digits: character
 *                             with that octal code
 *   \d, \dd                   otherwise: reference to group number d / dd
 *   \l \u \L \U \E \Q \G      rejected
 *   anything else             the (UTF-8) character following the backslash
 *
 * Returns a pointer to the first byte after the escape, or NULL (with
 * error set) if the escape is malformed.
 */
static const gchar *
expand_escape (const gchar        *replacement,
               const gchar        *p,
               InterpolationData  *data,
               GError            **error)
{
  const gchar *q, *r;
  gint x, d, h, i;
  const gchar *error_detail;
  gint base = 0;
  GError *tmp_error = NULL;

  p++;
  switch (*p)
    {
    case 't':
      p++;
      data->c = '\t';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case 'n':
      p++;
      data->c = '\n';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case 'v':
      p++;
      data->c = '\v';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case 'r':
      p++;
      data->c = '\r';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case 'f':
      p++;
      data->c = '\f';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case 'a':
      p++;
      data->c = '\a';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case 'b':
      p++;
      data->c = '\b';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case '\\':
      p++;
      data->c = '\\';
      data->type = REPL_TYPE_CHARACTER;
      break;
    case 'x':
      /* NOTE(review): the accumulated code point x is not range-checked */
      p++;
      x = 0;
      if (*p == '{')
        {
          /* \x{H...}: one or more hex digits up to the closing brace */
          p++;
          do
            {
              h = g_ascii_xdigit_value (*p);
              if (h < 0)
                {
                  error_detail = _("hexadecimal digit or '}' expected");
                  goto error;
                }
              x = x * 16 + h;
              p++;
            }
          while (*p != '}');
          p++;
        }
      else
        {
          /* \xHH: exactly two hex digits */
          for (i = 0; i < 2; i++)
            {
              h = g_ascii_xdigit_value (*p);
              if (h < 0)
                {
                  error_detail = _("hexadecimal digit expected");
                  goto error;
                }
              x = x * 16 + h;
              p++;
            }
        }
      data->type = REPL_TYPE_STRING;
      data->text = g_new0 (gchar, 8);
      g_unichar_to_utf8 (x, data->text);
      break;
    case 'l':
    case 'u':
    case 'L':
    case 'U':
    case 'E':
    case 'Q':
    case 'G':
      error_detail = _("escape sequence not allowed");
      goto error;
    case 'g':
      p++;
      if (*p != '<')
        {
          error_detail = _("missing '<' in symbolic reference");
          goto error;
        }
      q = p + 1;
      /* advance p to the closing '>' */
      do
        {
          p++;
          if (!*p)
            {
              error_detail = _("unfinished symbolic reference");
              goto error;
            }
        }
      while (*p != '>');
      if (p - q == 0)
        {
          error_detail = _("zero-length symbolic reference");
          goto error;
        }
      if (g_ascii_isdigit (*q))
        {
          /* \g<number>: everything between the brackets must be digits */
          x = 0;
          do
            {
              h = g_ascii_digit_value (*q);
              if (h < 0)
                {
                  error_detail = _("digit expected");
                  p = q;
                  goto error;
                }
              x = x * 10 + h;
              q++;
            }
          while (q != p);
          data->num = x;
          data->type = REPL_TYPE_NUMERIC_REFERENCE;
        }
      else
        {
          /* \g<name>: only ASCII letters and digits are accepted */
          r = q;
          do
            {
              if (!g_ascii_isalnum (*r))
                {
                  error_detail = _("illegal symbolic reference");
                  p = r;
                  goto error;
                }
              r++;
            }
          while (r != p);
          data->text = g_strndup (q, p - q);
          data->type = REPL_TYPE_SYMBOLIC_REFERENCE;
        }
      p++;
      break;
    case '0':
      base = 8;
      /* fall through */
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
      /* fall through */
    case '9':
      /* read up to three digits, tracking both the octal value (x) and the
       * decimal value (d) until it is known which one applies */
      x = 0;
      d = 0;
      for (i = 0; i < 3; i++)
        {
          h = g_ascii_digit_value (*p);
          if (h < 0)
            break;
          if (h > 7)
            {
              if (base == 8)
                break;
              else
                base = 10;
            }
          if (i == 2 && base == 10)
            break;
          x = x * 8 + h;
          d = d * 10 + h;
          p++;
        }
      /* added by muntyan - \0 means whole match */
      if (base == 8 && x == 0 && i == 1)
        {
          data->type = REPL_TYPE_NUMERIC_REFERENCE;
          data->num = 0;
        }
      /* end */
      else if (base == 8 || i == 3)
        {
          data->type = REPL_TYPE_STRING;
          data->text = g_new0 (gchar, 8);
          g_unichar_to_utf8 (x, data->text);
        }
      else
        {
          data->type = REPL_TYPE_NUMERIC_REFERENCE;
          data->num = d;
        }
      break;
    case 0:
      error_detail = _("stray final '\\'");
      goto error;
    default:
      data->type = REPL_TYPE_STRING;
      data->text = g_new0 (gchar, 8);
      g_unichar_to_utf8 (g_utf8_get_char (p), data->text);
      p = g_utf8_next_char (p);
    }

  return p;

 error:
  /* the pointer difference is wider than int on LP64 platforms, so it is
   * converted explicitly to match the %d conversion */
  tmp_error = g_error_new (EGG_REGEX_ERROR,
                           EGG_REGEX_ERROR_REPLACE,
                           _("Error while parsing replacement "
                             "text \"%s\" at char %d: %s"),
                           replacement,
                           (gint) (p - replacement),
                           error_detail);
  g_propagate_error (error, tmp_error);

  return NULL;
}

/* Parses a replacement string into a list of InterpolationData pieces, in
 * the order they appear: runs of literal text become REPL_TYPE_STRING
 * pieces, each backslash escape becomes the piece produced by
 * expand_escape().
 *
 * Returns the list (free each element with free_interpolation_data() and
 * the list with g_list_free()).  The list is NULL for an empty replacement.
 * On a malformed escape NULL is returned and error is set; error may be
 * NULL, in which case the failure is only visible through the NULL return.
 */
static GList *
split_replacement (const gchar  *replacement,
                   GError      **error)
{
  GList *list = NULL;
  InterpolationData *data;
  const gchar *p, *start;

  start = p = replacement;
  while (*p)
    {
      if (*p == '\\')
        {
          data = g_new0 (InterpolationData, 1);
          p = expand_escape (replacement, p, data, error);

          /* expand_escape() returns NULL exactly when it failed; test the
           * return value rather than *error, since error may be NULL */
          if (p == NULL)
            {
              /* data was never added to the list, free it separately */
              free_interpolation_data (data);
              g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
              g_list_free (list);

              return NULL;
            }

          start = p;
          list = g_list_prepend (list, data);
        }
      else
        {
          p++;
          /* a run of literal text ends at the next escape or at the end */
          if (*p == '\\' || *p == '\0')
            {
              if (p - start > 0)
                {
                  data = g_new0 (InterpolationData, 1);
                  data->text = g_strndup (start, p - start);
                  data->type = REPL_TYPE_STRING;
                  list = g_list_prepend (list, data);
                }
            }
        }
    }

  return g_list_reverse (list);
}

/* EggRegexEvalCallback used by egg_regex_replace(): appends the expansion
 * of a parsed replacement (data is the GList produced by
 * split_replacement()) for the current match of regex to result.
 * Group references that yield no text are skipped.
 * Always returns FALSE.
 */
static gboolean
interpolate_replacement (EggRegex    *regex,
                         const gchar *string,
                         GString     *result,
                         gpointer     data)
{
  GList *node;

  for (node = data; node != NULL; node = node->next)
    {
      InterpolationData *piece = node->data;
      gchar *captured = NULL;

      if (piece->type == REPL_TYPE_STRING)
        g_string_append (result, piece->text);
      else if (piece->type == REPL_TYPE_CHARACTER)
        g_string_append_c (result, piece->c);
      else if (piece->type == REPL_TYPE_NUMERIC_REFERENCE)
        captured = egg_regex_fetch (regex, string, piece->num);
      else if (piece->type == REPL_TYPE_SYMBOLIC_REFERENCE)
        captured = egg_regex_fetch_named (regex, string, piece->text);

      /* text of a referenced group, if there was any */
      if (captured != NULL)
        {
          g_string_append (result, captured);
          g_free (captured);
        }
    }

  return FALSE;
}

/**
 * egg_regex_replace:
 * @regex:  a #EggRegex structure
 * @string:  the string to perform matches against
 * @string_len: the length of @string, or -1 to use strlen()
 * @replacement:  text to replace each match with
 * @match_options:  options for the match
 * @error: return location for a #GError
 *
 * Replaces all occurances of the pattern in @regex with the
 * replacement text. Backreferences of the form '\number' or '\g<number>'
 * in the replacement text are interpolated by the number-th captured
 * subexpression of the match, '\g<name>' refers to the captured subexpression
 * with the given name. '\0' refers to the complete match. To include a
 * literal '\' in the replacement, write '\\'.
 *
 * Returns: a newly allocated string containing the replacements, or %NULL
 * (with @error set) if @replacement could not be parsed.
 */
gchar *
egg_regex_replace (EggRegex            *regex,
                   const gchar         *string,
                   gssize               string_len,
                   const gchar         *replacement,
                   EggRegexMatchFlags   match_options,
                   GError             **error)
{
  gchar *result;
  GList *list;
  GError *tmp_error = NULL;

  /* A local error is used so that a parse failure can be told apart from
   * an empty replacement (both give a NULL list) even when the caller
   * passed error == NULL. */
  list = split_replacement (replacement, &tmp_error);
  if (tmp_error != NULL)
    {
      /* do not perform a replacement with a half-parsed template */
      g_propagate_error (error, tmp_error);
      return NULL;
    }

  result = egg_regex_replace_eval (regex,
                                   string, string_len,
                                   interpolate_replacement,
                                   (gpointer)list,
                                   match_options);
  g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
  g_list_free (list);

  return result;
}


/**
 * egg_regex_replace_eval:
 * @regex:  a #EggRegex structure
 * @string:  string to perform matches against
 * @string_len: the length of @string, or -1 to use strlen()
 * @eval: a function to call for each match
 * @user_data: data passed to @eval
 * @match_options:  Options for the match
 *
 * Replaces occurances of the pattern in regex with
 * the output of @eval for that occurance.  @eval appends the replacement
 * for the current match to the result string; if it returns %TRUE no
 * further matches are replaced and the rest of @string is copied unchanged.
 *
 * The match state of @regex is reset with egg_regex_clear() first.
 *
 * Returns: a newly allocated string containing the replacements.
 */
gchar *
egg_regex_replace_eval (EggRegex              *regex,
                        const gchar           *string,
                        gssize                 string_len,
                        EggRegexEvalCallback   eval,
                        gpointer               user_data,
                        EggRegexMatchFlags     match_options)
{
  GString *result;
  gint str_pos = 0;	/* end of the text already copied or replaced */
  gboolean done = FALSE;

  if (string_len < 0)
    string_len = strlen (string);

  /* clear out the regex for reuse, just in case */
  egg_regex_clear (regex);

  result = g_string_sized_new (string_len);

  /* run down the string making matches; stop as soon as eval asks to */
  while (!done &&
         egg_regex_match_next (regex, string, string_len, match_options) > 0)
    {
      gint match_start = regex->offsets[0];

      /* copy the text between the previous match and this one */
      g_string_append_len (result,
                           string + str_pos,
                           match_start - str_pos);
      done = (*eval) (regex, string, result, user_data);
      str_pos = regex->offsets[1];

      /* An empty match leaves the search position where it was, so the
       * same empty match would be found forever.  Step the search position
       * over one character; that character is copied to the result with
       * the text preceding the next match (or with the tail). */
      if (str_pos == match_start)
        {
          gint next_pos;

          if (str_pos >= string_len)
            break;

          next_pos = (gint) (g_utf8_next_char (string + str_pos) - string);
          regex->pos = MIN (next_pos, (gint) string_len);
        }
    }

  /* copy whatever follows the last match */
  g_string_append_len (result, string + str_pos, string_len - str_pos);

  return g_string_free (result, FALSE);
}


/**
 * egg_regex_eval_replacement:
 * @regex:  a #EggRegex structure
 * @string: the string on which the last match was made
 * @replacement: replacement string
 * @error: location to store error
 *
 * Evaluates replacement after successful match: escapes and group
 * references in @replacement are expanded against the last match stored
 * in @regex.
 *
 * Returns: a newly allocated string containing the replacement (an empty
 * string for an empty @replacement), or %NULL with @error set if
 * @replacement could not be parsed.
 */
gchar *
egg_regex_eval_replacement (EggRegex     *regex,
                            const gchar  *string,
                            const gchar  *replacement,
                            GError      **error)
{
    GString *result;
    GList *list;
    GError *tmp_error = NULL;

    /* An empty replacement also parses to a NULL list, so failure is
     * detected through the error, not through the list. */
    list = split_replacement (replacement, &tmp_error);

    if (tmp_error != NULL)
    {
        g_propagate_error (error, tmp_error);
        return NULL;
    }

    result = g_string_new (NULL);
    interpolate_replacement (regex, string, result, list);
    g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
    g_list_free (list);

    return g_string_free (result, FALSE);
}


/**
 * egg_regex_check_replacement:
 * @replacement: replacement string
 * @error: location to store error
 *
 * Checks whether @replacement can be parsed as a replacement string, without
 * performing any replacement.  An empty @replacement is valid.
 *
 * Returns: %TRUE if @replacement is well-formed, %FALSE (with @error set)
 * otherwise.
 */
gboolean
egg_regex_check_replacement (const gchar           *replacement,
                             GError               **error)
{
    GList *list;
    GError *tmp_error = NULL;

    /* An empty replacement also parses to a NULL list, so failure is
     * detected through the error, not through the list. */
    list = split_replacement (replacement, &tmp_error);

    if (tmp_error != NULL)
    {
        g_propagate_error (error, tmp_error);
        return FALSE;
    }

    g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
    g_list_free (list);
    return TRUE;
}