medit/moo/gtksourceview/gtksourceiter.c
2015-07-11 14:32:02 -07:00

802 lines
19 KiB
C

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
* gtksourceiter.c
*
* Copyright (C) 2000 - 2005 Paolo Maggi
* Copyright (C) 2002, 2003 Jeroen Zwartepoorte
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/*
* Parts of this file are copied from the gedit and glimmer projects.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include "gtksourceiter.h"
#define GTK_TEXT_UNKNOWN_CHAR 0xFFFC
#if GLIB_CHECK_VERSION(2,30,0) && !defined(G_UNICODE_COMBINING_MARK)
#define G_UNICODE_COMBINING_MARK G_UNICODE_SPACING_MARK
#endif
/* Behaves like g_utf8_offset_to_pointer(), except that each character of
 * @str is charged for as many characters as its case-folded, NFD-normalized
 * form contains.  This makes it suitable when @offset was measured in the
 * normalized version of @str but the caller needs a pointer into @str
 * itself.
 *
 * Returns a pointer into @str (or @str itself when @offset <= 0). */
static const gchar *
pointer_from_offset_skipping_decomp (const gchar *str, gint offset)
{
	const gchar *cursor = str;

	while (offset > 0)
	{
		const gchar *following = g_utf8_next_char (cursor);
		gchar *folded = g_utf8_casefold (cursor, following - cursor);
		gchar *decomposed = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);

		g_free (folded);

		/* A single source character may decompose into several
		 * normalized characters; consume all of them at once. */
		offset -= g_utf8_strlen (decomposed, -1);
		g_free (decomposed);

		cursor = following;
	}

	return cursor;
}
/* Returns TRUE when the first @prefix_len bytes of @string are equal to
 * @prefix and the character that follows them in @string is not a Unicode
 * mark (or the string ends right there). */
static gboolean
exact_prefix_cmp (const gchar *string,
		  const gchar *prefix,
		  guint        prefix_len)
{
	const gchar *tail;
	GUnicodeType following;
	gboolean followed_by_mark;

	if (strncmp (string, prefix, prefix_len) != 0)
		return FALSE;

	tail = string + prefix_len;
	if (*tail == '\0')
		return TRUE;

	/* The bytes match, but the match is rejected when the next character
	 * is a mark: e.g. a trailing 'a' in prefix must not be the base of a
	 * two-char a-with-hat symbol in string. */
	following = g_unichar_type (g_utf8_get_char (tail));
	followed_by_mark = (following == G_UNICODE_COMBINING_MARK ||
			    following == G_UNICODE_ENCLOSING_MARK ||
			    following == G_UNICODE_NON_SPACING_MARK);

	return !followed_by_mark;
}
/* Finds the first occurrence of @needle in @haystack, ignoring case.
 *
 * @haystack is case-folded and NFD-normalized before scanning; @needle is
 * compared byte-for-byte against that folded copy, so it only matches when
 * it is already in the same form.
 *
 * Returns a pointer into @haystack at the start of the match, @haystack
 * itself when @needle is empty, or NULL when there is no match. */
static const gchar *
utf8_strcasestr (const gchar *haystack, const gchar *needle)
{
	const gchar *match = NULL;
	const gchar *scan;
	gchar *folded;
	gchar *normalized;
	gsize needle_chars;
	gsize needle_bytes;
	gsize haystack_chars;
	gint char_index;

	g_return_val_if_fail (haystack != NULL, NULL);
	g_return_val_if_fail (needle != NULL, NULL);

	folded = g_utf8_casefold (haystack, -1);
	normalized = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);
	g_free (folded);

	needle_chars = g_utf8_strlen (needle, -1);
	haystack_chars = g_utf8_strlen (normalized, -1);

	if (needle_chars == 0)
	{
		/* An empty needle matches at the very beginning. */
		match = haystack;
	}
	else if (haystack_chars >= needle_chars)
	{
		needle_bytes = strlen (needle);

		/* Walk the normalized copy one character at a time, keeping
		 * the character index so the hit can be mapped back onto the
		 * original string. */
		for (scan = normalized, char_index = 0;
		     *scan != '\0';
		     scan = g_utf8_next_char (scan), char_index++)
		{
			if (exact_prefix_cmp (scan, needle, needle_bytes))
			{
				match = pointer_from_offset_skipping_decomp (haystack,
									     char_index);
				break;
			}
		}
	}

	g_free (normalized);
	return match;
}
/* Finds the last occurrence of @needle in @haystack, ignoring case.
 *
 * @haystack is case-folded and NFD-normalized before scanning; @needle is
 * compared byte-for-byte against that folded copy, so it only matches when
 * it is already in the same form.  The scan starts at the last character
 * position where @needle could still fit and walks towards the beginning.
 *
 * Returns a pointer into @haystack at the start of the match, @haystack
 * itself when @needle is empty, or NULL when there is no match. */
static const gchar *
utf8_strrcasestr (const gchar *haystack, const gchar *needle)
{
	gsize needle_len;
	gsize haystack_len;
	const gchar *ret = NULL;
	const gchar *p;
	gchar *casefold;
	gchar *caseless_haystack;
	gint i;

	g_return_val_if_fail (haystack != NULL, NULL);
	g_return_val_if_fail (needle != NULL, NULL);

	casefold = g_utf8_casefold (haystack, -1);
	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
	g_free (casefold);

	/* Lengths in characters, used for the quick rejects below and to
	 * pick the starting position. */
	needle_len = g_utf8_strlen (needle, -1);
	haystack_len = g_utf8_strlen (caseless_haystack, -1);

	if (needle_len == 0)
	{
		ret = haystack;
		goto finally_1;
	}

	if (haystack_len < needle_len)
	{
		ret = NULL;
		goto finally_1;
	}

	i = haystack_len - needle_len;
	p = g_utf8_offset_to_pointer (caseless_haystack, i);

	/* From here on needle_len is a byte count, as exact_prefix_cmp()
	 * compares bytes. */
	needle_len = strlen (needle);

	for (;;)
	{
		if (exact_prefix_cmp (p, needle, needle_len))
		{
			ret = pointer_from_offset_skipping_decomp (haystack, i);
			break;
		}

		/* The first character has been tested: stop here.  Calling
		 * g_utf8_prev_char() on the start of the buffer would read
		 * the bytes in front of the allocation and produce an
		 * out-of-range pointer. */
		if (p == caseless_haystack)
			break;

		p = g_utf8_prev_char (p);
		i--;
	}

finally_1:
	g_free (caseless_haystack);

	return ret;
}
/* Case-insensitive prefix test.
 *
 * The first @n1 bytes of @s1 and the first @n2 bytes of @s2 are case-folded
 * and NFD-normalized; the result is TRUE when the normalized form of @s2 is
 * a byte-wise prefix of the normalized form of @s1.  Both lengths must be
 * greater than zero. */
static gboolean
utf8_caselessnmatch (const char *s1, const char *s2,
		     gssize n1, gssize n2)
{
	gchar *folded;
	gchar *canon1;
	gchar *canon2;
	gint bytes1;
	gint bytes2;
	gboolean is_prefix;

	g_return_val_if_fail (s1 != NULL, FALSE);
	g_return_val_if_fail (s2 != NULL, FALSE);
	g_return_val_if_fail (n1 > 0, FALSE);
	g_return_val_if_fail (n2 > 0, FALSE);

	folded = g_utf8_casefold (s1, n1);
	canon1 = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);
	g_free (folded);

	folded = g_utf8_casefold (s2, n2);
	canon2 = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);
	g_free (folded);

	bytes1 = strlen (canon1);
	bytes2 = strlen (canon2);

	/* s2 cannot be a prefix of something shorter than itself. */
	is_prefix = (bytes1 >= bytes2) &&
		    (strncmp (canon1, canon2, bytes2) == 0);

	g_free (canon1);
	g_free (canon2);

	return is_prefix;
}
/* FIXME: total horror */
static gboolean
char_is_invisible (const GtkTextIter *iter)
{
GSList *tags;
gboolean invisible = FALSE;
tags = gtk_text_iter_get_tags (iter);
while (tags)
{
gboolean this_invisible, invisible_set;
g_object_get (tags->data, "invisible", &this_invisible,
"invisible-set", &invisible_set, NULL);
if (invisible_set)
invisible = this_invisible;
tags = g_slist_delete_link (tags, tags);
}
return invisible;
}
/* Advances @iter until @count "counted" characters have been passed or the
 * end of the buffer is reached.
 *
 * @skip_nontext:   0xFFFC placeholder characters are stepped over without
 *                  being counted.
 * @skip_invisible: characters hidden by their tags are stepped over without
 *                  being counted.
 * @skip_decomp:    a counted character is charged for the number of
 *                  characters in its case-folded, NFD-normalized form
 *                  instead of for one. */
static void
forward_chars_with_skipping (GtkTextIter *iter,
			     gint         count,
			     gboolean     skip_invisible,
			     gboolean     skip_nontext,
			     gboolean     skip_decomp)
{
	gint remaining;

	g_return_if_fail (count >= 0);

	remaining = count;

	/* Bailing out at the end of the buffer is a minimal workaround to
	 * avoid the infinite loop of bug #168247.  It doesn't fix the
	 * problem, just the symptom...
	 */
	while (remaining > 0 && !gtk_text_iter_is_end (iter))
	{
		gboolean counted = TRUE;

		/* FIXME: char_is_invisible() gets the list of tags for each
		 * char here, and checks every tag.  It doesn't sound like a
		 * good idea. */
		if (skip_nontext &&
		    gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR)
			counted = FALSE;
		else if (skip_invisible && char_is_invisible (iter))
			counted = FALSE;

		if (counted)
		{
			gint weight = 1;

			if (skip_decomp)
			{
				/* Being UTF-8 correct is painful: this
				 * accounts for the extra offsets coming from
				 * the canonical decompositions (e.g. of
				 * accented characters) that
				 * g_utf8_normalize() performs. */
				gchar utf8[6];
				gint utf8_len;
				gchar *folded;
				gchar *decomposed;

				utf8_len = g_unichar_to_utf8 (gtk_text_iter_get_char (iter),
							      utf8);
				folded = g_utf8_casefold (utf8, utf8_len);
				decomposed = g_utf8_normalize (folded, -1,
							       G_NORMALIZE_NFD);
				weight = g_utf8_strlen (decomposed, -1);

				g_free (decomposed);
				g_free (folded);
			}

			remaining -= weight;
		}

		gtk_text_iter_forward_char (iter);
	}
}
/* Tries to match the NULL-terminated array @lines against the buffer text
 * beginning at @start, one buffer line per array element.
 *
 * When @match_start is non-NULL this is the first line of the search string
 * and it may be found anywhere in the buffer line; when it is NULL the
 * buffer line has to begin with the search line.  The remaining elements
 * are matched recursively with @match_start set to NULL.
 *
 * @visible_only and @slice select which gtk_text_iter_get_*() variant is
 * used to fetch the buffer line.  On success @match_start (if given)
 * receives the start of the match and @match_end (if given) its end.
 *
 * Returns TRUE when every remaining line matched. */
static gboolean
lines_match (const GtkTextIter *start,
	     const gchar      **lines,
	     gboolean           visible_only,
	     gboolean           slice,
	     GtkTextIter       *match_start,
	     GtkTextIter       *match_end)
{
	GtkTextIter cursor;
	gchar *text;
	const gchar *hit;
	gint chars_before_hit;

	/* Nothing left to match: report an empty range at start. */
	if (*lines == NULL || **lines == '\0')
	{
		if (match_start != NULL)
			*match_start = *start;

		if (match_end != NULL)
			*match_end = *start;

		return TRUE;
	}

	cursor = *start;
	gtk_text_iter_forward_line (&cursor);

	/* No more text in buffer, but *lines is nonempty */
	if (gtk_text_iter_equal (start, &cursor))
		return FALSE;

	if (slice)
		text = visible_only
			? gtk_text_iter_get_visible_slice (start, &cursor)
			: gtk_text_iter_get_slice (start, &cursor);
	else
		text = visible_only
			? gtk_text_iter_get_visible_text (start, &cursor)
			: gtk_text_iter_get_text (start, &cursor);

	if (match_start != NULL)
	{
		/* First line of the search string: look anywhere in it. */
		hit = utf8_strcasestr (text, *lines);
	}
	else if (utf8_caselessnmatch (text, *lines,
				      strlen (text), strlen (*lines)))
	{
		/* Subsequent lines have to match from the start of the
		 * buffer line. */
		hit = text;
	}
	else
	{
		hit = NULL;
	}

	if (hit == NULL)
	{
		g_free (text);
		return FALSE;
	}

	/* Character offset of the hit inside the fetched text; the text
	 * itself is not needed afterwards. */
	chars_before_hit = g_utf8_strlen (text, hit - text);
	g_free (text);

	/* Move to the start of the search string ... */
	cursor = *start;
	forward_chars_with_skipping (&cursor, chars_before_hit,
				     visible_only, !slice, FALSE);
	if (match_start != NULL)
		*match_start = cursor;

	/* ... and then to its end. */
	forward_chars_with_skipping (&cursor, g_utf8_strlen (*lines, -1),
				     visible_only, !slice, TRUE);
	if (match_end != NULL)
		*match_end = cursor;

	/* Pass NULL for match_start, since the start does not need to be
	 * found again. */
	return lines_match (&cursor, lines + 1, visible_only, slice,
			    NULL, match_end);
}
/* Backward variant of lines_match().
 *
 * The text examined for the first element of @lines runs from the start of
 * the line containing @start up to @start; if @start already sits at a line
 * start, the previous line is used instead.  When @match_start is non-NULL
 * the last occurrence inside that text is taken; otherwise the text has to
 * begin with the search line.  The remaining elements of @lines are then
 * matched forward with lines_match().
 *
 * On success @match_start (if given) receives the start of the match and
 * @match_end (if given) its end.  Returns TRUE when all lines matched. */
static gboolean
backward_lines_match (const GtkTextIter *start,
		      const gchar      **lines,
		      gboolean           visible_only,
		      gboolean           slice,
		      GtkTextIter       *match_start,
		      GtkTextIter       *match_end)
{
	GtkTextIter span_begin;
	GtkTextIter span_end;
	gchar *text;
	const gchar *hit;
	gint chars_before_hit;

	/* Nothing left to match: report an empty range at start. */
	if (*lines == NULL || **lines == '\0')
	{
		if (match_start != NULL)
			*match_start = *start;

		if (match_end != NULL)
			*match_end = *start;

		return TRUE;
	}

	span_end = *start;
	span_begin = *start;

	if (gtk_text_iter_get_line_offset (&span_begin) != 0)
		gtk_text_iter_set_line_offset (&span_begin, 0);
	else if (!gtk_text_iter_backward_line (&span_begin))
		return FALSE;

	if (slice)
		text = visible_only
			? gtk_text_iter_get_visible_slice (&span_begin, &span_end)
			: gtk_text_iter_get_slice (&span_begin, &span_end);
	else
		text = visible_only
			? gtk_text_iter_get_visible_text (&span_begin, &span_end)
			: gtk_text_iter_get_text (&span_begin, &span_end);

	if (match_start != NULL)
	{
		/* First line of the search string: take the last hit. */
		hit = utf8_strrcasestr (text, *lines);
	}
	else if (utf8_caselessnmatch (text, *lines,
				      strlen (text), strlen (*lines)))
	{
		/* Subsequent lines have to match from the start of the
		 * buffer line. */
		hit = text;
	}
	else
	{
		hit = NULL;
	}

	if (hit == NULL)
	{
		g_free (text);
		return FALSE;
	}

	/* Character offset of the hit inside the fetched text; the text
	 * itself is not needed afterwards. */
	chars_before_hit = g_utf8_strlen (text, hit - text);
	g_free (text);

	/* Move to the start of the search string ... */
	forward_chars_with_skipping (&span_begin, chars_before_hit,
				     visible_only, !slice, FALSE);
	if (match_start != NULL)
		*match_start = span_begin;

	/* ... and then to its end. */
	forward_chars_with_skipping (&span_begin, g_utf8_strlen (*lines, -1),
				     visible_only, !slice, TRUE);
	if (match_end != NULL)
		*match_end = span_begin;

	/* Try to match the rest of the lines forward, passing NULL for
	 * match_start so lines_match() will try to match the entire line. */
	return lines_match (&span_begin, lines + 1, visible_only, slice,
			    NULL, match_end);
}
/* strsplit () that retains the delimiter as part of the string.
 *
 * @string is cut after each occurrence of @delimiter; at most @max_tokens
 * cuts are made (a value below 1 means no practical limit) and whatever is
 * left after the last cut becomes the final piece if it is non-empty.
 * Each piece is case-folded and NFD-normalized.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings. */
static gchar **
breakup_string (const char *string,
		const char *delimiter,
		gint        max_tokens)
{
	GSList *pieces = NULL;
	GSList *node;
	gchar **result;
	gchar *raw;
	gchar *folded;
	const char *hit;
	gsize delimiter_len;
	guint count = 0;
	guint idx;

	g_return_val_if_fail (string != NULL, NULL);
	g_return_val_if_fail (delimiter != NULL, NULL);

	if (max_tokens < 1)
		max_tokens = G_MAXINT;

	delimiter_len = strlen (delimiter);

	hit = strstr (string, delimiter);
	while (hit != NULL)
	{
		/* Copy the piece together with its trailing delimiter. */
		raw = g_strndup (string, (hit - string) + delimiter_len);

		folded = g_utf8_casefold (raw, -1);
		g_free (raw);

		pieces = g_slist_prepend (pieces,
					  g_utf8_normalize (folded, -1,
							    G_NORMALIZE_NFD));
		g_free (folded);
		count++;

		string = hit + delimiter_len;
		hit = strstr (string, delimiter);

		if (--max_tokens == 0)
			break;
	}

	/* Whatever follows the last cut is the final piece. */
	if (*string != '\0')
	{
		folded = g_utf8_casefold (string, -1);
		pieces = g_slist_prepend (pieces,
					  g_utf8_normalize (folded, -1,
							    G_NORMALIZE_NFD));
		g_free (folded);
		count++;
	}

	/* The list was built back to front; put it in source order and
	 * move the strings into the NULL-terminated array. */
	pieces = g_slist_reverse (pieces);

	result = g_new (gchar *, count + 1);
	idx = 0;
	for (node = pieces; node != NULL; node = node->next)
		result[idx++] = node->data;
	result[idx] = NULL;

	g_slist_free (pieces);

	return result;
}
/**
 * gtk_source_iter_forward_search:
 * @iter: start of search.
 * @str: a search string.
 * @flags: flags affecting how the search is done.
 * @match_start: return location for start of match, or %NULL.
 * @match_end: return location for end of match, or %NULL.
 * @limit: bound for the search, or %NULL for the end of the buffer.
 *
 * Searches forward for @str. Any match is returned by setting
 * @match_start to the first character of the match and @match_end to the
 * first character after the match. The search will not continue past
 * @limit. Note that a search is a linear or O(n) operation, so you
 * may wish to use @limit to avoid locking up your UI on large
 * buffers.
 *
 * If the #GTK_SOURCE_SEARCH_VISIBLE_ONLY flag is present, the match may
 * have invisible text interspersed in @str, i.e. @str will be a
 * possibly-noncontiguous subsequence of the matched range. Similarly,
 * if you specify #GTK_SOURCE_SEARCH_TEXT_ONLY, the match may have
 * pixbufs or child widgets mixed inside the matched range. If these
 * flags are not given, the match must be exact; the special 0xFFFC
 * character in @str will match embedded pixbufs or child widgets.
 * If you specify the #GTK_SOURCE_SEARCH_CASE_INSENSITIVE flag, the text will
 * be matched regardless of what case it is in.
 *
 * Same as gtk_text_iter_forward_search(), but supports case insensitive
 * searching. Without #GTK_SOURCE_SEARCH_CASE_INSENSITIVE the call is simply
 * forwarded to gtk_text_iter_forward_search().
 *
 * Return value: whether a match was found.
 **/
gboolean
gtk_source_iter_forward_search (const GtkTextIter   *iter,
				const gchar         *str,
				GtkSourceSearchFlags flags,
				GtkTextIter         *match_start,
				GtkTextIter         *match_end,
				const GtkTextIter   *limit)
{
	gchar **needle_lines;
	GtkTextIter hit_start;
	GtkTextIter cursor;
	gboolean found = FALSE;
	gboolean visible_only;
	gboolean slice;

	g_return_val_if_fail (iter != NULL, FALSE);
	g_return_val_if_fail (str != NULL, FALSE);

	if (!(flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE))
		return gtk_text_iter_forward_search (iter, str, flags,
						     match_start, match_end,
						     limit);

	if (limit != NULL && gtk_text_iter_compare (iter, limit) >= 0)
		return FALSE;

	if (*str == '\0')
	{
		/* If we can move one char, return the empty string there */
		hit_start = *iter;

		if (!gtk_text_iter_forward_char (&hit_start))
			return FALSE;

		if (limit != NULL && gtk_text_iter_equal (&hit_start, limit))
			return FALSE;

		if (match_start != NULL)
			*match_start = hit_start;

		if (match_end != NULL)
			*match_end = hit_start;

		return TRUE;
	}

	visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
	slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;

	/* Split the search string into lines (folded and normalized). */
	needle_lines = breakup_string (str, "\n", -1);

	cursor = *iter;

	/* This loop has an inefficient worst-case, where
	 * gtk_text_iter_get_text () is called repeatedly on
	 * a single line.
	 */
	for (;;)
	{
		GtkTextIter hit_end;

		if (limit != NULL && gtk_text_iter_compare (&cursor, limit) >= 0)
			break;

		if (lines_match (&cursor, (const gchar **) needle_lines,
				 visible_only, slice, &hit_start, &hit_end))
		{
			/* The first hit ends the search; it is only
			 * reported when it does not run past limit. */
			if (limit == NULL ||
			    gtk_text_iter_compare (&hit_end, limit) <= 0)
			{
				found = TRUE;

				if (match_start != NULL)
					*match_start = hit_start;

				if (match_end != NULL)
					*match_end = hit_end;
			}

			break;
		}

		if (!gtk_text_iter_forward_line (&cursor))
			break;
	}

	g_strfreev (needle_lines);

	return found;
}
/**
 * gtk_source_iter_backward_search:
 * @iter: a #GtkTextIter where the search begins.
 * @str: search string.
 * @flags: bitmask of flags affecting the search.
 * @match_start: return location for start of match, or %NULL.
 * @match_end: return location for end of match, or %NULL.
 * @limit: location of last possible @match_start, or %NULL for start of buffer.
 *
 * Same as gtk_text_iter_backward_search(), but supports case insensitive
 * searching. Without #GTK_SOURCE_SEARCH_CASE_INSENSITIVE the call is simply
 * forwarded to gtk_text_iter_backward_search().
 *
 * Return value: whether a match was found.
 **/
gboolean
gtk_source_iter_backward_search (const GtkTextIter   *iter,
				 const gchar         *str,
				 GtkSourceSearchFlags flags,
				 GtkTextIter         *match_start,
				 GtkTextIter         *match_end,
				 const GtkTextIter   *limit)
{
	gchar **needle_lines;
	GtkTextIter hit_start;
	GtkTextIter cursor;
	gboolean found = FALSE;
	gboolean visible_only;
	gboolean slice;

	g_return_val_if_fail (iter != NULL, FALSE);
	g_return_val_if_fail (str != NULL, FALSE);

	if (!(flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE))
		return gtk_text_iter_backward_search (iter, str, flags,
						      match_start, match_end,
						      limit);

	if (limit != NULL && gtk_text_iter_compare (iter, limit) <= 0)
		return FALSE;

	if (*str == '\0')
	{
		/* If we can move one char, return the empty string there */
		hit_start = *iter;

		if (!gtk_text_iter_backward_char (&hit_start))
			return FALSE;

		if (limit != NULL && gtk_text_iter_equal (&hit_start, limit))
			return FALSE;

		if (match_start != NULL)
			*match_start = hit_start;

		if (match_end != NULL)
			*match_end = hit_start;

		return TRUE;
	}

	visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
	slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;

	/* Split the search string into lines (folded and normalized). */
	needle_lines = breakup_string (str, "\n", -1);

	cursor = *iter;

	/* This loop has an inefficient worst-case, where
	 * gtk_text_iter_get_text () is called repeatedly on
	 * a single line.
	 */
	for (;;)
	{
		GtkTextIter hit_end;

		if (limit != NULL && gtk_text_iter_compare (&cursor, limit) <= 0)
			break;

		if (backward_lines_match (&cursor, (const gchar **) needle_lines,
					  visible_only, slice,
					  &hit_start, &hit_end))
		{
			/* The first hit ends the search; it is only
			 * reported when its end lies after limit. */
			if (limit == NULL ||
			    gtk_text_iter_compare (&hit_end, limit) > 0)
			{
				found = TRUE;

				if (match_start != NULL)
					*match_start = hit_start;

				if (match_end != NULL)
					*match_end = hit_end;
			}

			break;
		}

		/* Step back: first to the start of the current line, then
		 * one line at a time until the buffer start is reached. */
		if (gtk_text_iter_get_line_offset (&cursor) != 0)
			gtk_text_iter_set_line_offset (&cursor, 0);
		else if (!gtk_text_iter_backward_line (&cursor))
			break;
	}

	g_strfreev (needle_lines);

	return found;
}
/*
* gtk_source_iter_find_matching_bracket is implemented in gtksourcebuffer.c
*/