medit/moo/gtksourceview/gtksourcelanguage-parser-1.c
2010-08-30 22:19:58 -07:00

794 lines
18 KiB
C

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; coding: utf-8 -*-
* gtksourcelanguage-parser-1.c
* Language specification parser for 1.0 version .lang files
*
* Copyright (C) 2003 - Paolo Maggi <paolo.maggi@polito.it>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <libxml/parser.h>
#include "gtksourceview-i18n.h"
#include "gtksourcebuffer.h"
#include "gtksourcelanguage.h"
#include "gtksourcelanguage-private.h"
/*
 * fix_pattern:
 * @pattern: a regular expression taken from a version 1 .lang file, or NULL.
 * @end_at_line_end: optional out flag (may be NULL). It is set to TRUE when
 *   a trailing "\n" (a backslash followed by the letter 'n') was removed
 *   from @pattern; it is never reset to FALSE.
 *
 * Returns a newly allocated copy of @pattern in which every '/' is escaped
 * as "\/" and a trailing "\n" is dropped, or NULL when @pattern is NULL.
 * The caller releases the result with g_free().
 *
 * The flag is now reported regardless of whether the pattern contains a
 * '/'; previously a pattern such as "a/b\n" lost its "\n" silently.
 */
static gchar *
fix_pattern (const gchar *pattern,
             gboolean    *end_at_line_end)
{
	const char *slash;
	const char *segment;
	size_t length;
	GString *escaped;

	if (pattern == NULL)
		return NULL;

	/* Work out how much of the pattern is kept. Neither character of the
	 * "\n" suffix is a '/', so stripping it can never cut an escape
	 * sequence produced below in half. */
	length = strlen (pattern);
	if (g_str_has_suffix (pattern, "\\n"))
	{
		length -= 2;
		if (end_at_line_end != NULL)
			*end_at_line_end = TRUE;
	}

	slash = strchr (pattern, '/');
	if (slash == NULL)
		return g_strndup (pattern, length);

	/* Copy the text before the first '/', then alternate between an
	 * escaped slash and the plain text that follows it. */
	escaped = g_string_new_len (pattern, slash - pattern);
	g_string_append (escaped, "\\/");
	segment = slash + 1;

	while ((slash = strchr (segment, '/')) != NULL)
	{
		g_string_append_len (escaped, segment, slash - segment);
		g_string_append (escaped, "\\/");
		segment = slash + 1;
	}

	/* Tail after the last '/', minus the stripped suffix (if any). */
	g_string_append_len (escaped, segment, (pattern + length) - segment);

	return g_string_free (escaped, FALSE);
}
/*
 * Defines a context matched by a single regular expression.
 *
 * The context id is "<language id>:<id>"; "<language id>:<language id>" is
 * passed as the following id argument (presumably the parent context --
 * it is the same id define_root_context() registers). @pattern goes through
 * fix_pattern() before being handed to the engine, and the context is
 * always defined with the EXTEND_PARENT and END_AT_LINE_END flags.
 *
 * An error reported through the GError is logged with g_warning().
 * Returns whatever _gtk_source_context_data_define_context() returned, or
 * FALSE if @id is NULL.
 */
static gboolean
ctx_data_add_simple_pattern (GtkSourceContextData *ctx_data,
                             GtkSourceLanguage    *language,
                             const gchar          *id,
                             const gchar          *style,
                             const gchar          *pattern)
{
	GError *err = NULL;
	gchar *escaped;
	gchar *context_id;
	gchar *parent_id;
	gboolean ok;

	g_return_val_if_fail (id != NULL, FALSE);

	escaped = fix_pattern (pattern, NULL);
	context_id = g_strdup_printf ("%s:%s", language->priv->id, id);
	parent_id = g_strdup_printf ("%s:%s", language->priv->id, language->priv->id);

	ok = _gtk_source_context_data_define_context (ctx_data,
	                                              context_id,
	                                              parent_id,
	                                              escaped, NULL, NULL,
	                                              style,
	                                              GTK_SOURCE_CONTEXT_EXTEND_PARENT |
	                                              GTK_SOURCE_CONTEXT_END_AT_LINE_END,
	                                              &err);

	/* The temporaries are no longer needed once the call has returned. */
	g_free (parent_id);
	g_free (context_id);
	g_free (escaped);

	if (err != NULL)
	{
		g_warning ("%s", err->message);
		g_error_free (err);
	}

	return ok;
}
/*
 * Defines a context delimited by a start and an end regular expression.
 *
 * @pattern_start / @pattern_end: raw patterns from the .lang file; either
 *   may be NULL.
 * @end_at_line_end: requests the END_AT_LINE_END flag; it is also turned on
 *   when fix_pattern() strips a trailing "\n" from one of the patterns.
 *
 * Both patterns are passed through fix_pattern() and the *fixed* versions
 * are what the engine receives. The previous code computed the fixed
 * patterns but then passed the raw ones, so the escaping and "\n"
 * stripping had no effect here, unlike in ctx_data_add_simple_pattern().
 *
 * An error reported through the GError is logged with g_warning().
 * Returns whatever _gtk_source_context_data_define_context() returned, or
 * FALSE if @id is NULL.
 */
static gboolean
ctx_data_add_syntax_pattern (GtkSourceContextData *ctx_data,
                             GtkSourceLanguage    *language,
                             const gchar          *id,
                             const gchar          *style,
                             const gchar          *pattern_start,
                             const gchar          *pattern_end,
                             gboolean              end_at_line_end)
{
	gboolean result;
	gchar *real_id, *root_id;
	gchar *fixed_start, *fixed_end;
	const gchar *end_arg;
	GError *error = NULL;
	GtkSourceContextFlags flags = GTK_SOURCE_CONTEXT_EXTEND_PARENT;

	g_return_val_if_fail (id != NULL, FALSE);

	root_id = g_strdup_printf ("%s:%s", language->priv->id, language->priv->id);
	real_id = g_strdup_printf ("%s:%s", language->priv->id, id);

	fixed_start = fix_pattern (pattern_start, &end_at_line_end);
	fixed_end = fix_pattern (pattern_end, &end_at_line_end);

	if (end_at_line_end)
		flags |= GTK_SOURCE_CONTEXT_END_AT_LINE_END;

	/* An end pattern that consisted only of "\n" is empty after fixing.
	 * Pass NULL in that case and let the END_AT_LINE_END flag terminate
	 * the context, which is how parseLineComment() expresses it.
	 * NOTE(review): assumes the engine accepts a NULL end pattern together
	 * with END_AT_LINE_END, as the line-comment path already relies on. */
	end_arg = (fixed_end != NULL && *fixed_end != '\0') ? fixed_end : NULL;

	result = _gtk_source_context_data_define_context (ctx_data, real_id, root_id,
	                                                  NULL,
	                                                  fixed_start,
	                                                  end_arg,
	                                                  style,
	                                                  flags,
	                                                  &error);

	if (error != NULL)
	{
		g_warning ("%s", error->message);
		g_error_free (error);
	}

	g_free (real_id);
	g_free (root_id);
	g_free (fixed_start);
	g_free (fixed_end);

	return result;
}
/*
 * Builds one regular expression that matches any of @keywords.
 *
 * The result has the shape
 *   [\b] [beginning_regex] (?:kw1|kw2|...) [end_regex] [\b]
 * where the group is "(?i:" instead of "(?:" when @case_sensitive is FALSE,
 * the "\b" anchors are emitted only when the corresponding
 * match_empty_string_* flag is set, and the optional regexes only when
 * they are non-NULL.
 *
 * Returns a newly allocated string (free with g_free()), or NULL when the
 * precondition on @keywords fails.
 */
static gchar *
build_keyword_list (const GSList *keywords,
                    gboolean      case_sensitive,
                    gboolean      match_empty_string_at_beginning,
                    gboolean      match_empty_string_at_end,
                    const gchar  *beginning_regex,
                    const gchar  *end_regex)
{
	GString *regex;
	const GSList *item;

	g_return_val_if_fail (keywords != NULL, NULL);

	regex = g_string_new ("");

	/* Only reachable when the precondition check above is compiled out;
	 * an empty list then yields an empty string. */
	if (keywords == NULL)
		return g_string_free (regex, FALSE);

	if (match_empty_string_at_beginning)
		g_string_append (regex, "\\b");

	if (beginning_regex != NULL)
		g_string_append (regex, beginning_regex);

	g_string_append (regex, case_sensitive ? "(?:" : "(?i:");

	/* TODO Make sure pcre can handle big lists, and split lists if necessary.
	 * See #110991 */
	for (item = keywords; item != NULL; item = g_slist_next (item))
	{
		/* Separator goes between alternatives, never before the first. */
		if (item != keywords)
			g_string_append (regex, "|");

		g_string_append (regex, (gchar*) item->data);
	}

	g_string_append (regex, ")");

	if (end_regex != NULL)
		g_string_append (regex, end_regex);

	if (match_empty_string_at_end)
		g_string_append (regex, "\\b");

	return g_string_free (regex, FALSE);
}
/*
 * Handles a <line-comment> element.
 *
 * The first child of @cur must be <start-regex>; its text becomes the start
 * pattern of a context with no end pattern that ends at the end of the
 * line. Otherwise a warning is logged and nothing is defined.
 *
 * The warning no longer dereferences the child pointer, which is NULL when
 * the element has no children at all.
 */
static void
parseLineComment (xmlNodePtr            cur,
                  gchar                *id,
                  xmlChar              *style,
                  GtkSourceContextData *ctx_data,
                  GtkSourceLanguage    *language)
{
	xmlNodePtr child = cur->xmlChildrenNode;
	xmlChar *start_regex;

	if (child == NULL ||
	    xmlStrcmp (child->name, (const xmlChar *)"start-regex") != 0)
	{
		/* Report the offending child's line when there is one, otherwise
		 * the line of the <line-comment> element itself. */
		g_warning ("Missing start-regex in tag 'line-comment' (%s, line %ld)",
		           cur->doc->name,
		           xmlGetLineNo (child != NULL ? child : cur));
		return;
	}

	start_regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);

	ctx_data_add_syntax_pattern (ctx_data, language, id,
	                             (gchar*) style,
	                             (gchar*) start_regex,
	                             NULL, TRUE);

	xmlFree (start_regex);
}
/*
 * Handles a <block-comment> element.
 *
 * Collects the text of the <start-regex> and <end-regex> children of @cur
 * (the last occurrence wins) and defines a start/end context that does not
 * end at line end. If either is missing a warning is logged and nothing is
 * defined.
 *
 * Fixes relative to the previous version:
 *  - the warnings used the loop variable, which is always NULL once the
 *    loop has finished; they now use @cur;
 *  - the end pattern is released when the start pattern is missing, and
 *    strings from repeated children are released instead of leaked.
 */
static void
parseBlockComment (xmlNodePtr            cur,
                   gchar                *id,
                   xmlChar              *style,
                   GtkSourceContextData *ctx_data,
                   GtkSourceLanguage    *language)
{
	xmlChar *start_regex = NULL;
	xmlChar *end_regex = NULL;
	xmlNodePtr child;

	for (child = cur->xmlChildrenNode; child != NULL; child = child->next)
	{
		if (!xmlStrcmp (child->name, (const xmlChar *)"start-regex"))
		{
			/* xmlFree() on NULL is relied upon elsewhere in this file. */
			xmlFree (start_regex);
			start_regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
		}
		else if (!xmlStrcmp (child->name, (const xmlChar *)"end-regex"))
		{
			xmlFree (end_regex);
			end_regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
		}
	}

	if (start_regex == NULL)
	{
		g_warning ("Missing start-regex in tag 'block-comment' (%s, line %ld)",
		           cur->doc->name, xmlGetLineNo (cur));
	}
	else if (end_regex == NULL)
	{
		g_warning ("Missing end-regex in tag 'block-comment' (%s, line %ld)",
		           cur->doc->name, xmlGetLineNo (cur));
	}
	else
	{
		ctx_data_add_syntax_pattern (ctx_data, language, id,
		                             (gchar*) style,
		                             (gchar*) start_regex,
		                             (gchar*) end_regex,
		                             FALSE);
	}

	/* Single release point for every path. */
	xmlFree (start_regex);
	xmlFree (end_regex);
}
/*
 * Handles a <string> element.
 *
 * The optional "end-at-line-end" attribute (default TRUE) is true when its
 * value is "TRUE" in any letter case or exactly "1", and false for any
 * other value. The <start-regex> and <end-regex> children of @cur (the last
 * occurrence wins) delimit the context. If either is missing a warning is
 * logged and nothing is defined.
 *
 * Fixes relative to the previous version:
 *  - the warnings used the loop variable, which is always NULL once the
 *    loop has finished; they now use @cur;
 *  - the end pattern is released when the start pattern is missing, and
 *    strings from repeated children are released instead of leaked.
 */
static void
parseString (xmlNodePtr            cur,
             gchar                *id,
             xmlChar              *style,
             GtkSourceContextData *ctx_data,
             GtkSourceLanguage    *language)
{
	xmlChar *start_regex = NULL;
	xmlChar *end_regex = NULL;
	xmlChar *prop;
	gboolean end_at_line_end = TRUE;
	xmlNodePtr child;

	prop = xmlGetProp (cur, BAD_CAST "end-at-line-end");
	if (prop != NULL)
	{
		end_at_line_end = !xmlStrcasecmp (prop, (const xmlChar *)"TRUE") ||
		                  !xmlStrcmp (prop, (const xmlChar *)"1");
		xmlFree (prop);
	}

	for (child = cur->xmlChildrenNode; child != NULL; child = child->next)
	{
		if (!xmlStrcmp (child->name, (const xmlChar *)"start-regex"))
		{
			/* xmlFree() on NULL is relied upon elsewhere in this file. */
			xmlFree (start_regex);
			start_regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
		}
		else if (!xmlStrcmp (child->name, (const xmlChar *)"end-regex"))
		{
			xmlFree (end_regex);
			end_regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
		}
	}

	if (start_regex == NULL)
	{
		g_warning ("Missing start-regex in tag 'string' (%s, line %ld)",
		           cur->doc->name, xmlGetLineNo (cur));
	}
	else if (end_regex == NULL)
	{
		g_warning ("Missing end-regex in tag 'string' (%s, line %ld)",
		           cur->doc->name, xmlGetLineNo (cur));
	}
	else
	{
		ctx_data_add_syntax_pattern (ctx_data, language, id,
		                             (gchar*) style,
		                             (gchar*) start_regex,
		                             (gchar*) end_regex,
		                             end_at_line_end);
	}

	/* Single release point for every path. */
	xmlFree (start_regex);
	xmlFree (end_regex);
}
static void
parseKeywordList (xmlNodePtr cur,
gchar *id,
xmlChar *style,
GtkSourceContextData *ctx_data,
GtkSourceLanguage *language)
{
gboolean case_sensitive = TRUE;
gboolean match_empty_string_at_beginning = TRUE;
gboolean match_empty_string_at_end = TRUE;
gchar *beginning_regex = NULL;
gchar *end_regex = NULL;
GSList *list = NULL;
gchar *regex;
xmlChar *prop;
xmlNodePtr child;
prop = xmlGetProp (cur, BAD_CAST "case-sensitive");
if (prop != NULL)
{
if (!xmlStrcasecmp (prop, (const xmlChar *)"TRUE") ||
!xmlStrcmp (prop, (const xmlChar *)"1"))
case_sensitive = TRUE;
else
case_sensitive = FALSE;
xmlFree (prop);
}
prop = xmlGetProp (cur, BAD_CAST "match-empty-string-at-beginning");
if (prop != NULL)
{
if (!xmlStrcasecmp (prop, (const xmlChar *)"TRUE") ||
!xmlStrcmp (prop, (const xmlChar *)"1"))
match_empty_string_at_beginning = TRUE;
else
match_empty_string_at_beginning = FALSE;
xmlFree (prop);
}
prop = xmlGetProp (cur, BAD_CAST "match-empty-string-at-end");
if (prop != NULL)
{
if (!xmlStrcasecmp (prop, (const xmlChar *)"TRUE") ||
!xmlStrcmp (prop, (const xmlChar *)"1"))
match_empty_string_at_end = TRUE;
else
match_empty_string_at_end = FALSE;
xmlFree (prop);
}
prop = xmlGetProp (cur, BAD_CAST "beginning-regex");
if (prop != NULL)
{
beginning_regex = g_strdup ((gchar *)prop);
xmlFree (prop);
}
prop = xmlGetProp (cur, BAD_CAST "end-regex");
if (prop != NULL)
{
end_regex = g_strdup ((gchar *)prop);
xmlFree (prop);
}
child = cur->xmlChildrenNode;
while (child != NULL)
{
if (!xmlStrcmp (child->name, BAD_CAST "keyword"))
{
xmlChar *keyword;
keyword = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
list = g_slist_prepend (list, keyword);
}
child = child->next;
}
list = g_slist_reverse (list);
if (list == NULL)
{
g_warning ("No keywords in tag 'keyword-list' (%s, line %ld)",
child->doc->name, xmlGetLineNo (cur));
g_free (beginning_regex),
g_free (end_regex);
return;
}
regex = build_keyword_list (list,
case_sensitive,
match_empty_string_at_beginning,
match_empty_string_at_end,
beginning_regex,
end_regex);
g_free (beginning_regex),
g_free (end_regex);
g_slist_foreach (list, (GFunc) xmlFree, NULL);
g_slist_free (list);
ctx_data_add_simple_pattern (ctx_data, language, id, (gchar*) style, regex);
g_free (regex);
}
/*
 * Handles a <pattern-item> element.
 *
 * The first child of @cur must be <regex>; its text is registered as a
 * simple pattern. Otherwise a warning is logged and nothing is defined.
 *
 * The warning no longer dereferences the child pointer, which is NULL when
 * the element has no children at all.
 */
static void
parsePatternItem (xmlNodePtr            cur,
                  gchar                *id,
                  xmlChar              *style,
                  GtkSourceContextData *ctx_data,
                  GtkSourceLanguage    *language)
{
	xmlNodePtr child = cur->xmlChildrenNode;
	xmlChar *regex;

	if (child == NULL ||
	    xmlStrcmp (child->name, (const xmlChar *)"regex") != 0)
	{
		/* Report the offending child's line when there is one, otherwise
		 * the line of the <pattern-item> element itself. */
		g_warning ("Missing regex in tag 'pattern-item' (%s, line %ld)",
		           cur->doc->name,
		           xmlGetLineNo (child != NULL ? child : cur));
		return;
	}

	regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);

	ctx_data_add_simple_pattern (ctx_data, language, id,
	                             (gchar*) style,
	                             (gchar*) regex);

	xmlFree (regex);
}
/*
 * Handles a <syntax-item> element.
 *
 * Collects the text of the <start-regex> and <end-regex> children of @cur
 * (the last occurrence wins) and defines a start/end context that does not
 * end at line end. If either is missing a warning is logged and nothing is
 * defined.
 *
 * Fixes relative to the previous version:
 *  - the warnings used the loop variable, which is always NULL once the
 *    loop has finished; they now use @cur;
 *  - the end pattern is released when the start pattern is missing, and
 *    strings from repeated children are released instead of leaked.
 */
static void
parseSyntaxItem (xmlNodePtr            cur,
                 const gchar          *id,
                 xmlChar              *style,
                 GtkSourceContextData *ctx_data,
                 GtkSourceLanguage    *language)
{
	xmlChar *start_regex = NULL;
	xmlChar *end_regex = NULL;
	xmlNodePtr child;

	for (child = cur->xmlChildrenNode; child != NULL; child = child->next)
	{
		if (!xmlStrcmp (child->name, (const xmlChar *)"start-regex"))
		{
			/* xmlFree() on NULL is relied upon elsewhere in this file. */
			xmlFree (start_regex);
			start_regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
		}
		else if (!xmlStrcmp (child->name, (const xmlChar *)"end-regex"))
		{
			xmlFree (end_regex);
			end_regex = xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
		}
	}

	if (start_regex == NULL)
	{
		g_warning ("Missing start-regex in tag 'syntax-item' (%s, line %ld)",
		           cur->doc->name, xmlGetLineNo (cur));
	}
	else if (end_regex == NULL)
	{
		g_warning ("Missing end-regex in tag 'syntax-item' (%s, line %ld)",
		           cur->doc->name, xmlGetLineNo (cur));
	}
	else
	{
		ctx_data_add_syntax_pattern (ctx_data, language, id,
		                             (gchar*) style,
		                             (gchar*) start_regex,
		                             (gchar*) end_regex,
		                             FALSE);
	}

	/* Single release point for every path. */
	xmlFree (start_regex);
	xmlFree (end_regex);
}
/*
 * Dispatches one child element of <language> to the matching parser.
 *
 * The element's id is its untranslated "_name" attribute, or a copy of its
 * "name" attribute when "_name" is absent. For "_name" the display name is
 * a copy of the translated string. Nodes without either attribute are
 * skipped silently. The optional "style" attribute is forwarded as-is (it
 * may be NULL). Unknown element names are reported with g_print().
 *
 * Fix relative to the previous version: @id is released on the early
 * return, so it no longer leaks when copying the translated name yields
 * NULL.
 */
static void
parseTag (GtkSourceLanguage    *language,
          xmlNodePtr            cur,
          GtkSourceContextData *ctx_data)
{
	xmlChar *name;
	xmlChar *style;
	xmlChar *id;

	name = xmlGetProp (cur, BAD_CAST "_name");
	if (name == NULL)
	{
		name = xmlGetProp (cur, BAD_CAST "name");
		id = xmlStrdup (name);
	}
	else
	{
		gchar *translated = _gtk_source_language_translate_string (language, (gchar*) name);

		/* Keep the untranslated attribute as the id. */
		id = name;
		name = xmlStrdup (BAD_CAST translated);
		g_free (translated);
	}

	if (name == NULL)
	{
		/* xmlFree() on NULL is relied upon elsewhere in this file. */
		xmlFree (id);
		return;
	}

	style = xmlGetProp (cur, BAD_CAST "style");

	if (!xmlStrcmp (cur->name, (const xmlChar*) "line-comment"))
	{
		parseLineComment (cur, (gchar*) id, style, ctx_data, language);
	}
	else if (!xmlStrcmp (cur->name, (const xmlChar*) "block-comment"))
	{
		parseBlockComment (cur, (gchar*) id, style, ctx_data, language);
	}
	else if (!xmlStrcmp (cur->name, (const xmlChar*) "string"))
	{
		parseString (cur, (gchar*) id, style, ctx_data, language);
	}
	else if (!xmlStrcmp (cur->name, (const xmlChar*) "keyword-list"))
	{
		parseKeywordList (cur, (gchar*) id, style, ctx_data, language);
	}
	else if (!xmlStrcmp (cur->name, (const xmlChar*) "pattern-item"))
	{
		parsePatternItem (cur, (gchar*) id, style, ctx_data, language);
	}
	else if (!xmlStrcmp (cur->name, (const xmlChar*) "syntax-item"))
	{
		parseSyntaxItem (cur, (gchar*) id, style, ctx_data, language);
	}
	else
	{
		g_print ("Unknown tag: %s\n", cur->name);
	}

	xmlFree (name);
	xmlFree (style);
	xmlFree (id);
}
/*
 * Defines the context "<language id>:<language id>", the id every other
 * context created by this parser passes alongside its own. It is defined
 * with no patterns and no style, using only the EXTEND_PARENT flag.
 *
 * An error reported through the GError is logged with g_warning().
 * Returns whatever _gtk_source_context_data_define_context() returned, or
 * FALSE if the language has no id.
 */
static gboolean
define_root_context (GtkSourceContextData *ctx_data,
                     GtkSourceLanguage    *language)
{
	GError *err = NULL;
	gchar *root_id;
	gboolean ok;

	g_return_val_if_fail (language->priv->id != NULL, FALSE);

	root_id = g_strdup_printf ("%s:%s", language->priv->id, language->priv->id);

	ok = _gtk_source_context_data_define_context (ctx_data,
	                                              root_id,
	                                              NULL, NULL, NULL, NULL,
	                                              NULL,
	                                              GTK_SOURCE_CONTEXT_EXTEND_PARENT,
	                                              &err);

	g_free (root_id);

	if (err != NULL)
	{
		g_warning ("%s", err->message);
		g_error_free (err);
	}

	return ok;
}
/*
 * _gtk_source_language_file_parse_version1:
 * @language: the language whose priv->lang_file_name is to be parsed.
 * @ctx_data: context data that receives the definitions.
 *
 * Parses a version "1.0" .lang file. The root element must be <language>
 * with version="1.0". A root context is defined first, then every child of
 * the root is processed: <escape-char> sets the escape character and all
 * other nodes go to parseTag(). Finally the context data is finished and
 * the language styles are defined.
 *
 * Note: xmlKeepBlanksDefault (0) changes a libxml2 default and is not
 * restored.
 *
 * Fixes relative to the previous version:
 *  - a <language> element without children no longer leaks the document
 *    and the version string (the early return bypassed the cleanup);
 *  - an empty <escape-char> element no longer passes NULL to
 *    g_utf8_get_char_validated().
 *
 * Returns TRUE on success; FALSE (after logging a warning) otherwise.
 */
gboolean
_gtk_source_language_file_parse_version1 (GtkSourceLanguage    *language,
                                          GtkSourceContextData *ctx_data)
{
	xmlDocPtr doc = NULL;
	xmlNodePtr cur;
	GMappedFile *mf;
	gunichar esc_char = 0;
	xmlChar *lang_version = NULL;
	gboolean success = FALSE;

	xmlKeepBlanksDefault (0);

	mf = g_mapped_file_new (language->priv->lang_file_name, FALSE, NULL);
	if (mf != NULL)
	{
		doc = xmlParseMemory (g_mapped_file_get_contents (mf),
		                      g_mapped_file_get_length (mf));
		g_mapped_file_free (mf);
	}

	if (doc == NULL)
	{
		g_warning ("Impossible to parse file '%s'",
		           language->priv->lang_file_name);
		return FALSE;
	}

	cur = xmlDocGetRootElement (doc);
	if (cur == NULL)
	{
		g_warning ("The lang file '%s' is empty",
		           language->priv->lang_file_name);
		goto out;
	}

	if (xmlStrcmp (cur->name, (const xmlChar *) "language") != 0)
	{
		g_warning ("File '%s' is of the wrong type",
		           language->priv->lang_file_name);
		goto out;
	}

	lang_version = xmlGetProp (cur, BAD_CAST "version");
	if (lang_version == NULL)
	{
		g_warning ("Language version missing in file '%s'",
		           language->priv->lang_file_name);
		goto out;
	}

	if (strcmp ("1.0", (char*) lang_version) != 0)
	{
		g_warning ("Wrong language version '%s' in file '%s', expected '%s'",
		           (char*) lang_version, language->priv->lang_file_name, "1.0");
		goto out;
	}

	if (!define_root_context (ctx_data, language))
	{
		g_warning ("Could not create root context for file '%s'",
		           language->priv->lang_file_name);
		goto out;
	}

	/* FIXME: check that the language name, version, etc. are the
	 * right ones - Paolo */

	/* cur still points at the root element here. */
	cur = cur->xmlChildrenNode;
	if (cur == NULL)
	{
		g_warning ("The lang file '%s' does not define any tag",
		           language->priv->lang_file_name);
		goto out;
	}

	for (; cur != NULL; cur = cur->next)
	{
		xmlChar *escape;

		if (xmlStrcmp (cur->name, (const xmlChar *)"escape-char") != 0)
		{
			parseTag (language, cur, ctx_data);
			continue;
		}

		escape = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1);
		if (escape == NULL)
		{
			/* <escape-char/> with no text: nothing to decode. */
			g_warning ("Empty escape character in file '%s'",
			           language->priv->lang_file_name);
			esc_char = 0;
			continue;
		}

		esc_char = g_utf8_get_char_validated ((gchar*) escape, -1);
		if (esc_char == (gunichar) -1 || esc_char == (gunichar) -2)
		{
			g_warning ("Invalid (non UTF8) escape character in file '%s'",
			           language->priv->lang_file_name);
			esc_char = 0;
		}

		xmlFree (escape);
	}

	if (esc_char != 0)
		_gtk_source_context_data_set_escape_char (ctx_data, esc_char);

	_gtk_source_context_data_finish_parse (ctx_data, NULL, NULL);
	_gtk_source_language_define_language_styles (language);

	success = TRUE;

out:
	/* doc is never NULL here; lang_version may be NULL (xmlFree() on NULL
	 * was already relied upon by the original cleanup path). */
	xmlFreeDoc (doc);
	xmlFree (lang_version);

	return success;
}