medit/moo/mooutils/mooscript/mooscript-parser.c

677 lines
14 KiB
C
Raw Normal View History

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4; coding: utf-8 -*-
*
* mooscript-parser.c
*
* Copyright (C) 2004-2006 by Yevgen Muntyan <muntyan@math.tamu.edu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* See COPYING file that comes with this distribution.
*/
2006-02-24 20:03:13 -08:00
#include "mooscript-parser.h"
#include "mooscript-yacc.h"
#include <string.h>
/* One reserved word of the script language, as listed in the keywords table. */
typedef struct {
    const char *string;     /* keyword text */
    guint       len;        /* number of characters in `string` */
    int         token;      /* token code the lexer returns for this keyword */
} Keyword;
/* Reserved words checked by ms_lex_parse_word() before it falls back to
 * IDENTIFIER. The length is derived from the literal so the two cannot
 * get out of sync. */
#define KEYWORD(text_, token_) { text_, sizeof (text_) - 1, token_ }
static Keyword keywords[] = {
    KEYWORD ("if",    IF),
    KEYWORD ("then",  THEN),
    KEYWORD ("else",  ELSE),
    KEYWORD ("fi",    FI),
    KEYWORD ("while", WHILE),
    KEYWORD ("for",   FOR),
    KEYWORD ("in",    IN),
    KEYWORD ("do",    DO),
    KEYWORD ("od",    OD),
    KEYWORD ("not",   NOT),
};
#undef KEYWORD
/* Lexer state: the input buffer, the read position and the strings that
 * were handed to the parser. */
typedef struct {
    const guchar *input;    /* text being scanned; ms_lex_new() stores the pointer, not a copy */
    guint         len;      /* number of bytes of `input` to scan */
    guint         ptr;      /* index of the next unread byte */
    GHashTable   *hash;     /* set of strings returned through yylval; keys are g_free'd with the table */
} MSLex;
2006-02-24 20:39:12 -08:00
struct _MSParser {
MSLex *lex;
gboolean failed;
GSList *nodes;
2006-02-24 20:39:12 -08:00
MSNode *script;
};
/* ASCII character classes used by the lexer.
 * Every use of the argument is parenthesized so that compound expressions
 * (e.g. a conditional) can be passed safely. The argument may still be
 * evaluated more than once, so it must not have side effects. */
#define IS_QUOTE(c__)   ((c__) == '"' || (c__) == '\'')
#define IS_SPACE(c__)   ((c__) == ' ' || (c__) == '\t' || (c__) == '\r' || (c__) == '\n')
#define IS_DIGIT(c__)   ('0' <= (c__) && (c__) <= '9')
#define IS_LETTER(c__)  (('a' <= (c__) && (c__) <= 'z') || ('A' <= (c__) && (c__) <= 'Z'))
/* Characters allowed inside an identifier: letters, digits and underscore. */
#define IS_WORD(c__)    (IS_LETTER (c__) || IS_DIGIT (c__) || (c__) == '_')
static int
2006-02-24 20:39:12 -08:00
ms_lex_error (MSParser *parser)
{
parser->failed = TRUE;
return -1;
}
static char *
2006-02-24 20:39:12 -08:00
ms_lex_add_string (MSLex *lex,
const char *string,
guint len)
{
gpointer orig, dummy;
char *copy = g_strndup (string, len);
if (g_hash_table_lookup_extended (lex->hash, copy, &orig, &dummy))
{
g_free (copy);
return orig;
}
else
{
g_hash_table_insert (lex->hash, copy, NULL);
return copy;
}
}
static int
2006-02-24 20:39:12 -08:00
ms_lex_parse_string (MSLex *lex,
MSParser *parser)
{
guint first, last;
guchar quote, second;
GString *string;
int token = -1;
g_assert (IS_QUOTE (lex->input[lex->ptr]));
last = first = lex->ptr + 1;
quote = lex->input[lex->ptr];
string = g_string_new (NULL);
if (quote == '\'')
second = '"';
else
second = '\'';
while (last < lex->len)
{
guchar c = lex->input[last];
if (c == quote)
{
2006-02-24 20:39:12 -08:00
_ms_script_yylval.str = ms_lex_add_string (lex, string->str, string->len);
lex->ptr = last + 1;
token = LITERAL;
goto out;
}
else if (c == '\\')
{
guchar next;
if (last + 1 == lex->len)
break;
next = lex->input[last + 1];
switch (next)
{
case 't':
g_string_append_c (string, '\t');
last += 2;
break;
case 'n':
g_string_append_c (string, '\n');
last += 2;
break;
case '\'':
case '"':
case '\\':
g_string_append_c (string, next);
last += 2;
break;
default:
g_string_append_c (string, '\\');
last++;
break;
}
}
else
{
g_string_append_c (string, c);
last++;
}
}
g_warning ("unterminated string literal");
2006-02-24 20:39:12 -08:00
token = ms_lex_error (parser);
out:
g_string_free (string, TRUE);
return token;
}
static int
2006-02-24 20:39:12 -08:00
ms_lex_parse_number (MSLex *lex,
MSParser *parser)
{
int value = 0;
g_assert (IS_DIGIT (lex->input[lex->ptr]));
while (lex->ptr < lex->len)
{
guchar c = lex->input[lex->ptr];
if (IS_DIGIT (c))
{
if (value > 1000000)
{
g_warning ("number is too big");
2006-02-24 20:39:12 -08:00
return ms_lex_error (parser);
}
value = (value * 10) + (c - '0');
lex->ptr++;
}
else if (IS_WORD (c))
{
g_warning ("number followed by word char");
2006-02-24 20:39:12 -08:00
return ms_lex_error (parser);
}
else
{
2006-02-24 20:39:12 -08:00
_ms_script_yylval.ival = value;
return NUMBER;
}
}
g_critical ("oops");
2006-02-24 20:39:12 -08:00
return ms_lex_error (parser);
}
static int
2006-02-24 20:39:12 -08:00
ms_lex_parse_word (MSLex *lex,
G_GNUC_UNUSED MSParser *parser)
{
guint last, i;
const char *string;
g_assert (IS_WORD (lex->input[lex->ptr]) && !IS_DIGIT (lex->input[lex->ptr]));
2006-01-27 12:48:10 -08:00
string = (const char *) &lex->input[lex->ptr];
for (i = 0; i < G_N_ELEMENTS (keywords); ++i)
{
Keyword *kw = &keywords[i];
if (lex->ptr + kw->len <= lex->len &&
!strncmp (string, kw->string, kw->len) &&
(lex->ptr + kw->len == lex->len || !IS_WORD (string[kw->len])))
{
lex->ptr += keywords[i].len;
return keywords[i].token;
}
}
for (last = lex->ptr + 1; last < lex->len && IS_WORD (lex->input[last]); ++last) ;
2006-02-24 20:39:12 -08:00
_ms_script_yylval.str = ms_lex_add_string (lex, string, last - lex->ptr);
lex->ptr = last;
return IDENTIFIER;
}
/* Helper macros for _ms_script_yylex(). They expect a local variable `lex`
 * pointing to the lexer state and are meant to be used inside a function
 * returning a token code. */

/* The byte at the current read position. */
#define THIS (lex->input[lex->ptr])
/* The byte after the current one, or 0 when the current byte is the last
 * one, so that the lexer never reads beyond lex->len. */
#define NEXT (lex->ptr + 1 < lex->len ? lex->input[lex->ptr + 1] : 0)

/* Consume one byte and return token `what`. */
#define RETURN1(what)                           \
G_STMT_START {                                  \
    lex->ptr += 1;                              \
    return what;                                \
} G_STMT_END

/* If the current byte is `c_`, consume that single byte and return `what_`. */
#define CHECK1(c_, what_)                       \
G_STMT_START {                                  \
    if (THIS == (c_))                           \
        RETURN1 (what_);                        \
} G_STMT_END

/* Consume two bytes and return token `what`. */
#define RETURN2(what)                           \
G_STMT_START {                                  \
    lex->ptr += 2;                              \
    return what;                                \
} G_STMT_END

/* If the current and the following byte are `c1_` and `c2_`, consume both
 * and return `what_`. */
#define CHECK2(c1_, c2_, what_)                 \
G_STMT_START {                                  \
    if (THIS == (c1_) && NEXT == (c2_))         \
        RETURN2 (what_);                        \
} G_STMT_END
int
2006-02-24 20:39:12 -08:00
_ms_script_yylex (MSParser *parser)
{
2006-02-24 20:39:12 -08:00
MSLex *lex = parser->lex;
guchar c;
while (lex->ptr < lex->len && IS_SPACE(lex->input[lex->ptr]))
lex->ptr++;
if (lex->ptr == lex->len)
return 0;
c = lex->input[lex->ptr];
if (c & 0x80)
{
g_warning ("got unicode character");
2006-02-24 20:39:12 -08:00
return ms_lex_error (parser);
}
if (IS_QUOTE (c))
2006-02-24 20:39:12 -08:00
return ms_lex_parse_string (lex, parser);
if (IS_DIGIT (c))
2006-02-24 20:39:12 -08:00
return ms_lex_parse_number (lex, parser);
if (IS_LETTER (c) || c == '_')
2006-02-24 20:39:12 -08:00
return ms_lex_parse_word (lex, parser);
CHECK2 ('.', '.', TWODOTS);
CHECK2 ('=', '=', EQ);
CHECK2 ('!', '=', NEQ);
CHECK2 ('&', '&', AND);
CHECK2 ('|', '|', OR);
CHECK2 ('<', '=', LE);
CHECK2 ('>', '=', GE);
CHECK1 ('!', NOT);
lex->ptr++;
return c;
}
void
2006-02-24 20:39:12 -08:00
_ms_script_yyerror (MSParser *parser,
const char *string)
{
g_print ("error: %s\n", string);
parser->failed = TRUE;
}
2006-02-24 20:39:12 -08:00
static MSLex *
ms_lex_new (const char *string,
int len)
{
2006-02-24 20:39:12 -08:00
MSLex *lex = g_new0 (MSLex, 1);
if (len < 0)
len = strlen (string);
2006-01-29 12:22:22 -08:00
lex->input = (const guchar *) string;
lex->len = len;
2006-02-25 13:53:11 -08:00
lex->hash = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
return lex;
}
static void
2006-02-24 20:39:12 -08:00
ms_lex_free (MSLex *lex)
{
if (lex)
{
g_hash_table_destroy (lex->hash);
g_free (lex);
}
}
2006-02-24 20:39:12 -08:00
static MSParser *
ms_parser_new (void)
{
2006-02-24 20:39:12 -08:00
MSParser *parser = g_new0 (MSParser, 1);
return parser;
}
static void
2006-02-24 20:39:12 -08:00
ms_parser_cleanup (MSParser *parser)
{
2006-02-24 20:39:12 -08:00
ms_lex_free (parser->lex);
parser->lex = NULL;
g_slist_foreach (parser->nodes, (GFunc) g_object_unref, NULL);
g_slist_free (parser->nodes);
parser->nodes = NULL;
parser->script = NULL;
}
static void
2006-02-24 20:39:12 -08:00
ms_parser_free (MSParser *parser)
{
if (parser)
{
2006-02-24 20:39:12 -08:00
ms_parser_cleanup (parser);
g_free (parser);
}
}
2006-02-24 20:39:12 -08:00
/* Parses `len` bytes of `string` (or the whole NUL-terminated string if
 * `len` is negative) with `parser`.
 *
 * Any state left from a previous parse, including the failure flag, is
 * discarded first. Returns the top node on success or NULL on failure.
 * The returned node is still held by the parser's node list, so the caller
 * must take its own reference before ms_parser_cleanup()/ms_parser_free()
 * is called. */
static MSNode *
ms_parser_parse (MSParser   *parser,
                 const char *string,
                 int         len)
{
    ms_parser_cleanup (parser);

    /* ms_parser_cleanup() does not touch the flag; without this reset a
     * parser that failed once would fail on every later input. */
    parser->failed = FALSE;
    parser->lex = ms_lex_new (string, len);

    /* A non-zero result means the generated parser gave up, even if no
     * error callback set the flag. */
    if (_ms_script_yyparse (parser) != 0)
        parser->failed = TRUE;

    return parser->failed ? NULL : parser->script;
}
2006-02-24 20:39:12 -08:00
MSNode *
ms_script_parse (const char *string)
{
2006-02-24 20:39:12 -08:00
MSParser *parser;
MSNode *script;
g_return_val_if_fail (string != NULL, FALSE);
if (!string[0])
return NULL;
2006-02-24 20:39:12 -08:00
parser = ms_parser_new ();
script = ms_parser_parse (parser, string, -1);
if (script)
g_object_ref (script);
2006-02-24 20:39:12 -08:00
ms_parser_free (parser);
return script;
}
static void
2006-02-24 20:39:12 -08:00
parser_add_node (MSParser *parser,
gpointer node)
{
2006-02-24 20:39:12 -08:00
g_return_if_fail (MS_IS_NODE (node));
parser->nodes = g_slist_prepend (parser->nodes, node);
}
void
2006-02-24 20:39:12 -08:00
_ms_parser_set_top_node (MSParser *parser,
MSNode *node)
{
g_assert (parser != NULL);
g_assert (parser->script == NULL);
if (!node)
{
2006-02-24 20:39:12 -08:00
node = (MSNode*) ms_node_list_new ();
parser_add_node (parser, node);
}
parser->script = node;
}
2006-02-24 20:39:12 -08:00
/* Appends `node` to `list` via ms_node_list_add(), creating and
 * registering a new list when `list` is NULL.
 * Returns the list as an MSNode, or NULL when `node` is NULL. */
MSNode *
_ms_parser_node_list_add (MSParser   *parser,
                          MSNodeList *list,
                          MSNode     *node)
{
    MSNodeList *target = list;

    if (node == NULL)
        return NULL;

    if (target == NULL)
    {
        target = ms_node_list_new ();
        parser_add_node (parser, target);
    }

    ms_node_list_add (target, node);

    return MS_NODE (target);
}
2006-02-24 20:39:12 -08:00
MSNode *
_ms_parser_node_command (MSParser *parser,
const char *name,
2006-02-24 20:39:12 -08:00
MSNodeList *list)
{
2006-02-24 20:39:12 -08:00
MSNodeCommand *cmd;
g_return_val_if_fail (name != NULL, NULL);
2006-02-24 20:39:12 -08:00
g_return_val_if_fail (!list || MS_IS_NODE_LIST (list), NULL);
2006-02-24 20:39:12 -08:00
cmd = ms_node_command_new (name, list);
parser_add_node (parser, cmd);
2006-02-24 20:39:12 -08:00
return MS_NODE (cmd);
}
2006-02-24 20:39:12 -08:00
/* Creates an if/else node and registers it with the parser.
 * `condition` and `then_` must be nodes; `else_` may be NULL. Returns NULL
 * (with a GLib warning) when these checks fail. */
MSNode *
_ms_parser_node_if_else (MSParser *parser,
                         MSNode   *condition,
                         MSNode   *then_,
                         MSNode   *else_)
{
    MSNodeIfElse *branch;

    g_return_val_if_fail (MS_IS_NODE (condition), NULL);
    g_return_val_if_fail (MS_IS_NODE (then_), NULL);
    g_return_val_if_fail (!else_ || MS_IS_NODE (else_), NULL);

    branch = ms_node_if_else_new (condition, then_, else_);
    parser_add_node (parser, branch);

    return MS_NODE (branch);
}
2006-02-24 20:39:12 -08:00
/* Shared implementation of the two while-loop constructors: creates a
 * while node of the given condition type and registers it with the parser.
 * `cond` must be a node; the body `what` may be NULL. Returns NULL (with a
 * GLib warning) when these checks fail. */
static MSNode *
ms_parser_while (MSParser   *parser,
                 MSCondType  type,
                 MSNode     *cond,
                 MSNode     *what)
{
    MSNodeWhile *while_node;

    g_return_val_if_fail (MS_IS_NODE (cond), NULL);
    g_return_val_if_fail (!what || MS_IS_NODE (what), NULL);

    while_node = ms_node_while_new (type, cond, what);
    parser_add_node (parser, while_node);

    return MS_NODE (while_node);
}
2006-02-24 20:39:12 -08:00
/* Creates a while node of type MS_COND_BEFORE; see ms_parser_while(). */
MSNode *
_ms_parser_node_while (MSParser *parser,
                       MSNode   *cond,
                       MSNode   *what)
{
    MSNode *loop = ms_parser_while (parser, MS_COND_BEFORE, cond, what);

    return loop;
}
/* Creates a while node of type MS_COND_AFTER; see ms_parser_while(). */
MSNode *
_ms_parser_node_do_while (MSParser *parser,
                          MSNode   *cond,
                          MSNode   *what)
{
    MSNode *loop = ms_parser_while (parser, MS_COND_AFTER, cond, what);

    return loop;
}
2006-02-24 20:39:12 -08:00
/* Creates a for-loop node and registers it with the parser.
 * `var` and `list` must be nodes; the body `what` may be NULL. Returns NULL
 * (with a GLib warning) when these checks fail. */
MSNode *
_ms_parser_node_for (MSParser *parser,
                     MSNode   *var,
                     MSNode   *list,
                     MSNode   *what)
{
    MSNodeFor *for_node;

    g_return_val_if_fail (MS_IS_NODE (var), NULL);
    g_return_val_if_fail (MS_IS_NODE (list), NULL);
    g_return_val_if_fail (!what || MS_IS_NODE (what), NULL);

    for_node = ms_node_for_new (var, list, what);
    parser_add_node (parser, for_node);

    return MS_NODE (for_node);
}
2006-02-24 20:39:12 -08:00
/* Creates an assignment node that assigns `val` to the variable `name`.
 * A variable node for `name` is created through _ms_parser_node_var(); both
 * nodes are registered with the parser. Returns NULL (with a GLib warning)
 * if `name` is NULL or empty, or `val` is not a node. */
MSNode *
_ms_parser_node_assignment (MSParser   *parser,
                            const char *name,
                            MSNode     *val)
{
    MSNode *target;
    MSNodeAssign *assignment;

    g_return_val_if_fail (name && name[0], NULL);
    g_return_val_if_fail (MS_IS_NODE (val), NULL);

    target = _ms_parser_node_var (parser, name);

    assignment = ms_node_assign_new (MS_NODE_VAR (target), val);
    parser_add_node (parser, assignment);

    return MS_NODE (assignment);
}
2006-02-24 20:39:12 -08:00
/* Creates a node applying the binary operator `op` to `lval` and `rval`
 * and registers it with the parser. Returns NULL (with a GLib warning) if
 * either operand is not a node. */
MSNode *
_ms_parser_node_binary_op (MSParser   *parser,
                           MSBinaryOp  op,
                           MSNode     *lval,
                           MSNode     *rval)
{
    MSNodeCommand *operation;

    g_return_val_if_fail (MS_IS_NODE (lval), NULL);
    g_return_val_if_fail (MS_IS_NODE (rval), NULL);

    operation = ms_node_binary_op_new (op, lval, rval);
    parser_add_node (parser, operation);

    return MS_NODE (operation);
}
2006-02-24 20:39:12 -08:00
/* Creates a node applying the unary operator `op` to `val` and registers
 * it with the parser. Returns NULL (with a GLib warning) if `val` is not a
 * node. */
MSNode *
_ms_parser_node_unary_op (MSParser  *parser,
                          MSUnaryOp  op,
                          MSNode    *val)
{
    MSNodeCommand *operation;

    g_return_val_if_fail (MS_IS_NODE (val), NULL);

    operation = ms_node_unary_op_new (op, val);
    parser_add_node (parser, operation);

    return MS_NODE (operation);
}
2006-02-24 20:39:12 -08:00
/* Creates a value node holding the integer `n` and registers it with the
 * parser. */
MSNode *
_ms_parser_node_int (MSParser *parser,
                     int       n)
{
    MSValue *number = ms_value_int (n);
    MSNodeValue *literal = ms_node_value_new (number);

    /* Drop the local reference; presumably the node keeps its own. */
    ms_value_unref (number);

    parser_add_node (parser, literal);

    return MS_NODE (literal);
}
2006-02-24 20:39:12 -08:00
/* Creates a value node holding `string` and registers it with the
 * parser. */
MSNode *
_ms_parser_node_string (MSParser   *parser,
                        const char *string)
{
    MSValue *text = ms_value_string (string);
    MSNodeValue *literal = ms_node_value_new (text);

    /* Drop the local reference; presumably the node keeps its own. */
    ms_value_unref (text);

    parser_add_node (parser, literal);

    return MS_NODE (literal);
}
2006-02-24 20:39:12 -08:00
/* Creates a variable node for `name` and registers it with the parser. */
MSNode *
_ms_parser_node_var (MSParser   *parser,
                     const char *name)
{
    MSNodeVar *variable = ms_node_var_new (name);

    parser_add_node (parser, variable);

    return MS_NODE (variable);
}
2006-02-24 20:39:12 -08:00
/* Creates a value-list node from the node list `list` and registers it
 * with the parser. */
MSNode *
_ms_parser_node_value_list (MSParser   *parser,
                            MSNodeList *list)
{
    MSNodeValList *values = ms_node_val_list_new (list);

    parser_add_node (parser, values);

    return MS_NODE (values);
}
/* Creates a value-list node for the range `first`..`last` via
 * ms_node_val_range_new() and registers it with the parser. */
MSNode *
_ms_parser_node_value_range (MSParser *parser,
                             MSNode   *first,
                             MSNode   *last)
{
    MSNodeValList *range = ms_node_val_range_new (first, last);

    parser_add_node (parser, range);

    return MS_NODE (range);
}