e2112fac0c
git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@2078 ea778897-0a13-0410-b9d1-a72fbfd435f5
409 lines
9.1 KiB
C
409 lines
9.1 KiB
C
/*
*   $Id: ruby.c 571 2007-06-24 23:32:14Z elliotth $
*
*   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
*   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
*   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for Ruby language
*   files.
*/
|
|
|
|
/*
*   INCLUDE FILES
*/
|
|
#include "general.h" /* must always come first */
|
|
|
|
#include <string.h>
|
|
|
|
#include "entry.h"
|
|
#include "parse.h"
|
|
#include "read.h"
|
|
#include "vstring.h"
|
|
|
|
/*
*   DATA DECLARATIONS
*/
|
|
/*
 * Kinds of tag this parser produces.  The non-negative values are used as
 * indices into RubyKinds; K_UNDEFINED means "no tag should be emitted".
 */
typedef enum {
	K_UNDEFINED = -1,
	K_CLASS,
	K_METHOD,
	K_MODULE,
	K_SINGLETON
} rubyKind;
|
|
|
|
/*
*   DATA DEFINITIONS
*/
|
|
static kindOption RubyKinds [] = {
|
|
{ TRUE, 'c', "class", "classes" },
|
|
{ TRUE, 'f', "method", "methods" },
|
|
{ TRUE, 'm', "namespace", "modules" },
|
|
{ TRUE, 'F', "member", "singleton methods" }
|
|
};
|
|
|
|
/* Stack of the scopes entered so far; created and destroyed by findRubyTags. */
static stringList* nesting = NULL;
|
|
|
|
/*
*   FUNCTION DEFINITIONS
*/
|
|
|
|
/*
|
|
* Returns a string describing the scope in 'list'.
|
|
* We record the current scope as a list of entered scopes.
|
|
* Scopes corresponding to 'if' statements and the like are
|
|
* represented by empty strings. Scopes corresponding to
|
|
* modules and classes are represented by the name of the
|
|
* module or class.
|
|
*/
|
|
static vString* stringListToScope (const stringList* list)
|
|
{
|
|
unsigned int i;
|
|
unsigned int chunks_output = 0;
|
|
vString* result = vStringNew ();
|
|
const unsigned int max = stringListCount (list);
|
|
for (i = 0; i < max; ++i)
|
|
{
|
|
vString* chunk = stringListItem (list, i);
|
|
if (vStringLength (chunk) > 0)
|
|
{
|
|
vStringCatS (result, (chunks_output++ > 0) ? "." : "");
|
|
vStringCatS (result, vStringValue (chunk));
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Attempts to advance 's' past 'literal'.
|
|
* Returns TRUE if it did, FALSE (and leaves 's' where
|
|
* it was) otherwise.
|
|
*/
|
|
static boolean canMatch (const unsigned char** s, const char* literal)
|
|
{
|
|
const int literal_length = strlen (literal);
|
|
const unsigned char next_char = *(*s + literal_length);
|
|
if (strncmp ((const char*) *s, literal, literal_length) != 0)
|
|
{
|
|
return FALSE;
|
|
}
|
|
/* Additionally check that we're at the end of a token. */
|
|
if ( ! (next_char == 0 || isspace (next_char) || next_char == '('))
|
|
{
|
|
return FALSE;
|
|
}
|
|
*s += literal_length;
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* Attempts to advance 'cp' past a Ruby operator method name. Returns
|
|
* TRUE if successful (and copies the name into 'name'), FALSE otherwise.
|
|
*/
|
|
static boolean parseRubyOperator (vString* name, const unsigned char** cp)
|
|
{
|
|
static const char* RUBY_OPERATORS[] = {
|
|
"[]", "[]=",
|
|
"**",
|
|
"!", "~", "+@", "-@",
|
|
"*", "/", "%",
|
|
"+", "-",
|
|
">>", "<<",
|
|
"&",
|
|
"^", "|",
|
|
"<=", "<", ">", ">=",
|
|
"<=>", "==", "===", "!=", "=~", "!~",
|
|
"`",
|
|
0
|
|
};
|
|
int i;
|
|
for (i = 0; RUBY_OPERATORS[i] != 0; ++i)
|
|
{
|
|
if (canMatch (cp, RUBY_OPERATORS[i]))
|
|
{
|
|
vStringCatS (name, RUBY_OPERATORS[i]);
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
* Emits a tag for the given 'name' of kind 'kind' at the current nesting.
|
|
*/
|
|
static void emitRubyTag (vString* name, rubyKind kind)
|
|
{
|
|
tagEntryInfo tag;
|
|
vString* scope;
|
|
|
|
vStringTerminate (name);
|
|
scope = stringListToScope (nesting);
|
|
|
|
initTagEntry (&tag, vStringValue (name));
|
|
if (vStringLength (scope) > 0) {
|
|
tag.extensionFields.scope [0] = "class";
|
|
tag.extensionFields.scope [1] = vStringValue (scope);
|
|
}
|
|
tag.kindName = RubyKinds [kind].name;
|
|
tag.kind = RubyKinds [kind].letter;
|
|
makeTagEntry (&tag);
|
|
|
|
stringListAdd (nesting, vStringNewCopy (name));
|
|
|
|
vStringClear (name);
|
|
vStringDelete (scope);
|
|
}
|
|
|
|
/* Tests whether 'ch' is a character in 'list'. */
|
|
static boolean charIsIn (char ch, const char* list)
|
|
{
|
|
return (strchr (list, ch) != 0);
|
|
}
|
|
|
|
/*
 * Moves '*cp' forward to the first character that is not whitespace.
 * '*cp' is left unchanged when it already points at such a character
 * (which includes the terminating NUL).
 */
static void skipWhitespace (const unsigned char** cp)
{
	const unsigned char* cursor = *cp;

	while (isspace (*cursor))
	{
		++cursor;
	}
	*cp = cursor;
}
|
|
|
|
/*
|
|
* Copies the characters forming an identifier from *cp into
|
|
* name, leaving *cp pointing to the character after the identifier.
|
|
*/
|
|
static rubyKind parseIdentifier (
|
|
const unsigned char** cp, vString* name, rubyKind kind)
|
|
{
|
|
/* Method names are slightly different to class and variable names.
|
|
* A method name may optionally end with a question mark, exclamation
|
|
* point or equals sign. These are all part of the name.
|
|
* A method name may also contain a period if it's a singleton method.
|
|
*/
|
|
const char* also_ok = (kind == K_METHOD) ? "_.?!=" : "_";
|
|
|
|
skipWhitespace (cp);
|
|
|
|
/* Check for an anonymous (singleton) class such as "class << HTTP". */
|
|
if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
|
|
{
|
|
return K_UNDEFINED;
|
|
}
|
|
|
|
/* Check for operators such as "def []=(key, val)". */
|
|
if (kind == K_METHOD || kind == K_SINGLETON)
|
|
{
|
|
if (parseRubyOperator (name, cp))
|
|
{
|
|
return kind;
|
|
}
|
|
}
|
|
|
|
/* Copy the identifier into 'name'. */
|
|
while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok)))
|
|
{
|
|
char last_char = **cp;
|
|
|
|
vStringPut (name, last_char);
|
|
++*cp;
|
|
|
|
if (kind == K_METHOD)
|
|
{
|
|
/* Recognize singleton methods. */
|
|
if (last_char == '.')
|
|
{
|
|
vStringTerminate (name);
|
|
vStringClear (name);
|
|
return parseIdentifier (cp, name, K_SINGLETON);
|
|
}
|
|
|
|
/* Recognize characters which mark the end of a method name. */
|
|
if (charIsIn (last_char, "?!="))
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return kind;
|
|
}
|
|
|
|
/*
 * Reads the identifier that follows a definition keyword and emits a tag
 * for it.
 *
 * '*cp' must point just past the keyword.  Nothing is done unless the
 * keyword is followed by whitespace.  'expected_kind' is the kind implied
 * by the keyword; the kind reported by parseIdentifier is the one used for
 * the tag.
 */
static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
{
	vString* identifier;
	rubyKind found_kind;

	if (! isspace (**cp))
	{
		return;
	}

	identifier = vStringNew ();
	found_kind = parseIdentifier (cp, identifier, expected_kind);

	/*
	 * No tag is created when the kind is undefined or the name is empty.
	 * What kind of tags should we create for code like this?
	 *
	 *    %w(self.clfloor clfloor).each do |name|
	 *        module_eval <<-"end;"
	 *            def #{name}(x, y=1)
	 *                q, r = x.divmod(y)
	 *                q = q.to_i
	 *                return q, r
	 *            end
	 *        end;
	 *    end
	 *
	 * Or this?
	 *
	 *    class << HTTP
	 *
	 * For now, we don't create any.
	 */
	if (found_kind != K_UNDEFINED && vStringLength (identifier) != 0)
	{
		emitRubyTag (identifier, found_kind);
	}
	vStringDelete (identifier);
}
|
|
|
|
static void enterUnnamedScope (void)
|
|
{
|
|
stringListAdd (nesting, vStringNewInit (""));
|
|
}
|
|
|
|
static void findRubyTags (void)
|
|
{
|
|
const unsigned char *line;
|
|
boolean inMultiLineComment = FALSE;
|
|
|
|
nesting = stringListNew ();
|
|
|
|
/* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
|
|
* You could perfectly well write:
|
|
*
|
|
* def
|
|
* method
|
|
* puts("hello")
|
|
* end
|
|
*
|
|
* if you wished, and this function would fail to recognize anything.
|
|
*/
|
|
while ((line = fileReadLine ()) != NULL)
|
|
{
|
|
const unsigned char *cp = line;
|
|
|
|
if (canMatch (&cp, "=begin"))
|
|
{
|
|
inMultiLineComment = TRUE;
|
|
continue;
|
|
}
|
|
if (canMatch (&cp, "=end"))
|
|
{
|
|
inMultiLineComment = FALSE;
|
|
continue;
|
|
}
|
|
|
|
skipWhitespace (&cp);
|
|
|
|
/* Avoid mistakenly starting a scope for modifiers such as
|
|
*
|
|
* return if <exp>
|
|
*
|
|
* FIXME: this is fooled by code such as
|
|
*
|
|
* result = if <exp>
|
|
* <a>
|
|
* else
|
|
* <b>
|
|
* end
|
|
*
|
|
* FIXME: we're also fooled if someone does something heinous such as
|
|
*
|
|
* puts("hello") \
|
|
* unless <exp>
|
|
*/
|
|
if (canMatch (&cp, "case") || canMatch (&cp, "for") ||
|
|
canMatch (&cp, "if") || canMatch (&cp, "unless") ||
|
|
canMatch (&cp, "while"))
|
|
{
|
|
enterUnnamedScope ();
|
|
}
|
|
|
|
/*
|
|
* "module M", "class C" and "def m" should only be at the beginning
|
|
* of a line.
|
|
*/
|
|
if (canMatch (&cp, "module"))
|
|
{
|
|
readAndEmitTag (&cp, K_MODULE);
|
|
}
|
|
else if (canMatch (&cp, "class"))
|
|
{
|
|
readAndEmitTag (&cp, K_CLASS);
|
|
}
|
|
else if (canMatch (&cp, "def"))
|
|
{
|
|
readAndEmitTag (&cp, K_METHOD);
|
|
}
|
|
|
|
while (*cp != '\0')
|
|
{
|
|
/* FIXME: we don't cope with here documents,
|
|
* or regular expression literals, or ... you get the idea.
|
|
* Hopefully, the restriction above that insists on seeing
|
|
* definitions at the starts of lines should keep us out of
|
|
* mischief.
|
|
*/
|
|
if (inMultiLineComment || isspace (*cp))
|
|
{
|
|
++cp;
|
|
}
|
|
else if (*cp == '#')
|
|
{
|
|
/* FIXME: this is wrong, but there *probably* won't be a
|
|
* definition after an interpolated string (where # doesn't
|
|
* mean 'comment').
|
|
*/
|
|
break;
|
|
}
|
|
else if (canMatch (&cp, "begin") || canMatch (&cp, "do"))
|
|
{
|
|
enterUnnamedScope ();
|
|
}
|
|
else if (canMatch (&cp, "end") && stringListCount (nesting) > 0)
|
|
{
|
|
/* Leave the most recent scope. */
|
|
vStringDelete (stringListLast (nesting));
|
|
stringListRemoveLast (nesting);
|
|
}
|
|
else if (*cp == '"')
|
|
{
|
|
/* Skip string literals.
|
|
* FIXME: should cope with escapes and interpolation.
|
|
*/
|
|
do {
|
|
++cp;
|
|
} while (*cp != 0 && *cp != '"');
|
|
}
|
|
else if (*cp != '\0')
|
|
{
|
|
do
|
|
++cp;
|
|
while (isalnum (*cp) || *cp == '_');
|
|
}
|
|
}
|
|
}
|
|
stringListDelete (nesting);
|
|
}
|
|
|
|
extern parserDefinition* RubyParser (void)
|
|
{
|
|
static const char *const extensions [] = { "rb", "ruby", NULL };
|
|
parserDefinition* def = parserNew ("Ruby");
|
|
def->kinds = RubyKinds;
|
|
def->kindCount = KIND_COUNT (RubyKinds);
|
|
def->extensions = extensions;
|
|
def->parser = findRubyTags;
|
|
return def;
|
|
}
|
|
|
|
/* vi:set tabstop=4 shiftwidth=4: */
|