Add new parser for JSON

Colomban Wendling 2014-11-28 16:22:33 +01:00
parent b0cf3b4e2d
commit 2ff1386d96
8 changed files with 446 additions and 2 deletions


@@ -29,6 +29,7 @@ parsers = \
haxe.c \
html.c \
js.c \
json.c \
latex.c \
lregex.c \
lua.c \

396 tagmanager/ctags/json.c Normal file

@@ -0,0 +1,396 @@
/*
* Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*/
/*
* This module contains functions for generating tags for JSON files.
*
* http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
*
* This implementation is forgiving and allows many constructs that are not
* actually valid but that don't conflict with the format. This is intended to
* better support partly broken or unfinished files.
*/
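/*
* For example, a truncated document like
*     { "name": "value", "list": [1, 2,
* can still yield tags for "name", "list" and the array elements, because the
* parser recovers at commas and closing brackets instead of rejecting the
* whole input.
*/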
#include "general.h"
#include <string.h>
#include "main.h"
#include "entry.h"
#include "keyword.h"
#include "parse.h"
#include "read.h"
#include "vstring.h"
typedef enum {
TOKEN_EOF,
TOKEN_UNDEFINED,
TOKEN_OPEN_SQUARE,
TOKEN_CLOSE_SQUARE,
TOKEN_OPEN_CURLY,
TOKEN_CLOSE_CURLY,
TOKEN_COLON,
TOKEN_COMMA,
TOKEN_TRUE,
TOKEN_FALSE,
TOKEN_NULL,
TOKEN_NUMBER,
TOKEN_STRING
} tokenType;
typedef enum {
TAG_NONE = -1,
TAG_OBJECT,
TAG_ARRAY,
TAG_NUMBER,
TAG_STRING,
TAG_BOOLEAN,
TAG_NULL,
TAG_COUNT
} jsonKind;
typedef struct {
tokenType type;
jsonKind scopeKind;
vString *string;
vString *scope;
unsigned long lineNumber;
MIOPos filePosition;
} tokenInfo;
typedef enum {
KEYWORD_true,
KEYWORD_false,
KEYWORD_null
} keywordId;
static langType Lang_json;
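/* Tag kinds, one per JSON value type; the single-letter codes below are what
* appear in the generated tags output. */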
static kindOption JsonKinds [] = {
{ TRUE, 'o', "object", "objects" },
{ TRUE, 'a', "array", "arrays" },
{ TRUE, 'n', "number", "numbers" },
{ TRUE, 's', "string", "strings" },
{ TRUE, 'b', "boolean", "booleans" },
{ TRUE, 'z', "null", "nulls" }
};
static tokenInfo *newToken (void)
{
tokenInfo *const token = xMalloc (1, tokenInfo);
token->type = TOKEN_UNDEFINED;
token->scopeKind = TAG_NONE;
token->string = vStringNew ();
token->scope = vStringNew ();
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
return token;
}
static void deleteToken (tokenInfo *const token)
{
vStringDelete (token->string);
vStringDelete (token->scope);
eFree (token);
}
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
dest->type = src->type;
dest->scopeKind = src->scopeKind;
vStringCopy (dest->string, src->string);
vStringCopy (dest->scope, src->scope);
dest->lineNumber = src->lineNumber;
dest->filePosition = src->filePosition;
}
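/* Emits a tag for `token' with the given kind, attaching the enclosing
* object/array scope when one has been recorded. */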
static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
{
tagEntryInfo e;
if (! JsonKinds[kind].enabled)
return;
initTagEntry (&e, vStringValue (token->string));
e.lineNumber = token->lineNumber;
e.filePosition = token->filePosition;
e.kindName = JsonKinds[kind].name;
e.kind = JsonKinds[kind].letter;
if (vStringLength (token->scope) > 0)
{
Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
e.extensionFields.scope[1] = vStringValue (token->scope);
}
makeTagEntry (&e);
}
static boolean isIdentChar (int c)
{
return (isalnum (c) || c == '+' || c == '-' || c == '.');
}
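/* Reads the next token from the input: skips whitespace, then classifies
* punctuation, strings (honouring backslash escapes and stopping at unescaped
* control characters, '"' or EOF) and bare words, which are resolved to the
* true/false/null keywords or otherwise treated as numbers. */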
static void readToken (tokenInfo *const token)
{
int c;
token->type = TOKEN_UNDEFINED;
vStringClear (token->string);
do
c = fileGetc ();
while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
switch (c)
{
case EOF: token->type = TOKEN_EOF; break;
case '[': token->type = TOKEN_OPEN_SQUARE; break;
case ']': token->type = TOKEN_CLOSE_SQUARE; break;
case '{': token->type = TOKEN_OPEN_CURLY; break;
case '}': token->type = TOKEN_CLOSE_CURLY; break;
case ':': token->type = TOKEN_COLON; break;
case ',': token->type = TOKEN_COMMA; break;
case '"':
{
boolean escaped = FALSE;
token->type = TOKEN_STRING;
while (TRUE)
{
c = fileGetc ();
/* we don't handle unicode escapes but they are safe */
if (escaped)
escaped = FALSE;
else if (c == '\\')
escaped = TRUE;
else if (c >= 0x00 && c <= 0x1F)
break; /* break on invalid, unescaped, control characters */
else if (c == '"' || c == EOF)
break;
vStringPut (token->string, c);
}
vStringTerminate (token->string);
break;
}
default:
if (! isIdentChar (c))
token->type = TOKEN_UNDEFINED;
else
{
do
{
vStringPut (token->string, c);
c = fileGetc ();
}
while (c != EOF && isIdentChar (c));
vStringTerminate (token->string);
fileUngetc (c);
switch (lookupKeyword (vStringValue (token->string), Lang_json))
{
case KEYWORD_true: token->type = TOKEN_TRUE; break;
case KEYWORD_false: token->type = TOKEN_FALSE; break;
case KEYWORD_null: token->type = TOKEN_NULL; break;
default: token->type = TOKEN_NUMBER; break;
}
}
break;
}
}
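/* Scope handling: the scope is the dot-separated path of enclosing object
* keys and array indices. pushScope() appends the parent's name to the path,
* popScope() drops the last component again. */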
static void pushScope (tokenInfo *const token,
const tokenInfo *const parent,
const jsonKind parentKind)
{
if (vStringLength (token->scope) > 0)
vStringPut (token->scope, '.');
vStringCat (token->scope, parent->string);
vStringTerminate (token->scope);
token->scopeKind = parentKind;
}
static void popScope (tokenInfo *const token,
const tokenInfo *const parent)
{
char *dot = strrchr (token->scope->buffer, '.');
if (! dot)
vStringClear (token->scope);
else
{
*dot = 0;
token->scope->length = dot - token->scope->buffer;
}
token->scopeKind = parent->scopeKind;
}
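/* Skips tokens until one of the given types (or EOF) is reached, stepping
* over balanced nested {...} and [...] constructs along the way. */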
#define skipToOneOf2(token, type1, type2) \
(skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
#define skipTo(token, type) \
(skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
static void skipToOneOf3 (tokenInfo *const token,
const tokenType type1,
const tokenType type2,
const tokenType type3)
{
while (token->type != TOKEN_EOF &&
token->type != type1 &&
token->type != type2 &&
token->type != type3)
{
readToken (token);
if (token->type == TOKEN_OPEN_CURLY)
{
skipTo (token, TOKEN_CLOSE_CURLY);
readToken (token);
}
else if (token->type == TOKEN_OPEN_SQUARE)
{
skipTo (token, TOKEN_CLOSE_SQUARE);
readToken (token);
}
}
}
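/* Maps the opening token of a value to the tag kind used for the key or
* element that owns it; anything unrecognized is reported as null. */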
static jsonKind tokenToKind (const tokenType type)
{
switch (type)
{
case TOKEN_OPEN_CURLY: return TAG_OBJECT;
case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
case TOKEN_STRING: return TAG_STRING;
case TOKEN_TRUE:
case TOKEN_FALSE: return TAG_BOOLEAN;
case TOKEN_NUMBER: return TAG_NUMBER;
default: return TAG_NULL;
}
}
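/* Recursively walks a value. Object members are tagged under their key name,
* array elements are tagged under their zero-based index, and the kind of
* each tag comes from the member's value (null when the value is missing or
* invalid). */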
static void parseValue (tokenInfo *const token)
{
if (token->type == TOKEN_OPEN_CURLY)
{
tokenInfo *name = newToken ();
do
{
readToken (token);
if (token->type == TOKEN_STRING)
{
jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
copyToken (name, token);
/* skip any possible garbage before the value */
skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
if (token->type == TOKEN_COLON)
{
readToken (token);
tagKind = tokenToKind (token->type);
pushScope (token, name, tagKind);
parseValue (token);
popScope (token, name);
}
makeJsonTag (name, tagKind);
}
/* skip to the end of the construct */
skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
}
while (token->type != TOKEN_EOF &&
token->type != TOKEN_CLOSE_CURLY);
if (token->type == TOKEN_CLOSE_CURLY)
readToken (token);
deleteToken (name);
}
else if (token->type == TOKEN_OPEN_SQUARE)
{
tokenInfo *name = newToken ();
char buf[32];
unsigned int nth = 0;
readToken (token);
while (token->type != TOKEN_EOF &&
token->type != TOKEN_CLOSE_SQUARE)
{
jsonKind tagKind;
tagKind = tokenToKind (token->type);
copyToken (name, token);
snprintf (buf, sizeof buf, "%u", nth++);
vStringCopyS (name->string, buf);
makeJsonTag (name, tagKind);
pushScope (token, name, tagKind);
parseValue (token);
popScope (token, name);
/* skip to the end of the construct */
skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
if (token->type != TOKEN_CLOSE_SQUARE)
readToken (token);
}
if (token->type == TOKEN_CLOSE_SQUARE)
readToken (token);
deleteToken (name);
}
}
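/* Parser entry point: reads and parses top-level values until EOF. */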
static void findJsonTags (void)
{
tokenInfo *const token = newToken ();
/* We allow multiple top-level elements, although it's not actually valid
* JSON. An interesting side effect of this is that we allow a leading
* Unicode BOM mark -- even though that is harmless here, many JSON parsers will choke on it */
do
{
readToken (token);
parseValue (token);
}
while (token->type != TOKEN_EOF);
deleteToken (token);
}
static void initialize (const langType language)
{
Lang_json = language;
addKeyword ("true", language, KEYWORD_true);
addKeyword ("false", language, KEYWORD_false);
addKeyword ("null", language, KEYWORD_null);
}
/* Create parser definition structure */
extern parserDefinition* JsonParser (void)
{
static const char *const extensions [] = { "json", NULL };
parserDefinition *const def = parserNew ("JSON");
def->extensions = extensions;
def->kinds = JsonKinds;
def->kindCount = KIND_COUNT (JsonKinds);
def->parser = findJsonTags;
def->initialize = initialize;
return def;
}


@@ -44,7 +44,7 @@ all: $(COMPLIB)
clean:
-$(RM) deps.mak *.o $(COMPLIB)
$(COMPLIB): abaqus.o abc.o args.o c.o cobol.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o verilog.o lua.o js.o \
$(COMPLIB): abaqus.o abc.o args.o c.o cobol.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o verilog.o lua.o js.o json.o \
actionscript.o nsis.o objc.o \
haskell.o haxe.o html.o python.o lregex.o asciidoc.o rest.o sh.o ctags.o entry.o get.o keyword.o nestlevel.o \
options.o \


@@ -62,7 +62,8 @@
AsciidocParser, \
AbaqusParser, \
RustParser, \
GoParser
GoParser, \
JsonParser
#endif /* _PARSERS_H */


@@ -69,6 +69,7 @@ typedef enum
TM_PARSER_ABAQUS,
TM_PARSER_RUST,
TM_PARSER_GO,
TM_PARSER_JSON,
TM_PARSER_COUNT
} TMParserType;

25 tests/ctags/simple.json Normal file

@@ -0,0 +1,25 @@
{
"firstName": "John",
"lastName": "Smith",
"isAlive": true,
"age": 25,
"height_cm": 167.6,
"address": {
"streetAddress": "21 2nd Street",
"city": "New York",
"state": "NY",
"postalCode": "10021-3100"
},
"phoneNumbers": [
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "office",
"number": "646 555-4567"
}
],
"children": [],
"spouse": null
}


@@ -0,0 +1,19 @@
0 input.json /^ {$/;" o array:phoneNumbers
1 input.json /^ {$/;" o array:phoneNumbers
address input.json /^ "address": {$/;" o
age input.json /^ "age": 25,$/;" n
children input.json /^ "children": [],$/;" a
city input.json /^ "city": "New York",$/;" s object:address
firstName input.json /^ "firstName": "John",$/;" s
height_cm input.json /^ "height_cm": 167.6,$/;" n
isAlive input.json /^ "isAlive": true,$/;" b
lastName input.json /^ "lastName": "Smith",$/;" s
number input.json /^ "number": "212 555-1234"$/;" s object:phoneNumbers.0
number input.json /^ "number": "646 555-4567"$/;" s object:phoneNumbers.1
phoneNumbers input.json /^ "phoneNumbers": [$/;" a
postalCode input.json /^ "postalCode": "10021-3100"$/;" s object:address
spouse input.json /^ "spouse": null$/;" z
state input.json /^ "state": "NY",$/;" s object:address
streetAddress input.json /^ "streetAddress": "21 2nd Street",$/;" s object:address
type input.json /^ "type": "home",$/;" s object:phoneNumbers.0
type input.json /^ "type": "office",$/;" s object:phoneNumbers.1


@@ -87,6 +87,7 @@ ctags_sources = set([
'tagmanager/ctags/haxe.c',
'tagmanager/ctags/html.c',
'tagmanager/ctags/js.c',
'tagmanager/ctags/json.c',
'tagmanager/ctags/keyword.c',
'tagmanager/ctags/latex.c',
'tagmanager/ctags/lregex.c',