Rewrite the Txt2tags parser for better conformance and features

This fixes parsing indented titles and titles with embedded delimiter
characters, and adds support for title nesting information.

Syntax: http://txt2tags.org/rules.html

Closes [feature-requests:#690].
This commit is contained in:
Colomban Wendling 2014-08-19 16:11:23 +02:00
parent 9e18884390
commit b7b34ec451
2 changed files with 121 additions and 56 deletions

View File

@ -315,6 +315,7 @@ const gchar *symbols_get_context_separator(gint ft_id)
/* no context separator */
case GEANY_FILETYPES_ASCIIDOC:
case GEANY_FILETYPES_TXT2TAGS:
return "\x03";
default:

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2009, Eric Forgeot
* Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
*
* Based on work by Jon Strait
*
@ -19,102 +20,165 @@
#include "parse.h"
#include "read.h"
#include "nestlevel.h"
#include "vstring.h"
/* as any character may appear in the input, use something highly unlikely */
#define SCOPE_SEPARATOR '\x3' /* ASCII ETX */
/*
* DATA DEFINITIONS
*/
/* Tag kinds produced by this parser; a value of this type is used as an
 * index into Txt2tagsKinds[] by makeTxt2tagsTag().
 * NOTE(review): two enumerator lists appear below (one with K_HEADER, one
 * with K_SECTION only); this looks like removed and added diff lines shown
 * together -- confirm which one is the current definition. */
typedef enum {
K_SECTION = 0, K_HEADER
K_SECTION = 0
} Txt2tagsKind;
/* Kind table indexed by Txt2tagsKind; makeTxt2tagsTag() reads the `name`
 * and `letter` members of the selected entry to fill in the tag.
 * NOTE(review): the "sections" row is listed twice and the "header1" row has
 * no trailing comma; presumably removed and added diff lines shown together
 * -- confirm the intended contents. */
static kindOption Txt2tagsKinds[] = {
{ TRUE, 'm', "member", "sections" },
{ TRUE, 's', "struct", "header1"}
{ TRUE, 'm', "member", "sections" }
};
/*
* FUNCTION DEFINITIONS
*/
/* Reduces NAME, which holds a raw title line, to the bare title text.
 *
 * name:         string to rewrite in place
 * control_char: the title delimiter character
 *
 * Every control_char seen before the first other character is skipped, and
 * the text is cut at the first control_char met once another character has
 * been seen.  The result is then passed through vStringStripLeading() and
 * vStringStripTrailing().  Does nothing if NAME has no buffer. */
static void parse_title (vString* const name, const char control_char)
{
	char *text = vStringValue(name);
	char *p;
	int offset_start = 0;
	boolean in_or_after_title = FALSE;

	/* without a buffer there is nothing to cut, and nowhere to write the
	 * terminator below */
	if (text == NULL)
		return;

	for (p = text; *p != '\0'; p++)
	{
		if (*p != control_char)
			in_or_after_title = TRUE;
		else if (in_or_after_title)
			break; /* first delimiter after the title text: stop here */
		else
			offset_start++; /* still inside the opening delimiter run */
	}
	*p = '\0';

	/* NOTE(review): the source pointer aliases NAME's own buffer; this
	 * assumes vStringCopyS() tolerates that -- confirm */
	vStringCopyS(name, text + offset_start);
	vStringStripLeading(name);
	vStringStripTrailing(name);
}
/* Builds a tag entry for NAME with the kind selected by TYPE and emits it
 * with makeTagEntry().
 *
 * When NLS holds at least one level, the names of all levels, first to last,
 * are joined with SCOPE_SEPARATOR and stored as the tag's scope; the scope's
 * kind name is taken from the type of the last level.
 *
 * NOTE(review): two signatures appear below (one taking `name_before`, one
 * taking `nls`), and the body refers to both parameters; this looks like
 * removed and added diff lines shown together -- confirm which is current. */
static void makeTxt2tagsTag (const vString* const name, boolean name_before, Txt2tagsKind type)
static void makeTxt2tagsTag (const vString* const name,
const NestingLevels *const nls,
Txt2tagsKind type)
{
tagEntryInfo e;
vString *scope = NULL;
kindOption *kind = &Txt2tagsKinds[type];
initTagEntry (&e, vStringValue(name));
if (name_before)
e.lineNumber--; /* we want the line before the underline chars */
e.kindName = kind->name;
e.kind = kind->letter;
if (nls->n > 0) {
int i;
kindOption *parentKind;
scope = vStringNew();
/* join the names of all enclosing levels into one scope string */
for (i = 0; i < nls->n; i++) {
if (vStringLength(scope) > 0)
vStringPut(scope, SCOPE_SEPARATOR);
vStringCat(scope, nls->levels[i].name);
}
/* the scope kind is that of the last (current) level */
parentKind = &Txt2tagsKinds[nls->levels[nls->n - 1].type];
e.extensionFields.scope[0] = parentKind->name;
e.extensionFields.scope[1] = vStringValue(scope);
}
makeTagEntry(&e);
/* scope is still NULL here when nls->n == 0
 * NOTE(review): assumes vStringDelete() accepts NULL -- confirm */
vStringDelete(scope);
}
/* Tells whether LINE is a separator bar: optional leading whitespace, then a
 * run of at least 20 characters taken from '=', '-' and '_', then optional
 * trailing whitespace up to the end of the string.
 * Whitespace is whatever isspace() accepts.  Returns non-zero on a match. */
static boolean isTxt2tagsLine (const unsigned char *line)
{
	const unsigned char *cursor = line;
	unsigned int barLength = 0;

	/* leading blanks */
	for (; isspace(*cursor); cursor++)
		;

	/* the bar itself; the three characters may be mixed freely */
	while (*cursor == '=' || *cursor == '-' || *cursor == '_')
	{
		cursor++;
		barLength++;
	}

	/* trailing blanks */
	for (; isspace(*cursor); cursor++)
		;

	/* long enough, and nothing else left on the line */
	return barLength >= 20 && *cursor == 0;
}
/* Tries to read LINE as a title line.
 *
 * A title is: optional leading spaces, 1 to 5 identical delimiter characters
 * ('=' or '+'), the title text, the same number of the same delimiter, and
 * an optional label of the form [A-Za-z0-9_-] in square brackets, optionally
 * followed by whitespace.
 *
 * line:   the NUL-terminated input line
 * title:  receives the title text, without surrounding whitespace
 * depth_: receives the length of the opening delimiter; it is written as
 *         soon as that delimiter is accepted, even if parsing fails later
 *
 * Returns TRUE if LINE is a title, FALSE otherwise. */
static boolean parseTxt2tagsTitle (const unsigned char *line,
                                   vString *const title,
                                   int *const depth_)
{
	enum { MAX_TITLE_DEPTH = 5 }; /* maximum length of a title delimiter */
	const unsigned char *start = line;
	const unsigned char *last;
	unsigned char marker;
	int unmatched = 0; /* opening delimiters not yet paired with closing ones */

	/* skip leading spaces, but no tabs (probably because they create quotes) */
	for (; *start == ' '; start++)
		;

	/* normal ('=') or numbered ('+') titles only */
	marker = *start;
	if (! (marker == '=' || marker == '+'))
		return FALSE;

	/* measure the opening delimiter, reading at most one character too many
	 * so that an over-long delimiter can be told apart */
	for (; *start == marker && unmatched <= MAX_TITLE_DEPTH; start++)
		unmatched++;
	for (; isspace(*start); start++)
		;
	if (unmatched > MAX_TITLE_DEPTH) /* invalid */
		return FALSE;
	*depth_ = unmatched;

	/* walk backwards from the last character of the line */
	last = start + strlen((const char *) start) - 1;
	for (; last > start && isspace(*last); last--)
		;

	/* skip a possible label: \[[A-Za-z0-9_-]+\] */
	if (*last == ']')
	{
		for (last--;
		     last > start && (*last == '_' || *last == '-' || isalnum(*last));
		     last--)
			;
		if (*last != '[') /* invalid */
			return FALSE;
		last--;
	}

	/* pair closing delimiters with the opening ones; going one below zero
	 * records that the closing delimiter is too long */
	for (; last > start && *last == marker && unmatched >= 0; last--)
		unmatched--;
	for (; last > start && isspace(*last); last--)
		;
	last++; /* one past the final character of the title text */

	/* reject mismatched start and end delimiters, or an empty name */
	if (unmatched != 0 || last <= start)
		return FALSE;

	vStringNCopyS(title, (const char *) start, last - start);
	return TRUE;
}
/* Reads the input one line at a time with fileReadLine() and emits a tag for
 * each title line found.
 *
 * In the nesting-aware lines below, every level whose stored indentation is
 * greater than or equal to the new title's depth is popped before the tag is
 * made; the title is then pushed as a new level and its depth is stored in
 * that level's `indentation` member.
 *
 * NOTE(review): this body contains lines from two different versions of the
 * function (calls to makeTxt2tagsTag() with both argument lists, and a
 * commented-out region that is closed by an inner comment); it looks like
 * removed and added diff lines shown together -- confirm which are current. */
static void findTxt2tagsTags (void)
{
NestingLevels *nls = nestingLevelsNew();
vString *name = vStringNew();
const unsigned char *line;
while ((line = fileReadLine()) != NULL)
{
/*int name_len = vStringLength(name);*/
int depth;
/* underlines must be the same length or more */
/*if (name_len > 0 && (line[0] == '=' || line[0] == '-') && issame((const char*) line))
if (isTxt2tagsLine(line))
; /* skip not to improperly match titles */
else if (parseTxt2tagsTitle(line, name, &depth))
{
makeTxt2tagsTag(name, TRUE);
}*/
if (line[0] == '=' || line[0] == '+') {
/*vStringClear(name);*/
vStringCatS(name, (const char *) line);
vStringTerminate(name);
parse_title(name, line[0]);
makeTxt2tagsTag(name, FALSE, K_SECTION);
}
/* TODO what exactly should this match?
* K_HEADER ('struct') isn't matched in src/symbols.c */
else if (strcmp((char*)line, "°") == 0) {
/*vStringClear(name);*/
vStringCatS(name, (const char *) line);
vStringTerminate(name);
makeTxt2tagsTag(name, FALSE, K_HEADER);
}
else {
vStringClear (name);
if (! isspace(*line))
vStringCatS(name, (const char*) line);
/* leave every level at the same or a deeper title depth, so that the new
 * title ends up under the nearest shallower one */
NestingLevel *nl = nestingLevelsGetCurrent(nls);
while (nl && nl->indentation >= depth)
{
nestingLevelsPop(nls);
nl = nestingLevelsGetCurrent(nls);
}
vStringTerminate(name);
makeTxt2tagsTag(name, nls, K_SECTION);
nestingLevelsPush(nls, name, K_SECTION);
nestingLevelsGetCurrent(nls)->indentation = depth;
}
}
vStringDelete (name);
nestingLevelsFree(nls);
}
extern parserDefinition* Txt2tagsParser (void)