Merge pull request #319 from b4n/better-txt2tags-parser

Better txt2tags parser
This commit is contained in:
Colomban Wendling 2014-11-29 23:40:58 +01:00
commit af7d63cdf2
9 changed files with 1224 additions and 56 deletions

View File

@ -311,6 +311,7 @@ const gchar *symbols_get_context_separator(gint ft_id)
/* no context separator */ /* no context separator */
case GEANY_FILETYPES_ASCIIDOC: case GEANY_FILETYPES_ASCIIDOC:
case GEANY_FILETYPES_TXT2TAGS:
return "\x03"; return "\x03";
default: default:

View File

@ -1,5 +1,6 @@
/* /*
* Copyright (c) 2009, Eric Forgeot * Copyright (c) 2009, Eric Forgeot
* Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
* *
* Based on work by Jon Strait * Based on work by Jon Strait
* *
@ -19,102 +20,165 @@
#include "parse.h" #include "parse.h"
#include "read.h" #include "read.h"
#include "nestlevel.h"
#include "vstring.h" #include "vstring.h"
/* any character may appear in the input, so use something highly unlikely as the scope separator */
#define SCOPE_SEPARATOR '\x3' /* ASCII ETX */
/* /*
* DATA DEFINITIONS * DATA DEFINITIONS
*/ */
typedef enum { typedef enum {
K_SECTION = 0, K_HEADER K_SECTION = 0
} Txt2tagsKind; } Txt2tagsKind;
static kindOption Txt2tagsKinds[] = { static kindOption Txt2tagsKinds[] = {
{ TRUE, 'm', "member", "sections" }, { TRUE, 'm', "member", "sections" }
{ TRUE, 's', "struct", "header1"}
}; };
/* /*
* FUNCTION DEFINITIONS * FUNCTION DEFINITIONS
*/ */
static void parse_title (vString* const name, const char control_char) static void makeTxt2tagsTag (const vString* const name,
{ const NestingLevels *const nls,
char *text = vStringValue(name); Txt2tagsKind type)
char *p = text;
int offset_start = 0;
boolean in_or_after_title = FALSE;
while (p != NULL && *p != '\0')
{
if (*p == control_char)
{
if (in_or_after_title)
break;
else
offset_start++;
}
else
in_or_after_title = TRUE;
p++;
}
*p = '\0';
vStringCopyS(name, text + offset_start);
vStringStripLeading(name);
vStringStripTrailing(name);
}
static void makeTxt2tagsTag (const vString* const name, boolean name_before, Txt2tagsKind type)
{ {
tagEntryInfo e; tagEntryInfo e;
vString *scope = NULL;
kindOption *kind = &Txt2tagsKinds[type]; kindOption *kind = &Txt2tagsKinds[type];
initTagEntry (&e, vStringValue(name)); initTagEntry (&e, vStringValue(name));
if (name_before)
e.lineNumber--; /* we want the line before the underline chars */
e.kindName = kind->name; e.kindName = kind->name;
e.kind = kind->letter; e.kind = kind->letter;
if (nls->n > 0) {
int i;
kindOption *parentKind;
scope = vStringNew();
for (i = 0; i < nls->n; i++) {
if (vStringLength(scope) > 0)
vStringPut(scope, SCOPE_SEPARATOR);
vStringCat(scope, nls->levels[i].name);
}
parentKind = &Txt2tagsKinds[nls->levels[nls->n - 1].type];
e.extensionFields.scope[0] = parentKind->name;
e.extensionFields.scope[1] = vStringValue(scope);
}
makeTagEntry(&e); makeTagEntry(&e);
vStringDelete(scope);
}
/*
 * Checks whether LINE consists solely of a run of at least 20 characters
 * taken from '=', '-' and '_' (freely mixed), optionally surrounded by
 * whitespace as recognized by isspace().
 * Roughly: ^\s*[=_-]{20,}\s*$
 */
static boolean isTxt2tagsLine (const unsigned char *line)
{
	const unsigned char *cursor = line;
	unsigned int run = 0;

	/* leading blanks */
	while (isspace(*cursor))
		cursor++;

	/* the run of line characters itself */
	while (*cursor == '=' || *cursor == '-' || *cursor == '_')
	{
		cursor++;
		run++;
	}

	/* trailing blanks */
	while (isspace(*cursor))
		cursor++;

	/* long enough, and nothing else left on the line */
	return run >= 20 && *cursor == 0;
}
/*
 * Tries to parse LINE as a txt2tags title.
 *
 * A title is recognized as: optional leading spaces, an opening run of 1 to
 * MAX_TITLE_DEPTH identical delimiter characters ('=' or '+'), the title
 * text, a closing run of the same delimiter with the same length, and an
 * optional label between square brackets glued to the closing run.
 *
 * line:   the line to examine, NUL-terminated
 * title:  receives the title text (without delimiters, surrounding
 *         whitespace or label) when the line is a title
 * depth_: receives the length of the opening delimiter run; note it is
 *         written before the closing delimiter is checked, so it may be
 *         modified even when FALSE is returned
 *
 * Returns TRUE if the line is a title, FALSE otherwise.
 */
static boolean parseTxt2tagsTitle (const unsigned char *line,
vString *const title,
int *const depth_)
{
const int MAX_TITLE_DEPTH = 5; /* maximum length of a title delimiter */
unsigned char delim;
int delim_delta = 0; /* opening run length minus closing run length */
const unsigned char *end;
/* skip leading spaces, but no tabs (probably because they create quotes) */
while (*line == ' ') line++;
/* normal/numbered titles */
if (*line != '=' && *line != '+')
return FALSE;
delim = *line;
/* find the start delimiter length; count one past the maximum so an
 * overlong run can be told apart from a maximal valid one */
while (*line == delim && delim_delta < MAX_TITLE_DEPTH+1)
{
line++;
delim_delta++;
}
/* skip any whitespace between the opening delimiter and the text */
while (isspace(*line))
line++;
if (delim_delta > MAX_TITLE_DEPTH) /* invalid */
return FALSE;
*depth_ = delim_delta;
/* find the end delimiter: start from the last character of the line and
 * walk backwards.  If nothing is left after the opening delimiter, end
 * starts one before line; the "end > line" guards below then skip every
 * loop and the line is rejected by the final check */
end = line + strlen((const char *) line) - 1;
while (end > line && isspace(*end)) end--;
/* skip a possible label: \[[A-Za-z0-9_-]+\]
 * (the scan below does not itself require a character between the brackets) */
if (*end == ']')
{
end--;
while (end > line && (isalnum(*end) || *end == '_' || *end == '-'))
end--;
if (*end != '[') /* invalid */
return FALSE;
end--;
}
/* consume the closing run, decrementing the counter for each delimiter;
 * allowing it to reach -1 makes a closing run longer than the opening one
 * show up as a mismatch below */
while (end > line && *end == delim && delim_delta >= 0)
{
delim_delta--;
end--;
}
/* drop whitespace between the text and the closing delimiter */
while (end > line && isspace(*end)) end--;
/* make end point one past the last character of the title text */
end++;
/* if start and end delimiters are not identical, or the name is empty */
if (delim_delta != 0 || (end - line) <= 0)
return FALSE;
/* presumably stores the (end - line) bytes starting at line into title --
 * confirm against vStringNCopyS() */
vStringNCopyS(title, (const char *) line, end - line);
return TRUE;
} }
static void findTxt2tagsTags (void) static void findTxt2tagsTags (void)
{ {
NestingLevels *nls = nestingLevelsNew();
vString *name = vStringNew(); vString *name = vStringNew();
const unsigned char *line; const unsigned char *line;
while ((line = fileReadLine()) != NULL) while ((line = fileReadLine()) != NULL)
{ {
/*int name_len = vStringLength(name);*/ int depth;
/* underlines must be the same length or more */ if (isTxt2tagsLine(line))
/*if (name_len > 0 && (line[0] == '=' || line[0] == '-') && issame((const char*) line)) ; /* skip not to improperly match titles */
else if (parseTxt2tagsTitle(line, name, &depth))
{ {
makeTxt2tagsTag(name, TRUE); NestingLevel *nl = nestingLevelsGetCurrent(nls);
}*/ while (nl && nl->indentation >= depth)
if (line[0] == '=' || line[0] == '+') { {
/*vStringClear(name);*/ nestingLevelsPop(nls);
vStringCatS(name, (const char *) line); nl = nestingLevelsGetCurrent(nls);
vStringTerminate(name); }
parse_title(name, line[0]);
makeTxt2tagsTag(name, FALSE, K_SECTION);
}
/* TODO what exactly should this match?
* K_HEADER ('struct') isn't matched in src/symbols.c */
else if (strcmp((char*)line, "°") == 0) {
/*vStringClear(name);*/
vStringCatS(name, (const char *) line);
vStringTerminate(name);
makeTxt2tagsTag(name, FALSE, K_HEADER);
}
else {
vStringClear (name);
if (! isspace(*line))
vStringCatS(name, (const char*) line);
vStringTerminate(name); vStringTerminate(name);
makeTxt2tagsTag(name, nls, K_SECTION);
nestingLevelsPush(nls, name, K_SECTION);
nestingLevelsGetCurrent(nls)->indentation = depth;
} }
} }
vStringDelete (name); vStringDelete (name);
nestingLevelsFree(nls);
} }
extern parserDefinition* Txt2tagsParser (void) extern parserDefinition* Txt2tagsParser (void)

View File

@ -233,6 +233,8 @@ test_sources = \
recursive.f95 \ recursive.f95 \
refcurs.sql \ refcurs.sql \
regexp.js \ regexp.js \
rules.t2t \
sample.t2t \
secondary_fcn_name.js \ secondary_fcn_name.js \
semicolon.f90 \ semicolon.f90 \
shebang.js \ shebang.js \
@ -264,6 +266,7 @@ test_sources = \
test.vhd \ test.vhd \
test_input.rs \ test_input.rs \
test_input2.rs \ test_input2.rs \
titles.t2t \
traffic_signal.v \ traffic_signal.v \
traits.php \ traits.php \
ui5.controller.js \ ui5.controller.js \

782
tests/ctags/rules.t2t Normal file
View File

@ -0,0 +1,782 @@
Txt2tags Markup Rules
%!includeconf: rules.conf
This document describes all the details about each txt2tags mark.
The target audience are **experienced** users. You may find it
useful if you want to master the marks or solve a specific problem
about a mark.
If you are new to txt2tags or just want to know which are the
available marks, please read the [Markup Demo MARKUPDEMO].
Note 1: This document is generated directly from the txt2tags
test-suite. All the rules mentioned here are 100% in sync with the
current program code.
Note 2: A good practice is to consult [the sources rules.t2t] when
reading, to see how the texts were made.
Table of Contents:
%%TOC
-------------------------------------------------------------
= Paragraph =[paragraph]
%INCLUDED(t2t) starts here: ../../../test/marks/paragraph.t2t
BODYINIT
%%% Syntax: Lines grouped together
A paragraph is composed by one or more lines.
A blank line (or a table, or a list) ends the
current paragraph.
%%% Syntax: Leading and trailing spaces are ignored
Leading and trailing spaces are ignored.
%%% Syntax: A comment don't close a paragraph
A comment line can be placed inside a paragraph.
% this comment will be ignored
It will not affect it.
%%% Closing: EOF closes the open paragraph
The end of the file (EOF) closes the
currently open paragraph.
= Comment =[comment]
%INCLUDED(t2t) starts here: ../../../test/marks/comment.t2t
BODYINIT
%%% Syntax: The % character at the line beginning (column 1)
%glued with the % mark
% separated from the % mark
% very distant from the % mark
%%%%%%% lots of % marks
% a blank comment, used for vertical spacing:
%
% NOTE: what matters is the first % being at the line beginning,
% the rest of the line is just ignored.
%%% Syntax: Area (block)
%%%
You're not seeing this.
%%%
%%% Syntax: Area (block) with trailing spaces
%%%
You're not seeing this.
%%%
%%% Invalid: The % in any other position
% not on the line beginning (at column 2)
some text % half line comments are not allowed
= Line =[line]
%INCLUDED(t2t) starts here: ../../../test/marks/line.t2t
BODYINIT
%%% Syntax: At least 20 chars of - = _
--------------------
====================
____________________
%%% Syntax: Any kind of mixing is allowed
%% Free mixing is allowed to make the line,
%% but the first char is the identifier for
%% the difference between separator ( - _ )
%% and strong ( = ) lines.
=========-----------
-_-_-_-_-_-_-_-_-_-_
=-=-=-=-=-=-=-=-=-=-
=------------------=
--------====--------
%%% Syntax: Leading and/or trailing spaces are allowed
--------------------
--------------------
--------------------
%%% Invalid: Less than 20 chars (but strike matches)
---------
%%% Invalid: Strange chars (but strike matches)
--------- ----------
---------+----------
( -------------------- )
= Inline =[inline]
%INCLUDED(t2t) starts here: ../../../test/marks/inline.t2t
BODYINIT
%%% Syntax: Marks are greedy and must be "glued" with contents
%% GLUED: The contents must be glued with the marks, no spaces
%% between them. Right after the opening mark there must be a
%% non-blank character, as well as right before the closing mark.
%%
%% GREEDY: If the contents boundary character is the same as
%% the mark character, it is considered contents, not mark.
%% So ""****bold****"" turns to ""<B>**bold**</B>"" in HTML.
i) **b** //i// __u__ --s-- ``m`` ""r"" ''t''
i) **bo** //it// __un__ --st-- ``mo`` ""ra"" ''tg''
i) **bold** //ital// __undr__ --strk-- ``mono`` ""raw"" ''tggd''
i) **bo ld** //it al// __un dr__ --st rk-- ``mo no`` ""r aw"" ''tg gd''
i) **bo * ld** //it / al// __un _ dr__ --st - rk-- ``mo ` no`` ""r " aw"" ''tg ' gd''
i) **bo **ld** //it //al// __un __dr__ --st --rk-- ``mo ``no`` ""r ""aw"" ''tg ''gd''
i) **bo ** ld** //it // al// __un __ dr__ --st -- rk-- ``mo `` no`` ""r "" aw"" ''tg '' gd''
i) ****bold**** ////ital//// ____undr____ ----strk---- ````mono```` """"raw"""" ''''tggd''''
i) ***bold*** ///ital/// ___undr___ ---strk--- ```mono``` """raw""" '''tggd'''
%%% Syntax: Repetition is greedy
%% When the mark character is repeated many times,
%% the contents are expanded to the largest possible.
%% Thats why they are greedy, the outer marks are
%% the ones used.
i) ***** ///// _____ ----- ````` """"" '''''
i) ****** ////// ______ ------ `````` """""" ''''''
i) ******* /////// _______ ------- ``````` """"""" '''''''
i) ******** //////// ________ -------- ```````` """""""" ''''''''
i) ********* ///////// _________ --------- ````````` """"""""" '''''''''
i) ********** ////////// __________ ---------- `````````` """""""""" ''''''''''
%%% Invalid: No contents
i) **** //// ____ ---- ```` """" ''''
i) ** ** // // __ __ -- -- `` `` "" "" '' ''
%%% Invalid: Contents not "glued" with marks
%% Spaces between the marks and the contents in any side
%% invalidate the mark.
i) ** bold** // ital// __ undr__ -- strk-- `` mono`` "" raw"" '' tggd''
i) **bold ** //ital // __undr __ --strk -- ``mono `` ""raw "" ''tggd ''
i) ** bold ** // ital // __ undr __ -- strk -- `` mono `` "" raw "" '' tggd ''
= Link =[link]
%INCLUDED(t2t) starts here: ../../../test/marks/link.t2t
BODYINIT
%%% Syntax: E-mail
user@domain.com
user@domain.com.
user@domain.com. any text.
any text: user@domain.com. any text.
[label user@domain.com]
%%% Syntax: E-mail with form data
user@domain.com?subject=bla
user@domain.com?subject=bla.
user@domain.com?subject=bla,
user@domain.com?subject=bla&cc=otheruser@domain.com
user@domain.com?subject=bla&cc=otheruser@domain.com.
user@domain.com?subject=bla&cc=otheruser@domain.com,
[label user@domain.com?subject=bla&cc=otheruser@domain.com].
[label user@domain.com?subject=bla&cc=otheruser@domain.com.].
%%% Syntax: URL
http://www.domain.com
http://www.domain.com/dir/
http://www.domain.com/dir///
http://www.domain.com.
http://www.domain.com,
http://www.domain.com. any text.
http://www.domain.com, any text.
http://www.domain.com/dir/. any text.
any text: http://www.domain.com. any text.
any text: http://www.domain.com/dir/. any text.
any text: http://www.domain.com/dir/index.html. any text.
any text: http://www.domain.com/dir/index.html, any text.
%%% Syntax: URL with anchor
http://www.domain.com/dir/#anchor
http://www.domain.com/dir/index.html#anchor
http://www.domain.com/dir/index.html#anchor.
http://www.domain.com/dir/#anchor. any text.
http://www.domain.com/dir/index.html#anchor. any text.
any text: http://www.domain.com/dir/#anchor. any text.
any text: http://www.domain.com/dir/index.html#anchor. any text.
%%% Syntax: URL with form data
http://domain.com?a=a@a.a&b=a+b+c.
http://domain.com?a=a@a.a&b=a+b+c,
http://domain.com/bla.cgi?a=a@a.a&b=a+b+c.
http://domain.com/bla.cgi?a=a@a.a&b=a+b+c@.
%%% Syntax: URL with form data and anchor
http://domain.com?a=a@a.a&b=a+b+c.#anchor
http://domain.com/bla.cgi?a=a@a.a&b=a+b+c.#anchor
http://domain.com/bla.cgi?a=a@a.a&b=a+b+c@.#anchor
%%% Syntax: URL with login data
http://user:password@domain.com/bla.html.
http://user:password@domain.com/dir/.
http://user:password@domain.com.
http://user:@domain.com.
http://user@domain.com.
%%% Syntax: URL with login, form and anchor
http://user:password@domain.com/bla.cgi?a=a@a.a&b=a+b+c.#anchor
http://user:password@domain.com/bla.cgi?a=a@a.a&b=a+b+c@#anchor
%%% Syntax: URL with label
[label www.domain.com]
%%% Syntax: URL with label (trailing spaces are discarded, leading are maintained)
%TODO normalize this behavior
[ label www.domain.com]
[label www.domain.com]
%%% Syntax: URL with label, stressing
[anchor http://www.domain.com/dir/index.html#anchor.]
[login http://user:password@domain.com/bla.html]
[form http://www.domain.com/bla.cgi?a=a@a.a&b=a+b+c.]
[form & anchor http://www.domain.com/bla.cgi?a=a@a.a&b=a+b+c.#anchor]
[login & form http://user:password@domain.com/bla.cgi?a=a@a.a&b=a+b+c.]
%%% Syntax: Link with label for local files
[local link up ..]
[local link file bla.html]
[local link anchor #anchor]
[local link file/anchor bla.html#anchor]
[local link file/anchor bla.html#anchor.]
[local link img abc.gif]
%%% Syntax: Another link as a label
[www.fake.com www.domain.com]
%%% Syntax: URL with funny chars
http://domain.com:8080/~user/_st-r@a=n$g,e/index%20new.htm
http://domain.com:8080/~user/_st-r@a=n$g,e/index%20new.htm?a=/%22&b=+.@*_-
http://domain.com:8080/~user/_st-r@a=n$g,e/index%20new.htm?a=/%22&b=+.@*_-#anchor_-1%.
http://foo._user-9:pass!#$%&*()+word@domain.com:8080/~user/_st-r@a=n$g,e/index%20new.htm?a=/%22&b=+.@*_-#anchor_-1%.
%%% Test: Various per line
http://L1.com ! L2@www.com ! [L3 www.com] ! [L4 w@ww.com] ! www.L5.com
%%% Feature: Guessed link, adding protocol automatically
www.domain.com
www2.domain.com
ftp.domain.com
WWW.DOMAIN.COM
FTP.DOMAIN.COM
[label www.domain.com]
[label ftp.domain.com]
[label WWW.DOMAIN.COM]
[label FTP.DOMAIN.COM]
%%% Invalid: Trailing space on link
[label www.domain.com ]
%%% Invalid: Label with ] char (use postproc)
[label] www.domain.com]
= Image =[image]
%INCLUDED(t2t) starts here: ../../../test/marks/image.t2t
BODYINIT
%%% Syntax: Image name inside brackets: [img]
[img.png]
%%% Syntax: Image pointing to a link: [[img] link]
[[img.png] http://txt2tags.org]
%%% Align: Image position is preserved when inside paragraph
[img.png] Image at the line beginning.
Image in the middle [img.png] of the line.
Image at the line end. [img.png]
%%% Align: Image alone with spaces around is aligned
[img.png]
[img.png]
[img.png]
%%% Test: Two glued images with no spaces (left & right)
[img.png][img.png]
%%% Test: Various per line
Images [img.png] mixed [img.png] with [img.png] text.
Images glued together: [img.png][img.png][img.png].
%%% Invalid: Spaces inside are not allowed
[img.png ]
[ img.png]
[ img.png ]
= Macro =[macro]
%INCLUDED(t2t) starts here: ../../../test/marks/macro.t2t
BODYINIT
%%% Syntax: Macro without formatting string
Date : %%date - %%date()
Mtime : %%mtime - %%mtime()
Infile : %%infile - %%infile()
Outfile : %%outfile - %%outfile()
%%% Syntax: Macro name is case insensitive
Date : %%dAtE
Mtime : %%mTiMe
Infile : %%iNfIlE
Outfile : %%oUtFiLe
%%% Syntax: Macro with formatting string
Date : %%date(txt %C txt)
Mtime : %%mtime(txt %C txt)
Infile : %%infile(txt %e txt)
Outfile : %%outfile(txt %e txt)
%%% Syntax: Leading and trailing spaces are preserved
Date : (%%date( txt )) - (%%date( %C ))
Mtime : (%%mtime( txt )) - (%%mtime( %C ))
Infile : (%%infile( txt )) - (%%infile( %e ))
Outfile : (%%outfile( txt )) - (%%outfile( %e ))
%%% Test: Expansion of the percent char
Date : %%date(%) - %%date(%%) - %%date(%%%) - %%date(%%%)
Mtime : %%mtime(%) - %%mtime(%%) - %%mtime(%%%) - %%mtime(%%%)
Infile : %%infile(%) - %%infile(%%) - %%infile(%%%) - %%infile(%%%)
Outfile : %%outfile(%) - %%outfile(%%) - %%outfile(%%%) - %%outfile(%%%)
%%% Test: Various per line, glued
Date : %%date(%C)%%date%%date
Mtime : %%mtime(%C)%%mtime%%mtime
Infile : %%infile(%e)%%infile%%infile
Outfile : %%outfile(%e)%%outfile%%outfile
%%% Test: Path formatters
Path : %%infile(%p)
Path : %%outfile(%p)
Dirname : %%infile(%d, %D)
Dirname : %%outfile(%d, %D)
File : %%infile(%F + %e = %f)
File : %%outfile(%F + %e = %f)
= Numbered Title =[numtitle]
See [Title #title], the same rules apply.
= Title =[title]
%INCLUDED(t2t) starts here: ../../../test/marks/title.t2t
BODYINIT
%%% Syntax: Balanced equal signs (from 1 to 5)
= Title Level 1 =
== Title Level 2 ==
=== Title Level 3 ===
==== Title Level 4 ====
===== Title Level 5 =====
%%% Label: Between brackets, alphanumeric [A-Za-z0-9_-]
= Title Level 1 =[lab_el-1]
== Title Level 2 ==[lab_el-2]
=== Title Level 3 ===[lab_el-3]
==== Title Level 4 ====[lab_el-4]
===== Title Level 5 =====[lab_el-5]
%%% Syntax: Spaces around and/or inside are allowed (and ignored)
===Title Level 3===
=== Title Level 3 ===
=== Title Level 3 ===
=== Title Level 3 ===
=== Title Level 3 ===
=== Title Level 3 ===[lab_el-9]
%%% Invalid: Unbalanced equal signs
=Not Title
==Not Title=
===Not Title====
%%% Invalid: Level deeper than 5
======Not Title 6======
=======Not Title 7=======
%%% Invalid: Space between title and label
=Not Title= [label1]
%%% Invalid: Space inside label
=Not Title=[ label ]
%%% Invalid: Strange chars inside label
=Not Title=[la/bel]
= Quote =[quote]
%INCLUDED(t2t) starts here: ../../../test/marks/quote.t2t
BODYINIT
%%% Syntax: TAB defines quote
To quote a paragraph, just prefix it by a TAB
character. All the lines of the paragraph must
begin with a TAB.
Any non-tabbed line closes the quote block.
%%% Nesting: Creating deeper quotes
The number of leading TABs identifies the quote
block depth. This is quote level 1.
With two TABs, we are on the quote
level 2.
The more TABs, more deep is
the quote level.
There isn't a limit.
%%% Nesting: Reverse nesting works
This quote starts at
level 4.
Then its depth is decreased.
Counting down, one by one.
Until the level 1.
%%% Nesting: Random count
Unlike lists, any quote block is
independent, not part of a tree.
The TAB count don't need to be incremental
by one.
The nesting don't need
to follow any rule.
Quotes can be opened and closed
in any way.
You choose.
%%% Nesting: When not supported
Some targets (as sgml) don't support the
nesting of quotes. There is only one quote
level.
In this case, no matter how much
TABs are used to define the quote
block, it always will be level 1.
%%% Syntax: Spaces after TAB
Spaces AFTER the TAB character are allowed.
But be careful, it can be confusing.
%%% Invalid: Spaces before TAB
Spaces BEFORE the TAB character
invalidate the mark. It's not quote.
%%% Invalid: Paragraphs inside
Paragraph breaks inside a quote aren't
possible.
This sample are two separated quoted
paragraphs, not a quote block with
two paragraphs inside.
%%% Closing: EOF closes the open block
The end of the file (EOF) closes the
currently open quote block.
= Raw =[raw]
See [Verbatim #verbatim], the same rules apply.
= Verbatim =[verbatim]
%INCLUDED(t2t) starts here: ../../../test/marks/verbatim.t2t
BODYINIT
%%% Syntax: A single line
``` A verbatim line.
%%% Syntax: A single line with leading spaces
``` Another verbatim line, with leading spaces.
%%% Syntax: Area (block)
```
A verbatim area delimited
by lines with marks.
```
%%% Syntax: Area (block) with trailing spaces
```
Trailing spaces and TABs after the area marks
are allowed, but not encouraged nor documented.
```
%%% Invalid: No space between mark and contents
```Not a verbatim line, need one space after mark.
%%% Invalid: Leading spaces on block marks
```
Not a verbatim area.
The marks must be at the line beginning,
no leading spaces.
```
%%% Closing: EOF closes the open block
```
The end of the file (EOF) closes
the currently open verbatim area.
```
= Definition List =[deflist]
See [List #list], the same rules apply.
= Numbered List =[numlist]
See [List #list], the same rules apply.
= List =[list]
%INCLUDED(t2t) starts here: ../../../test/marks/list.t2t
BODYINIT
%%% Items: Prefixed by hyphen
- Use the hyphen to prefix list items.
- There must be one space after the hyphen.
- The list is closed by two consecutive blank lines.
%%% Items: Free leading spacing (indentation)
- The list can be indented on the source document.
- You can use any number of spaces.
- The result will be the same.
%%% Items: Vertical spacing between items
- Let one blank line between the list items.
- It will be maintained on the conversion.
- Some targets don't support this behavior.
- This one was separated by a line with blanks.
You can also put a blank line inside
the item contents and it will be preserved.
%%% Items: Exactly ONE space after the hyphen
-This is not a list (no space)
- This is not a list (more than one space)
- This is not a list (a TAB instead the space)
%%% Items: Catchy cases
- - This is a list
- + This is a list
- : This is a list
%%% Nesting: Creating sublists
- This is the "mother" list first item.
- Here is the second, but inside this item,
- there is a sublist, with its own items.
- Note that the items of the same sublist
- must have the same indentation.
- And this can go on, opening sublists.
- Just add leading spaces before the
- hyphen and sublists will be opened.
- The two blank lines closes them all.
%%% Nesting: Free leading spacing (indentation)
- When nesting lists, the additional spaces are free.
- You can add just one,
- or many.
- What matters is to put more than the previous.
- But remember that the other items of the same list
- must use the same indentation.
%%% Nesting: Maximum depth
- There is not a depth limit,
- you can go deeper and deeper.
- But some targets may have restrictions.
- The LaTeX maximum is here, 4 levels.
- This one and the following sublists
- are moved up to the level 4
- when converting to LaTeX.
- On the other targets,
- it is just fine
- to have a very deep list.
%%% Nesting: Reverse doesn't work
- Reverse nesting doesn't work.
- Because a sublist *must* have a mother list.
- It's the list concept, not a txt2tags limitation.
- All this sublists will be bumped to mother lists.
- At level 1, like this one.
%%% Nesting: Going deeper and back
%% When nesting back to an upper level, the previous sublist
%% is automatically closed.
- Level 1
- Level 2
- Level 3
- Level 4
- Level 3 -- (closed Level 4)
- Level 2 -- (closed Level 3)
- Level 1 -- (closed Level 2)
%% More than one list can be closed when nesting back.
- Level 1
- Level 2
- Level 3
- Level 4
- Level 1 -- (closed Level 4, Level 3 and Level 2)
%%% Nesting: Vertical spacing between lists
- Level 1
- Level 2 -- blank BEFORE and AFTER (in)
- Level 3
% comment lines are NOT considered blank lines
- Level 4
% comment lines are NOT considered blank lines
- Level 3
- Level 2 -- blank BEFORE and AFTER (out)
- Level 1
- Level 2 -- blank BEFORE (spaces) and AFTER (TAB)
- Level 3
%%% Nesting: Messing up
%% Be careful when going back on the nesting,
%% it must be on a valid level! If not, it will
%% be bumped up to the previous valid level.
- Level 1
- Level 2
- Level 3
- Level 4
- Level 3.5 ???
- Level 3
- Level 2.5 ???
- Level 2
- Level 1.5 ???
- Level 1
%%% Closing: Two (not so) empty lines
- This list is closed by a line with spaces and other with TABs
- This list is NOT closed by two comment lines
% comment lines are NOT considered blank lines
% comment lines are NOT considered blank lines
- This list is closed by a line with spaces and TAB,
- then a comment line, then an empty line.
% comment lines are NOT considered blank lines
%%% Closing: Empty item closes current (sub)list
%% The two blank lines closes ALL the lists.
%% To close just the current, use an empty item.
- Level 1
- Level 2
- Level 3
-
Level 2
-
Level 1
-
%% The empty item can have trailing blanks.
- Empty item with trailing spaces.
-
- Empty item with trailing TAB.
-
%%% Closing: EOF closes the lists
- If the end of the file (EOF) is hit,
- all the currently opened list are closed,
- just like when using the two blank lines.
= Table =[table]
%INCLUDED(t2t) starts here: ../../../test/marks/table.t2t
BODYINIT
%%% Syntax: Lines starting with a pipe |
| Cell 1
%%% Syntax: Extra pipes separate cells
| Cell 1 | Cell 2 | Cell 3
%%% Syntax: With a trailing pipe, make border
| Cell 1 | Cell 2 | Cell 3 |
%%% Syntax: Table lines starting with double pipe are heading
|| Cell 1 | Cell 2 | Cell 3 |
%%% Align: Spaces before the leading pipe centralize the table
| Cell 1 | Cell 2 | Cell 3 |
%%% Align: Spaces inside the cell denote its alignment
|| Heading | Heading | Heading |
% comments don't close an opened table
| <- | -- | -> |
| -- | -- | -- |
| -> | -- | <- |
%%% Span: Column span is defined by extra pipes at cell closing
|| 1 | 2 | 3+4 ||
| 1 | 2 | 3 | 4 |
| 1+2+3 ||| 4 |
| 1 | 2+3 || 4 |
| 1+2+3+4 ||||
%%% Test: Empty cells are placed as expected
| 0 | 1 | 2 | |
| 4 | 5 | | 7 |
| 8 | | A | B |
| | D | E | F |
%%% Test: Lines with different number of cells
| 1 |
| 1 | 2 |
| 1 | 2 | 3 |
| 1 | 2 | 3 | 4 |
| 1 | 2 | 3 | 4 | 5 |
%%% Test: Empty cells + Span + Messy cell number = Fun!
| Jan |
| Fev ||
| Mar |||
| Apr ||||
| May |||||
| 20% | 40% | 60% | 80% | 100% |
| | | / | | |
| | / / / / / ||| |
| / / / / / / / / / |||||
| | o | | o | |
| | | . | | |
| | = = = = ||| |
| 01 | 02 | | | 05 | | 07 | |
| | | 11 | | 13 | | | 16 |
| 17 | | 19 | 20 | | | 23 | |
| 25 | 26 | | | 29 | 30 | | 32 |
| | | 35 | | 37 | | 39 | 40 |
%%% Test: Lots of cells at the same line
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F |
%%% Test: Empty lines
| |
| |
| |
%%% Invalid: There must be at least one space around the pipe
|this|is|not|a|table|
|this| is| not| a| table|
|this |is |not |a |table |
%%% Invalid: You must use spaces, not TABs
| this | is | not | a | table |
------------------------------------------------------------
The End.

View File

@ -0,0 +1,22 @@
# format=tagmanager
CommentÌ64Ö0
Definition ListÌ64Ö0
ImageÌ64Ö0
InlineÌ64Ö0
LineÌ64Ö0
LinkÌ64Ö0
ListÌ64Ö0
MacroÌ64Ö0
Numbered ListÌ64Ö0
Numbered TitleÌ64Ö0
ParagraphÌ64Ö0
QuoteÌ64Ö0
RawÌ64Ö0
TableÌ64Ö0
TitleÌ64Ö0
Title Level 1Ì64Ö0
Title Level 2Ì64ÎTitle Level 1Ö0
Title Level 3Ì64ÎTitle Level 1Title Level 2Ö0
Title Level 4Ì64ÎTitle Level 1Title Level 2Title Level 3Ö0
Title Level 5Ì64ÎTitle Level 1Title Level 2Title Level 3Title Level 4Ö0
VerbatimÌ64Ö0

225
tests/ctags/sample.t2t Normal file
View File

@ -0,0 +1,225 @@
TXT2TAGS SAMPLE
Aurelio Jargas
%%mtime(%m/%d/%Y)
%!encoding: UTF-8
This text is before the introduction.
But it's OK.
= Introduction =
Welcome to the txt2tags sample file.
Here you have examples and a brief explanation of all
marks.
The first 3 lines of this file are used as headers,
on the following format:
```
line1: document title
line2: author name, email
line3: date, version
```
Lines with balanced equal signs = around are titles.
% a secret comment!
%TODO link to program site http://txt2tags.org
= Fonts and Beautifiers =
We have two sets of fonts:
The NORMAL type that can be improved with beautifiers.
The TYPEWRITER type that uses monospaced font for
pre-formatted text.
We will now enter on a subtitle...
== Beautifiers ==
The text marks for beautifiers are simple, just as you
type on a plain text email message.
We use double *, /, - and _ to represent **bold**,
//italic//, --strike-- and __underline__.
The **//bold italic//** style is also supported as a
combination.
== Pre-Formatted Text ==
We can put a code sample or other pre-formatted text:
```
here is pre-formatted
//marks// are **not** ``interpreted``
```
And also, it's easy to put a one line pre-formatted
text:
``` prompt$ ls /etc
Or use ``pre-formatted`` inside sentences.
== More Cosmetics ==
Special entities like email (duh@somewhere.com) and
URL (http://www.duh.com) are detected automagically,
as long as the horizontal line:
--------------------------------------------------------
^ thin or large v
========================================================
You can also specify an [explicit link http://duh.org]
or an [explicit email duh@somewhere.com] with label.
And remember,
A TAB in front of the line does a quotation.
More TABs, more depth (if allowed).
Nice.
= Lists =
A list of items is natural, just putting a **dash** or
a **plus** at the beginning of the line.
== Plain List ==
The dash is the default list identifier. For sublists,
just add **spaces** at the beginning of the line. More
spaces, more sublists.
- Earth
- America
- South America
- Brazil
- How deep can I go?
- Europe
- Lots of countries
- Mars
- Who knows?
The list ends with **two** consecutive blank lines.
== Numbered List ==
The same rules as the plain list, just a different
identifier (plus).
+ one
+ two
+ three
- mixed lists!
- what a mess
+ counting again
+ ...
+ four
== Definition List ==
The definition list identifier is a colon, followed by
the term. The term contents is placed on the next line.
: orange
a yellow fruit
: apple
a green or red fruit
: other fruits
- wee!
- mixing lists
+ again!
+ and again!
= Tables =
Use pipes to compose table rows and cells.
Double pipe at the line beginning starts a heading row.
Natural spaces specify each cell alignment.
| cell 1.1 | cell 1.2 | cell 1.3 |
| cell 2.1 | cell 2.2 | cell 2.3 |
| cell 3.1 | cell 3.2 | cell 3.3 |
|| heading 1 | heading 2 | heading 3 |
| cell 1.1 | cell 1.2 | cell 1.3 |
| cell 2.1 | cell 2.2 | cell 2.3 |
|_ heading 1 | cell 1.1 | cell 1.2 |
| heading 2 | cell 2.1 | cell 2.2 |
| heading 3 | cell 3.1 | cell 3.2 |
|/ heading | heading 1 | heading 2 |
| heading 1 | cell 1.1 | cell 1.2 |
| heading 2 | cell 2.1 | cell 2.2 |
Without the last pipe, no border:
| cell 1.1 | cell 1.2 | cell 1.3
| cell 2.1 | cell 2.2 | cell 2.3
| cell 3.1 | cell 3.2 | cell 3.3
|| heading 1 | heading 2 | heading 3
| cell 1.1 | cell 1.2 | cell 1.3
| cell 2.1 | cell 2.2 | cell 2.3
|_ heading 1 | cell 1.1 | cell 1.2
| heading 2 | cell 2.1 | cell 2.2
| heading 3 | cell 3.1 | cell 3.2
|/ heading | heading 1 | heading 2
| heading 1 | cell 1.1 | cell 1.2
| heading 2 | cell 2.1 | cell 2.2
= Special Entities =
Because things were too simple.
== Images ==
The image mark is as simple as it can be: ``[filename]``.
[img/photo.jpg]
And with some targets the image is linkable :
[[img/photo.jpg] http://www.txt2tags.org]
- The filename must end in PNG, JPG, GIF, or similar.
- No spaces inside the brackets!
== Other ==
When the target needs, special chars like <, > and &
are escaped.
The handy ``%%date`` macro expands to the current date.
So today is %%date on the ISO ``YYYYMMDD`` format.
You can also specify the date format with the %? flags,
as ``%%date(%m-%d-%Y)`` which gives: %%date(%m-%d-%Y).
That's all for now.
-------------------------------------------------------
%%% TRANSLATOR: Uncomment and translate the next two lines
%Translated by John Smith.
%-------------------------------------------------------
[img/t2tpowered.png] ([%%infile %%infile])
% vim: tw=55

View File

@ -0,0 +1,14 @@
# format=tagmanager
BeautifiersÌ64ÎFonts and BeautifiersÖ0
Definition ListÌ64ÎListsÖ0
Fonts and BeautifiersÌ64Ö0
ImagesÌ64ÎSpecial EntitiesÖ0
IntroductionÌ64Ö0
ListsÌ64Ö0
More CosmeticsÌ64ÎFonts and BeautifiersÖ0
Numbered ListÌ64ÎListsÖ0
OtherÌ64ÎSpecial EntitiesÖ0
Plain ListÌ64ÎListsÖ0
Pre-Formatted TextÌ64ÎFonts and BeautifiersÖ0
Special EntitiesÌ64Ö0
TablesÌ64Ö0

40
tests/ctags/titles.t2t Normal file
View File

@ -0,0 +1,40 @@
% from rules.t2t, with unique title content as our output removed
% duplicate tags (even with different scopes)
%%% Syntax: Balanced equal signs (from 1 to 5)
= First Title Level 1 =
== First Title Level 2 ==
=== First Title Level 3 ===
==== First Title Level 4 ====
===== First Title Level 5 =====
%%% Label: Between brackets, alphanumeric [A-Za-z0-9_-]
= Second Title Level 1 =[lab_el-1]
== Second Title Level 2 ==[lab_el-2]
=== Second Title Level 3 ===[lab_el-3]
==== Second Title Level 4 ====[lab_el-4]
===== Second Title Level 5 =====[lab_el-5]
%%% Syntax: Spaces around and/or inside are allowed (and ignored)
===Third Title Level 3===
=== Fourth Title Level 3 ===
=== Fifth Title Level 3 ===
=== Sixth Title Level 3 ===
=== Seventh Title Level 3 ===
=== Eighth Title Level 3 ===[lab_el-9]
%%% Invalid: Unbalanced equal signs
=First Not Title
==Second Not Title=
===Third Not Title====
%%% Invalid: Level deeper than 5
======First Not Title 6======
=======First Not Title 7=======
%%% Invalid: Space between title and label
=Fourth Not Title= [label1]
%%% Invalid: Space inside label
=Fifth Not Title=[ label ]
%%% Invalid: Strange chars inside label
=Sixth Not Title=[la/bel]

View File

@ -0,0 +1,17 @@
# format=tagmanager
Eighth Title Level 3Ì64ÎSecond Title Level 1Second Title Level 2Ö0
Fifth Title Level 3Ì64ÎSecond Title Level 1Second Title Level 2Ö0
First Title Level 1Ì64Ö0
First Title Level 2Ì64ÎFirst Title Level 1Ö0
First Title Level 3Ì64ÎFirst Title Level 1First Title Level 2Ö0
First Title Level 4Ì64ÎFirst Title Level 1First Title Level 2First Title Level 3Ö0
First Title Level 5Ì64ÎFirst Title Level 1First Title Level 2First Title Level 3First Title Level 4Ö0
Fourth Title Level 3Ì64ÎSecond Title Level 1Second Title Level 2Ö0
Second Title Level 1Ì64Ö0
Second Title Level 2Ì64ÎSecond Title Level 1Ö0
Second Title Level 3Ì64ÎSecond Title Level 1Second Title Level 2Ö0
Second Title Level 4Ì64ÎSecond Title Level 1Second Title Level 2Second Title Level 3Ö0
Second Title Level 5Ì64ÎSecond Title Level 1Second Title Level 2Second Title Level 3Second Title Level 4Ö0
Seventh Title Level 3Ì64ÎSecond Title Level 1Second Title Level 2Ö0
Sixth Title Level 3Ì64ÎSecond Title Level 1Second Title Level 2Ö0
Third Title Level 3Ì64ÎSecond Title Level 1Second Title Level 2Ö0