const std = @import("std");
const io = std.io;
const os = std.os;
const warn = std.debug.warn;
const mem = std.mem;

pub const max_doc_file_size = 10 * 1024 * 1024;

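// This program expands "{#...#}" markup tags in a documentation template into
// HTML. main() reads the input and output paths from the command line, loads
// the whole input file into memory, tokenizes it, builds a table of contents
// (genToc), then renders the final HTML (genHtml) through a buffered stream.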
pub fn main() -> %void {
    // TODO use a more general purpose allocator here
    var inc_allocator = try std.heap.IncrementingAllocator.init(max_doc_file_size);
    defer inc_allocator.deinit();
    const allocator = &inc_allocator.allocator;

    var args_it = os.args();

    if (!args_it.skip()) @panic("expected self arg");

    const in_file_name = try (args_it.next(allocator) ?? @panic("expected input arg"));
    defer allocator.free(in_file_name);

    const out_file_name = try (args_it.next(allocator) ?? @panic("expected output arg"));
    defer allocator.free(out_file_name);

    var in_file = try io.File.openRead(in_file_name, allocator);
    defer in_file.close();

    var out_file = try io.File.openWrite(out_file_name, allocator);
    defer out_file.close();

    var file_in_stream = io.FileInStream.init(&in_file);

    const input_file_bytes = try file_in_stream.stream.readAllAlloc(allocator, max_doc_file_size);

    var file_out_stream = io.FileOutStream.init(&out_file);
    var buffered_out_stream = io.BufferedOutStream.init(&file_out_stream.stream);

    var tokenizer = Tokenizer.init(in_file_name, input_file_bytes);
    var toc = try genToc(allocator, &tokenizer);

    try genHtml(allocator, &tokenizer, &toc, &buffered_out_stream.stream);
    try buffered_out_stream.flush();
}

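// A token of the template language. Everything outside of a tag is Content;
// a tag has the shape {#tag_name|arg|...#}, which tokenizes as BracketOpen,
// TagContent, optional Separator/TagContent pairs, and BracketClose.
// Invalid marks a tag left unterminated at end of file.
//
// A minimal illustrative input (not taken from this file) would be:
//
//   {#nav#}
//   {#header_open|Introduction#}
//   <p>Hello.</p>
//   {#header_close#}
//   {#see_also|Introduction#}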
const Token = struct {
    id: Id,
    start: usize,
    end: usize,

    const Id = enum {
        Invalid,
        Content,
        BracketOpen,
        TagContent,
        Separator,
        BracketClose,
        Eof,
    };
};

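// Hand-written scanner over the whole input buffer. The state lives on the
// struct rather than inside next() because a token can end mid-tag; the
// following call to next() resumes in whatever state the previous call left
// off in.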
const Tokenizer = struct {
    buffer: []const u8,
    index: usize,
    state: State,
    source_file_name: []const u8,

    const State = enum {
        Start,
        LBracket,
        Hash,
        TagName,
        Eof,
    };

    fn init(source_file_name: []const u8, buffer: []const u8) -> Tokenizer {
        return Tokenizer {
            .buffer = buffer,
            .index = 0,
            .state = State.Start,
            .source_file_name = source_file_name,
        };
    }

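    // Returns the next token starting at self.index. Content runs until a
    // "{#" opener; inside a tag, TagContent runs until a '|' separator or a
    // "#}" closer. If the buffer ends while still inside a tag, the token id
    // is Invalid.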
    fn next(self: &Tokenizer) -> Token {
        var result = Token {
            .id = Token.Id.Eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (self.state) {
                State.Start => switch (c) {
                    '{' => {
                        self.state = State.LBracket;
                    },
                    else => {
                        result.id = Token.Id.Content;
                    },
                },
                State.LBracket => switch (c) {
                    '#' => {
                        if (result.id != Token.Id.Eof) {
                            self.index -= 1;
                            self.state = State.Start;
                            break;
                        } else {
                            result.id = Token.Id.BracketOpen;
                            self.index += 1;
                            self.state = State.TagName;
                            break;
                        }
                    },
                    else => {
                        result.id = Token.Id.Content;
                        self.state = State.Start;
                    },
                },
                State.TagName => switch (c) {
                    '|' => {
                        if (result.id != Token.Id.Eof) {
                            break;
                        } else {
                            result.id = Token.Id.Separator;
                            self.index += 1;
                            break;
                        }
                    },
                    '#' => {
                        self.state = State.Hash;
                    },
                    else => {
                        result.id = Token.Id.TagContent;
                    },
                },
                State.Hash => switch (c) {
                    '}' => {
                        if (result.id != Token.Id.Eof) {
                            self.index -= 1;
                            self.state = State.TagName;
                            break;
                        } else {
                            result.id = Token.Id.BracketClose;
                            self.index += 1;
                            self.state = State.Start;
                            break;
                        }
                    },
                    else => {
                        result.id = Token.Id.TagContent;
                        self.state = State.TagName;
                    },
                },
                State.Eof => unreachable,
            }
        } else {
            switch (self.state) {
                State.Start, State.LBracket, State.Eof => {},
                else => {
                    result.id = Token.Id.Invalid;
                },
            }
            self.state = State.Eof;
        }
        result.end = self.index;
        return result;
    }

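    // Source position of a token, used only for error messages.
    // getTokenLocation scans from the start of the buffer, counting lines and
    // columns until token.start, and records the bounds of the containing
    // line so it can be echoed under the diagnostic.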
    const Location = struct {
        line: usize,
        column: usize,
        line_start: usize,
        line_end: usize,
    };

    fn getTokenLocation(self: &Tokenizer, token: &const Token) -> Location {
        var loc = Location {
            .line = 0,
            .column = 0,
            .line_start = 0,
            .line_end = 0,
        };
        for (self.buffer) |c, i| {
            if (i == token.start) {
                loc.line_end = i;
                while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
                return loc;
            }
            if (c == '\n') {
                loc.line += 1;
                loc.column = 0;
                loc.line_start = i + 1;
            } else {
                loc.column += 1;
            }
        }
        return loc;
    }
};

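// parseError prints a compiler-style "file:line:col: error: ..." message,
// echoes the offending source line with '~' markers under the token, and
// returns error.ParseError so callers can simply propagate the failure.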
error ParseError;

fn parseError(tokenizer: &Tokenizer, token: &const Token, comptime fmt: []const u8, args: ...) -> error {
    const loc = tokenizer.getTokenLocation(token);
    warn("{}:{}:{}: error: " ++ fmt ++ "\n", tokenizer.source_file_name, loc.line + 1, loc.column + 1, args);
    if (loc.line_start <= loc.line_end) {
        warn("{}\n", tokenizer.buffer[loc.line_start..loc.line_end]);
        {
            var i: usize = 0;
            while (i < loc.column) : (i += 1) {
                warn(" ");
            }
        }
        {
            const caret_count = token.end - token.start;
            var i: usize = 0;
            while (i < caret_count) : (i += 1) {
                warn("~");
            }
        }
        warn("\n");
    }
    return error.ParseError;
}

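// assertToken reports a parse error if a token does not have the expected id;
// eatToken pulls the next token from the tokenizer and asserts its id in one
// step.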
fn assertToken(tokenizer: &Tokenizer, token: &const Token, id: Token.Id) -> %void {
    if (token.id != id) {
        return parseError(tokenizer, token, "expected {}, found {}", @tagName(id), @tagName(token.id));
    }
}

fn eatToken(tokenizer: &Tokenizer, id: Token.Id) -> %Token {
    const token = tokenizer.next();
    try assertToken(tokenizer, token, id);
    return token;
}

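// An opened section header: its display name, its urlized anchor, and its
// 1-based nesting depth (used as the <hN> level when rendering).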
const HeaderOpen = struct {
    name: []const u8,
    url: []const u8,
    n: usize,
};

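// One entry of a {#see_also|...#} list. The token is kept so that a reference
// to a nonexistent header can be reported at the right source location.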
const SeeAlsoItem = struct {
    name: []const u8,
    token: Token,
};

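// One element of the parsed document: literal Content to copy through, the
// Nav placeholder (replaced by the table of contents), an opened header, or a
// see-also list.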
const Node = union(enum) {
    Content: []const u8,
    Nav,
    HeaderOpen: HeaderOpen,
    SeeAlso: []const SeeAlsoItem,
};

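// Output of the first pass (genToc): the parsed nodes, the rendered
// table-of-contents HTML, and a map from each header url to the token that
// introduced it, used to detect duplicates and to validate see_also targets.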
const Toc = struct {
    nodes: []Node,
    toc: []u8,
    urls: std.HashMap([]const u8, Token, mem.hash_slice_u8, mem.eql_slice_u8),
};

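// Tracks whether the previous header tag opened or closed a section, which
// determines when the table of contents needs to open or close a nested <ul>.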
const Action = enum {
    Open,
    Close,
};

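// First pass: walks the token stream, collecting nodes and building the
// nested <ul> markup for the table of contents. Reports an error for
// unbalanced header_open/header_close pairs, duplicate header urls, and
// unrecognized tags.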
fn genToc(allocator: &mem.Allocator, tokenizer: &Tokenizer) -> %Toc {
    var urls = std.HashMap([]const u8, Token, mem.hash_slice_u8, mem.eql_slice_u8).init(allocator);
    %defer urls.deinit();

    var header_stack_size: usize = 0;
    var last_action = Action.Open;

    var toc_buf = try std.Buffer.initSize(allocator, 0);
    defer toc_buf.deinit();

    var toc_buf_adapter = io.BufferOutStream.init(&toc_buf);
    var toc = &toc_buf_adapter.stream;

    var nodes = std.ArrayList(Node).init(allocator);
    defer nodes.deinit();

    try toc.writeByte('\n');

    while (true) {
        const token = tokenizer.next();
        switch (token.id) {
            Token.Id.Eof => {
                if (header_stack_size != 0) {
                    return parseError(tokenizer, token, "unbalanced headers");
                }
                try toc.write(" </ul>\n");
                break;
            },
            Token.Id.Content => {
                try nodes.append(Node {.Content = tokenizer.buffer[token.start..token.end]});
            },
            Token.Id.BracketOpen => {
                const tag_token = try eatToken(tokenizer, Token.Id.TagContent);
                const tag_name = tokenizer.buffer[tag_token.start..tag_token.end];

                if (mem.eql(u8, tag_name, "nav")) {
                    _ = eatToken(tokenizer, Token.Id.BracketClose);

                    try nodes.append(Node.Nav);
                } else if (mem.eql(u8, tag_name, "header_open")) {
                    _ = eatToken(tokenizer, Token.Id.Separator);
                    const content_token = try eatToken(tokenizer, Token.Id.TagContent);
                    const content = tokenizer.buffer[content_token.start..content_token.end];
                    _ = eatToken(tokenizer, Token.Id.BracketClose);

                    header_stack_size += 1;

                    const urlized = try urlize(allocator, content);
                    try nodes.append(Node{.HeaderOpen = HeaderOpen {
                        .name = content,
                        .url = urlized,
                        .n = header_stack_size,
                    }});
                    if (try urls.put(urlized, tag_token)) |other_tag_token| {
                        parseError(tokenizer, tag_token, "duplicate header url: #{}", urlized) catch {};
                        parseError(tokenizer, other_tag_token, "other tag here") catch {};
                        return error.ParseError;
                    }
                    if (last_action == Action.Open) {
                        try toc.writeByte('\n');
                        try toc.writeByteNTimes(' ', header_stack_size * 4);
                        try toc.write("<ul>\n");
                    } else {
                        last_action = Action.Open;
                    }
                    try toc.writeByteNTimes(' ', 4 + header_stack_size * 4);
                    try toc.print("<li><a href=\"#{}\">{}</a>", urlized, content);
                } else if (mem.eql(u8, tag_name, "header_close")) {
                    if (header_stack_size == 0) {
                        return parseError(tokenizer, tag_token, "unbalanced close header");
                    }
                    header_stack_size -= 1;
                    _ = eatToken(tokenizer, Token.Id.BracketClose);

                    if (last_action == Action.Close) {
                        try toc.writeByteNTimes(' ', 8 + header_stack_size * 4);
                        try toc.write("</ul></li>\n");
                    } else {
                        try toc.write("</li>\n");
                        last_action = Action.Close;
                    }
                } else if (mem.eql(u8, tag_name, "see_also")) {
                    var list = std.ArrayList(SeeAlsoItem).init(allocator);
                    %defer list.deinit();

                    while (true) {
                        const see_also_tok = tokenizer.next();
                        switch (see_also_tok.id) {
                            Token.Id.TagContent => {
                                const content = tokenizer.buffer[see_also_tok.start..see_also_tok.end];
                                try list.append(SeeAlsoItem {
                                    .name = content,
                                    .token = see_also_tok,
                                });
                            },
                            Token.Id.Separator => {},
                            Token.Id.BracketClose => {
                                try nodes.append(Node {.SeeAlso = list.toOwnedSlice()});
                                break;
                            },
                            else => return parseError(tokenizer, see_also_tok, "invalid see_also token"),
                        }
                    }
                } else {
                    return parseError(tokenizer, tag_token, "unrecognized tag name: {}", tag_name);
                }
            },
            else => return parseError(tokenizer, token, "invalid token"),
        }
    }

    return Toc {
        .nodes = nodes.toOwnedSlice(),
        .toc = toc_buf.toOwnedSlice(),
        .urls = urls,
    };
}

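// Converts a header name into an anchor id: ASCII letters, '_' and '-' are
// kept, spaces become '-', and all other characters are dropped.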
fn urlize(allocator: &mem.Allocator, input: []const u8) -> %[]u8 {
    var buf = try std.Buffer.initSize(allocator, 0);
    defer buf.deinit();

    var buf_adapter = io.BufferOutStream.init(&buf);
    var out = &buf_adapter.stream;
    for (input) |c| {
        switch (c) {
            'a'...'z', 'A'...'Z', '_', '-' => {
                try out.writeByte(c);
            },
            ' ' => {
                try out.writeByte('-');
            },
            else => {},
        }
    }
    return buf.toOwnedSlice();
}

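// Second pass: renders the nodes to HTML. Content is copied verbatim, Nav is
// replaced with the table of contents, headers become <hN id="..."> elements,
// and each see_also entry is checked against toc.urls before being emitted as
// a link.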
fn genHtml(allocator: &mem.Allocator, tokenizer: &Tokenizer, toc: &Toc, out: &io.OutStream) -> %void {
    for (toc.nodes) |node| {
        switch (node) {
            Node.Content => |data| {
                try out.write(data);
            },
            Node.Nav => {
                try out.write(toc.toc);
            },
            Node.HeaderOpen => |info| {
                try out.print("<h{} id=\"{}\">{}</h{}>\n", info.n, info.url, info.name, info.n);
            },
            Node.SeeAlso => |items| {
                try out.write("<p>See also:</p><ul>\n");
                for (items) |item| {
                    const url = try urlize(allocator, item.name);
                    if (!toc.urls.contains(url)) {
                        return parseError(tokenizer, item.token, "url not found: {}", url);
                    }
                    try out.print("<li><a href=\"#{}\">{}</a></li>\n", url, item.name);
                }
                try out.write("</ul>\n");
            },
        }
    }
}