Copy source to commonmarker

master
Melroy van den Berg 2020-11-13 01:57:29 +01:00
parent 8494daf274
commit 7921c28183
54 changed files with 28532 additions and 0 deletions

103
lib/commonmarker/arena.c Normal file
View File

@ -0,0 +1,103 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"
/* One slab of arena storage. Chunks form a singly linked list through `prev`;
 * `A` is the head of that list and the chunk arena_calloc() normally carves
 * allocations from. */
static struct arena_chunk {
/* sz: number of bytes available at `ptr`; used: bytes of `ptr` already handed out. */
size_t sz, used;
/* Set on the head chunk by cmark_arena_push(); cmark_arena_pop() frees chunks
 * until it reaches one with this flag set, then clears the flag. */
uint8_t push_point;
/* Zero-filled backing storage obtained from calloc(). */
void *ptr;
/* Next chunk in the list, or NULL at the end. */
struct arena_chunk *prev;
} *A = NULL;
/* Create a chunk holding `sz` bytes of zero-filled storage and link it in
 * front of `prev`. Aborts the process if either allocation fails. */
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
  struct arena_chunk *chunk =
      (struct arena_chunk *)calloc(1, sizeof(struct arena_chunk));
  if (chunk == NULL)
    abort();

  void *storage = calloc(1, sz);
  if (storage == NULL)
    abort();

  chunk->sz = sz;
  chunk->ptr = storage;
  chunk->prev = prev;
  return chunk;
}
/* Mark the current head chunk as a push point and start a fresh 10240-byte
 * chunk in front of it. Does nothing if the arena has not been created yet. */
void cmark_arena_push(void) {
  if (A != NULL) {
    A->push_point = 1;
    A = alloc_arena_chunk(10240, A);
  }
}
/* Free every chunk in front of the most recent push point and clear that
 * push point's flag. Returns 0 if the arena is empty on entry, 1 otherwise. */
int cmark_arena_pop(void) {
  struct arena_chunk *next;

  if (A == NULL)
    return 0;

  /* Release chunks from the head until a push point (or the end of the list). */
  for (; A != NULL && A->push_point == 0; A = next) {
    next = A->prev;
    free(A->ptr);
    free(A);
  }

  if (A != NULL)
    A->push_point = 0;
  return 1;
}
/* Create the first arena chunk (4 * 1048576 bytes) and make it the head. */
static void init_arena(void) {
  const size_t initial_bytes = 4 * 1048576;
  A = alloc_arena_chunk(initial_bytes, NULL);
}
void cmark_arena_reset(void) {
while (A) {
free(A->ptr);
struct arena_chunk *n = A->prev;
free(A);
A = n;
}
}
static void *arena_calloc(size_t nmem, size_t size) {
if (!A)
init_arena();
size_t sz = nmem * size + sizeof(size_t);
// Round allocation sizes to largest integer size to
// ensure returned memory is correctly aligned
const size_t align = sizeof(size_t) - 1;
sz = (sz + align) & ~align;
if (sz > A->sz) {
A->prev = alloc_arena_chunk(sz, A->prev);
return (uint8_t *) A->prev->ptr + sizeof(size_t);
}
if (sz > A->sz - A->used) {
A = alloc_arena_chunk(A->sz + A->sz / 2, A);
}
void *ptr = (uint8_t *) A->ptr + A->used;
A->used += sz;
*((size_t *) ptr) = sz - sizeof(size_t);
return (uint8_t *) ptr + sizeof(size_t);
}
/*
 * Return a new arena block of `size` bytes whose leading bytes are copied
 * from `ptr` (if non-NULL). The old block is not released; arena memory is
 * only reclaimed by cmark_arena_pop() / cmark_arena_reset().
 *
 * `ptr` must have been returned by this allocator: its payload size is read
 * from the size_t header stored immediately before it.
 */
static void *arena_realloc(void *ptr, size_t size) {
  if (!A)
    init_arena();

  void *new_ptr = arena_calloc(1, size);
  if (ptr) {
    size_t old_size = ((size_t *) ptr)[-1];
    // Never copy more than the caller asked for: when shrinking, the old
    // payload can be larger than the new block.
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }
  return new_ptr;
}
/* Free hook for the arena allocator: deliberately does nothing. Chunks are
 * released only by cmark_arena_pop() and cmark_arena_reset(). */
static void arena_free(void *ptr) {
  /* no-op */
  (void)ptr;
}
/* Allocator table backed by the arena: calloc/realloc carve memory out of
 * arena chunks and free is a no-op. */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};

/* Return a pointer to the arena-backed allocator table. Declared with an
 * explicit (void) parameter list so the definition is a real prototype. */
cmark_mem *cmark_get_arena_mem_allocator(void) {
  return &CMARK_ARENA_MEM_ALLOCATOR;
}

1585
lib/commonmarker/blocks.c Normal file

File diff suppressed because it is too large Load Diff

278
lib/commonmarker/buffer.c Normal file
View File

@ -0,0 +1,278 @@
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#include "config.h"
#include "cmark_ctype.h"
#include "buffer.h"
/* Used as default value for cmark_strbuf->ptr so that people can always
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
*
* The array has static storage duration and no initializer, so its single
* byte starts out as zero. cmark_strbuf_free() compares a buffer's ptr with
* this address so that it is never handed to the allocator's free().
*/
unsigned char cmark_strbuf__initbuf[1];
#ifndef MIN
/* Smaller of two values. Each argument is parenthesised so operands that
 * contain lower-precedence operators (e.g. `a | b`) are compared as a whole.
 * The selected argument is evaluated twice, so avoid side effects. */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
/* Put `buf` into the empty state (pointing at the shared empty buffer) using
 * allocator `mem`, then reserve storage if `initial_size` is positive. */
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
                       bufsize_t initial_size) {
  buf->mem = mem;
  buf->ptr = cmark_strbuf__initbuf;
  buf->size = 0;
  buf->asize = 0;

  if (initial_size <= 0)
    return;
  cmark_strbuf_grow(buf, initial_size);
}
/* Ensure there is room for `add` more bytes beyond the current contents. */
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
  const bufsize_t wanted = buf->size + add;
  cmark_strbuf_grow(buf, wanted);
}
/* Make sure the buffer's capacity exceeds `target_size` bytes.
 *
 * Returns immediately when the capacity is already larger than the target.
 * Otherwise the storage is reallocated to 1.5x the target plus one byte,
 * rounded up to a multiple of 8. Requests above INT32_MAX / 2 print a
 * message to stderr and abort the process. */
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
  assert(target_size > 0);

  if (buf->asize > target_size)
    return;

  if (target_size > (bufsize_t)(INT32_MAX / 2)) {
    fprintf(stderr,
            "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n",
            (INT32_MAX / 2));
    abort();
  }

  /* Oversize by 50% so that repeated appends cost amortized linear time. */
  bufsize_t new_size = target_size + target_size / 2 + 1;
  new_size = (new_size + 7) & ~7;

  /* The shared empty buffer was never allocated, so it must not be passed
   * to realloc. */
  unsigned char *previous = NULL;
  if (buf->asize)
    previous = buf->ptr;

  buf->ptr = (unsigned char *)buf->mem->realloc(previous, new_size);
  buf->asize = new_size;
}
/* Return the number of bytes currently stored in `buf`. */
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) {
  const bufsize_t length = buf->size;
  return length;
}
/* Release the buffer's storage (unless it is the shared empty buffer) and
 * reset `buf` to the empty state. A NULL `buf` is ignored. */
void cmark_strbuf_free(cmark_strbuf *buf) {
  if (buf == NULL)
    return;

  unsigned char *storage = buf->ptr;
  if (storage != cmark_strbuf__initbuf)
    buf->mem->free(storage);

  cmark_strbuf_init(buf->mem, buf, 0);
}
/* Set the length to zero while keeping any allocated storage; allocated
 * storage is left holding an empty NUL-terminated string. */
void cmark_strbuf_clear(cmark_strbuf *buf) {
  buf->size = 0;
  if (buf->asize <= 0)
    return;
  buf->ptr[0] = '\0';
}
/* Replace the buffer contents with `len` bytes from `data` and NUL-terminate.
 * A NULL `data` or non-positive `len` clears the buffer instead. When `data`
 * is the buffer's own storage, only the length and terminator are updated. */
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
                      bufsize_t len) {
  if (data == NULL || len <= 0) {
    cmark_strbuf_clear(buf);
    return;
  }

  if (buf->ptr != data) {
    if (buf->asize <= len)
      cmark_strbuf_grow(buf, len);
    memmove(buf->ptr, data, len);
  }
  buf->size = len;
  buf->ptr[len] = '\0';
}
/* Replace the buffer contents with the NUL-terminated `string`; a NULL
 * `string` clears the buffer. */
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) {
  bufsize_t len = 0;
  if (string != NULL)
    len = (bufsize_t)strlen(string);
  cmark_strbuf_set(buf, (const unsigned char *)string, len);
}
/* Append the low eight bits of `c` as one byte and keep the buffer
 * NUL-terminated. */
void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
  S_strbuf_grow_by(buf, 1);
  buf->ptr[buf->size] = (unsigned char)(c & 0xFF);
  buf->size += 1;
  buf->ptr[buf->size] = '\0';
}
/* Append `len` bytes from `data` and keep the buffer NUL-terminated.
 * Does nothing when `len` is zero or negative. */
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
                      bufsize_t len) {
  if (len > 0) {
    S_strbuf_grow_by(buf, len);
    unsigned char *tail = buf->ptr + buf->size;
    memmove(tail, data, len);
    tail[len] = '\0';
    buf->size += len;
  }
}
/* Append the NUL-terminated `string` to the buffer.
 *
 * A NULL `string` appends nothing, mirroring how cmark_strbuf_sets() accepts
 * NULL; previously NULL was passed straight to strlen(), which is undefined
 * behaviour. */
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) {
  if (string == NULL)
    return;
  cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string));
}
/* Copy the buffer contents into `data` (capacity `datasize` bytes) as a
 * NUL-terminated string, truncating to at most `datasize - 1` bytes.
 * Nothing is written when `data` is NULL or `datasize` is not positive. */
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
                            const cmark_strbuf *buf) {
  assert(buf);

  if (data == NULL || datasize <= 0)
    return;

  data[0] = '\0';
  if (buf->size == 0 || buf->asize <= 0)
    return;

  const bufsize_t room = datasize - 1;
  const bufsize_t copylen = (buf->size > room) ? room : buf->size;
  memmove(data, buf->ptr, copylen);
  data[copylen] = '\0';
}
/* Exchange the complete state (allocator, storage, sizes) of two buffers. */
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) {
  cmark_strbuf saved_b = *buf_b;
  *buf_b = *buf_a;
  *buf_a = saved_b;
}
/* Hand the buffer's storage to the caller and reset `buf` to the empty state.
 * If nothing was ever allocated, a freshly allocated one-byte empty string is
 * returned instead and `buf` is left untouched. */
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
  if (buf->asize != 0) {
    unsigned char *owned = buf->ptr;
    cmark_strbuf_init(buf->mem, buf, 0);
    return owned;
  }

  /* return an empty string */
  return (unsigned char *)buf->mem->calloc(1, 1);
}
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) {
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
return (result != 0) ? result
: (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
}
/* Return the index of the first byte equal to `c` at or after `pos`, or -1
 * if there is none or `pos` is past the end. A negative `pos` is treated
 * as 0. */
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  if (pos >= buf->size)
    return -1;

  const bufsize_t start = (pos < 0) ? 0 : pos;
  const unsigned char *base = (const unsigned char *)buf->ptr;
  const unsigned char *hit =
      (unsigned char *)memchr(buf->ptr + start, c, buf->size - start);

  return hit ? (bufsize_t)(hit - base) : -1;
}
/* Return the index of the last byte equal to `c` at or before `pos` (clamped
 * to the final byte), or -1 if there is none, the buffer is empty, or `pos`
 * is negative. */
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  if (buf->size == 0 || pos < 0)
    return -1;

  bufsize_t i = (pos >= buf->size) ? buf->size - 1 : pos;
  const unsigned char wanted = (unsigned char)c;

  while (i >= 0) {
    if (buf->ptr[i] == wanted)
      return i;
    --i;
  }
  return -1;
}
/* Shorten the buffer to `len` bytes (negative values count as 0). Has no
 * effect when the buffer is already that short or shorter. */
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) {
  const bufsize_t clamped = (len < 0) ? 0 : len;

  if (clamped >= buf->size)
    return;

  buf->size = clamped;
  buf->ptr[clamped] = '\0';
}
/* Remove the first `n` bytes (everything, if `n` exceeds the length) and
 * shift the remainder to the front. Non-positive `n` is a no-op. */
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) {
  if (n <= 0)
    return;

  const bufsize_t removed = (n > buf->size) ? buf->size : n;
  const bufsize_t remaining = buf->size - removed;

  buf->size = remaining;
  if (remaining != 0)
    memmove(buf->ptr, buf->ptr + removed, remaining);
  buf->ptr[remaining] = '\0';
}
/* Strip trailing bytes for which cmark_isspace() is true and re-terminate.
 * An empty buffer is left untouched. */
void cmark_strbuf_rtrim(cmark_strbuf *buf) {
  if (buf->size == 0)
    return;

  bufsize_t end = buf->size;
  while (end > 0 && cmark_isspace(buf->ptr[end - 1]))
    end--;

  buf->size = end;
  buf->ptr[end] = '\0';
}
/* Strip leading and trailing bytes for which cmark_isspace() is true.
 * An empty buffer is left untouched. */
void cmark_strbuf_trim(cmark_strbuf *buf) {
  if (buf->size == 0)
    return;

  bufsize_t lead = 0;
  for (; lead < buf->size; lead++) {
    if (!cmark_isspace(buf->ptr[lead]))
      break;
  }

  cmark_strbuf_drop(buf, lead);
  cmark_strbuf_rtrim(buf);
}
// Collapse, in place, every run of bytes classified by cmark_isspace()
// into a single ' ' character, then shorten the buffer to the new length.
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
  bool prev_was_space = false;
  bufsize_t out = 0;

  for (bufsize_t in = 0; in < s->size; in++) {
    const unsigned char ch = s->ptr[in];

    if (!cmark_isspace(ch)) {
      s->ptr[out++] = ch;
      prev_was_space = false;
      continue;
    }
    // Only the first byte of a whitespace run is emitted (as a space).
    if (prev_was_space)
      continue;
    s->ptr[out++] = ' ';
    prev_was_space = true;
  }

  cmark_strbuf_truncate(s, out);
}
// Unescape in place: drop each backslash that is immediately followed by a
// byte for which cmark_ispunct() is true, keeping the escaped byte itself.
extern void cmark_strbuf_unescape(cmark_strbuf *buf) {
  bufsize_t src = 0;
  bufsize_t dst = 0;

  while (src < buf->size) {
    // Skip the backslash; the punctuation byte after it is copied below.
    if (buf->ptr[src] == '\\' && cmark_ispunct(buf->ptr[src + 1]))
      src++;
    buf->ptr[dst] = buf->ptr[src];
    dst++;
    src++;
  }

  cmark_strbuf_truncate(buf, dst);
}

116
lib/commonmarker/buffer.h Normal file
View File

@ -0,0 +1,116 @@
#ifndef CMARK_BUFFER_H
#define CMARK_BUFFER_H
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include "config.h"
#include "cmark-gfm.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Growable, NUL-terminated byte buffer. */
typedef struct {
/* Allocator used to obtain and release the buffer's storage. */
cmark_mem *mem;
/* Buffer contents; points at cmark_strbuf__initbuf while nothing is allocated. */
unsigned char *ptr;
/* asize: allocated capacity in bytes (0 while unallocated);
 * size: number of bytes in use, not counting the NUL terminator. */
bufsize_t asize, size;
} cmark_strbuf;
extern unsigned char cmark_strbuf__initbuf[];
#define CMARK_BUF_INIT(mem) \
{ mem, cmark_strbuf__initbuf, 0, 0 }
/**
* Initialize a cmark_strbuf structure.
*
* For the cases where CMARK_BUF_INIT cannot be used to do static
* initialization.
*/
CMARK_GFM_EXPORT
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
bufsize_t initial_size);
/**
* Grow the buffer to hold at least `target_size` bytes.
*/
CMARK_GFM_EXPORT
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
/**
 * Release the buffer's storage and reset it to the empty state.
 * A NULL `buf` is ignored.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_free(cmark_strbuf *buf);
/**
 * Exchange the complete contents of two buffers.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
/**
 * Return the number of bytes stored in `buf`.
 */
CMARK_GFM_EXPORT
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
/**
 * Bytewise comparison: returns memcmp's result when the common prefix
 * differs; otherwise -1 or 1 when `a` is respectively shorter or longer
 * than `b`, and 0 when the lengths match.
 */
CMARK_GFM_EXPORT
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
/**
 * Return the buffer's storage and reset `buf` to the empty state. If the
 * buffer never allocated, a newly allocated empty string is returned.
 */
CMARK_GFM_EXPORT
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
/**
 * Copy the contents into `data` as a NUL-terminated string, truncated to
 * at most `datasize - 1` bytes. Does nothing if `data` is NULL or
 * `datasize` is not positive.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
const cmark_strbuf *buf);
/**
 * View the buffer's bytes as a C string. The pointer refers to the buffer's
 * own storage; nothing is copied.
 */
static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) {
  const unsigned char *bytes = buf->ptr;
  return (const char *)bytes;
}
/** Access byte `n` of `buf`; no bounds checking is performed. */
#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
/**
 * Replace the contents with `len` bytes from `data`. A NULL `data` or a
 * non-positive `len` clears the buffer.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len);
/**
 * Replace the contents with the NUL-terminated `string`; NULL clears the
 * buffer.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
/**
 * Append the low eight bits of `c` as a single byte.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_putc(cmark_strbuf *buf, int c);
/**
 * Append `len` bytes from `data`; does nothing when `len` <= 0.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len);
/**
 * Append the NUL-terminated `string`.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
/**
 * Set the length to zero, keeping any allocated storage.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_clear(cmark_strbuf *buf);
/**
 * Return the index of the first byte equal to `c` at or after `pos`
 * (negative `pos` counts as 0), or -1 if there is none.
 */
CMARK_GFM_EXPORT
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
/**
 * Return the index of the last byte equal to `c` at or before `pos`
 * (clamped to the last byte), or -1 if there is none or `pos` is negative.
 */
CMARK_GFM_EXPORT
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
/**
 * Remove the first `n` bytes (all of them if `n` exceeds the length).
 */
CMARK_GFM_EXPORT
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
/**
 * Shorten the buffer to `len` bytes (negative counts as 0); no effect if
 * the buffer is not longer than `len`.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
/**
 * Remove trailing bytes for which cmark_isspace() is true.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_rtrim(cmark_strbuf *buf);
/**
 * Remove leading and trailing bytes for which cmark_isspace() is true.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_trim(cmark_strbuf *buf);
/**
 * Collapse each run of cmark_isspace() bytes into a single space, in place.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
/**
 * Remove, in place, each backslash that precedes a byte for which
 * cmark_ispunct() is true.
 */
CMARK_GFM_EXPORT
void cmark_strbuf_unescape(cmark_strbuf *s);
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

135
lib/commonmarker/chunk.h Normal file
View File

@ -0,0 +1,135 @@
#ifndef CMARK_CHUNK_H
#define CMARK_CHUNK_H
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "cmark-gfm.h"
#include "buffer.h"
#include "cmark_ctype.h"
/* Initializer for a chunk with no data, zero length and no owned storage. */
#define CMARK_CHUNK_EMPTY \
{ NULL, 0, 0 }
/* A length-delimited byte string. When `alloc` is zero the chunk only points
 * into memory it does not own; when non-zero, `data` was obtained from a
 * cmark_mem allocator and is released by cmark_chunk_free(). */
typedef struct cmark_chunk {
/* First byte of the string (may be NULL for an empty chunk). */
unsigned char *data;
/* Number of bytes in the string. */
bufsize_t len;
bufsize_t alloc; // also implies a NUL-terminated string
} cmark_chunk;
/* Release the chunk's storage if the chunk owns it, then reset the chunk to
 * the empty state. */
static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
  if (c->alloc != 0)
    mem->free(c->data);

  c->len = 0;
  c->alloc = 0;
  c->data = NULL;
}
/* Advance the start of a non-owning chunk past leading bytes for which
 * cmark_isspace() is true. Asserts that the chunk does not own its data,
 * since moving `data` would lose the pointer needed to free it. */
static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) {
  assert(!c->alloc);

  for (; c->len && cmark_isspace(c->data[0]); c->len--)
    c->data++;
}
/* Shrink a non-owning chunk so it no longer includes trailing bytes for
 * which cmark_isspace() is true. Asserts that the chunk does not own its
 * data. */
static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) {
  assert(!c->alloc);

  while (c->len > 0 && cmark_isspace(c->data[c->len - 1]))
    c->len--;
}
/* Trim a non-owning chunk on both sides: leading whitespace first, then
 * trailing whitespace. */
static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) {
  cmark_chunk_ltrim(c); /* drop leading whitespace */
  cmark_chunk_rtrim(c); /* drop trailing whitespace */
}
/* Return the index of the first byte equal to `c` at or after `offset`, or
 * the chunk's length if there is no such byte. */
static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
                                                 bufsize_t offset) {
  const unsigned char *hit =
      (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);

  if (hit == NULL)
    return ch->len;
  return (bufsize_t)(hit - ch->data);
}
/* Return the chunk's contents as a NUL-terminated C string. A chunk that
 * already owns its data is returned as-is; otherwise the bytes are copied
 * into storage from `mem`, and the chunk is switched over to own that copy. */
static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem,
                                                    cmark_chunk *c) {
  if (c->alloc)
    return (char *)c->data;

  unsigned char *copy = (unsigned char *)mem->calloc(c->len + 1, 1);
  if (c->len > 0)
    memcpy(copy, c->data, c->len);
  copy[c->len] = 0;

  c->data = copy;
  c->alloc = 1;
  return (char *)copy;
}
/* Make the chunk hold its own copy of `str` (or become empty when `str` is
 * NULL). Storage previously owned by the chunk is freed only after the copy
 * has been made. */
static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
                                              const char *str) {
  unsigned char *previous = c->alloc ? c->data : NULL;

  if (str != NULL) {
    c->len = (bufsize_t)strlen(str);
    c->data = (unsigned char *)mem->calloc(c->len + 1, 1);
    c->alloc = 1;
    /* len + 1 so the terminating NUL is copied too */
    memcpy(c->data, str, c->len + 1);
  } else {
    c->len = 0;
    c->data = NULL;
    c->alloc = 0;
  }

  if (previous != NULL)
    mem->free(previous);
}
/* Wrap a NUL-terminated string in a non-owning chunk; a NULL `data` gives a
 * chunk of length 0 with a NULL data pointer. */
static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
  cmark_chunk c;

  c.data = (unsigned char *)data;
  c.len = 0;
  if (data)
    c.len = (bufsize_t)strlen(data);
  c.alloc = 0;
  return c;
}
/* Return a non-owning chunk that refers to `len` bytes of `ch` starting at
 * byte `pos`. No data is copied and no bounds are checked. */
static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
                                                bufsize_t pos, bufsize_t len) {
  cmark_chunk view;

  view.data = ch->data + pos;
  view.len = len;
  view.alloc = 0;
  return view;
}
/* Move the contents of `buf` into an owning chunk; `buf` is detached in the
 * process. */
static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
  /* Read the length first: detaching resets the buffer's size. */
  const bufsize_t length = buf->size;
  cmark_chunk c;

  c.len = length;
  c.data = cmark_strbuf_detach(buf);
  c.alloc = 1;
  return c;
}
/* The *_trim_new variants are for source chunks that may or may not own
 * their data: the source is left untouched and the result is always a newly
 * allocated chunk.
 *
 * This one returns a copy of `c` without its leading whitespace. */
static CMARK_INLINE cmark_chunk cmark_chunk_ltrim_new(cmark_mem *mem, cmark_chunk *c) {
  /* Non-owning view of the whole source, so trimming is permitted. */
  cmark_chunk trimmed = cmark_chunk_dup(c, 0, c->len);

  cmark_chunk_ltrim(&trimmed);
  /* Converting to a C string forces the view to own a fresh copy. */
  cmark_chunk_to_cstr(mem, &trimmed);
  return trimmed;
}
/* Return a newly allocated copy of `c` without its trailing whitespace; the
 * source chunk is left untouched whether or not it owns its data. */
static CMARK_INLINE cmark_chunk cmark_chunk_rtrim_new(cmark_mem *mem, cmark_chunk *c) {
  /* Non-owning view of the whole source, so trimming is permitted. */
  cmark_chunk trimmed = cmark_chunk_dup(c, 0, c->len);

  cmark_chunk_rtrim(&trimmed);
  /* Converting to a C string forces the view to own a fresh copy. */
  cmark_chunk_to_cstr(mem, &trimmed);
  return trimmed;
}
#endif

View File

@ -0,0 +1,736 @@
#ifndef CMARK_GFM_EXTENSION_API_H
#define CMARK_GFM_EXTENSION_API_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm.h"
struct cmark_renderer;
struct cmark_html_renderer;
struct cmark_chunk;
/**
* ## Extension Support
*
* While the "core" of libcmark is strictly compliant with the
* specification, an API is provided for extension writers to
* hook into the parsing process.
*
* It should be noted that the cmark_node API already offers
* room for customization, with methods offered to traverse and
* modify the AST, and even define custom blocks.
* When the desired customization is achievable in an error-proof
* way using that API, it should be the preferred method.
*
* The following API requires a more in-depth understanding
* of libcmark's parsing strategy, which is exposed
* [here](http://spec.commonmark.org/0.24/#appendix-a-parsing-strategy).
*
* It should be used when "a posteriori" modification of the AST
* proves to be too difficult / impossible to implement correctly.
*
* It can also serve as an intermediary step before extending
* the specification, as an extension implemented using this API
* will be trivially integrated in the core if it proves to be
* desirable.
*/
typedef struct cmark_plugin cmark_plugin;
/** A syntax extension that can be attached to a cmark_parser
* with cmark_parser_attach_syntax_extension().
*
* Extension writers should assign functions matching
* the signature of the following 'virtual methods' to
* implement new functionality.
*
* Their calling order and expected behaviour match the procedure outlined
* at <http://spec.commonmark.org/0.24/#phase-1-block-structure>:
*
* During step 1, cmark will call the function provided through
* 'cmark_syntax_extension_set_match_block_func' when it
* iterates over an open block created by this extension,
* to determine whether it could contain the new line.
* If no function was provided, cmark will close the block.
*
* During step 2, if and only if the new line doesn't match any
* of the standard syntax rules, cmark will call the function
* provided through 'cmark_syntax_extension_set_open_block_func'
* to let the extension determine whether that new line matches
* one of its syntax rules.
* It is the responsibility of the parser to create and add the
* new block with cmark_parser_make_block and cmark_parser_add_child.
* If no function was provided, the extension will have
* no effect at all on the final block structure of the AST.
*
* #### Inline parsing phase hooks
*
* For each character provided by the extension through
* 'cmark_syntax_extension_set_special_inline_chars',
* the function provided by the extension through
* 'cmark_syntax_extension_set_match_inline_func'
* will get called, it is the responsibility of the extension
* to scan the characters located at the current inline parsing offset
* with the cmark_inline_parser API.
*
* Depending on the type of the extension, it can either:
*
* * Scan forward, determine that the syntax matches and return
* a newly-created inline node with the appropriate type.
* This is the technique that would be used if inline code
* (with backticks) was implemented as an extension.
* * Scan only the character(s) that its syntax rules require
* for opening and closing nodes, push a delimiter on the
* delimiter stack, and return a simple text node with its
* contents set to the character(s) consumed.
* This is the technique that would be used if emphasis
* inlines were implemented as an extension.
*
* When an extension has pushed delimiters on the stack,
* the function provided through
* 'cmark_syntax_extension_set_inline_from_delim_func'
* will get called in a later phase,
* when the inline parser has matched opener and closer delimiters
* created by the extension together.
*
* It is then the responsibility of the extension to modify
* and populate the opener inline text node, and to remove
* the necessary delimiters from the delimiter stack.
*
* Finally, the extension should return NULL if its scan didn't
* match its syntax rules.
*
* The extension can store whatever private data it might need
* with 'cmark_syntax_extension_set_private',
* and optionally define a free function for this data.
*/
typedef struct subject cmark_inline_parser;
/** Exposed raw for now */
/* Entry of the inline parser's delimiter stack, as referred to by the
 * extension documentation in this header. Field notes below are inferred
 * from names and that documentation -- NOTE(review): confirm against the
 * inline parser implementation. */
typedef struct delimiter {
/* presumably the neighbouring stack entries (doubly linked) */
struct delimiter *previous;
struct delimiter *next;
/* presumably the text node holding the delimiter character(s) consumed */
cmark_node *inl_text;
/* presumably the number of delimiter characters in the run -- TODO confirm */
bufsize_t length;
/* the delimiter character itself */
unsigned char delim_char;
/* presumably non-zero when the run may open / close an inline -- TODO confirm */
int can_open;
int can_close;
} delimiter;
/**
* ### Plugin API.
*
* Extensions should be distributed as dynamic libraries,
* with a single exported function named after the distributed
* filename.
*
* When discovering extensions (see cmark_init), cmark will
* try to load a symbol named "init_{{filename}}" in all the
* dynamic libraries it encounters.
*
* For example, given a dynamic library named myextension.so
* (or myextension.dll), cmark will try to load the symbol
* named "init_myextension". This means that the filename
* must lend itself to forming a valid C identifier, with
* the notable exception of dashes, which will be translated
* to underscores, which means cmark will look for a function
* named "init_my_extension" if it encounters a dynamic library
* named "my-extension.so".
*
* See the 'cmark_plugin_init_func' typedef for the exact prototype
* this function should follow.
*
* For now the extensibility of cmark is not complete, as
* it only offers API to hook into the block parsing phase
* (<http://spec.commonmark.org/0.24/#phase-1-block-structure>).
*
* See 'cmark_plugin_register_syntax_extension' for more information.
*/
/** The prototype plugins' init function should follow.
*/
typedef int (*cmark_plugin_init_func)(cmark_plugin *plugin);
/** Register a syntax 'extension' with the 'plugin', it will be made
* available as an extension and, if attached to a cmark_parser
* with 'cmark_parser_attach_syntax_extension', it will contribute
* to the block parsing process.
*
* See the documentation for 'cmark_syntax_extension' for information
* on how to implement one.
*
* This function will typically be called from the init function
* of external modules.
*
* This takes ownership of 'extension', one should not call
* 'cmark_syntax_extension_free' on a registered extension.
*/
CMARK_GFM_EXPORT
int cmark_plugin_register_syntax_extension(cmark_plugin *plugin,
cmark_syntax_extension *extension);
/** This will search for the syntax extension named 'name' among the
* registered syntax extensions.
*
* It can then be attached to a cmark_parser
* with the cmark_parser_attach_syntax_extension method.
*/
CMARK_GFM_EXPORT
cmark_syntax_extension *cmark_find_syntax_extension(const char *name);
/** Should create and add a new open block to 'parent_container' if
* 'input' matches a syntax rule for that block type. It is allowed
* to modify the type of 'parent_container'.
*
* Should return the newly created block if there is one, or
* 'parent_container' if its type was modified, or NULL.
*/
typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension,
int indented,
cmark_parser *parser,
cmark_node *parent_container,
unsigned char *input,
int len);
typedef cmark_node *(*cmark_match_inline_func)(cmark_syntax_extension *extension,
cmark_parser *parser,
cmark_node *parent,
unsigned char character,
cmark_inline_parser *inline_parser);
typedef delimiter *(*cmark_inline_from_delim_func)(cmark_syntax_extension *extension,
cmark_parser *parser,
cmark_inline_parser *inline_parser,
delimiter *opener,
delimiter *closer);
/** Should return 'true' if 'input' can be contained in 'container',
* 'false' otherwise.
*/
typedef int (*cmark_match_block_func) (cmark_syntax_extension *extension,
cmark_parser *parser,
unsigned char *input,
int len,
cmark_node *container);
typedef const char *(*cmark_get_type_string_func) (cmark_syntax_extension *extension,
cmark_node *node);
typedef int (*cmark_can_contain_func) (cmark_syntax_extension *extension,
cmark_node *node,
cmark_node_type child);
typedef int (*cmark_contains_inlines_func) (cmark_syntax_extension *extension,
cmark_node *node);
typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension,
struct cmark_renderer *renderer,
cmark_node *node,
cmark_event_type ev_type,
int options);
typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension,
cmark_node *node,
int c);
typedef const char* (*cmark_xml_attr_func) (cmark_syntax_extension *extension,
cmark_node *node);
typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension,
struct cmark_html_renderer *renderer,
cmark_node *node,
cmark_event_type ev_type,
int options);
typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension,
const unsigned char *tag,
size_t tag_len);
typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension,
cmark_parser *parser,
cmark_node *root);
typedef int (*cmark_ispunct_func) (char c);
typedef void (*cmark_opaque_alloc_func) (cmark_syntax_extension *extension,
cmark_mem *mem,
cmark_node *node);
typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension,
cmark_mem *mem,
cmark_node *node);
/** Free a cmark_syntax_extension.
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_free (cmark_mem *mem, cmark_syntax_extension *extension);
/** Return a newly-constructed cmark_syntax_extension, named 'name'.
*/
CMARK_GFM_EXPORT
cmark_syntax_extension *cmark_syntax_extension_new (const char *name);
CMARK_GFM_EXPORT
cmark_node_type cmark_syntax_extension_add_node(int is_inline);
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, int emphasis);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension,
cmark_open_block_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension,
cmark_match_block_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension,
cmark_match_inline_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension,
cmark_inline_from_delim_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension,
cmark_llist *special_chars);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension,
cmark_get_type_string_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension,
cmark_can_contain_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension,
cmark_contains_inlines_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension,
cmark_xml_attr_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension,
cmark_html_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension,
cmark_html_filter_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension,
cmark_commonmark_escape_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_private(cmark_syntax_extension *extension,
void *priv,
cmark_free_func free_func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension,
cmark_postprocess_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension,
cmark_opaque_alloc_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension,
cmark_opaque_free_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
cmark_ispunct_func func);
/** Return the index of the line currently being parsed, starting with 1.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_line_number(cmark_parser *parser);
/** Return the offset in bytes in the line being processed.
*
* Example:
*
* ### foo
*
* Here, offset will first be 0, then 5 (the index of the 'f' character).
*/
CMARK_GFM_EXPORT
int cmark_parser_get_offset(cmark_parser *parser);
/**
* Return the offset in 'columns' in the line being processed.
*
* This value may differ from the value returned by
* cmark_parser_get_offset() in that it accounts for tabs,
* and as such should not be used as an index in the current line's
* buffer.
*
* Example:
*
* cmark_parser_advance_offset() can be called to advance the
* offset by a number of columns, instead of a number of bytes.
*
* In that case, if offset falls "in the middle" of a tab
* character, 'column' and offset will differ.
*
* ```
* foo \t bar
* ^ ^^
* offset (0) 20
* ```
*
* If cmark_parser_advance_offset is called here with 'columns'
* set to 'true' and 'offset' set to 22, cmark_parser_get_offset()
* will return 20, whereas cmark_parser_get_column() will return
* 22.
*
* Additionally, as tabs expand to the next multiple of 4 columns,
* cmark_parser_has_partially_consumed_tab() will now return
* 'true'.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_column(cmark_parser *parser);
/** Return the absolute index in bytes of the first nonspace
* character coming after the offset as returned by
* cmark_parser_get_offset() in the line currently being processed.
*
* Example:
*
* ```
* foo bar baz \n
* ^ ^ ^
* 0 offset (16) first_nonspace (28)
* ```
*/
CMARK_GFM_EXPORT
int cmark_parser_get_first_nonspace(cmark_parser *parser);
/** Return the absolute index of the first nonspace column coming after 'offset'
* in the line currently being processed, counting tabs as multiple
* columns as appropriate.
*
* See the documentation for cmark_parser_get_first_nonspace() and
* cmark_parser_get_column() for more information.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_first_nonspace_column(cmark_parser *parser);
/** Return the difference between the values returned by
* cmark_parser_get_first_nonspace_column() and
* cmark_parser_get_column().
*
* This is not a byte offset, as it can count one tab as multiple
* characters.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_indent(cmark_parser *parser);
/** Return 'true' if the line currently being processed has been entirely
* consumed, 'false' otherwise.
*
* Example:
*
* ```
* foo bar baz \n
* ^
* offset
* ```
*
* This function will return 'false' here.
*
* ```
* foo bar baz \n
* ^
* offset
* ```
* This function will still return 'false'.
*
* ```
* foo bar baz \n
* ^
* offset
* ```
*
* At this point, this function will now return 'true'.
*/
CMARK_GFM_EXPORT
int cmark_parser_is_blank(cmark_parser *parser);
/** Return 'true' if the value returned by cmark_parser_get_offset()
* is 'inside' an expanded tab.
*
* See the documentation for cmark_parser_get_column() for more
* information.
*/
CMARK_GFM_EXPORT
int cmark_parser_has_partially_consumed_tab(cmark_parser *parser);
/** Return the length in bytes of the previously processed line, excluding potential
* newline (\n) and carriage return (\r) trailing characters.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_last_line_length(cmark_parser *parser);
/** Add a child to 'parent' during the parsing process.
*
* If 'parent' isn't the kind of node that can accept this child,
* this function will back up till it hits a node that can, closing
* blocks as appropriate.
*/
CMARK_GFM_EXPORT
cmark_node*cmark_parser_add_child(cmark_parser *parser,
cmark_node *parent,
cmark_node_type block_type,
int start_column);
/** Advance the 'offset' of the parser in the current line.
*
* See the documentation of cmark_parser_get_offset() and
* cmark_parser_get_column() for more information.
*/
CMARK_GFM_EXPORT
void cmark_parser_advance_offset(cmark_parser *parser,
const char *input,
int count,
int columns);
CMARK_GFM_EXPORT
void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len);
/** Attach the syntax 'extension' to the 'parser', to provide extra syntax
* rules.
* See the documentation for cmark_syntax_extension for more information.
*
* Returns 'true' if the 'extension' was successfully attached,
* 'false' otherwise.
*/
CMARK_GFM_EXPORT
int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension);
/** Change the type of 'node'.
*
* Return 0 if the type could be changed, 1 otherwise.
*/
CMARK_GFM_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type);
/** Return the string content for all types of 'node'.
* The pointer stays valid as long as 'node' isn't freed.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_string_content(cmark_node *node);
/** Set the string 'content' for all types of 'node'.
* Copies 'content'.
*/
CMARK_GFM_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content);
/** Get the syntax extension responsible for the creation of 'node'.
* Return NULL if 'node' was created because it matched standard syntax rules.
*/
CMARK_GFM_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node);
/** Set the syntax extension responsible for creating 'node'.
*/
CMARK_GFM_EXPORT int cmark_node_set_syntax_extension(cmark_node *node,
cmark_syntax_extension *extension);
/**
* ## Inline syntax extension helpers
*
* The inline parsing process is described in detail at
* <http://spec.commonmark.org/0.24/#phase-2-inline-structure>
*/
/** Should return 'true' if the predicate matches 'c', 'false' otherwise
*/
typedef int (*cmark_inline_predicate)(int c);
/** Advance the current inline parsing offset */
CMARK_GFM_EXPORT
void cmark_inline_parser_advance_offset(cmark_inline_parser *parser);
/** Get the current inline parsing offset */
CMARK_GFM_EXPORT
int cmark_inline_parser_get_offset(cmark_inline_parser *parser);
/** Set the offset in bytes in the chunk being processed by the given inline parser.
*/
CMARK_GFM_EXPORT
void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset);
/** Gets the cmark_chunk being operated on by the given inline parser.
* Use cmark_inline_parser_get_offset to get our current position in the chunk.
*/
CMARK_GFM_EXPORT
struct cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser);
/** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image'
* if you want to know about an image-type bracket, 0 for link-type. */
CMARK_GFM_EXPORT
int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image);
/** Remove the last n characters from the last child of the given node.
* This only works where all n characters are in the single last child, and the last
* child is CMARK_NODE_TEXT.
*/
CMARK_GFM_EXPORT
void cmark_node_unput(cmark_node *node, int n);
/** Get the character located at the current inline parsing offset
*/
CMARK_GFM_EXPORT
unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser);
/** Get the character located 'pos' bytes in the current line.
*/
CMARK_GFM_EXPORT
unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos);
/** Whether the inline parser has reached the end of the current line
*/
CMARK_GFM_EXPORT
int cmark_inline_parser_is_eof(cmark_inline_parser *parser);
/** Get the characters located after the current inline parsing offset
* while 'pred' matches. Free after usage.
*/
CMARK_GFM_EXPORT
char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred);
/** Push a delimiter on the delimiter stack.
* See <http://spec.commonmark.org/0.24/#phase-2-inline-structure> for
* more information on the parameters
*/
CMARK_GFM_EXPORT
void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser,
unsigned char c,
int can_open,
int can_close,
cmark_node *inl_text);
/** Remove 'delim' from the delimiter stack
*/
CMARK_GFM_EXPORT
void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim);
CMARK_GFM_EXPORT
delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser);
CMARK_GFM_EXPORT
int cmark_inline_parser_get_line(cmark_inline_parser *parser);
CMARK_GFM_EXPORT
int cmark_inline_parser_get_column(cmark_inline_parser *parser);
/** Convenience function to scan a given delimiter.
*
* 'left_flanking' and 'right_flanking' will be set to true if they
* respectively precede and follow a non-space, non-punctuation
* character.
*
* Additionally, 'punct_before' and 'punct_after' will respectively be set
* if the preceding or following character is a punctuation character.
*
* Note that 'left_flanking' and 'right_flanking' can both be 'true'.
*
* Returns the number of delimiters encountered, in the limit
* of 'max_delims', and advances the inline parsing offset.
*/
CMARK_GFM_EXPORT
int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
int max_delims,
unsigned char c,
int *left_flanking,
int *right_flanking,
int *punct_before,
int *punct_after);
CMARK_GFM_EXPORT
void cmark_manage_extensions_special_characters(cmark_parser *parser, int add);
CMARK_GFM_EXPORT
cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser);
CMARK_GFM_EXPORT
void cmark_arena_push(void);
CMARK_GFM_EXPORT
int cmark_arena_pop(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,817 @@
#ifndef CMARK_GFM_H
#define CMARK_GFM_H
#include <stdio.h>
#include <stdint.h>
#include "cmark-gfm_export.h"
#include "cmark-gfm_version.h"
#ifdef __cplusplus
extern "C" {
#endif
/** # NAME
*
* **cmark-gfm** - CommonMark parsing, manipulating, and rendering
*/
/** # DESCRIPTION
*
* ## Simple Interface
*/
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string. It is the caller's responsibility
* to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);
/** ## Node Structure
*/
/* Node type encoding. Every real node type has CMARK_NODE_TYPE_PRESENT
 * (0x8000) set; inline types additionally set 0x4000, block types do not.
 * CMARK_NODE_TYPE_MASK selects those two flag bits and
 * CMARK_NODE_VALUE_MASK selects the remaining low 14 bits, which hold the
 * per-category ordinal of the type. */
#define CMARK_NODE_TYPE_PRESENT (0x8000)
#define CMARK_NODE_TYPE_BLOCK (CMARK_NODE_TYPE_PRESENT | 0x0000)
#define CMARK_NODE_TYPE_INLINE (CMARK_NODE_TYPE_PRESENT | 0x4000)
#define CMARK_NODE_TYPE_MASK (0xc000)
#define CMARK_NODE_VALUE_MASK (0x3fff)
/* Built-in node types. Each enumerator is a category flag
 * (CMARK_NODE_TYPE_BLOCK or CMARK_NODE_TYPE_INLINE) OR-ed with an ordinal
 * starting at 1; CMARK_NODE_NONE (0) has no flag bits set. */
typedef enum {
/* Error status */
CMARK_NODE_NONE = 0x0000,
/* Block */
CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001,
CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002,
CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003,
CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004,
CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005,
CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006,
CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007,
CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008,
CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009,
CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a,
CMARK_NODE_FOOTNOTE_DEFINITION = CMARK_NODE_TYPE_BLOCK | 0x000b,
/* Inline */
CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001,
CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002,
CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003,
CMARK_NODE_CODE = CMARK_NODE_TYPE_INLINE | 0x0004,
CMARK_NODE_HTML_INLINE = CMARK_NODE_TYPE_INLINE | 0x0005,
CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006,
CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007,
CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008,
CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009,
CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a,
CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b,
} cmark_node_type;
extern cmark_node_type CMARK_NODE_LAST_BLOCK;
extern cmark_node_type CMARK_NODE_LAST_INLINE;
/* For backwards compatibility: */
#define CMARK_NODE_HEADER CMARK_NODE_HEADING
#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK
#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE
/* List flavour reported by cmark_node_get_list_type(); CMARK_NO_LIST is
 * the value used for nodes that are not lists. */
typedef enum {
  CMARK_NO_LIST = 0,
  CMARK_BULLET_LIST = 1,
  CMARK_ORDERED_LIST = 2
} cmark_list_type;
/* List delimiter style reported by cmark_node_get_list_delim();
 * CMARK_NO_DELIM is the value used for nodes that are not lists. */
typedef enum {
  CMARK_NO_DELIM = 0,
  CMARK_PERIOD_DELIM = 1,
  CMARK_PAREN_DELIM = 2
} cmark_delim_type;
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
typedef struct cmark_syntax_extension cmark_syntax_extension;
/**
* ## Custom memory allocator support
*/
/** Defines the memory allocation functions to be used by CMark
* when parsing and allocating a document tree
*/
typedef struct cmark_mem {
  /* calloc-style hook: element count, then element size. */
  void *(*calloc)(size_t nmem, size_t size);
  /* realloc-style hook: existing block (or NULL), then new size in bytes. */
  void *(*realloc)(void *ptr, size_t size);
  /* free-style hook: releases a block obtained from the hooks above. */
  void (*free)(void *ptr);
} cmark_mem;
/** The default memory allocator; uses the system's calloc,
 * realloc and free. The returned pointer refers to a table owned by the
 * library and must not be freed by the caller.
 */
CMARK_GFM_EXPORT
cmark_mem *cmark_get_default_mem_allocator(void);
/** An arena allocator; uses system calloc to allocate large
 * slabs of memory. Memory in these slabs is not reused at all.
 * Use 'cmark_arena_reset' to release everything the arena holds.
 */
CMARK_GFM_EXPORT
cmark_mem *cmark_get_arena_mem_allocator(void);
/** Resets the arena allocator, quickly returning all used memory
* to the operating system.
*/
CMARK_GFM_EXPORT
void cmark_arena_reset(void);
/** Callback for freeing user data with a 'cmark_mem' context.
*/
typedef void (*cmark_free_func) (cmark_mem *mem, void *user_data);
/*
* ## Basic data structures
*
* To keep dependencies to the strict minimum, libcmark implements
* its own versions of "classic" data structures.
*/
/**
* ### Linked list
*/
/** A generic singly linked list.
*/
typedef struct _cmark_llist {
  struct _cmark_llist *next; /* following element of the list */
  void *data;                /* payload pointer stored in this element */
} cmark_llist;
/** Append an element to the linked list, return the possibly modified
* head of the list.
*/
CMARK_GFM_EXPORT
cmark_llist * cmark_llist_append (cmark_mem * mem,
cmark_llist * head,
void * data);
/** Free the list starting with 'head', calling 'free_func' with the
* data pointer of each of its elements
*/
CMARK_GFM_EXPORT
void cmark_llist_free_full (cmark_mem * mem,
cmark_llist * head,
cmark_free_func free_func);
/** Free the list starting with 'head'
*/
CMARK_GFM_EXPORT
void cmark_llist_free (cmark_mem * mem,
cmark_llist * head);
/**
* ## Creating and Destroying Nodes
*/
/** Creates a new node of type 'type'. Note that the node may have
* other required properties, which it is the caller's responsibility
* to assign.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_new(cmark_node_type type);
/** Same as `cmark_node_new`, but explicitly listing the memory
* allocator used to allocate the node. Note: be sure to use the same
* allocator for every node in a tree, or bad things can happen.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type,
cmark_mem *mem);
CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_ext(cmark_node_type type,
cmark_syntax_extension *extension);
CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type,
cmark_mem *mem,
cmark_syntax_extension *extension);
/** Frees the memory allocated for a node and any children.
*/
CMARK_GFM_EXPORT void cmark_node_free(cmark_node *node);
/**
* ## Tree Traversal
*/
/** Returns the next node in the sequence after 'node', or NULL if
* there is none.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_next(cmark_node *node);
/** Returns the previous node in the sequence before 'node', or NULL if
* there is none.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_previous(cmark_node *node);
/** Returns the parent of 'node', or NULL if there is none.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_parent(cmark_node *node);
/** Returns the first child of 'node', or NULL if 'node' has no children.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
/** Returns the last child of 'node', or NULL if 'node' has no children.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
/**
* ## Iterator
*
* An iterator will walk through a tree of nodes, starting from a root
* node, returning one node at a time, together with information about
* whether the node is being entered or exited. The iterator will
* first descend to a child node, if there is one. When there is no
* child, the iterator will go to the next sibling. When there is no
* next sibling, the iterator will return to the parent (but with
* a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will
* return `CMARK_EVENT_DONE` when it reaches the root node again.
* One natural application is an HTML renderer, where an `ENTER` event
* outputs an open tag and an `EXIT` event outputs a close tag.
* An iterator might also be used to transform an AST in some systematic
* way, for example, turning all level-3 headings into regular paragraphs.
*
* void
* usage_example(cmark_node *root) {
* cmark_event_type ev_type;
* cmark_iter *iter = cmark_iter_new(root);
*
* while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
* cmark_node *cur = cmark_iter_get_node(iter);
* // Do something with `cur` and `ev_type`
* }
*
* cmark_iter_free(iter);
* }
*
* Iterators will never return `EXIT` events for leaf nodes, which are nodes
* of type:
*
* * CMARK_NODE_HTML_BLOCK
* * CMARK_NODE_THEMATIC_BREAK
* * CMARK_NODE_CODE_BLOCK
* * CMARK_NODE_TEXT
* * CMARK_NODE_SOFTBREAK
* * CMARK_NODE_LINEBREAK
* * CMARK_NODE_CODE
* * CMARK_NODE_HTML_INLINE
*
* Nodes must only be modified after an `EXIT` event, or an `ENTER` event for
* leaf nodes.
*/
/* Event kinds reported while iterating over a node tree. */
typedef enum {
  CMARK_EVENT_NONE = 0,
  CMARK_EVENT_DONE = 1,
  CMARK_EVENT_ENTER = 2,
  CMARK_EVENT_EXIT = 3
} cmark_event_type;
/** Creates a new iterator starting at 'root'. The current node and event
* type are undefined until 'cmark_iter_next' is called for the first time.
* The memory allocated for the iterator should be released using
* 'cmark_iter_free' when it is no longer needed.
*/
CMARK_GFM_EXPORT
cmark_iter *cmark_iter_new(cmark_node *root);
/** Frees the memory allocated for an iterator.
*/
CMARK_GFM_EXPORT
void cmark_iter_free(cmark_iter *iter);
/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`,
* `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`).
*/
CMARK_GFM_EXPORT
cmark_event_type cmark_iter_next(cmark_iter *iter);
/** Returns the current node.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_iter_get_node(cmark_iter *iter);
/** Returns the current event type.
*/
CMARK_GFM_EXPORT
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);
/** Returns the root node.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_iter_get_root(cmark_iter *iter);
/** Resets the iterator so that the current node is 'current' and
* the event type is 'event_type'. The new current node must be a
* descendant of the root node or the root node itself.
*/
CMARK_GFM_EXPORT
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
cmark_event_type event_type);
/**
* ## Accessors
*/
/** Returns the user data of 'node'.
*/
CMARK_GFM_EXPORT void *cmark_node_get_user_data(cmark_node *node);
/** Sets arbitrary user data for 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data);
/** Set free function for user data */
CMARK_GFM_EXPORT
int cmark_node_set_user_data_free_func(cmark_node *node,
cmark_free_func free_func);
/** Returns the type of 'node', or `CMARK_NODE_NONE` on error.
*/
CMARK_GFM_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node);
/** Like 'cmark_node_get_type', but returns a string representation
of the type, or `"<unknown>"`.
*/
CMARK_GFM_EXPORT
const char *cmark_node_get_type_string(cmark_node *node);
/** Returns the string contents of 'node', or an empty
string if none is set. Returns NULL if called on a
node that does not have string content.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_literal(cmark_node *node);
/** Sets the string contents of 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content);
/** Returns the heading level of 'node', or 0 if 'node' is not a heading.
*/
CMARK_GFM_EXPORT int cmark_node_get_heading_level(cmark_node *node);
/* For backwards compatibility */
#define cmark_node_get_header_level cmark_node_get_heading_level
#define cmark_node_set_header_level cmark_node_set_heading_level
/** Sets the heading level of 'node', returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level);
/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node'
* is not a list.
*/
CMARK_GFM_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node);
/** Sets the list type of 'node', returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_type(cmark_node *node,
cmark_list_type type);
/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node'
* is not a list.
*/
CMARK_GFM_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node);
/** Sets the list delimiter type of 'node', returning 1 on success and 0
* on error.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_delim(cmark_node *node,
cmark_delim_type delim);
/** Returns starting number of 'node', if it is an ordered list, otherwise 0.
*/
CMARK_GFM_EXPORT int cmark_node_get_list_start(cmark_node *node);
/** Sets starting number of 'node', if it is an ordered list. Returns 1
* on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_start(cmark_node *node, int start);
/** Returns 1 if 'node' is a tight list, 0 otherwise.
*/
CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
/** Sets the "tightness" of a list. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
/** Returns the info string from a fenced code block.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
/** Sets the info string in a fenced code block, returning 1 on
* success and 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info);
/** Sets code blocks fencing details
*/
CMARK_GFM_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced,
int length, int offset, char character);
/** Returns code blocks fencing details
*/
CMARK_GFM_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character);
/** Returns the URL of a link or image 'node', or an empty string
if no URL is set. Returns NULL if called on a node that is
not a link or image.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_url(cmark_node *node);
/** Sets the URL of a link or image 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_url(cmark_node *node, const char *url);
/** Returns the title of a link or image 'node', or an empty
string if no title is set. Returns NULL if called on a node
that is not a link or image.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_title(cmark_node *node);
/** Sets the title of a link or image 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_title(cmark_node *node, const char *title);
/** Returns the literal "on enter" text for a custom 'node', or
an empty string if no on_enter is set. Returns NULL if called
on a non-custom node.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_on_enter(cmark_node *node);
/** Sets the literal text to render "on enter" for a custom 'node'.
Any children of the node will be rendered after this text.
Returns 1 on success 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_on_enter(cmark_node *node,
const char *on_enter);
/** Returns the literal "on exit" text for a custom 'node', or
an empty string if no on_exit is set. Returns NULL if
called on a non-custom node.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_on_exit(cmark_node *node);
/** Sets the literal text to render "on exit" for a custom 'node'.
Any children of the node will be rendered before this text.
Returns 1 on success 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit);
/** Returns the line on which 'node' begins.
*/
CMARK_GFM_EXPORT int cmark_node_get_start_line(cmark_node *node);
/** Returns the column at which 'node' begins.
*/
CMARK_GFM_EXPORT int cmark_node_get_start_column(cmark_node *node);
/** Returns the line on which 'node' ends.
*/
CMARK_GFM_EXPORT int cmark_node_get_end_line(cmark_node *node);
/** Returns the column at which 'node' ends.
*/
CMARK_GFM_EXPORT int cmark_node_get_end_column(cmark_node *node);
/**
* ## Tree Manipulation
*/
/** Unlinks a 'node', removing it from the tree, but not freeing its
* memory. (Use 'cmark_node_free' for that.)
*/
CMARK_GFM_EXPORT void cmark_node_unlink(cmark_node *node);
/** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_insert_before(cmark_node *node,
cmark_node *sibling);
/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
/** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does
* not free its memory).
* Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode);
/** Adds 'child' to the beginning of the children of 'node'.
* Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child);
/** Adds 'child' to the end of the children of 'node'.
* Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child);
/** Consolidates adjacent text nodes.
*/
CMARK_GFM_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);
/** Ensures a node and all its children own their own chunk memory.
*/
CMARK_GFM_EXPORT void cmark_node_own(cmark_node *root);
/**
* ## Parsing
*
* Simple interface:
*
* cmark_node *document = cmark_parse_document("Hello *world*", 13,
* CMARK_OPT_DEFAULT);
*
* Streaming interface:
*
* cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
* FILE *fp = fopen("myfile.md", "rb");
* while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
* cmark_parser_feed(parser, buffer, bytes);
* if (bytes < sizeof(buffer)) {
* break;
* }
* }
* document = cmark_parser_finish(parser);
* cmark_parser_free(parser);
*/
/** Creates a new parser object.
*/
CMARK_GFM_EXPORT
cmark_parser *cmark_parser_new(int options);
/** Creates a new parser object with the given memory allocator
*/
CMARK_GFM_EXPORT
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
/** Frees memory allocated for a parser object.
*/
CMARK_GFM_EXPORT
void cmark_parser_free(cmark_parser *parser);
/** Feeds a string of length 'len' to 'parser'.
*/
CMARK_GFM_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
/** Finish parsing and return a pointer to a tree of nodes.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
* when it is no longer needed.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
/** Parse a CommonMark document in file 'f', returning a pointer to
* a tree of nodes. The memory allocated for the node tree should be
* released using 'cmark_node_free' when it is no longer needed.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
/**
* ## Rendering
*/
/** Render a 'node' tree as XML. It is the caller's responsibility
* to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_xml(cmark_node *root, int options);
/** As for 'cmark_render_xml', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem);
/** Render a 'node' tree as an HTML fragment. It is up to the user
* to add an appropriate header and footer. It is the caller's
* responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions);
/** As for 'cmark_render_html', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem);
/** Render a 'node' tree as a groff man page, without the header.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_man(cmark_node *root, int options, int width);
/** As for 'cmark_render_man', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/** Render a 'node' tree as a commonmark document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_commonmark(cmark_node *root, int options, int width);
/** As for 'cmark_render_commonmark', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/** Render a 'node' tree as a plain text document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_plaintext(cmark_node *root, int options, int width);
/** As for 'cmark_render_plaintext', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/** Render a 'node' tree as a LaTeX document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_latex(cmark_node *root, int options, int width);
/** As for 'cmark_render_latex', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/**
* ## Options
*/
/** Default options.
*/
#define CMARK_OPT_DEFAULT 0
/**
* ### Options affecting rendering
*/
/** Include a `data-sourcepos` attribute on all block elements.
*/
#define CMARK_OPT_SOURCEPOS (1 << 1)
/** Render `softbreak` elements as hard line breaks.
*/
#define CMARK_OPT_HARDBREAKS (1 << 2)
/** `CMARK_OPT_SAFE` is defined here for API compatibility,
but it no longer has any effect. "Safe" mode is now the default:
set `CMARK_OPT_UNSAFE` to disable it.
*/
#define CMARK_OPT_SAFE (1 << 3)
/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
* `file:`, and `data:`, except for `image/png`, `image/gif`,
* `image/jpeg`, or `image/webp` mime types). By default,
* raw HTML is replaced by a placeholder HTML comment. Unsafe
* links are replaced by empty strings.
*/
#define CMARK_OPT_UNSAFE (1 << 17)
/** Render `softbreak` elements as spaces.
*/
#define CMARK_OPT_NOBREAKS (1 << 4)
/**
* ### Options affecting parsing
*/
/** Legacy option (no effect).
*/
#define CMARK_OPT_NORMALIZE (1 << 8)
/** Validate UTF-8 in the input before parsing, replacing illegal
* sequences with the replacement character U+FFFD.
*/
#define CMARK_OPT_VALIDATE_UTF8 (1 << 9)
/** Convert straight quotes to curly, --- to em dashes, -- to en dashes.
*/
#define CMARK_OPT_SMART (1 << 10)
/** Use GitHub-style <pre lang="x"> tags for code blocks instead of <pre><code
* class="language-x">.
*/
#define CMARK_OPT_GITHUB_PRE_LANG (1 << 11)
/** Be liberal in interpreting inline HTML tags.
*/
#define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12)
/** Parse footnotes.
*/
#define CMARK_OPT_FOOTNOTES (1 << 13)
/** Only parse strikethroughs if surrounded by exactly 2 tildes.
* Gives some compatibility with redcarpet.
*/
#define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE (1 << 14)
/** Use style attributes to align table cells instead of align attributes.
*/
#define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (1 << 15)
/** Include the remainder of the info string in code blocks in
* a separate attribute.
*/
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)
/**
* ## Version information
*/
/** The library version as integer for runtime checks. Also available as
* macro CMARK_GFM_VERSION for compile time checks.
*
* * Bits 24-31 contain the major version.
* * Bits 16-23 contain the minor version.
* * Bits 8-15 contain the patchlevel.
* * Bits 0-7 contain the GFM revision.
*
* In hexadecimal format, the number 0x01020304 represents version
* 1.2.3.gfm.4.
*/
CMARK_GFM_EXPORT
int cmark_version(void);
/** The library version string for runtime checks. Also available as
* macro CMARK_GFM_VERSION_STRING for compile time checks.
*/
CMARK_GFM_EXPORT
const char *cmark_version_string(void);
/** # AUTHORS
*
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
*/
#ifndef CMARK_NO_SHORT_NAMES
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
#define NODE_LIST CMARK_NODE_LIST
#define NODE_ITEM CMARK_NODE_ITEM
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
#define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK
#define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
#define NODE_HEADING CMARK_NODE_HEADING
#define NODE_HEADER CMARK_NODE_HEADER
#define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK
#define NODE_HRULE CMARK_NODE_HRULE
#define NODE_TEXT CMARK_NODE_TEXT
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
#define NODE_CODE CMARK_NODE_CODE
#define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE
#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE
#define NODE_EMPH CMARK_NODE_EMPH
#define NODE_STRONG CMARK_NODE_STRONG
#define NODE_LINK CMARK_NODE_LINK
#define NODE_IMAGE CMARK_NODE_IMAGE
#define BULLET_LIST CMARK_BULLET_LIST
#define ORDERED_LIST CMARK_ORDERED_LIST
#define PERIOD_DELIM CMARK_PERIOD_DELIM
#define PAREN_DELIM CMARK_PAREN_DELIM
#endif
typedef int32_t bufsize_t;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,7 @@
#ifndef CMARK_GFM_VERSION_H
#define CMARK_GFM_VERSION_H
/* Packed numeric version: major in bits 24-31, minor in bits 16-23,
 * patch level in bits 8-15 and the GFM revision in bits 0-7.
 * NOTE(review): the PROJECT_VERSION_* placeholders are presumably
 * substituted by the build system when this template is configured. */
#define CMARK_GFM_VERSION ((@PROJECT_VERSION_MAJOR@ << 24) | (@PROJECT_VERSION_MINOR@ << 16) | (@PROJECT_VERSION_PATCH@ << 8) | @PROJECT_VERSION_GFM@)
/* Human-readable version, formatted as MAJOR.MINOR.PATCH.gfm.GFM. */
#define CMARK_GFM_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@.gfm.@PROJECT_VERSION_GFM@"
#endif

55
lib/commonmarker/cmark.c Normal file
View File

@ -0,0 +1,55 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "registry.h"
#include "node.h"
#include "houdini.h"
#include "cmark-gfm.h"
#include "buffer.h"
/* Definitions of the variables declared 'extern' in cmark-gfm.h. They start
 * out as the highest-valued built-in block and inline node types.
 * NOTE(review): they are non-const, presumably so they can be advanced when
 * extensions register additional node types - confirm against the
 * extension code. */
cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION;
cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE;
/* Returns the packed numeric library version, CMARK_GFM_VERSION.
 * The explicit (void) makes this definition a prototype that matches the
 * declaration in cmark-gfm.h. */
int cmark_version(void) { return CMARK_GFM_VERSION; }
/* Returns the library version as a string literal,
 * CMARK_GFM_VERSION_STRING; the caller must not free or modify it.
 * The explicit (void) makes this definition a prototype that matches the
 * declaration in cmark-gfm.h. */
const char *cmark_version_string(void) { return CMARK_GFM_VERSION_STRING; }
/* calloc() wrapper used by the default allocator.
 *
 * Returns a zero-initialized block of at least nmem * size bytes and never
 * returns NULL: on allocation failure a message is written to stderr and
 * the process is aborted.
 *
 * A zero-byte request is rounded up to one byte. calloc() is allowed to
 * return NULL for such a request, which would otherwise be misreported as
 * an out-of-memory condition and abort the process. */
static void *xcalloc(size_t nmem, size_t size) {
  void *ptr;

  if (nmem == 0 || size == 0) {
    nmem = 1;
    size = 1;
  }

  ptr = calloc(nmem, size);
  if (!ptr) {
    fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n");
    abort();
  }
  return ptr;
}
/* realloc() wrapper used by the default allocator.
 *
 * Resizes 'ptr' (which may be NULL) to at least 'size' bytes and never
 * returns NULL: on allocation failure a message is written to stderr and
 * the process is aborted.
 *
 * A zero-byte request is rounded up to one byte. realloc(ptr, 0) has
 * implementation-defined behaviour and on some C libraries frees the block
 * and returns NULL, which would otherwise be misreported as an
 * out-of-memory condition and abort the process. */
static void *xrealloc(void *ptr, size_t size) {
  void *new_ptr;

  if (size == 0)
    size = 1;

  new_ptr = realloc(ptr, size);
  if (!new_ptr) {
    fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n");
    abort();
  }
  return new_ptr;
}
/* free() wrapper used as the 'free' hook of the default allocator;
 * forwards 'ptr' unchanged to the C library. */
static void xfree(void *ptr) { free(ptr); }
/* Allocator table for the default allocator: the aborting wrappers around
 * the C library's calloc, realloc and free defined above. */
cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree};

/* Returns the address of the library-owned default allocator table.
 * The result is never NULL and must not be freed by the caller.
 * The explicit (void) makes this definition a proper prototype. */
cmark_mem *cmark_get_default_mem_allocator(void) {
  return &CMARK_DEFAULT_MEM_ALLOCATOR;
}
/* One-shot conversion of a Markdown buffer to HTML.
 *
 * Parses 'len' bytes of 'text' with 'options', renders the resulting tree
 * with cmark_render_html() using the same options and no extension list
 * (NULL), releases the tree, and returns the rendered buffer. */
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
  cmark_node *tree = cmark_parse_document(text, len, options);
  char *html = cmark_render_html(tree, options, NULL);

  /* The tree is only needed for rendering; drop it before returning. */
  cmark_node_free(tree);
  return html;
}

View File

@ -0,0 +1,44 @@
#include <stdint.h>
#include "cmark_ctype.h"
/** Character class of every byte value, indexed by the byte:
 * 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other.
 *
 * The space class covers the whitespace characters of the CommonMark spec:
 * tab (0x09), newline (0x0a), line tabulation (0x0b), form feed (0x0c),
 * carriage return (0x0d) and space (0x20). All bytes >= 0x80 are "other".
 */
static const uint8_t cmark_ctype_class[256] = {
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
    /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2,
    /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0,
    /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/**
 * Whitespace test: returns 1 if the byte 'c' is classified as space
 * (class 1) in cmark_ctype_class, 0 otherwise.
 */
int cmark_isspace(char c) {
  /* Index with the unsigned byte value so bytes >= 0x80 stay in range. */
  const uint8_t klass = cmark_ctype_class[(uint8_t)c];
  return klass == 1;
}
/**
 * Returns 1 if c is an ascii punctuation character (class 2 in
 * cmark_ctype_class); returns 0 otherwise.
 */
int cmark_ispunct(char c) {
  const uint8_t char_class = cmark_ctype_class[(uint8_t)c];
  return char_class == 2;
}
/**
 * Returns 1 if c is an ascii digit or letter (class 3 or 4 in
 * cmark_ctype_class); returns 0 otherwise.
 */
int cmark_isalnum(char c) {
  switch (cmark_ctype_class[(uint8_t)c]) {
  case 3: /* digit */
  case 4: /* alpha */
    return 1;
  default:
    return 0;
  }
}
/**
 * Returns 1 if c is an ascii digit (class 3 in cmark_ctype_class).
 */
int cmark_isdigit(char c) {
  const uint8_t char_class = cmark_ctype_class[(uint8_t)c];
  return char_class == 3;
}
/**
 * Returns 1 if c is an ascii letter (class 4 in cmark_ctype_class).
 */
int cmark_isalpha(char c) {
  const uint8_t char_class = cmark_ctype_class[(uint8_t)c];
  return char_class == 4;
}

View File

@ -0,0 +1,33 @@
#ifndef CMARK_CMARK_CTYPE_H
#define CMARK_CMARK_CTYPE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm_export.h"
/** Locale-independent versions of functions from ctype.h.
* We want cmark to behave the same no matter what the system locale.
*/
CMARK_GFM_EXPORT
int cmark_isspace(char c);
CMARK_GFM_EXPORT
int cmark_ispunct(char c);
CMARK_GFM_EXPORT
int cmark_isalnum(char c);
CMARK_GFM_EXPORT
int cmark_isdigit(char c);
CMARK_GFM_EXPORT
int cmark_isalpha(char c);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,519 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include "config.h"
#include "cmark-gfm.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "scanners.h"
#include "render.h"
#include "syntax_extension.h"
#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
#define ENCODED_SIZE 20
#define LISTMARKER_SIZE 20
// Functions to convert cmark_nodes to commonmark strings.
/*
 * Writes the code point `c` to the renderer's buffer, escaping it when the
 * escaping mode requires it.  `nextc` is the character that follows `c`
 * (0 if none); it is consulted for '&', list-marker and '!' decisions.
 * `node` is unused here.
 *
 * Escaping forms:
 *   - whitespace inside a URL         -> percent-encoded (%XX)
 *   - ascii punctuation               -> backslash-escaped
 *   - anything else needing escaping  -> numeric character reference
 */
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
                              cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];

  // Note: '!' is always escaped in NORMAL mode, so the later
  // (c == '!' && nextc == '[') term never changes the outcome.
  needs_escaping =
      c < 0x80 && escape != LITERAL &&
      ((escape == NORMAL &&
        (c < 0x20 ||
         c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
         c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
         (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
         (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
          // begin_content doesn't get set to false til we've passed digits
          // at the beginning of line, so...
          !follows_digit) ||
         (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
          (nextc == 0 || cmark_isspace(nextc))))) ||
       (escape == URL &&
        (c == '`' || c == '<' || c == '>' || cmark_isspace((char)c) || c == '\\' ||
         c == ')' || c == '(')) ||
       (escape == TITLE &&
        (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

  if (needs_escaping) {
    if (escape == URL && cmark_isspace((char)c)) {
      // use percent encoding for spaces; the value must be zero-padded to
      // two hex digits (a tab is "%09", not "% 9"), which also keeps the
      // output exactly three columns wide.
      snprintf(encoded, ENCODED_SIZE, "%%%02X", (unsigned int)c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += 3;
    } else if (cmark_ispunct((char)c)) {
      cmark_render_ascii(renderer, "\\");
      cmark_render_code_point(renderer, c);
    } else { // render as entity
      snprintf(encoded, ENCODED_SIZE, "&#%d;", (int)c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += (int)strlen(encoded);
    }
  } else {
    cmark_render_code_point(renderer, c);
  }
}
/* Returns the length of the longest run of consecutive backticks in the
 * NUL-terminated string `code` (0 if it contains none). */
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *end = code + strlen(code);
  const char *p;

  // The terminating NUL is visited too, so a run that ends the string
  // is still compared against the best seen so far.
  for (p = code; p <= end; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    if (run > best) {
      best = run;
    }
    run = 0;
  }

  return best;
}
/* Returns the smallest run length (>= 1) of backticks that does not occur
 * in `code`.  Run lengths are tracked in a 32-bit set, so the result is
 * capped at 32: if every length 1..31 is in use, 32 is returned. */
static int shortest_unused_backtick_sequence(const char *code) {
  // bit k set => a run of exactly k backticks occurs; bit 0 is pre-set so
  // that the answer is never 0.
  uint32_t seen = 1;
  int run = 0;
  int candidate = 0;
  const char *p = code;

  for (;;) {
    if (*p == '`') {
      run++;
    } else {
      if (run > 0 && run < 32) {
        seen |= (1U << run);
      }
      run = 0;
      if (*p == '\0') {
        break; // the terminator has flushed any trailing run
      }
    }
    p++;
  }

  // index of the lowest clear bit
  while (candidate < 32 && (seen & 1)) {
    seen >>= 1;
    candidate++;
  }
  return candidate;
}
/* Decides whether `node` can be written in autolink form: it must be a
 * link with a non-empty URL that starts with a scheme, no title, and a first
 * child whose literal text equals the URL (ignoring a leading "mailto:").
 * Side effect: consolidates adjacent text nodes under the link's first
 * child before comparing. */
static bool is_autolink(cmark_node *node) {
  cmark_chunk *url;
  cmark_node *link_text;
  const char *target;
  int target_len;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  url = &node->as.link.url;
  if (url->len == 0 || scan_scheme(url, 0) == 0) {
    return false;
  }

  // a link with a title can't be treated as an autolink
  if (node->as.link.title.len > 0) {
    return false;
  }

  link_text = node->first_child;
  if (link_text == NULL) {
    return false;
  }

  cmark_consolidate_text_nodes(link_text);

  target = (const char *)url->data;
  target_len = url->len;
  if (strncmp(target, "mailto:", 7) == 0) {
    target += 7;
    target_len -= 7;
  }

  if (target_len != link_text->as.literal.len) {
    return false;
  }
  return strncmp(target, (char *)link_text->as.literal.data,
                 link_text->as.literal.len) == 0;
}
// Walks from node up through its parents and returns the first node for
// which CMARK_NODE_BLOCK_P holds (node itself if it is a block).
// Returns NULL when neither node nor any ancestor is block-level.
static cmark_node *get_containing_block(cmark_node *node) {
  cmark_node *cur;

  for (cur = node; cur != NULL; cur = cur->parent) {
    if (CMARK_NODE_BLOCK_P(cur)) {
      return cur;
    }
  }
  return NULL;
}
/*
 * Renders a single node event as CommonMark text through `renderer`.
 *
 * Called once per iterator event; `ev_type` says whether the node is being
 * entered or exited.  Nodes owned by an extension that supplies a
 * commonmark_render_func are delegated to it.
 *
 * Returns 1 normally.  Returns 0 after an autolink has been written on
 * entry, which signals the caller to skip the link's contents.
 *
 * Whitespace literals are significant here: every indentation string pushed
 * onto renderer->prefix must be exactly as wide as the amount later removed
 * with cmark_strbuf_truncate (4 columns for list items, indented code and
 * footnote definitions), and a hard line break is two trailing spaces.
 */
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  cmark_node *tmp;
  int list_number;
  cmark_delim_type list_delim;
  int numticks;
  bool extra_spaces;
  int i;
  bool entering = (ev_type == CMARK_EVENT_ENTER);
  const char *info, *code, *title;
  char fencechar[2] = {'\0', '\0'};
  size_t info_len, code_len;
  char listmarker[LISTMARKER_SIZE];
  const char *emph_delim;
  bool first_in_list_item;
  bufsize_t marker_width;
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
                    !(CMARK_OPT_HARDBREAKS & options);

  // Don't adjust tight list status til we've started the list.
  // Otherwise we lose the blank line between a paragraph and
  // a following list.
  if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
    tmp = get_containing_block(node);
    renderer->in_tight_list_item =
        tmp && // tmp might be NULL if there is no containing block
        ((tmp->type == CMARK_NODE_ITEM &&
          cmark_node_get_list_tight(tmp->parent)) ||
         (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
          cmark_node_get_list_tight(tmp->parent->parent)));
  }

  if (node->extension && node->extension->commonmark_render_func) {
    node->extension->commonmark_render_func(node->extension, renderer, node, ev_type, options);
    return 1;
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      LIT("> ");
      renderer->begin_content = true;
      cmark_strbuf_puts(renderer->prefix, "> ");
    } else {
      // drop the two-character "> " prefix pushed on entry
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
      BLANKLINE();
    }
    break;

  case CMARK_NODE_LIST:
    if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
                                    node->next->type == CMARK_NODE_LIST)) {
      // this ensures that a following indented code block or list will be
      // interpreted correctly.
      CR();
      LIT("<!-- end list -->");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_ITEM:
    if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
      marker_width = 4;
    } else {
      list_number = cmark_node_get_list_start(node->parent);
      list_delim = cmark_node_get_list_delim(node->parent);
      tmp = node;
      while (tmp->prev) {
        tmp = tmp->prev;
        list_number += 1;
      }
      // we ensure a width of at least 4 so
      // we get nice transition from single digits
      // to double
      snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
               list_delim == CMARK_PAREN_DELIM ? ")" : ".",
               list_number < 10 ? "  " : " ");
      marker_width = (bufsize_t)strlen(listmarker);
    }
    if (entering) {
      if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
        // four columns wide, matching marker_width above
        LIT("  - ");
        renderer->begin_content = true;
      } else {
        LIT(listmarker);
        renderer->begin_content = true;
      }
      // continuation lines of the item are indented by the marker width
      for (i = marker_width; i--;) {
        cmark_strbuf_putc(renderer->prefix, ' ');
      }
    } else {
      cmark_strbuf_truncate(renderer->prefix,
                            renderer->prefix->size - marker_width);
      CR();
    }
    break;

  case CMARK_NODE_HEADING:
    if (entering) {
      for (i = cmark_node_get_heading_level(node); i > 0; i--) {
        LIT("#");
      }
      LIT(" ");
      renderer->begin_content = true;
      renderer->no_linebreaks = true;
    } else {
      renderer->no_linebreaks = false;
      BLANKLINE();
    }
    break;

  case CMARK_NODE_CODE_BLOCK:
    first_in_list_item = node->prev == NULL && node->parent &&
                         node->parent->type == CMARK_NODE_ITEM;

    if (!first_in_list_item) {
      BLANKLINE();
    }
    info = cmark_node_get_fence_info(node);
    info_len = strlen(info);
    // a backtick in the info string forces a tilde fence
    fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    // use indented form if no info, and code doesn't
    // begin or end with a blank line, and code isn't
    // first thing in a list item
    if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) &&
                          !(cmark_isspace(code[code_len - 1]) &&
                            cmark_isspace(code[code_len - 2]))) &&
        !first_in_list_item) {
      // four-space indent; the same four columns are removed below
      LIT("    ");
      cmark_strbuf_puts(renderer->prefix, "    ");
      OUT(cmark_node_get_literal(node), false, LITERAL);
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    } else {
      // the fence must be longer than any backtick run in the code
      numticks = longest_backtick_sequence(code) + 1;
      if (numticks < 3) {
        numticks = 3;
      }
      for (i = 0; i < numticks; i++) {
        LIT(fencechar);
      }
      LIT(" ");
      OUT(info, false, LITERAL);
      CR();
      OUT(cmark_node_get_literal(node), false, LITERAL);
      CR();
      for (i = 0; i < numticks; i++) {
        LIT(fencechar);
      }
    }
    BLANKLINE();
    break;

  case CMARK_NODE_HTML_BLOCK:
    BLANKLINE();
    OUT(cmark_node_get_literal(node), false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    BLANKLINE();
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    BLANKLINE();
    LIT("-----");
    BLANKLINE();
    break;

  case CMARK_NODE_PARAGRAPH:
    if (!entering) {
      BLANKLINE();
    }
    break;

  case CMARK_NODE_TEXT:
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    break;

  case CMARK_NODE_LINEBREAK:
    if (!(CMARK_OPT_HARDBREAKS & options)) {
      // a hard line break is written as two trailing spaces
      LIT("  ");
    }
    CR();
    break;

  case CMARK_NODE_SOFTBREAK:
    if (CMARK_OPT_HARDBREAKS & options) {
      LIT("  ");
      CR();
    } else if (!renderer->no_linebreaks && renderer->width == 0 &&
               !(CMARK_OPT_HARDBREAKS & options) &&
               !(CMARK_OPT_NOBREAKS & options)) {
      CR();
    } else {
      OUT(" ", allow_wrap, LITERAL);
    }
    break;

  case CMARK_NODE_CODE:
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    numticks = shortest_unused_backtick_sequence(code);
    // pad with spaces when the content is empty or begins/ends with a
    // backtick or a space
    extra_spaces = code_len == 0 ||
                   code[0] == '`' || code[code_len - 1] == '`' ||
                   code[0] == ' ' || code[code_len - 1] == ' ';
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    if (extra_spaces) {
      LIT(" ");
    }
    OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
    if (extra_spaces) {
      LIT(" ");
    }
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    break;

  case CMARK_NODE_HTML_INLINE:
    OUT(cmark_node_get_literal(node), false, LITERAL);
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    break;

  case CMARK_NODE_STRONG:
    if (entering) {
      LIT("**");
    } else {
      LIT("**");
    }
    break;

  case CMARK_NODE_EMPH:
    // If we have EMPH(EMPH(x)), we need to use *_x_*
    // because **x** is STRONG(x):
    if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
        node->next == NULL && node->prev == NULL) {
      emph_delim = "_";
    } else {
      emph_delim = "*";
    }
    if (entering) {
      LIT(emph_delim);
    } else {
      LIT(emph_delim);
    }
    break;

  case CMARK_NODE_LINK:
    if (is_autolink(node)) {
      if (entering) {
        LIT("<");
        if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
          LIT((const char *)cmark_node_get_url(node) + 7);
        } else {
          LIT((const char *)cmark_node_get_url(node));
        }
        LIT(">");
        // return signal to skip contents of node...
        return 0;
      }
    } else {
      if (entering) {
        LIT("[");
      } else {
        LIT("](");
        OUT(cmark_node_get_url(node), false, URL);
        title = cmark_node_get_title(node);
        if (strlen(title) > 0) {
          LIT(" \"");
          OUT(title, false, TITLE);
          LIT("\"");
        }
        LIT(")");
      }
    }
    break;

  case CMARK_NODE_IMAGE:
    if (entering) {
      LIT("![");
    } else {
      LIT("](");
      OUT(cmark_node_get_url(node), false, URL);
      title = cmark_node_get_title(node);
      if (strlen(title) > 0) {
        OUT(" \"", allow_wrap, LITERAL);
        OUT(title, false, TITLE);
        LIT("\"");
      }
      LIT(")");
    }
    break;

  case CMARK_NODE_FOOTNOTE_REFERENCE:
    if (entering) {
      LIT("[^");
      OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
      LIT("]");
    }
    break;

  case CMARK_NODE_FOOTNOTE_DEFINITION:
    if (entering) {
      renderer->footnote_ix += 1;
      LIT("[^");
      char n[32];
      snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
      OUT(n, false, LITERAL);
      LIT("]:\n");

      // four-space indent for the definition body; removed on exit
      cmark_strbuf_puts(renderer->prefix, "    ");
    } else {
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    }
    break;

  default:
    assert(false);
    break;
  }

  return 1;
}
/* Renders the tree under `root` as CommonMark, allocating the result with
 * the allocator returned by cmark_node_mem(root). */
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  cmark_mem *mem = cmark_node_mem(root);
  return cmark_render_commonmark_with_mem(root, options, width, mem);
}
/* Renders the tree under `root` as CommonMark using allocator `mem`.
 * `width` is the wrap column; it is forced to 0 when CMARK_OPT_HARDBREAKS
 * is set, because breaking on width has a different meaning with that
 * option. */
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  const int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(mem, root, options, effective_width, outc, S_render_node);
}

View File

@ -0,0 +1,76 @@
#ifndef CMARK_CONFIG_H
#define CMARK_CONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
#cmakedefine HAVE_STDBOOL_H
#ifdef HAVE_STDBOOL_H
#include <stdbool.h>
#elif !defined(__cplusplus)
typedef char bool;
#endif
#cmakedefine HAVE___BUILTIN_EXPECT
#cmakedefine HAVE___ATTRIBUTE__
#ifdef HAVE___ATTRIBUTE__
#define CMARK_ATTRIBUTE(list) __attribute__ (list)
#else
#define CMARK_ATTRIBUTE(list)
#endif
#ifndef CMARK_INLINE
#if defined(_MSC_VER) && !defined(__cplusplus)
#define CMARK_INLINE __inline
#else
#define CMARK_INLINE inline
#endif
#endif
/* snprintf and vsnprintf fallbacks for MSVC before 2015,
due to Valentin Milea http://stackoverflow.com/questions/2915672/
*/
#if defined(_MSC_VER) && _MSC_VER < 1900
#include <stdio.h>
#include <stdarg.h>
#define snprintf c99_snprintf
#define vsnprintf c99_vsnprintf
/* vsnprintf replacement for MSVC before 2015.  Formats into outBuf with
 * truncation via _vsnprintf_s; when that reports -1, or when size is 0,
 * the result of _vscprintf(format, ap) is returned instead. */
CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap)
{
    if (size != 0) {
        int written = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap);
        if (written != -1) {
            return written;
        }
    }

    return _vscprintf(format, ap);
}
/* snprintf replacement for MSVC before 2015: collects the variadic
 * arguments and forwards them to c99_vsnprintf, returning its result. */
CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...)
{
    va_list args;
    int written;

    va_start(args, format);
    written = c99_vsnprintf(outBuf, size, format, args);
    va_end(args);

    return written;
}
#endif
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,40 @@
#include "cmark-gfm.h"
#include "parser.h"
#include "footnotes.h"
#include "inlines.h"
#include "chunk.h"
/* Map-entry destructor for footnotes: frees the entry's label with the
 * map's allocator, frees the attached node (if any) with cmark_node_free,
 * then frees the entry itself.  A NULL entry is ignored. */
static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
  cmark_footnote *footnote = (cmark_footnote *)_ref;
  cmark_mem *mem = map->mem;

  if (footnote == NULL) {
    return;
  }

  mem->free(footnote->entry.label);
  if (footnote->node != NULL) {
    cmark_node_free(footnote->node);
  }
  mem->free(footnote);
}
/* Registers `node` as a footnote definition in `map`, keyed by the
 * normalized form of the node's literal.  Nothing is added when the label
 * normalizes to NULL.  The new entry is pushed at the head of map->refs and
 * records the current map->size as its age. */
void cmark_footnote_create(cmark_map *map, cmark_node *node) {
  cmark_footnote *footnote;
  unsigned char *label = normalize_map_label(map->mem, &node->as.literal);

  /* empty footnote name, or composed from only whitespace */
  if (label == NULL) {
    return;
  }

  assert(map->sorted == NULL);

  footnote = (cmark_footnote *)map->mem->calloc(1, sizeof(*footnote));
  footnote->node = node;
  footnote->entry.label = label;
  footnote->entry.age = map->size;
  footnote->entry.next = map->refs;

  map->refs = (cmark_map_entry *)footnote;
  map->size++;
}
/* Creates a map whose entries are released with footnote_free. */
cmark_map *cmark_footnote_map_new(cmark_mem *mem)
{
  cmark_map *map = cmark_map_new(mem, footnote_free);
  return map;
}

View File

@ -0,0 +1,25 @@
#ifndef CMARK_FOOTNOTES_H
#define CMARK_FOOTNOTES_H
#include "map.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Footnote record kept in a cmark_map.  footnotes.c casts between
 * cmark_footnote * and cmark_map_entry *, which relies on `entry` being
 * the first member. */
struct cmark_footnote {
/* generic map bookkeeping (label, age, next) */
cmark_map_entry entry;
/* node this footnote was created from; freed together with the record */
cmark_node *node;
/* NOTE(review): not referenced in footnotes.c -- presumably the footnote's
 * index, assigned elsewhere; confirm against the users of this struct */
unsigned int ix;
};
typedef struct cmark_footnote cmark_footnote;
void cmark_footnote_create(cmark_map *map, cmark_node *node);
cmark_map *cmark_footnote_map_new(cmark_mem *mem);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,57 @@
#ifndef CMARK_HOUDINI_H
#define CMARK_HOUDINI_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "config.h"
#include "buffer.h"
#ifdef HAVE___BUILTIN_EXPECT
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale
 *
 * _isxdigit searches exactly the 22 hexadecimal digit characters with
 * memchr.  strchr is not suitable here: it treats the string's terminating
 * NUL as part of the string, so strchr(digits, 0) is non-NULL and a zero
 * byte would be accepted as a hex digit.
 * */
#define _isxdigit(c) (memchr("0123456789ABCDEFabcdef", (c), 22) != NULL)
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
CMARK_GFM_EXPORT
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
CMARK_GFM_EXPORT
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
CMARK_GFM_EXPORT
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size, int secure);
CMARK_GFM_EXPORT
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
CMARK_GFM_EXPORT
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
CMARK_GFM_EXPORT
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,100 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
/*
* The following characters will not be escaped:
*
* -_.+!*'(),%#@?=;:/,+&$~ alphanum
*
* Note that this character set is the addition of:
*
* - The characters which are safe to be in an URL
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
*
* We assume (lazily) that any RESERVED char that
* appears inside an URL is actually meant to
* have its native function (i.e. as an URL
* component/separator) and hence needs no escaping.
*
* There are two exceptions: the characters & (amp)
* and ' (single quote) do not appear in the table.
* They are meant to appear in the URL as components,
* yet they require special HTML-entity escaping
* to generate valid HTML markup.
*
* All other characters will be escaped to %XX.
*
*/
/* Indexed by byte value (256 entries, 24 per row).  A non-zero entry marks
 * a byte that houdini_escape_href copies through unchanged; a zero entry
 * marks a byte that is escaped.  All bytes >= 0x80 are zero. */
static const char HREF_SAFE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Appends `size` bytes of `src` to `ob`, escaped for use inside an href
 * attribute: bytes marked safe in HREF_SAFE are copied verbatim, '&'
 * becomes "&amp;", the single quote becomes "&#x27;", and every other byte
 * becomes %XX with uppercase hex digits.  Always returns 1. */
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
  uint8_t percent_escape[3];
  bufsize_t pos = 0;

  percent_escape[0] = '%';

  while (pos < size) {
    /* copy the longest run of bytes that need no escaping */
    const bufsize_t run_start = pos;
    while (pos < size && HREF_SAFE[src[pos]] != 0) {
      pos++;
    }
    if (likely(pos > run_start)) {
      cmark_strbuf_put(ob, src + run_start, pos - run_start);
    }

    if (pos >= size) {
      break;
    }

    if (src[pos] == '&') {
      /* amp appears all the time in URLs, but needs
       * HTML-entity escaping to be inside an href */
      cmark_strbuf_puts(ob, "&amp;");
    } else if (src[pos] == '\'') {
      /* the single quote is a valid URL character
       * according to the standard; it needs HTML
       * entity escaping too */
      cmark_strbuf_puts(ob, "&#x27;");
    } else {
      /* every other character goes with a %XX escaping; this includes
       * the space, which gets the generic %20 form rather than '+' */
      percent_escape[1] = hex_chars[(src[pos] >> 4) & 0xF];
      percent_escape[2] = hex_chars[src[pos] & 0xF];
      cmark_strbuf_put(ob, percent_escape, 3);
    }

    pos++;
  }

  return 1;
}

View File

@ -0,0 +1,66 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
/**
* According to the OWASP rules:
*
* & --> &amp;
* < --> &lt;
* > --> &gt;
* " --> &quot;
* ' --> &#x27; &apos; is not recommended
* / --> &#x2F; forward slash is included as it helps end an HTML entity
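*
* Note: the escape strings emitted by this file use the decimal forms
* &#39; and &#47;, which denote the same characters as the hexadecimal
* &#x27; and &#x2F; listed above.
*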
*/
/* Indexed by byte value (256 entries, 24 per row).  Each entry is an index
 * into HTML_ESCAPES; 0 means the byte needs no escaping.  Non-zero entries:
 * '"' -> 1, '&' -> 2, '\'' -> 3, '/' -> 4, '<' -> 5, '>' -> 6. */
static const char HTML_ESCAPE_TABLE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Replacement strings, selected by the values stored in HTML_ESCAPE_TABLE.
 * Slot 0 is the empty string and corresponds to "no escaping". */
static const char *HTML_ESCAPES[] = {"", "&quot;", "&amp;", "&#39;",
"&#47;", "&lt;", "&gt;"};
/* Appends `size` bytes of `src` to `ob` with HTML-special bytes replaced
 * by the entity chosen through HTML_ESCAPE_TABLE / HTML_ESCAPES.  When
 * `secure` is zero, '/' and the single quote are written unescaped.
 * Always returns 1. */
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
                         int secure) {
  bufsize_t pos = 0;
  bufsize_t escape_ix = 0;

  while (pos < size) {
    /* find the end of the run of bytes that need no escaping */
    const bufsize_t run_start = pos;
    while (pos < size) {
      escape_ix = HTML_ESCAPE_TABLE[src[pos]];
      if (escape_ix != 0) {
        break;
      }
      pos++;
    }

    if (pos > run_start) {
      cmark_strbuf_put(ob, src + run_start, pos - run_start);
    }

    if (unlikely(pos >= size)) {
      break;
    }

    /* The forward slash and single quote are only escaped in secure mode */
    if (!secure && (src[pos] == '/' || src[pos] == '\'')) {
      cmark_strbuf_putc(ob, src[pos]);
    } else {
      cmark_strbuf_puts(ob, HTML_ESCAPES[escape_ix]);
    }

    pos++;
  }

  return 1;
}
/* HTML-escapes `src` into `ob` in secure mode (secure = 1), i.e. with '/'
 * and the single quote escaped as well.  Returns the result of
 * houdini_escape_html0. */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  const int secure = 1;
  return houdini_escape_html0(ob, src, size, secure);
}

View File

@ -0,0 +1,149 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "buffer.h"
#include "houdini.h"
#include "utf8.h"
#include "entities.inc"
/* Binary tree lookup code for entities added by JGM */
/* Searches cmark_entities within [low, hi], probing index i first, for an
 * entity whose name is exactly the first `len` bytes of `s`.  Returns the
 * entity's replacement bytes, or NULL when no entry matches.  Each step
 * narrows the range to one side of the probe and moves the probe to the
 * middle of what is left. */
static const unsigned char *S_lookup(int i, int low, int hi,
                                     const unsigned char *s, int len) {
  for (;;) {
    const int cmp =
        strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);

    if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
      return (const unsigned char *)cmark_entities[i].bytes;
    }

    if (cmp <= 0 && i > low) {
      /* continue in the lower part [low, i - 1] */
      int next = i - ((i - low) / 2);
      if (next == i) {
        next -= 1;
      }
      hi = i - 1;
      i = next;
    } else if (cmp > 0 && i < hi) {
      /* continue in the upper part [i + 1, hi] */
      int next = i + ((hi - i) / 2);
      if (next == i) {
        next += 1;
      }
      low = i + 1;
      i = next;
    } else {
      return NULL;
    }
  }
}
/* Looks up the entity named by the first `len` bytes of `s` across the
 * whole cmark_entities table, starting the search at its midpoint. */
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
  const int first = 0;
  const int last = CMARK_NUM_ENTITIES - 1;
  const int middle = CMARK_NUM_ENTITIES / 2;

  return S_lookup(middle, first, last, s, len);
}
/*
 * Tries to decode one entity whose text starts at `src`, i.e. just after
 * the '&'.  On success the decoded bytes are appended to `ob` and the number
 * of bytes consumed from `src` (including the closing ';') is returned.
 * Returns 0, writing nothing, when the text is not a recognized entity.
 *
 * Numeric references ("#123;", "#x1F;" / "#X1F;") accept 1 to 8 digits.
 * Zero, surrogate code points and values of 0x110000 or more are replaced
 * with U+FFFD.  Named references are looked up with S_lookup_entity.
 */
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size) {
bufsize_t i = 0;
if (size >= 3 && src[0] == '#') {
int codepoint = 0;
int num_digits = 0;
if (_isdigit(src[1])) {
// decimal reference: digits start at index 1
for (i = 1; i < size && _isdigit(src[i]); ++i) {
codepoint = (codepoint * 10) + (src[i] - '0');
if (codepoint >= 0x110000) {
// Keep counting digits but
// avoid integer overflow.
codepoint = 0x110000;
}
}
num_digits = i - 1;
}
else if (src[1] == 'x' || src[1] == 'X') {
// hexadecimal reference: digits start at index 2
for (i = 2; i < size && _isxdigit(src[i]); ++i) {
// (c | 32) folds 'A'-'F' onto 'a'-'f' and leaves '0'-'9' unchanged;
// % 39 - 9 then maps '0'-'9' to 0-9 and 'a'-'f' to 10-15.
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
if (codepoint >= 0x110000) {
// Keep counting digits but
// avoid integer overflow.
codepoint = 0x110000;
}
}
num_digits = i - 2;
}
// i now indexes the first byte after the digits; it must be ';'
if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
codepoint >= 0x110000) {
codepoint = 0xFFFD;
}
cmark_utf8proc_encode_char(codepoint, ob);
return i + 1;
}
}
else {
// named reference: scan no further than the longest entity name
if (size > CMARK_ENTITY_MAX_LENGTH)
size = CMARK_ENTITY_MAX_LENGTH;
for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
// a space before any ';' means this is not an entity
if (src[i] == ' ')
break;
if (src[i] == ';') {
const unsigned char *entity = S_lookup_entity(src, i);
if (entity != NULL) {
cmark_strbuf_puts(ob, (const char *)entity);
return i + 1;
}
break;
}
}
}
return 0;
}
/* Copies `src` to `ob`, replacing every entity recognized by
 * houdini_unescape_ent with its decoded form; an '&' that does not start a
 * recognized entity is copied as a literal '&'.
 *
 * Returns 0 without writing anything when the first scan reaches the end
 * of a non-empty input without meeting an '&' (nothing to unescape);
 * returns 1 otherwise. */
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
                          bufsize_t size) {
  bufsize_t pos = 0;

  while (pos < size) {
    /* advance to the next '&' (or the end of input) */
    const bufsize_t run_start = pos;
    bufsize_t consumed;

    while (pos < size && src[pos] != '&') {
      pos++;
    }

    if (likely(pos > run_start)) {
      if (unlikely(run_start == 0)) {
        /* first run: bail out if the input holds no '&' at all,
         * otherwise reserve room for the output up front */
        if (pos >= size) {
          return 0;
        }
        cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
      }
      cmark_strbuf_put(ob, src + run_start, pos - run_start);
    }

    if (pos >= size) {
      break;
    }

    pos++; /* step over the '&' */

    consumed = houdini_unescape_ent(ob, src + pos, size - pos);
    pos += consumed;

    /* not really an entity */
    if (consumed == 0) {
      cmark_strbuf_putc(ob, '&');
    }
  }

  return 1;
}
/* Unescapes `src` into `ob`; when houdini_unescape_html returns 0, the
 * input is appended to `ob` verbatim instead. */
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
                             bufsize_t size) {
  const int handled = houdini_unescape_html(ob, src, size);

  if (handled == 0) {
    cmark_strbuf_put(ob, src, size);
  }
}

465
lib/commonmarker/html.c Normal file
View File

@ -0,0 +1,465 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "cmark_ctype.h"
#include "config.h"
#include "cmark-gfm.h"
#include "houdini.h"
#include "scanners.h"
#include "syntax_extension.h"
#include "html.h"
#include "render.h"
// Functions to convert cmark_nodes to HTML strings.
/* Appends `length` bytes of `source` to `dest`, HTML-escaped in non-secure
 * mode (secure = 0 is passed to houdini_escape_html0). */
static void escape_html(cmark_strbuf *dest, const unsigned char *source,
                        bufsize_t length) {
  const int secure = 0;
  houdini_escape_html0(dest, source, length, secure);
}
/* Writes the raw HTML in data[0..len) to the renderer's output, consulting
 * the renderer's filter extensions at every '<'.  If any extension's
 * html_filter_func returns 0 for the text starting at that '<', the '<' is
 * written as "&lt;"; otherwise it is written unchanged.  Everything else
 * is copied verbatim. */
static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size_t len) {
  cmark_strbuf *out = renderer->html;

  while (len) {
    cmark_llist *it;
    bool rejected = false;
    uint8_t *lt = (uint8_t *) memchr(data, '<', len);

    if (lt == NULL) {
      break;
    }

    // flush the text that precedes the '<'
    if (lt != data) {
      cmark_strbuf_put(out, data, (bufsize_t)(lt - data));
      len -= (lt - data);
      data = lt;
    }

    // the first extension that rejects the tag decides the outcome
    for (it = renderer->filter_extensions; it != NULL; it = it->next) {
      cmark_syntax_extension *ext = (cmark_syntax_extension *) it->data;
      if (!ext->html_filter_func(ext, data, len)) {
        rejected = true;
        break;
      }
    }

    if (rejected) {
      cmark_strbuf_puts(out, "&lt;");
    } else {
      cmark_strbuf_putc(out, '<');
    }

    ++data;
    --len;
  }

  // trailing text after the last '<' (or the whole input if none found)
  if (len) {
    cmark_strbuf_put(out, data, (bufsize_t)len);
  }
}
/* Writes the back-reference link for the current footnote to `html`,
 * at most once per footnote: nothing is written (and false is returned)
 * when written_footnote_ix has already caught up with footnote_ix.
 * Returns true when the link was written. */
static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
  char index_text[32];

  if (renderer->written_footnote_ix >= renderer->footnote_ix) {
    return false;
  }
  renderer->written_footnote_ix = renderer->footnote_ix;

  snprintf(index_text, sizeof(index_text), "%d", renderer->footnote_ix);

  cmark_strbuf_puts(html, "<a href=\"#fnref");
  cmark_strbuf_puts(html, index_text);
  cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩</a>");

  return true;
}
/*
 * Renders a single node event as HTML into renderer->html.
 *
 * Called once per iterator event; `ev_type` says whether the node is being
 * entered or exited.  While renderer->plain is set, nodes are written as
 * plain escaped text only; this mode is entered for the contents of an
 * image so that they end up inside the alt attribute.  Nodes owned by an
 * extension that supplies an html_render_func are delegated to it.
 *
 * Always returns 1.
 */
static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
cmark_node *parent;
cmark_node *grandparent;
cmark_strbuf *html = renderer->html;
cmark_llist *it;
cmark_syntax_extension *ext;
// templates; the '0' is overwritten with the heading level below
char start_heading[] = "<h0";
char end_heading[] = "</h0";
bool tight;
bool filtered;
char buffer[BUFFER_SIZE];
bool entering = (ev_type == CMARK_EVENT_ENTER);
if (renderer->plain == node) { // back at original node
renderer->plain = NULL;
}
// plain-text mode: only literal text and breaks produce output
if (renderer->plain != NULL) {
switch (node->type) {
case CMARK_NODE_TEXT:
case CMARK_NODE_CODE:
case CMARK_NODE_HTML_INLINE:
escape_html(html, node->as.literal.data, node->as.literal.len);
break;
case CMARK_NODE_LINEBREAK:
case CMARK_NODE_SOFTBREAK:
cmark_strbuf_putc(html, ' ');
break;
default:
break;
}
return 1;
}
if (node->extension && node->extension->html_render_func) {
node->extension->html_render_func(node->extension, renderer, node, ev_type, options);
return 1;
}
switch (node->type) {
case CMARK_NODE_DOCUMENT:
break;
case CMARK_NODE_BLOCK_QUOTE:
if (entering) {
cmark_html_render_cr(html);
cmark_strbuf_puts(html, "<blockquote");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
} else {
cmark_html_render_cr(html);
cmark_strbuf_puts(html, "</blockquote>\n");
}
break;
case CMARK_NODE_LIST: {
cmark_list_type list_type = node->as.list.list_type;
int start = node->as.list.start;
if (entering) {
cmark_html_render_cr(html);
if (list_type == CMARK_BULLET_LIST) {
cmark_strbuf_puts(html, "<ul");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
} else if (start == 1) {
cmark_strbuf_puts(html, "<ol");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
} else {
// a start attribute is only written when the list does not start at 1
snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"", start);
cmark_strbuf_puts(html, buffer);
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
}
} else {
cmark_strbuf_puts(html,
list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n");
}
break;
}
case CMARK_NODE_ITEM:
if (entering) {
cmark_html_render_cr(html);
cmark_strbuf_puts(html, "<li");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_putc(html, '>');
} else {
cmark_strbuf_puts(html, "</li>\n");
}
break;
case CMARK_NODE_HEADING:
if (entering) {
cmark_html_render_cr(html);
start_heading[2] = (char)('0' + node->as.heading.level);
cmark_strbuf_puts(html, start_heading);
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_putc(html, '>');
} else {
end_heading[3] = (char)('0' + node->as.heading.level);
cmark_strbuf_puts(html, end_heading);
cmark_strbuf_puts(html, ">\n");
}
break;
case CMARK_NODE_CODE_BLOCK:
cmark_html_render_cr(html);
if (node->as.code.info.len == 0) {
cmark_strbuf_puts(html, "<pre");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, "><code>");
} else {
// first_tag = length of the info string's first whitespace-delimited word
bufsize_t first_tag = 0;
while (first_tag < node->as.code.info.len &&
!cmark_isspace(node->as.code.info.data[first_tag])) {
first_tag += 1;
}
if (options & CMARK_OPT_GITHUB_PRE_LANG) {
cmark_strbuf_puts(html, "<pre");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, " lang=\"");
escape_html(html, node->as.code.info.data, first_tag);
// the rest of the info string (after the separator) goes to data-meta
if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) {
cmark_strbuf_puts(html, "\" data-meta=\"");
escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1);
}
cmark_strbuf_puts(html, "\"><code>");
} else {
cmark_strbuf_puts(html, "<pre");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, "><code class=\"language-");
escape_html(html, node->as.code.info.data, first_tag);
if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) {
cmark_strbuf_puts(html, "\" data-meta=\"");
escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1);
}
cmark_strbuf_puts(html, "\">");
}
}
escape_html(html, node->as.code.literal.data, node->as.code.literal.len);
cmark_strbuf_puts(html, "</code></pre>\n");
break;
case CMARK_NODE_HTML_BLOCK:
cmark_html_render_cr(html);
// raw HTML is only passed through when CMARK_OPT_UNSAFE is set
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else if (renderer->filter_extensions) {
filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
} else {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
}
cmark_html_render_cr(html);
break;
case CMARK_NODE_CUSTOM_BLOCK:
cmark_html_render_cr(html);
if (entering) {
cmark_strbuf_put(html, node->as.custom.on_enter.data,
node->as.custom.on_enter.len);
} else {
cmark_strbuf_put(html, node->as.custom.on_exit.data,
node->as.custom.on_exit.len);
}
cmark_html_render_cr(html);
break;
case CMARK_NODE_THEMATIC_BREAK:
cmark_html_render_cr(html);
cmark_strbuf_puts(html, "<hr");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, " />\n");
break;
case CMARK_NODE_PARAGRAPH:
// a paragraph whose grandparent is a tight list gets no <p> tags
parent = cmark_node_parent(node);
grandparent = cmark_node_parent(parent);
if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
tight = grandparent->as.list.tight;
} else {
tight = false;
}
if (!tight) {
if (entering) {
cmark_html_render_cr(html);
cmark_strbuf_puts(html, "<p");
cmark_html_render_sourcepos(node, html, options);
cmark_strbuf_putc(html, '>');
} else {
// the last paragraph of a footnote definition carries the back-reference
if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) {
cmark_strbuf_putc(html, ' ');
S_put_footnote_backref(renderer, html);
}
cmark_strbuf_puts(html, "</p>\n");
}
}
break;
case CMARK_NODE_TEXT:
escape_html(html, node->as.literal.data, node->as.literal.len);
break;
case CMARK_NODE_LINEBREAK:
cmark_strbuf_puts(html, "<br />\n");
break;
case CMARK_NODE_SOFTBREAK:
if (options & CMARK_OPT_HARDBREAKS) {
cmark_strbuf_puts(html, "<br />\n");
} else if (options & CMARK_OPT_NOBREAKS) {
cmark_strbuf_putc(html, ' ');
} else {
cmark_strbuf_putc(html, '\n');
}
break;
case CMARK_NODE_CODE:
cmark_strbuf_puts(html, "<code>");
escape_html(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(html, "</code>");
break;
case CMARK_NODE_HTML_INLINE:
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
// an extension returning 0 from html_filter_func rejects the tag
filtered = false;
for (it = renderer->filter_extensions; it; it = it->next) {
ext = (cmark_syntax_extension *) it->data;
if (!ext->html_filter_func(ext, node->as.literal.data, node->as.literal.len)) {
filtered = true;
break;
}
}
if (!filtered) {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
} else {
// rejected: write "&lt;" in place of the first byte, then the rest
cmark_strbuf_puts(html, "&lt;");
cmark_strbuf_put(html, node->as.literal.data + 1, node->as.literal.len - 1);
}
}
break;
case CMARK_NODE_CUSTOM_INLINE:
if (entering) {
cmark_strbuf_put(html, node->as.custom.on_enter.data,
node->as.custom.on_enter.len);
} else {
cmark_strbuf_put(html, node->as.custom.on_exit.data,
node->as.custom.on_exit.len);
}
break;
case CMARK_NODE_STRONG:
if (entering) {
cmark_strbuf_puts(html, "<strong>");
} else {
cmark_strbuf_puts(html, "</strong>");
}
break;
case CMARK_NODE_EMPH:
if (entering) {
cmark_strbuf_puts(html, "<em>");
} else {
cmark_strbuf_puts(html, "</em>");
}
break;
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
// the URL is left empty when it is flagged dangerous and
// CMARK_OPT_UNSAFE is not set
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
escape_html(html, node->as.link.title.data, node->as.link.title.len);
}
cmark_strbuf_puts(html, "\">");
} else {
cmark_strbuf_puts(html, "</a>");
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
cmark_strbuf_puts(html, "\" alt=\"");
// switch to plain-text mode until this node is seen again
renderer->plain = node;
} else {
if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
escape_html(html, node->as.link.title.data, node->as.link.title.len);
}
cmark_strbuf_puts(html, "\" />");
}
break;
case CMARK_NODE_FOOTNOTE_DEFINITION:
if (entering) {
// the first definition opens the footnotes section; the matching
// closing tags are not written by this function
if (renderer->footnote_ix == 0) {
cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n");
}
++renderer->footnote_ix;
cmark_strbuf_puts(html, "<li id=\"fn");
char n[32];
snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
cmark_strbuf_puts(html, n);
cmark_strbuf_puts(html, "\">\n");
} else {
// writes the back-reference here only if it was not already written
if (S_put_footnote_backref(renderer, html)) {
cmark_strbuf_putc(html, '\n');
}
cmark_strbuf_puts(html, "</li>\n");
}
break;
case CMARK_NODE_FOOTNOTE_REFERENCE:
if (entering) {
// the node's literal is written unescaped as link target, id and text
cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn");
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(html, "\" id=\"fnref");
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(html, "\">");
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(html, "</a></sup>");
}
break;
default:
assert(false);
break;
}
return 1;
}
/* Convenience wrapper: renders `root` to HTML using the allocator reported
 * by cmark_node_mem(root). */
char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions) {
  cmark_mem *mem = cmark_node_mem(root);

  return cmark_render_html_with_mem(root, options, extensions, mem);
}
/* Walks the tree under `root` with an iterator, feeding every event to
 * S_render_node, and returns the accumulated HTML detached from the string
 * buffer. `extensions` is scanned once; only entries that provide an
 * html_filter_func are remembered on the renderer. `mem` backs both the
 * output buffer and the temporary filter list. */
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) {
  cmark_strbuf html = CMARK_BUF_INIT(mem);
  cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL};
  cmark_iter *iter = cmark_iter_new(root);
  cmark_llist *ext_it;
  char *rendered;

  /* Collect the extensions that want to filter raw HTML. */
  for (ext_it = extensions; ext_it != NULL; ext_it = ext_it->next) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *)ext_it->data;

    if (ext->html_filter_func) {
      renderer.filter_extensions =
          cmark_llist_append(mem, renderer.filter_extensions, ext);
    }
  }

  for (;;) {
    cmark_event_type ev_type = cmark_iter_next(iter);

    if (ev_type == CMARK_EVENT_DONE) {
      break;
    }
    S_render_node(&renderer, cmark_iter_get_node(iter), ev_type, options);
  }

  /* At least one footnote definition was rendered: close the list that the
   * first definition opened. */
  if (renderer.footnote_ix != 0) {
    cmark_strbuf_puts(&html, "</ol>\n</section>\n");
  }

  rendered = (char *)cmark_strbuf_detach(&html);

  cmark_llist_free(mem, renderer.filter_extensions);
  cmark_iter_free(iter);

  return rendered;
}

27
lib/commonmarker/html.h Normal file
View File

@ -0,0 +1,27 @@
#ifndef CMARK_HTML_H
#define CMARK_HTML_H
#include "buffer.h"
#include "node.h"
/* Appends a newline to `html` unless the buffer is empty or already ends
 * with one. */
CMARK_INLINE
static void cmark_html_render_cr(cmark_strbuf *html) {
  if (html->size == 0) {
    return; /* empty buffer: nothing to terminate */
  }
  if (html->ptr[html->size - 1] == '\n') {
    return; /* already at the start of a fresh line */
  }
  cmark_strbuf_putc(html, '\n');
}
/* Size of the scratch buffer used to format the data-sourcepos attribute. */
#define BUFFER_SIZE 100
/* When CMARK_OPT_SOURCEPOS is set in `options`, appends a
 * ` data-sourcepos="startline:startcol-endline:endcol"` attribute for `node`
 * to `html`; otherwise does nothing. */
CMARK_INLINE
static void cmark_html_render_sourcepos(cmark_node *node, cmark_strbuf *html, int options) {
  char attr[BUFFER_SIZE];

  if (!(options & CMARK_OPT_SOURCEPOS)) {
    return;
  }

  snprintf(attr, sizeof(attr), " data-sourcepos=\"%d:%d-%d:%d\"",
           cmark_node_get_start_line(node), cmark_node_get_start_column(node),
           cmark_node_get_end_line(node), cmark_node_get_end_column(node));
  cmark_strbuf_puts(html, attr);
}
#endif

1633
lib/commonmarker/inlines.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,29 @@
#ifndef CMARK_INLINES_H
#define CMARK_INLINES_H
#ifdef __cplusplus
extern "C" {
#endif
#include "references.h"
// Prototypes for the inline-parsing entry points. Their definitions are not
// visible from this header (presumably inlines.c — confirm), so only what the
// signatures themselves state is documented here.
// Both cleaners take an allocator and a chunk and return a chunk by value.
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
// The only prototype in this header carrying the CMARK_GFM_EXPORT annotation.
CMARK_GFM_EXPORT
void cmark_parse_inlines(cmark_parser *parser,
cmark_node *parent,
cmark_map *refmap,
int options);
// Returns a bufsize_t; NOTE(review): presumably the number of bytes consumed
// from `input` — confirm against the implementation.
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
cmark_map *refmap);
// Add/remove pair keyed by a single byte `c`; the `emphasis` flag's exact
// meaning is defined by the implementation.
void cmark_inlines_add_special_character(unsigned char c, bool emphasis);
void cmark_inlines_remove_special_character(unsigned char c, bool emphasis);
#ifdef __cplusplus
}
#endif
#endif

159
lib/commonmarker/iterator.c Normal file
View File

@ -0,0 +1,159 @@
#include <assert.h>
#include <stdlib.h>
#include "config.h"
#include "node.h"
#include "cmark-gfm.h"
#include "iterator.h"
/* Allocates an iterator over the tree rooted at `root`, using the allocator
 * stored in root->content.mem. The first event it will yield is ENTER on
 * `root`. Returns NULL when `root` is NULL. */
cmark_iter *cmark_iter_new(cmark_node *root) {
  cmark_mem *mem;
  cmark_iter *it;

  if (!root) {
    return NULL;
  }

  mem = root->content.mem;
  it = (cmark_iter *)mem->calloc(1, sizeof(*it));

  it->root = root;
  it->mem = mem;

  /* Nothing has been visited yet ... */
  it->cur.node = NULL;
  it->cur.ev_type = CMARK_EVENT_NONE;

  /* ... and the traversal starts by entering the root. */
  it->next.node = root;
  it->next.ev_type = CMARK_EVENT_ENTER;

  return it;
}
/* Releases an iterator through the allocator it was created with.
 * Accepts NULL as a no-op, since cmark_iter_new() returns NULL for a NULL
 * root and callers pass its result straight to this function. */
void cmark_iter_free(cmark_iter *iter) {
  if (iter == NULL) {
    return;
  }
  iter->mem->free(iter);
}
/* Reports whether `node` has one of the types the iterator treats as a leaf:
 * such nodes produce an ENTER event only and are never descended into. */
static bool S_is_leaf(cmark_node *node) {
  bool leaf;

  switch (node->type) {
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_THEMATIC_BREAK:
  case CMARK_NODE_CODE_BLOCK:
  case CMARK_NODE_TEXT:
  case CMARK_NODE_SOFTBREAK:
  case CMARK_NODE_LINEBREAK:
  case CMARK_NODE_CODE:
  case CMARK_NODE_HTML_INLINE:
    leaf = 1;
    break;
  default:
    leaf = 0;
    break;
  }

  return leaf;
}
// Advances the iterator by one event.
// The pending state (iter->next) becomes the current state (iter->cur), its
// event type is returned, and the state that follows is precomputed.
// Once CMARK_EVENT_DONE is the pending event it is returned without touching
// iter->next, so every later call keeps returning CMARK_EVENT_DONE.
cmark_event_type cmark_iter_next(cmark_iter *iter) {
cmark_event_type ev_type = iter->next.ev_type;
cmark_node *node = iter->next.node;
iter->cur.ev_type = ev_type;
iter->cur.node = node;
if (ev_type == CMARK_EVENT_DONE) {
return ev_type;
}
/* roll forward to next item, setting both fields */
if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) {
// Entering a container: descend, or exit immediately if it has no children.
if (node->first_child == NULL) {
/* stay on this node but exit */
iter->next.ev_type = CMARK_EVENT_EXIT;
} else {
iter->next.ev_type = CMARK_EVENT_ENTER;
iter->next.node = node->first_child;
}
// The remaining branches handle ENTER on a leaf and EXIT on any node.
} else if (node == iter->root) {
/* don't move past root */
iter->next.ev_type = CMARK_EVENT_DONE;
iter->next.node = NULL;
} else if (node->next) {
// Move on to the following sibling.
iter->next.ev_type = CMARK_EVENT_ENTER;
iter->next.node = node->next;
} else if (node->parent) {
// Last child: the next event leaves the parent.
iter->next.ev_type = CMARK_EVENT_EXIT;
iter->next.node = node->parent;
} else {
// A non-root node with neither a sibling nor a parent is treated as a
// broken tree: assert in debug builds, end the traversal otherwise.
assert(false);
iter->next.ev_type = CMARK_EVENT_DONE;
iter->next.node = NULL;
}
return ev_type;
}
/* Repositions the iterator so that (`current`, `event_type`) becomes its
 * current state: the pair is installed as the pending event and the iterator
 * is then stepped once. */
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
                      cmark_event_type event_type) {
  iter->next.node = current;
  iter->next.ev_type = event_type;
  (void)cmark_iter_next(iter);
}
/* Returns the node stored in the iterator's current state. */
cmark_node *cmark_iter_get_node(cmark_iter *iter) {
  cmark_node *current = iter->cur.node;

  return current;
}
/* Returns the event type stored in the iterator's current state. */
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) {
  cmark_event_type current = iter->cur.ev_type;

  return current;
}
/* Returns the root node the iterator was created for. */
cmark_node *cmark_iter_get_root(cmark_iter *iter) {
  cmark_node *root = iter->root;

  return root;
}
// Merges every run of adjacent CMARK_NODE_TEXT siblings reached while
// iterating from `root` into the first node of the run: the literals are
// concatenated, the first node's end_column is taken from the last merged
// node, and the absorbed nodes are freed. A NULL `root` is a no-op.
void cmark_consolidate_text_nodes(cmark_node *root) {
if (root == NULL) {
return;
}
cmark_iter *iter = cmark_iter_new(root);
// Scratch buffer reused for each run; it shares the iterator's allocator.
cmark_strbuf buf = CMARK_BUF_INIT(iter->mem);
cmark_event_type ev_type;
cmark_node *cur, *tmp, *next;
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
cur = cmark_iter_get_node(iter);
// Only act on a text node that is directly followed by another text node.
if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
cur->next && cur->next->type == CMARK_NODE_TEXT) {
cmark_strbuf_clear(&buf);
cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
tmp = cur->next;
while (tmp && tmp->type == CMARK_NODE_TEXT) {
// Step the iterator over tmp before tmp is freed below, so the iterator
// is not left referring to a freed node.
cmark_iter_next(iter); // advance pointer
cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
cur->end_column = tmp->end_column;
// Remember the successor first: tmp is gone after cmark_node_free.
next = tmp->next;
cmark_node_free(tmp);
tmp = next;
}
// Swap the old literal for the concatenated text.
cmark_chunk_free(iter->mem, &cur->as.literal);
cur->as.literal = cmark_chunk_buf_detach(&buf);
}
}
cmark_strbuf_free(&buf);
cmark_iter_free(iter);
}
/* Visits every node under `root` once (on its ENTER event) and passes each
 * chunk it carries -- literal, link url/title, or custom on_enter/on_exit --
 * to cmark_chunk_to_cstr with the iterator's allocator. A NULL `root` is a
 * no-op. */
void cmark_node_own(cmark_node *root) {
  cmark_iter *iter;
  cmark_event_type ev_type;

  if (!root) {
    return;
  }

  iter = cmark_iter_new(root);

  for (ev_type = cmark_iter_next(iter); ev_type != CMARK_EVENT_DONE;
       ev_type = cmark_iter_next(iter)) {
    cmark_node *n = cmark_iter_get_node(iter);

    if (ev_type != CMARK_EVENT_ENTER) {
      continue; /* each node is handled once, when it is entered */
    }

    switch (n->type) {
    case CMARK_NODE_TEXT:
    case CMARK_NODE_HTML_INLINE:
    case CMARK_NODE_CODE:
    case CMARK_NODE_HTML_BLOCK:
      cmark_chunk_to_cstr(iter->mem, &n->as.literal);
      break;
    case CMARK_NODE_LINK:
      cmark_chunk_to_cstr(iter->mem, &n->as.link.url);
      cmark_chunk_to_cstr(iter->mem, &n->as.link.title);
      break;
    case CMARK_NODE_CUSTOM_INLINE:
      cmark_chunk_to_cstr(iter->mem, &n->as.custom.on_enter);
      cmark_chunk_to_cstr(iter->mem, &n->as.custom.on_exit);
      break;
    default:
      break;
    }
  }

  cmark_iter_free(iter);
}

View File

@ -0,0 +1,26 @@
#ifndef CMARK_ITERATOR_H
#define CMARK_ITERATOR_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm.h"
// One traversal position: an event type paired with the node it applies to.
typedef struct {
cmark_event_type ev_type;
cmark_node *node;
} cmark_iter_state;
// Tree iterator: the allocator and root it works with, plus two traversal
// positions (`cur` and `next`).
struct cmark_iter {
cmark_mem *mem;
cmark_node *root;
cmark_iter_state cur;
cmark_iter_state next;
};
#ifdef __cplusplus
}
#endif
#endif

466
lib/commonmarker/latex.c Normal file
View File

@ -0,0 +1,466 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "config.h"
#include "cmark-gfm.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "scanners.h"
#include "render.h"
#include "syntax_extension.h"
// Shorthands for the renderer callbacks. All of them expect variables named
// `renderer` and (for OUT/LIT) `node` to be in scope at the point of use.
// OUT: emit `s` with the given wrap flag and escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
// LIT: emit `s` unwrapped with LITERAL escaping.
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
// Size of the scratch buffer that holds a list start number as text.
#define LIST_NUMBER_STRING_SIZE 20
// Emits one code point `c` into the LaTeX output, escaped according to
// `escape`:
//   LITERAL - written unchanged;
//   NORMAL  - LaTeX specials escaped and typographic code points (quotes,
//             dashes) mapped to their TeX input forms;
//   any other mode (URL is the one tested explicitly) - specials still
//             escaped, but '$', '_', '~' and the typographic code points are
//             written as-is, and for URL a backslash becomes '/'.
// `nextc` is consulted only for '-', to keep "--" from forming a ligature.
// `node` is unused here.
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
cmark_escaping escape,
int32_t c, unsigned char nextc) {
if (escape == LITERAL) {
cmark_render_code_point(renderer, c);
return;
}
switch (c) {
// Characters that only need a leading backslash.
case 123: // '{'
case 125: // '}'
case 35: // '#'
case 37: // '%'
case 38: // '&'
cmark_render_ascii(renderer, "\\");
cmark_render_code_point(renderer, c);
break;
// Backslash-escaped in NORMAL text only.
case 36: // '$'
case 95: // '_'
if (escape == NORMAL) {
cmark_render_ascii(renderer, "\\");
}
cmark_render_code_point(renderer, c);
break;
case 45: // '-'
if (nextc == 45) { // prevent ligature
cmark_render_ascii(renderer, "-{}");
} else {
cmark_render_ascii(renderer, "-");
}
break;
case 126: // '~'
if (escape == NORMAL) {
cmark_render_ascii(renderer, "\\textasciitilde{}");
} else {
cmark_render_code_point(renderer, c);
}
break;
case 94: // '^'
cmark_render_ascii(renderer, "\\^{}");
break;
case 92: // '\\'
if (escape == URL) {
// / acts as path sep even on windows:
cmark_render_ascii(renderer, "/");
} else {
cmark_render_ascii(renderer, "\\textbackslash{}");
}
break;
case 124: // '|'
cmark_render_ascii(renderer, "\\textbar{}");
break;
case 60: // '<'
cmark_render_ascii(renderer, "\\textless{}");
break;
case 62: // '>'
cmark_render_ascii(renderer, "\\textgreater{}");
break;
// Square brackets are wrapped in a brace group.
case 91: // '['
case 93: // ']'
cmark_render_ascii(renderer, "{");
cmark_render_code_point(renderer, c);
cmark_render_ascii(renderer, "}");
break;
case 34: // '"'
cmark_render_ascii(renderer, "\\textquotedbl{}");
// requires \usepackage[T1]{fontenc}
break;
case 39: // '\''
cmark_render_ascii(renderer, "\\textquotesingle{}");
// requires \usepackage{textcomp}
break;
case 160: // nbsp
cmark_render_ascii(renderer, "~");
break;
case 8230: // hellip
cmark_render_ascii(renderer, "\\ldots{}");
break;
// Typographic quotes and dashes: TeX input form in NORMAL text, raw code
// point otherwise.
case 8216: // lsquo
if (escape == NORMAL) {
cmark_render_ascii(renderer, "`");
} else {
cmark_render_code_point(renderer, c);
}
break;
case 8217: // rsquo
if (escape == NORMAL) {
cmark_render_ascii(renderer, "\'");
} else {
cmark_render_code_point(renderer, c);
}
break;
case 8220: // ldquo
if (escape == NORMAL) {
cmark_render_ascii(renderer, "``");
} else {
cmark_render_code_point(renderer, c);
}
break;
case 8221: // rdquo
if (escape == NORMAL) {
cmark_render_ascii(renderer, "''");
} else {
cmark_render_code_point(renderer, c);
}
break;
case 8212: // emdash
if (escape == NORMAL) {
cmark_render_ascii(renderer, "---");
} else {
cmark_render_code_point(renderer, c);
}
break;
case 8211: // endash
if (escape == NORMAL) {
cmark_render_ascii(renderer, "--");
} else {
cmark_render_code_point(renderer, c);
}
break;
default:
cmark_render_code_point(renderer, c);
}
}
// Classification of a node produced by get_link_type(); the LaTeX renderer
// switches on it to choose the command emitted for a link.
typedef enum {
// Not a link node, or a link with an empty/scheme-less URL or no children.
NO_LINK,
// Untitled link whose text is identical to its URL.
URL_AUTOLINK,
// Untitled link whose text is identical to its URL minus a "mailto:" prefix.
EMAIL_AUTOLINK,
// Any other link with a scheme.
NORMAL_LINK,
// Link whose URL starts with '#'.
INTERNAL_LINK
} link_type;
/* Classifies `node` for the LaTeX renderer.
 *
 * Returns:
 *   NO_LINK         - not a link node, no URL, empty URL, URL without a
 *                     scheme, or an untitled link with no children;
 *   INTERNAL_LINK   - URL starts with '#';
 *   URL_AUTOLINK    - untitled link whose (consolidated) text child equals
 *                     the URL;
 *   EMAIL_AUTOLINK  - same, comparing against the URL without "mailto:";
 *   NORMAL_LINK     - everything else.
 *
 * Side effect: for untitled links, adjacent text nodes at the start of the
 * link's children are merged via cmark_consolidate_text_nodes().
 */
static link_type get_link_type(cmark_node *node) {
  if (node->type != CMARK_NODE_LINK) {
    return NO_LINK;
  }

  const char *url = cmark_node_get_url(node);
  if (url == NULL) {
    /* Guard added: strlen() below must never see a NULL URL. */
    return NO_LINK;
  }
  if (*url == '#') {
    return INTERNAL_LINK;
  }

  cmark_chunk url_chunk = cmark_chunk_literal(url);
  size_t url_len = strlen(url);
  if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
    return NO_LINK;
  }

  // if it has a title, we can't treat it as an autolink:
  const char *title = cmark_node_get_title(node);
  if (title != NULL && *title != '\0') {
    return NORMAL_LINK;
  }

  cmark_node *link_text = node->first_child;
  if (link_text == NULL) {
    return NO_LINK;
  }
  cmark_consolidate_text_nodes(link_text);

  /* Only a text node stores its content in as.literal. For any other child
   * type (emphasis, image, ...) that union member aliases unrelated data, so
   * such a link can never be an autolink. */
  if (link_text->type != CMARK_NODE_TEXT) {
    return NORMAL_LINK;
  }

  const char *realurl = url;
  int realurllen = (int)url_len;
  bool isemail = false;
  if (strncmp(realurl, "mailto:", 7) == 0) {
    realurl += 7;
    realurllen -= 7;
    isemail = true;
  }

  if (realurllen == link_text->as.literal.len &&
      strncmp(realurl, (const char *)link_text->as.literal.data,
              link_text->as.literal.len) == 0) {
    return isemail ? EMAIL_AUTOLINK : URL_AUTOLINK;
  }

  return NORMAL_LINK;
}
/* Returns how many ordered lists enclose `node`, counting `node` itself when
 * it is an ordered list. The result selects the LaTeX counter (enumi,
 * enumii, ...) to set for a list with a custom start number.
 *
 * Each ancestor is tested on its own list type. The earlier code tested
 * `node`'s list type on every iteration, so bullet-list ancestors were
 * counted as enumerate levels and the wrong counter was selected. */
static int S_get_enumlevel(cmark_node *node) {
  int enumlevel = 0;

  for (cmark_node *tmp = node; tmp != NULL; tmp = tmp->parent) {
    if (tmp->type == CMARK_NODE_LIST &&
        cmark_node_get_list_type(tmp) == CMARK_ORDERED_LIST) {
      enumlevel++;
    }
  }

  return enumlevel;
}
// Renders one iterator event for `node` as LaTeX through the renderer
// callbacks (see the OUT/LIT/CR/BLANKLINE macros).
// If the node's extension supplies latex_render_func, rendering is delegated
// to it entirely. Returns 1, except after emitting a URL autolink or an image,
// where it returns 0 (per the comment below, so that the node's contents are
// not rendered a second time; how the caller acts on 0 is not visible here).
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
int list_number;
int enumlevel;
char list_number_string[LIST_NUMBER_STRING_SIZE];
bool entering = (ev_type == CMARK_EVENT_ENTER);
cmark_list_type list_type;
// Wrapping needs a positive width and must not be disabled by NOBREAKS.
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
if (node->extension && node->extension->latex_render_func) {
node->extension->latex_render_func(node->extension, renderer, node, ev_type, options);
return 1;
}
switch (node->type) {
case CMARK_NODE_DOCUMENT:
break;
case CMARK_NODE_BLOCK_QUOTE:
if (entering) {
LIT("\\begin{quote}");
CR();
} else {
LIT("\\end{quote}");
BLANKLINE();
}
break;
case CMARK_NODE_LIST:
list_type = cmark_node_get_list_type(node);
if (entering) {
LIT("\\begin{");
LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
LIT("}");
CR();
// A start number above 1 needs an explicit \setcounter on the counter
// that belongs to this nesting level.
list_number = cmark_node_get_list_start(node);
if (list_number > 1) {
enumlevel = S_get_enumlevel(node);
// latex normally supports only five levels
if (enumlevel >= 1 && enumlevel <= 5) {
snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
list_number);
LIT("\\setcounter{enum");
switch (enumlevel) {
case 1: LIT("i"); break;
case 2: LIT("ii"); break;
case 3: LIT("iii"); break;
case 4: LIT("iv"); break;
case 5: LIT("v"); break;
// Not reachable: the enclosing range check admits only 1..5.
default: LIT("i"); break;
}
LIT("}{");
OUT(list_number_string, false, NORMAL);
LIT("}");
}
CR();
}
} else {
LIT("\\end{");
LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
LIT("}");
BLANKLINE();
}
break;
case CMARK_NODE_ITEM:
if (entering) {
LIT("\\item ");
} else {
CR();
}
break;
case CMARK_NODE_HEADING:
if (entering) {
// NOTE(review): only levels 1-5 map to a sectioning command; any other
// level emits just the opening brace.
switch (cmark_node_get_heading_level(node)) {
case 1:
LIT("\\section");
break;
case 2:
LIT("\\subsection");
break;
case 3:
LIT("\\subsubsection");
break;
case 4:
LIT("\\paragraph");
break;
case 5:
LIT("\\subparagraph");
break;
}
LIT("{");
} else {
LIT("}");
BLANKLINE();
}
break;
case CMARK_NODE_CODE_BLOCK:
// The literal is written with LITERAL escaping inside a verbatim block.
CR();
LIT("\\begin{verbatim}");
CR();
OUT(cmark_node_get_literal(node), false, LITERAL);
CR();
LIT("\\end{verbatim}");
BLANKLINE();
break;
case CMARK_NODE_HTML_BLOCK:
// Raw HTML blocks produce no LaTeX output.
break;
case CMARK_NODE_CUSTOM_BLOCK:
CR();
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
CR();
break;
case CMARK_NODE_THEMATIC_BREAK:
BLANKLINE();
LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
BLANKLINE();
break;
case CMARK_NODE_PARAGRAPH:
if (!entering) {
BLANKLINE();
}
break;
case CMARK_NODE_TEXT:
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
break;
case CMARK_NODE_LINEBREAK:
LIT("\\\\");
CR();
break;
case CMARK_NODE_SOFTBREAK:
// HARDBREAKS forces a LaTeX line break; with width 0 and breaks allowed the
// source newline is kept; otherwise the soft break becomes a space.
if (options & CMARK_OPT_HARDBREAKS) {
LIT("\\\\");
CR();
} else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
CR();
} else {
OUT(" ", allow_wrap, NORMAL);
}
break;
case CMARK_NODE_CODE:
LIT("\\texttt{");
OUT(cmark_node_get_literal(node), false, NORMAL);
LIT("}");
break;
case CMARK_NODE_HTML_INLINE:
// Inline raw HTML produces no LaTeX output.
break;
case CMARK_NODE_CUSTOM_INLINE:
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
break;
case CMARK_NODE_STRONG:
if (entering) {
LIT("\\textbf{");
} else {
LIT("}");
}
break;
case CMARK_NODE_EMPH:
if (entering) {
LIT("\\emph{");
} else {
LIT("}");
}
break;
case CMARK_NODE_LINK:
if (entering) {
const char *url = cmark_node_get_url(node);
// requires \usepackage{hyperref}
// Every case except URL_AUTOLINK leaves a brace group open; it is closed
// by the LIT("}") in the exit branch below.
switch (get_link_type(node)) {
case URL_AUTOLINK:
LIT("\\url{");
OUT(url, false, URL);
LIT("}");
return 0; // Don't process further nodes to avoid double-rendering artefacts
case EMAIL_AUTOLINK:
LIT("\\href{");
OUT(url, false, URL);
LIT("}\\nolinkurl{");
break;
case NORMAL_LINK:
LIT("\\href{");
OUT(url, false, URL);
LIT("}{");
break;
case INTERNAL_LINK:
// url + 1 skips the leading '#'.
LIT("\\protect\\hyperlink{");
OUT(url + 1, false, URL);
LIT("}{");
break;
case NO_LINK:
LIT("{"); // error?
}
} else {
LIT("}");
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
LIT("\\protect\\includegraphics{");
// requires \include{graphicx}
OUT(cmark_node_get_url(node), false, URL);
LIT("}");
return 0;
}
break;
case CMARK_NODE_FOOTNOTE_DEFINITION:
case CMARK_NODE_FOOTNOTE_REFERENCE:
// TODO
break;
default:
assert(false);
break;
}
return 1;
}
/* Convenience wrapper: renders `root` as LaTeX with the allocator reported
 * by cmark_node_mem(root). */
char *cmark_render_latex(cmark_node *root, int options, int width) {
  cmark_mem *mem = cmark_node_mem(root);

  return cmark_render_latex_with_mem(root, options, width, mem);
}
/* Renders `root` as LaTeX by handing the LaTeX character escaper (outc) and
 * node renderer (S_render_node) to the generic cmark_render driver. */
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  char *latex = cmark_render(mem, root, options, width, outc, S_render_node);

  return latex;
}

View File

@ -0,0 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=@CMAKE_INSTALL_PREFIX@/@libdir@
includedir=@CMAKE_INSTALL_PREFIX@/include
Name: libcmark-gfm
Description: CommonMark parsing, rendering, and manipulation with GitHub Flavored Markdown extensions
Version: @PROJECT_VERSION@
Libs: -L${libdir} -lcmark-gfm -lcmark-gfm-extensions
Cflags: -I${includedir}

View File

@ -0,0 +1,37 @@
#include <stdlib.h>
#include "cmark-gfm.h"
/* Appends a new element holding `data` to the end of the list `head` and
 * returns the list head (the new element itself when `head` is NULL). The
 * element is allocated with mem->calloc. */
cmark_llist *cmark_llist_append(cmark_mem *mem, cmark_llist *head, void *data) {
  cmark_llist *node = (cmark_llist *)mem->calloc(1, sizeof(cmark_llist));
  cmark_llist **link = &head;

  node->data = data;
  node->next = NULL;

  /* Find the terminating NULL link; for an empty list that is `head`. */
  while (*link != NULL) {
    link = &(*link)->next;
  }
  *link = node;

  return head;
}
/* Frees every element of the list with mem->free. When `free_func` is
 * non-NULL it is called on each element's data before that element is
 * released. */
void cmark_llist_free_full(cmark_mem *mem, cmark_llist *head, cmark_free_func free_func) {
  cmark_llist *cur = head;

  while (cur != NULL) {
    cmark_llist *following;

    if (free_func) {
      free_func(mem, cur->data);
    }
    /* Read the link before the element itself is released. */
    following = cur->next;
    mem->free(cur);
    cur = following;
  }
}
/* Frees the list elements only; the data pointers they hold are left
 * untouched (no per-element free function is supplied). */
void cmark_llist_free(cmark_mem *mem, cmark_llist *head) {
  cmark_free_func no_data_free = NULL;

  cmark_llist_free_full(mem, head, no_data_free);
}

328
lib/commonmarker/main.c Normal file
View File

@ -0,0 +1,328 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include "config.h"
#include "cmark-gfm.h"
#include "node.h"
#include "cmark-gfm-extension_api.h"
#include "syntax_extension.h"
#include "parser.h"
#include "registry.h"
#include "../extensions/cmark-gfm-core-extensions.h"
#if defined(__OpenBSD__)
# include <sys/param.h>
# if OpenBSD >= 201605
# define USE_PLEDGE
# include <unistd.h>
# endif
#endif
#if defined(__OpenBSD__)
# include <sys/param.h>
# if OpenBSD >= 201605
# define USE_PLEDGE
# include <unistd.h>
# endif
#endif
#if defined(_WIN32) && !defined(__CYGWIN__)
#include <io.h>
#include <fcntl.h>
#endif
// Output formats understood by print_document(). main() defaults to
// FORMAT_HTML and selects the others from the argument of --to / -t.
typedef enum {
// Never assigned by main(); print_document() reports it as unknown.
FORMAT_NONE,
FORMAT_HTML,
FORMAT_XML,
FORMAT_MAN,
FORMAT_COMMONMARK,
FORMAT_PLAINTEXT,
FORMAT_LATEX
} writer_format;
/* Writes the command-line help text to stdout. */
void print_usage() {
  /* One entry per output chunk; none contains a conversion specifier, so
   * the text is emitted verbatim. */
  static const char *const usage_text[] = {
      "Usage: cmark-gfm [FILE*]\n",
      "Options:\n",
      " --to, -t FORMAT Specify output format (html, xml, man, "
      "commonmark, plaintext, latex)\n",
      " --width WIDTH Specify wrap width (default 0 = nowrap)\n",
      " --sourcepos Include source position attribute\n",
      " --hardbreaks Treat newlines as hard line breaks\n",
      " --nobreaks Render soft line breaks as spaces\n",
      " --unsafe Render raw HTML and dangerous URLs\n",
      " --smart Use smart punctuation\n",
      " --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n",
      " --github-pre-lang Use GitHub-style <pre lang> for code blocks\n",
      " --extension, -e EXTENSION_NAME Specify an extension name to use\n",
      " --list-extensions List available extensions and quit\n",
      " --strikethrough-double-tilde Only parse strikethrough (if enabled)\n",
      " with two tildes\n",
      " --table-prefer-style-attributes Use style attributes to align table cells\n"
      " instead of align attributes.\n",
      " --full-info-string Include remainder of code block info\n"
      " string in a separate attribute.\n",
      " --help, -h Print usage information\n",
      " --version Print version\n",
  };
  size_t idx;

  for (idx = 0; idx < sizeof(usage_text) / sizeof(usage_text[0]); idx++) {
    fputs(usage_text[idx], stdout);
  }
}
/* Renders `document` in the requested `writer` format using the default
 * allocator, prints the result to stdout and frees it. Returns false (after
 * reporting on stderr) when `writer` is not a known output format, true
 * otherwise. `parser` is only consulted for its syntax extensions, which the
 * HTML renderer needs. */
static bool print_document(cmark_node *document, writer_format writer,
                           int options, int width, cmark_parser *parser) {
  cmark_mem *mem = cmark_get_default_mem_allocator();
  char *rendered;

  if (writer == FORMAT_HTML) {
    rendered = cmark_render_html_with_mem(document, options, parser->syntax_extensions, mem);
  } else if (writer == FORMAT_XML) {
    rendered = cmark_render_xml_with_mem(document, options, mem);
  } else if (writer == FORMAT_MAN) {
    rendered = cmark_render_man_with_mem(document, options, width, mem);
  } else if (writer == FORMAT_COMMONMARK) {
    rendered = cmark_render_commonmark_with_mem(document, options, width, mem);
  } else if (writer == FORMAT_PLAINTEXT) {
    rendered = cmark_render_plaintext_with_mem(document, options, width, mem);
  } else if (writer == FORMAT_LATEX) {
    rendered = cmark_render_latex_with_mem(document, options, width, mem);
  } else {
    fprintf(stderr, "Unknown format %d\n", writer);
    return false;
  }

  printf("%s", rendered);
  mem->free(rendered);
  return true;
}
static void print_extensions(void) {
cmark_llist *syntax_extensions;
cmark_llist *tmp;
printf ("Available extensions:\nfootnotes\n");
cmark_mem *mem = cmark_get_default_mem_allocator();
syntax_extensions = cmark_list_syntax_extensions(mem);
for (tmp = syntax_extensions; tmp; tmp=tmp->next) {
cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
printf("%s\n", ext->name);
}
cmark_llist_free(mem, syntax_extensions);
}
/*
 * Command-line driver.
 *
 * Pass 1 over argv collects option flags, the output format, the wrap width
 * and the indexes of file arguments. Pass 2 attaches the requested syntax
 * extensions to the freshly created parser. The named files (or stdin when
 * none were given) are then fed to the parser and the resulting document is
 * printed in the chosen format.
 *
 * Returns 0 on success (including --help, --version, --list-extensions) and
 * 1 on any error.
 *
 * Changes from the previous version:
 *  - the allocation of `files` is checked before it is written to;
 *  - a second, identical pledge("stdio rpath") call was dropped (repeating
 *    the same promises has no effect);
 *  - a failure of the final pledge() goes through the common cleanup path.
 */
int main(int argc, char *argv[]) {
  int i, numfps = 0;
  int *files = NULL; /* argv indexes of the input files, in order */
  char buffer[4096];
  cmark_parser *parser = NULL;
  size_t bytes;
  cmark_node *document = NULL;
  int width = 0;
  char *unparsed;
  writer_format writer = FORMAT_HTML;
  int options = CMARK_OPT_DEFAULT;
  int res = 1;

#ifdef USE_PLEDGE
  if (pledge("stdio rpath", NULL) != 0) {
    perror("pledge");
    return 1;
  }
#endif

  cmark_gfm_core_extensions_ensure_registered();

#if defined(_WIN32) && !defined(__CYGWIN__)
  _setmode(_fileno(stdin), _O_BINARY);
  _setmode(_fileno(stdout), _O_BINARY);
#endif

  /* Ask for at least one element: calloc(0, ...) may legitimately return
   * NULL, which must not be mistaken for an allocation failure. */
  files = (int *)calloc(argc > 0 ? (size_t)argc : 1, sizeof(*files));
  if (files == NULL) {
    perror("calloc");
    goto failure;
  }

  for (i = 1; i < argc; i++) {
    if (strcmp(argv[i], "--version") == 0) {
      printf("cmark-gfm %s", CMARK_GFM_VERSION_STRING);
      printf(" - CommonMark with GitHub Flavored Markdown converter\n(C) 2014-2016 John MacFarlane\n");
      goto success;
    } else if (strcmp(argv[i], "--list-extensions") == 0) {
      print_extensions();
      goto success;
    } else if (strcmp(argv[i], "--full-info-string") == 0) {
      options |= CMARK_OPT_FULL_INFO_STRING;
    } else if (strcmp(argv[i], "--table-prefer-style-attributes") == 0) {
      options |= CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES;
    } else if (strcmp(argv[i], "--strikethrough-double-tilde") == 0) {
      options |= CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE;
    } else if (strcmp(argv[i], "--sourcepos") == 0) {
      options |= CMARK_OPT_SOURCEPOS;
    } else if (strcmp(argv[i], "--hardbreaks") == 0) {
      options |= CMARK_OPT_HARDBREAKS;
    } else if (strcmp(argv[i], "--nobreaks") == 0) {
      options |= CMARK_OPT_NOBREAKS;
    } else if (strcmp(argv[i], "--smart") == 0) {
      options |= CMARK_OPT_SMART;
    } else if (strcmp(argv[i], "--github-pre-lang") == 0) {
      options |= CMARK_OPT_GITHUB_PRE_LANG;
    } else if (strcmp(argv[i], "--unsafe") == 0) {
      options |= CMARK_OPT_UNSAFE;
    } else if (strcmp(argv[i], "--validate-utf8") == 0) {
      options |= CMARK_OPT_VALIDATE_UTF8;
    } else if (strcmp(argv[i], "--liberal-html-tag") == 0) {
      options |= CMARK_OPT_LIBERAL_HTML_TAG;
    } else if ((strcmp(argv[i], "--help") == 0) ||
               (strcmp(argv[i], "-h") == 0)) {
      print_usage();
      goto success;
    } else if (strcmp(argv[i], "--width") == 0) {
      i += 1;
      if (i < argc) {
        width = (int)strtol(argv[i], &unparsed, 10);
        /* Any trailing characters mean the argument was not a plain number. */
        if (unparsed && strlen(unparsed) > 0) {
          fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i],
                  unparsed);
          goto failure;
        }
      } else {
        fprintf(stderr, "--width requires an argument\n");
        goto failure;
      }
    } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) {
      i += 1;
      if (i < argc) {
        if (strcmp(argv[i], "man") == 0) {
          writer = FORMAT_MAN;
        } else if (strcmp(argv[i], "html") == 0) {
          writer = FORMAT_HTML;
        } else if (strcmp(argv[i], "xml") == 0) {
          writer = FORMAT_XML;
        } else if (strcmp(argv[i], "commonmark") == 0) {
          writer = FORMAT_COMMONMARK;
        } else if (strcmp(argv[i], "plaintext") == 0) {
          writer = FORMAT_PLAINTEXT;
        } else if (strcmp(argv[i], "latex") == 0) {
          writer = FORMAT_LATEX;
        } else {
          fprintf(stderr, "Unknown format %s\n", argv[i]);
          goto failure;
        }
      } else {
        fprintf(stderr, "No argument provided for %s\n", argv[i - 1]);
        goto failure;
      }
    } else if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) {
      i += 1; // Simpler to handle extensions in a second pass, as we can directly register
              // them with the parser.
      /* "footnotes" is a parser option rather than a registered extension,
       * so it is handled here; a missing argument is reported in pass 2. */
      if (i < argc && strcmp(argv[i], "footnotes") == 0) {
        options |= CMARK_OPT_FOOTNOTES;
      }
    } else if (*argv[i] == '-') {
      print_usage();
      goto failure;
    } else { // treat as file argument
      files[numfps++] = i;
    }
  }

#if DEBUG
  parser = cmark_parser_new(options);
#else
  parser = cmark_parser_new_with_mem(options, cmark_get_arena_mem_allocator());
#endif

  /* Pass 2: attach the named syntax extensions to the parser. */
  for (i = 1; i < argc; i++) {
    if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) {
      i += 1;
      if (i < argc) {
        if (strcmp(argv[i], "footnotes") == 0) {
          continue;
        }
        cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(argv[i]);
        if (!syntax_extension) {
          fprintf(stderr, "Unknown extension %s\n", argv[i]);
          goto failure;
        }
        cmark_parser_attach_syntax_extension(parser, syntax_extension);
      } else {
        fprintf(stderr, "No argument provided for %s\n", argv[i - 1]);
        goto failure;
      }
    }
  }

  /* Feed every input file, in command-line order, into the same parser. */
  for (i = 0; i < numfps; i++) {
    FILE *fp = fopen(argv[files[i]], "rb");
    if (fp == NULL) {
      fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]],
              strerror(errno));
      goto failure;
    }
    while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
      cmark_parser_feed(parser, buffer, bytes);
      /* A short read ends this input. */
      if (bytes < sizeof(buffer)) {
        break;
      }
    }
    fclose(fp);
  }

  /* No file arguments: read the document from stdin. */
  if (numfps == 0) {
    while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
      cmark_parser_feed(parser, buffer, bytes);
      if (bytes < sizeof(buffer)) {
        break;
      }
    }
  }

#ifdef USE_PLEDGE
  /* All inputs have been read; file access is no longer needed. */
  if (pledge("stdio", NULL) != 0) {
    perror("pledge");
    goto failure;
  }
#endif

  document = cmark_parser_finish(parser);
  if (!document || !print_document(document, writer, options, width, parser))
    goto failure;

success:
  res = 0;

failure:
#if DEBUG
  if (parser)
    cmark_parser_free(parser);
  if (document)
    cmark_node_free(document);
#else
  cmark_arena_reset();
#endif
  cmark_release_plugins();
  free(files);
  return res;
}

278
lib/commonmarker/man.c Normal file
View File

@ -0,0 +1,278 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "config.h"
#include "cmark-gfm.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "render.h"
#include "syntax_extension.h"
// Shorthands for the renderer callbacks. All of them expect variables named
// `renderer` and (for OUT/LIT) `node` to be in scope at the point of use.
// OUT: emit `s` with the given wrap flag and escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
// LIT: emit `s` unwrapped with LITERAL escaping.
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
// Size of the scratch buffer that holds a formatted list-item label.
#define LIST_NUMBER_SIZE 20
// Functions to convert cmark_nodes to groff man strings.
// Emits one code point `c` into the groff man output.
// With LITERAL escaping the code point is written unchanged. Otherwise '.'
// and '\'' are protected with "\&" when they would start a line, '-' and '\'
// are escaped, typographic quotes and dashes become groff named glyphs, and
// everything else is written as-is. `node` and `nextc` are unused.
static void S_outc(cmark_renderer *renderer, cmark_node *node,
cmark_escaping escape, int32_t c,
unsigned char nextc) {
(void)(nextc);
if (escape == LITERAL) {
cmark_render_code_point(renderer, c);
return;
}
switch (c) {
// 46 is '.': prefixed with "\&" only at the beginning of a line.
case 46:
if (renderer->begin_line) {
cmark_render_ascii(renderer, "\\&.");
} else {
cmark_render_code_point(renderer, c);
}
break;
// 39 is '\'': same treatment as '.'.
case 39:
if (renderer->begin_line) {
cmark_render_ascii(renderer, "\\&'");
} else {
cmark_render_code_point(renderer, c);
}
break;
// 45 is '-'.
case 45:
cmark_render_ascii(renderer, "\\-");
break;
// 92 is '\\'.
case 92:
cmark_render_ascii(renderer, "\\e");
break;
case 8216: // left single quote
cmark_render_ascii(renderer, "\\[oq]");
break;
case 8217: // right single quote
cmark_render_ascii(renderer, "\\[cq]");
break;
case 8220: // left double quote
cmark_render_ascii(renderer, "\\[lq]");
break;
case 8221: // right double quote
cmark_render_ascii(renderer, "\\[rq]");
break;
case 8212: // em dash
cmark_render_ascii(renderer, "\\[em]");
break;
case 8211: // en dash
cmark_render_ascii(renderer, "\\[en]");
break;
default:
cmark_render_code_point(renderer, c);
}
}
// Renders one iterator event for `node` as groff man markup through the
// renderer callbacks (see the OUT/LIT/CR macros).
// If the node's extension supplies man_render_func, rendering is delegated to
// it entirely. Always returns 1.
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
cmark_node *tmp;
int list_number;
bool entering = (ev_type == CMARK_EVENT_ENTER);
// Wrapping needs a positive width and must not be disabled by NOBREAKS.
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
if (node->extension && node->extension->man_render_func) {
node->extension->man_render_func(node->extension, renderer, node, ev_type, options);
return 1;
}
switch (node->type) {
case CMARK_NODE_DOCUMENT:
if (entering) {
/* Define a strikethrough macro */
/* Commenting out because this makes tests fail
LIT(".de ST");
CR();
LIT(".nr ww \\w'\\\\$1'");
CR();
LIT("\\Z@\\v'-.25m'\\l'\\\\n[ww]u'@\\\\$1");
CR();
LIT("..");
CR();
*/
}
break;
case CMARK_NODE_BLOCK_QUOTE:
if (entering) {
CR();
LIT(".RS");
CR();
} else {
CR();
LIT(".RE");
CR();
}
break;
case CMARK_NODE_LIST:
// Lists emit nothing themselves; each item writes its own .IP request.
break;
case CMARK_NODE_ITEM:
if (entering) {
CR();
LIT(".IP ");
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
LIT("\\[bu] 2");
} else {
// The item's number is the list start plus the count of preceding
// siblings.
list_number = cmark_node_get_list_start(node->parent);
tmp = node;
while (tmp->prev) {
tmp = tmp->prev;
list_number += 1;
}
char list_number_s[LIST_NUMBER_SIZE];
snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number);
LIT(list_number_s);
}
CR();
} else {
CR();
}
break;
case CMARK_NODE_HEADING:
if (entering) {
CR();
// Level 1 becomes .SH; every other level becomes .SS.
LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS");
CR();
} else {
CR();
}
break;
case CMARK_NODE_CODE_BLOCK:
CR();
LIT(".IP\n.nf\n\\f[C]\n");
OUT(cmark_node_get_literal(node), false, NORMAL);
CR();
LIT("\\f[]\n.fi");
CR();
break;
case CMARK_NODE_HTML_BLOCK:
// Raw HTML blocks produce no man output.
break;
case CMARK_NODE_CUSTOM_BLOCK:
CR();
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
CR();
break;
case CMARK_NODE_THEMATIC_BREAK:
CR();
LIT(".PP\n * * * * *");
CR();
break;
case CMARK_NODE_PARAGRAPH:
if (entering) {
// no blank line if first paragraph in list:
if (node->parent && node->parent->type == CMARK_NODE_ITEM &&
node->prev == NULL) {
// no blank line or .PP
} else {
CR();
LIT(".PP");
CR();
}
} else {
CR();
}
break;
case CMARK_NODE_TEXT:
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
break;
case CMARK_NODE_LINEBREAK:
LIT(".PD 0\n.P\n.PD");
CR();
break;
case CMARK_NODE_SOFTBREAK:
// HARDBREAKS renders like a hard break; with width 0 and breaks allowed the
// source newline is kept; otherwise the soft break becomes a space.
if (options & CMARK_OPT_HARDBREAKS) {
LIT(".PD 0\n.P\n.PD");
CR();
} else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
CR();
} else {
OUT(" ", allow_wrap, LITERAL);
}
break;
case CMARK_NODE_CODE:
LIT("\\f[C]");
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
LIT("\\f[]");
break;
case CMARK_NODE_HTML_INLINE:
// Inline raw HTML produces no man output.
break;
case CMARK_NODE_CUSTOM_INLINE:
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
break;
case CMARK_NODE_STRONG:
if (entering) {
LIT("\\f[B]");
} else {
LIT("\\f[]");
}
break;
case CMARK_NODE_EMPH:
if (entering) {
LIT("\\f[I]");
} else {
LIT("\\f[]");
}
break;
case CMARK_NODE_LINK:
// Nothing on entry; on exit the URL is appended in parentheses.
if (!entering) {
LIT(" (");
OUT(cmark_node_get_url(node), allow_wrap, URL);
LIT(")");
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
LIT("[IMAGE: ");
} else {
LIT("]");
}
break;
case CMARK_NODE_FOOTNOTE_DEFINITION:
case CMARK_NODE_FOOTNOTE_REFERENCE:
// TODO
break;
default:
assert(false);
break;
}
return 1;
}
/* Convenience wrapper: renders `root` as a man page with the allocator
 * reported by cmark_node_mem(root). */
char *cmark_render_man(cmark_node *root, int options, int width) {
  cmark_mem *mem = cmark_node_mem(root);

  return cmark_render_man_with_mem(root, options, width, mem);
}
/* Renders `root` as groff man markup by handing the man character escaper
 * (S_outc) and node renderer (S_render_node) to the generic cmark_render
 * driver. */
char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  char *man_page = cmark_render(mem, root, options, width, S_outc, S_render_node);

  return man_page;
}

122
lib/commonmarker/map.c Normal file
View File

@ -0,0 +1,122 @@
#include "map.h"
#include "utf8.h"
#include "parser.h"
// Produces the canonical form of a map label: case-folded, with surrounding
// whitespace trimmed and whitespace runs normalized.
// Returns a string allocated from `mem` that the caller must free, or NULL
// when `ref` is NULL, has length 0, or normalizes to the empty string.
unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref) {
  cmark_strbuf folded = CMARK_BUF_INIT(mem);
  unsigned char *label;

  if (ref == NULL || ref->len == 0)
    return NULL;

  cmark_utf8proc_case_fold(&folded, ref->data, ref->len);
  cmark_strbuf_trim(&folded);
  cmark_strbuf_normalize_whitespace(&folded);

  label = cmark_strbuf_detach(&folded);
  assert(label);

  if (label[0] != '\0')
    return label;

  // The label consisted of whitespace only.
  mem->free(label);
  return NULL;
}
// Byte-wise comparison of two NUL-terminated labels (strcmp semantics).
static int
labelcmp(const unsigned char *a, const unsigned char *b) {
  const char *lhs = (const char *)a;
  const char *rhs = (const char *)b;
  return strcmp(lhs, rhs);
}
// qsort() comparator over an array of cmark_map_entry pointers.
// Entries are ordered by label first; entries with equal labels are ordered
// by ascending age, so the oldest definition of a label sorts first.
// The age tie-break is computed as a three-way comparison instead of a
// subtraction of int casts: `age` is unsigned, so casting and subtracting
// could yield an implementation-defined value or overflow for large ages.
static int
refcmp(const void *p1, const void *p2) {
  cmark_map_entry *r1 = *(cmark_map_entry **)p1;
  cmark_map_entry *r2 = *(cmark_map_entry **)p2;
  int res = labelcmp(r1->label, r2->label);
  if (res != 0)
    return res;
  return (r1->age > r2->age) - (r1->age < r2->age);
}
// bsearch() comparator: `label` is the normalized key string, `p2` points at
// one slot of the sorted entry-pointer array.
static int
refsearch(const void *label, const void *p2) {
  const unsigned char *key = (const unsigned char *)label;
  cmark_map_entry *candidate = *(cmark_map_entry **)p2;
  return labelcmp(key, candidate->label);
}
// Build map->sorted: an array of entry pointers sorted with refcmp, with
// entries whose label repeats an earlier one dropped (the first entry in
// sort order for each label is kept). map->size is updated to the number of
// distinct labels.
static void sort_map(cmark_map *map) {
  unsigned int count = map->size;
  cmark_map_entry **table =
      (cmark_map_entry **)map->mem->calloc(count, sizeof(cmark_map_entry *));

  // Copy the linked list into the array.
  unsigned int filled = 0;
  for (cmark_map_entry *entry = map->refs; entry; entry = entry->next)
    table[filled++] = entry;

  qsort(table, count, sizeof(cmark_map_entry *), refcmp);

  // Compact in place, keeping one entry per label.
  unsigned int kept = 0;
  for (unsigned int k = 1; k < count; k++) {
    if (labelcmp(table[k]->label, table[kept]->label) != 0) {
      kept++;
      table[kept] = table[k];
    }
  }

  map->sorted = table;
  map->size = kept + 1;
}
// Find the entry whose normalized label equals the normalized form of
// `label`. Returns NULL when the label is empty or longer than
// MAX_LINK_LABEL_LENGTH, when the map is NULL or empty, when the label
// normalizes to nothing, or when no entry matches.
// The sorted lookup table is built on first use.
cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label) {
  if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH ||
      map == NULL || !map->size)
    return NULL;

  unsigned char *key = normalize_map_label(map->mem, label);
  if (key == NULL)
    return NULL;

  if (map->sorted == NULL)
    sort_map(map);

  cmark_map_entry **hit = (cmark_map_entry **)bsearch(
      key, map->sorted, map->size, sizeof(cmark_map_entry *), refsearch);
  map->mem->free(key);

  return hit ? hit[0] : NULL;
}
// Release a map: every entry on the linked list is passed to the map's own
// free callback, then the sorted array and the map itself are freed with the
// map's allocator. A NULL map is ignored.
void cmark_map_free(cmark_map *map) {
  if (!map)
    return;

  for (cmark_map_entry *entry = map->refs, *following; entry; entry = following) {
    // Read the link before the callback runs on the entry.
    following = entry->next;
    map->free(map, entry);
  }

  map->mem->free(map->sorted);
  map->mem->free(map);
}
// Allocate a zero-initialized map from `mem` and record the allocator and the
// per-entry free callback in it.
cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free) {
  cmark_map *created = (cmark_map *)mem->calloc(1, sizeof(cmark_map));
  created->free = free;
  created->mem = mem;
  return created;
}

41
lib/commonmarker/map.h Normal file
View File

@ -0,0 +1,41 @@
#ifndef CMARK_MAP_H
#define CMARK_MAP_H
#include "chunk.h"
#ifdef __cplusplus
extern "C" {
#endif
// One entry of a cmark_map.
struct cmark_map_entry {
// Next entry on the map's singly linked list (walked from cmark_map.refs).
struct cmark_map_entry *next;
// Label used as the lookup key; compared with strcmp() in map.c.
unsigned char *label;
// Tie-breaker among entries with the same label: the lower age sorts first
// and is the one retained when the lookup table is built.
unsigned int age;
};
typedef struct cmark_map_entry cmark_map_entry;
struct cmark_map;
// Callback that releases a single entry; cmark_map_free() calls it once per
// entry on the list.
typedef void (*cmark_map_free_f)(struct cmark_map *, cmark_map_entry *);
// A label -> entry map.
struct cmark_map {
// Allocator used for the map, its sorted array and normalized labels.
cmark_mem *mem;
// Head of the linked list of all entries.
cmark_map_entry *refs;
// Array of entry pointers sorted by label; NULL until the first lookup
// builds it.
cmark_map_entry **sorted;
// Number of entries; once `sorted` has been built, the number of distinct
// labels in it.
unsigned int size;
// Per-entry destructor.
cmark_map_free_f free;
};
typedef struct cmark_map cmark_map;
// Returns the case-folded, trimmed, whitespace-normalized form of `ref`, or
// NULL if `ref` is NULL, empty, or consists only of whitespace.
unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref);
// Allocates an empty map that uses `mem` and releases entries with `free`.
cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free);
// Frees every entry, the sorted array and the map. NULL is ignored.
void cmark_map_free(cmark_map *map);
// Looks up the entry matching `label` after normalization; NULL if none.
cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label);
#ifdef __cplusplus
}
#endif
#endif

979
lib/commonmarker/node.c Normal file
View File

@ -0,0 +1,979 @@
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "node.h"
#include "syntax_extension.h"
static void S_node_unlink(cmark_node *node);
#define NODE_MEM(node) cmark_node_mem(node)
// Report whether `node` may have a child of type `child_type`.
bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) {
  // A document is never a valid child; this is checked before any extension
  // gets a say.
  if (child_type == CMARK_NODE_DOCUMENT)
    return false;

  // A node owned by an extension that supplies a predicate defers to it.
  if (node->extension && node->extension->can_contain_func)
    return node->extension->can_contain_func(node->extension, node, child_type) != 0;

  bool allowed = false;
  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
  case CMARK_NODE_BLOCK_QUOTE:
  case CMARK_NODE_FOOTNOTE_DEFINITION:
  case CMARK_NODE_ITEM:
    // Block containers take any block except a bare list item.
    allowed = CMARK_NODE_TYPE_BLOCK_P(child_type) && child_type != CMARK_NODE_ITEM;
    break;
  case CMARK_NODE_LIST:
    allowed = child_type == CMARK_NODE_ITEM;
    break;
  case CMARK_NODE_CUSTOM_BLOCK:
    allowed = true;
    break;
  case CMARK_NODE_PARAGRAPH:
  case CMARK_NODE_HEADING:
  case CMARK_NODE_EMPH:
  case CMARK_NODE_STRONG:
  case CMARK_NODE_LINK:
  case CMARK_NODE_IMAGE:
  case CMARK_NODE_CUSTOM_INLINE:
    allowed = CMARK_NODE_TYPE_INLINE_P(child_type);
    break;
  default:
    break;
  }
  return allowed;
}
// Decide whether `child` may be attached beneath `node`: both must be
// non-NULL, share an allocator, `child` must not be `node` or one of its
// ancestors, and the type rules of cmark_node_can_contain_type() must allow it.
static bool S_can_contain(cmark_node *node, cmark_node *child) {
  if (!node || !child)
    return false;

  if (NODE_MEM(node) != NODE_MEM(child))
    return false;

  // Attaching a node beneath itself or a descendant would create a cycle.
  for (cmark_node *ancestor = node; ancestor != NULL; ancestor = ancestor->parent) {
    if (ancestor == child)
      return false;
  }

  return cmark_node_can_contain_type(node, (cmark_node_type)child->type);
}
// Allocate a zeroed node of the given type from `mem`, bind it to
// `extension`, apply per-type defaults (heading level 1; bullet, loose list
// starting at 0) and let the extension allocate its opaque data if it
// provides a hook for that.
cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type, cmark_mem *mem, cmark_syntax_extension *extension) {
  cmark_node *created = (cmark_node *)mem->calloc(1, sizeof(*created));
  cmark_strbuf_init(mem, &created->content, 0);
  created->type = (uint16_t)type;
  created->extension = extension;

  if (created->type == CMARK_NODE_HEADING) {
    created->as.heading.level = 1;
  } else if (created->type == CMARK_NODE_LIST) {
    created->as.list.list_type = CMARK_BULLET_LIST;
    created->as.list.start = 0;
    created->as.list.tight = false;
  }

  if (created->extension && created->extension->opaque_alloc_func)
    created->extension->opaque_alloc_func(created->extension, mem, created);

  return created;
}
// Create a node of `type` bound to `extension`, using the library's default
// allocator.
cmark_node *cmark_node_new_with_ext(cmark_node_type type, cmark_syntax_extension *extension) {
  extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
  cmark_mem *default_mem = &CMARK_DEFAULT_MEM_ALLOCATOR;
  return cmark_node_new_with_mem_and_ext(type, default_mem, extension);
}
// Create a node of `type` from `mem` with no syntax extension attached.
cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
  cmark_syntax_extension *no_extension = NULL;
  return cmark_node_new_with_mem_and_ext(type, mem, no_extension);
}
// Create a node of `type` with the default allocator and no extension.
cmark_node *cmark_node_new(cmark_node_type type) {
  cmark_node *created = cmark_node_new_with_ext(type, NULL);
  return created;
}
// Release the chunks held in the type-specific part of the node (`node->as`).
// Which union members are freed depends on the node's current type; types
// not listed here own no chunks in this function's view.
static void free_node_as(cmark_node *node) {
  cmark_mem *mem = NODE_MEM(node);

  switch (node->type) {
  case CMARK_NODE_CODE_BLOCK:
    cmark_chunk_free(mem, &node->as.code.info);
    cmark_chunk_free(mem, &node->as.code.literal);
    return;

  case CMARK_NODE_TEXT:
  case CMARK_NODE_HTML_INLINE:
  case CMARK_NODE_CODE:
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_FOOTNOTE_REFERENCE:
  case CMARK_NODE_FOOTNOTE_DEFINITION:
    cmark_chunk_free(mem, &node->as.literal);
    return;

  case CMARK_NODE_LINK:
  case CMARK_NODE_IMAGE:
    cmark_chunk_free(mem, &node->as.link.url);
    cmark_chunk_free(mem, &node->as.link.title);
    return;

  case CMARK_NODE_CUSTOM_BLOCK:
  case CMARK_NODE_CUSTOM_INLINE:
    cmark_chunk_free(mem, &node->as.custom.on_enter);
    cmark_chunk_free(mem, &node->as.custom.on_exit);
    return;

  default:
    return;
  }
}
// Free a cmark_node list and any children.
// Works iteratively: instead of recursing into children, each node's child
// list is spliced into the `next` chain right after the node, so a single
// forward walk visits every descendant.
static void S_free_nodes(cmark_node *e) {
cmark_node *next;
while (e != NULL) {
cmark_strbuf_free(&e->content);
// NOTE(review): NODE_MEM(e) reads e->content.mem after the strbuf was freed
// above; this presumably relies on cmark_strbuf_free keeping `mem` intact —
// confirm in buffer.c.
if (e->user_data && e->user_data_free_func)
e->user_data_free_func(NODE_MEM(e), e->user_data);
// Let the owning extension release its opaque payload, if any.
if (e->as.opaque && e->extension && e->extension->opaque_free_func)
e->extension->opaque_free_func(e->extension, NODE_MEM(e), e);
free_node_as(e);
if (e->last_child) {
// Splice children into list
// (the last child inherits e's successor, and e's successor becomes its
// first child, so the children are visited next).
e->last_child->next = e->next;
e->next = e->first_child;
}
// Read the successor before the node itself is released.
next = e->next;
NODE_MEM(e)->free(e);
e = next;
}
}
// Detach `node` from its tree and free it together with all descendants.
// Passing NULL is a no-op: S_node_unlink() already tolerates NULL, but the
// assignment to node->next that follows would otherwise dereference it.
void cmark_node_free(cmark_node *node) {
  if (node == NULL) {
    return;
  }
  S_node_unlink(node);
  // Cut the sibling chain so S_free_nodes() stops after this subtree.
  node->next = NULL;
  S_free_nodes(node);
}
// Type of `node`, or CMARK_NODE_NONE for a NULL node.
cmark_node_type cmark_node_get_type(cmark_node *node) {
  return node ? (cmark_node_type)node->type : CMARK_NODE_NONE;
}
// Change the type of `node`. Returns 1 on success (including when the type is
// already `type`) and 0 when the node's parent could not contain a node of
// the new type; a node without a parent is rejected too, because
// S_can_contain() refuses a NULL container.
// On success the chunks belonging to the previous type are released.
int cmark_node_set_type(cmark_node * node, cmark_node_type type) {
  if (type == node->type)
    return 1;

  const uint16_t previous = node->type;

  // Try the new type on only long enough to ask the parent about it.
  node->type = (uint16_t)type;
  int allowed = S_can_contain(node->parent, node);
  node->type = previous;
  if (!allowed)
    return 0;

  // free_node_as() dispatches on node->type, so it has to run while the old
  // type is still in place.
  free_node_as(node);
  node->type = (uint16_t)type;
  return 1;
}
// Human-readable name of the node's type.
// Returns "NONE" for a NULL node. Nodes owned by an extension that provides
// get_type_string_func get the extension's answer. Otherwise the core type is
// mapped to a lowercase name; types with no entry yield "<unknown>".
// The footnote node types, which the rest of this file handles as core types,
// are named here too instead of falling through to "<unknown>".
const char *cmark_node_get_type_string(cmark_node *node) {
  if (node == NULL) {
    return "NONE";
  }
  if (node->extension && node->extension->get_type_string_func) {
    return node->extension->get_type_string_func(node->extension, node);
  }
  switch (node->type) {
  case CMARK_NODE_NONE:
    return "none";
  case CMARK_NODE_DOCUMENT:
    return "document";
  case CMARK_NODE_BLOCK_QUOTE:
    return "block_quote";
  case CMARK_NODE_LIST:
    return "list";
  case CMARK_NODE_ITEM:
    return "item";
  case CMARK_NODE_CODE_BLOCK:
    return "code_block";
  case CMARK_NODE_HTML_BLOCK:
    return "html_block";
  case CMARK_NODE_CUSTOM_BLOCK:
    return "custom_block";
  case CMARK_NODE_PARAGRAPH:
    return "paragraph";
  case CMARK_NODE_HEADING:
    return "heading";
  case CMARK_NODE_THEMATIC_BREAK:
    return "thematic_break";
  case CMARK_NODE_FOOTNOTE_DEFINITION:
    return "footnote_definition";
  case CMARK_NODE_TEXT:
    return "text";
  case CMARK_NODE_SOFTBREAK:
    return "softbreak";
  case CMARK_NODE_LINEBREAK:
    return "linebreak";
  case CMARK_NODE_CODE:
    return "code";
  case CMARK_NODE_HTML_INLINE:
    return "html_inline";
  case CMARK_NODE_CUSTOM_INLINE:
    return "custom_inline";
  case CMARK_NODE_EMPH:
    return "emph";
  case CMARK_NODE_STRONG:
    return "strong";
  case CMARK_NODE_LINK:
    return "link";
  case CMARK_NODE_IMAGE:
    return "image";
  case CMARK_NODE_FOOTNOTE_REFERENCE:
    return "footnote_reference";
  }
  return "<unknown>";
}
// Following sibling of `node`; NULL for a NULL node or the last sibling.
cmark_node *cmark_node_next(cmark_node *node) {
  return node ? node->next : NULL;
}
// Preceding sibling of `node`; NULL for a NULL node or the first sibling.
cmark_node *cmark_node_previous(cmark_node *node) {
  return node ? node->prev : NULL;
}
// Parent of `node`; NULL for a NULL node or a node without a parent.
cmark_node *cmark_node_parent(cmark_node *node) {
  return node ? node->parent : NULL;
}
// First child of `node`; NULL for a NULL node or a node without children.
cmark_node *cmark_node_first_child(cmark_node *node) {
  return node ? node->first_child : NULL;
}
// Last child of `node`; NULL for a NULL node or a node without children.
cmark_node *cmark_node_last_child(cmark_node *node) {
  return node ? node->last_child : NULL;
}
// User data pointer stored on `node`; NULL for a NULL node.
void *cmark_node_get_user_data(cmark_node *node) {
  return node ? node->user_data : NULL;
}
// Store `user_data` on `node`. Returns 1 on success, 0 for a NULL node.
// Any previously stored pointer is simply overwritten.
int cmark_node_set_user_data(cmark_node *node, void *user_data) {
  if (node != NULL) {
    node->user_data = user_data;
    return 1;
  }
  return 0;
}
// Register the function used to release the node's user data when the node
// is freed. Returns 1 on success, 0 for a NULL node.
int cmark_node_set_user_data_free_func(cmark_node *node,
                                       cmark_free_func free_func) {
  if (node != NULL) {
    node->user_data_free_func = free_func;
    return 1;
  }
  return 0;
}
// Literal text of `node` as a C string, or NULL when the node is NULL or its
// type carries no literal. Code blocks keep their text in `as.code.literal`;
// the other literal-bearing types use `as.literal`.
const char *cmark_node_get_literal(cmark_node *node) {
  if (!node)
    return NULL;

  cmark_chunk *source = NULL;
  switch (node->type) {
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_TEXT:
  case CMARK_NODE_HTML_INLINE:
  case CMARK_NODE_CODE:
  case CMARK_NODE_FOOTNOTE_REFERENCE:
    source = &node->as.literal;
    break;
  case CMARK_NODE_CODE_BLOCK:
    source = &node->as.code.literal;
    break;
  default:
    break;
  }

  if (source == NULL)
    return NULL;
  return cmark_chunk_to_cstr(NODE_MEM(node), source);
}
// Replace the literal text of `node` with `content`. Returns 1 on success and
// 0 when the node is NULL or its type carries no literal.
int cmark_node_set_literal(cmark_node *node, const char *content) {
  if (!node)
    return 0;

  cmark_chunk *target = NULL;
  switch (node->type) {
  case CMARK_NODE_HTML_BLOCK:
  case CMARK_NODE_TEXT:
  case CMARK_NODE_HTML_INLINE:
  case CMARK_NODE_CODE:
  case CMARK_NODE_FOOTNOTE_REFERENCE:
    target = &node->as.literal;
    break;
  case CMARK_NODE_CODE_BLOCK:
    target = &node->as.code.literal;
    break;
  default:
    break;
  }

  if (target == NULL)
    return 0;
  cmark_chunk_set_cstr(NODE_MEM(node), target, content);
  return 1;
}
// Raw pointer to the node's `content` string buffer.
// Returns NULL for a NULL node, matching the other getters in this file
// (previously a NULL node was dereferenced).
const char *cmark_node_get_string_content(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  return (char *) node->content.ptr;
}
// Replace the node's `content` string buffer with a copy of `content`.
// Returns 1 on success and 0 for a NULL node, matching the other setters in
// this file (previously a NULL node was dereferenced).
int cmark_node_set_string_content(cmark_node *node, const char *content) {
  if (node == NULL) {
    return 0;
  }
  cmark_strbuf_sets(&node->content, content);
  return 1;
}
// Heading level of `node`, or 0 when the node is NULL or not a heading.
int cmark_node_get_heading_level(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_HEADING)
    return node->as.heading.level;
  return 0;
}
// Set the level of a heading node. Accepts levels 1 through 6 only.
// Returns 1 on success, 0 for a NULL node, an out-of-range level, or a node
// that is not a heading.
int cmark_node_set_heading_level(cmark_node *node, int level) {
  if (node == NULL || level < 1 || level > 6)
    return 0;
  if (node->type != CMARK_NODE_HEADING)
    return 0;
  node->as.heading.level = level;
  return 1;
}
// List type of `node`, or CMARK_NO_LIST when the node is NULL or not a list.
cmark_list_type cmark_node_get_list_type(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST)
    return CMARK_NO_LIST;
  return node->as.list.list_type;
}
// Set the type of a list node to bullet or ordered. Returns 1 on success and
// 0 for any other `type`, a NULL node, or a node that is not a list.
int cmark_node_set_list_type(cmark_node *node, cmark_list_type type) {
  if (type != CMARK_BULLET_LIST && type != CMARK_ORDERED_LIST)
    return 0;
  if (node == NULL || node->type != CMARK_NODE_LIST)
    return 0;
  node->as.list.list_type = type;
  return 1;
}
// Delimiter of a list node, or CMARK_NO_DELIM when the node is NULL or not a
// list.
cmark_delim_type cmark_node_get_list_delim(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST)
    return CMARK_NO_DELIM;
  return node->as.list.delimiter;
}
// Set the delimiter of a list node to period or parenthesis. Returns 1 on
// success and 0 for any other `delim`, a NULL node, or a non-list node.
int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) {
  if (delim != CMARK_PERIOD_DELIM && delim != CMARK_PAREN_DELIM)
    return 0;
  if (node == NULL || node->type != CMARK_NODE_LIST)
    return 0;
  node->as.list.delimiter = delim;
  return 1;
}
// Start number of a list node, or 0 when the node is NULL or not a list.
int cmark_node_get_list_start(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST)
    return 0;
  return node->as.list.start;
}
// Set the start number of a list node. Negative values are rejected.
// Returns 1 on success, 0 for a NULL node, a negative start, or a non-list
// node.
int cmark_node_set_list_start(cmark_node *node, int start) {
  if (node == NULL || start < 0)
    return 0;
  if (node->type != CMARK_NODE_LIST)
    return 0;
  node->as.list.start = start;
  return 1;
}
// Tight flag of a list node, or 0 when the node is NULL or not a list.
int cmark_node_get_list_tight(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_LIST)
    return 0;
  return node->as.list.tight;
}
// Set the tight flag of a list node: exactly 1 marks the list tight, any
// other value clears the flag. Returns 1 on success, 0 for a NULL node or a
// non-list node.
int cmark_node_set_list_tight(cmark_node *node, int tight) {
  if (node == NULL || node->type != CMARK_NODE_LIST)
    return 0;
  node->as.list.tight = tight == 1;
  return 1;
}
// Info string of a code block as a C string; NULL when the node is NULL or
// not a code block.
const char *cmark_node_get_fence_info(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK)
    return NULL;
  return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.info);
}
// Replace the info string of a code block. Returns 1 on success, 0 when the
// node is NULL or not a code block.
int cmark_node_set_fence_info(cmark_node *node, const char *info) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK)
    return 0;
  cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.info, info);
  return 1;
}
// Read the fence description of a code block. On a code block the fence
// length, offset and character are written through the three out-pointers
// (which must be valid) and the `fenced` flag is returned. For a NULL node
// or any other node type the outputs are left untouched and 0 is returned.
int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK)
    return 0;

  *length = node->as.code.fence_length;
  *offset = node->as.code.fence_offset;
  *character = node->as.code.fence_char;
  return node->as.code.fenced;
}
// Store the fence description of a code block. `fenced` is narrowed to
// int8_t and `length`/`offset` to uint8_t when stored. Returns 1 on success,
// 0 when the node is NULL or not a code block.
int cmark_node_set_fenced(cmark_node * node, int fenced,
                          int length, int offset, char character) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK)
    return 0;

  node->as.code.fenced = (int8_t)fenced;
  node->as.code.fence_length = (uint8_t)length;
  node->as.code.fence_offset = (uint8_t)offset;
  node->as.code.fence_char = character;
  return 1;
}
// URL of a link or image node as a C string; NULL for a NULL node or any
// other node type.
const char *cmark_node_get_url(cmark_node *node) {
  if (node == NULL)
    return NULL;
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE)
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url);
  return NULL;
}
// Replace the URL of a link or image node. Returns 1 on success, 0 for a
// NULL node or any other node type.
int cmark_node_set_url(cmark_node *node, const char *url) {
  if (node == NULL)
    return 0;
  if (node->type != CMARK_NODE_LINK && node->type != CMARK_NODE_IMAGE)
    return 0;
  cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url);
  return 1;
}
// Title of a link or image node as a C string; NULL for a NULL node or any
// other node type.
const char *cmark_node_get_title(cmark_node *node) {
  if (node == NULL)
    return NULL;
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE)
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title);
  return NULL;
}
// Replace the title of a link or image node. Returns 1 on success, 0 for a
// NULL node or any other node type.
int cmark_node_set_title(cmark_node *node, const char *title) {
  if (node == NULL)
    return 0;
  if (node->type != CMARK_NODE_LINK && node->type != CMARK_NODE_IMAGE)
    return 0;
  cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title);
  return 1;
}
// "On enter" text of a custom inline or custom block node as a C string;
// NULL for a NULL node or any other node type.
const char *cmark_node_get_on_enter(cmark_node *node) {
  if (node == NULL)
    return NULL;
  if (node->type == CMARK_NODE_CUSTOM_INLINE || node->type == CMARK_NODE_CUSTOM_BLOCK)
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_enter);
  return NULL;
}
// Replace the "on enter" text of a custom inline or custom block node.
// Returns 1 on success, 0 for a NULL node or any other node type.
int cmark_node_set_on_enter(cmark_node *node, const char *on_enter) {
  if (node == NULL)
    return 0;
  if (node->type != CMARK_NODE_CUSTOM_INLINE && node->type != CMARK_NODE_CUSTOM_BLOCK)
    return 0;
  cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_enter, on_enter);
  return 1;
}
// "On exit" text of a custom inline or custom block node as a C string;
// NULL for a NULL node or any other node type.
const char *cmark_node_get_on_exit(cmark_node *node) {
  if (node == NULL)
    return NULL;
  if (node->type == CMARK_NODE_CUSTOM_INLINE || node->type == CMARK_NODE_CUSTOM_BLOCK)
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_exit);
  return NULL;
}
// Replace the "on exit" text of a custom inline or custom block node.
// Returns 1 on success, 0 for a NULL node or any other node type.
int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) {
  if (node == NULL)
    return 0;
  if (node->type != CMARK_NODE_CUSTOM_INLINE && node->type != CMARK_NODE_CUSTOM_BLOCK)
    return 0;
  cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_exit, on_exit);
  return 1;
}
// Syntax extension attached to `node`; NULL for a NULL node.
cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node) {
  return node ? node->extension : NULL;
}
// Attach `extension` to `node`. Returns 1 on success, 0 for a NULL node.
int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension) {
  if (node != NULL) {
    node->extension = extension;
    return 1;
  }
  return 0;
}
// Value of the node's start_line field; 0 for a NULL node.
int cmark_node_get_start_line(cmark_node *node) {
  return node ? node->start_line : 0;
}
// Value of the node's start_column field; 0 for a NULL node.
int cmark_node_get_start_column(cmark_node *node) {
  return node ? node->start_column : 0;
}
// Value of the node's end_line field; 0 for a NULL node.
int cmark_node_get_end_line(cmark_node *node) {
  return node ? node->end_line : 0;
}
// Value of the node's end_column field; 0 for a NULL node.
int cmark_node_get_end_column(cmark_node *node) {
  return node ? node->end_column : 0;
}
// Remove `node` from its sibling chain and from its parent's first/last child
// slots. The node's own next, prev and parent pointers are left as they were;
// callers reset them as needed. A NULL node is ignored.
static void S_node_unlink(cmark_node *node) {
  if (!node)
    return;

  cmark_node *before = node->prev;
  cmark_node *after = node->next;
  cmark_node *owner = node->parent;

  // Bridge the neighbours over the removed node.
  if (before)
    before->next = after;
  if (after)
    after->prev = before;

  // Keep the parent's child bounds accurate.
  if (owner) {
    if (owner->first_child == node)
      owner->first_child = after;
    if (owner->last_child == node)
      owner->last_child = before;
  }
}
// Detach `node` from its tree and clear its next, prev and parent pointers so
// it stands alone (its children stay attached to it).
// Passing NULL is a no-op: S_node_unlink() already tolerates NULL, but the
// pointer resets below would otherwise dereference it.
void cmark_node_unlink(cmark_node *node) {
  if (node == NULL) {
    return;
  }
  S_node_unlink(node);
  node->next = NULL;
  node->prev = NULL;
  node->parent = NULL;
}
// Insert `sibling` immediately before `node`, detaching it from wherever it
// was. Returns 1 on success; 0 when either argument is NULL, `node` has no
// parent, or the parent may not contain `sibling`.
int cmark_node_insert_before(cmark_node *node, cmark_node *sibling) {
  if (!node || !sibling)
    return 0;
  if (node->parent == NULL || !S_can_contain(node->parent, sibling))
    return 0;

  // Detach first: `sibling` may currently be a neighbour of `node`, and the
  // links read below must reflect the tree without it.
  S_node_unlink(sibling);

  cmark_node *before = node->prev;
  if (before != NULL)
    before->next = sibling;
  sibling->prev = before;
  sibling->next = node;
  node->prev = sibling;

  cmark_node *owner = node->parent;
  sibling->parent = owner;

  // With nothing in front of it, `sibling` is the new first child.
  if (owner && before == NULL)
    owner->first_child = sibling;

  return 1;
}
// Insert `sibling` immediately after `node`, detaching it from wherever it
// was. Returns 1 on success; 0 when either argument is NULL, `node` has no
// parent, or the parent may not contain `sibling`.
int cmark_node_insert_after(cmark_node *node, cmark_node *sibling) {
  if (!node || !sibling)
    return 0;
  if (node->parent == NULL || !S_can_contain(node->parent, sibling))
    return 0;

  // Detach first: `sibling` may currently be a neighbour of `node`, and the
  // links read below must reflect the tree without it.
  S_node_unlink(sibling);

  cmark_node *after = node->next;
  if (after != NULL)
    after->prev = sibling;
  sibling->next = after;
  sibling->prev = node;
  node->next = sibling;

  cmark_node *owner = node->parent;
  sibling->parent = owner;

  // With nothing behind it, `sibling` is the new last child.
  if (owner && after == NULL)
    owner->last_child = sibling;

  return 1;
}
// Put `newnode` in the tree position of `oldnode` and detach `oldnode`
// (without freeing it). Returns 1 on success, 0 when `newnode` could not be
// inserted, in which case `oldnode` stays where it was.
int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) {
  if (cmark_node_insert_before(oldnode, newnode)) {
    cmark_node_unlink(oldnode);
    return 1;
  }
  return 0;
}
// Make `child` the first child of `node`, detaching it from wherever it was.
// Returns 1 on success, 0 when S_can_contain() rejects the pairing (which
// also covers NULL arguments).
int cmark_node_prepend_child(cmark_node *node, cmark_node *child) {
  if (!S_can_contain(node, child))
    return 0;

  S_node_unlink(child);

  cmark_node *former_head = node->first_child;
  child->next = former_head;
  child->prev = NULL;
  child->parent = node;
  node->first_child = child;

  if (former_head == NULL) {
    // `node` had no children: `child` is also the last one.
    node->last_child = child;
  } else {
    former_head->prev = child;
  }
  return 1;
}
// Make `child` the last child of `node`, detaching it from wherever it was.
// Returns 1 on success, 0 when S_can_contain() rejects the pairing (which
// also covers NULL arguments).
int cmark_node_append_child(cmark_node *node, cmark_node *child) {
  if (!S_can_contain(node, child))
    return 0;

  S_node_unlink(child);

  cmark_node *former_tail = node->last_child;
  child->next = NULL;
  child->prev = former_tail;
  child->parent = node;
  node->last_child = child;

  if (former_tail == NULL) {
    // `node` had no children: `child` is also the first one.
    node->first_child = child;
  } else {
    former_tail->next = child;
  }
  return 1;
}
// Write a one-line consistency-error report for `node` to `out`, naming the
// offending link (`elem`), the node type and its start position. Does nothing
// when `out` is NULL.
static void S_print_error(FILE *out, cmark_node *node, const char *elem) {
  if (out != NULL) {
    const char *type_name = cmark_node_get_type_string(node);
    fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, type_name,
            node->start_line, node->start_column);
  }
}
// Verify the link structure of the subtree rooted at `node` and repair it.
// Every inconsistent prev / parent / last_child pointer found is reported
// through S_print_error() (silent when `out` is NULL), overwritten with the
// value implied by the first_child/next chain, and counted.
// Returns the number of problems found; 0 for a NULL node.
// The walk is iterative: descend via first_child, advance via next, and climb
// back through parent once a sibling chain is exhausted.
int cmark_node_check(cmark_node *node, FILE *out) {
cmark_node *cur;
int errors = 0;
if (!node) {
return 0;
}
cur = node;
for (;;) {
// Descend: a first child must have no predecessor and must point back at
// the node it hangs from.
if (cur->first_child) {
if (cur->first_child->prev != NULL) {
S_print_error(out, cur->first_child, "prev");
cur->first_child->prev = NULL;
++errors;
}
if (cur->first_child->parent != cur) {
S_print_error(out, cur->first_child, "parent");
cur->first_child->parent = cur;
++errors;
}
cur = cur->first_child;
continue;
}
next_sibling:
// Back at the root of the checked subtree: the walk is complete. The root's
// own siblings are deliberately not visited.
if (cur == node) {
break;
}
// Advance: the next sibling must point back at cur and share its parent.
if (cur->next) {
if (cur->next->prev != cur) {
S_print_error(out, cur->next, "prev");
cur->next->prev = cur;
++errors;
}
if (cur->next->parent != cur->parent) {
S_print_error(out, cur->next, "parent");
cur->next->parent = cur->parent;
++errors;
}
cur = cur->next;
continue;
}
// End of a sibling chain: cur must be its parent's last child.
if (cur->parent->last_child != cur) {
S_print_error(out, cur->parent, "last_child");
cur->parent->last_child = cur;
++errors;
}
// Climb and look for the parent's next sibling, without descending again.
cur = cur->parent;
goto next_sibling;
}
return errors;
}

117
lib/commonmarker/node.h Normal file
View File

@ -0,0 +1,117 @@
#ifndef CMARK_NODE_H
#define CMARK_NODE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdint.h>
#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"
#include "buffer.h"
#include "chunk.h"
// Type-specific data of a CMARK_NODE_LIST node (stored in cmark_node.as.list).
typedef struct {
// Bullet or ordered; see cmark_node_get_list_type().
cmark_list_type list_type;
// NOTE(review): marker_offset and padding are not used in the code visible
// here; presumably set by the block parser — confirm in blocks.c.
int marker_offset;
int padding;
// Start number; cmark_node_set_list_start() rejects negative values.
int start;
// Period or parenthesis delimiter; see cmark_node_get_list_delim().
cmark_delim_type delimiter;
unsigned char bullet_char;
// True for a tight list; see cmark_node_get_list_tight().
bool tight;
} cmark_list;
// Type-specific data of a CMARK_NODE_CODE_BLOCK node (cmark_node.as.code).
typedef struct {
// Info string; see cmark_node_get_fence_info().
cmark_chunk info;
// Text of the code block; see cmark_node_get_literal().
cmark_chunk literal;
// Fence description as read and written by cmark_node_get_fenced() /
// cmark_node_set_fenced(); length and offset are limited to uint8_t range.
uint8_t fence_length;
uint8_t fence_offset;
unsigned char fence_char;
int8_t fenced;
} cmark_code;
// Type-specific data of a CMARK_NODE_HEADING node (cmark_node.as.heading).
typedef struct {
// Heading level; new headings start at 1 and the setter accepts 1..6.
int level;
// NOTE(review): presumably true for setext-style headings — not used in the
// code visible here; confirm against the parser.
bool setext;
} cmark_heading;
// Type-specific data of link and image nodes (cmark_node.as.link).
typedef struct {
// Destination; see cmark_node_get_url().
cmark_chunk url;
// Title; see cmark_node_get_title().
cmark_chunk title;
} cmark_link;
// Type-specific data of custom block and custom inline nodes
// (cmark_node.as.custom).
typedef struct {
// Text returned by cmark_node_get_on_enter().
cmark_chunk on_enter;
// Text returned by cmark_node_get_on_exit().
cmark_chunk on_exit;
} cmark_custom;
// Single-bit flags for internal bookkeeping on a node.
// NOTE(review): presumably stored in cmark_node.flags and maintained by the
// block parser — no user of these flags is visible here; confirm.
enum cmark_node__internal_flags {
CMARK_NODE__OPEN = (1 << 0),
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
};
// A node of the document tree.
struct cmark_node {
// String buffer attached to the node; its `mem` member is the allocator that
// owns the node (see cmark_node_mem()).
cmark_strbuf content;
// Sibling links.
struct cmark_node *next;
struct cmark_node *prev;
// Parent and child bounds; children are chained through next/prev.
struct cmark_node *parent;
struct cmark_node *first_child;
struct cmark_node *last_child;
// Caller-owned pointer and the optional function used to release it when the
// node is freed.
void *user_data;
cmark_free_func user_data_free_func;
// Source position as returned by the cmark_node_get_{start,end}_{line,column}
// accessors.
int start_line;
int start_column;
int end_line;
int end_column;
int internal_offset;
// Holds a cmark_node_type value.
uint16_t type;
// NOTE(review): presumably a bitmask of cmark_node__internal_flags — confirm.
uint16_t flags;
// Syntax extension that owns this node, or NULL.
cmark_syntax_extension *extension;
// Type-specific payload; which member is meaningful depends on `type`.
union {
cmark_chunk literal;
cmark_list list;
cmark_code code;
cmark_heading heading;
cmark_link link;
cmark_custom custom;
int html_block_type;
// Payload managed by the extension's opaque alloc/free hooks.
void *opaque;
} as;
};
// Allocator that owns `node`, as recorded in the node's content buffer.
// `node` must not be NULL.
static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
  cmark_mem *owner = node->content.mem;
  return owner;
}
// Checks and repairs the link structure of the subtree rooted at `node`,
// reporting each problem to `out` when it is non-NULL. Returns the number of
// problems found.
CMARK_GFM_EXPORT int cmark_node_check(cmark_node *node, FILE *out);
// True when the category bits of `node_type` (selected by
// CMARK_NODE_TYPE_MASK) equal CMARK_NODE_TYPE_BLOCK.
static CMARK_INLINE bool CMARK_NODE_TYPE_BLOCK_P(cmark_node_type node_type) {
  return CMARK_NODE_TYPE_BLOCK == (node_type & CMARK_NODE_TYPE_MASK);
}
// True when `node` is non-NULL and its type is a block type.
static CMARK_INLINE bool CMARK_NODE_BLOCK_P(cmark_node *node) {
  if (node == NULL)
    return false;
  return CMARK_NODE_TYPE_BLOCK_P((cmark_node_type) node->type);
}
// True when the category bits of `node_type` (selected by
// CMARK_NODE_TYPE_MASK) equal CMARK_NODE_TYPE_INLINE.
static CMARK_INLINE bool CMARK_NODE_TYPE_INLINE_P(cmark_node_type node_type) {
  return CMARK_NODE_TYPE_INLINE == (node_type & CMARK_NODE_TYPE_MASK);
}
// True when `node` is non-NULL and its type is an inline type.
static CMARK_INLINE bool CMARK_NODE_INLINE_P(cmark_node *node) {
  if (node == NULL)
    return false;
  return CMARK_NODE_TYPE_INLINE_P((cmark_node_type) node->type);
}
// Reports whether `node` may have a child of type `child_type`. `node` must
// not be NULL.
CMARK_GFM_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type);
#ifdef __cplusplus
}
#endif
#endif

58
lib/commonmarker/parser.h Normal file
View File

@ -0,0 +1,58 @@
#ifndef CMARK_PARSER_H
#define CMARK_PARSER_H
#include <stdio.h>
#include "references.h"
#include "node.h"
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
// Longest label (chunk length) that cmark_map_lookup() will search for;
// longer labels never match.
#define MAX_LINK_LABEL_LENGTH 1000
// State of one parsing session.
struct cmark_parser {
struct cmark_mem *mem;
/* Map from normalized link label to reference definition for the current
   document. Despite the older "hashtable" wording, cmark_map is a linked
   list plus a lazily built sorted array searched with bsearch (see map.c). */
struct cmark_map *refmap;
/* The root node of the parser, always a CMARK_NODE_DOCUMENT */
struct cmark_node *root;
/* The last open block after a line is fully processed */
struct cmark_node *current;
/* See the documentation for cmark_parser_get_line_number() in cmark.h */
int line_number;
/* See the documentation for cmark_parser_get_offset() in cmark.h */
bufsize_t offset;
/* See the documentation for cmark_parser_get_column() in cmark.h */
bufsize_t column;
/* See the documentation for cmark_parser_get_first_nonspace() in cmark.h */
bufsize_t first_nonspace;
/* See the documentation for cmark_parser_get_first_nonspace_column() in cmark.h */
bufsize_t first_nonspace_column;
bufsize_t thematic_break_kill_pos;
/* See the documentation for cmark_parser_get_indent() in cmark.h */
int indent;
/* See the documentation for cmark_parser_is_blank() in cmark.h */
bool blank;
/* See the documentation for cmark_parser_has_partially_consumed_tab() in cmark.h */
bool partially_consumed_tab;
/* Contains the currently processed line */
cmark_strbuf curline;
/* See the documentation for cmark_parser_get_last_line_length() in cmark.h */
bufsize_t last_line_length;
/* FIXME: not sure about the difference with curline */
cmark_strbuf linebuf;
/* Options set by the user, see the Options section in cmark.h */
int options;
bool last_buffer_ended_with_cr;
/* Lists of syntax extensions attached to this parser.
   NOTE(review): presumably block-level and inline-level extensions
   respectively — confirm against the code that fills them. */
cmark_llist *syntax_extensions;
cmark_llist *inline_syntax_extensions;
cmark_ispunct_func backslash_ispunct;
};
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,235 @@
#include "node.h"
#include "syntax_extension.h"
#include "render.h"
#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
#define LISTMARKER_SIZE 20
// Functions to convert cmark_nodes to plain text strings.
// Character-output callback of the plain-text renderer: every code point is
// emitted as-is. The node, escaping mode and lookahead character are not
// consulted.
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
                              cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  (void)node;
  (void)escape;
  (void)nextc;
  cmark_render_code_point(renderer, c);
}
// Nearest block-level node at or above `node`: `node` itself when it is a
// block, otherwise its closest block ancestor. Returns NULL when there is
// none (including for a NULL `node`).
static cmark_node *get_containing_block(cmark_node *node) {
  for (cmark_node *cur = node; cur != NULL; cur = cur->parent) {
    if (CMARK_NODE_BLOCK_P(cur))
      return cur;
  }
  return NULL;
}
// Node callback of the plain-text renderer: emits the output for one
// enter/exit event of `node`. Always returns 1.
// Nodes owned by an extension that provides plaintext_render_func are
// rendered by the extension instead of the switch below.
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
cmark_node *tmp;
int list_number;
cmark_delim_type list_delim;
int i;
bool entering = (ev_type == CMARK_EVENT_ENTER);
char listmarker[LISTMARKER_SIZE];
bool first_in_list_item;
bufsize_t marker_width;
// Wrapping at `width` is only allowed when a width is set and neither
// NOBREAKS nor HARDBREAKS is requested.
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
!(CMARK_OPT_HARDBREAKS & options);
// Don't adjust tight list status til we've started the list.
// Otherwise we lose the blank line between a paragraph and
// a following list.
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
tmp = get_containing_block(node);
// Tight when the containing block is an item of a tight list, or sits
// directly inside such an item.
renderer->in_tight_list_item =
tmp && // tmp might be NULL if there is no containing block
((tmp->type == CMARK_NODE_ITEM &&
cmark_node_get_list_tight(tmp->parent)) ||
(tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
cmark_node_get_list_tight(tmp->parent->parent)));
}
if (node->extension && node->extension->plaintext_render_func) {
node->extension->plaintext_render_func(node->extension, renderer, node, ev_type, options);
return 1;
}
switch (node->type) {
case CMARK_NODE_DOCUMENT:
break;
case CMARK_NODE_BLOCK_QUOTE:
break;
case CMARK_NODE_LIST:
// Separate a finished list from a directly following code block or list.
if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
node->next->type == CMARK_NODE_LIST)) {
CR();
}
break;
case CMARK_NODE_ITEM:
// The marker width is computed on both enter and exit, because exit has to
// remove exactly as much prefix as enter added.
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
marker_width = 4;
} else {
list_number = cmark_node_get_list_start(node->parent);
list_delim = cmark_node_get_list_delim(node->parent);
// The item's number is the list start plus its position among its siblings.
tmp = node;
while (tmp->prev) {
tmp = tmp->prev;
list_number += 1;
}
// we ensure a width of at least 4 so
// we get nice transition from single digits
// to double
// NOTE(review): both branches of the padding ternary below are the same
// literal in this copy, which contradicts the comment above — confirm the
// intended padding strings.
snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
list_delim == CMARK_PAREN_DELIM ? ")" : ".",
list_number < 10 ? " " : " ");
marker_width = (bufsize_t)strlen(listmarker);
}
if (entering) {
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
// NOTE(review): marker_width is 4 for bullets but this literal is three
// characters long in this copy — confirm the intended marker.
LIT(" - ");
renderer->begin_content = true;
} else {
LIT(listmarker);
renderer->begin_content = true;
}
// Indent continuation lines of the item by the marker width.
for (i = marker_width; i--;) {
cmark_strbuf_putc(renderer->prefix, ' ');
}
} else {
cmark_strbuf_truncate(renderer->prefix,
renderer->prefix->size - marker_width);
CR();
}
break;
case CMARK_NODE_HEADING:
if (entering) {
renderer->begin_content = true;
renderer->no_linebreaks = true;
} else {
renderer->no_linebreaks = false;
BLANKLINE();
}
break;
case CMARK_NODE_CODE_BLOCK:
// No leading blank line when the code block opens a list item.
first_in_list_item = node->prev == NULL && node->parent &&
node->parent->type == CMARK_NODE_ITEM;
if (!first_in_list_item) {
BLANKLINE();
}
OUT(cmark_node_get_literal(node), false, LITERAL);
BLANKLINE();
break;
case CMARK_NODE_HTML_BLOCK:
break;
case CMARK_NODE_CUSTOM_BLOCK:
break;
case CMARK_NODE_THEMATIC_BREAK:
BLANKLINE();
break;
case CMARK_NODE_PARAGRAPH:
if (!entering) {
BLANKLINE();
}
break;
case CMARK_NODE_TEXT:
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
break;
case CMARK_NODE_LINEBREAK:
CR();
break;
case CMARK_NODE_SOFTBREAK:
if (CMARK_OPT_HARDBREAKS & options) {
CR();
// The HARDBREAKS test in the condition below is always true here, since the
// branch above already handled that option.
} else if (!renderer->no_linebreaks && renderer->width == 0 &&
!(CMARK_OPT_HARDBREAKS & options) &&
!(CMARK_OPT_NOBREAKS & options)) {
CR();
} else {
OUT(" ", allow_wrap, LITERAL);
}
break;
case CMARK_NODE_CODE:
OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
break;
// The inline types below emit nothing of their own in plain text.
case CMARK_NODE_HTML_INLINE:
break;
case CMARK_NODE_CUSTOM_INLINE:
break;
case CMARK_NODE_STRONG:
break;
case CMARK_NODE_EMPH:
break;
case CMARK_NODE_LINK:
break;
case CMARK_NODE_IMAGE:
break;
case CMARK_NODE_FOOTNOTE_REFERENCE:
if (entering) {
LIT("[^");
OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
LIT("]");
}
break;
case CMARK_NODE_FOOTNOTE_DEFINITION:
if (entering) {
// Definitions are numbered in the order they are rendered.
renderer->footnote_ix += 1;
LIT("[^");
char n[32];
snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
OUT(n, false, LITERAL);
LIT("]: ");
// NOTE(review): the prefix appended here is one character long in this copy
// but four characters are truncated on exit — confirm the intended literal.
cmark_strbuf_puts(renderer->prefix, " ");
} else {
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
}
break;
default:
assert(false);
break;
}
return 1;
}
// Render the tree rooted at `root` as plain text, handing the allocator that
// belongs to `root` to cmark_render_plaintext_with_mem().
char *cmark_render_plaintext(cmark_node *root, int options, int width) {
  cmark_mem *allocator = cmark_node_mem(root);
  return cmark_render_plaintext_with_mem(root, options, width, allocator);
}
// Render the tree rooted at `root` as plain text using `mem`.
// With CMARK_OPT_HARDBREAKS set, width-based breaking is switched off by
// passing a width of 0, because the width has a different meaning under that
// option.
char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  const int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;
  return cmark_render(mem, root, options, effective_width, outc, S_render_node);
}

36
lib/commonmarker/plugin.c Normal file
View File

@ -0,0 +1,36 @@
#include <stdlib.h>
#include "plugin.h"
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
// Append `extension` to the plugin's list of syntax extensions.
// The list node is allocated with CMARK_DEFAULT_MEM_ALLOCATOR.
// Always returns 1.
int cmark_plugin_register_syntax_extension(cmark_plugin * plugin,
cmark_syntax_extension * extension) {
plugin->syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, plugin->syntax_extensions, extension);
return 1;
}
// Allocate a zeroed plugin from CMARK_DEFAULT_MEM_ALLOCATOR with an
// empty (NULL) syntax extension list.
cmark_plugin *
cmark_plugin_new(void) {
  cmark_plugin *plugin =
      (cmark_plugin *) CMARK_DEFAULT_MEM_ALLOCATOR.calloc(1, sizeof(*plugin));

  plugin->syntax_extensions = NULL;
  return plugin;
}
// Destroy a plugin: every extension still on its list is released with
// cmark_syntax_extension_free(), then the plugin itself is freed.
// Extensions previously removed with cmark_plugin_steal_syntax_extensions()
// are not touched, because the list is NULL afterwards.
void
cmark_plugin_free(cmark_plugin *plugin) {
cmark_llist_free_full(&CMARK_DEFAULT_MEM_ALLOCATOR,
plugin->syntax_extensions,
(cmark_free_func) cmark_syntax_extension_free);
CMARK_DEFAULT_MEM_ALLOCATOR.free(plugin);
}
// Hand the plugin's extension list over to the caller.
// The plugin's own pointer is cleared, so the plugin no longer refers
// to the returned list.
cmark_llist *
cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin) {
  cmark_llist *stolen = plugin->syntax_extensions;

  plugin->syntax_extensions = NULL;
  return stolen;
}

34
lib/commonmarker/plugin.h Normal file
View File

@ -0,0 +1,34 @@
#ifndef CMARK_PLUGIN_H
#define CMARK_PLUGIN_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"
/**
* cmark_plugin:
*
* A plugin structure, which should be filled by plugin's
* init functions.
*/
struct cmark_plugin {
// Linked list of the syntax extensions registered on this plugin;
// NULL when none are registered (or after they have been stolen).
cmark_llist *syntax_extensions;
};
// Return the plugin's extension list and reset the plugin's list to NULL.
cmark_llist *
cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin);
// Allocate a new plugin with an empty extension list.
cmark_plugin *
cmark_plugin_new(void);
// Free the plugin together with any extensions still on its list.
void
cmark_plugin_free(cmark_plugin *plugin);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,42 @@
#include "cmark-gfm.h"
#include "parser.h"
#include "references.h"
#include "inlines.h"
#include "chunk.h"
// Free callback installed on reference maps: releases one entry's label,
// url, title and the entry itself with the map's allocator.
// A NULL entry is ignored.
static void reference_free(cmark_map *map, cmark_map_entry *_ref) {
  cmark_reference *reference = (cmark_reference *)_ref;
  cmark_mem *allocator = map->mem;

  if (reference == NULL)
    return;

  allocator->free(reference->entry.label);
  cmark_chunk_free(allocator, &reference->url);
  cmark_chunk_free(allocator, &reference->title);
  allocator->free(reference);
}
// Add a link reference definition to `map`.
// The label is normalized first; if normalization yields NULL (empty or
// whitespace-only label) nothing is added. The map must not have been
// sorted yet. The new entry is pushed at the head of map->refs and its
// age records the number of entries that existed before it.
void cmark_reference_create(cmark_map *map, cmark_chunk *label,
                            cmark_chunk *url, cmark_chunk *title) {
  unsigned char *normalized = normalize_map_label(map->mem, label);
  cmark_reference *created;

  /* empty reference name, or composed from only whitespace */
  if (!normalized)
    return;

  assert(map->sorted == NULL);

  created = (cmark_reference *)map->mem->calloc(1, sizeof(*created));
  created->entry.label = normalized;
  created->url = cmark_clean_url(map->mem, url);
  created->title = cmark_clean_title(map->mem, title);
  created->entry.age = map->size;

  /* link in front of the existing entries */
  created->entry.next = map->refs;
  map->refs = (cmark_map_entry *)created;
  map->size += 1;
}
// Create a map whose entries are released with reference_free(),
// using `mem` as the map's allocator.
cmark_map *cmark_reference_map_new(cmark_mem *mem) {
return cmark_map_new(mem, reference_free);
}

View File

@ -0,0 +1,26 @@
#ifndef CMARK_REFERENCES_H
#define CMARK_REFERENCES_H
#include "map.h"
#ifdef __cplusplus
extern "C" {
#endif
// One link reference definition stored in a cmark_map.
// `entry` is the first member, and references.c casts between
// cmark_map_entry * and cmark_reference *.
struct cmark_reference {
cmark_map_entry entry;
// Destination, as produced by cmark_clean_url().
cmark_chunk url;
// Title, as produced by cmark_clean_title().
cmark_chunk title;
};
typedef struct cmark_reference cmark_reference;
// Add a reference (label -> url, title) to `map`; does nothing when the
// normalized label is empty.
void cmark_reference_create(cmark_map *map, cmark_chunk *label,
cmark_chunk *url, cmark_chunk *title);
// Create a map for link references that allocates from `mem`.
cmark_map *cmark_reference_map_new(cmark_mem *mem);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,63 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "cmark-gfm.h"
#include "syntax_extension.h"
#include "registry.h"
#include "plugin.h"
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
static cmark_llist *syntax_extensions = NULL;
// Run a plugin init function and move the syntax extensions it registers
// into the global registry.
// If `reg_fn` returns 0 the temporary plugin (and anything it registered)
// is freed and the registry is left unchanged.
void cmark_register_plugin(cmark_plugin_init_func reg_fn) {
  cmark_plugin *plugin = cmark_plugin_new();
  cmark_llist *stolen;
  cmark_llist *node;

  if (!reg_fn(plugin)) {
    cmark_plugin_free(plugin);
    return;
  }

  // Take the extensions out of the plugin so freeing it below does not
  // destroy them; the registry now refers to them.
  stolen = cmark_plugin_steal_syntax_extensions(plugin);

  node = stolen;
  while (node) {
    syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, node->data);
    node = node->next;
  }

  // Only the temporary list nodes are released, not the extensions.
  cmark_llist_free(&CMARK_DEFAULT_MEM_ALLOCATOR, stolen);
  cmark_plugin_free(plugin);
}
// Free every registered syntax extension and empty the global registry.
// Does nothing when the registry is already empty.
void cmark_release_plugins(void) {
  if (!syntax_extensions)
    return;

  cmark_llist_free_full(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions,
                        (cmark_free_func) cmark_syntax_extension_free);
  syntax_extensions = NULL;
}
// Build a new list, allocated from `mem`, that points at the same
// extension objects as the global registry (in the same order).
// Returns NULL when no extensions are registered.
cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem) {
  cmark_llist *copy = NULL;
  cmark_llist *node = syntax_extensions;

  while (node) {
    copy = cmark_llist_append(mem, copy, node->data);
    node = node->next;
  }
  return copy;
}
// Look up a registered syntax extension by exact (case-sensitive) name.
// Returns the first match, or NULL when no extension has that name.
cmark_syntax_extension *cmark_find_syntax_extension(const char *name) {
  cmark_llist *node = syntax_extensions;

  while (node != NULL) {
    cmark_syntax_extension *candidate = (cmark_syntax_extension *) node->data;

    if (strcmp(candidate->name, name) == 0)
      return candidate;
    node = node->next;
  }
  return NULL;
}

View File

@ -0,0 +1,24 @@
#ifndef CMARK_REGISTRY_H
#define CMARK_REGISTRY_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm.h"
#include "plugin.h"
// Run `reg_fn` on a fresh plugin and add the extensions it registers to
// the global registry; nothing is added when `reg_fn` returns 0.
CMARK_GFM_EXPORT
void cmark_register_plugin(cmark_plugin_init_func reg_fn);
// Free all registered extensions and empty the registry.
CMARK_GFM_EXPORT
void cmark_release_plugins(void);
// Return a new list (allocated from `mem`) of the registered extensions.
CMARK_GFM_EXPORT
cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem);
#ifdef __cplusplus
}
#endif
#endif

205
lib/commonmarker/render.c Normal file
View File

@ -0,0 +1,205 @@
#include <stdlib.h>
#include "buffer.h"
#include "chunk.h"
#include "cmark-gfm.h"
#include "utf8.h"
#include "render.h"
#include "node.h"
#include "syntax_extension.h"
// Request that at least one line break precedes the next output.
// A stronger pending request (e.g. a blank line) is left untouched.
static CMARK_INLINE void S_cr(cmark_renderer *renderer) {
  if (renderer->need_cr >= 1)
    return;
  renderer->need_cr = 1;
}
// Request that a blank line (two line breaks) precedes the next output.
// A pending request that is already at least as strong is kept.
static CMARK_INLINE void S_blankline(cmark_renderer *renderer) {
  if (renderer->need_cr >= 2)
    return;
  renderer->need_cr = 2;
}
// Append `source` (a NUL-terminated UTF-8 string) to the renderer's buffer.
//
// Steps, in order:
//  1. Emit any line breaks requested earlier through need_cr.
//  2. Decode `source` one code point at a time and write each one either
//     directly (escape == LITERAL) or through renderer->outc.
//  3. When a positive width is set and the column exceeds it, re-break the
//     current line at the last recorded breakable space.
//
// `wrap` allows breaking at spaces; it is ignored when
// renderer->no_linebreaks is set. Output stops silently at the first
// byte sequence that cmark_utf8proc_iterate() rejects.
static void S_out(cmark_renderer *renderer, cmark_node *node,
const char *source, bool wrap,
cmark_escaping escape) {
int length = (int)strlen(source);
unsigned char nextc;
int32_t c;
int i = 0;
int last_nonspace;
int len;
cmark_chunk remainder = cmark_chunk_literal("");
// Index of the last byte currently in the output buffer (-1 if empty).
int k = renderer->buffer->size - 1;
cmark_syntax_extension *ext = NULL;
cmark_node *n = node;
// Find the extension attached to `node` or to its nearest ancestor.
while (n && !ext) {
ext = n->extension;
if (!ext)
n = n->parent;
}
// Only extensions that provide an escape callback matter here.
if (ext && !ext->commonmark_escape_func)
ext = NULL;
wrap = wrap && !renderer->no_linebreaks;
// Inside a tight list item a blank line is reduced to a single break.
if (renderer->in_tight_list_item && renderer->need_cr > 1) {
renderer->need_cr = 1;
}
// Flush pending line breaks. Newlines already at the end of the buffer
// count towards the request: k walks backwards over them instead of
// writing new ones.
while (renderer->need_cr) {
if (k < 0 || renderer->buffer->ptr[k] == '\n') {
k -= 1;
} else {
cmark_strbuf_putc(renderer->buffer, '\n');
// More breaks follow, so the line just started stays empty apart
// from the prefix.
if (renderer->need_cr > 1) {
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
}
}
renderer->column = 0;
renderer->last_breakable = 0;
renderer->begin_line = true;
renderer->begin_content = true;
renderer->need_cr -= 1;
}
while (i < length) {
// NOTE(review): this runs on every iteration while begin_line is still
// true; a wrapped space at line start leaves begin_line set, so the
// prefix looks like it could be written twice -- confirm.
if (renderer->begin_line) {
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
// note: this assumes prefix is ascii:
renderer->column = renderer->prefix->size;
}
len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
if (len == -1) { // error condition
return; // return without rendering rest of string
}
// Let the extension request a backslash before this code point.
if (ext && ext->commonmark_escape_func(ext, node, c))
cmark_strbuf_putc(renderer->buffer, '\\');
// Byte following the current code point (the terminating NUL at the end).
nextc = source[i + len];
if (c == 32 && wrap) {
// A space at the very start of a line is dropped.
if (!renderer->begin_line) {
// Buffer offset of the space about to be written; a later line
// break can be placed here.
last_nonspace = renderer->buffer->size;
cmark_strbuf_putc(renderer->buffer, ' ');
renderer->column += 1;
renderer->begin_line = false;
renderer->begin_content = false;
// skip following spaces
while (source[i + 1] == ' ') {
i++;
}
// We don't allow breaks that make a digit the first character
// because this causes problems with commonmark output.
if (!cmark_isdigit(source[i + 1])) {
renderer->last_breakable = last_nonspace;
}
}
} else if (escape == LITERAL) {
// Literal output bypasses the format-specific outc callback.
if (c == 10) {
cmark_strbuf_putc(renderer->buffer, '\n');
renderer->column = 0;
renderer->begin_line = true;
renderer->begin_content = true;
renderer->last_breakable = 0;
} else {
cmark_render_code_point(renderer, c);
renderer->begin_line = false;
// we don't set 'begin_content' to false til we've
// finished parsing a digit. Reason: in commonmark
// we need to escape a potential list marker after
// a digit:
renderer->begin_content =
renderer->begin_content && cmark_isdigit((char)c) == 1;
}
} else {
(renderer->outc)(renderer, node, escape, c, nextc);
renderer->begin_line = false;
renderer->begin_content =
renderer->begin_content && cmark_isdigit((char)c) == 1;
}
// If adding the character went beyond width, look for an
// earlier place where the line could be broken:
if (renderer->width > 0 && renderer->column > renderer->width &&
!renderer->begin_line && renderer->last_breakable > 0) {
// copy from last_breakable to remainder
cmark_chunk_set_cstr(renderer->mem, &remainder,
(char *)renderer->buffer->ptr +
renderer->last_breakable + 1);
// truncate at last_breakable
cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
// add newline, prefix, and remainder
cmark_strbuf_putc(renderer->buffer, '\n');
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
renderer->column = renderer->prefix->size + remainder.len;
cmark_chunk_free(renderer->mem, &remainder);
renderer->last_breakable = 0;
renderer->begin_line = false;
renderer->begin_content = false;
}
i += len;
}
}
// Append the string `s` to the output buffer and advance the column by
// the number of bytes added.
// Assumes no newlines, assumes ascii content (one byte per column).
void cmark_render_ascii(cmark_renderer *renderer, const char *s) {
  const int size_before = renderer->buffer->size;
  int appended;

  cmark_strbuf_puts(renderer->buffer, s);
  appended = renderer->buffer->size - size_before;
  renderer->column += appended;
}
// Append code point `c`, UTF-8 encoded, to the output buffer.
// The column advances by exactly one, regardless of how many bytes the
// encoding takes.
void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) {
cmark_utf8proc_encode_char(c, renderer->buffer);
renderer->column += 1;
}
// Generic render driver shared by the text-based renderers.
//
// Walks the tree under `root` with an iterator and calls `render_node` for
// every event. `outc` is stored in the renderer and used by S_out for
// non-literal output. `width` > 0 enables line wrapping in S_out.
//
// Returns the rendered text, detached from the internal buffer; it always
// ends with a newline. Buffers are allocated from `mem`.
char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
void (*outc)(cmark_renderer *, cmark_node *,
cmark_escaping, int32_t,
unsigned char),
int (*render_node)(cmark_renderer *renderer,
cmark_node *node,
cmark_event_type ev_type, int options)) {
cmark_strbuf pref = CMARK_BUF_INIT(mem);
cmark_strbuf buf = CMARK_BUF_INIT(mem);
cmark_node *cur;
cmark_event_type ev_type;
char *result;
cmark_iter *iter = cmark_iter_new(root);
// Positional initializer; values follow the field order of
// struct cmark_renderer: mem, buffer, prefix, column, width, need_cr,
// last_breakable, begin_line, begin_content, no_linebreaks,
// in_tight_list_item, outc, cr, blankline, out, footnote_ix.
cmark_renderer renderer = {mem, &buf, &pref, 0, width,
0, 0, true, true, false,
false, outc, S_cr, S_blankline, S_out,
0};
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
cur = cmark_iter_get_node(iter);
if (!render_node(&renderer, cur, ev_type, options)) {
// a false value causes us to skip processing
// the node's contents. this is used for
// autolinks.
cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
}
}
// ensure final newline
if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
cmark_strbuf_putc(renderer.buffer, '\n');
}
// Detach first so the frees below do not release the returned text.
result = (char *)cmark_strbuf_detach(renderer.buffer);
cmark_iter_free(iter);
cmark_strbuf_free(renderer.prefix);
cmark_strbuf_free(renderer.buffer);
return result;
}

62
lib/commonmarker/render.h Normal file
View File

@ -0,0 +1,62 @@
#ifndef CMARK_RENDER_H
#define CMARK_RENDER_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
#include "buffer.h"
#include "chunk.h"
// Escaping mode passed to the renderer's out/outc callbacks. In S_out,
// LITERAL text is written directly; every other mode is handed to the
// format-specific outc callback.
typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;
// State shared between cmark_render() and the per-format node renderers.
struct cmark_renderer {
// Allocator used for temporary chunks.
cmark_mem *mem;
// Output accumulated so far.
cmark_strbuf *buffer;
// Text written at the start of every output line.
cmark_strbuf *prefix;
// Current output column.
int column;
// Wrap width; values <= 0 disable wrapping.
int width;
// Line breaks still owed before the next output (1 = newline,
// 2 = blank line).
int need_cr;
// Buffer offset of the last space where the line may be broken
// (0 = none).
bufsize_t last_breakable;
// True when nothing has been written on the current line yet.
bool begin_line;
// True until something other than a digit has been written on the
// current line.
bool begin_content;
// When set, wrapping at spaces is disabled.
bool no_linebreaks;
// When set, blank-line requests are reduced to a single newline.
bool in_tight_list_item;
// Format-specific writer for a single code point.
void (*outc)(struct cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char);
// Request a newline before the next output.
void (*cr)(struct cmark_renderer *);
// Request a blank line before the next output.
void (*blankline)(struct cmark_renderer *);
// Write a string (node, text, allow-wrap flag, escaping mode).
void (*out)(struct cmark_renderer *, cmark_node *, const char *, bool, cmark_escaping);
// Counter used for numbering footnote definitions.
unsigned int footnote_ix;
};
typedef struct cmark_renderer cmark_renderer;
// State for the HTML renderer.
// NOTE(review): the code that uses this struct is not part of this header;
// the field notes below are inferred from the names only -- confirm against
// the HTML renderer source.
struct cmark_html_renderer {
// presumably the output buffer for the generated HTML
cmark_strbuf *html;
cmark_node *plain;
// presumably the extensions whose HTML filter callback should run
cmark_llist *filter_extensions;
unsigned int footnote_ix;
unsigned int written_footnote_ix;
void *opaque;
};
typedef struct cmark_html_renderer cmark_html_renderer;
// Append an ASCII string without newlines; the column advances by its
// byte length.
void cmark_render_ascii(cmark_renderer *renderer, const char *s);
// Append one code point as UTF-8; the column advances by one.
void cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
// Walk the tree under `root`, calling `render_node` for each iterator
// event, and return the accumulated text (always newline-terminated).
// `outc` writes a single code point in the target format.
char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
void (*outc)(cmark_renderer *, cmark_node *,
cmark_escaping, int32_t,
unsigned char),
int (*render_node)(cmark_renderer *renderer,
cmark_node *node,
cmark_event_type ev_type, int options));
#ifdef __cplusplus
}
#endif
#endif

10520
lib/commonmarker/scanners.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,62 @@
#ifndef CMARK_SCANNERS_H
#define CMARK_SCANNERS_H
#include "cmark-gfm.h"
#include "chunk.h"
#ifdef __cplusplus
extern "C" {
#endif
// Run `scanner` on chunk `c` starting at byte `offset`. Returns the
// scanner's result, or 0 when the chunk has no data or `offset` lies
// beyond its end.
bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
bufsize_t offset);
// Raw scanners. Each takes a NUL-terminated byte string and returns 0 when
// nothing matches at its start. Most return the number of bytes matched;
// _scan_html_block_start, _scan_html_block_start_7 and
// _scan_setext_heading_line return a small type/level code instead, and
// _scan_dangerous_url returns 0 for the data:image/ URLs it allows.
bufsize_t _scan_scheme(const unsigned char *p);
bufsize_t _scan_autolink_uri(const unsigned char *p);
bufsize_t _scan_autolink_email(const unsigned char *p);
bufsize_t _scan_html_tag(const unsigned char *p);
bufsize_t _scan_liberal_html_tag(const unsigned char *p);
bufsize_t _scan_html_block_start(const unsigned char *p);
bufsize_t _scan_html_block_start_7(const unsigned char *p);
bufsize_t _scan_html_block_end_1(const unsigned char *p);
bufsize_t _scan_html_block_end_2(const unsigned char *p);
bufsize_t _scan_html_block_end_3(const unsigned char *p);
bufsize_t _scan_html_block_end_4(const unsigned char *p);
bufsize_t _scan_html_block_end_5(const unsigned char *p);
bufsize_t _scan_link_title(const unsigned char *p);
bufsize_t _scan_spacechars(const unsigned char *p);
bufsize_t _scan_atx_heading_start(const unsigned char *p);
bufsize_t _scan_setext_heading_line(const unsigned char *p);
bufsize_t _scan_open_code_fence(const unsigned char *p);
bufsize_t _scan_close_code_fence(const unsigned char *p);
bufsize_t _scan_entity(const unsigned char *p);
bufsize_t _scan_dangerous_url(const unsigned char *p);
bufsize_t _scan_footnote_definition(const unsigned char *p);
// Chunk-based wrappers: scan_X(c, n) runs _scan_X on chunk `c` starting at
// offset `n`, going through _scan_at.
#define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
#define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
#define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
#define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
#define scan_liberal_html_tag(c, n) _scan_at(&_scan_liberal_html_tag, c, n)
#define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
#define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
#define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
#define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n)
#define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n)
#define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n)
#define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n)
#define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
#define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
#define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
#define scan_setext_heading_line(c, n) \
_scan_at(&_scan_setext_heading_line, c, n)
#define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
#define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
#define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
#define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n)
#define scan_footnote_definition(c, n) _scan_at(&_scan_footnote_definition, c, n)
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,326 @@
#include <stdlib.h>
#include "chunk.h"
#include "scanners.h"
// Run `scanner` on chunk `c`, starting at byte `offset`.
// The scanners expect NUL-terminated input, so the byte just past the
// chunk (data[len]) is temporarily overwritten with '\0' and restored
// afterwards. Returns 0 without scanning when the chunk has no data or
// `offset` is greater than its length.
bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
{
  unsigned char *data = (unsigned char *)c->data;
  unsigned char saved;
  bufsize_t matched;

  if (data == NULL || offset > c->len)
    return 0;

  saved = data[c->len];
  data[c->len] = '\0';
  matched = scanner(data + offset);
  data[c->len] = saved;

  return matched;
}
// Shared re2c configuration and named patterns used by the scanners below.
// Input is read through the cursor `p`; `marker` is the backtracking
// marker; YYFILL is disabled.
// Note: 'title' is listed twice in blocktagname; the second occurrence is
// redundant.
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
re2c:define:YYMARKER = marker;
re2c:define:YYCTXMARKER = marker;
re2c:yyfill:enable = 0;
wordchar = [^\x00-\x20];
spacechar = [ \t\v\f\r\n];
reg_char = [^\\()\x00-\x20];
escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-];
tagname = [A-Za-z][A-Za-z0-9-]*;
blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';
attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
unquotedvalue = [^ \t\r\n\v\f"'=<>`\x00]+;
singlequotedvalue = ['][^'\x00]*['];
doublequotedvalue = ["][^"\x00]*["];
attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue;
attributevaluespec = spacechar* [=] spacechar* attributevalue;
attribute = spacechar+ attributename attributevaluespec?;
opentag = tagname attribute* spacechar* [/]? [>];
closetag = [/] tagname spacechar* [>];
htmlcomment = "!---->" | ("!--" ([-]? [^\x00>-]) ([-]? [^\x00-])* "-->");
processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00] | [>])* "?>";
declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">";
cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>";
htmltag = opentag | closetag | htmlcomment | processinginstruction |
declaration | cdata;
in_parens_nosp = [(] (reg_char|escaped_char|[\\])* [)];
in_double_quotes = ["] (escaped_char|[^"\x00])* ["];
in_single_quotes = ['] (escaped_char|[^'\x00])* ['];
in_parens = [(] (escaped_char|[^)\x00])* [)];
scheme = [A-Za-z][A-Za-z0-9.+-]{1,31};
*/
// Try to match a scheme including colon.
// A scheme is a letter followed by 1 to 31 letters, digits, '.', '+' or '-'.
// Returns the number of bytes matched (colon included), or 0 on no match.
bufsize_t _scan_scheme(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
scheme [:] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match URI autolink after first <, returning number of chars matched.
// The match is a scheme, ':', any run of bytes other than controls, space,
// '<' and '>', and the closing '>' (which is included in the count).
// Returns 0 on no match.
bufsize_t _scan_autolink_uri(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match email autolink after first <, returning num of chars matched.
// The match is local-part '@' domain '>', where the domain is one or more
// dot-separated labels of at most 63 alphanumeric/'-' characters that start
// and end with an alphanumeric. The closing '>' is included in the count.
// Returns 0 on no match.
bufsize_t _scan_autolink_email(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
[@]
[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
[>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML tag after first <, returning num of chars matched.
// Accepts an open tag, closing tag, comment, processing instruction,
// declaration or CDATA section (the `htmltag` pattern). Returns 0 on no
// match.
bufsize_t _scan_html_tag(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
htmltag { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to (liberally) match an HTML tag after first <, returning num of chars matched.
// "Liberal" means: one or more bytes other than newline and NUL, followed
// by '>'. Returns 0 on no match.
bufsize_t _scan_liberal_html_tag(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]+ [>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block tag start line, returning
// an integer code for the type of block (1-6, matching the spec).
// #7 is handled by a separate function, below.
// Returns 0 when none of the start conditions match. Note that the return
// value is a type code, not a length.
bufsize_t _scan_html_block_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
[<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; }
'<!--' { return 2; }
'<?' { return 3; }
'<!' [A-Z] { return 4; }
'<![CDATA[' { return 5; }
[<] [/]? blocktagname (spacechar | [/]? [>]) { return 6; }
* { return 0; }
*/
}
// Try to match an HTML block tag start line of type 7, returning
// 7 if successful, 0 if not.
// Type 7 is '<' plus a complete open or closing tag, followed only by
// whitespace up to a line ending.
bufsize_t _scan_html_block_start_7(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
[<] (opentag | closetag) [\t\n\f ]* [\r\n] { return 7; }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 1: a closing script, pre or
// style tag anywhere before the next newline.
// Returns the number of bytes up to and including the tag, or 0.
bufsize_t _scan_html_block_end_1(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* [<] [/] ('script'|'pre'|'style') [>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 2: the sequence "-->"
// anywhere before the next newline.
// Returns the number of bytes up to and including it, or 0.
bufsize_t _scan_html_block_end_2(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* '-->' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 3: the sequence "?>"
// anywhere before the next newline.
// Returns the number of bytes up to and including it, or 0.
bufsize_t _scan_html_block_end_3(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* '?>' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 4: a '>' anywhere before the
// next newline.
// Returns the number of bytes up to and including it, or 0.
bufsize_t _scan_html_block_end_4(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* '>' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 5: the sequence "]]>"
// anywhere before the next newline.
// Returns the number of bytes up to and including it, or 0.
bufsize_t _scan_html_block_end_5(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* ']]>' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched. Allow one
// level of internal nesting (quotes within quotes).
// Backslash-escaped punctuation may appear inside the title; in the
// parenthesised form an unescaped '(' or ')' is not allowed inside.
// Returns 0 on no match.
bufsize_t _scan_link_title(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); }
['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); }
[(] (escaped_char|[^()\x00])* [)] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Match space characters, including newlines.
// Returns the length of the leading run of spaces, tabs, vertical tabs,
// form feeds, carriage returns and newlines at `p`, or 0 when `p` does
// not start with one.
bufsize_t _scan_spacechars(const unsigned char *p)
{
  const unsigned char *start = p;
/*!re2c
[ \t\v\f\r\n]+ { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Match ATX heading start: one to six '#' characters followed by either
// spaces/tabs or a line ending.
// Returns the number of bytes matched (including that trailing
// whitespace), or 0 on no match.
bufsize_t _scan_atx_heading_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[#]{1,6} ([ \t]+|[\r\n]) { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Match setext heading line. Return 1 for level-1 heading,
// 2 for level-2, 0 for no match.
// Level 1 is a run of '=', level 2 a run of '-'; either may be followed
// only by spaces/tabs before the line ending. The return value is a level,
// not a length.
bufsize_t _scan_setext_heading_line(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
[=]+ [ \t]* [\r\n] { return 1; }
[-]+ [ \t]* [\r\n] { return 2; }
* { return 0; }
*/
}
// Scan an opening code fence: three or more backticks or tildes.
// The rest of the line is checked as trailing context only (for backtick
// fences it must not contain another backtick) and is not counted.
// Returns the length of the fence run, or 0 on no match.
bufsize_t _scan_open_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
[~]{3,} / [^\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Scan a closing code fence: three or more backticks or tildes followed
// only by spaces/tabs up to the line ending (checked as trailing context,
// not counted). Returns the length of the fence run, or 0 on no match.
// NOTE(review): this function takes no length argument; presumably the
// caller compares the result with the opening fence length -- confirm.
bufsize_t _scan_close_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
[~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Scans an entity.
// Accepts '&', then either a numeric reference ('#' plus 1-7 decimal
// digits, or '#x'/'#X' plus 1-6 hex digits) or a name (a letter followed
// by 1-31 letters/digits), then ';'. The name is not checked against any
// list of known entities here.
// Returns number of chars matched.
bufsize_t _scan_entity(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,6}|[0-9]{1,7}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
{ return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Returns positive value if a URL begins in a way that is potentially
// dangerous, with javascript:, vbscript:, file:, or data:, otherwise 0.
// Exception: data:image/png, data:image/gif, data:image/jpeg and
// data:image/webp prefixes are treated as safe and return 0.
// The positive value is the length of the matched scheme prefix.
bufsize_t _scan_dangerous_url(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
'data:image/' ('png'|'gif'|'jpeg'|'webp') { return 0; }
'javascript:' | 'vbscript:' | 'file:' | 'data:' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Scans a footnote definition opening: "[^", a label of one or more bytes
// other than ']', space, tab, CR, LF and NUL, then "]:" and any following
// spaces/tabs.
// Returns the number of bytes matched (trailing spaces/tabs included),
// or 0 on no match.
bufsize_t _scan_footnote_definition(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
'[^' ([^\] \r\n\x00\t]+) ']:' [ \t]* { return (bufsize_t)(p - start); }
* { return 0; }
*/
}

View File

@ -0,0 +1,149 @@
#include <stdlib.h>
#include <assert.h>
#include "cmark-gfm.h"
#include "syntax_extension.h"
#include "buffer.h"
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
static cmark_mem *_mem = &CMARK_DEFAULT_MEM_ALLOCATOR;
// Destroy a syntax extension.
// The private data is handed to the extension's free function when both
// are set; then the special-inline-chars list nodes, the name and the
// extension itself are released with `mem`.
void cmark_syntax_extension_free(cmark_mem *mem, cmark_syntax_extension *extension) {
  const bool has_private_cleanup =
      extension->free_function != NULL && extension->priv != NULL;

  if (has_private_cleanup)
    extension->free_function(mem, extension->priv);

  cmark_llist_free(mem, extension->special_inline_chars);
  mem->free(extension->name);
  mem->free(extension);
}
// Allocate a zeroed syntax extension from the default allocator and give
// it a private copy of `name`. All callbacks start out NULL.
cmark_syntax_extension *cmark_syntax_extension_new(const char *name) {
  const size_t name_bytes = strlen(name) + 1; /* includes the terminator */
  cmark_syntax_extension *extension =
      (cmark_syntax_extension *) _mem->calloc(1, sizeof(*extension));

  extension->name = (char *) _mem->calloc(1, name_bytes);
  memcpy(extension->name, name, name_bytes);
  return extension;
}
// Reserve a new node type value for an extension.
// Bumps CMARK_NODE_LAST_INLINE (is_inline != 0) or CMARK_NODE_LAST_BLOCK
// (is_inline == 0) by one and returns the new value. When the value bits
// are already all set, asserts and returns 0.
cmark_node_type cmark_syntax_extension_add_node(int is_inline) {
  cmark_node_type *last =
      is_inline ? &CMARK_NODE_LAST_INLINE : &CMARK_NODE_LAST_BLOCK;

  if ((*last & CMARK_NODE_VALUE_MASK) == CMARK_NODE_VALUE_MASK) {
    /* no free values left in this category */
    assert(false);
    return (cmark_node_type) 0;
  }

  *last = (cmark_node_type) ((int) *last + 1);
  return *last;
}
// Set the extension's `emphasis` flag. The flag becomes true only when
// `emphasis` is exactly 1; any other value (including other non-zero
// values) clears it.
void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension,
int emphasis) {
extension->emphasis = emphasis == 1;
}
// Install `func` as the extension's `try_opening_block` callback.
void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension,
cmark_open_block_func func) {
extension->try_opening_block = func;
}
// Install `func` as the extension's `last_block_matches` callback.
void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension,
cmark_match_block_func func) {
extension->last_block_matches = func;
}
// Install `func` as the extension's `match_inline` callback.
void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension,
cmark_match_inline_func func) {
extension->match_inline = func;
}
// Install `func` as the extension's `insert_inline_from_delim` callback.
void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension,
cmark_inline_from_delim_func func) {
extension->insert_inline_from_delim = func;
}
// Store `special_chars` as the extension's list of special inline
// characters. The list nodes are released by cmark_syntax_extension_free(),
// so the extension takes over the list. A previously stored list is
// overwritten without being freed.
void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension,
cmark_llist *special_chars) {
extension->special_inline_chars = special_chars;
}
// Install `func` as the extension's `get_type_string_func` callback.
void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension,
cmark_get_type_string_func func) {
extension->get_type_string_func = func;
}
// Install `func` as the extension's `can_contain_func` callback.
void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension,
cmark_can_contain_func func) {
extension->can_contain_func = func;
}
// Install `func` as the extension's `contains_inlines_func` callback.
void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension,
cmark_contains_inlines_func func) {
extension->contains_inlines_func = func;
}
// Install `func` as the extension's `commonmark_render_func` callback.
void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func) {
extension->commonmark_render_func = func;
}
// Install `func` as the extension's `plaintext_render_func` callback.
void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func) {
extension->plaintext_render_func = func;
}
// Install `func` as the extension's `latex_render_func` callback.
void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func) {
extension->latex_render_func = func;
}
// Install `func` as the extension's `xml_attr_func` callback.
void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension,
cmark_xml_attr_func func) {
extension->xml_attr_func = func;
}
// Install `func` as the extension's `man_render_func` callback.
void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func) {
extension->man_render_func = func;
}
// Install `func` as the extension's `html_render_func` callback.
void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension,
cmark_html_render_func func) {
extension->html_render_func = func;
}
// Install `func` as the extension's `html_filter_func` callback.
void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension,
cmark_html_filter_func func) {
extension->html_filter_func = func;
}
// Install `func` as the extension's `postprocess_func` callback.
void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension,
cmark_postprocess_func func) {
extension->postprocess_func = func;
}
// Attach private data to the extension together with the function that
// releases it. cmark_syntax_extension_free() calls `free_func(mem, priv)`
// when both are non-NULL. Previously attached data is overwritten without
// being freed.
void cmark_syntax_extension_set_private(cmark_syntax_extension *extension,
void *priv,
cmark_free_func free_func) {
extension->priv = priv;
extension->free_function = free_func;
}
// Return the private data pointer stored on the extension (may be NULL).
void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension) {
return extension->priv;
}
// Install `func` as the extension's `opaque_alloc_func` callback.
void cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension,
cmark_opaque_alloc_func func) {
extension->opaque_alloc_func = func;
}
// Install `func` as the extension's `opaque_free_func` callback.
void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension,
cmark_opaque_free_func func) {
extension->opaque_free_func = func;
}
// Install `func` as the extension's `commonmark_escape_func` callback.
// The renderer's S_out consults it for each code point and writes a
// backslash before the code point when it returns non-zero.
void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension,
cmark_commonmark_escape_func func) {
extension->commonmark_escape_func = func;
}

View File

@ -0,0 +1,34 @@
#ifndef CMARK_SYNTAX_EXTENSION_H
#define CMARK_SYNTAX_EXTENSION_H
#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"
#include "config.h"
// A syntax extension: a name, optional private data, and a set of optional
// callbacks. Objects are created zeroed by cmark_syntax_extension_new(), so
// every callback is NULL until the matching
// cmark_syntax_extension_set_* function is called.
struct cmark_syntax_extension {
// Set by cmark_syntax_extension_set_match_block_func().
cmark_match_block_func last_block_matches;
// Set by cmark_syntax_extension_set_open_block_func().
cmark_open_block_func try_opening_block;
cmark_match_inline_func match_inline;
// Set by cmark_syntax_extension_set_inline_from_delim_func().
cmark_inline_from_delim_func insert_inline_from_delim;
// List nodes are freed together with the extension.
cmark_llist * special_inline_chars;
// Private copy of the name passed to cmark_syntax_extension_new().
char * name;
// Private data; released through free_function when both are set.
void * priv;
bool emphasis;
cmark_free_func free_function;
cmark_get_type_string_func get_type_string_func;
cmark_can_contain_func can_contain_func;
cmark_contains_inlines_func contains_inlines_func;
cmark_common_render_func commonmark_render_func;
cmark_common_render_func plaintext_render_func;
cmark_common_render_func latex_render_func;
cmark_xml_attr_func xml_attr_func;
cmark_common_render_func man_render_func;
cmark_html_render_func html_render_func;
cmark_html_filter_func html_filter_func;
cmark_postprocess_func postprocess_func;
cmark_opaque_alloc_func opaque_alloc_func;
cmark_opaque_free_func opaque_free_func;
// Asked by the renderer whether a code point needs a backslash escape.
cmark_commonmark_escape_func commonmark_escape_func;
};
#endif

317
lib/commonmarker/utf8.c Normal file
View File

@ -0,0 +1,317 @@
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include "cmark_ctype.h"
#include "utf8.h"
// Sequence length implied by each possible first byte of a UTF-8 sequence:
//   0x00-0x7F -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF7 -> 4.
// 0 marks bytes that cannot start a sequence: continuation bytes
// (0x80-0xBF) and 0xF8-0xFF.
static const int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
// Append the UTF-8 encoding of U+FFFD (bytes EF BF BD) to `buf`.
// Used in place of input that is not valid UTF-8.
static void encode_unknown(cmark_strbuf *buf) {
  static const uint8_t replacement[] = {0xEF, 0xBF, 0xBD};

  cmark_strbuf_put(buf, replacement, 3);
}
// Determine the byte length of the UTF-8 sequence starting at `str`.
// Returns:
//   0          when str_len is 0,
//   -1         when the first byte cannot start a sequence,
//   -str_len   when the sequence would run past a non-negative str_len,
//   -i         when byte i is not a continuation byte,
//   the length (1-4) otherwise.
// Only the structure is checked here, not overlong forms or ranges.
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
  int expected;
  int pos;

  if (str_len == 0)
    return 0;

  expected = utf8proc_utf8class[str[0]];
  if (expected == 0)
    return -1;

  if (str_len >= 0 && (bufsize_t)expected > str_len)
    return -str_len;

  pos = 1;
  while (pos < expected) {
    if ((str[pos] & 0xC0) != 0x80)
      return -pos;
    pos++;
  }

  return expected;
}
// Validate a single UTF-8 character according to RFC 3629.
// Returns the sequence length (1-4) when valid. Otherwise returns a
// negative number whose magnitude is the number of bytes to treat as
// invalid: -1 for a bad lead byte, -str_len for a truncated sequence,
// -i when byte i is not a continuation byte, and -length for overlong
// encodings, surrogates and values above 0x10FFFF.
// `str` must point at least one readable byte.
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
  const uint8_t lead = str[0];
  const int length = utf8proc_utf8class[lead];
  int i;

  if (length == 0)
    return -1;

  if ((bufsize_t)length > str_len)
    return -str_len;

  // Every byte after the lead must look like 10xxxxxx.
  for (i = 1; i < length; i++) {
    if ((str[i] & 0xC0) != 0x80)
      return -i;
  }

  if (length == 2) {
    // Overlong
    if (lead < 0xC2)
      return -length;
  } else if (length == 3) {
    // Overlong
    if (lead == 0xE0 && str[1] < 0xA0)
      return -length;
    // Surrogate
    if (lead == 0xED && str[1] >= 0xA0)
      return -length;
  } else if (length == 4) {
    // Overlong
    if (lead == 0xF0 && str[1] < 0x90)
      return -length;
    // Above 0x10FFFF
    if (lead >= 0xF4 && (lead > 0xF4 || str[1] >= 0x90))
      return -length;
  }

  return length;
}
// Copy the `size` bytes at `line` into `ob`, sanitising as it goes: every
// invalid UTF-8 sequence and every NUL byte is replaced by U+FFFD.
//
// The input is processed as alternating pieces: a maximal run of acceptable
// bytes (copied verbatim) followed by one rejected sequence (replaced).
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
                          bufsize_t size) {
  bufsize_t pos = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;
    int skip = 0; // number of bytes covered by the rejected sequence

    // Extend the current run until the input ends or a byte is rejected.
    while (pos < size) {
      const uint8_t byte = line[pos];

      if (byte == 0) {
        // ASCII NUL is technically valid but rejected
        // for security reasons.
        skip = 1;
        break;
      }

      if (byte < 0x80) {
        pos++;
        continue;
      }

      const int seq_len = utf8proc_valid(line + pos, size - pos);
      if (seq_len < 0) {
        skip = -seq_len;
        break;
      }
      pos += seq_len;
    }

    if (pos > run_start)
      cmark_strbuf_put(ob, line + run_start, pos - run_start);

    if (pos >= size)
      break;

    // Stopped on a rejected sequence: substitute it and step past it.
    encode_unknown(ob);
    pos += skip;
  }
}
// Decode the first code point of the UTF-8 string str (str_len bytes).
//
// On success stores the code point in *dst and returns the number of bytes
// it occupies (1..4). On failure stores -1 in *dst and returns -1. Failure
// covers empty input, malformed or truncated sequences, overlong encodings,
// surrogates (U+D800..U+DFFF) and values of 0x110000 or more.
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
                           int32_t *dst) {
  int nbytes;
  int32_t cp = -1;

  *dst = -1;

  nbytes = utf8proc_charlen(str, str_len);
  if (nbytes < 0)
    return -1;

  if (nbytes == 1) {
    cp = str[0];
  } else if (nbytes == 2) {
    cp = ((str[0] & 0x1F) << 6) | (str[1] & 0x3F);
    if (cp < 0x80) // overlong
      cp = -1;
  } else if (nbytes == 3) {
    cp = ((str[0] & 0x0F) << 12) | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F);
    if (cp < 0x800 || (cp >= 0xD800 && cp < 0xE000)) // overlong or surrogate
      cp = -1;
  } else if (nbytes == 4) {
    cp = ((str[0] & 0x07) << 18) | ((str[1] & 0x3F) << 12) |
         ((str[2] & 0x3F) << 6) | (str[3] & 0x3F);
    if (cp < 0x10000 || cp >= 0x110000) // overlong or out of range
      cp = -1;
  }
  // nbytes == 0 (empty input) leaves cp at -1 and is reported as failure.

  if (cp < 0)
    return -1;

  *dst = cp;
  return nbytes;
}
// Append the UTF-8 encoding of code point uc to buf.
//
// uc must be non-negative (checked with assert). Values of 0x110000 or more
// are written as U+FFFD instead.
//
// NOTE(review): U+FFFE and U+FFFF are written as the single raw bytes 0xFE
// and 0xFF rather than as three-byte sequences; those bytes never occur in
// well-formed UTF-8. Kept as is -- confirm whether callers rely on it.
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
  uint8_t out[4];
  bufsize_t n = 0;

  assert(uc >= 0);

  if (uc >= 0x110000) {
    encode_unknown(buf);
    return;
  }

  if (uc < 0x80) {
    out[0] = (uint8_t)uc;
    n = 1;
  } else if (uc < 0x800) {
    out[0] = (uint8_t)(0xC0 | (uc >> 6));
    out[1] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 2;
  } else if (uc == 0xFFFE || uc == 0xFFFF) {
    out[0] = (uint8_t)(uc & 0xFF);
    n = 1;
  } else if (uc < 0x10000) {
    out[0] = (uint8_t)(0xE0 | (uc >> 12));
    out[1] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    out[2] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 3;
  } else {
    out[0] = (uint8_t)(0xF0 | (uc >> 18));
    out[1] = (uint8_t)(0x80 | ((uc >> 12) & 0x3F));
    out[2] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    out[3] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 4;
  }

  cmark_strbuf_put(buf, out, n);
}
// Append a case-folded copy of the UTF-8 string str (len bytes) to dest.
//
// The input is decoded one code point at a time. Each decoded code point is
// handled by the code spliced in from case_fold_switch.inc; every byte that
// cannot be decoded is replaced by U+FFFD.
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
bufsize_t len) {
int32_t c;
// Emits one code point into dest.
// NOTE(review): case_fold_switch.inc is not visible here; it presumably
// refers to `c` and `bufpush` by name, so neither should be renamed --
// confirm against that file.
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
while (len > 0) {
bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
if (char_len >= 0) {
#include "case_fold_switch.inc"
} else {
// cmark_utf8proc_iterate reports every failure as -1, so exactly one
// byte is skipped after writing the replacement character.
encode_unknown(dest);
char_len = -char_len;
}
str += char_len;
len -= char_len;
}
}
// Returns 1 when uc is TAB, LF, FF, CR, or a code point in the Unicode Zs
// (space separator) class; returns 0 otherwise.
int cmark_utf8proc_is_space(int32_t uc) {
  switch (uc) {
  case 0x0009: // TAB
  case 0x000A: // LF
  case 0x000C: // FF
  case 0x000D: // CR
  case 0x0020: // SPACE
  case 0x00A0: // NO-BREAK SPACE
  case 0x1680: // OGHAM SPACE MARK
  case 0x202F: // NARROW NO-BREAK SPACE
  case 0x205F: // MEDIUM MATHEMATICAL SPACE
  case 0x3000: // IDEOGRAPHIC SPACE
    return 1;
  default:
    // U+2000..U+200A: EN QUAD through HAIR SPACE.
    return uc >= 0x2000 && uc <= 0x200A;
  }
}
// Returns non-zero when uc is a punctuation character, 0 otherwise.
//
// Code points below 128 are delegated to cmark_ispunct(). Everything above
// is matched against the hard-coded list of single code points and inclusive
// ranges below, which is intended to cover the Unicode P[cdefios] general
// categories (Pc, Pd, Pe, Pf, Pi, Po, Ps).
// NOTE(review): the list is a fixed snapshot; which Unicode version it was
// generated from is not recorded here -- confirm before extending it.
int cmark_utf8proc_is_punctuation(int32_t uc) {
return (
(uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 ||
uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
(uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
(uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
(uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
(uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
(uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
(uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
(uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
(uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
(uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
(uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
(uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
(uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
(uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
(uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
(uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
(uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
uc == 11632 || (uc >= 11776 && uc <= 11822) ||
(uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
(uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
uc == 42622 || (uc >= 42738 && uc <= 42743) ||
(uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
(uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
uc == 64831 || (uc >= 65040 && uc <= 65049) ||
(uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
(uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
(uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
uc == 65343 || uc == 65371 || uc == 65373 ||
(uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
(uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
(uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
(uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
(uc >= 70085 && uc <= 70088) || uc == 70093 ||
(uc >= 70200 && uc <= 70205) || uc == 70854 ||
(uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
(uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
uc == 113823);
}

35
lib/commonmarker/utf8.h Normal file
View File

@ -0,0 +1,35 @@
// UTF-8 helpers: validation, decoding, encoding, case folding and
// character classification.
#ifndef CMARK_UTF8_H
#define CMARK_UTF8_H
#include <stdint.h>
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
// Appends a case-folded copy of the UTF-8 string str (len bytes) to dest.
// Each byte that cannot be decoded is replaced by U+FFFD.
CMARK_GFM_EXPORT
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
bufsize_t len);
// Appends the UTF-8 encoding of code point uc to buf. uc must be
// non-negative (asserted). Values of 0x110000 or more are written as U+FFFD;
// U+FFFE and U+FFFF are written as the single bytes 0xFE and 0xFF.
CMARK_GFM_EXPORT
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
// Decodes the first code point of str (str_len bytes). On success stores it
// in *dst and returns the number of bytes it occupies; on failure stores -1
// in *dst and returns -1.
CMARK_GFM_EXPORT
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
// Appends the size bytes at line to dest, replacing every invalid UTF-8
// sequence and every NUL byte with U+FFFD.
CMARK_GFM_EXPORT
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
bufsize_t size);
// Returns non-zero when uc is TAB, LF, FF, CR or a Unicode Zs code point.
CMARK_GFM_EXPORT
int cmark_utf8proc_is_space(int32_t uc);
// Returns non-zero when uc is ASCII punctuation (per cmark_ispunct) or is in
// the built-in list of Unicode punctuation code points.
CMARK_GFM_EXPORT
int cmark_utf8proc_is_punctuation(int32_t uc);
#ifdef __cplusplus
}
#endif
#endif

181
lib/commonmarker/xml.c Normal file
View File

@ -0,0 +1,181 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "config.h"
#include "cmark-gfm.h"
#include "node.h"
#include "buffer.h"
#include "houdini.h"
#include "syntax_extension.h"
#define BUFFER_SIZE 100
// Functions to convert cmark_nodes to XML strings.
// Append `length` bytes of `source` to `dest` after passing them through
// houdini_escape_html0().
// NOTE(review): the last argument is presumably houdini's "secure" switch,
// left off here -- confirm against houdini.h.
static void escape_xml(cmark_strbuf *dest, const unsigned char *source,
                       bufsize_t length) {
  const int escape_mode = 0;

  houdini_escape_html0(dest, source, length, escape_mode);
}
// State carried across S_render_node() calls while rendering one document.
// Field order matters: cmark_render_xml_with_mem() initialises this struct
// positionally.
struct render_state {
// Output buffer that receives the XML text.
cmark_strbuf *xml;
// Current indentation, in spaces; changed in steps of 2 by S_render_node().
int indent;
};
// Write state->indent space characters to the output buffer. A zero or
// negative indent writes nothing.
static CMARK_INLINE void indent(struct render_state *state) {
  int remaining = state->indent;

  while (remaining-- > 0)
    cmark_strbuf_putc(state->xml, ' ');
}
// Write the XML for one iterator event on `node` to state->xml.
//
// On CMARK_EVENT_ENTER the opening tag is written with its attributes:
//   - literal nodes (text, code, HTML, code block) are written in full --
//     attributes, escaped content and closing tag -- on this single event;
//   - nodes with children get "<tag ...>" and the indent grows by 2;
//   - other childless nodes are self-closed as "<tag ... />".
// On any other event, a node that has children gets its closing tag after
// the indent shrinks by 2; childless nodes produce nothing.
//
// `options` is consulted only for CMARK_OPT_SOURCEPOS. Always returns 1.
static int S_render_node(cmark_node *node, cmark_event_type ev_type,
struct render_state *state, int options) {
cmark_strbuf *xml = state->xml;
bool literal = false;
cmark_delim_type delim;
bool entering = (ev_type == CMARK_EVENT_ENTER);
// Scratch space for snprintf-formatted attributes.
char buffer[BUFFER_SIZE];
if (entering) {
indent(state);
cmark_strbuf_putc(xml, '<');
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
// Source position is emitted only on request and only when start_line
// is non-zero.
if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
node->start_line, node->start_column, node->end_line,
node->end_column);
cmark_strbuf_puts(xml, buffer);
}
// An extension may contribute extra attribute text; it is written as
// returned, without escaping.
if (node->extension && node->extension->xml_attr_func) {
const char* r = node->extension->xml_attr_func(node->extension, node);
if (r != NULL)
cmark_strbuf_puts(xml, r);
}
literal = false;
switch (node->type) {
case CMARK_NODE_DOCUMENT:
cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
break;
case CMARK_NODE_TEXT:
case CMARK_NODE_CODE:
case CMARK_NODE_HTML_BLOCK:
case CMARK_NODE_HTML_INLINE:
// Literal node: write content and "</tag" now; the final ">" comes from
// the shared ">\n" written after the switch.
cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
escape_xml(xml, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
literal = true;
break;
case CMARK_NODE_LIST:
switch (cmark_node_get_list_type(node)) {
case CMARK_ORDERED_LIST:
cmark_strbuf_puts(xml, " type=\"ordered\"");
snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
cmark_node_get_list_start(node));
cmark_strbuf_puts(xml, buffer);
delim = cmark_node_get_list_delim(node);
// Any other delimiter value produces no delim attribute.
if (delim == CMARK_PAREN_DELIM) {
cmark_strbuf_puts(xml, " delim=\"paren\"");
} else if (delim == CMARK_PERIOD_DELIM) {
cmark_strbuf_puts(xml, " delim=\"period\"");
}
break;
case CMARK_BULLET_LIST:
cmark_strbuf_puts(xml, " type=\"bullet\"");
break;
default:
break;
}
// tight is written for every list, whatever its type.
snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
(cmark_node_get_list_tight(node) ? "true" : "false"));
cmark_strbuf_puts(xml, buffer);
break;
case CMARK_NODE_HEADING:
snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
cmark_strbuf_puts(xml, buffer);
break;
case CMARK_NODE_CODE_BLOCK:
// The info attribute is omitted when the info string is empty.
if (node->as.code.info.len > 0) {
cmark_strbuf_puts(xml, " info=\"");
escape_xml(xml, node->as.code.info.data, node->as.code.info.len);
cmark_strbuf_putc(xml, '"');
}
cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
literal = true;
break;
case CMARK_NODE_CUSTOM_BLOCK:
case CMARK_NODE_CUSTOM_INLINE:
cmark_strbuf_puts(xml, " on_enter=\"");
escape_xml(xml, node->as.custom.on_enter.data,
node->as.custom.on_enter.len);
cmark_strbuf_putc(xml, '"');
cmark_strbuf_puts(xml, " on_exit=\"");
escape_xml(xml, node->as.custom.on_exit.data,
node->as.custom.on_exit.len);
cmark_strbuf_putc(xml, '"');
break;
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
// destination and title are always written, even when empty.
cmark_strbuf_puts(xml, " destination=\"");
escape_xml(xml, node->as.link.url.data, node->as.link.url.len);
cmark_strbuf_putc(xml, '"');
cmark_strbuf_puts(xml, " title=\"");
escape_xml(xml, node->as.link.title.data, node->as.link.title.len);
cmark_strbuf_putc(xml, '"');
break;
default:
break;
}
// Finish the tag: children are indented 2 deeper; a childless non-literal
// node is self-closed; a literal node only needs the final ">".
if (node->first_child) {
state->indent += 2;
} else if (!literal) {
cmark_strbuf_puts(xml, " /");
}
cmark_strbuf_puts(xml, ">\n");
} else if (node->first_child) {
// Leaving a node that has children: restore the indent and close the tag.
state->indent -= 2;
indent(state);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
cmark_strbuf_puts(xml, ">\n");
}
return 1;
}
// Render the tree rooted at `root` as an XML document, using the memory
// allocator reported by cmark_node_mem(root). See
// cmark_render_xml_with_mem() for the meaning of `options` and the result.
char *cmark_render_xml(cmark_node *root, int options) {
  cmark_mem *mem = cmark_node_mem(root);

  return cmark_render_xml_with_mem(root, options, mem);
}
// Render the tree rooted at `root` as an XML document whose output buffer is
// allocated through `mem`.
//
// The result starts with an XML declaration and a CommonMark.dtd DOCTYPE,
// followed by the output of S_render_node() for every event the iterator
// yields. `options` is forwarded to S_render_node() unchanged.
//
// Returns the string detached from the output buffer.
// NOTE(review): the caller presumably has to release it with mem's free
// function -- confirm against cmark_strbuf_detach().
char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem) {
  cmark_strbuf out = CMARK_BUF_INIT(mem);
  struct render_state rs = {&out, 0};
  cmark_iter *walker = cmark_iter_new(root);
  cmark_event_type event;
  char *rendered;

  cmark_strbuf_puts(rs.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  cmark_strbuf_puts(rs.xml, "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");

  for (event = cmark_iter_next(walker); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(walker)) {
    cmark_node *current = cmark_iter_get_node(walker);
    S_render_node(current, event, &rs, options);
  }

  // Take the string out of the buffer before the iterator is released.
  rendered = (char *)cmark_strbuf_detach(&out);
  cmark_iter_free(walker);
  return rendered;
}