diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 8d367aad3..e26d74b71 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -115,6 +115,27 @@ typedef struct cmark_mem { Defines the memory allocation functions to be used by CMark when parsing and allocating a document tree +.PP +\fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[]) + +.PP +The default memory allocator; uses the system's calloc, realloc and +free. + +.PP +\fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[]) + +.PP +An arena allocator; uses system calloc to allocate large slabs of +memory. Memory in these slabs is not reused at all. + +.PP +\fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[]) + +.PP +Resets the arena allocator, quickly returning all used memory to the +operating system. + .SS Creating and Destroying Nodes @@ -641,6 +662,13 @@ Rendering Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_xml\f[], but specifying the allocator to use +for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[]) @@ -649,6 +677,13 @@ Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to add an appropriate header and footer. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_html\f[], but specifying the allocator to use +for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) @@ -656,6 +691,13 @@ to free the returned buffer. Render a \f[I]node\f[] tree as a groff man page, without the header. It is the caller's responsibility to free the returned buffer. 
+.PP +\fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_man\f[], but specifying the allocator to use +for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) @@ -663,6 +705,13 @@ is the caller's responsibility to free the returned buffer. Render a \f[I]node\f[] tree as a commonmark document. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to +use for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) @@ -670,6 +719,13 @@ responsibility to free the returned buffer. Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_latex\f[], but specifying the allocator to use +for the resulting string. 
+
 .SS
 Options
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0cb653014..ccb4734da 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -40,6 +40,7 @@ set(LIBRARY_SOURCES
   houdini_html_e.c
   houdini_html_u.c
   cmark_ctype.c
+  arena.c
   ${HEADERS}
   )
 
@@ -78,7 +79,7 @@ set_target_properties(${PROGRAM} PROPERTIES
   COMPILE_FLAGS -DCMARK_STATIC_DEFINE)
 
 # Check integrity of node structure when compiled as debug:
-set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES")
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG")
 set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}")
 
 set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
diff --git a/src/arena.c b/src/arena.c
new file mode 100644
index 000000000..1a30a97c0
--- /dev/null
+++ b/src/arena.c
@@ -0,0 +1,96 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "cmark.h"
+
+/* One slab of arena memory. `A` is the slab currently being carved up;
+ * earlier and oversized slabs are reachable through `prev`. */
+static struct arena_chunk {
+  size_t sz, used;
+  void *ptr;
+  struct arena_chunk *prev;
+} *A = NULL;
+
+/* Allocate a zeroed slab of `sz` bytes whose `prev` link is `prev`.
+ * Aborts if the system allocator fails. */
+static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
+  struct arena_chunk *c = calloc(1, sizeof(*c));
+  if (!c)
+    abort();
+  c->sz = sz;
+  c->ptr = calloc(1, sz);
+  if (!c->ptr)
+    abort();
+  c->prev = prev;
+  return c;
+}
+
+static void init_arena(void) {
+  A = alloc_arena_chunk(4 * 1048576, NULL);
+}
+
+void cmark_arena_reset(void) {
+  while (A) {
+    free(A->ptr);
+    struct arena_chunk *n = A->prev;
+    free(A);
+    A = n;
+  }
+}
+
+/* Hand out `nmem * size` zeroed bytes from the arena. Every block is
+ * preceded by a size_t header holding its usable size, which
+ * arena_realloc reads back. */
+static void *arena_calloc(size_t nmem, size_t size) {
+  if (!A)
+    init_arena();
+
+  /* Reject requests whose size, with header and padding, would wrap. */
+  if (size != 0 && nmem > (SIZE_MAX - 2 * sizeof(size_t)) / size)
+    abort();
+
+  /* Round up to a multiple of sizeof(size_t) so the header and payload
+   * of the next block in the slab stay aligned. */
+  const size_t align = sizeof(size_t) - 1;
+  size_t sz = (nmem * size + sizeof(size_t) + align) & ~align;
+
+  struct arena_chunk *chunk;
+  if (sz > A->sz) {
+    /* Oversized request: dedicated slab, linked behind the current one. */
+    A->prev = chunk = alloc_arena_chunk(sz, A->prev);
+  } else if (sz > A->sz - A->used) {
+    /* Current slab exhausted: start a new one, 1.5x as large. */
+    A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
+  } else {
+    chunk = A;
+  }
+
+  void *ptr = (uint8_t *) chunk->ptr + chunk->used;
+  chunk->used += sz;
+  /* Written on every path, including the oversized one. */
+  *((size_t *) ptr) = sz - sizeof(size_t);
+  return (uint8_t *) ptr + sizeof(size_t);
+}
+
+/* Allocate a new block of `size` bytes and copy the old contents into it.
+ * The old block stays in the arena until cmark_arena_reset(). */
+static void *arena_realloc(void *ptr, size_t size) {
+  void *new_ptr = arena_calloc(1, size);
+  if (ptr) {
+    /* Never copy more than the new block can hold. */
+    size_t old_size = ((size_t *) ptr)[-1];
+    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
+  }
+  return new_ptr;
+}
+
+static void arena_free(void *ptr) {
+  (void) ptr;
+  /* no-op */
+}
+
+cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
+
+cmark_mem *cmark_get_arena_mem_allocator() {
+  return &CMARK_ARENA_MEM_ALLOCATOR;
+}
diff --git a/src/blocks.c b/src/blocks.c
index de47dfb37..f1b9d7d3c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -108,8 +108,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
 }
 
 cmark_parser *cmark_parser_new(int options) {
-  extern cmark_mem DEFAULT_MEM_ALLOCATOR;
-  return cmark_parser_new_with_mem(options, &DEFAULT_MEM_ALLOCATOR);
+  extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
+  return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
 }
 
 void cmark_parser_free(cmark_parser *parser) {
diff --git a/src/cmark.c b/src/cmark.c
index 0d3bc1669..9220b8a9d 100644
--- a/src/cmark.c
+++ b/src/cmark.c
@@ -24,7 +24,11 @@ static void *xrealloc(void *ptr, size_t size) {
   return new_ptr;
 }
 
-cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
+cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
+
+cmark_mem *cmark_get_default_mem_allocator() {
+  return &CMARK_DEFAULT_MEM_ALLOCATOR;
+}
 
 char *cmark_markdown_to_html(const char *text, size_t len, int options) {
   cmark_node *doc;
diff --git a/src/cmark.h b/src/cmark.h
index 6ed7eb057..26a22096b 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -100,6 +100,24 @@ typedef struct cmark_mem {
   void (*free)(void *);
 } cmark_mem;
 
+/** The default memory allocator; uses the system's calloc,
+ * realloc and free.
+ */
+CMARK_EXPORT
+cmark_mem *cmark_get_default_mem_allocator();
+
+/** An arena allocator; uses system calloc to allocate large
+ * slabs of memory. Memory in these slabs is not reused at all.
+ */ +CMARK_EXPORT +cmark_mem *cmark_get_arena_mem_allocator(); + +/** Resets the arena allocator, quickly returning all used memory + * to the operating system. + */ +CMARK_EXPORT +void cmark_arena_reset(void); + /** * ## Creating and Destroying Nodes */ @@ -502,6 +520,12 @@ cmark_node *cmark_parse_file(FILE *f, int options); CMARK_EXPORT char *cmark_render_xml(cmark_node *root, int options); +/** As for 'cmark_render_xml', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem); + /** Render a 'node' tree as an HTML fragment. It is up to the user * to add an appropriate header and footer. It is the caller's * responsibility to free the returned buffer. @@ -509,24 +533,48 @@ char *cmark_render_xml(cmark_node *root, int options); CMARK_EXPORT char *cmark_render_html(cmark_node *root, int options); +/** As for 'cmark_render_html', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem); + /** Render a 'node' tree as a groff man page, without the header. * It is the caller's responsibility to free the returned buffer. */ CMARK_EXPORT char *cmark_render_man(cmark_node *root, int options, int width); +/** As for 'cmark_render_man', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** Render a 'node' tree as a commonmark document. * It is the caller's responsibility to free the returned buffer. */ CMARK_EXPORT char *cmark_render_commonmark(cmark_node *root, int options, int width); +/** As for 'cmark_render_commonmark', but specifying the allocator to use for + * the resulting string. 
+ */ +CMARK_EXPORT +char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** Render a 'node' tree as a LaTeX document. * It is the caller's responsibility to free the returned buffer. */ CMARK_EXPORT char *cmark_render_latex(cmark_node *root, int options, int width); +/** As for 'cmark_render_latex', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** * ## Options */ diff --git a/src/commonmark.c b/src/commonmark.c index b8b182068..7d0f34513 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -463,10 +463,14 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } char *cmark_render_commonmark(cmark_node *root, int options, int width) { + return cmark_render_commonmark_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { if (options & CMARK_OPT_HARDBREAKS) { // disable breaking on width, since it has // a different meaning with OPT_HARDBREAKS width = 0; } - return cmark_render(root, options, width, outc, S_render_node); + return cmark_render(mem, root, options, width, outc, S_render_node); } diff --git a/src/html.c b/src/html.c index a680e4a50..d58596cd0 100644 --- a/src/html.c +++ b/src/html.c @@ -323,8 +323,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } char *cmark_render_html(cmark_node *root, int options) { + return cmark_render_html_with_mem(root, options, cmark_node_mem(root)); +} + +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem) { char *result; - cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root)); + cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; struct render_state state = {&html, NULL}; diff --git a/src/latex.c b/src/latex.c index e78c7d916..4aa628627 
100644 --- a/src/latex.c +++ b/src/latex.c @@ -434,5 +434,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } char *cmark_render_latex(cmark_node *root, int options, int width) { - return cmark_render(root, options, width, outc, S_render_node); + return cmark_render_latex_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { + return cmark_render(mem, root, options, width, outc, S_render_node); } diff --git a/src/main.c b/src/main.c index 42cd8b163..e44dd3216 100644 --- a/src/main.c +++ b/src/main.c @@ -37,45 +37,50 @@ void print_usage() { printf(" --version Print version\n"); } -static void print_document(cmark_node *document, writer_format writer, +static bool print_document(cmark_node *document, writer_format writer, int options, int width) { char *result; + cmark_mem *mem = cmark_get_default_mem_allocator(); + switch (writer) { case FORMAT_HTML: - result = cmark_render_html(document, options); + result = cmark_render_html_with_mem(document, options, mem); break; case FORMAT_XML: - result = cmark_render_xml(document, options); + result = cmark_render_xml_with_mem(document, options, mem); break; case FORMAT_MAN: - result = cmark_render_man(document, options, width); + result = cmark_render_man_with_mem(document, options, width, mem); break; case FORMAT_COMMONMARK: - result = cmark_render_commonmark(document, options, width); + result = cmark_render_commonmark_with_mem(document, options, width, mem); break; case FORMAT_LATEX: - result = cmark_render_latex(document, options, width); + result = cmark_render_latex_with_mem(document, options, width, mem); break; default: fprintf(stderr, "Unknown format %d\n", writer); - exit(1); + return false; } printf("%s", result); - cmark_node_mem(document)->free(result); + mem->free(result); + + return true; } int main(int argc, char *argv[]) { int i, numfps = 0; int *files; char buffer[4096]; - cmark_parser 
*parser; + cmark_parser *parser = NULL; size_t bytes; - cmark_node *document; + cmark_node *document = NULL; int width = 0; char *unparsed; writer_format writer = FORMAT_HTML; int options = CMARK_OPT_DEFAULT; + int res = 1; #if defined(_WIN32) && !defined(__CYGWIN__) _setmode(_fileno(stdin), _O_BINARY); @@ -88,7 +93,7 @@ int main(int argc, char *argv[]) { if (strcmp(argv[i], "--version") == 0) { printf("cmark %s", CMARK_VERSION_STRING); printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n"); - exit(0); + goto success; } else if (strcmp(argv[i], "--sourcepos") == 0) { options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { @@ -106,7 +111,7 @@ int main(int argc, char *argv[]) { } else if ((strcmp(argv[i], "--help") == 0) || (strcmp(argv[i], "-h") == 0)) { print_usage(); - exit(0); + goto success; } else if (strcmp(argv[i], "--width") == 0) { i += 1; if (i < argc) { @@ -114,11 +119,11 @@ int main(int argc, char *argv[]) { if (unparsed && strlen(unparsed) > 0) { fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i], unparsed); - exit(1); + goto failure; } } else { fprintf(stderr, "--width requires an argument\n"); - exit(1); + goto failure; } } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) { i += 1; @@ -135,27 +140,32 @@ int main(int argc, char *argv[]) { writer = FORMAT_LATEX; } else { fprintf(stderr, "Unknown format %s\n", argv[i]); - exit(1); + goto failure; } } else { fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); - exit(1); + goto failure; } } else if (*argv[i] == '-') { print_usage(); - exit(1); + goto failure; } else { // treat as file argument files[numfps++] = i; } } +#if DEBUG parser = cmark_parser_new(options); +#else + parser = cmark_parser_new_with_mem(options, cmark_get_arena_mem_allocator()); +#endif + for (i = 0; i < numfps; i++) { FILE *fp = fopen(argv[files[i]], "rb"); if (fp == NULL) { fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]], 
strerror(errno));
-      exit(1);
+      goto failure;
     }
 
     while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
@@ -169,7 +179,6 @@ int main(int argc, char *argv[]) {
   }
 
   if (numfps == 0) {
-
     while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
       cmark_parser_feed(parser, buffer, bytes);
       if (bytes < sizeof(buffer)) {
@@ -179,13 +188,28 @@ int main(int argc, char *argv[]) {
   }
 
   document = cmark_parser_finish(parser);
-  cmark_parser_free(parser);
 
-  print_document(document, writer, options, width);
+  if (!print_document(document, writer, options, width))
+    goto failure;
+
+success:
+  res = 0;
+
+failure:
+
+#if DEBUG
+  if (parser)
+    cmark_parser_free(parser);
 
-  cmark_node_free(document);
+  /* Early exits (--help, --version, argument errors) jump here before a
+   * document exists, so guard it the same way as the parser above. */
+  if (document)
+    cmark_node_free(document);
+#else
+  cmark_arena_reset();
+#endif
 
   free(files);
 
-  return 0;
+  return res;
 }
diff --git a/src/man.c b/src/man.c
index 1c76f68bb..f3980275d 100644
--- a/src/man.c
+++ b/src/man.c
@@ -248,5 +248,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
 }
 
 char *cmark_render_man(cmark_node *root, int options, int width) {
-  return cmark_render(root, options, width, S_outc, S_render_node);
+  return cmark_render_man_with_mem(root, options, width, cmark_node_mem(root));
+}
+
+char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
+  return cmark_render(mem, root, options, width, S_outc, S_render_node);
 }
diff --git a/src/node.c b/src/node.c
index e722acf90..fa20503e5 100644
--- a/src/node.c
+++ b/src/node.c
@@ -98,8 +98,8 @@ cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
 }
 
 cmark_node *cmark_node_new(cmark_node_type type) {
-  extern cmark_mem DEFAULT_MEM_ALLOCATOR;
-  return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR);
+  extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
+  return cmark_node_new_with_mem(type, &CMARK_DEFAULT_MEM_ALLOCATOR);
 }
 
 // Free a cmark_node list and any children.
diff --git a/src/render.c b/src/render.c index 20dca5ff8..b7eabcb53 100644 --- a/src/render.c +++ b/src/render.c @@ -142,13 +142,12 @@ void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { renderer->column += 1; } -char *cmark_render(cmark_node *root, int options, int width, +char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, void (*outc)(cmark_renderer *, cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options)) { - cmark_mem *mem = cmark_node_mem(root); cmark_strbuf pref = CMARK_BUF_INIT(mem); cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_node *cur; diff --git a/src/render.h b/src/render.h index 35eb0a65d..b73ace464 100644 --- a/src/render.h +++ b/src/render.h @@ -36,7 +36,7 @@ void cmark_render_ascii(cmark_renderer *renderer, const char *s); void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); -char *cmark_render(cmark_node *root, int options, int width, +char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, void (*outc)(cmark_renderer *, cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, diff --git a/src/xml.c b/src/xml.c index 4898cd2e8..ea53b99c6 100644 --- a/src/xml.c +++ b/src/xml.c @@ -148,8 +148,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } char *cmark_render_xml(cmark_node *root, int options) { + return cmark_render_xml_with_mem(root, options, cmark_node_mem(root)); +} + +char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem) { char *result; - cmark_strbuf xml = CMARK_BUF_INIT(cmark_node_mem(root)); + cmark_strbuf xml = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; struct render_state state = {&xml, 0};