Skip to content

Commit

Permalink
Arena allocator
Browse files Browse the repository at this point in the history
This allocator allocates a 4MiB arena into which all allocations are
made, and then increasingly larger arenas as earlier ones are used up.
Freeing memory in the arena is a no-op: clean all memory with
cmark_arena_reset().

In order to support realloc, we store the size of each allocation in a
size_t before the returned pointer.

The speedup is over 25% on large (benchmark-sized) inputs -- we pay a
small increase in maximum RSS (~10%) for this.
  • Loading branch information
Yuki Izumi committed Nov 29, 2016
1 parent 0618a8a commit 4f103d1
Show file tree
Hide file tree
Showing 15 changed files with 257 additions and 36 deletions.
56 changes: 56 additions & 0 deletions man/man3/cmark.3
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,27 @@ typedef struct cmark_mem {
Defines the memory allocation functions to be used by CMark when parsing
and allocating a document tree

.PP
\fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[])

.PP
The default memory allocator; uses the system's calloc, realloc and
free.

.PP
\fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[])

.PP
An arena allocator; uses system calloc to allocate large slabs of
memory. Memory in these slabs is not reused at all.

.PP
\fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[])

.PP
Resets the arena allocator, quickly returning all used memory to the
operating system.

.SS
Creating and Destroying Nodes

Expand Down Expand Up @@ -641,6 +662,13 @@ Rendering
Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to
free the returned buffer.

.PP
\fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[])

.PP
As for \f[I]cmark_render_xml\f[], but specifying the allocator to use
for the resulting string.

.PP
\fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[])

Expand All @@ -649,27 +677,55 @@ Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to
add an appropriate header and footer. It is the caller's responsibility
to free the returned buffer.

.PP
\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[])

.PP
As for \f[I]cmark_render_html\f[], but specifying the allocator to use
for the resulting string.

.PP
\fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])

.PP
Render a \f[I]node\f[] tree as a groff man page, without the header. It
is the caller's responsibility to free the returned buffer.

.PP
\fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])

.PP
As for \f[I]cmark_render_man\f[], but specifying the allocator to use
for the resulting string.

.PP
\fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])

.PP
Render a \f[I]node\f[] tree as a commonmark document. It is the caller's
responsibility to free the returned buffer.

.PP
\fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])

.PP
As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to
use for the resulting string.

.PP
\fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])

.PP
Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's
responsibility to free the returned buffer.

.PP
\fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])

.PP
As for \f[I]cmark_render_latex\f[], but specifying the allocator to use
for the resulting string.

.SS
Options

Expand Down
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ set(LIBRARY_SOURCES
houdini_html_e.c
houdini_html_u.c
cmark_ctype.c
arena.c
${HEADERS}
)

Expand Down Expand Up @@ -78,7 +79,7 @@ set_target_properties(${PROGRAM} PROPERTIES
COMPILE_FLAGS -DCMARK_STATIC_DEFINE)

# Check integrity of node structure when compiled as debug:
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG")
set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}")

set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
Expand Down
72 changes: 72 additions & 0 deletions src/arena.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "cmark.h"

/* One slab of arena memory.  Chunks are chained through `prev`. */
struct arena_chunk {
  size_t sz;                /* capacity of the buffer at `ptr`, in bytes */
  size_t used;              /* bytes of the buffer already handed out */
  void *ptr;                /* start of the chunk's buffer */
  struct arena_chunk *prev; /* next chunk in the chain, or NULL at the end */
};

/* Chunk that new allocations are carved from; NULL while no arena exists
 * (before the first allocation and after cmark_arena_reset()). */
static struct arena_chunk *A = NULL;

/* Allocate a chunk descriptor together with a zero-filled buffer of `sz`
 * bytes, and link the new chunk in front of `prev`.
 *
 * Returns the new chunk with `used` == 0.  Aborts if either allocation
 * fails, so a returned chunk always has a usable buffer. */
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
  struct arena_chunk *c = calloc(1, sizeof(*c));
  if (!c)
    abort();
  c->sz = sz;
  c->ptr = calloc(1, sz);
  /* Fail here rather than hand out pointers derived from a NULL buffer. */
  if (!c->ptr)
    abort();
  c->prev = prev;
  return c;
}

/* Create the first chunk (4 MiB) and install it as the current arena. */
static void init_arena(void) {
  const size_t initial_size = 4 * 1048576;
  struct arena_chunk *first = alloc_arena_chunk(initial_size, NULL);
  A = first;
}

void cmark_arena_reset(void) {
while (A) {
free(A->ptr);
struct arena_chunk *n = A->prev;
free(A);
A = n;
}
}

static void *arena_calloc(size_t nmem, size_t size) {
if (!A)
init_arena();

size_t sz = nmem * size + sizeof(size_t);
if (sz > A->sz) {
A->prev = alloc_arena_chunk(sz, A->prev);
return (uint8_t *) A->prev->ptr + sizeof(size_t);
}
if (sz > A->sz - A->used) {
A = alloc_arena_chunk(A->sz + A->sz / 2, A);
}
void *ptr = (uint8_t *) A->ptr + A->used;
A->used += sz;
*((size_t *) ptr) = nmem * size;
return (uint8_t *) ptr + sizeof(size_t);
}

/* realloc-style hook: always allocates a fresh zero-filled block of `size`
 * bytes and copies the old contents into it; the old block is not reclaimed.
 *
 * `ptr` must be NULL or a pointer previously returned by this allocator,
 * because the old size is read from the size_t header stored just before it.
 * arena_calloc creates the arena on first use, so no setup is needed here. */
static void *arena_realloc(void *ptr, size_t size) {
  void *new_ptr = arena_calloc(1, size);
  if (ptr) {
    size_t old_size = ((size_t *) ptr)[-1];
    /* Copy only what fits, so shrinking cannot write past the new block. */
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }
  return new_ptr;
}

/* free-style hook for the arena allocator.  Individual blocks are never
 * released; memory is returned in bulk by cmark_arena_reset(). */
static void arena_free(void *ptr) {
  /* Intentionally empty; the cast only silences unused-parameter warnings. */
  (void)ptr;
}

/* Allocator table wired to the arena functions defined in this file.  The
 * initializer is positional, so it relies on the member order of cmark_mem
 * as declared in cmark.h.  The object is not static, so it has external
 * linkage. */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};

/* Return the address of the global arena allocator table.  The table is a
 * file-scope object, so the caller must not free the returned pointer. */
cmark_mem *cmark_get_arena_mem_allocator(void) {
  return &CMARK_ARENA_MEM_ALLOCATOR;
}
4 changes: 2 additions & 2 deletions src/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
}

cmark_parser *cmark_parser_new(int options) {
extern cmark_mem DEFAULT_MEM_ALLOCATOR;
return cmark_parser_new_with_mem(options, &DEFAULT_MEM_ALLOCATOR);
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
}

void cmark_parser_free(cmark_parser *parser) {
Expand Down
6 changes: 5 additions & 1 deletion src/cmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ static void *xrealloc(void *ptr, size_t size) {
return new_ptr;
}

cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};

/* Return the address of the global default allocator table.  The table is a
 * file-scope object, so the caller must not free the returned pointer. */
cmark_mem *cmark_get_default_mem_allocator(void) {
  return &CMARK_DEFAULT_MEM_ALLOCATOR;
}

char *cmark_markdown_to_html(const char *text, size_t len, int options) {
cmark_node *doc;
Expand Down
48 changes: 48 additions & 0 deletions src/cmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,24 @@ typedef struct cmark_mem {
void (*free)(void *);
} cmark_mem;

/** The default memory allocator; uses the system's calloc,
 * realloc and free.
 */
CMARK_EXPORT
cmark_mem *cmark_get_default_mem_allocator(void);

/** An arena allocator; uses system calloc to allocate large
 * slabs of memory. Memory in these slabs is not reused at all.
 */
CMARK_EXPORT
cmark_mem *cmark_get_arena_mem_allocator(void);

/** Resets the arena allocator, quickly returning all used memory
* to the operating system.
*/
CMARK_EXPORT
void cmark_arena_reset(void);

/**
* ## Creating and Destroying Nodes
*/
Expand Down Expand Up @@ -502,31 +520,61 @@ cmark_node *cmark_parse_file(FILE *f, int options);
CMARK_EXPORT
char *cmark_render_xml(cmark_node *root, int options);

/** As for 'cmark_render_xml', but specifying the allocator to use for
* the resulting string.
*/
CMARK_EXPORT
char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem);

/** Render a 'node' tree as an HTML fragment. It is up to the user
* to add an appropriate header and footer. It is the caller's
* responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_html(cmark_node *root, int options);

/** As for 'cmark_render_html', but specifying the allocator to use for
* the resulting string.
*/
CMARK_EXPORT
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem);

/** Render a 'node' tree as a groff man page, without the header.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_man(cmark_node *root, int options, int width);

/** As for 'cmark_render_man', but specifying the allocator to use for
* the resulting string.
*/
CMARK_EXPORT
char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);

/** Render a 'node' tree as a commonmark document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_commonmark(cmark_node *root, int options, int width);

/** As for 'cmark_render_commonmark', but specifying the allocator to use for
* the resulting string.
*/
CMARK_EXPORT
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);

/** Render a 'node' tree as a LaTeX document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_latex(cmark_node *root, int options, int width);

/** As for 'cmark_render_latex', but specifying the allocator to use for
* the resulting string.
*/
CMARK_EXPORT
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);

/**
* ## Options
*/
Expand Down
6 changes: 5 additions & 1 deletion src/commonmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -463,10 +463,14 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
}

/* Render `root` as commonmark, allocating the result with the allocator
 * that cmark_node_mem() reports for `root`. */
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  cmark_mem *mem = cmark_node_mem(root);
  return cmark_render_commonmark_with_mem(root, options, width, mem);
}

char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
if (options & CMARK_OPT_HARDBREAKS) {
// disable breaking on width, since it has
// a different meaning with OPT_HARDBREAKS
width = 0;
}
return cmark_render(root, options, width, outc, S_render_node);
return cmark_render(mem, root, options, width, outc, S_render_node);
}
6 changes: 5 additions & 1 deletion src/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -323,8 +323,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
}

/* Render `root` as HTML, allocating the result with the allocator that
 * cmark_node_mem() reports for `root`. */
char *cmark_render_html(cmark_node *root, int options) {
  cmark_mem *mem = cmark_node_mem(root);
  return cmark_render_html_with_mem(root, options, mem);
}

char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem) {
char *result;
cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root));
cmark_strbuf html = CMARK_BUF_INIT(mem);
cmark_event_type ev_type;
cmark_node *cur;
struct render_state state = {&html, NULL};
Expand Down
6 changes: 5 additions & 1 deletion src/latex.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,5 +434,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
}

char *cmark_render_latex(cmark_node *root, int options, int width) {
return cmark_render(root, options, width, outc, S_render_node);
return cmark_render_latex_with_mem(root, options, width, cmark_node_mem(root));
}

/* Render `root` as LaTeX by handing the caller-supplied allocator `mem` to
 * the generic cmark_render() driver along with this file's output and
 * node-rendering callbacks. */
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  char *latex = cmark_render(mem, root, options, width, outc, S_render_node);
  return latex;
}
Loading

0 comments on commit 4f103d1

Please sign in to comment.