From a945be4fc612955850a27b47f8eade5ea72f0e85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Bu=CC=88gling?= <boris@icculus.org>
Date: Fri, 30 Jan 2015 19:12:33 +0100
Subject: [PATCH] Fixed implicit casts from `size_t` to `int`.

Mostly by actually using `size_t`, except for the cases where negative
values were considered valid; there I have stuck with `long`. The latter
are of course all potential bugs, but at least they are more explicit
now.
---
 src/blocks.c         |  32 ++++++-------
 src/buffer.c         |  40 ++++++++---------
 src/buffer.h         |  24 +++++-----
 src/chunk.h          |   6 +--
 src/cmark.h          |   6 +--
 src/houdini_html_u.c |   2 +-
 src/html.c           |   8 ++--
 src/inlines.c        |  60 ++++++++++++-------------
 src/inlines.h        |   2 +-
 src/man.c            |   4 +-
 src/node.c           |   8 ++--
 src/node.h           |  14 +++---
 src/parser.h         |   2 +-
 src/scanners.c       | 105 ++++++++++++++++++++++---------------------
 src/scanners.h       |  28 ++++++------
 src/scanners.re      |  30 ++++++-------
 src/utf8.c           |  16 +++----
 src/utf8.h           |   4 +-
 src/xml.c            |   6 +--
 19 files changed, 201 insertions(+), 196 deletions(-)

diff --git a/src/blocks.c b/src/blocks.c
index 806f7a63d..e205cb2a0 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -26,7 +26,7 @@ static void
 S_process_line(cmark_parser *parser, const unsigned char *buffer,
                size_t bytes);
 
-static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
+static cmark_node* make_block(cmark_node_type tag, int start_line, size_t start_column)
 {
 	cmark_node* e;
 
@@ -84,7 +84,7 @@ static cmark_node*
 finalize(cmark_parser *parser, cmark_node* b);
 
 // Returns true if line has only space characters, else false.
-static bool is_blank(cmark_strbuf *s, int offset)
+static bool is_blank(cmark_strbuf *s, size_t offset)
 {
 	while (offset < s->size) {
 		switch (s->ptr[offset]) {
@@ -116,7 +116,7 @@ static inline bool accepts_lines(cmark_node_type block_type)
 	        block_type == NODE_CODE_BLOCK);
 }
 
-static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
+static void add_line(cmark_node* node, cmark_chunk *ch, size_t offset)
 {
 	assert(node->open);
 	cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset);
@@ -124,7 +124,7 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
 
 static void remove_trailing_blank_lines(cmark_strbuf *ln)
 {
-	int i;
+	long i;
 
 	for (i = ln->size - 1; i >= 0; --i) {
 		unsigned char c = ln->ptr[i];
@@ -185,7 +185,7 @@ static cmark_node*
 finalize(cmark_parser *parser, cmark_node* b)
 {
 	int firstlinelen;
-	int pos;
+	size_t pos;
 	cmark_node* item;
 	cmark_node* subitem;
 	cmark_node* parent;
@@ -289,7 +289,7 @@ finalize(cmark_parser *parser, cmark_node* b)
 
 // Add a cmark_node as child of another.  Return pointer to child.
 static cmark_node* add_child(cmark_parser *parser, cmark_node* parent,
-                             cmark_node_type block_type, int start_column)
+                             cmark_node_type block_type, size_t start_column)
 {
 	assert(parent);
 
@@ -338,10 +338,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap)
 // Attempts to parse a list item marker (bullet or enumerated).
 // On success, returns length of the marker, and populates
 // data with the details.  On failure, returns 0.
-static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr)
+static size_t parse_list_marker(cmark_chunk *input, size_t pos, cmark_list **dataptr)
 {
 	unsigned char c;
-	int startpos;
+	size_t startpos;
 	cmark_list *data;
 
 	startpos = pos;
@@ -495,7 +495,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
 
 static void chop_trailing_hashtags(cmark_chunk *ch)
 {
-	int n, orig_n;
+	long n, orig_n;
 
 	cmark_chunk_rtrim(ch);
 	orig_n = n = ch->len - 1;
@@ -515,17 +515,17 @@ static void
 S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 {
 	cmark_node* last_matched_container;
-	int offset = 0;
-	int matched = 0;
-	int lev = 0;
-	int i;
+	size_t offset = 0;
+	size_t matched = 0;
+	size_t lev = 0;
+	size_t i;
 	cmark_list *data = NULL;
 	bool all_matched = true;
 	cmark_node* container;
 	cmark_node* cur = parser->current;
 	bool blank = false;
-	int first_nonspace;
-	int indent;
+	size_t first_nonspace;
+	size_t indent;
 	cmark_chunk input;
 
 	utf8proc_detab(parser->curline, buffer, bytes);
@@ -683,7 +683,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			offset = first_nonspace + matched;
 			container = add_child(parser, container, NODE_HEADER, offset + 1);
 
-			int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace);
+			size_t hashpos = cmark_chunk_strchr(&input, '#', first_nonspace);
 			int level = 0;
 
 			while (peek_at(&input, hashpos) == '#') {
diff --git a/src/buffer.c b/src/buffer.c
index 0df65617f..5d8f515a9 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -23,7 +23,7 @@ unsigned char cmark_strbuf__oom[1];
 #define MIN(x,y)  ((x<y) ? x : y)
 #endif
 
-void cmark_strbuf_init(cmark_strbuf *buf, int initial_size)
+void cmark_strbuf_init(cmark_strbuf *buf, size_t initial_size)
 {
 	buf->asize = 0;
 	buf->size = 0;
@@ -33,10 +33,10 @@ void cmark_strbuf_init(cmark_strbuf *buf, int initial_size)
 		cmark_strbuf_grow(buf, initial_size);
 }
 
-int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom)
+int cmark_strbuf_try_grow(cmark_strbuf *buf, size_t target_size, bool mark_oom)
 {
 	unsigned char *new_ptr;
-	int new_size;
+	size_t new_size;
 
 	if (buf->ptr == cmark_strbuf__oom)
 		return -1;
@@ -79,7 +79,7 @@ int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom)
 	return 0;
 }
 
-int cmark_strbuf_grow(cmark_strbuf *buf, int target_size)
+int cmark_strbuf_grow(cmark_strbuf *buf, size_t target_size)
 {
 	return cmark_strbuf_try_grow(buf, target_size, true);
 }
@@ -112,7 +112,7 @@ void cmark_strbuf_clear(cmark_strbuf *buf)
 		buf->ptr[0] = '\0';
 }
 
-int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len)
+int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, size_t len)
 {
 	if (len <= 0 || data == NULL) {
 		cmark_strbuf_clear(buf);
@@ -142,7 +142,7 @@ int cmark_strbuf_putc(cmark_strbuf *buf, int c)
 	return 0;
 }
 
-int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len)
+int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, long len)
 {
 	if (len <= 0)
 		return 0;
@@ -161,8 +161,8 @@ int cmark_strbuf_puts(cmark_strbuf *buf, const char *string)
 
 int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap)
 {
-	const int expected_size = buf->size + (strlen(format) * 2);
-	int len;
+	const size_t expected_size = buf->size + (strlen(format) * 2);
+	long len;
 
 	ENSURE_SIZE(buf, expected_size);
 
@@ -184,7 +184,7 @@ int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap)
 			return -1;
 		}
 
-		if (len + 1 <= buf->asize - buf->size) {
+		if ((size_t)(len + 1) <= buf->asize - buf->size) {
 			buf->size += len;
 			break;
 		}
@@ -207,9 +207,9 @@ int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
 	return r;
 }
 
-void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf)
+void cmark_strbuf_copy_cstr(char *data, size_t datasize, const cmark_strbuf *buf)
 {
-	int copylen;
+	size_t copylen;
 
 	assert(data && datasize && buf);
 
@@ -245,7 +245,7 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf)
 	return data;
 }
 
-void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize)
+void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, size_t asize)
 {
 	cmark_strbuf_free(buf);
 
@@ -268,7 +268,7 @@ int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b)
 	       (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
 }
 
-int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos)
+int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, size_t pos)
 {
 	const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
 	if (!p)
@@ -277,9 +277,9 @@ int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos)
 	return (int)(p - (const unsigned char *)buf->ptr);
 }
 
-int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos)
+long cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, size_t pos)
 {
-	int i;
+	long i;
 
 	for (i = pos; i >= 0; i--) {
 		if (buf->ptr[i] == (unsigned char) c)
@@ -289,7 +289,7 @@ int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos)
 	return -1;
 }
 
-void cmark_strbuf_truncate(cmark_strbuf *buf, int len)
+void cmark_strbuf_truncate(cmark_strbuf *buf, size_t len)
 {
 	if (len < buf->size) {
 		buf->size = len;
@@ -297,7 +297,7 @@ void cmark_strbuf_truncate(cmark_strbuf *buf, int len)
 	}
 }
 
-void cmark_strbuf_drop(cmark_strbuf *buf, int n)
+void cmark_strbuf_drop(cmark_strbuf *buf, long n)
 {
 	if (n > 0) {
 		buf->size = buf->size - n;
@@ -325,7 +325,7 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf)
 
 void cmark_strbuf_trim(cmark_strbuf *buf)
 {
-	int i = 0;
+	size_t i = 0;
 
 	if (!buf->size)
 		return;
@@ -343,7 +343,7 @@ void cmark_strbuf_trim(cmark_strbuf *buf)
 void cmark_strbuf_normalize_whitespace(cmark_strbuf *s)
 {
 	bool last_char_was_space = false;
-	int r, w;
+	size_t r, w;
 
 	for (r = 0, w = 0; r < s->size; ++r) {
 		switch (s->ptr[r]) {
@@ -368,7 +368,7 @@ void cmark_strbuf_normalize_whitespace(cmark_strbuf *s)
 // Destructively unescape a string: remove backslashes before punctuation chars.
 extern void cmark_strbuf_unescape(cmark_strbuf *buf)
 {
-	int r, w;
+	size_t r, w;
 
 	for (r = 0, w = 0; r < buf->size; ++r) {
 		if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1]))
diff --git a/src/buffer.h b/src/buffer.h
index fb9f91067..776ad2801 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -11,7 +11,7 @@ extern "C" {
 
 typedef struct {
 	unsigned char *ptr;
-	int asize, size;
+	size_t asize, size;
 } cmark_strbuf;
 
 extern unsigned char cmark_strbuf__initbuf[];
@@ -26,7 +26,7 @@ extern unsigned char cmark_strbuf__oom[];
  * For the cases where GH_BUF_INIT cannot be used to do static
  * initialization.
  */
-void cmark_strbuf_init(cmark_strbuf *buf, int initial_size);
+void cmark_strbuf_init(cmark_strbuf *buf, size_t initial_size);
 
 /**
  * Attempt to grow the buffer to hold at least `target_size` bytes.
@@ -36,7 +36,7 @@ void cmark_strbuf_init(cmark_strbuf *buf, int initial_size);
  * existing buffer content will be preserved, but calling code must handle
  * that buffer was not expanded.
  */
-int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom);
+int cmark_strbuf_try_grow(cmark_strbuf *buf, size_t target_size, bool mark_oom);
 
 /**
  * Grow the buffer to hold at least `target_size` bytes.
@@ -46,7 +46,7 @@ int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom);
  *
  * @return 0 on success or -1 on failure
  */
-int cmark_strbuf_grow(cmark_strbuf *buf, int target_size);
+int cmark_strbuf_grow(cmark_strbuf *buf, size_t target_size);
 
 void cmark_strbuf_free(cmark_strbuf *buf);
 void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
@@ -68,9 +68,9 @@ size_t cmark_strbuf_len(const cmark_strbuf *buf);
 
 int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
 
-void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize);
+void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, size_t asize);
 unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
-void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf);
+void cmark_strbuf_copy_cstr(char *data, size_t datasize, const cmark_strbuf *buf);
 
 static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
 {
@@ -87,20 +87,20 @@ static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
  * return code of these functions and call them in a series then just call
  * cmark_strbuf_oom at the end.
  */
-int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len);
+int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, size_t len);
 int cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
 int cmark_strbuf_putc(cmark_strbuf *buf, int c);
-int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len);
+int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, long len);
 int cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
 int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
 CMARK_ATTRIBUTE((format (printf, 2, 3)));
 int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap);
 void cmark_strbuf_clear(cmark_strbuf *buf);
 
-int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos);
-int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos);
-void cmark_strbuf_drop(cmark_strbuf *buf, int n);
-void cmark_strbuf_truncate(cmark_strbuf *buf, int len);
+int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, size_t pos);
+long cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, size_t pos);
+void cmark_strbuf_drop(cmark_strbuf *buf, long n);
+void cmark_strbuf_truncate(cmark_strbuf *buf, size_t len);
 void cmark_strbuf_rtrim(cmark_strbuf *buf);
 void cmark_strbuf_trim(cmark_strbuf *buf);
 void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
diff --git a/src/chunk.h b/src/chunk.h
index 54c4b1602..f028b13a4 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -9,7 +9,7 @@
 
 typedef struct {
 	unsigned char *data;
-	int len;
+	size_t len;
 	int alloc;  // also implies a NULL-terminated string
 } cmark_chunk;
 
@@ -49,7 +49,7 @@ static inline void cmark_chunk_trim(cmark_chunk *c)
 	cmark_chunk_rtrim(c);
 }
 
-static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset)
+static inline size_t cmark_chunk_strchr(cmark_chunk *ch, int c, size_t offset)
 {
 	const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
 	return p ? (int)(p - ch->data) : ch->len;
@@ -90,7 +90,7 @@ static inline cmark_chunk cmark_chunk_literal(const char *data)
 	return c;
 }
 
-static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len)
+static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, size_t pos, size_t len)
 {
 	cmark_chunk c = {ch->data + pos, len, 0};
 	return c;
diff --git a/src/cmark.h b/src/cmark.h
index 9f312bcdc..776b0ad37 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -265,7 +265,7 @@ cmark_node_set_literal(cmark_node *node, const char *content);
 
 /** Returns the header level of 'node', or 0 if 'node' is not a header.
  */
-CMARK_EXPORT int
+CMARK_EXPORT size_t
 cmark_node_get_header_level(cmark_node *node);
 
 /** Sets the header level of 'node', returning 1 on success and 0 on error.
@@ -357,7 +357,7 @@ cmark_node_get_start_line(cmark_node *node);
 
 /** Returns the column at which 'node' begins.
  */
-CMARK_EXPORT int
+CMARK_EXPORT size_t
 cmark_node_get_start_column(cmark_node *node);
 
 /** Returns the line on which 'node' ends.
@@ -367,7 +367,7 @@ cmark_node_get_end_line(cmark_node *node);
 
 /** Returns the column at which 'node' ends.
  */
-CMARK_EXPORT int
+CMARK_EXPORT size_t
 cmark_node_get_end_column(cmark_node *node);
 
 /**
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
index ecd7faa20..6de0c2ba9 100644
--- a/src/houdini_html_u.c
+++ b/src/houdini_html_u.c
@@ -52,7 +52,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
 				break;
 
 			if (src[i] == ';') {
-				const struct html_ent *entity = find_entity((char *)src, i);
+				const struct html_ent *entity = find_entity((char *)src, (int)i);
 
 				if (entity != NULL) {
 					size_t len = strnlen((const char *)entity->utf8, 4);
diff --git a/src/html.c b/src/html.c
index 8ccb495b5..309c8c60c 100644
--- a/src/html.c
+++ b/src/html.c
@@ -11,7 +11,7 @@
 
 // Functions to convert cmark_nodes to HTML strings.
 
-static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_html(cmark_strbuf *dest, const unsigned char *source, long length)
 {
 	if (length < 0)
 		length = strlen((char *)source);
@@ -19,7 +19,7 @@ static void escape_html(cmark_strbuf *dest, const unsigned char *source, int len
 	houdini_escape_html0(dest, source, (size_t)length, 0);
 }
 
-static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_href(cmark_strbuf *dest, const unsigned char *source, long length)
 {
 	if (length < 0)
 		length = strlen((char *)source);
@@ -42,7 +42,7 @@ static void
 S_render_sourcepos(cmark_node *node, cmark_strbuf *html, long options)
 {
 	if (CMARK_OPT_SOURCEPOS & options) {
-		cmark_strbuf_printf(html, " data-sourcepos=\"%d:%d-%d:%d\"",
+		cmark_strbuf_printf(html, " data-sourcepos=\"%d:%ld-%d:%ld\"",
 		                    cmark_node_get_start_line(node),
 		                    cmark_node_get_start_column(node),
 		                    cmark_node_get_end_line(node),
@@ -165,7 +165,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 			S_render_sourcepos(node, html, options);
 			cmark_strbuf_puts(html, "><code>");
 		} else {
-			int first_tag = 0;
+			size_t first_tag = 0;
 			while (first_tag < node->as.code.info.len &&
 			       node->as.code.info.data[first_tag] != ' ') {
 				first_tag += 1;
diff --git a/src/inlines.c b/src/inlines.c
index 2c1240833..4392e473f 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -28,7 +28,7 @@ typedef struct delimiter {
 	struct delimiter *next;
 	cmark_node *inl_text;
 	unsigned char delim_char;
-	int position;
+	size_t position;
 	bool can_open;
 	bool can_close;
 	bool active;
@@ -36,7 +36,7 @@ typedef struct delimiter {
 
 typedef struct {
 	cmark_chunk input;
-	int pos;
+	size_t pos;
 	cmark_reference_map *refmap;
 	delimiter *last_delim;
 } subject;
@@ -48,7 +48,7 @@ static int parse_inline(subject* subj, cmark_node * parent);
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj);
+static size_t subject_find_special_char(subject *subj);
 
 static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
 {
@@ -130,7 +130,7 @@ static unsigned char *bufdup(const unsigned char *buf)
 	unsigned char *new_buf = NULL;
 
 	if (buf) {
-		int len = strlen((char *)buf);
+		size_t len = strlen((char *)buf);
 		new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf));
 		if(new_buf != NULL) {
 			memcpy(new_buf, buf, len + 1);
@@ -163,7 +163,7 @@ static inline unsigned char peek_char(subject *subj)
 	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
 }
 
-static inline unsigned char peek_at(subject *subj, int pos)
+static inline unsigned char peek_at(subject *subj, size_t pos)
 {
 	return subj->input.data[pos];
 }
@@ -181,8 +181,8 @@ static inline int is_eof(subject* subj)
 static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 {
 	unsigned char c;
-	int startpos = subj->pos;
-	int len = 0;
+	size_t startpos = subj->pos;
+	size_t len = 0;
 
 	while ((c = peek_char(subj)) && (*f)(c)) {
 		advance(subj);
@@ -197,7 +197,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 // parsed).  Return 0 if you don't find matching closing
 // backticks, otherwise return the position in the subject
 // after the closing backticks.
-static int scan_to_closing_backticks(subject* subj, int openticklength)
+static size_t scan_to_closing_backticks(subject* subj, size_t openticklength)
 {
 	// read non backticks
 	unsigned char c;
@@ -207,7 +207,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 	if (is_eof(subj)) {
 		return 0;  // did not find closing ticks, return 0
 	}
-	int numticks = 0;
+	size_t numticks = 0;
 	while (peek_char(subj) == '`') {
 		advance(subj);
 		numticks++;
@@ -223,8 +223,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 static cmark_node* handle_backticks(subject *subj)
 {
 	cmark_chunk openticks = take_while(subj, isbacktick);
-	int startpos = subj->pos;
-	int endpos = scan_to_closing_backticks(subj, openticks.len);
+	size_t startpos = subj->pos;
+	size_t endpos = scan_to_closing_backticks(subj, openticks.len);
 
 	if (endpos == 0) { // not found
 		subj->pos = startpos; // rewind
@@ -246,10 +246,10 @@ static int
 scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 {
 	int numdelims = 0;
-	int before_char_pos;
+	size_t before_char_pos;
 	int32_t after_char = 0;
 	int32_t before_char = 0;
-	int len;
+	long len;
 	bool left_flanking, right_flanking;
 
 	if (subj->pos == 0) {
@@ -410,11 +410,11 @@ static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
 {
 	delimiter *delim, *tmp_delim;
-	int use_delims;
+	long use_delims;
 	cmark_node *opener_inl = opener->inl_text;
 	cmark_node *closer_inl = closer->inl_text;
-	int opener_num_chars = opener_inl->as.literal.len;
-	int closer_num_chars = closer_inl->as.literal.len;
+	size_t opener_num_chars = opener_inl->as.literal.len;
+	size_t closer_num_chars = closer_inl->as.literal.len;
 	cmark_node *tmp, *emph, *first_child, *last_child;
 
 	// calculate the actual number of characters used from this closer
@@ -584,7 +584,7 @@ unsigned char *cmark_clean_title(cmark_chunk *title)
 // Assumes the subject has a '<' character at the current position.
 static cmark_node* handle_pointy_brace(subject* subj)
 {
-	int matchlen = 0;
+	size_t matchlen = 0;
 	cmark_chunk contents;
 
 	advance(subj);  // advance past first <
@@ -631,8 +631,8 @@ static cmark_node* handle_pointy_brace(subject* subj)
 // encountered.  Backticks in labels do not start code spans.
 static int link_label(subject* subj, cmark_chunk *raw_label)
 {
-	int startpos = subj->pos;
-	int length = 0;
+	size_t startpos = subj->pos;
+	size_t length = 0;
 	unsigned char c;
 
 	// advance past [
@@ -674,10 +674,10 @@ static int link_label(subject* subj, cmark_chunk *raw_label)
 // Return a link, an image, or a literal close bracket.
 static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 {
-	int initial_pos;
-	int starturl, endurl, starttitle, endtitle, endall;
-	int n;
-	int sps;
+	size_t initial_pos;
+	size_t starturl, endurl, starttitle, endtitle, endall;
+	long n;
+	long sps;
 	cmark_reference *ref;
 	bool is_image = false;
 	cmark_chunk url_chunk, title_chunk;
@@ -827,7 +827,7 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 // Assumes the subject has a newline at the current position.
 static cmark_node* handle_newline(subject *subj)
 {
-	int nlpos = subj->pos;
+	size_t nlpos = subj->pos;
 	// skip over newline
 	advance(subj);
 	// skip spaces at beginning of line
@@ -843,7 +843,7 @@ static cmark_node* handle_newline(subject *subj)
 	}
 }
 
-static int subject_find_special_char(subject *subj)
+static size_t subject_find_special_char(subject *subj)
 {
 	// "\n\\`&_*[]<!"
 	static const int8_t SPECIAL_CHARS[256] = {
@@ -865,7 +865,7 @@ static int subject_find_special_char(subject *subj)
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 	};
 
-	int n = subj->pos + 1;
+	size_t n = subj->pos + 1;
 
 	while (n < subj->input.len) {
 		if (SPECIAL_CHARS[subj->input.data[n]])
@@ -883,7 +883,7 @@ static int parse_inline(subject* subj, cmark_node * parent)
 	cmark_node* new_inl = NULL;
 	cmark_chunk contents;
 	unsigned char c;
-	int endpos;
+	size_t endpos;
 	c = peek_char(subj);
 	if (c == 0) {
 		return 0;
@@ -971,7 +971,7 @@ static void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
+size_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
 {
 	subject subj;
 
@@ -979,8 +979,8 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
 	cmark_chunk url;
 	cmark_chunk title;
 
-	int matchlen = 0;
-	int beforetitle;
+	size_t matchlen = 0;
+	size_t beforetitle;
 
 	subject_from_buf(&subj, input, NULL);
 
diff --git a/src/inlines.h b/src/inlines.h
index d2ccfb475..2ce1a7ea9 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -10,7 +10,7 @@ unsigned char *cmark_clean_title(cmark_chunk *title);
 
 void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
 
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+size_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
 
 #ifdef __cplusplus
 }
diff --git a/src/man.c b/src/man.c
index f57eb7523..27d3cc784 100644
--- a/src/man.c
+++ b/src/man.c
@@ -10,9 +10,9 @@
 
 // Functions to convert cmark_nodes to groff man strings.
 
-static void escape_man(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_man(cmark_strbuf *dest, const unsigned char *source, size_t length)
 {
-	int i;
+	size_t i;
 	unsigned char c;
 	bool beginLine = true;
 
diff --git a/src/node.c b/src/node.c
index fe3dac31d..a42ceef43 100644
--- a/src/node.c
+++ b/src/node.c
@@ -339,7 +339,7 @@ cmark_node_set_literal(cmark_node *node, const char *content)
 	return 0;
 }
 
-int
+size_t
 cmark_node_get_header_level(cmark_node *node)
 {
 	if (node == NULL) {
@@ -614,7 +614,7 @@ cmark_node_get_start_line(cmark_node *node)
 	return node->start_line;
 }
 
-int
+size_t
 cmark_node_get_start_column(cmark_node *node)
 {
 	if (node == NULL) {
@@ -632,7 +632,7 @@ cmark_node_get_end_line(cmark_node *node)
 	return node->end_line;
 }
 
-int
+size_t
 cmark_node_get_end_column(cmark_node *node)
 {
 	if (node == NULL) {
@@ -807,7 +807,7 @@ S_print_error(FILE *out, cmark_node *node, const char *elem)
 	if (out == NULL) {
 		return;
 	}
-	fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem,
+	fprintf(out, "Invalid '%s' in node type %s at %d:%ld\n", elem,
 	        cmark_node_get_type_string(node), node->start_line,
 	        node->start_column);
 }
diff --git a/src/node.h b/src/node.h
index 74eddd482..c1bd873fa 100644
--- a/src/node.h
+++ b/src/node.h
@@ -13,8 +13,8 @@ extern "C" {
 
 typedef struct {
 	cmark_list_type   list_type;
-	int               marker_offset;
-	int               padding;
+	size_t            marker_offset;
+	size_t            padding;
 	int               start;
 	cmark_delim_type  delimiter;
 	unsigned char     bullet_char;
@@ -23,15 +23,15 @@ typedef struct {
 
 typedef struct {
 	bool              fenced;
-	int               fence_length;
-	int               fence_offset;
+	size_t            fence_length;
+	size_t            fence_offset;
 	unsigned char     fence_char;
 	cmark_chunk       info;
 	cmark_chunk       literal;
 } cmark_code;
 
 typedef struct {
-	int level;
+	size_t level;
 	bool setext;
 } cmark_header;
 
@@ -52,9 +52,9 @@ struct cmark_node {
 	void *user_data;
 
 	int start_line;
-	int start_column;
+	size_t start_column;
 	int end_line;
-	int end_column;
+	size_t end_column;
 	bool open;
 	bool last_line_blank;
 
diff --git a/src/parser.h b/src/parser.h
index 3c8def988..f0c6d5212 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -17,7 +17,7 @@ struct cmark_parser {
 	struct cmark_node* current;
 	int line_number;
 	cmark_strbuf *curline;
-	int last_line_length;
+	size_t last_line_length;
 	cmark_strbuf *linebuf;
 };
 
diff --git a/src/scanners.c b/src/scanners.c
index 21c074476..c6ed47068 100644
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -1,11 +1,11 @@
-/* Generated by re2c 0.13.6 */
+/* Generated by re2c 0.13.7.4 */
 #include <stdlib.h>
 #include "chunk.h"
 #include "scanners.h"
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+long _scan_at(long (*scanner)(const unsigned char *), cmark_chunk *c, long offset)
 {
-	int res;
+	long res;
 	unsigned char *ptr = (unsigned char *)c->data;
 	unsigned char lim = ptr[c->len];
 
@@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
 
 
 // Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+long _scan_autolink_uri(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -616,7 +616,7 @@ int _scan_autolink_uri(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy35;
 	}
-	if (yych <= '=') goto yy31;
+	if (yych <= '<') goto yy31;
 	if (yych <= '>') goto yy38;
 	++p;
 	yych = *p;
@@ -2978,7 +2978,7 @@ int _scan_autolink_uri(const unsigned char *p)
 }
 
 // Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+long _scan_autolink_email(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -3104,7 +3104,8 @@ int _scan_autolink_email(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy495;
 	}
-	if (yych == '@') goto yy493;
+	if (yych <= '>') goto yy494;
+	if (yych <= '@') goto yy493;
 	goto yy494;
 yy497:
 	yych = *++p;
@@ -7923,7 +7924,7 @@ int _scan_autolink_email(const unsigned char *p)
 }
 
 // Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+long _scan_html_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -8057,7 +8058,8 @@ int _scan_html_tag(const unsigned char *p)
 		goto yy762;
 	}
 	if (yych == '-') goto yy764;
-	if (yych == '[') goto yy761;
+	if (yych <= '@') goto yy749;
+	if (yych <= '[') goto yy761;
 	goto yy749;
 yy753:
 	yych = *(marker = ++p);
@@ -8275,7 +8277,7 @@ int _scan_html_tag(const unsigned char *p)
 	if (yybm[0+yych] & 16) {
 		goto yy794;
 	}
-	if (yych <= '.') {
+	if (yych <= ',') {
 		if (yych <= '\n') {
 			if (yych <= 0x08) goto yy757;
 		} else {
@@ -8333,9 +8335,9 @@ int _scan_html_tag(const unsigned char *p)
 		goto yy800;
 	}
 	if (yych <= 0x00) goto yy757;
-	if (yych <= '!') goto yy799;
+	if (yych <= ' ') goto yy799;
 	if (yych <= '"') goto yy804;
-	if (yych <= ';') goto yy802;
+	if (yych <= '\'') goto yy802;
 	goto yy757;
 yy799:
 	++p;
@@ -8345,10 +8347,10 @@ int _scan_html_tag(const unsigned char *p)
 	}
 	if (yych <= '"') {
 		if (yych <= 0x00) goto yy757;
-		if (yych <= '!') goto yy813;
+		if (yych <= ' ') goto yy813;
 		goto yy804;
 	} else {
-		if (yych <= ';') goto yy802;
+		if (yych <= '\'') goto yy802;
 		if (yych == '>') goto yy759;
 		goto yy757;
 	}
@@ -8359,7 +8361,7 @@ int _scan_html_tag(const unsigned char *p)
 		goto yy800;
 	}
 	if (yych <= 0x00) goto yy757;
-	if (yych <= '!') goto yy807;
+	if (yych <= ' ') goto yy807;
 	if (yych == '>') goto yy759;
 	goto yy757;
 yy802:
@@ -8575,7 +8577,7 @@ int _scan_html_tag(const unsigned char *p)
 
 // Try to match an HTML block tag including first <,
 // returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+long _scan_html_block_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -10363,7 +10365,7 @@ int _scan_html_block_tag(const unsigned char *p)
 // This may optionally be contained in <..>; otherwise
 // whitespace and unbalanced right parentheses aren't allowed.
 // Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+long _scan_link_url(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -10436,7 +10438,7 @@ int _scan_link_url(const unsigned char *p)
 		goto yy1105;
 	}
 	if (yych <= ')') {
-		if (yych <= ' ') goto yy1104;
+		if (yych <= 0x1F) goto yy1104;
 		if (yych <= '\'') goto yy1118;
 		if (yych <= '(') goto yy1114;
 		goto yy1104;
@@ -10459,7 +10461,7 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1121;
 	}
-	if (yych <= '\'') {
+	if (yych <= ' ') {
 		if (yych <= 0x00) goto yy1104;
 		if (yych == '\n') goto yy1104;
 		goto yy1128;
@@ -10468,7 +10470,7 @@ int _scan_link_url(const unsigned char *p)
 			if (yych <= '(') goto yy1126;
 			goto yy1128;
 		} else {
-			if (yych <= '=') goto yy1118;
+			if (yych <= '<') goto yy1118;
 			if (yych <= '>') goto yy1123;
 			goto yy1125;
 		}
@@ -10579,7 +10581,7 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1121;
 	}
-	if (yych <= '\'') {
+	if (yych <= ' ') {
 		if (yych <= 0x00) goto yy1104;
 		if (yych == '\n') goto yy1104;
 		goto yy1128;
@@ -10588,7 +10590,7 @@ int _scan_link_url(const unsigned char *p)
 			if (yych <= '(') goto yy1126;
 			goto yy1128;
 		} else {
-			if (yych <= '=') goto yy1118;
+			if (yych <= '<') goto yy1118;
 			if (yych >= '?') goto yy1125;
 		}
 	}
@@ -10654,7 +10656,7 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 64) {
 		goto yy1128;
 	}
-	if (yych <= '=') goto yy1116;
+	if (yych <= '<') goto yy1116;
 	if (yych >= '?') goto yy1131;
 yy1130:
 	yych = *++p;
@@ -10678,7 +10680,7 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 64) {
 		goto yy1128;
 	}
-	if (yych <= '=') goto yy1124;
+	if (yych <= '<') goto yy1124;
 	if (yych <= '>') goto yy1130;
 	goto yy1131;
 yy1134:
@@ -10768,7 +10770,7 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1121;
 	}
-	if (yych <= '\'') {
+	if (yych <= ' ') {
 		if (yych <= 0x00) goto yy1124;
 		if (yych == '\n') goto yy1124;
 		goto yy1128;
@@ -10777,7 +10779,7 @@ int _scan_link_url(const unsigned char *p)
 			if (yych <= '(') goto yy1126;
 			goto yy1128;
 		} else {
-			if (yych <= '=') goto yy1118;
+			if (yych <= '<') goto yy1118;
 			if (yych <= '>') goto yy1123;
 			goto yy1125;
 		}
@@ -10813,7 +10815,7 @@ int _scan_link_url(const unsigned char *p)
 // Try to match a link title (in single quotes, in double quotes, or
 // in parentheses), returning number of chars matched.  Allow one
 // level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+long _scan_link_title(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -10894,7 +10896,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1147;
 	}
 	if (yych <= 0x00) goto yy1149;
-	if (yych <= '[') goto yy1152;
+	if (yych <= ')') goto yy1152;
 	goto yy1150;
 yy1149:
 	p = marker;
@@ -10918,7 +10920,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1147;
 	}
 	if (yych <= 0x00) goto yy1149;
-	if (yych <= '[') goto yy1154;
+	if (yych <= ')') goto yy1154;
 	goto yy1150;
 yy1152:
 	++p;
@@ -10932,7 +10934,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1147;
 	}
 	if (yych <= 0x00) goto yy1153;
-	if (yych <= '[') goto yy1152;
+	if (yych <= ')') goto yy1152;
 	goto yy1150;
 yy1155:
 	++p;
@@ -10942,7 +10944,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1155;
 	}
 	if (yych <= 0x00) goto yy1149;
-	if (yych <= '[') goto yy1159;
+	if (yych <= '\'') goto yy1159;
 yy1157:
 	++p;
 	yych = *p;
@@ -10950,7 +10952,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1155;
 	}
 	if (yych <= 0x00) goto yy1149;
-	if (yych <= '[') goto yy1161;
+	if (yych <= '\'') goto yy1161;
 	goto yy1157;
 yy1159:
 	++p;
@@ -10964,7 +10966,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1155;
 	}
 	if (yych <= 0x00) goto yy1160;
-	if (yych <= '[') goto yy1159;
+	if (yych <= '\'') goto yy1159;
 	goto yy1157;
 yy1162:
 	++p;
@@ -10974,7 +10976,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1162;
 	}
 	if (yych <= 0x00) goto yy1149;
-	if (yych <= '[') goto yy1166;
+	if (yych <= '"') goto yy1166;
 yy1164:
 	++p;
 	yych = *p;
@@ -10982,7 +10984,7 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1162;
 	}
 	if (yych <= 0x00) goto yy1149;
-	if (yych <= '[') goto yy1168;
+	if (yych <= '"') goto yy1168;
 	goto yy1164;
 yy1166:
 	++p;
@@ -10996,14 +10998,14 @@ int _scan_link_title(const unsigned char *p)
 		goto yy1162;
 	}
 	if (yych <= 0x00) goto yy1167;
-	if (yych <= '[') goto yy1166;
+	if (yych <= '"') goto yy1166;
 	goto yy1164;
 }
 
 }
 
 // Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+long _scan_spacechars(const unsigned char *p)
 {
   const unsigned char *start = p; \
 
@@ -11073,7 +11075,7 @@ int _scan_spacechars(const unsigned char *p)
 }
 
 // Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+long _scan_atx_header_start(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -11183,7 +11185,7 @@ int _scan_atx_header_start(const unsigned char *p)
 
 // Match sexext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+long _scan_setext_header_line(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
 
@@ -11299,7 +11301,7 @@ int _scan_setext_header_line(const unsigned char *p)
 // Scan a horizontal rule line: "...three or more hyphens, asterisks,
 // or underscores on a line by themselves. If you wish, you may use
 // spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+long _scan_hrule(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -11407,7 +11409,8 @@ int _scan_hrule(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1225;
 	}
-	if (yych != '\n') goto yy1220;
+	if (yych <= 0x08) goto yy1220;
+	if (yych >= '\v') goto yy1220;
 yy1227:
 	++p;
 	{ return (p - start); }
@@ -11482,7 +11485,7 @@ int _scan_hrule(const unsigned char *p)
 }
 
 // Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+long _scan_open_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -11560,7 +11563,7 @@ int _scan_open_code_fence(const unsigned char *p)
 		goto yy1259;
 	}
 	if (yych <= 0x00) goto yy1256;
-	if (yych <= '}') goto yy1261;
+	if (yych <= '\n') goto yy1261;
 	goto yy1257;
 yy1259:
 	++p;
@@ -11569,7 +11572,7 @@ int _scan_open_code_fence(const unsigned char *p)
 		goto yy1259;
 	}
 	if (yych <= 0x00) goto yy1256;
-	if (yych >= '~') goto yy1256;
+	if (yych >= '\v') goto yy1256;
 yy1261:
 	++p;
 	p = marker;
@@ -11588,7 +11591,7 @@ int _scan_open_code_fence(const unsigned char *p)
 		goto yy1266;
 	}
 	if (yych <= 0x00) goto yy1256;
-	if (yych <= '_') goto yy1268;
+	if (yych <= '\n') goto yy1268;
 	goto yy1264;
 yy1266:
 	++p;
@@ -11597,7 +11600,7 @@ int _scan_open_code_fence(const unsigned char *p)
 		goto yy1266;
 	}
 	if (yych <= 0x00) goto yy1256;
-	if (yych >= '`') goto yy1256;
+	if (yych >= '\v') goto yy1256;
 yy1268:
 	++p;
 	p = marker;
@@ -11607,7 +11610,7 @@ int _scan_open_code_fence(const unsigned char *p)
 }
 
 // Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+long _scan_close_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -11684,7 +11687,8 @@ int _scan_close_code_fence(const unsigned char *p)
 	if (yybm[0+yych] & 64) {
 		goto yy1280;
 	}
-	if (yych == '\n') goto yy1282;
+	if (yych <= 0x08) goto yy1277;
+	if (yych <= '\n') goto yy1282;
 	if (yych == '~') goto yy1278;
 	goto yy1277;
 yy1280:
@@ -11693,7 +11697,8 @@ int _scan_close_code_fence(const unsigned char *p)
 	if (yybm[0+yych] & 64) {
 		goto yy1280;
 	}
-	if (yych != '\n') goto yy1277;
+	if (yych <= 0x08) goto yy1277;
+	if (yych >= '\v') goto yy1277;
 yy1282:
 	++p;
 	p = marker;
@@ -11737,7 +11742,7 @@ int _scan_close_code_fence(const unsigned char *p)
 
 // Scans an entity.
 // Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+long _scan_entity(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
diff --git a/src/scanners.h b/src/scanners.h
index f360505ba..fbc1e0a04 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -5,20 +5,20 @@
 extern "C" {
 #endif
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset);
-int _scan_autolink_uri(const unsigned char *p);
-int _scan_autolink_email(const unsigned char *p);
-int _scan_html_tag(const unsigned char *p);
-int _scan_html_block_tag(const unsigned char *p);
-int _scan_link_url(const unsigned char *p);
-int _scan_link_title(const unsigned char *p);
-int _scan_spacechars(const unsigned char *p);
-int _scan_atx_header_start(const unsigned char *p);
-int _scan_setext_header_line(const unsigned char *p);
-int _scan_hrule(const unsigned char *p);
-int _scan_open_code_fence(const unsigned char *p);
-int _scan_close_code_fence(const unsigned char *p);
-int _scan_entity(const unsigned char *p);
+long _scan_at(long (*scanner)(const unsigned char *), cmark_chunk *c, long offset);
+long _scan_autolink_uri(const unsigned char *p);
+long _scan_autolink_email(const unsigned char *p);
+long _scan_html_tag(const unsigned char *p);
+long _scan_html_block_tag(const unsigned char *p);
+long _scan_link_url(const unsigned char *p);
+long _scan_link_title(const unsigned char *p);
+long _scan_spacechars(const unsigned char *p);
+long _scan_atx_header_start(const unsigned char *p);
+long _scan_setext_header_line(const unsigned char *p);
+long _scan_hrule(const unsigned char *p);
+long _scan_open_code_fence(const unsigned char *p);
+long _scan_close_code_fence(const unsigned char *p);
+long _scan_entity(const unsigned char *p);
 
 #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
 #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
diff --git a/src/scanners.re b/src/scanners.re
index d83efdefa..645999294 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -2,9 +2,9 @@
 #include "chunk.h"
 #include "scanners.h"
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+long _scan_at(long (*scanner)(const unsigned char *), cmark_chunk *c, long offset)
 {
-	int res;
+	long res;
 	unsigned char *ptr = (unsigned char *)c->data;
 	unsigned char lim = ptr[c->len];
 
@@ -70,7 +70,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
 */
 
 // Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+long _scan_autolink_uri(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -81,7 +81,7 @@ int _scan_autolink_uri(const unsigned char *p)
 }
 
 // Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+long _scan_autolink_email(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -96,7 +96,7 @@ int _scan_autolink_email(const unsigned char *p)
 }
 
 // Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+long _scan_html_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -108,7 +108,7 @@ int _scan_html_tag(const unsigned char *p)
 
 // Try to match an HTML block tag including first <,
 // returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+long _scan_html_block_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -124,7 +124,7 @@ int _scan_html_block_tag(const unsigned char *p)
 // This may optionally be contained in <..>; otherwise
 // whitespace and unbalanced right parentheses aren't allowed.
 // Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+long _scan_link_url(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -138,7 +138,7 @@ int _scan_link_url(const unsigned char *p)
 // Try to match a link title (in single quotes, in double quotes, or
 // in parentheses), returning number of chars matched.  Allow one
 // level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+long _scan_link_title(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -151,7 +151,7 @@ int _scan_link_title(const unsigned char *p)
 }
 
 // Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+long _scan_spacechars(const unsigned char *p)
 {
   const unsigned char *start = p; \
 /*!re2c
@@ -161,7 +161,7 @@ int _scan_spacechars(const unsigned char *p)
 }
 
 // Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+long _scan_atx_header_start(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -173,7 +173,7 @@ int _scan_atx_header_start(const unsigned char *p)
 
 // Match sexext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+long _scan_setext_header_line(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
 /*!re2c
@@ -186,7 +186,7 @@ int _scan_setext_header_line(const unsigned char *p)
 // Scan a horizontal rule line: "...three or more hyphens, asterisks,
 // or underscores on a line by themselves. If you wish, you may use
 // spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+long _scan_hrule(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -199,7 +199,7 @@ int _scan_hrule(const unsigned char *p)
 }
 
 // Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+long _scan_open_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -211,7 +211,7 @@ int _scan_open_code_fence(const unsigned char *p)
 }
 
 // Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+long _scan_close_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -224,7 +224,7 @@ int _scan_close_code_fence(const unsigned char *p)
 
 // Scans an entity.
 // Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+long _scan_entity(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
diff --git a/src/utf8.c b/src/utf8.c
index d77c5d138..02de1bfc8 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf)
 	cmark_strbuf_put(buf, repl, 3);
 }
 
-static int utf8proc_charlen(const uint8_t *str, int str_len)
+static long utf8proc_charlen(const uint8_t *str, long str_len)
 {
 	int length, i;
 
@@ -54,9 +54,9 @@ static int utf8proc_charlen(const uint8_t *str, int str_len)
 }
 
 // Validate a single UTF-8 character according to RFC 3629.
-static int utf8proc_valid(const uint8_t *str, int str_len)
+static long utf8proc_valid(const uint8_t *str, size_t str_len)
 {
-	int length = utf8proc_charlen(str, str_len);
+	long length = utf8proc_charlen(str, str_len); // keep signed: the "<= 0" check below must see negative results
 
 	if (length <= 0)
 		return length;
@@ -136,7 +136,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
 			i += 1;
 			tab += numspaces;
 		} else {
-			int charlen = utf8proc_valid(line + i, size - i);
+			long charlen = utf8proc_valid(line + i, size - i);
 
 			if (charlen >= 0) {
 				cmark_strbuf_put(ob, line + i, charlen);
@@ -151,9 +151,9 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
 	}
 }
 
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
+long utf8proc_iterate(const uint8_t *str, size_t str_len, int32_t *dst)
 {
-	int length;
+	long length;
 	int32_t uc = -1;
 
 	*dst = -1;
@@ -228,7 +228,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
 	cmark_strbuf_put(buf, dst, len);
 }
 
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, size_t len)
 {
 	int32_t c;
 
@@ -236,7 +236,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
 	utf8proc_encode_char(x, dest)
 
 	while (len > 0) {
-		int char_len = utf8proc_iterate(str, len, &c);
+		long char_len = utf8proc_iterate(str, len, &c);
 
 		if (char_len >= 0) {
 #include "case_fold_switch.inc"
diff --git a/src/utf8.h b/src/utf8.h
index 7df15737e..30ed4b4ed 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -8,9 +8,9 @@
 extern "C" {
 #endif
 
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len);
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, size_t len);
 void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst);
+long utf8proc_iterate(const uint8_t *str, size_t str_len, int32_t *dst);
 void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size);
 int utf8proc_is_space(int32_t uc);
 int utf8proc_is_punctuation(int32_t uc);
diff --git a/src/xml.c b/src/xml.c
index f1692419a..66caced44 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -11,7 +11,7 @@
 
 // Functions to convert cmark_nodes to XML strings.
 
-static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_xml(cmark_strbuf *dest, const unsigned char *source, long length)
 {
 	if (source != NULL) {
 		if (length < 0)
@@ -49,7 +49,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 		                    cmark_node_get_type_string(node));
 
 		if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
-			cmark_strbuf_printf(xml, " sourcepos=\"%d:%d-%d:%d\"",
+			cmark_strbuf_printf(xml, " sourcepos=\"%d:%ld-%d:%ld\"",
 			                    node->start_line,
 			                    node->start_column,
 			                    node->end_line,
@@ -97,7 +97,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 			                     "true" : "false"));
 			break;
 		case CMARK_NODE_HEADER:
-			cmark_strbuf_printf(xml, " level=\"%d\"",
+			cmark_strbuf_printf(xml, " level=\"%ld\"",
 			                    node->as.header.level);
 			break;
 		case CMARK_NODE_CODE_BLOCK: