Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CRLF support #18

Merged
merged 7 commits into from
Jun 3, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 69 additions & 26 deletions src/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ static bool is_blank(cmark_strbuf *s, int offset)
{
while (offset < s->size) {
switch (s->ptr[offset]) {
case '\r':
case '\n':
return true;
case ' ':
Expand Down Expand Up @@ -126,9 +127,10 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
static void remove_trailing_blank_lines(cmark_strbuf *ln)
{
int i;
unsigned char c;

for (i = ln->size - 1; i >= 0; --i) {
unsigned char c = ln->ptr[i];
c = ln->ptr[i];

if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
break;
Expand All @@ -139,9 +141,16 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln)
return;
}

i = cmark_strbuf_strchr(ln, '\n', i);
if (i >= 0)

for(; i < ln->size; ++i) {
c = ln->ptr[i];

if (c != '\r' && c != '\n')
continue;

cmark_strbuf_truncate(ln, i);
break;
}
}

// Check to see if a node ends with a blank line, descending
Expand Down Expand Up @@ -185,7 +194,6 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr)
static cmark_node*
finalize(cmark_parser *parser, cmark_node* b)
{
int firstlinelen;
int pos;
cmark_node* item;
cmark_node* subitem;
Expand All @@ -204,9 +212,11 @@ finalize(cmark_parser *parser, cmark_node* b)
(b->type == NODE_CODE_BLOCK && b->as.code.fenced) ||
(b->type == NODE_HEADER && b->as.header.setext)) {
b->end_line = parser->line_number;
b->end_column = parser->curline->size -
(parser->curline->ptr[parser->curline->size - 1] == '\n' ?
1 : 0);
b->end_column = parser->curline->size;
if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\n')
b->end_column--;
if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\r')
b->end_column--;
} else {
b->end_line = parser->line_number - 1;
b->end_column = parser->last_line_length;
Expand All @@ -232,19 +242,28 @@ finalize(cmark_parser *parser, cmark_node* b)
} else {

// first line of contents becomes info
firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0);
for (pos = 0; pos < b->string_content.size; ++pos) {
if (b->string_content.ptr[pos] == '\r' ||
b->string_content.ptr[pos] == '\n')
break;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to break on every \r even if not followed by \n. Is this the intention?

assert(pos < b->string_content.size);

cmark_strbuf tmp = GH_BUF_INIT;
houdini_unescape_html_f(
&tmp,
b->string_content.ptr,
firstlinelen
pos
);
cmark_strbuf_trim(&tmp);
cmark_strbuf_unescape(&tmp);
b->as.code.info = cmark_chunk_buf_detach(&tmp);

cmark_strbuf_drop(&b->string_content, firstlinelen + 1);
if (b->string_content.ptr[pos] == '\r')
pos += 1;
if (b->string_content.ptr[pos] == '\n')
pos += 1;
cmark_strbuf_drop(&b->string_content, pos);
}
b->as.code.literal = cmark_chunk_buf_detach(&b->string_content);
break;
Expand Down Expand Up @@ -467,13 +486,22 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
const unsigned char *end = buffer + len;

while (buffer < end) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't find \r\n if it lies across a buffer boundary.

const unsigned char *eol
= (const unsigned char *)memchr(buffer, '\n',
end - buffer);
const unsigned char *eol;
size_t line_len;

for (eol = buffer; eol < end; ++eol) {
if (*eol == '\r' || *eol == '\n')
break;
}
if (eol >= end)
eol = NULL;

if (eol) {
line_len = eol + 1 - buffer;
if (eol < end && *eol == '\r')
eol++;
if (eol < end && *eol == '\n')
eol++;
line_len = eol - buffer;
} else if (eof) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to break on every \r even if not followed by \n. Is this the intention?

line_len = end - buffer;
} else {
Expand Down Expand Up @@ -533,9 +561,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)

// Add a newline to the end if not present:
// TODO this breaks abstraction:
if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
cmark_strbuf_putc(parser->curline, '\n');
if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\n') {
cmark_strbuf_truncate(parser->curline, parser->curline->size - 1);
}
if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\r') {
cmark_strbuf_truncate(parser->curline, parser->curline->size - 1);
}
cmark_strbuf_putc(parser->curline, '\n');
input.data = parser->curline->ptr;
input.len = parser->curline->size;

Expand All @@ -556,7 +588,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
}

indent = first_nonspace - offset;
blank = peek_at(&input, first_nonspace) == '\n';
blank = peek_at(&input, first_nonspace) == '\n' ||
peek_at(&input, first_nonspace) == '\r';

if (container->type == NODE_BLOCK_QUOTE) {
matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
Expand Down Expand Up @@ -657,7 +690,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
first_nonspace++;

indent = first_nonspace - offset;
blank = peek_at(&input, first_nonspace) == '\n';
blank = peek_at(&input, first_nonspace) == '\n' ||
peek_at(&input, first_nonspace) == '\r';

if (indent >= CODE_INDENT) {
if (!maybe_lazy && !blank) {
Expand Down Expand Up @@ -713,8 +747,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
} else if (container->type == NODE_PARAGRAPH &&
(lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
cmark_strbuf_strrchr(&container->string_content, '\n',
cmark_strbuf_len(&container->string_content) - 2) < 0) {
(cmark_strbuf_strrchr(&container->string_content, '\n',
cmark_strbuf_len(&container->string_content) - 2) < 0 &&
cmark_strbuf_strrchr(&container->string_content, '\r',
cmark_strbuf_len(&container->string_content) - 2) < 0)) {

container->type = NODE_HEADER;
container->as.header.level = lev;
Expand All @@ -738,7 +774,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
i++;
}
// i = number of spaces after marker, up to 5
if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
if (i >= 5 || i < 1 ||
peek_at(&input, offset) == '\n' ||
peek_at(&input, offset) == '\r') {
data->padding = matched + 1;
if (i > 0) {
offset += 1;
Expand Down Expand Up @@ -786,7 +824,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
first_nonspace++;

indent = first_nonspace - offset;
blank = peek_at(&input, first_nonspace) == '\n';
blank = peek_at(&input, first_nonspace) == '\n' ||
peek_at(&input, first_nonspace) == '\r';

if (blank && container->last_child) {
container->last_child->last_line_blank = true;
Expand Down Expand Up @@ -854,10 +893,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
parser->current = container;
}
finished:
parser->last_line_length = parser->curline->size -
(parser->curline->ptr[parser->curline->size - 1] == '\n' ?
1 : 0);
;
parser->last_line_length = parser->curline->size;
if (parser->last_line_length &&
parser->curline->ptr[parser->last_line_length - 1] == '\n')
parser->last_line_length--;
if (parser->last_line_length &&
parser->curline->ptr[parser->last_line_length - 1] == '\r')
parser->last_line_length--;

cmark_strbuf_clear(parser->curline);

}
Expand Down
13 changes: 7 additions & 6 deletions src/inlines.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ static cmark_node* handle_backslash(subject *subj)
if (cmark_ispunct(nextchar)) { // only ascii symbols and newline can be escaped
advance(subj);
return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
} else if (nextchar == '\n') {
} else if (nextchar == '\r' || nextchar == '\n') {
advance(subj);
return make_linebreak();
} else {
Expand Down Expand Up @@ -928,9 +928,9 @@ static cmark_node* handle_newline(subject *subj)

static int subject_find_special_char(subject *subj, int options)
{
// "\n\\`&_*[]<!"
// "\r\n\\`&_*[]<!"
static const int8_t SPECIAL_CHARS[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
Expand Down Expand Up @@ -995,6 +995,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
return 0;
}
switch(c) {
case '\r':
case '\n':
new_inl = handle_newline(subj);
break;
Expand Down Expand Up @@ -1046,7 +1047,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
subj->pos = endpos;

// if we're at a newline, strip trailing spaces.
if (peek_char(subj) == '\n') {
if (peek_char(subj) == '\r' || peek_char(subj) == '\n') {
cmark_chunk_rtrim(&contents);
}

Expand Down Expand Up @@ -1076,7 +1077,7 @@ static void spnl(subject* subj)
bool seen_newline = false;
while (peek_char(subj) == ' ' ||
(!seen_newline &&
(seen_newline = peek_char(subj) == '\n'))) {
(seen_newline = peek_char(subj) == '\r' || peek_char(subj) == '\n'))) {
advance(subj);
}
}
Expand Down Expand Up @@ -1134,7 +1135,7 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
while (peek_char(&subj) == ' ') {
advance(&subj);
}
if (peek_char(&subj) == '\n') {
if (peek_char(&subj) == '\r' || peek_char(&subj) == '\n') {
advance(&subj);
} else if (peek_char(&subj) != 0) {
return 0;
Expand Down
Loading