Skip to content

Commit

Permalink
Roll our own implementation of HTML escaper
Browse files Browse the repository at this point in the history
  • Loading branch information
rtfb committed Sep 10, 2016
1 parent 31f2685 commit 993325d
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 33 deletions.
45 changes: 45 additions & 0 deletions esc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package blackfriday

import (
"html"
"io"
)

// escMap pairs a single input byte with the byte sequence that is written in
// its place when escaping HTML.
type escMap struct {
// char is the raw byte that is matched against the input.
char byte
// seq is the replacement sequence written to the output instead of char.
seq []byte
}

var htmlEscaper = []escMap{

This comment has been minimized.

Copy link
@Ambrevar

Ambrevar Oct 6, 2016

Instead of looping over 4 characters, index by them:

// All escaped ASCII chars are below 64.
var htmlEscaper = [64][]byte{
  '&': []byte("&amp;"),
  '<': []byte("&lt;"),
  '>': []byte("&gt;"),
  '"': []byte("&quot;"),
}

This comment has been minimized.

Copy link
@Ambrevar

Ambrevar Oct 8, 2016

It should be a 256 table btw, not 64, but you got it right :)

{'&', []byte("&amp;")},
{'<', []byte("&lt;")},
{'>', []byte("&gt;")},
{'"', []byte("&quot;")},
}

func escapeHTML(w io.Writer, s []byte) {
var start, end int
var sEnd byte
for end < len(s) {
sEnd = s[end]
if sEnd == '&' || sEnd == '<' || sEnd == '>' || sEnd == '"' {

This comment has been minimized.

Copy link
@Ambrevar

Ambrevar Oct 6, 2016

With the above change, you can remove the if, and the for:
you just need something like:

        escapedChar = htmlEscaper[s[end]]
        if escapedChar != nil {
            w.Write(s[start:end])
            w.Write(escapedChar)
            start = end + 1
        }

This comment has been minimized.

Copy link
@rtfb

rtfb Oct 8, 2016

Author Collaborator

Thanks for this idea! It shaved off a few nanosecs and made the code simpler. Beautiful

for i := 0; i < len(htmlEscaper); i++ {
if sEnd == htmlEscaper[i].char {
w.Write(s[start:end])
w.Write(htmlEscaper[i].seq)
start = end + 1
break
}
}
}
end++
}
if start < len(s) && end <= len(s) {
w.Write(s[start:end])
}
}

// escLink writes text to w after first decoding any HTML entities it already
// contains and then escaping the decoded bytes with escapeHTML, so entities
// present in the input are not escaped a second time.
func escLink(w io.Writer, text []byte) {
	decoded := []byte(html.UnescapeString(string(text)))
	escapeHTML(w, decoded)
}
50 changes: 50 additions & 0 deletions esc_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package blackfriday

import (
"bytes"
"testing"
)

// TestEsc runs escapeHTML over a table of inputs and reports every case whose
// output differs from the expected escaped form.
func TestEsc(t *testing.T) {
	cases := []struct {
		in, want string
	}{
		{"abc", "abc"},
		{"a&c", "a&amp;c"},
		{"<", "&lt;"},
		{"[]:<", "[]:&lt;"},
		{"Hello <!--", "Hello &lt;!--"},
	}
	for _, tc := range cases {
		var out bytes.Buffer
		escapeHTML(&out, []byte(tc.in))
		if out.String() != tc.want {
			t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]",
				tc.in, tc.want, out.String())
		}
	}
}

/*

This comment has been minimized.

Copy link
@Ambrevar

Ambrevar Oct 6, 2016

Why the commented out benchmark?

This comment has been minimized.

Copy link
@rtfb

rtfb Oct 8, 2016

Author Collaborator

A silly oversight on my side, will uncomment.

func BenchmarkEscapeHTML(b *testing.B) {
tests := [][]byte{
[]byte(""),
[]byte("AT&T has an ampersand in their name."),
[]byte("AT&amp;T is another way to write it."),
[]byte("This & that."),
[]byte("4 < 5."),
[]byte("6 > 5."),
[]byte("Here's a [link] [1] with an ampersand in the URL."),
[]byte("Here's a link with an amersand in the link text: [AT&T] [2]."),
[]byte("Here's an inline [link](/script?foo=1&bar=2)."),
[]byte("Here's an inline [link](</script?foo=1&bar=2>)."),
[]byte("[1]: http://example.com/?foo=1&bar=2"),
[]byte("[2]: http://att.com/ \"AT&T\""),
}
var buff bytes.Buffer
for n := 0; n < b.N; n++ {
for _, t := range tests {
escapeHTML(&buff, t)
buff.Reset()
}
}
}
*/
57 changes: 30 additions & 27 deletions html.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package blackfriday
import (
"bytes"
"fmt"
"html"
"io"
"regexp"
"strings"
Expand Down Expand Up @@ -375,17 +374,6 @@ func cellAlignment(align CellAlignFlags) string {
}
}

// esc decodes any HTML entities in text and passes the decoded bytes through
// escCode, returning the re-escaped result.
func esc(text []byte) []byte {
	decoded := html.UnescapeString(string(text))
	return escCode([]byte(decoded))
}

// escCode HTML-escapes text with html.EscapeString and then adjusts the two
// numeric entities it produces: "&#34;" is rewritten to "&quot;" and "&#39;"
// is turned back into a literal apostrophe.
func escCode(text []byte) []byte {
	var (
		numericQuote = []byte("&#34;")
		namedQuote   = []byte("&quot;")
		numericApos  = []byte("&#39;")
		literalApos  = []byte("'")
	)
	escaped := []byte(html.EscapeString(string(text)))
	escaped = bytes.Replace(escaped, numericQuote, namedQuote, -1)
	escaped = bytes.Replace(escaped, numericApos, literalApos, -1)
	return escaped
}
}

func (r *HTMLRenderer) out(w io.Writer, text []byte) {
if r.disableTags > 0 {
w.Write(htmlTagRe.ReplaceAll(text, []byte{}))
Expand Down Expand Up @@ -504,11 +492,17 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
attrs := []string{}
switch node.Type {
case Text:
node.Literal = esc(node.Literal)
if r.Flags&Smartypants != 0 {
node.Literal = r.sr.Process(node.Literal)
var tmp bytes.Buffer
escapeHTML(&tmp, node.Literal)
r.sr.Process(w, tmp.Bytes())
} else {
if node.Parent.Type == Link {
escLink(w, node.Literal)
} else {
escapeHTML(w, node.Literal)
}
}
r.out(w, node.Literal)
case Softbreak:
r.out(w, []byte{'\n'})
// TODO: make it configurable via out(renderer.softbreak)
Expand Down Expand Up @@ -561,16 +555,22 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
} else {
if entering {
dest = r.addAbsPrefix(dest)
//if (!(options.safe && potentiallyUnsafe(node.destination))) {
attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest)))
//}
var hrefBuff bytes.Buffer
hrefBuff.WriteString("href=\"")
escLink(&hrefBuff, dest)
hrefBuff.WriteByte('"')
attrs = append(attrs, hrefBuff.String())
if node.NoteID != 0 {
r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node))
break
}
attrs = appendLinkAttrs(attrs, r.Flags, dest)
if len(node.LinkData.Title) > 0 {
attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title)))
var titleBuff bytes.Buffer
titleBuff.WriteString("title=\"")
escapeHTML(&titleBuff, node.LinkData.Title)
titleBuff.WriteByte('"')
attrs = append(attrs, titleBuff.String())
}
r.tag(w, aTag, attrs)
} else {
Expand All @@ -591,7 +591,9 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
//if options.safe && potentiallyUnsafe(dest) {
//out(w, `<img src="" alt="`)
//} else {
r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest))))
r.out(w, []byte(`<img src="`))
escLink(w, dest)
r.out(w, []byte(`" alt="`))
//}
}
r.disableTags++
Expand All @@ -600,14 +602,14 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
if r.disableTags == 0 {
if node.LinkData.Title != nil {
r.out(w, []byte(`" title="`))
r.out(w, esc(node.LinkData.Title))
escapeHTML(w, node.LinkData.Title)
}
r.out(w, []byte(`" />`))
}
}
case Code:
r.out(w, codeTag)
r.out(w, escCode(node.Literal))
escapeHTML(w, node.Literal)
r.out(w, codeCloseTag)
case Document:
break
Expand Down Expand Up @@ -752,7 +754,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.cr(w)
r.out(w, preTag)
r.tag(w, codeTag[:len(codeTag)-1], attrs)
r.out(w, escCode(node.Literal))
escapeHTML(w, node.Literal)
r.out(w, codeCloseTag)
r.out(w, preCloseTag)
if node.Parent.Type != Item {
Expand Down Expand Up @@ -837,9 +839,9 @@ func (r *HTMLRenderer) writeDocumentHeader(w *bytes.Buffer) {
w.WriteString("<head>\n")
w.WriteString(" <title>")
if r.Flags&Smartypants != 0 {
w.Write(r.sr.Process([]byte(r.Title)))
r.sr.Process(w, []byte(r.Title))
} else {
w.Write(esc([]byte(r.Title)))
escapeHTML(w, []byte(r.Title))
}
w.WriteString("</title>\n")
w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
Expand All @@ -852,14 +854,14 @@ func (r *HTMLRenderer) writeDocumentHeader(w *bytes.Buffer) {
w.WriteString(">\n")
if r.CSS != "" {
w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
w.Write(esc([]byte(r.CSS)))
escapeHTML(w, []byte(r.CSS))
w.WriteString("\"")
w.WriteString(ending)
w.WriteString(">\n")
}
if r.Icon != "" {
w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"")
w.Write(esc([]byte(r.Icon)))
escapeHTML(w, []byte(r.Icon))
w.WriteString("\"")
w.WriteString(ending)
w.WriteString(">\n")
Expand Down Expand Up @@ -919,6 +921,7 @@ func (r *HTMLRenderer) writeTOC(w *bytes.Buffer, ast *Node) {
w.Write(buf.Bytes())
w.WriteString("\n\n</nav>\n")
}
r.lastOutputLen = buf.Len()
}

func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {
Expand Down
11 changes: 5 additions & 6 deletions smartypants.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package blackfriday

import (
"bytes"
"io"
)

// SPRenderer is a struct containing state of a Smartypants renderer.
Expand Down Expand Up @@ -401,26 +402,24 @@ func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
}

// Process is the entry point of the Smartypants renderer.
func (r *SPRenderer) Process(text []byte) []byte {
var buff bytes.Buffer
func (r *SPRenderer) Process(w io.Writer, text []byte) {
mark := 0
for i := 0; i < len(text); i++ {
if action := r.callbacks[text[i]]; action != nil {
if i > mark {
buff.Write(text[mark:i])
w.Write(text[mark:i])
}
previousChar := byte(0)
if i > 0 {
previousChar = text[i-1]
}
var tmp bytes.Buffer
i += action(&tmp, previousChar, text[i:])
buff.Write(tmp.Bytes())
w.Write(tmp.Bytes())
mark = i + 1
}
}
if mark < len(text) {
buff.Write(text[mark:])
w.Write(text[mark:])
}
return buff.Bytes()
}

0 comments on commit 993325d

Please sign in to comment.