Skip to content

Commit

Permalink
[chore/bugfix] Switch markdown from blackfriday to goldmark (#1267)
Browse files Browse the repository at this point in the history
Co-authored-by: Autumn! <autumnull@posteo.net>
  • Loading branch information
autumnull and autumnull authored Dec 16, 2022
1 parent 2b0342b commit eb08529
Show file tree
Hide file tree
Showing 71 changed files with 16,261 additions and 8,358 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ require (
github.com/mitchellh/mapstructure v1.5.0
github.com/oklog/ulid v1.3.1
github.com/robfig/cron/v3 v3.0.1
github.com/russross/blackfriday/v2 v2.1.0
github.com/spf13/cobra v1.6.1
github.com/spf13/viper v1.14.0
github.com/stretchr/testify v1.8.1
Expand All @@ -48,6 +47,7 @@ require (
github.com/uptrace/bun/dialect/pgdialect v1.1.9
github.com/uptrace/bun/dialect/sqlitedialect v1.1.9
github.com/wagslane/go-password-validator v0.3.0
github.com/yuin/goldmark v1.5.3
golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90
golang.org/x/exp v0.0.0-20220613132600-b0d781184e0d
golang.org/x/image v0.2.0
Expand Down
3 changes: 2 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,6 @@ github.com/rs/xid v1.4.0 h1:qd7wPTDkN6KQx2VmMBLrpHkiyQwgFXRnkOLacUiaSNY=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw=
Expand Down Expand Up @@ -645,6 +644,8 @@ github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.5.3 h1:3HUJmBFbQW9fhQOzMgseU134xfi6hU+mjWywx5Ty+/M=
github.com/yuin/goldmark v1.5.3/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/blake3 v0.2.1/go.mod h1:TSQ0KjMH+pht+bRyvVooJ1rBpvvngSGaPISafq9MxJk=
github.com/zeebo/pcg v1.0.1/go.mod h1:09F0S9iiKrwn9rlI5yjLkmrug154/YRW6KnnXVDM/l4=
Expand Down
2 changes: 1 addition & 1 deletion internal/api/client/status/statuscreate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type StatusCreateTestSuite struct {
const (
statusWithLinksAndTags = "#test alright, should be able to post #links with fragments in them now, let's see........\n\nhttps://docs.gotosocial.org/en/latest/user_guide/posts/#links\n\n#gotosocial\n\n(tobi remember to pull the docker image challenge)"
statusMarkdown = "# Title\n\n## Smaller title\n\nThis is a post written in [markdown](https://www.markdownguide.org/)\n\n<img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\"/>"
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p><p><img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\" crossorigin=\"anonymous\"></p>"
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p><img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\" crossorigin=\"anonymous\">"
)

// Post a new status with some custom visibility settings
Expand Down
72 changes: 25 additions & 47 deletions internal/text/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,62 +21,23 @@ package text
import (
"bytes"
"context"
"io"
"strings"

"github.com/russross/blackfriday/v2"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
minifyHtml "github.com/tdewolff/minify/v2/html"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)

var (
bfExtensions = blackfriday.NoIntraEmphasis | blackfriday.FencedCode | blackfriday.Autolink | blackfriday.Strikethrough | blackfriday.SpaceHeadings | blackfriday.HardLineBreak
m *minify.M
m *minify.M
)

// renderer wraps blackfriday's HTMLRenderer so that text nodes can be
// post-processed for hashtags and mentions (see RenderNode).
type renderer struct {
// f provides the ReplaceTags and ReplaceMentions helpers used by RenderNode.
f *formatter
// ctx is passed through to the formatter's replace helpers.
ctx context.Context
// mentions and tags are handed to ReplaceMentions and ReplaceTags respectively.
mentions []*gtsmodel.Mention
tags []*gtsmodel.Tag
// HTMLRenderer is embedded so every node type other than Text is rendered
// by blackfriday unchanged.
blackfriday.HTMLRenderer
}

// RenderNode renders a single blackfriday node to w.
//
// Non-text nodes are delegated straight to the embedded HTMLRenderer. Text
// nodes are first rendered into a scratch buffer (which lets the embedded
// renderer do the HTML escaping), then run through ReplaceTags and
// ReplaceMentions before being written to w. The walk status returned is
// always the one produced by the embedded renderer.
func (r *renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
	if node.Type != blackfriday.Text {
		return r.HTMLRenderer.RenderNode(w, node, entering)
	}

	// Let the embedded renderer escape the text into a scratch buffer.
	var escaped bytes.Buffer
	status := r.HTMLRenderer.RenderNode(&escaped, node, entering)

	// Tags are replaced first, then mentions, on the escaped output.
	withTags := r.f.ReplaceTags(r.ctx, escaped.String(), r.tags)
	out := r.f.ReplaceMentions(r.ctx, withTags, r.mentions)

	// A failed write can only be logged; the walk carries on regardless.
	if _, err := io.WriteString(w, out); err != nil {
		log.Errorf("error outputting markdown text: %s", err)
	}
	return status
}

func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string {

renderer := &renderer{
f: f,
ctx: ctx,
mentions: mentions,
tags: tags,
HTMLRenderer: *blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{
// same as blackfriday.CommonHTMLFlags, but with Smartypants disabled
// ref: https://github.com/superseriousbusiness/gotosocial/issues/1028
Flags: blackfriday.UseXHTML,
}),
}

// Temporarily replace all found emoji shortcodes in the markdown text with
// their ID so that they're not parsed as anything by the markdown parser -
// this fixes cases where emojis with some underscores in them are parsed as
Expand All @@ -89,8 +50,25 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
}

// parse markdown text into html, using custom renderer to add hashtag/mention links
htmlContentBytes := blackfriday.Run([]byte(markdownText), blackfriday.WithExtensions(bfExtensions), blackfriday.WithRenderer(renderer))
htmlContent := string(htmlContentBytes)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithHardWraps(),
html.WithUnsafe(), // allows raw HTML
),
goldmark.WithExtensions(
&customRenderer{f, ctx, mentions, tags},
extension.Linkify, // turns URLs into links
extension.Strikethrough,
),
)

var htmlContentBytes bytes.Buffer
err := md.Convert([]byte(markdownText), &htmlContentBytes)
if err != nil {
log.Errorf("error rendering markdown to HTML: %s", err)
}
htmlContent := htmlContentBytes.String()

// Replace emoji IDs in the parsed html content with their shortcodes again
for _, e := range emojis {
Expand All @@ -102,7 +80,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti

if m == nil {
m = minify.New()
m.Add("text/html", &html.Minifier{
m.Add("text/html", &minifyHtml.Minifier{
KeepEndTags: true,
KeepQuotes: true,
})
Expand Down
21 changes: 21 additions & 0 deletions internal/text/markdown_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ const (
mdMentionAndCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>the_mighty_zork</span></a></span></p><pre><code>@the_mighty_zork\n</code></pre>"
mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping"
mdWithSmartypantsExpected = "<p>\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping</p>"
mdWithAsciiHeart = "hello <3 old friend <3 i loved u </3 :(( you stole my heart"
mdWithAsciiHeartExpected = "<p>hello &lt;3 old friend &lt;3 i loved u &lt;/3 :(( you stole my heart</p>"
mdWithStrikethrough = "I have ~~mdae~~ made an error"
mdWithStrikethroughExpected = "<p>I have <del>mdae</del> made an error</p>"
mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial"
mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>"
)

type MarkdownTestSuite struct {
Expand Down Expand Up @@ -160,6 +166,21 @@ func (suite *MarkdownTestSuite) TestParseSmartypants() {
suite.Equal(mdWithSmartypantsExpected, s)
}

// TestParseAsciiHeart checks that FromMarkdown turns mdWithAsciiHeart into
// mdWithAsciiHeartExpected, i.e. that "<3" and "</3" come out HTML-escaped
// instead of being treated as markup.
func (suite *MarkdownTestSuite) TestParseAsciiHeart() {
	ctx := context.Background()
	got := suite.formatter.FromMarkdown(ctx, mdWithAsciiHeart, nil, nil, nil)
	suite.Equal(mdWithAsciiHeartExpected, got)
}

// TestParseStrikethrough checks that FromMarkdown turns mdWithStrikethrough
// into mdWithStrikethroughExpected, i.e. that "~~text~~" is rendered as a
// <del> element.
func (suite *MarkdownTestSuite) TestParseStrikethrough() {
	ctx := context.Background()
	got := suite.formatter.FromMarkdown(ctx, mdWithStrikethrough, nil, nil, nil)
	suite.Equal(mdWithStrikethroughExpected, got)
}

// TestParseLink checks that FromMarkdown turns mdWithLink into
// mdWithLinkExpected, i.e. that a bare URL in the text is rendered as an
// anchor element.
func (suite *MarkdownTestSuite) TestParseLink() {
	ctx := context.Background()
	got := suite.formatter.FromMarkdown(ctx, mdWithLink, nil, nil, nil)
	suite.Equal(mdWithLinkExpected, got)
}

func TestMarkdownTestSuite(t *testing.T) {
suite.Run(t, new(MarkdownTestSuite))
}
215 changes: 215 additions & 0 deletions internal/text/markdownextension.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package text

import (
"context"
"unicode"

"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/text"
mdutil "github.com/yuin/goldmark/util"
)

// A goldmark extension that parses potential mentions and hashtags separately from regular
// text, so that they stay as one contiguous text fragment in the AST, and then renders
// them separately too, to avoid scanning normal text for mentions and tags.

// mention and hashtag fulfil the goldmark ast.Node interface.
// mention is an inline AST node representing one potential @mention.
type mention struct {
ast.BaseInline
// Segment is the span of the markdown source covered by this node; the
// renderer reads the node's text back out of the source through it.
Segment text.Segment
}

// hashtag is an inline AST node representing one potential #hashtag.
type hashtag struct {
ast.BaseInline
// Segment is the span of the markdown source covered by this node; the
// renderer reads the node's text back out of the source through it.
Segment text.Segment
}

// Node kinds for the two custom inline nodes. They are what Kind() returns
// and what the node renderers are registered under.
var (
	kindMention = ast.NewNodeKind("Mention")
	kindHashtag = ast.NewNodeKind("Hashtag")
)

// Kind reports the node kind registered for mentions (kindMention).
func (n *mention) Kind() ast.NodeKind {
	return kindMention
}

// Kind reports the node kind registered for hashtags (kindHashtag).
func (n *hashtag) Kind() ast.NodeKind {
	return kindHashtag
}

// Dump is used by goldmark for debugging. It is implemented only minimally because
// it is not used in our code.
// Dump prints this mention node for debugging by handing it to goldmark's
// ast.DumpHelper. Both optional DumpHelper arguments are left nil, so
// nothing mention-specific is added to the dump.
func (n *mention) Dump(source []byte, level int) {
	ast.DumpHelper(n, source, level, nil, nil)
}

// Dump prints this hashtag node for debugging by handing it to goldmark's
// ast.DumpHelper. Both optional DumpHelper arguments are left nil, so
// nothing hashtag-specific is added to the dump.
func (n *hashtag) Dump(source []byte, level int) {
	ast.DumpHelper(n, source, level, nil, nil)
}

// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
// The contained segment is used in rendering.
// newMention builds a mention node covering source segment s. The embedded
// ast.BaseInline is left at its zero value.
func newMention(s text.Segment) *mention {
	node := new(mention)
	node.Segment = s
	return node
}

// newHashtag builds a hashtag node covering source segment s. The embedded
// ast.BaseInline is left at its zero value.
func newHashtag(s text.Segment) *hashtag {
	node := new(hashtag)
	node.Segment = s
	return node
}

// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
// mentionParser is the stateless inline parser that recognises @mentions;
// see its Trigger and Parse methods.
type mentionParser struct {
}

// hashtagParser is the stateless inline parser that recognises #hashtags;
// see its Trigger and Parse methods.
type hashtagParser struct {
}

// Trigger returns the single byte '@', the character on which goldmark
// should give this parser a chance to run.
func (p *mentionParser) Trigger() []byte {
	return []byte("@")
}

// Trigger returns the single byte '#', the character on which goldmark
// should give this parser a chance to run.
func (p *hashtagParser) Trigger() []byte {
	return []byte("#")
}

// Parse tries to read a mention starting at the reader's current position.
//
// It returns nil (consuming nothing) unless the preceding character is
// whitespace and regexes.MentionFinder matches at the very start of the
// remaining line. On success the reader is advanced past the match and a
// mention node covering exactly the matched bytes is returned.
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	prev := block.PrecendingCharacter()
	rest, seg := block.PeekLine()

	// A mention must be preceded by whitespace.
	if !unicode.IsSpace(prev) {
		return nil
	}

	// Not ideal for performance, but it reuses the existing mention regex.
	// Only a match anchored at offset 0 counts.
	match := regexes.MentionFinder.FindIndex(rest)
	if match == nil || match[0] != 0 {
		return nil
	}

	end := match[1]
	block.Advance(end)
	return newMention(seg.WithStop(seg.Start + end))
}

// Parse tries to read a hashtag starting at the reader's current position.
//
// It returns nil (consuming nothing) if the preceding character is not a
// hashtag boundary, or if a character that is neither permitted in a hashtag
// nor a boundary appears before the hashtag ends. Otherwise the hashtag runs
// up to the first boundary character, or to the end of the peeked line if
// there is none; the reader is advanced by that many bytes and a hashtag node
// covering them is returned.
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	prev := block.PrecendingCharacter()
	rest, seg := block.PeekLine()

	if !util.IsHashtagBoundary(prev) {
		return nil
	}

	candidate := string(rest)
	for offset, ch := range candidate {
		// Skip the leading '#' that triggered this parser.
		if offset == 0 && ch == '#' {
			continue
		}

		// A boundary character terminates the hashtag.
		if util.IsHashtagBoundary(ch) {
			block.Advance(offset)
			return newHashtag(seg.WithStop(seg.Start + offset))
		}

		// Anything else must be permitted in a hashtag, otherwise this is
		// not a real hashtag and must not be trusted.
		if !util.IsPermittedInHashtag(ch) {
			return nil
		}
	}

	// No invalid character and no boundary before the end of the line:
	// the whole remaining line is the hashtag.
	block.Advance(len(candidate))
	return newHashtag(seg)
}

// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
// It is created in FromMarkdown to be used as a goldmark extension, and the fields are used
// when rendering mentions and tags.
// NOTE: FromMarkdown builds this struct with a positional (unkeyed) literal,
// so the order of the fields below must not change.
type customRenderer struct {
// f provides the ReplaceMentions and ReplaceTags helpers used when rendering.
f *formatter
// ctx is passed through to the formatter's replace helpers.
ctx context.Context
// mentions is handed to ReplaceMentions when a mention node is rendered.
mentions []*gtsmodel.Mention
// tags is handed to ReplaceTags when a hashtag node is rendered.
tags []*gtsmodel.Tag
}

// RegisterFuncs tells goldmark which render function handles each custom
// node kind: renderMention for kindMention, renderHashtag for kindHashtag.
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
	reg.Register(kindMention, r.renderMention)
	reg.Register(kindHashtag, r.renderHashtag)
}

// Extend installs this extension on m: it adds the mention and hashtag
// inline parsers to m's parser, and registers r itself as a node renderer
// on m's renderer.
func (r *customRenderer) Extend(m goldmark.Markdown) {
	// 500 is pretty arbitrary here, it was copied from example goldmark extension code.
	// https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111
	const priority = 500

	inlineParsers := parser.WithInlineParsers(
		mdutil.Prioritized(&mentionParser{}, priority),
		mdutil.Prioritized(&hashtagParser{}, priority),
	)
	m.Parser().AddOptions(inlineParsers)

	nodeRenderers := renderer.WithNodeRenderers(
		mdutil.Prioritized(r, priority),
	)
	m.Renderer().AddOptions(nodeRenderers)
}

// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
// renderMention writes the HTML for a mention node to w.
//
// Only the entering visit produces output. The node's text is read back out
// of source via its Segment and passed through ReplaceMentions together with
// the mentions held by the renderer; the result is written as-is.
//
// It always returns ast.WalkContinue and a nil error: a node of an unexpected
// type or a failed write is logged, not propagated, so one bad node does not
// abort rendering of the rest of the document.
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		return ast.WalkContinue, nil
	}

	n, ok := node.(*mention) // this function is only registered for kindMention
	if !ok {
		// n is nil here; bail out instead of dereferencing it below.
		log.Errorf("type assertion failed")
		return ast.WalkContinue, nil
	}

	// Named raw rather than text so the imported goldmark text package is
	// not shadowed.
	raw := string(n.Segment.Value(source))
	rendered := r.f.ReplaceMentions(r.ctx, raw, r.mentions)

	// we don't have much recourse if this fails
	if _, err := w.WriteString(rendered); err != nil {
		log.Errorf("error outputting markdown text: %s", err)
	}
	return ast.WalkContinue, nil
}

// renderHashtag writes the HTML for a hashtag node to w.
//
// Only the entering visit produces output. The node's text is read back out
// of source via its Segment and passed through ReplaceTags together with the
// tags held by the renderer; the result is written as-is.
//
// It always returns ast.WalkContinue and a nil error: a node of an unexpected
// type or a failed write is logged, not propagated, so one bad node does not
// abort rendering of the rest of the document.
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		return ast.WalkContinue, nil
	}

	n, ok := node.(*hashtag) // this function is only registered for kindHashtag
	if !ok {
		// n is nil here; bail out instead of dereferencing it below.
		log.Errorf("type assertion failed")
		return ast.WalkContinue, nil
	}

	// Named raw rather than text so the imported goldmark text package is
	// not shadowed.
	raw := string(n.Segment.Value(source))
	rendered := r.f.ReplaceTags(r.ctx, raw, r.tags)

	// we don't have much recourse if this fails
	if _, err := w.WriteString(rendered); err != nil {
		log.Errorf("error outputting markdown text: %s", err)
	}
	return ast.WalkContinue, nil
}
Loading

0 comments on commit eb08529

Please sign in to comment.