Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch markdown from blackfriday to goldmark #1267

Merged
merged 1 commit into from
Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ require (
github.com/mitchellh/mapstructure v1.5.0
github.com/oklog/ulid v1.3.1
github.com/robfig/cron/v3 v3.0.1
github.com/russross/blackfriday/v2 v2.1.0
github.com/spf13/cobra v1.6.1
github.com/spf13/viper v1.14.0
github.com/stretchr/testify v1.8.1
Expand All @@ -48,6 +47,7 @@ require (
github.com/uptrace/bun/dialect/pgdialect v1.1.9
github.com/uptrace/bun/dialect/sqlitedialect v1.1.9
github.com/wagslane/go-password-validator v0.3.0
github.com/yuin/goldmark v1.5.3
golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90
golang.org/x/exp v0.0.0-20220613132600-b0d781184e0d
golang.org/x/image v0.2.0
Expand Down
3 changes: 2 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,6 @@ github.com/rs/xid v1.4.0 h1:qd7wPTDkN6KQx2VmMBLrpHkiyQwgFXRnkOLacUiaSNY=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw=
Expand Down Expand Up @@ -645,6 +644,8 @@ github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.5.3 h1:3HUJmBFbQW9fhQOzMgseU134xfi6hU+mjWywx5Ty+/M=
github.com/yuin/goldmark v1.5.3/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/blake3 v0.2.1/go.mod h1:TSQ0KjMH+pht+bRyvVooJ1rBpvvngSGaPISafq9MxJk=
github.com/zeebo/pcg v1.0.1/go.mod h1:09F0S9iiKrwn9rlI5yjLkmrug154/YRW6KnnXVDM/l4=
Expand Down
2 changes: 1 addition & 1 deletion internal/api/client/status/statuscreate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type StatusCreateTestSuite struct {
const (
statusWithLinksAndTags = "#test alright, should be able to post #links with fragments in them now, let's see........\n\nhttps://docs.gotosocial.org/en/latest/user_guide/posts/#links\n\n#gotosocial\n\n(tobi remember to pull the docker image challenge)"
statusMarkdown = "# Title\n\n## Smaller title\n\nThis is a post written in [markdown](https://www.markdownguide.org/)\n\n<img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\"/>"
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p><p><img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\" crossorigin=\"anonymous\"></p>"
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p><img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\" crossorigin=\"anonymous\">"
)

// Post a new status with some custom visibility settings
Expand Down
72 changes: 25 additions & 47 deletions internal/text/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,62 +21,23 @@ package text
import (
"bytes"
"context"
"io"
"strings"

"github.com/russross/blackfriday/v2"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
minifyHtml "github.com/tdewolff/minify/v2/html"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)

var (
bfExtensions = blackfriday.NoIntraEmphasis | blackfriday.FencedCode | blackfriday.Autolink | blackfriday.Strikethrough | blackfriday.SpaceHeadings | blackfriday.HardLineBreak
m *minify.M
m *minify.M
)

type renderer struct {
f *formatter
ctx context.Context
mentions []*gtsmodel.Mention
tags []*gtsmodel.Tag
blackfriday.HTMLRenderer
}

func (r *renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
if node.Type == blackfriday.Text {
// call RenderNode to do the html escaping
var buff bytes.Buffer
status := r.HTMLRenderer.RenderNode(&buff, node, entering)

html := buff.String()
html = r.f.ReplaceTags(r.ctx, html, r.tags)
html = r.f.ReplaceMentions(r.ctx, html, r.mentions)

// we don't have much recourse if this fails
if _, err := io.WriteString(w, html); err != nil {
log.Errorf("error outputting markdown text: %s", err)
}
return status
}
return r.HTMLRenderer.RenderNode(w, node, entering)
}

func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string {

renderer := &renderer{
f: f,
ctx: ctx,
mentions: mentions,
tags: tags,
HTMLRenderer: *blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{
// same as blackfriday.CommonHTMLFlags, but with Smartypants disabled
// ref: https://github.com/superseriousbusiness/gotosocial/issues/1028
Flags: blackfriday.UseXHTML,
}),
}

// Temporarily replace all found emoji shortcodes in the markdown text with
// their ID so that they're not parsed as anything by the markdown parser -
// this fixes cases where emojis with some underscores in them are parsed as
Expand All @@ -89,8 +50,25 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
}

// parse markdown text into html, using custom renderer to add hashtag/mention links
htmlContentBytes := blackfriday.Run([]byte(markdownText), blackfriday.WithExtensions(bfExtensions), blackfriday.WithRenderer(renderer))
htmlContent := string(htmlContentBytes)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithHardWraps(),
html.WithUnsafe(), // allows raw HTML
),
goldmark.WithExtensions(
&customRenderer{f, ctx, mentions, tags},
extension.Linkify, // turns URLs into links
extension.Strikethrough,
),
)

var htmlContentBytes bytes.Buffer
err := md.Convert([]byte(markdownText), &htmlContentBytes)
if err != nil {
log.Errorf("error rendering markdown to HTML: %s", err)
}
htmlContent := htmlContentBytes.String()

// Replace emoji IDs in the parsed html content with their shortcodes again
for _, e := range emojis {
Expand All @@ -102,7 +80,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti

if m == nil {
m = minify.New()
m.Add("text/html", &html.Minifier{
m.Add("text/html", &minifyHtml.Minifier{
KeepEndTags: true,
KeepQuotes: true,
})
Expand Down
21 changes: 21 additions & 0 deletions internal/text/markdown_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ const (
mdMentionAndCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>the_mighty_zork</span></a></span></p><pre><code>@the_mighty_zork\n</code></pre>"
mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping"
mdWithSmartypantsExpected = "<p>\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping</p>"
mdWithAsciiHeart = "hello <3 old friend <3 i loved u </3 :(( you stole my heart"
mdWithAsciiHeartExpected = "<p>hello &lt;3 old friend &lt;3 i loved u &lt;/3 :(( you stole my heart</p>"
mdWithStrikethrough = "I have ~~mdae~~ made an error"
mdWithStrikethroughExpected = "<p>I have <del>mdae</del> made an error</p>"
mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial"
mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>"
)

type MarkdownTestSuite struct {
Expand Down Expand Up @@ -160,6 +166,21 @@ func (suite *MarkdownTestSuite) TestParseSmartypants() {
suite.Equal(mdWithSmartypantsExpected, s)
}

// TestParseAsciiHeart checks that ASCII hearts such as "<3" and "</3" come out
// of FromMarkdown HTML-escaped, matching mdWithAsciiHeartExpected.
func (suite *MarkdownTestSuite) TestParseAsciiHeart() {
	ctx := context.Background()
	rendered := suite.formatter.FromMarkdown(ctx, mdWithAsciiHeart, nil, nil, nil)
	suite.Equal(mdWithAsciiHeartExpected, rendered)
}

// TestParseStrikethrough checks that "~~text~~" is rendered by FromMarkdown as
// a <del> element, matching mdWithStrikethroughExpected.
func (suite *MarkdownTestSuite) TestParseStrikethrough() {
	ctx := context.Background()
	rendered := suite.formatter.FromMarkdown(ctx, mdWithStrikethrough, nil, nil, nil)
	suite.Equal(mdWithStrikethroughExpected, rendered)
}

// TestParseLink checks that a bare URL in the input is turned into an anchor
// by FromMarkdown, matching mdWithLinkExpected.
func (suite *MarkdownTestSuite) TestParseLink() {
	ctx := context.Background()
	rendered := suite.formatter.FromMarkdown(ctx, mdWithLink, nil, nil, nil)
	suite.Equal(mdWithLinkExpected, rendered)
}

// TestMarkdownTestSuite is the go test entry point that runs every test method
// of MarkdownTestSuite.
func TestMarkdownTestSuite(t *testing.T) {
	suite.Run(t, &MarkdownTestSuite{})
}
215 changes: 215 additions & 0 deletions internal/text/markdownextension.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package text

import (
"context"
"unicode"

"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/text"
mdutil "github.com/yuin/goldmark/util"
)

// A goldmark extension that parses potential mentions and hashtags separately from regular
// text, so that they stay as one contiguous text fragment in the AST, and then renders
// them separately too, to avoid scanning normal text for mentions and tags.

// mention and hashtag are the inline AST nodes produced by mentionParser and
// hashtagParser. Together with their Kind and Dump methods they fulfil the
// goldmark ast.Node interface.
type (
	mention struct {
		ast.BaseInline
		// Segment is the span of the source that holds the mention text.
		Segment text.Segment
	}

	hashtag struct {
		ast.BaseInline
		// Segment is the span of the source that holds the hashtag text.
		Segment text.Segment
	}
)

// Node kinds for the custom mention and hashtag nodes; the renderer functions
// are registered against these kinds.
var (
	kindMention = ast.NewNodeKind("Mention")
	kindHashtag = ast.NewNodeKind("Hashtag")
)

// Kind returns kindMention, the node kind shared by all mention nodes.
func (*mention) Kind() ast.NodeKind { return kindMention }

// Kind returns kindHashtag, the node kind shared by all hashtag nodes.
func (*hashtag) Kind() ast.NodeKind { return kindHashtag }

// Dump is part of the node interface and is used by goldmark for debugging.
// Our code never calls it, so it is implemented only minimally: no extra
// key/value attributes and no callback are handed to the dump helper.
func (n *mention) Dump(source []byte, level int) {
	var (
		attrs    map[string]string
		callback func(int)
	)
	ast.DumpHelper(n, source, level, attrs, callback)
}

// Dump is part of the node interface and is used by goldmark for debugging.
// Our code never calls it, so it is implemented only minimally: no extra
// key/value attributes and no callback are handed to the dump helper.
func (n *hashtag) Dump(source []byte, level int) {
	var (
		attrs    map[string]string
		callback func(int)
	)
	ast.DumpHelper(n, source, level, attrs, callback)
}

// newMention wraps a goldmark text.Segment in a mention node. The stored
// segment is what the renderer later reads the mention text from.
func newMention(s text.Segment) *mention {
	node := new(mention)
	node.Segment = s
	return node
}

// newHashtag wraps a goldmark text.Segment in a hashtag node. The stored
// segment is what the renderer later reads the hashtag text from.
func newHashtag(s text.Segment) *hashtag {
	node := new(hashtag)
	node.Segment = s
	return node
}

// mentionParser and hashtagParser are stateless inline parsers; with their
// Trigger and Parse methods they fulfil the goldmark parser.InlineParser
// interface.
type (
	mentionParser struct{}
	hashtagParser struct{}
)

// Trigger returns the single byte '@', the character a mention starts with.
func (*mentionParser) Trigger() []byte { return []byte("@") }

// Trigger returns the single byte '#', the character a hashtag starts with.
func (*hashtagParser) Trigger() []byte { return []byte("#") }

// Parse tries to read a mention at the reader's current position. It returns
// nil, consuming nothing, unless the preceding character is whitespace and
// regexes.MentionFinder matches at the very start of the remaining line.
// On success the reader is advanced past the match and a mention node covering
// exactly the matched bytes is returned.
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	prev := block.PrecendingCharacter()
	line, segment := block.PeekLine()

	if !unicode.IsSpace(prev) {
		return nil
	}

	// unideal for performance but makes use of existing regex
	match := regexes.MentionFinder.FindIndex(line)
	if match == nil || match[0] != 0 {
		// no match at all, or a match that does not begin at the start
		return nil
	}

	end := match[1]
	block.Advance(end)
	return newMention(segment.WithStop(segment.Start + end))
}

// Parse tries to read a hashtag at the reader's current position. It returns
// nil, consuming nothing, when the preceding character is not a hashtag
// boundary or when a character that is neither permitted in a hashtag nor a
// boundary shows up before the hashtag ends. Otherwise the reader is advanced
// to the first boundary character (or to the end of the line when there is
// none) and a hashtag node covering the consumed bytes is returned.
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	prev := block.PrecendingCharacter()
	line, segment := block.PeekLine()

	if !util.IsHashtagBoundary(prev) {
		return nil
	}

	candidate := string(line)
	for offset, r := range candidate {
		if offset == 0 && r == '#' {
			// skip the leading '#'
			continue
		}
		if util.IsHashtagBoundary(r) {
			// End of hashtag
			block.Advance(offset)
			return newHashtag(segment.WithStop(segment.Start + offset))
		}
		if !util.IsPermittedInHashtag(r) {
			// Fake hashtag, don't trust it
			return nil
		}
	}

	// No invalid characters before the end of the line, so the whole line is the hashtag
	block.Advance(len(candidate))
	return newHashtag(segment)
}

// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender
// interfaces. It is created in FromMarkdown to be used as a goldmark
// extension, and its fields are used when rendering mentions and tags.
//
// NOTE: keep the field order as it is; FromMarkdown builds this struct with
// an unkeyed (positional) composite literal.
type customRenderer struct {
	// f provides ReplaceMentions and ReplaceTags.
	f *formatter
	// ctx is handed to the formatter's replace functions.
	ctx context.Context
	// mentions is passed to ReplaceMentions when a mention node is rendered.
	mentions []*gtsmodel.Mention
	// tags is passed to ReplaceTags when a hashtag node is rendered.
	tags []*gtsmodel.Tag
}

// RegisterFuncs registers renderMention for kindMention nodes and
// renderHashtag for kindHashtag nodes with the given registerer.
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
	handlers := []struct {
		kind ast.NodeKind
		fn   renderer.NodeRendererFunc
	}{
		{kindMention, r.renderMention},
		{kindHashtag, r.renderHashtag},
	}
	for _, h := range handlers {
		reg.Register(h.kind, h.fn)
	}
}

// Extend installs the extension on m: the mention and hashtag inline parsers
// are added to the parser, and r itself is added to the renderer as a node
// renderer.
func (r *customRenderer) Extend(m goldmark.Markdown) {
	// 500 is pretty arbitrary here, it was copied from example goldmark extension code.
	// https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111
	const priority = 500

	inlineParsers := parser.WithInlineParsers(
		mdutil.Prioritized(&mentionParser{}, priority),
		mdutil.Prioritized(&hashtagParser{}, priority),
	)
	m.Parser().AddOptions(inlineParsers)

	nodeRenderers := renderer.WithNodeRenderers(
		mdutil.Prioritized(r, priority),
	)
	m.Renderer().AddOptions(nodeRenderers)
}

// renderMention takes a mention ast.Node and renders it as HTML: the node's
// source text is passed through the formatter's ReplaceMentions together with
// the renderer's mentions, and the result is written to w.
//
// Nothing is written on the exit visit (entering == false), nor when node is
// not a *mention. Write failures are logged rather than returned, so the
// returned status is always ast.WalkContinue and the error is always nil.
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		return ast.WalkContinue, nil
	}

	n, ok := node.(*mention) // this function is only registered for kindMention
	if !ok {
		// n is nil when the assertion fails; return now instead of
		// dereferencing it below, which would panic.
		log.Errorf("type assertion failed")
		return ast.WalkContinue, nil
	}

	// Locals are deliberately not called "text" or "html", so they do not
	// shadow the imported goldmark text package.
	raw := string(n.Segment.Value(source))
	rendered := r.f.ReplaceMentions(r.ctx, raw, r.mentions)

	// we don't have much recourse if this fails
	if _, err := w.WriteString(rendered); err != nil {
		log.Errorf("error outputting markdown text: %s", err)
	}
	return ast.WalkContinue, nil
}

// renderHashtag takes a hashtag ast.Node and renders it as HTML: the node's
// source text is passed through the formatter's ReplaceTags together with the
// renderer's tags, and the result is written to w.
//
// Nothing is written on the exit visit (entering == false), nor when node is
// not a *hashtag. Write failures are logged rather than returned, so the
// returned status is always ast.WalkContinue and the error is always nil.
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		return ast.WalkContinue, nil
	}

	n, ok := node.(*hashtag) // this function is only registered for kindHashtag
	if !ok {
		// n is nil when the assertion fails; return now instead of
		// dereferencing it below, which would panic.
		log.Errorf("type assertion failed")
		return ast.WalkContinue, nil
	}

	// Locals are deliberately not called "text" or "html", so they do not
	// shadow the imported goldmark text package.
	raw := string(n.Segment.Value(source))
	rendered := r.f.ReplaceTags(r.ctx, raw, r.tags)

	// we don't have much recourse if this fails
	if _, err := w.WriteString(rendered); err != nil {
		log.Errorf("error outputting markdown text: %s", err)
	}
	return ast.WalkContinue, nil
}
Loading