Skip to content

Commit

Permalink
converter: allow base domain with url scheme
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesKaufmann committed Nov 17, 2024
1 parent 5af1444 commit 4d6b2ff
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 17 deletions.
17 changes: 8 additions & 9 deletions cli/cmd/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -454,15 +454,14 @@ func TestExecute_General(t *testing.T) {

expectedStdout: []byte("![](http://example.com/image.png)\n"),
},
// TODO: with https domain
// {
// desc: "[domain] with https domain",
//
// inputStdin: []byte(`<img src="/image.png" />`),
// inputArgs: []string{"html2markdown", "--domain", "https://example.com"},
//
// expectedStdout: []byte("![](https://example.com/image.png)\n"),
// },
{
desc: "[domain] with https domain",

inputStdin: []byte(`<img src="/image.png" />`),
inputArgs: []string{"html2markdown", "--domain", "https://example.com"},

expectedStdout: []byte("![](https://example.com/image.png)\n"),
},

// - - - - - selectors - - - - - //
{
Expand Down
30 changes: 23 additions & 7 deletions converter/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@ var percentEncodingReplacer = strings.NewReplacer(
">", "%3E",
)

// parseBaseDomain turns the user-supplied domain into a base URL that
// relative links can be resolved against.
//
// The input may be a bare host ("example.com"), a host with a scheme
// ("https://example.com") or a scheme-relative host ("//example.com").
// Surrounding whitespace is ignored. When the input carries no scheme,
// "http" is used so that the resulting base always produces absolute URLs.
//
// It returns nil if the input is empty or no host can be extracted from it.
func parseBaseDomain(rawDomain string) *url.URL {
	rawDomain = strings.TrimSpace(rawDomain)
	if rawDomain == "" {
		return nil
	}

	// First attempt: the input already is a URL with a host
	// (e.g. "https://example.com" or "//example.com").
	if u, err := url.Parse(rawDomain); err == nil && u.Host != "" {
		if u.Scheme == "" {
			// A scheme-relative input has a host but no scheme. Without a
			// scheme the resolved links would stay scheme-relative, so we
			// apply the same fallback as for a bare host.
			u.Scheme = "http"
		}
		return u
	}

	// Second attempt: a bare host like "example.com" or "localhost:8080"
	// is not recognized as a host by url.Parse, so we retry with a
	// fallback scheme in front of it.
	if u, err := url.Parse("http://" + rawDomain); err == nil && u.Host != "" {
		return u
	}

	return nil
}
func defaultAssembleAbsoluteURL(tagName string, rawURL string, domain string) string {
rawURL = strings.TrimSpace(rawURL)

Expand Down Expand Up @@ -51,13 +70,10 @@ func defaultAssembleAbsoluteURL(tagName string, rawURL string, domain string) st
// e.g. the email reading "Hi+Johannes" instead of "Hi Johannes"
u.RawQuery = strings.ReplaceAll(u.RawQuery, "+", "%20")

if domain != "" {
if u.Scheme == "" {
u.Scheme = "http"
}
if u.Host == "" {
u.Host = domain
}
if base := parseBaseDomain(domain); base != nil {
// If a "domain" is provided, we use that to convert relative links
// to absolute links.
u = base.ResolveReference(u)
}

return percentEncodingReplacer.Replace(u.String())
Expand Down
30 changes: 29 additions & 1 deletion converter/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,34 @@ func TestDefaultAssembleAbsoluteURL(t *testing.T) {

expected: "http://test.com/page.html?key=val#hash",
},
{
desc: "with http domain",

tagName: "a",
input: "/page.html?key=val#hash",
domain: "http://test.com",

expected: "http://test.com/page.html?key=val#hash",
},
{
desc: "with https domain",

tagName: "a",
input: "/page.html?key=val#hash",
domain: "https://test.com",

expected: "https://test.com/page.html?key=val#hash",
},
{
desc: "with domain that includes path",

tagName: "a",
input: "/page.html?key=val#hash",
domain: "https://test.com/random_stuff",

expected: "https://test.com/page.html?key=val#hash",
},

{
desc: "data uri",

Expand Down Expand Up @@ -223,7 +251,7 @@ func TestDefaultAssembleAbsoluteURL(t *testing.T) {
}
}

func TestParseAndEncode(t *testing.T) {
func TestParseAndEncodeQuery(t *testing.T) {
runs := []struct {
desc string

Expand Down

0 comments on commit 4d6b2ff

Please sign in to comment.