diff --git a/backend/app/cmd/server.go b/backend/app/cmd/server.go index cfb8236b61..bb1e9dd88d 100644 --- a/backend/app/cmd/server.go +++ b/backend/app/cmd/server.go @@ -11,7 +11,6 @@ import ( "os/signal" "path" "regexp" - "slices" "strings" "syscall" "time" @@ -646,7 +645,7 @@ func (s *ServerCommand) getAllowedDomains() []string { if rawURL == "self" || rawURL == "'self'" || rawURL == "\"self\"" { continue } - // AllowedHosts usually don't have https:// prefix + // AllowedHosts usually don't have https:// prefix, so we're adding it just to make parsing below work the same way as for RemarkURL if !strings.HasPrefix(rawURL, "http://") && !strings.HasPrefix(rawURL, "https://") { rawURL = "https://" + rawURL } @@ -656,16 +655,19 @@ func (s *ServerCommand) getAllowedDomains() []string { continue } domain := parsedURL.Hostname() + + if domain == "" || // don't add empty domain as it will allow everything to be extracted + (len(strings.Split(domain, ".")) < 2 && // don't allow single-word domains like "com" + domain != "localhost") { // localhost is an exceptional single-word domain which is allowed + continue + } + // if domain is not IP and has more than two levels, extract second level domain if net.ParseIP(domain) == nil && len(strings.Split(domain, ".")) > 2 { domain = strings.Join(strings.Split(domain, ".")[len(strings.Split(domain, "."))-2:], ".") } - if domain != "" && // don't add empty domain as it will allow everything to be extracted - !slices.Contains(allowedDomains, domain) && // don't duplicate domains - (domain == "localhost" || len(strings.Split(domain, ".")) > 1) { // don't allow single-word domains like "com" except localhost - allowedDomains = append(allowedDomains, domain) - } + allowedDomains = append(allowedDomains, domain) } return allowedDomains } diff --git a/backend/app/cmd/server_test.go b/backend/app/cmd/server_test.go index 0ad9a9f525..7de5f20e76 100644 --- a/backend/app/cmd/server_test.go +++ b/backend/app/cmd/server_test.go @@ -733,12 +733,13 @@ func Test_getAllowedDomains(t *testing.T) { }{ // correct example, parsed and returned as allowed domain {ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "https://remark42.example.org"}}, []string{"example.org"}}, + {ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "http://remark42.example.org"}}, []string{"example.org"}}, {ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "http://localhost"}}, []string{"localhost"}}, // incorrect URLs, so Hostname is empty but returned list doesn't include empty string as it would allow any domain {ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "bad hostname"}}, []string{}}, {ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "not_a_hostname"}}, []string{}}, - // test removal of 'self', multiple AllowedHosts and deduplication - {ServerCommand{AllowedHosts: []string{"'self'", "example.org", "test.example.org", "remark42.com"}, CommonOpts: CommonOpts{RemarkURL: "https://example.org"}}, []string{"example.org", "remark42.com"}}, + // test removal of 'self', multiple AllowedHosts. No deduplication is expected + {ServerCommand{AllowedHosts: []string{"'self'", "example.org", "test.example.org", "remark42.com"}, CommonOpts: CommonOpts{RemarkURL: "https://example.org"}}, []string{"example.org", "example.org", "remark42.com", "example.org"}}, } for i, tt := range tbl { t.Run(strconv.Itoa(i), func(t *testing.T) { diff --git a/backend/app/store/service/title.go b/backend/app/store/service/title.go index 22c19cc27e..bc5e61b35b 100644 --- a/backend/app/store/service/title.go +++ b/backend/app/store/service/title.go @@ -27,6 +27,7 @@ type TitleExtractor struct { // NewTitleExtractor makes extractor with cache. If memory cache failed, switching to no-cache func NewTitleExtractor(client http.Client, allowedDomains []string) *TitleExtractor { + log.Printf("[DEBUG] creating extractor, allowed domains %+v", allowedDomains) res := TitleExtractor{ client: client, allowedDomains: allowedDomains, @@ -49,7 +50,9 @@ func (t *TitleExtractor) Get(pageURL string) (string, error) { } allowed := false for _, domain := range t.allowedDomains { - if strings.HasSuffix(u.Hostname(), domain) { + if u.Hostname() == domain || + (strings.HasSuffix(u.Hostname(), domain) && // suffix match, e.g. "example.com" matches "www.example.com" + u.Hostname()[len(u.Hostname())-len(domain)-1] == '.') { // but we should not match "notexample.com" allowed = true break }