Skip to content

Commit

Permalink
limit TitleExtractor to allow only Remark42 whitelisted domains
Browse files Browse the repository at this point in the history
Allowed domains consist of `REMARK_URL` second-level domain (or whole IP in case it's IP like `127.0.0.1`) and `ALLOWED_HOSTS`. That is needed to prevent Remark42 from asking arbitrary servers and storing the page title as the comment.PostTitle.

Previous behaviour allowed the caller of the API to create a comment
with an arbitrary URL and learn the title of the page, which might be
accessible to the server Remark42 is installed on but not to the user
outside that network (CWE-918).
  • Loading branch information
paskal committed Oct 10, 2023
1 parent a5c4e68 commit f0071ae
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 10 deletions.
16 changes: 9 additions & 7 deletions backend/app/cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"os/signal"
"path"
"regexp"
"slices"
"strings"
"syscall"
"time"
Expand Down Expand Up @@ -646,7 +645,7 @@ func (s *ServerCommand) getAllowedDomains() []string {
if rawURL == "self" || rawURL == "'self'" || rawURL == "\"self\"" {
continue
}
// AllowedHosts usually don't have https:// prefix
// AllowedHosts usually don't have https:// prefix, so we're adding it just to make parsing below work the same way as for RemarkURL
if !strings.HasPrefix(rawURL, "http://") && !strings.HasPrefix(rawURL, "https://") {
rawURL = "https://" + rawURL
}
Expand All @@ -656,16 +655,19 @@ func (s *ServerCommand) getAllowedDomains() []string {
continue
}
domain := parsedURL.Hostname()

if domain == "" || // don't add empty domain as it will allow everything to be extracted
(len(strings.Split(domain, ".")) < 2 && // don't allow single-word domains like "com"
domain != "localhost") { // localhost is an exceptional single-word domain which is allowed
continue
}

// if domain is not IP and has more than two levels, extract second level domain
if net.ParseIP(domain) == nil && len(strings.Split(domain, ".")) > 2 {
domain = strings.Join(strings.Split(domain, ".")[len(strings.Split(domain, "."))-2:], ".")
}

if domain != "" && // don't add empty domain as it will allow everything to be extracted
!slices.Contains(allowedDomains, domain) && // don't duplicate domains
(domain == "localhost" || len(strings.Split(domain, ".")) > 1) { // don't allow single-word domains like "com" except localhost
allowedDomains = append(allowedDomains, domain)
}
allowedDomains = append(allowedDomains, domain)
}
return allowedDomains
}
Expand Down
5 changes: 3 additions & 2 deletions backend/app/cmd/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -733,12 +733,13 @@ func Test_getAllowedDomains(t *testing.T) {
}{
// correct example, parsed and returned as allowed domain
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "https://remark42.example.org"}}, []string{"example.org"}},
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "http://remark42.example.org"}}, []string{"example.org"}},
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "http://localhost"}}, []string{"localhost"}},
// incorrect URLs, so Hostname is empty but returned list doesn't include empty string as it would allow any domain
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "bad hostname"}}, []string{}},
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "not_a_hostname"}}, []string{}},
// test removal of 'self', multiple AllowedHosts and deduplication
{ServerCommand{AllowedHosts: []string{"'self'", "example.org", "test.example.org", "remark42.com"}, CommonOpts: CommonOpts{RemarkURL: "https://example.org"}}, []string{"example.org", "remark42.com"}},
// test removal of 'self', multiple AllowedHosts. No deduplication is expected
{ServerCommand{AllowedHosts: []string{"'self'", "example.org", "test.example.org", "remark42.com"}, CommonOpts: CommonOpts{RemarkURL: "https://example.org"}}, []string{"example.org", "example.org", "remark42.com", "example.org"}},
}
for i, tt := range tbl {
t.Run(strconv.Itoa(i), func(t *testing.T) {
Expand Down
5 changes: 4 additions & 1 deletion backend/app/store/service/title.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type TitleExtractor struct {

// NewTitleExtractor makes extractor with cache. If memory cache failed, switching to no-cache
func NewTitleExtractor(client http.Client, allowedDomains []string) *TitleExtractor {
log.Printf("[DEBUG] creating extractor, allowed domains %+v", allowedDomains)
res := TitleExtractor{
client: client,
allowedDomains: allowedDomains,
Expand All @@ -49,7 +50,9 @@ func (t *TitleExtractor) Get(pageURL string) (string, error) {
}
allowed := false
for _, domain := range t.allowedDomains {
if strings.HasSuffix(u.Hostname(), domain) {
if u.Hostname() == domain ||
(strings.HasSuffix(u.Hostname(), domain) && // suffix match, e.g. "example.com" matches "www.example.com"
u.Hostname()[len(u.Hostname())-len(domain)-1] == '.') { // but we should not match "notexample.com"
allowed = true
break
}
Expand Down

0 comments on commit f0071ae

Please sign in to comment.