Skip to content

Commit

Permalink
limit TitleExtractor to allow only Remark42 whitelisted domains
Browse files Browse the repository at this point in the history
Allowed domains consist of `REMARK_URL` second-level domain (or whole IP in case it's IP like `127.0.0.1`) and `ALLOWED_HOSTS`. That is needed to prevent Remark42 from asking arbitrary servers and storing the page title as the comment.PostTitle.

Previous behaviour allowed the caller of the API to create a comment
with an arbitrary URL and learn the title of the page, which might be
accessible to the server Remark42 is installed on but not to the user
outside that network (CWE-918).
  • Loading branch information
paskal committed Oct 10, 2023
1 parent 7a71d47 commit a5c4e68
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 29 deletions.
39 changes: 38 additions & 1 deletion backend/app/cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import (
"context"
"embed"
"fmt"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"path"
"regexp"
"slices"
"strings"
"syscall"
"time"
Expand Down Expand Up @@ -504,7 +506,7 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
MaxVotes: s.MaxVotes,
PositiveScore: s.PositiveScore,
ImageService: imageService,
TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}),
TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}, s.getAllowedDomains()),
RestrictedWordsMatcher: service.NewRestrictedWordsMatcher(service.StaticRestrictedWordsLister{Words: s.RestrictedWords}),
}
dataService.RestrictSameIPVotes.Enabled = s.RestrictVoteIP
Expand Down Expand Up @@ -633,6 +635,41 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
}, nil
}

// Extract second level domains from s.RemarkURL and s.AllowedHosts.
// It can be and IP like http//127.0.0.1 in which case we need to use whole IP as domain
func (s *ServerCommand) getAllowedDomains() []string {
rawDomains := s.AllowedHosts
rawDomains = append(rawDomains, s.RemarkURL)
allowedDomains := []string{}
for _, rawURL := range rawDomains {
// case of 'self' AllowedHosts, which is not a valid rawURL name
if rawURL == "self" || rawURL == "'self'" || rawURL == "\"self\"" {
continue
}
// AllowedHosts usually don't have https:// prefix
if !strings.HasPrefix(rawURL, "http://") && !strings.HasPrefix(rawURL, "https://") {
rawURL = "https://" + rawURL
}
parsedURL, err := url.Parse(rawURL)
if err != nil {
log.Printf("[WARN] failed to parse URL %s for TitleExtract whitelist: %v", rawURL, err)
continue
}
domain := parsedURL.Hostname()
// if domain is not IP and has more than two levels, extract second level domain
if net.ParseIP(domain) == nil && len(strings.Split(domain, ".")) > 2 {
domain = strings.Join(strings.Split(domain, ".")[len(strings.Split(domain, "."))-2:], ".")
}

if domain != "" && // don't add empty domain as it will allow everything to be extracted
!slices.Contains(allowedDomains, domain) && // don't duplicate domains
(domain == "localhost" || len(strings.Split(domain, ".")) > 1) { // don't allow single-word domains like "com" except localhost
allowedDomains = append(allowedDomains, domain)
}
}
return allowedDomains
}

// Run all application objects
func (a *serverApp) run(ctx context.Context) error {
if a.AdminPasswd != "" {
Expand Down
21 changes: 21 additions & 0 deletions backend/app/cmd/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,27 @@ func Test_splitAtCommas(t *testing.T) {
}
}

func Test_getAllowedDomains(t *testing.T) {
tbl := []struct {
s ServerCommand
allowedDomains []string
}{
// correct example, parsed and returned as allowed domain
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "https://remark42.example.org"}}, []string{"example.org"}},
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "http://localhost"}}, []string{"localhost"}},
// incorrect URLs, so Hostname is empty but returned list doesn't include empty string as it would allow any domain
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "bad hostname"}}, []string{}},
{ServerCommand{AllowedHosts: []string{}, CommonOpts: CommonOpts{RemarkURL: "not_a_hostname"}}, []string{}},
// test removal of 'self', multiple AllowedHosts and deduplication
{ServerCommand{AllowedHosts: []string{"'self'", "example.org", "test.example.org", "remark42.com"}, CommonOpts: CommonOpts{RemarkURL: "https://example.org"}}, []string{"example.org", "remark42.com"}},
}
for i, tt := range tbl {
t.Run(strconv.Itoa(i), func(t *testing.T) {
assert.Equal(t, tt.allowedDomains, tt.s.getAllowedDomains())
})
}
}

func chooseRandomUnusedPort() (port int) {
for i := 0; i < 10; i++ {
port = 40000 + int(rand.Int31n(10000))
Expand Down
2 changes: 1 addition & 1 deletion backend/app/rest/api/admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func TestAdmin_Title(t *testing.T) {
ts, srv, teardown := startupT(t)
defer teardown()

srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second})
srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"127.0.0.1"})
tss := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.String() == "/post1" {
_, err := w.Write([]byte("<html><title>post1 blah 123</title><body> 2222</body></html>"))
Expand Down
5 changes: 3 additions & 2 deletions backend/app/rest/api/rest_public_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ func TestRest_FindUserComments(t *testing.T) {

func TestRest_FindUserComments_CWE_918(t *testing.T) {
ts, srv, teardown := startupT(t)
srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}) // required for extracting the title, bad URL test
srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"radio-t.com"}) // required for extracting the title, bad URL test
defer srv.DataService.TitleExtractor.Close()
defer teardown()

Expand All @@ -496,7 +496,8 @@ func TestRest_FindUserComments_CWE_918(t *testing.T) {

assert.False(t, backendRequestedArbitraryServer)
addComment(t, arbitraryURLComment, ts)
assert.True(t, backendRequestedArbitraryServer)
assert.False(t, backendRequestedArbitraryServer,
"no request is expected to the test server as it's not in the list of the allowed domains for the title extractor")

res, code := get(t, ts.URL+"/api/v1/comments?site=remark42&user=provider1_dev")
assert.Equal(t, http.StatusOK, code)
Expand Down
10 changes: 5 additions & 5 deletions backend/app/store/service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func TestService_CreateFromPartialWithTitle(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})}
defer b.Close()

postPath := "/post/42"
Expand Down Expand Up @@ -195,7 +195,7 @@ func TestService_SetTitle(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})}
defer b.Close()
comment := store.Comment{
Text: "text",
Expand Down Expand Up @@ -1658,10 +1658,10 @@ func TestService_DoubleClose_Static(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
b.Close()
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})}
assert.NoError(t, b.Close())
// second call should not result in panic or errors
b.Close()
assert.NoError(t, b.Close())
}

// makes new boltdb, put two records
Expand Down
42 changes: 30 additions & 12 deletions backend/app/store/service/title.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"

Expand All @@ -19,14 +20,16 @@ const (

// TitleExtractor gets html title from remote page, cached
type TitleExtractor struct {
client http.Client
cache lcw.LoadingCache
client http.Client
cache lcw.LoadingCache
allowedDomains []string
}

// NewTitleExtractor makes extractor with cache. If memory cache failed, switching to no-cache
func NewTitleExtractor(client http.Client) *TitleExtractor {
func NewTitleExtractor(client http.Client, allowedDomains []string) *TitleExtractor {
res := TitleExtractor{
client: client,
client: client,
allowedDomains: allowedDomains,
}
var err error
res.cache, err = lcw.NewExpirableCache(lcw.TTL(teCacheTTL), lcw.MaxKeySize(teCacheMaxRecs))
Expand All @@ -38,33 +41,48 @@ func NewTitleExtractor(client http.Client) *TitleExtractor {
}

// Get page for url and return title
func (t *TitleExtractor) Get(url string) (string, error) {
func (t *TitleExtractor) Get(pageURL string) (string, error) {
// parse domain of the URL and check if it's in the allowed list
u, err := url.Parse(pageURL)
if err != nil {
return "", fmt.Errorf("failed to parse url %s: %w", pageURL, err)
}
allowed := false
for _, domain := range t.allowedDomains {
if strings.HasSuffix(u.Hostname(), domain) {
allowed = true
break
}
}
if !allowed {
return "", fmt.Errorf("domain %s is not allowed", u.Host)
}
client := http.Client{Timeout: t.client.Timeout, Transport: t.client.Transport}
defer client.CloseIdleConnections()
b, err := t.cache.Get(url, func() (interface{}, error) {
resp, err := client.Get(url)
if err != nil {
return nil, fmt.Errorf("failed to load page %s: %w", url, err)
b, err := t.cache.Get(pageURL, func() (interface{}, error) {
resp, e := client.Get(pageURL)
if e != nil {
return nil, fmt.Errorf("failed to load page %s: %w", pageURL, e)
}
defer func() {
if err = resp.Body.Close(); err != nil {
log.Printf("[WARN] failed to close title extractor body, %v", err)
}
}()
if resp.StatusCode != 200 {
return nil, fmt.Errorf("can't load page %s, code %d", url, resp.StatusCode)
return nil, fmt.Errorf("can't load page %s, code %d", pageURL, resp.StatusCode)
}

title, ok := t.getTitle(resp.Body)
if !ok {
return nil, fmt.Errorf("can't get title for %s", url)
return nil, fmt.Errorf("can't get title for %s", pageURL)
}
return title, nil
})

// on error save result (empty string) to cache too and return "" title
if err != nil {
_, _ = t.cache.Get(url, func() (interface{}, error) { return "", nil })
_, _ = t.cache.Get(pageURL, func() (interface{}, error) { return "", nil })
return "", err
}

Expand Down
16 changes: 8 additions & 8 deletions backend/app/store/service/title_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestTitle_GetTitle(t *testing.T) {
{`<html><body> 2222</body></html>`, false, ""},
}

ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})
defer ex.Close()
for i, tt := range tbl {
tt := tt
Expand All @@ -41,7 +41,7 @@ func TestTitle_GetTitle(t *testing.T) {
}

func TestTitle_Get(t *testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -75,7 +75,7 @@ func TestTitle_GetConcurrent(t *testing.T) {
for n := 0; n < 1000; n++ {
body += "something something blah blah\n"
}
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -104,7 +104,7 @@ func TestTitle_GetConcurrent(t *testing.T) {
}

func TestTitle_GetFailed(t *testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand All @@ -124,9 +124,9 @@ func TestTitle_GetFailed(t *testing.T) {
assert.Equal(t, int32(1), atomic.LoadInt32(&hits), "hit once, errors cached")
}

func TestTitle_DoubleClosed(*testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex.Close()
func TestTitle_DoubleClosed(t *testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})
assert.NoError(t, ex.Close())
// second call should not result in panic
ex.Close()
assert.NoError(t, ex.Close())
}

0 comments on commit a5c4e68

Please sign in to comment.