Skip to content

Commit

Permalink
Merge pull request #24 from gummiboll/copyheadersonredirect
Browse files Browse the repository at this point in the history
Copy the headers from the last request on redirect
  • Loading branch information
asciimoo authored Oct 13, 2017
2 parents d1a65d3 + 928ff10 commit b45e8de
Showing 1 changed file with 27 additions and 0 deletions.
27 changes: 27 additions & 0 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package colly
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
Expand Down Expand Up @@ -130,6 +131,7 @@ func (c *Collector) Init() {
c.MaxBodySize = 10 * 1024 * 1024
c.backend = &httpBackend{}
c.backend.Init()
c.backend.Client.CheckRedirect = c.checkRedirectFunc()
c.wg = &sync.WaitGroup{}
c.lock = &sync.Mutex{}
}
Expand Down Expand Up @@ -356,6 +358,31 @@ func (c *Collector) Cookies(URL string) []*http.Cookie {
return c.backend.Client.Jar.Cookies(u)
}

// checkRedirectFunc returns the redirect policy that Init installs as the
// HTTP client's CheckRedirect hook.
//
// The returned function:
//   - rejects the redirect with an error when the target host fails
//     c.isDomainAllowed,
//   - stops following once 10 requests have already been made by returning
//     http.ErrUseLastResponse, which makes the client hand back the most
//     recent response instead of an error,
//   - replaces the upcoming request's headers with a copy of the previous
//     request's headers, and
//   - removes the Authorization header when the redirect changes host.
func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Request) error {
	return func(req *http.Request, via []*http.Request) error {
		if !c.isDomainAllowed(req.URL.Host) {
			return fmt.Errorf("Not following redirect to %s because its not in AllowedDomains", req.URL.Host)
		}

		// Honor Go's default maximum of 10 redirects.
		if len(via) >= 10 {
			return http.ErrUseLastResponse
		}

		// net/http documents that via always holds at least one request;
		// this guard only protects direct callers from an index panic.
		if len(via) == 0 {
			return nil
		}

		lastRequest := via[len(via)-1]

		// Copy the headers from the last request. The copy must be deep:
		// sharing the map would let the Authorization removal below (and any
		// later edit to req.Header) leak back into the previous request,
		// which for the first hop is the caller's own request.
		headers := make(http.Header, len(lastRequest.Header))
		for name, values := range lastRequest.Header {
			headers[name] = append([]string(nil), values...)
		}
		req.Header = headers

		// If the host has changed, do not forward credentials to it.
		if req.URL.Host != lastRequest.URL.Host {
			req.Header.Del("Authorization")
		}

		return nil
	}
}

// Attr returns the selected attribute of a HTMLElement or empty string
// if no attribute found
func (h *HTMLElement) Attr(k string) string {
Expand Down

0 comments on commit b45e8de

Please sign in to comment.