Skip to content

Commit

Permalink
#61 add gzip
Browse files Browse the repository at this point in the history
close #61
  • Loading branch information
zhengchun committed Mar 1, 2023
1 parent c960c2a commit cc1aeb6
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 4 deletions.
37 changes: 33 additions & 4 deletions query.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package htmlquery

import (
"bufio"
"compress/gzip"
"compress/zlib"
"fmt"
"io"
"net/http"
Expand Down Expand Up @@ -88,15 +90,42 @@ func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node {
return elems
}

// LoadURL loads the HTML document from the specified URL.
// LoadURL loads the HTML document from the specified URL. Gzip compression is requested by default on the HTTP request.
func LoadURL(url string) (*html.Node, error) {
resp, err := http.Get(url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
defer resp.Body.Close()
// Enable gzip compression.
req.Header.Add("Accept-Encoding", "gzip")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, err
}
var reader io.ReadCloser

defer func() {
if reader != nil {
reader.Close()
}
}()

switch resp.Header.Get("Content-Encoding") {
case "gzip":
reader, err = gzip.NewReader(resp.Body)
if err != nil {
return nil, err
}
case "deflate":
reader, err = zlib.NewReader(resp.Body)
if err != nil {
return nil, err
}
default:
reader = resp.Body
}

r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
r, err := charset.NewReader(reader, resp.Header.Get("Content-Type"))
if err != nil {
return nil, err
}
Expand Down
16 changes: 16 additions & 0 deletions query_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package htmlquery

import (
"compress/gzip"
"fmt"
"io/ioutil"
"net/http"
Expand Down Expand Up @@ -80,6 +81,21 @@ func TestLoadURL(t *testing.T) {
}
}

// TestLoadURLWithGzipResponse verifies that LoadURL returns no error when the
// server labels its response "Content-Encoding: gzip" and sends a
// gzip-compressed body.
func TestLoadURLWithGzipResponse(t *testing.T) {
	// Handler that streams htmlSample through a gzip writer.
	gzipHandler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Add("Content-Encoding", "gzip")
		zw := gzip.NewWriter(w)
		// Close flushes pending compressed data and writes the gzip footer.
		defer zw.Close()
		fmt.Fprint(zw, htmlSample)
	}

	srv := httptest.NewServer(http.HandlerFunc(gzipHandler))
	defer srv.Close()

	if _, err := LoadURL(srv.URL); err != nil {
		t.Fatal(err)
	}
}

func TestLoadDoc(t *testing.T) {
tempHTMLdoc, err := ioutil.TempFile("", "sample_*.html")
if err != nil {
Expand Down

0 comments on commit cc1aeb6

Please sign in to comment.