-
Notifications
You must be signed in to change notification settings - Fork 0
/
handler.go
160 lines (136 loc) · 4.18 KB
/
handler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
package head
import (
"encoding/json"
"io/ioutil"
"net/http"
"strings"
)
// URLHandler is an HTTP handler that responds with a serialized *Object after
// processing a URL's <head> tag. This URL is obtained from the HTTP request.
// It can be provided as a query parameter or in the HTTP body. The GetURL
// function determines how the URL is extracted from an HTTP request.
//
// Every field is optional: ServeHTTP falls back to DefaultGetURL and
// DefaultWriteResponse when the corresponding hook is nil, and hands Client
// to ProcessURL unchanged (how a nil Client is treated is up to ProcessURL).
type URLHandler struct {
// Client is the *http.Client used to make HTTP requests. ServeHTTP passes
// it to ProcessURL as-is.
Client *http.Client
// GetURL is used to extract the URL from an HTTP request. If GetURL is
// nil, DefaultGetURL is used. A non-nil error is reported to the client
// as 500 Internal Server Error; an empty URL as 400 Bad Request.
GetURL func(r *http.Request) (string, error)
// WriteResponse is used by URLHandler to produce an HTTP response. It
// transforms and serializes *Object and writes it to w. If WriteResponse
// is nil, DefaultWriteResponse is used.
WriteResponse func(w http.ResponseWriter, obj *Object)
}
// HTMLHandler is an HTTP handler that responds with a serialized *Object after
// processing an HTML string's <head> tag. This HTML string is obtained from
// the HTTP request. It can be provided as a query parameter or in the HTTP
// body. The GetHTML function determines how the HTML string is extracted from
// an HTTP request.
//
// Every field is optional: ServeHTTP falls back to DefaultGetHTML and
// DefaultWriteResponse when the corresponding hook is nil.
type HTMLHandler struct {
// GetHTML is used to extract the HTML string from an HTTP request. If
// GetHTML is nil, DefaultGetHTML is used. A non-nil error is reported to
// the client as 500 Internal Server Error; an empty string as 400 Bad
// Request.
GetHTML func(r *http.Request) (string, error)
// WriteResponse is used by HTMLHandler to produce an HTTP response. It
// transforms and serializes *Object and writes it to w. If WriteResponse
// is nil, DefaultWriteResponse is used.
WriteResponse func(w http.ResponseWriter, obj *Object)
}
// ServeHTTP implements http.Handler. It extracts a URL from the request,
// runs it through ProcessURL and writes the resulting *Object.
//
// Status codes:
//   - 500 when the URL extractor returns an error,
//   - 400 when the extracted URL is empty,
//   - 400 when ProcessURL fails with *urlParseError or *httpClientError,
//   - 500 for any other ProcessURL error.
//
// A nil receiver is treated as a zero-value URLHandler, so every hook falls
// back to its default.
func (h *URLHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if h == nil {
		h = &URLHandler{}
	}

	extract := h.GetURL
	if extract == nil {
		extract = DefaultGetURL
	}

	target, err := extract(r)
	switch {
	case err != nil:
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	case target == "":
		http.Error(w, "url not found in request", http.StatusBadRequest)
		return
	}

	obj, err := ProcessURL(target, h.Client)
	if err != nil {
		// Only the two known error types are reported as the client's
		// fault; everything else is reported as a server-side failure.
		status := http.StatusInternalServerError
		switch err.(type) {
		case *urlParseError, *httpClientError:
			status = http.StatusBadRequest
		}
		http.Error(w, err.Error(), status)
		return
	}

	respond := h.WriteResponse
	if respond == nil {
		respond = DefaultWriteResponse
	}
	respond(w, obj)
}
// ServeHTTP implements http.Handler. It extracts an HTML string from the
// request, feeds it to ParseHTML and writes the resulting *Object.
//
// Status codes:
//   - 500 when the HTML extractor returns an error,
//   - 400 when the extracted HTML string is empty,
//   - 400 when ParseHTML returns an error.
//
// A nil receiver is treated as a zero-value HTMLHandler, so every hook falls
// back to its default.
func (h *HTMLHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if h == nil {
		h = &HTMLHandler{}
	}

	extract := h.GetHTML
	if extract == nil {
		extract = DefaultGetHTML
	}

	markup, err := extract(r)
	switch {
	case err != nil:
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	case markup == "":
		http.Error(w, "html string not found in request", http.StatusBadRequest)
		return
	}

	obj, err := ParseHTML(strings.NewReader(markup))
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	respond := h.WriteResponse
	if respond == nil {
		respond = DefaultWriteResponse
	}
	respond(w, obj)
}
// DefaultGetURL is used to extract the URL from an HTTP request. It first
// tries to read the `url` query parameter and then the HTTP request body.
// Whichever is present is used as the URL that needs to be processed.
//
// It returns the empty string and a nil error when neither source carries a
// URL (including a request whose Body is nil). A non-nil error is returned
// only when reading the body fails. The body is closed whenever it was read,
// whether or not the read succeeded; it is left untouched when the query
// parameter is present.
//
// NOTE(review): the body is read in full without a size limit. Deployments
// exposed to untrusted clients may want to cap it upstream (for example with
// http.MaxBytesReader).
func DefaultGetURL(r *http.Request) (string, error) {
	if u := r.URL.Query().Get("url"); u != "" {
		return u, nil
	}

	// Requests built by hand (http.NewRequest with a nil body) have a nil
	// Body; reading it would panic, so treat it as "no URL supplied".
	if r.Body == nil {
		return "", nil
	}
	// Deferred so the body is released on the read-error path as well.
	defer r.Body.Close()

	bs, err := ioutil.ReadAll(r.Body)
	if err != nil {
		return "", err
	}
	return string(bs), nil
}
// DefaultGetHTML is used to extract the HTML from an HTTP request. It simply
// reads the HTTP request body and returns it as a string.
//
// It returns the empty string and a nil error when the request has no body
// (Body is nil) or the body is empty. A non-nil error is returned only when
// reading the body fails. The body is closed whether or not the read
// succeeded.
//
// NOTE(review): the body is read in full without a size limit. Deployments
// exposed to untrusted clients may want to cap it upstream (for example with
// http.MaxBytesReader).
func DefaultGetHTML(r *http.Request) (string, error) {
	// Requests built by hand (http.NewRequest with a nil body) have a nil
	// Body; reading it would panic, so treat it as "no HTML supplied".
	if r.Body == nil {
		return "", nil
	}
	// Deferred so the body is released on the read-error path as well.
	defer r.Body.Close()

	bs, err := ioutil.ReadAll(r.Body)
	if err != nil {
		return "", err
	}
	return string(bs), nil
}
// DefaultWriteResponse serializes *Object as JSON and writes it to w.
//
// On success the response carries a Content-Type of application/json and a
// 200 OK status. If obj cannot be marshalled, the marshalling error text is
// sent with a 500 Internal Server Error instead and nothing else is written.
func DefaultWriteResponse(w http.ResponseWriter, obj *Object) {
	payload, err := json.Marshal(obj)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// The status line is already committed, so a failed write cannot be
	// reported to the client; the result is deliberately discarded.
	_, _ = w.Write(payload)
}