-
Notifications
You must be signed in to change notification settings - Fork 1
/
http_parser_sanity.ml
115 lines (94 loc) · 3.69 KB
/
http_parser_sanity.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
(*
OCaml HTTP - do it yourself (fully OCaml) HTTP daemon
Copyright (C) <2002-2005> Stefano Zacchiroli <zack@cs.unibo.it>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as
published by the Free Software Foundation, version 2.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
*)
open Printf
open Http_types
open Http_constants
(*
type url_syntax_option =
Url_part_not_recognized
| Url_part_allowed
| Url_part_required
* (1) scheme://user:password@host:port/path;params?query#fragment
*)
(* Neturl syntax descriptor used to parse request URIs.
   Only two URL parts are enabled: the path, which is required, and the
   query, which is allowed. Every other part (scheme, user, user parameters,
   password, host, port, path parameters, fragment, other) is not recognized.
   8-bit characters are rejected, relative URLs are enabled, and the extra
   validity hook accepts everything. *)
let request_uri_syntax =
  let off = Neturl.Url_part_not_recognized in
  let optional = Neturl.Url_part_allowed in
  let mandatory = Neturl.Url_part_required in
  {
    Neturl.url_enable_scheme = off;
    url_enable_user = off;
    url_enable_user_param = off;
    url_enable_password = off;
    url_enable_host = off;
    url_enable_port = off;
    url_enable_path = mandatory;
    url_enable_param = off;
    url_enable_query = optional;
    url_enable_fragment = off;
    url_enable_other = off;
    url_accepts_8bits = false;
    url_enable_relative = true;
    url_is_valid = (fun _ -> true);
  }
(* convention:
foo_RE_raw is the uncompiled regexp matching foo
foo_RE is the compiled regexp matching foo
is_foo is the predicate over string matching foo
*)
(* Body of a character class listing the separator characters. It carries no
   enclosing square brackets, so it must be spliced inside a class. *)
let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t"
(* Body of a character class covering the control characters
   (0x00 to 0x1F, plus 0x7F). *)
let ctls_RE_raw = "\\x00-\\x1F\\x7F"
(* A token: one or more characters that are neither separators nor controls. *)
let token_RE_raw = sprintf "[^%s%s]+" separators_RE_raw ctls_RE_raw
(* Linear white space: an optional CRLF followed by one blank or tab. *)
let lws_RE_raw = "(\r\n)?[ \t]"
(* A double-quote-delimited string; a backslash-escaped double quote is
   accepted inside it. *)
let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\""
(* Text: one or more items, each either a non-control character or a
   linear white space. *)
let text_RE_raw = sprintf "(([^%s])|(%s))+" ctls_RE_raw lws_RE_raw
(* Anchored raw regexp for the field-content production: any sequence of
   tokens, single separator characters, quoted strings, or text.
   separators_RE_raw is only the body of a character class, so it is wrapped
   in square brackets here; spliced in bare, it would be read as one literal
   sequence of characters instead of any one separator. *)
let field_content_RE_raw =
  sprintf
    "^(((%s)|([%s])|(%s))|(%s))*$"
    token_RE_raw
    separators_RE_raw
    quoted_string_RE_raw
    text_RE_raw
(*
(* following RFC 2616 specifications *)
let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*"
*)
(* smarter implementation: TEXT production is included in the regexp below *)
(* Anchored raw regexp for a header field value: any sequence of tokens,
   single separator characters, quoted strings, or linear white space.
   separators_RE_raw is only the body of a character class, so it is wrapped
   in square brackets here. Spliced in bare, it matched one fixed literal
   sequence of characters, so values containing a lone separator (a slash,
   a semicolon, an equals sign, ...) were rejected. *)
let field_value_RE_raw =
  sprintf
    "^((%s)|([%s])|(%s)|(%s))*$"
    token_RE_raw
    separators_RE_raw
    quoted_string_RE_raw
    lws_RE_raw
(* Compiled matcher for a string consisting of exactly one token. *)
let token_RE = Pcre.regexp (sprintf "^%s$" token_RE_raw)
(* Compiled matcher for a whole field value. field_value_RE_raw already
   carries its own anchors; the extra pair added here is redundant and is
   kept as is. *)
let field_value_RE = Pcre.regexp (sprintf "^%s$" field_value_RE_raw)
(* Compiled matchers for white space at the start / at the end of a string.
   NOTE(review): lws_RE_raw is spliced in without an enclosing group, so the
   star appended here quantifies only its final blank-or-tab class, not the
   whole linear-white-space pattern -- confirm this is intended. *)
let heading_lws_RE = Pcre.regexp ("^" ^ lws_RE_raw ^ "*")
let trailing_lws_RE = Pcre.regexp (lws_RE_raw ^ "*$")
(* [is_token candidate] is true when [candidate] matches token_RE. *)
let is_token candidate = Pcre.pmatch ~rex:token_RE candidate
(* A header field name is valid exactly when it is a token. *)
let is_field_name candidate = is_token candidate
(* [is_field_value candidate] is true when [candidate] matches
   field_value_RE. *)
let is_field_value candidate = Pcre.pmatch ~rex:field_value_RE candidate
(* [heal_header_name name] checks [name] with is_field_name. It returns unit
   when the check passes and changes nothing in [name].
   Raises Invalid_header_name, carrying [name], when the check fails. *)
let heal_header_name name =
  if is_field_name name then () else raise (Invalid_header_name name)
(* [heal_header_value value] checks [value] with is_field_value. It returns
   unit when the check passes and changes nothing in [value].
   Raises Invalid_header_value, carrying [value], when the check fails. *)
let heal_header_value value =
  if is_field_value value then () else raise (Invalid_header_value value)
(* [normalize_header_value s] returns [s] with the matches of heading_lws_RE
   removed first, and then the matches of trailing_lws_RE removed from that
   intermediate result. *)
let normalize_header_value s =
  let without_heading = Pcre.replace ~rex:heading_lws_RE s in
  Pcre.replace ~rex:trailing_lws_RE without_heading
(* [heal_header (name, value)] validates a header pair: the name is checked
   with heal_header_name, then the value with heal_header_value.
   Raises Invalid_header_name if [name] is rejected, or Invalid_header_value
   if [value] is rejected.
   The value check must receive [value]; passing [name] a second time would
   leave the header value unvalidated. *)
let heal_header (name, value) =
  heal_header_name name;
  heal_header_value value
(* [url_of_string s] parses [s] with Neturl.url_of_string under
   request_uri_syntax.
   Neturl.Malformed_URL is translated into this library's Malformed_URL
   exception, which carries the offending string [s]. *)
let url_of_string s =
  match Neturl.url_of_string request_uri_syntax s with
  | url -> url
  | exception Neturl.Malformed_URL -> raise (Malformed_URL s)
(* Alias for Neturl.string_of_url, re-exported unchanged. *)
let string_of_url = Neturl.string_of_url