Skip to content

Commit

Permalink
fix parsing / printing of IPv6 addresses in URIs
Browse files Browse the repository at this point in the history
  • Loading branch information
anmonteiro committed Mar 27, 2023
1 parent 4248b0f commit cd56175
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 92 deletions.
206 changes: 117 additions & 89 deletions lib/uri.ml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ type safe_chars = bool array

module type Scheme = sig
val safe_chars_for_component : component -> safe_chars
val normalize_host : string option -> string option
val normalize_host : string -> string
val canonicalize_port : int option -> int option
val canonicalize_path : string list -> string list
end
Expand Down Expand Up @@ -186,9 +186,7 @@ end
module Http : Scheme = struct
include Generic

let normalize_host = function
| Some hs -> Some (String.lowercase_ascii hs)
| None -> None
let normalize_host hs = String.lowercase_ascii hs

let canonicalize_port = function
| None -> None
Expand All @@ -212,11 +210,9 @@ end
module File : Scheme = struct
include Generic

let normalize_host = function
| Some hs ->
let hs = String.lowercase_ascii hs in
if hs="localhost" then Some "" else Some hs
| None -> None
let normalize_host hs =
let hs = String.lowercase_ascii hs in
if hs="localhost" then "" else hs
end

module Urn : Scheme = struct
Expand Down Expand Up @@ -542,7 +538,9 @@ let encoded_of_query ?scheme = Query.encoded_of_query ?scheme
type t = {
scheme: Pct.decoded option;
userinfo: Userinfo.t option;
host: Pct.decoded option;
host: [ `Ipv4_literal of string
| `Ipv6_literal of string
| `Host of Pct.decoded] option ;
port: int option;
path: Path.t;
query: Query.t;
Expand All @@ -561,8 +559,16 @@ let empty = {

let compare_decoded = Pct.unlift_decoded2 String.compare
let compare_decoded_opt = compare_opt compare_decoded
let compare_host h1 h2 =
match h1, h2 with
| `Ipv4_literal ip1, `Ipv4_literal ip2 -> String.compare ip1 ip2
| `Ipv6_literal ip1, `Ipv6_literal ip2 -> String.compare ip1 ip2
| `Host h1, `Host h2 -> compare_decoded h1 h2
| _ -> -1
let compare_host_opt = compare_opt compare_host

let compare t t' =
(match compare_decoded_opt t.host t'.host with
(match compare_host_opt t.host t'.host with
| 0 -> (match compare_decoded_opt t.scheme t'.scheme with
| 0 -> (match compare_opt (fun p p' ->
if p < p' then -1 else if p > p' then 1 else 0
Expand All @@ -584,10 +590,6 @@ let uncast_opt = function
| Some h -> Some (Pct.uncast_decoded h)
| None -> None

let cast_opt = function
| Some h -> Some (Pct.cast_decoded h)
| None -> None

let normalize schem uri =
let module Scheme =
(val (module_of_scheme (uncast_opt schem)) : Scheme) in
Expand All @@ -596,38 +598,16 @@ let normalize schem uri =
| None -> None
in {uri with
scheme=dob String.lowercase_ascii uri.scheme;
host=cast_opt (Scheme.normalize_host (uncast_opt uri.host))
host= match uri.host with
| Some (`Ipv4_literal host) ->
Some (`Ipv4_literal (Scheme.normalize_host host))
| Some (`Ipv6_literal host) ->
Some (`Ipv6_literal (Scheme.normalize_host host))
| Some (`Host host) ->
Some (`Host (Pct.cast_decoded (Scheme.normalize_host (Pct.uncast_decoded host))))
| None -> None
}

(* Make a URI record. This is a bit more inefficient than it needs to be due to the
* casting/uncasting (which isn't fully identity due to the option box), but it is
* no big deal for now.
*)
let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
let decode = function
|Some x -> Some (Pct.cast_decoded x) |None -> None in
let host = match userinfo, host, port with
| _, Some _, _ | None, None, None -> host
| Some _, None, _ | _, None, Some _ -> Some ""
in
let userinfo = match userinfo with
| None -> None | Some u -> Some (userinfo_of_encoded u) in
let path = match path with
|None -> [] | Some p ->
let path = path_of_encoded p in
match host, path with
| None, _ | Some _, "/"::_ | Some _, [] -> path
| Some _, _ -> "/"::path
in
let query = match query with
| None -> Query.KV []
| Some p -> Query.KV p
in
let scheme = decode scheme in
normalize scheme
{ scheme; userinfo;
host=decode host; port; path; query; fragment=decode fragment }

(** Convert a URI structure into a percent-encoded string
<http://tools.ietf.org/html/rfc3986#section-5.3>
*)
Expand Down Expand Up @@ -659,8 +639,13 @@ let to_string ?(pct_encoder=pct_encoder ()) uri =
);
(match uri.host with
|None -> ()
|Some host ->
|Some (`Host host) ->
add_pct_string ~component:pct_encoder.host host;
|Some (`Ipv4_literal host) -> Buffer.add_string buf host
|Some (`Ipv6_literal host) ->
Buffer.add_char buf '[';
Buffer.add_string buf host;
Buffer.add_char buf ']'
);
(match uri.port with
|None -> ()
Expand Down Expand Up @@ -707,11 +692,11 @@ let with_scheme uri =
|Some scheme -> { uri with scheme=Some (Pct.cast_decoded scheme) }
|None -> { uri with scheme=None }

let host uri = get_decoded_opt uri.host
let with_host uri =
function
|Some host -> { uri with host=Some (Pct.cast_decoded host) }
|None -> { uri with host=None }
let host uri =
match uri.host with
| None -> None
| Some (`Ipv4_literal h | `Ipv6_literal h) -> Some h
| Some (`Host h) -> Some (Pct.uncast_decoded h)

let host_with_default ?(default="localhost") uri =
match host uri with
Expand All @@ -729,7 +714,7 @@ let with_userinfo uri userinfo =
| None -> None
in
match host uri with
| None -> { uri with host=Some (Pct.cast_decoded ""); userinfo=userinfo }
| None -> { uri with host=Some (`Host (Pct.cast_decoded "")); userinfo=userinfo }
| Some _ -> { uri with userinfo=userinfo }

let user uri = match uri.userinfo with
Expand All @@ -741,7 +726,7 @@ let password uri = match uri.userinfo with
| Some (_, Some pass) -> Some pass
let with_password uri password =
let result userinfo = match host uri with
| None -> { uri with host=Some (Pct.cast_decoded ""); userinfo=userinfo }
| None -> { uri with host=Some (`Host (Pct.cast_decoded "")); userinfo=userinfo }
| Some _ -> { uri with userinfo=userinfo }
in
match uri.userinfo, password with
Expand All @@ -756,7 +741,7 @@ let with_port uri port =
| None -> begin
match port with
| None -> { uri with host=None; port=None }
| Some _ -> { uri with host=Some (Pct.cast_decoded ""); port=port }
| Some _ -> { uri with host=Some (`Host (Pct.cast_decoded "")); port=port }
end

(* Return the path component *)
Expand Down Expand Up @@ -800,30 +785,6 @@ let remove_query_param uri k = Query.(
{ uri with query=KV (List.filter (fun (k',_) -> k<>k') (kv uri.query)) }
)

let with_uri ?scheme ?userinfo ?host ?port ?path ?query ?fragment uri =
let with_path_opt u o =
match o with
| None -> with_path u ""
| Some p -> with_path u p
in
let with_query_opt u o =
match o with
| None -> with_query u []
| Some q -> with_query u q
in
let with_ f o u =
match o with
| None -> u
| Some x -> f u x
in
with_ with_scheme scheme uri
|> with_ with_userinfo userinfo
|> with_ with_host host
|> with_ with_port port
|> with_ with_path_opt path
|> with_ with_query_opt query
|> with_ with_fragment fragment

(* Construct encoded path and query components *)
let path_and_query uri =
match (path uri), (query uri) with
Expand Down Expand Up @@ -986,12 +947,7 @@ module Parser = struct
lift format_addr (c_colon *> c_colon *> double_colon 0 <|> part 0)

let ipv6_address =
lift3
(fun lb ip rb ->
String.concat "" [ string_of_char lb; ip; string_of_char rb ])
(char '[')
ipv6
(char ']')
(char '[') *> ipv6 <* (char ']')

let pct_encoded =
lift2
Expand Down Expand Up @@ -1025,10 +981,12 @@ module Parser = struct
]))

let host =
lift
(fun s -> Pct.decode (Pct.cast_encoded s))
(choice
[ reg_name; ipv4_address; ipv6_address (* | ipv4_literal TODO *) ])
choice
[ ipv4_address >>| (fun h -> `Ipv4_literal h)
; ipv6_address >>| (fun h -> `Ipv6_literal h)
; reg_name >>| (fun s -> `Host (Pct.decode (Pct.cast_encoded s)))
(* ipv4_literal TODO *)
]

let userinfo =
lift
Expand Down Expand Up @@ -1057,7 +1015,8 @@ module Parser = struct
let authority =
string "//"
*> lift3
(fun userinfo host port -> userinfo, Some host, port)
(fun userinfo host port ->
userinfo, Some host, port)
userinfo
host
port
Expand Down Expand Up @@ -1102,6 +1061,75 @@ module Parser = struct
empty
end

let decode_host host =
match host with
| Some host ->
begin match Angstrom.parse_string ~consume:All Parser.host host with
| Ok parsed -> Some parsed
| Error _ ->
begin match Angstrom.parse_string ~consume:All Parser.ipv6 host with
| Ok parsed -> Some (`Ipv6_literal parsed)
| Error _ -> Some (`Host (Pct.cast_decoded host))
end
end
| None -> None

(* Make a URI record. This is a bit more inefficient than it needs to be due to the
* casting/uncasting (which isn't fully identity due to the option box), but it is
* no big deal for now.
*)
let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
let decode = function
|Some x -> Some (Pct.cast_decoded x) |None -> None in
let host = match userinfo, host, port with
| _, Some _, _ | None, None, None -> host
| Some _, None, _ | _, None, Some _ -> Some ""
in
let userinfo = match userinfo with
| None -> None | Some u -> Some (userinfo_of_encoded u) in
let path = match path with
|None -> [] | Some p ->
let path = path_of_encoded p in
match host, path with
| None, _ | Some _, "/"::_ | Some _, [] -> path
| Some _, _ -> "/"::path
in
let query = match query with
| None -> Query.KV []
| Some p -> Query.KV p
in
let scheme = decode scheme in
normalize scheme
{ scheme; userinfo;
host=decode_host host; port; path; query; fragment=decode fragment }

let with_host uri host =
{ uri with host = decode_host host }

let with_uri ?scheme ?userinfo ?host ?port ?path ?query ?fragment uri =
let with_path_opt u o =
match o with
| None -> with_path u ""
| Some p -> with_path u p
in
let with_query_opt u o =
match o with
| None -> with_query u []
| Some q -> with_query u q
in
let with_ f o u =
match o with
| None -> u
| Some x -> f u x
in
with_ with_scheme scheme uri
|> with_ with_userinfo userinfo
|> with_ with_host host
|> with_ with_port port
|> with_ with_path_opt path
|> with_ with_query_opt query
|> with_ with_fragment fragment

let of_string s =
(* To preserve the old regex parser's behavior, we only parse a prefix, and
* stop whenever we can't parse more. *)
Expand Down
10 changes: 7 additions & 3 deletions lib_test/test_runner.ml
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ let uri_encodes = [
"/wh/at/ev/er", (Uri.make ~path:"/wh/at/ev/er" ());
"/wh/at!/ev%20/er", (Uri.make ~path:"/wh/at!/ev /er" ());
(* IPv6 literal *)
"http://%5Bdead%3Abeef%3A%3Adead%3A0%3Abeaf%5D",
"http://[dead:beef::dead:0:beaf]",
(Uri.make ~scheme:"http" ~host:"[dead:beef::dead:0:beaf]" ());
"http://user:pass@%5B2001%3A41d1%3Afe67%3A500%3A227%3Ac6ff%3Afe5a%3Aefa0%5D:6789/wh/at/ever?foo=1&bar=5#5",
"http://user:pass@[2001:41d1:fe67:500:227:c6ff:fe5a:efa0]:6789/wh/at/ever?foo=1&bar=5#5",
(Uri.make ~scheme:"http" ~userinfo:"user:pass" ~host:"[2001:41d1:fe67:500:227:c6ff:fe5a:efa0]"
~port:6789 ~path:"/wh/at/ever" ~query:["foo",["1"];"bar",["5"]] ~fragment:"5" ());
(* IPv6 literal with zone id *)
Expand Down Expand Up @@ -338,6 +338,7 @@ let generic_uri_norm = [
"//colon%[email protected]/",
"//colon%[email protected]/";
"foo+bar%3a", "./foo+bar:";
"http://[2001:DB8:1234:5678:90ab:cdef::0123]/%68%65%6c%6c%6f", "http://[2001:db8:1234:5678:90ab:cdef::0123]/hello";
(let p_q = "/foo%20bar/" in
p_q, Uri.(path_and_query (of_string p_q)));
]
Expand Down Expand Up @@ -646,7 +647,10 @@ let with_uri =
Uri.with_uri ~query:None base, "scheme://user:pass@host:0/path#fragment";
Uri.with_uri ~query:(Some ["new", ["a"]]) base, "scheme://user:pass@host:0/path?new=a#fragment";
Uri.with_uri ~fragment:None base, "scheme://user:pass@host:0/path?query=arg";
Uri.with_uri ~fragment:(Some "new") base, "scheme://user:pass@host:0/path?query=arg#new"]
Uri.with_uri ~fragment:(Some "new") base, "scheme://user:pass@host:0/path?query=arg#new";
Uri.with_uri ~host:(Some "2001:DB8:1234:5678:90ab:cdef::0123") base,
"scheme://user:pass@[2001:DB8:1234:5678:90ab:cdef::0123]:0/path?query=arg#fragment"
]

let test_with_uri =
List.map (fun (input, output) ->
Expand Down

0 comments on commit cd56175

Please sign in to comment.