summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/url.ml88
1 files changed, 82 insertions, 6 deletions
diff --git a/src/url.ml b/src/url.ml
index 1985ff0..5d36dae 100644
--- a/src/url.ml
+++ b/src/url.ml
@@ -24,16 +24,92 @@ let is_scheme_allowed allowed_schemes scheme =
let regex_matches regex s =
try
- let _ = Pcre.exec ~pat:regex s in
+ let _ = Pcre.exec ~rex:regex s in
true
with Not_found -> false
+let host_path_format =
+ Pcre.regexp
+ {|^//(?:[a-zA-Z0-9\-\._~]+(?::[a-zA-Z0-9\-\._~]*)?@)?([a-zA-Z0-9\-\._~]+|\[[a-zA-Z0-9:\.]+\])(?::([0-9]+))?(/.*)?$|}
+
+let host_name_format = Pcre.regexp {|^[a-zA-Z0-9]+([\-\._~]{1}[a-zA-Z0-9]+)*$|}
+let ipv4_addr_format = Pcre.regexp {|^(([1-9]\d{0,2}|0)\.){3}([1-9]\d{0,2}|0)$|}
+let ipv6_addr_format = Pcre.regexp {|^\[([a-z0-9:\.]+|[A-Z0-9:\.]+)\]$|}
+
+let is_port s =
+ try
+ let n = int_of_string s in
+ if n > 0 && n < 65536 then true
+ else false
+ with Failure _ -> false
+
+let is_ipv4_octet s =
+ try
+ let n = int_of_string s in
+ if n >= 0 && n < 256 then true
+ else false
+ with Failure _ -> false
+
+let is_ipv6_segment s =
+ try
+ let n = int_of_string ("0x" ^ s) in
+ if n >= 0 && n < 65536 then true
+ else false
+ with Failure _ -> false
+
+let is_ipv4_addr s =
+ let res = Pcre.exec ~rex:ipv4_addr_format s in
+ let ipv4_addr_str = Pcre.get_substring res 0 in
+ let ipv4_addr_l = String.split_on_char '.' ipv4_addr_str in
+ List.for_all is_ipv4_octet ipv4_addr_l
+
+let is_ipv6_pure_addr s =
+ let ipv6_addr_l = String.split_on_char ':' s in
+ if List.length ipv6_addr_l > 8 || List.length ipv6_addr_l < 3 then false
+ else
+ let seg_str_l = List.filter (fun s -> String.length s > 0) ipv6_addr_l in
+ List.for_all is_ipv6_segment seg_str_l
+
+let is_ipv6_dual_addr s =
+ let ipv6_addr_l = List.rev (String.split_on_char ':' s) in
+ match ipv6_addr_l with
+ | [] -> false
+ | h::t ->
+ if not (is_ipv4_addr h) then false
+ else
+ if List.length t > 6 || List.length t < 2 then false
+ else
+ let seg_str_l = List.filter (fun s -> String.length s > 0) t in
+ List.for_all is_ipv6_segment seg_str_l
+
+let is_ipv6_addr s =
+ let res = Pcre.exec ~rex:ipv6_addr_format s in
+ let ipv6_addr_str = Pcre.get_substring res 1 in
+ try
+ let typo = Pcre.exec ~pat:{|:::|} ipv6_addr_str in
+ match typo with
+ | _ -> false
+ with Not_found ->
+ is_ipv6_pure_addr ipv6_addr_str || is_ipv6_dual_addr ipv6_addr_str
+
+let host_path_matches s =
+ try
+ let res = Pcre.exec ~rex:host_path_format s in
+ let substr = Pcre.get_substrings ~full_match:false res in
+ let port_str = Array.get substr 1 in
+ if String.length port_str > 0 && not (is_port port_str) then false
+ else
+ let host = Array.get substr 0 in
+ match host with
+ | host when regex_matches ipv6_addr_format host -> is_ipv6_addr host
+ | host when regex_matches ipv4_addr_format host -> is_ipv4_addr host
+ | host when regex_matches host_name_format host -> true
+ | _ -> false
+ with Not_found -> false;
+
let validate_uri scheme uri =
- match scheme with
- | "http" | "https" ->
- if regex_matches {|//[a-z0-9]+([\-\.]{1}[a-z0-9]+)*(:[0-9]+)*(/.* )?|} uri then Ok ()
- else Error (Printf.sprintf {|"%s" is not a valid URI for the %s URL scheme|} uri scheme)
- | _ -> Ok ()
+ if host_path_matches uri then Ok ()
+ else Error (Printf.sprintf {|"%s" is not a valid URI for the %s URL scheme|} uri scheme)
let validate_url allowed_schemes url =
let (let*) = Result.bind in