summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Estabrook <jestabro@vyos.io>2022-12-29 08:36:35 -0600
committerGitHub <noreply@github.com>2022-12-29 08:36:35 -0600
commit13e2fe73a5a02db4e40e7c4e56a7564076767aa2 (patch)
tree3df1ac8665f02b069b1184af8091042f1502b491
parent2da99f7795e08378b4370b3403f556fce864f83d (diff)
parentb8569f0c3e155ee5886b907957d24c268c4d016f (diff)
downloadvyos-utils-13e2fe73a5a02db4e40e7c4e56a7564076767aa2.tar.gz
vyos-utils-13e2fe73a5a02db4e40e7c4e56a7564076767aa2.zip
Merge pull request #7 from dmbaturin/url-validator
T4761: add a URL value validator
-rw-r--r--[-rwxr-xr-x]debian/rules1
-rw-r--r--src/url.ml149
2 files changed, 150 insertions, 0 deletions
diff --git a/debian/rules b/debian/rules
index 09cdd65..73f3805 100755..100644
--- a/debian/rules
+++ b/debian/rules
@@ -10,6 +10,7 @@ override_dh_auto_build:
mkdir -p _build
ocamlfind ocamlopt -o _build/numeric -package pcre -linkpkg src/numeric.ml
ocamlfind ocamlopt -o _build/file-path -package fileutils -linkpkg src/file_path.ml
+ ocamlfind ocamlopt -o _build/url -package pcre -linkpkg src/url.ml
ocamlfind ocamlopt -o _build/validate-value -package pcre,unix,containers -linkpkg src/validate_value.ml
override_dh_auto_install:
diff --git a/src/url.ml b/src/url.ml
new file mode 100644
index 0000000..5d36dae
--- /dev/null
+++ b/src/url.ml
@@ -0,0 +1,149 @@
+(* Extract and validate the scheme part.
+ As per the RFC:
+
+ Scheme names consist of a sequence of characters. The lower case
+ letters "a"--"z", digits, and the characters plus ("+"), period
+ ("."), and hyphen ("-") are allowed. For resiliency, programs
+ interpreting URLs should treat upper case letters as equivalent to
+ lower case in scheme names (e.g., allow "HTTP" as well as "http").
+ *)
+let split_scheme url =
+ let aux url =
+ let res = Pcre.exec ~pat:{|^([a-zA-Z0-9\.\-]+):(.*)$|} url in
+ let scheme = Pcre.get_substring res 1 in
+ let uri = Pcre.get_substring res 2 in
+ (String.lowercase_ascii scheme, uri)
+ in
+ try Ok (aux url)
+ with Not_found -> Error (Printf.sprintf {|"%s" is not a valid URL|} url)
+
+let is_scheme_allowed allowed_schemes scheme =
+ match List.find_opt ((=) scheme) allowed_schemes with
+ | Some _ -> Ok ()
+ | None -> Error (Printf.sprintf {|URL scheme "%s:" is not allowed|} scheme)
+
+let regex_matches regex s =
+ try
+ let _ = Pcre.exec ~rex:regex s in
+ true
+ with Not_found -> false
+
+let host_path_format =
+ Pcre.regexp
+ {|^//(?:[a-zA-Z0-9\-\._~]+(?::[a-zA-Z0-9\-\._~]*)?@)?([a-zA-Z0-9\-\._~]+|\[[a-zA-Z0-9:\.]+\])(?::([0-9]+))?(/.*)?$|}
+
+let host_name_format = Pcre.regexp {|^[a-zA-Z0-9]+([\-\._~]{1}[a-zA-Z0-9]+)*$|}
+let ipv4_addr_format = Pcre.regexp {|^(([1-9]\d{0,2}|0)\.){3}([1-9]\d{0,2}|0)$|}
+let ipv6_addr_format = Pcre.regexp {|^\[([a-z0-9:\.]+|[A-Z0-9:\.]+)\]$|}
+
+let is_port s =
+ try
+ let n = int_of_string s in
+ if n > 0 && n < 65536 then true
+ else false
+ with Failure _ -> false
+
+let is_ipv4_octet s =
+ try
+ let n = int_of_string s in
+ if n >= 0 && n < 256 then true
+ else false
+ with Failure _ -> false
+
+let is_ipv6_segment s =
+ try
+ let n = int_of_string ("0x" ^ s) in
+ if n >= 0 && n < 65536 then true
+ else false
+ with Failure _ -> false
+
+let is_ipv4_addr s =
+ let res = Pcre.exec ~rex:ipv4_addr_format s in
+ let ipv4_addr_str = Pcre.get_substring res 0 in
+ let ipv4_addr_l = String.split_on_char '.' ipv4_addr_str in
+ List.for_all is_ipv4_octet ipv4_addr_l
+
+let is_ipv6_pure_addr s =
+ let ipv6_addr_l = String.split_on_char ':' s in
+ if List.length ipv6_addr_l > 8 || List.length ipv6_addr_l < 3 then false
+ else
+ let seg_str_l = List.filter (fun s -> String.length s > 0) ipv6_addr_l in
+ List.for_all is_ipv6_segment seg_str_l
+
+let is_ipv6_dual_addr s =
+ let ipv6_addr_l = List.rev (String.split_on_char ':' s) in
+ match ipv6_addr_l with
+ | [] -> false
+ | h::t ->
+ if not (is_ipv4_addr h) then false
+ else
+ if List.length t > 6 || List.length t < 2 then false
+ else
+ let seg_str_l = List.filter (fun s -> String.length s > 0) t in
+ List.for_all is_ipv6_segment seg_str_l
+
+let is_ipv6_addr s =
+ let res = Pcre.exec ~rex:ipv6_addr_format s in
+ let ipv6_addr_str = Pcre.get_substring res 1 in
+ try
+ let typo = Pcre.exec ~pat:{|:::|} ipv6_addr_str in
+ match typo with
+ | _ -> false
+ with Not_found ->
+ is_ipv6_pure_addr ipv6_addr_str || is_ipv6_dual_addr ipv6_addr_str
+
+let host_path_matches s =
+ try
+ let res = Pcre.exec ~rex:host_path_format s in
+ let substr = Pcre.get_substrings ~full_match:false res in
+ let port_str = Array.get substr 1 in
+ if String.length port_str > 0 && not (is_port port_str) then false
+ else
+ let host = Array.get substr 0 in
+ match host with
+ | host when regex_matches ipv6_addr_format host -> is_ipv6_addr host
+ | host when regex_matches ipv4_addr_format host -> is_ipv4_addr host
+ | host when regex_matches host_name_format host -> true
+ | _ -> false
+ with Not_found -> false;
+
+let validate_uri scheme uri =
+ if host_path_matches uri then Ok ()
+ else Error (Printf.sprintf {|"%s" is not a valid URI for the %s URL scheme|} uri scheme)
+
+let validate_url allowed_schemes url =
+ let (let*) = Result.bind in
+ let* scheme, uri = split_scheme url in
+ let* () = is_scheme_allowed allowed_schemes scheme in
+ let* () = validate_uri scheme uri in
+ Ok ()
+
+let file_transport_schemes = ["http"; "https"; "ftp"; "sftp"; "scp"; "tftp"]
+
+let message_schemes = ["mailto"; "tel"; "sms"]
+
+let allowed_schemes = ref []
+let url = ref ""
+
+let args = [
+ ("--scheme",
+ Arg.String (fun s -> allowed_schemes := s :: !allowed_schemes),
+ "Allow only specified schemes");
+ ("--file-transport",
+ Arg.Unit (fun () -> allowed_schemes := (List.append !allowed_schemes file_transport_schemes)),
+ "Allow only file transport protocols (HTTP/S, FTP, SCP/SFTP, TFTP)");
+ ("--", Arg.Rest (fun s -> url := s), "Interpret next item as an argument");
+]
+
+let usage = Printf.sprintf "Usage: %s [OPTIONS] <URL>" Sys.argv.(0)
+
+let () =
+ let () = Arg.parse args (fun s -> url := s) usage in
+ (* Force all allowed scheme named to lowercase for ease of comparison. *)
+ let allowed_schemes = List.map String.lowercase_ascii !allowed_schemes in
+ let res = validate_url allowed_schemes !url in
+ match res with
+ | Ok () -> ()
+ | Error msg ->
+ let () = Printf.fprintf stdout "%s" msg in
+ exit 1