From f22292be24334ac8767aa288221091955f93ebd6 Mon Sep 17 00:00:00 2001 From: Daniil Baturin Date: Wed, 22 May 2024 13:34:05 +0100 Subject: T6380: Move validators to a dedicated subdirectory --- src/file_path.ml | 55 ------------ src/numeric.ml | 200 -------------------------------------------- src/url.ml | 149 --------------------------------- src/validators/file_path.ml | 55 ++++++++++++ src/validators/numeric.ml | 200 ++++++++++++++++++++++++++++++++++++++++++++ src/validators/url.ml | 149 +++++++++++++++++++++++++++++++++ 6 files changed, 404 insertions(+), 404 deletions(-) delete mode 100644 src/file_path.ml delete mode 100644 src/numeric.ml delete mode 100644 src/url.ml create mode 100644 src/validators/file_path.ml create mode 100644 src/validators/numeric.ml create mode 100644 src/validators/url.ml (limited to 'src') diff --git a/src/file_path.ml b/src/file_path.ml deleted file mode 100644 index ea3068c..0000000 --- a/src/file_path.ml +++ /dev/null @@ -1,55 +0,0 @@ -type opts = { - must_be_file : bool; - parent : string option; - lookup_path : string option; - strict : bool; -} - -let default_opts = { - must_be_file = true; - parent = None; - lookup_path = None; - strict = false -} - -let opts = ref default_opts - -let path_arg = ref "" - -let args = [ - ("--file", Arg.Unit (fun () -> opts := {!opts with must_be_file=true}), "Path must point to a file and not a directory (default)"); - ("--directory", Arg.Unit (fun () -> opts := {!opts with must_be_file=false}), "Path must point to a directory"); - ("--parent-dir", Arg.String (fun s -> opts := {!opts with parent=(Some s)}), "Path must be inside specific parent directory"); - ("--lookup-path", Arg.String (fun s -> opts := {!opts with lookup_path=(Some s)}), "Prefix path argument with lookup path"); - ("--strict", Arg.Unit (fun () -> opts := {!opts with strict=true}), "Treat warnings as errors"); -] -let usage = Printf.sprintf "Usage: %s [OPTIONS] " Sys.argv.(0) - -let () = if Array.length Sys.argv = 1 then (Arg.usage args usage; exit 1) -let () = Arg.parse args (fun s -> path_arg := s) usage - -let fail msg = - let () = print_endline msg in - exit 1 - -let () = - let opts = !opts in - let path = - match opts.lookup_path with - | None -> !path_arg - | Some lookup_path -> FilePath.concat lookup_path !path_arg - in - (* First, check if the file/dir path exists at all. *) - let exists = FileUtil.test FileUtil.Exists path in - if not exists then Printf.ksprintf fail {|Incorrect path %s: no such file or directory|} path else - (* If yes, check if it's of the correct type: file or directory. *) - let is_file = FileUtil.test FileUtil.Is_file path in - if ((not is_file) && opts.must_be_file) then Printf.ksprintf fail {|%s is a directory, not a file|} path else - if (is_file && (not opts.must_be_file)) then Printf.ksprintf fail {|%s is a file, not a directory|} path else - match opts.parent with - | None -> - exit 0 - | Some parent -> - if not (FilePath.is_subdir (FilePath.reduce path) (FilePath.reduce parent)) then - let msg = Printf.sprintf {|Path %s is not under %s directory|} path parent in - if opts.strict then fail msg else Printf.printf "Warning: %s\n" msg diff --git a/src/numeric.ml b/src/numeric.ml deleted file mode 100644 index 0c75f83..0000000 --- a/src/numeric.ml +++ /dev/null @@ -1,200 +0,0 @@ -type numeric_str = Number_string of string | Range_string of string -type numeric_val = Number_float of float | Range_float of float * float - -type options = { - positive: bool; - nonnegative: bool; - allow_float: bool; - ranges: string list; - not_ranges: string list; - not_values: string list; - relative: bool; - allow_range: bool; - require_range: bool; -} - -let default_opts = { - positive = false; - nonnegative = false; - allow_float = false; - ranges = []; - not_ranges = []; - not_values = []; - relative = false; - allow_range = false; - require_range = false; -} - -let opts = ref default_opts - -let number_arg = ref "" - -let args = [ - ("--non-negative", Arg.Unit (fun () -> opts := {!opts with nonnegative=true}), "Check if the number is non-negative (>= 0)"); - ("--positive", Arg.Unit (fun () -> opts := {!opts with positive=true}), "Check if the number is positive (> 0)"); - ("--range", Arg.String (fun s -> let optsv = !opts in opts := {optsv with ranges=(s :: optsv.ranges)}), "Check if the number or range is within a range (inclusive)"); - ("--not-range", Arg.String (fun s -> let optsv = !opts in opts := {optsv with not_ranges=(s :: optsv.not_ranges)}), "Check if the number or range is not within a range (inclusive)"); - ("--not-value", Arg.String (fun s -> let optsv = !opts in opts := {optsv with not_values=(s :: optsv.not_values)}), "Check if the number does not equal a specific value"); - ("--float", Arg.Unit (fun () -> opts := {!opts with allow_float=true}), "Allow floating-point numbers"); - ("--relative", Arg.Unit (fun () -> opts := {!opts with relative=true}), "Allow relative increment/decrement (+/-N)"); - ("--allow-range", Arg.Unit (fun () -> opts := {!opts with allow_range=true}), "Allow the argument to be a range rather than a single number"); - ("--require-range", Arg.Unit (fun () -> opts := {!opts with require_range=true; allow_range=true}), "Require the argument to be a range rather than a single number"); - ("--", Arg.Rest (fun s -> number_arg := s), "Interpret next item as an argument"); -] -let usage = Printf.sprintf "Usage: %s [OPTIONS] |" Sys.argv.(0) - -let () = if Array.length Sys.argv = 1 then (Arg.usage args usage; exit 1) -let () = Arg.parse args (fun s -> number_arg := s) usage - -let check_nonnegative opts m = - if opts.nonnegative then - match m with - | Number_float n -> - if (n < 0.0) then - failwith "Number should be non-negative." - | Range_float _ -> - failwith "option '--non-negative' does not apply to a range value" - -let check_positive opts m = - if opts.positive then - match m with - | Number_float n -> - if (n <= 0.0) then - failwith "Number should be positive" - | Range_float _ -> - failwith "option '--positive does' not apply to a range value" - -let looks_like_number value = - try let _ = Pcre.exec ~pat:"^(\\-?)[0-9]+(\\.[0-9]+)?$" value in true - with Not_found -> false - -let is_relative value = - try let _ = Pcre.exec ~pat:"^[+-][0-9]+$" value in true - with Not_found -> false - -let number_string_drop_modifier value = - String.sub value 1 (String.length value - 1) - -let get_relative opts t = - if opts.relative then - match t with - | Number_string s -> - if not (is_relative s) then - failwith "Value is not a relative increment/decrement" - else Number_string (number_string_drop_modifier s) - | Range_string _ -> - failwith "increment/decrement does not apply to a range value" - else t - -let number_of_string opts s = - if not (looks_like_number s) then Printf.ksprintf failwith "'%s' is not a valid number" s else - let n = float_of_string_opt s in - match n with - | Some n -> - (* If floats are explicitly allowed, just return the number. *) - if opts.allow_float then n - (* If floats are not explicitly allowed, check if the argument has a decimal separator in it. - If the argument string contains a dot but float_of_string didn't dislike it, - it's a valid number but not an integer. - *) - else if not (String.contains s '.') then n - (* If float_of_string returned None, the argument string is just garbage rather than a number. *) - else Printf.ksprintf failwith "'%s' is not a valid integer number" s - | None -> - Printf.ksprintf failwith "'%s' is not a valid number" s - -let range_of_string opts s = - let rs = String.split_on_char '-' s |> List.map String.trim |> List.map (number_of_string opts) in - match rs with - | [l; r] -> (l, r) - | exception (Failure msg) -> - (* Some of the numbers in the range are bad. *) - Printf.ksprintf failwith "'%s' is not a valid number range: %s" s msg - | _ -> - (* The range itself if malformed, like 1-10-20. *) - Printf.ksprintf failwith "'%s' is not a valid number range" s - -let value_in_ranges ranges n = - let in_range (l, r) n = (n >= l) && (n <= r) in - List.fold_left (fun acc r -> acc || (in_range r n)) false ranges - -let value_not_in_ranges ranges n = - let in_range (l, r) n = (n >= l) && (n <= r) in - List.fold_left (fun acc r -> acc && (not (in_range r n))) true ranges - -let check_ranges opts m = - if opts.ranges <> [] then - let ranges = List.map (range_of_string opts) opts.ranges in - match m with - | Number_float n -> - if not (value_in_ranges ranges n) then - Printf.ksprintf failwith "Number is not in any of allowed ranges" - | Range_float (i, j) -> - if (not (value_in_ranges ranges i) || - not (value_in_ranges ranges j)) then - Printf.ksprintf failwith "Range is not in any of allowed ranges" - - -let check_not_ranges opts m = - if opts.not_ranges <> [] then - let ranges = List.map (range_of_string opts) opts.not_ranges in - match m with - | Number_float n -> - if not (value_not_in_ranges ranges n) then - Printf.ksprintf failwith "Number is in one of excluded ranges" - | Range_float (i, j) -> - if (not (value_not_in_ranges ranges i) || - not (value_not_in_ranges ranges j)) then - Printf.ksprintf failwith "Range is in one of excluded ranges" - -let check_not_values opts m = - let excluded_values = List.map (number_of_string opts) opts.not_values in - if excluded_values = [] then () else - match m with - | Range_float _ -> Printf.ksprintf failwith "--not-value cannot be used with ranges" - | Number_float num -> - begin - let res = List.find_opt ((=) num) excluded_values in - match res with - | None -> () - | Some _ -> Printf.ksprintf failwith "Value is excluded by --not-value" - end - -let check_argument_type opts m = - match m with - | Number_float _ -> - if opts.require_range then Printf.ksprintf failwith "Value must be a range, not a number" - else () - | Range_float _ -> - if opts.allow_range then () - else Printf.ksprintf failwith "Value must be a number, not a range" - -let is_range_val s = - try let _ = Pcre.exec ~pat:"^[0-9]+-[0-9]+$" s in true - with Not_found -> false - -let var_numeric_str s = - match is_range_val s with - | true -> Range_string s - | false -> Number_string s - -let () = try - let s = var_numeric_str !number_arg in - let opts = !opts in - let s = get_relative opts s in - let n = - match s with - | Number_string r -> Number_float (number_of_string opts r) - | Range_string r -> let i, j = range_of_string opts r in - Range_float (i, j) - in - check_argument_type opts n; - check_nonnegative opts n; - check_positive opts n; - check_not_values opts n; - check_ranges opts n; - check_not_ranges opts n -with (Failure err) -> - print_endline err; - exit 1 - diff --git a/src/url.ml b/src/url.ml deleted file mode 100644 index 3d77544..0000000 --- a/src/url.ml +++ /dev/null @@ -1,149 +0,0 @@ -(* Extract and validate the scheme part. - As per the RFC: - - Scheme names consist of a sequence of characters. The lower case - letters "a"--"z", digits, and the characters plus ("+"), period - ("."), and hyphen ("-") are allowed. For resiliency, programs - interpreting URLs should treat upper case letters as equivalent to - lower case in scheme names (e.g., allow "HTTP" as well as "http"). - *) -let split_scheme url = - let aux url = - let res = Pcre.exec ~pat:{|^([a-zA-Z0-9\.\-]+):(.*)$|} url in - let scheme = Pcre.get_substring res 1 in - let uri = Pcre.get_substring res 2 in - (String.lowercase_ascii scheme, uri) - in - try Ok (aux url) - with Not_found -> Error (Printf.sprintf {|"%s" is not a valid URL|} url) - -let is_scheme_allowed allowed_schemes scheme = - match List.find_opt ((=) scheme) allowed_schemes with - | Some _ -> Ok () - | None -> Error (Printf.sprintf {|URL scheme "%s:" is not allowed|} scheme) - -let regex_matches regex s = - try - let _ = Pcre.exec ~rex:regex s in - true - with Not_found -> false - -let host_path_format = - Pcre.regexp - {|^//(?:[^/?#]+(?::[^/?#]*)?@)?([a-zA-Z0-9\-\._~]+|\[[a-zA-Z0-9:\.]+\])(?::([0-9]+))?(/.*)?$|} - -let host_name_format = Pcre.regexp {|^[a-zA-Z0-9]+([\-\._~]{1}[a-zA-Z0-9]+)*$|} -let ipv4_addr_format = Pcre.regexp {|^(([1-9]\d{0,2}|0)\.){3}([1-9]\d{0,2}|0)$|} -let ipv6_addr_format = Pcre.regexp {|^\[([a-z0-9:\.]+|[A-Z0-9:\.]+)\]$|} - -let is_port s = - try - let n = int_of_string s in - if n > 0 && n < 65536 then true - else false - with Failure _ -> false - -let is_ipv4_octet s = - try - let n = int_of_string s in - if n >= 0 && n < 256 then true - else false - with Failure _ -> false - -let is_ipv6_segment s = - try - let n = int_of_string ("0x" ^ s) in - if n >= 0 && n < 65536 then true - else false - with Failure _ -> false - -let is_ipv4_addr s = - let res = Pcre.exec ~rex:ipv4_addr_format s in - let ipv4_addr_str = Pcre.get_substring res 0 in - let ipv4_addr_l = String.split_on_char '.' ipv4_addr_str in - List.for_all is_ipv4_octet ipv4_addr_l - -let is_ipv6_pure_addr s = - let ipv6_addr_l = String.split_on_char ':' s in - if List.length ipv6_addr_l > 8 || List.length ipv6_addr_l < 3 then false - else - let seg_str_l = List.filter (fun s -> String.length s > 0) ipv6_addr_l in - List.for_all is_ipv6_segment seg_str_l - -let is_ipv6_dual_addr s = - let ipv6_addr_l = List.rev (String.split_on_char ':' s) in - match ipv6_addr_l with - | [] -> false - | h::t -> - if not (is_ipv4_addr h) then false - else - if List.length t > 6 || List.length t < 2 then false - else - let seg_str_l = List.filter (fun s -> String.length s > 0) t in - List.for_all is_ipv6_segment seg_str_l - -let is_ipv6_addr s = - let res = Pcre.exec ~rex:ipv6_addr_format s in - let ipv6_addr_str = Pcre.get_substring res 1 in - try - let typo = Pcre.exec ~pat:{|:::|} ipv6_addr_str in - match typo with - | _ -> false - with Not_found -> - is_ipv6_pure_addr ipv6_addr_str || is_ipv6_dual_addr ipv6_addr_str - -let host_path_matches s = - try - let res = Pcre.exec ~rex:host_path_format s in - let substr = Pcre.get_substrings ~full_match:false res in - let port_str = Array.get substr 1 in - if String.length port_str > 0 && not (is_port port_str) then false - else - let host = Array.get substr 0 in - match host with - | host when regex_matches ipv6_addr_format host -> is_ipv6_addr host - | host when regex_matches ipv4_addr_format host -> is_ipv4_addr host - | host when regex_matches host_name_format host -> true - | _ -> false - with Not_found -> false - -let validate_uri scheme uri = - if host_path_matches uri then Ok () - else Error (Printf.sprintf {|"%s" is not a valid URI for the %s URL scheme|} uri scheme) - -let validate_url allowed_schemes url = - let (let*) = Result.bind in - let* scheme, uri = split_scheme url in - let* () = is_scheme_allowed allowed_schemes scheme in - let* () = validate_uri scheme uri in - Ok () - -let file_transport_schemes = ["http"; "https"; "ftp"; "sftp"; "scp"; "tftp"] - -let message_schemes = ["mailto"; "tel"; "sms"] - -let allowed_schemes = ref [] -let url = ref "" - -let args = [ - ("--scheme", - Arg.String (fun s -> allowed_schemes := s :: !allowed_schemes), - "Allow only specified schemes"); - ("--file-transport", - Arg.Unit (fun () -> allowed_schemes := (List.append !allowed_schemes file_transport_schemes)), - "Allow only file transport protocols (HTTP/S, FTP, SCP/SFTP, TFTP)"); - ("--", Arg.Rest (fun s -> url := s), "Interpret next item as an argument"); -] - -let usage = Printf.sprintf "Usage: %s [OPTIONS] " Sys.argv.(0) - -let () = - let () = Arg.parse args (fun s -> url := s) usage in - (* Force all allowed scheme named to lowercase for ease of comparison. *) - let allowed_schemes = List.map String.lowercase_ascii !allowed_schemes in - let res = validate_url allowed_schemes !url in - match res with - | Ok () -> () - | Error msg -> - let () = Printf.fprintf stdout "%s" msg in - exit 1 diff --git a/src/validators/file_path.ml b/src/validators/file_path.ml new file mode 100644 index 0000000..ea3068c --- /dev/null +++ b/src/validators/file_path.ml @@ -0,0 +1,55 @@ +type opts = { + must_be_file : bool; + parent : string option; + lookup_path : string option; + strict : bool; +} + +let default_opts = { + must_be_file = true; + parent = None; + lookup_path = None; + strict = false +} + +let opts = ref default_opts + +let path_arg = ref "" + +let args = [ + ("--file", Arg.Unit (fun () -> opts := {!opts with must_be_file=true}), "Path must point to a file and not a directory (default)"); + ("--directory", Arg.Unit (fun () -> opts := {!opts with must_be_file=false}), "Path must point to a directory"); + ("--parent-dir", Arg.String (fun s -> opts := {!opts with parent=(Some s)}), "Path must be inside specific parent directory"); + ("--lookup-path", Arg.String (fun s -> opts := {!opts with lookup_path=(Some s)}), "Prefix path argument with lookup path"); + ("--strict", Arg.Unit (fun () -> opts := {!opts with strict=true}), "Treat warnings as errors"); +] +let usage = Printf.sprintf "Usage: %s [OPTIONS] " Sys.argv.(0) + +let () = if Array.length Sys.argv = 1 then (Arg.usage args usage; exit 1) +let () = Arg.parse args (fun s -> path_arg := s) usage + +let fail msg = + let () = print_endline msg in + exit 1 + +let () = + let opts = !opts in + let path = + match opts.lookup_path with + | None -> !path_arg + | Some lookup_path -> FilePath.concat lookup_path !path_arg + in + (* First, check if the file/dir path exists at all. *) + let exists = FileUtil.test FileUtil.Exists path in + if not exists then Printf.ksprintf fail {|Incorrect path %s: no such file or directory|} path else + (* If yes, check if it's of the correct type: file or directory. *) + let is_file = FileUtil.test FileUtil.Is_file path in + if ((not is_file) && opts.must_be_file) then Printf.ksprintf fail {|%s is a directory, not a file|} path else + if (is_file && (not opts.must_be_file)) then Printf.ksprintf fail {|%s is a file, not a directory|} path else + match opts.parent with + | None -> + exit 0 + | Some parent -> + if not (FilePath.is_subdir (FilePath.reduce path) (FilePath.reduce parent)) then + let msg = Printf.sprintf {|Path %s is not under %s directory|} path parent in + if opts.strict then fail msg else Printf.printf "Warning: %s\n" msg diff --git a/src/validators/numeric.ml b/src/validators/numeric.ml new file mode 100644 index 0000000..0c75f83 --- /dev/null +++ b/src/validators/numeric.ml @@ -0,0 +1,200 @@ +type numeric_str = Number_string of string | Range_string of string +type numeric_val = Number_float of float | Range_float of float * float + +type options = { + positive: bool; + nonnegative: bool; + allow_float: bool; + ranges: string list; + not_ranges: string list; + not_values: string list; + relative: bool; + allow_range: bool; + require_range: bool; +} + +let default_opts = { + positive = false; + nonnegative = false; + allow_float = false; + ranges = []; + not_ranges = []; + not_values = []; + relative = false; + allow_range = false; + require_range = false; +} + +let opts = ref default_opts + +let number_arg = ref "" + +let args = [ + ("--non-negative", Arg.Unit (fun () -> opts := {!opts with nonnegative=true}), "Check if the number is non-negative (>= 0)"); + ("--positive", Arg.Unit (fun () -> opts := {!opts with positive=true}), "Check if the number is positive (> 0)"); + ("--range", Arg.String (fun s -> let optsv = !opts in opts := {optsv with ranges=(s :: optsv.ranges)}), "Check if the number or range is within a range (inclusive)"); + ("--not-range", Arg.String (fun s -> let optsv = !opts in opts := {optsv with not_ranges=(s :: optsv.not_ranges)}), "Check if the number or range is not within a range (inclusive)"); + ("--not-value", Arg.String (fun s -> let optsv = !opts in opts := {optsv with not_values=(s :: optsv.not_values)}), "Check if the number does not equal a specific value"); + ("--float", Arg.Unit (fun () -> opts := {!opts with allow_float=true}), "Allow floating-point numbers"); + ("--relative", Arg.Unit (fun () -> opts := {!opts with relative=true}), "Allow relative increment/decrement (+/-N)"); + ("--allow-range", Arg.Unit (fun () -> opts := {!opts with allow_range=true}), "Allow the argument to be a range rather than a single number"); + ("--require-range", Arg.Unit (fun () -> opts := {!opts with require_range=true; allow_range=true}), "Require the argument to be a range rather than a single number"); + ("--", Arg.Rest (fun s -> number_arg := s), "Interpret next item as an argument"); +] +let usage = Printf.sprintf "Usage: %s [OPTIONS] |" Sys.argv.(0) + +let () = if Array.length Sys.argv = 1 then (Arg.usage args usage; exit 1) +let () = Arg.parse args (fun s -> number_arg := s) usage + +let check_nonnegative opts m = + if opts.nonnegative then + match m with + | Number_float n -> + if (n < 0.0) then + failwith "Number should be non-negative." + | Range_float _ -> + failwith "option '--non-negative' does not apply to a range value" + +let check_positive opts m = + if opts.positive then + match m with + | Number_float n -> + if (n <= 0.0) then + failwith "Number should be positive" + | Range_float _ -> + failwith "option '--positive does' not apply to a range value" + +let looks_like_number value = + try let _ = Pcre.exec ~pat:"^(\\-?)[0-9]+(\\.[0-9]+)?$" value in true + with Not_found -> false + +let is_relative value = + try let _ = Pcre.exec ~pat:"^[+-][0-9]+$" value in true + with Not_found -> false + +let number_string_drop_modifier value = + String.sub value 1 (String.length value - 1) + +let get_relative opts t = + if opts.relative then + match t with + | Number_string s -> + if not (is_relative s) then + failwith "Value is not a relative increment/decrement" + else Number_string (number_string_drop_modifier s) + | Range_string _ -> + failwith "increment/decrement does not apply to a range value" + else t + +let number_of_string opts s = + if not (looks_like_number s) then Printf.ksprintf failwith "'%s' is not a valid number" s else + let n = float_of_string_opt s in + match n with + | Some n -> + (* If floats are explicitly allowed, just return the number. *) + if opts.allow_float then n + (* If floats are not explicitly allowed, check if the argument has a decimal separator in it. + If the argument string contains a dot but float_of_string didn't dislike it, + it's a valid number but not an integer. + *) + else if not (String.contains s '.') then n + (* If float_of_string returned None, the argument string is just garbage rather than a number. *) + else Printf.ksprintf failwith "'%s' is not a valid integer number" s + | None -> + Printf.ksprintf failwith "'%s' is not a valid number" s + +let range_of_string opts s = + let rs = String.split_on_char '-' s |> List.map String.trim |> List.map (number_of_string opts) in + match rs with + | [l; r] -> (l, r) + | exception (Failure msg) -> + (* Some of the numbers in the range are bad. *) + Printf.ksprintf failwith "'%s' is not a valid number range: %s" s msg + | _ -> + (* The range itself if malformed, like 1-10-20. *) + Printf.ksprintf failwith "'%s' is not a valid number range" s + +let value_in_ranges ranges n = + let in_range (l, r) n = (n >= l) && (n <= r) in + List.fold_left (fun acc r -> acc || (in_range r n)) false ranges + +let value_not_in_ranges ranges n = + let in_range (l, r) n = (n >= l) && (n <= r) in + List.fold_left (fun acc r -> acc && (not (in_range r n))) true ranges + +let check_ranges opts m = + if opts.ranges <> [] then + let ranges = List.map (range_of_string opts) opts.ranges in + match m with + | Number_float n -> + if not (value_in_ranges ranges n) then + Printf.ksprintf failwith "Number is not in any of allowed ranges" + | Range_float (i, j) -> + if (not (value_in_ranges ranges i) || + not (value_in_ranges ranges j)) then + Printf.ksprintf failwith "Range is not in any of allowed ranges" + + +let check_not_ranges opts m = + if opts.not_ranges <> [] then + let ranges = List.map (range_of_string opts) opts.not_ranges in + match m with + | Number_float n -> + if not (value_not_in_ranges ranges n) then + Printf.ksprintf failwith "Number is in one of excluded ranges" + | Range_float (i, j) -> + if (not (value_not_in_ranges ranges i) || + not (value_not_in_ranges ranges j)) then + Printf.ksprintf failwith "Range is in one of excluded ranges" + +let check_not_values opts m = + let excluded_values = List.map (number_of_string opts) opts.not_values in + if excluded_values = [] then () else + match m with + | Range_float _ -> Printf.ksprintf failwith "--not-value cannot be used with ranges" + | Number_float num -> + begin + let res = List.find_opt ((=) num) excluded_values in + match res with + | None -> () + | Some _ -> Printf.ksprintf failwith "Value is excluded by --not-value" + end + +let check_argument_type opts m = + match m with + | Number_float _ -> + if opts.require_range then Printf.ksprintf failwith "Value must be a range, not a number" + else () + | Range_float _ -> + if opts.allow_range then () + else Printf.ksprintf failwith "Value must be a number, not a range" + +let is_range_val s = + try let _ = Pcre.exec ~pat:"^[0-9]+-[0-9]+$" s in true + with Not_found -> false + +let var_numeric_str s = + match is_range_val s with + | true -> Range_string s + | false -> Number_string s + +let () = try + let s = var_numeric_str !number_arg in + let opts = !opts in + let s = get_relative opts s in + let n = + match s with + | Number_string r -> Number_float (number_of_string opts r) + | Range_string r -> let i, j = range_of_string opts r in + Range_float (i, j) + in + check_argument_type opts n; + check_nonnegative opts n; + check_positive opts n; + check_not_values opts n; + check_ranges opts n; + check_not_ranges opts n +with (Failure err) -> + print_endline err; + exit 1 + diff --git a/src/validators/url.ml b/src/validators/url.ml new file mode 100644 index 0000000..3d77544 --- /dev/null +++ b/src/validators/url.ml @@ -0,0 +1,149 @@ +(* Extract and validate the scheme part. + As per the RFC: + + Scheme names consist of a sequence of characters. The lower case + letters "a"--"z", digits, and the characters plus ("+"), period + ("."), and hyphen ("-") are allowed. For resiliency, programs + interpreting URLs should treat upper case letters as equivalent to + lower case in scheme names (e.g., allow "HTTP" as well as "http"). + *) +let split_scheme url = + let aux url = + let res = Pcre.exec ~pat:{|^([a-zA-Z0-9\.\-]+):(.*)$|} url in + let scheme = Pcre.get_substring res 1 in + let uri = Pcre.get_substring res 2 in + (String.lowercase_ascii scheme, uri) + in + try Ok (aux url) + with Not_found -> Error (Printf.sprintf {|"%s" is not a valid URL|} url) + +let is_scheme_allowed allowed_schemes scheme = + match List.find_opt ((=) scheme) allowed_schemes with + | Some _ -> Ok () + | None -> Error (Printf.sprintf {|URL scheme "%s:" is not allowed|} scheme) + +let regex_matches regex s = + try + let _ = Pcre.exec ~rex:regex s in + true + with Not_found -> false + +let host_path_format = + Pcre.regexp + {|^//(?:[^/?#]+(?::[^/?#]*)?@)?([a-zA-Z0-9\-\._~]+|\[[a-zA-Z0-9:\.]+\])(?::([0-9]+))?(/.*)?$|} + +let host_name_format = Pcre.regexp {|^[a-zA-Z0-9]+([\-\._~]{1}[a-zA-Z0-9]+)*$|} +let ipv4_addr_format = Pcre.regexp {|^(([1-9]\d{0,2}|0)\.){3}([1-9]\d{0,2}|0)$|} +let ipv6_addr_format = Pcre.regexp {|^\[([a-z0-9:\.]+|[A-Z0-9:\.]+)\]$|} + +let is_port s = + try + let n = int_of_string s in + if n > 0 && n < 65536 then true + else false + with Failure _ -> false + +let is_ipv4_octet s = + try + let n = int_of_string s in + if n >= 0 && n < 256 then true + else false + with Failure _ -> false + +let is_ipv6_segment s = + try + let n = int_of_string ("0x" ^ s) in + if n >= 0 && n < 65536 then true + else false + with Failure _ -> false + +let is_ipv4_addr s = + let res = Pcre.exec ~rex:ipv4_addr_format s in + let ipv4_addr_str = Pcre.get_substring res 0 in + let ipv4_addr_l = String.split_on_char '.' ipv4_addr_str in + List.for_all is_ipv4_octet ipv4_addr_l + +let is_ipv6_pure_addr s = + let ipv6_addr_l = String.split_on_char ':' s in + if List.length ipv6_addr_l > 8 || List.length ipv6_addr_l < 3 then false + else + let seg_str_l = List.filter (fun s -> String.length s > 0) ipv6_addr_l in + List.for_all is_ipv6_segment seg_str_l + +let is_ipv6_dual_addr s = + let ipv6_addr_l = List.rev (String.split_on_char ':' s) in + match ipv6_addr_l with + | [] -> false + | h::t -> + if not (is_ipv4_addr h) then false + else + if List.length t > 6 || List.length t < 2 then false + else + let seg_str_l = List.filter (fun s -> String.length s > 0) t in + List.for_all is_ipv6_segment seg_str_l + +let is_ipv6_addr s = + let res = Pcre.exec ~rex:ipv6_addr_format s in + let ipv6_addr_str = Pcre.get_substring res 1 in + try + let typo = Pcre.exec ~pat:{|:::|} ipv6_addr_str in + match typo with + | _ -> false + with Not_found -> + is_ipv6_pure_addr ipv6_addr_str || is_ipv6_dual_addr ipv6_addr_str + +let host_path_matches s = + try + let res = Pcre.exec ~rex:host_path_format s in + let substr = Pcre.get_substrings ~full_match:false res in + let port_str = Array.get substr 1 in + if String.length port_str > 0 && not (is_port port_str) then false + else + let host = Array.get substr 0 in + match host with + | host when regex_matches ipv6_addr_format host -> is_ipv6_addr host + | host when regex_matches ipv4_addr_format host -> is_ipv4_addr host + | host when regex_matches host_name_format host -> true + | _ -> false + with Not_found -> false + +let validate_uri scheme uri = + if host_path_matches uri then Ok () + else Error (Printf.sprintf {|"%s" is not a valid URI for the %s URL scheme|} uri scheme) + +let validate_url allowed_schemes url = + let (let*) = Result.bind in + let* scheme, uri = split_scheme url in + let* () = is_scheme_allowed allowed_schemes scheme in + let* () = validate_uri scheme uri in + Ok () + +let file_transport_schemes = ["http"; "https"; "ftp"; "sftp"; "scp"; "tftp"] + +let message_schemes = ["mailto"; "tel"; "sms"] + +let allowed_schemes = ref [] +let url = ref "" + +let args = [ + ("--scheme", + Arg.String (fun s -> allowed_schemes := s :: !allowed_schemes), + "Allow only specified schemes"); + ("--file-transport", + Arg.Unit (fun () -> allowed_schemes := (List.append !allowed_schemes file_transport_schemes)), + "Allow only file transport protocols (HTTP/S, FTP, SCP/SFTP, TFTP)"); + ("--", Arg.Rest (fun s -> url := s), "Interpret next item as an argument"); +] + +let usage = Printf.sprintf "Usage: %s [OPTIONS] " Sys.argv.(0) + +let () = + let () = Arg.parse args (fun s -> url := s) usage in + (* Force all allowed scheme named to lowercase for ease of comparison. *) + let allowed_schemes = List.map String.lowercase_ascii !allowed_schemes in + let res = validate_url allowed_schemes !url in + match res with + | Ok () -> () + | Error msg -> + let () = Printf.fprintf stdout "%s" msg in + exit 1 -- cgit v1.2.3