summaryrefslogtreecommitdiff
path: root/src/url.ml
blob: 1985ff070f72c12426e48c7752329a29c293cb36 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
(* Extract and validate the scheme part.
   As per the RFC:

  Scheme names consist of a sequence of characters. The lower case
  letters "a"--"z", digits, and the characters plus ("+"), period
  ("."), and hyphen ("-") are allowed. For resiliency, programs
  interpreting URLs should treat upper case letters as equivalent to
  lower case in scheme names (e.g., allow "HTTP" as well as "http").
 *)
let split_scheme url =
  let aux url =
    let res = Pcre.exec ~pat:{|^([a-zA-Z0-9\.\-]+):(.*)$|} url in
    let scheme = Pcre.get_substring res 1 in
    let uri = Pcre.get_substring res 2 in
    (String.lowercase_ascii scheme, uri)
  in
  try Ok (aux url)
  with Not_found -> Error (Printf.sprintf {|"%s" is not a valid URL|} url)

let is_scheme_allowed allowed_schemes scheme =
  match List.find_opt ((=) scheme) allowed_schemes with
  | Some _ -> Ok ()
  | None -> Error (Printf.sprintf {|URL scheme "%s:" is not allowed|} scheme)

let regex_matches regex s =
  try
    let _ = Pcre.exec ~pat:regex s in
    true
  with Not_found -> false

let validate_uri scheme uri =
  match scheme with
  | "http" | "https" ->
    if regex_matches {|//[a-z0-9]+([\-\.]{1}[a-z0-9]+)*(:[0-9]+)*(/.* )?|} uri then Ok ()
    else Error (Printf.sprintf {|"%s" is not a valid URI for the %s URL scheme|} uri scheme)
  | _ -> Ok ()

let validate_url allowed_schemes url =
  let (let*) = Result.bind in
  let* scheme, uri = split_scheme url in
  let* () = is_scheme_allowed allowed_schemes scheme in
  let* () = validate_uri scheme uri in
  Ok ()

let file_transport_schemes = ["http"; "https"; "ftp"; "sftp"; "scp"; "tftp"]

let message_schemes = ["mailto"; "tel"; "sms"]

let allowed_schemes = ref []
let url = ref ""

let args = [
    ("--scheme",
     Arg.String (fun s -> allowed_schemes := s :: !allowed_schemes),
     "Allow only specified schemes");
    ("--file-transport",
     Arg.Unit (fun () -> allowed_schemes := (List.append !allowed_schemes file_transport_schemes)),
     "Allow only file transport protocols (HTTP/S, FTP, SCP/SFTP, TFTP)");
    ("--", Arg.Rest (fun s -> url := s), "Interpret next item as an argument");
]

let usage = Printf.sprintf "Usage: %s [OPTIONS] <URL>" Sys.argv.(0)

let () =
  let () = Arg.parse args (fun s -> url := s) usage in
  (* Force all allowed scheme named to lowercase for ease of comparison. *)
  let allowed_schemes = List.map String.lowercase_ascii !allowed_schemes in
  let res = validate_url allowed_schemes !url in
  match res with
  | Ok () -> ()
  | Error msg ->
    let () = Printf.fprintf stdout "%s" msg in
    exit 1