summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/curly_lexer.mll40
-rw-r--r--test/curly_parser_test.ml16
2 files changed, 47 insertions, 9 deletions
diff --git a/src/curly_lexer.mll b/src/curly_lexer.mll
index 6254358..6d2bc87 100644
--- a/src/curly_lexer.mll
+++ b/src/curly_lexer.mll
@@ -7,8 +7,18 @@ exception Error of string
}
rule token = parse
-| [' ' '\t' '\n']
+| [' ' '\t' '\r']
{ token lexbuf }
+| '\n'
+ { Lexing.new_line lexbuf; token lexbuf }
+| '"'
+ { read_string (Buffer.create 16) lexbuf }
+| '''
+ { read_single_quoted_string (Buffer.create 16) lexbuf }
+| "//" [^ '\n']+ '\n'
+ { Lexing.new_line lexbuf ; token lexbuf }
+| "/*"
+ { read_comment (Buffer.create 16) lexbuf }
| '{'
{ LEFT_BRACE }
| '}'
@@ -19,16 +29,10 @@ rule token = parse
{ RIGHT_BRACKET }
| ';'
{ SEMI }
-| ['a' -'z' 'A' - 'Z' '0' - '9' '.' '/' ':' '@' '$' '-' ]+ as s
+| [^ ' ' '\t' '\n' '\r' '{' '}' '[' ']' ';' '#' '"' ''' ]+ as s
{ IDENTIFIER s}
| eof
{ EOF }
-| '"'
- { read_string (Buffer.create 16) lexbuf }
-| "//" [^ '\n']+ '\n'
- { Lexing.new_line lexbuf ; token lexbuf }
-| "/*"
- { read_comment (Buffer.create 16) lexbuf }
| _
{ raise (Error (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }
@@ -52,6 +56,26 @@ and read_string buf =
| _ { raise (Error (Printf.sprintf "Illegal string character: %s" (Lexing.lexeme lexbuf))) }
| eof { raise (Error ("String is not terminated")) }
+(* Accumulates a single-quoted string body in [buf]; returns STRING at the closing quote and
+   raises Error on an unmatched character or eof. Newlines have their own rule so new_line runs. *)
+and read_single_quoted_string buf =
+ parse
+ | ''' { STRING (Buffer.contents buf) }
+ | '\\' '/' { Buffer.add_char buf '/'; read_single_quoted_string buf lexbuf }
+ | '\\' '\\' { Buffer.add_char buf '\\'; read_single_quoted_string buf lexbuf }
+ | '\\' 'b' { Buffer.add_char buf '\b'; read_single_quoted_string buf lexbuf }
+ | '\\' 'f' { Buffer.add_char buf '\012'; read_single_quoted_string buf lexbuf }
+ | '\\' 'n' { Buffer.add_char buf '\n'; read_single_quoted_string buf lexbuf }
+ | '\\' 'r' { Buffer.add_char buf '\r'; read_single_quoted_string buf lexbuf }
+ | '\\' 't' { Buffer.add_char buf '\t'; read_single_quoted_string buf lexbuf }
+ | '\\' '\'' { Buffer.add_char buf '\''; read_single_quoted_string buf lexbuf }
+ | '\\' '"' { Buffer.add_char buf '"'; read_single_quoted_string buf lexbuf }
+ | '\n' { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; read_single_quoted_string buf lexbuf }
+ | [^ ''' '\\' '\n']+
+ { Buffer.add_string buf (Lexing.lexeme lexbuf); read_single_quoted_string buf lexbuf }
+ | _ { raise (Error (Printf.sprintf "Illegal string character: %s" (Lexing.lexeme lexbuf))) }
+ | eof { raise (Error ("String is not terminated")) }
+
and read_comment buf =
parse
| "*/"
diff --git a/test/curly_parser_test.ml b/test/curly_parser_test.ml
index 8e3d3fd..b84e5f2 100644
--- a/test/curly_parser_test.ml
+++ b/test/curly_parser_test.ml
@@ -15,6 +15,7 @@ let config_tag_top_level = "foo bar { baz quux; }"
let config_with_leaf = "foo { bar baz; }"
let config_with_leaf_url_unquoted = "foo { bar http://www2.example.org/foo; }"
let config_with_leaf_value_quoted = "foo { bar \"foo bar\"; }"
+let config_with_leaf_value_single_quoted = "foo { bar \'foo bar\'; }"
let config_with_leaf_valueless = "foo { bar; }"
(* XXX: naive use of Menhir's separated_list doesn't allow [baz; xyzzy;],
@@ -22,6 +23,7 @@ let config_with_leaf_valueless = "foo { bar; }"
let config_with_multi = "foo { bar [baz; xyzzy]; }"
let config_with_tag = "foo { bar baz { quux xyzzy; } bar qwerty { quux foobar; } }"
+let config_with_tag_nonalpha = "foo { bar baz0.99 { } bar baz1-8 { } }"
let config_with_comment = "foo { /* comment */ bar { } }"
let config_with_leaf_node_comment = "foo { /* comment */ bar baz; }"
@@ -53,7 +55,7 @@ let test_parse_with_leaf test_ctxt =
let config = parse config_with_leaf in
assert_equal (CT.get_value config ["foo"; "bar"]) "baz"
-(* Leaf nodes with [.:/] in values are parsed correctly *)
+(* Leaf nodes with non-alphanumeric characters in values are parsed correctly *)
let test_parse_with_leaf_url_unquoted test_ctxt =
let config = parse config_with_leaf_url_unquoted in
assert_equal (CT.get_value config ["foo"; "bar"]) "http://www2.example.org/foo"
@@ -63,6 +65,11 @@ let test_parse_with_leaf_value_quoted test_ctxt =
let config = parse config_with_leaf_value_quoted in
assert_equal (CT.get_value config ["foo"; "bar"]) "foo bar"
+(* Leaf nodes with single quoted values are parsed correctly *)
+let test_parse_with_leaf_value_single_quoted test_ctxt =
+ let config = parse config_with_leaf_value_single_quoted in
+ assert_equal (CT.get_value config ["foo"; "bar"]) "foo bar"
+
(* Valueless leaf nodes work *)
let test_parse_with_leaf_valueless test_ctxt =
let config = parse config_with_leaf_valueless in
@@ -103,6 +110,11 @@ let test_parse_with_tag test_ctxt =
assert_equal (CT.get_value config ["foo"; "bar"; "baz"; "quux"]) "xyzzy";
assert_equal (CT.get_value config ["foo"; "bar"; "qwerty"; "quux"]) "foobar"
+(* Non-alphanumeric characters are allowed in tag nodes *)
+let test_parse_with_tag_nonalpha test_ctxt =
+ let config = parse config_with_tag_nonalpha in
+ assert_equal (Vytree.get config ["foo"; "bar"] |> Vytree.list_children) ["baz0.99"; "baz1-8"]
+
(* Normal nodes with duplicate children are detected *)
let test_parse_node_duplicate_child test_ctxt =
try ignore @@ parse config_with_duplicate_node; assert_failure "Duplicated node child didn't cause errors"
@@ -128,11 +140,13 @@ let suite =
"test_parse_with_leaf" >:: test_parse_with_leaf;
"test_parse_with_leaf_url_unquoted" >:: test_parse_with_leaf_url_unquoted;
"test_parse_with_leaf_value_quoted" >:: test_parse_with_leaf_value_quoted;
+ "test_parse_with_leaf_value_single_quoted" >:: test_parse_with_leaf_value_single_quoted;
"test_parse_with_leaf_valueless" >:: test_parse_with_leaf_valueless;
"test_parse_top_level_leaf_node" >:: test_parse_top_level_leaf_node;
"test_parse_top_level_tag_node" >:: test_parse_top_level_tag_node;
"test_parse_with_multi" >:: test_parse_with_multi;
"test_parse_with_tag" >:: test_parse_with_tag;
+ "test_parse_with_tag_nonalpha" >:: test_parse_with_tag_nonalpha;
"test_parse_with_comment" >:: test_parse_with_comment;
"test_parse_with_leaf_node_comment" >:: test_parse_with_leaf_node_comment;
"test_parse_with_tag_node_comment" >:: test_parse_with_tag_node_comment;