summaryrefslogtreecommitdiff
path: root/src/vyos1x_lexer.mll
diff options
context:
space:
mode:
authorDaniil Baturin <daniil@baturin.org>2019-06-30 01:00:01 +0700
committerDaniil Baturin <daniil@baturin.org>2019-06-30 01:25:57 +0700
commitaa3d09c3fff12e379fd189ceaf55644574ff5c43 (patch)
tree95ffb89bb701c14ea0e9533661c3c5c0a6aff8e9 /src/vyos1x_lexer.mll
downloadvyos1x-config-aa3d09c3fff12e379fd189ceaf55644574ff5c43.tar.gz
vyos1x-config-aa3d09c3fff12e379fd189ceaf55644574ff5c43.zip
Initial import of libraries from Vyconf and old libvyosconfig.
Diffstat (limited to 'src/vyos1x_lexer.mll')
-rw-r--r--src/vyos1x_lexer.mll138
1 files changed, 138 insertions, 0 deletions
diff --git a/src/vyos1x_lexer.mll b/src/vyos1x_lexer.mll
new file mode 100644
index 0000000..a996642
--- /dev/null
+++ b/src/vyos1x_lexer.mll
@@ -0,0 +1,138 @@
+{
+
+open Vyos1x_parser
+
+(* Raised by the lexer rules below, with a human-readable message,
+   when the input cannot be tokenized. *)
+exception Error of string
+
+(*
+
+The language of the VyOS 1.x config file has multiple ambiguities that
+make it not even context free.
+
+The main issue is lack of explicit statement separators, so if we ignore whitespace,
+a parser is left to guess if, for example
+
+address dhcp # leaf node with a value
+disable # valueless leaf node
+
+is three valueless nodes, a valueless node followed by a node with a value,
+or a node with a value followed by a valueless node.
+
+The only cue is the newline, which means that newlines are sometimes significant,
+and sometimes they aren't.
+
+interfaces { # doesn't matter
+ ethernet 'eth0' { # doesn't matter
+ address '192.0.2.1/24' # significant!
+ disable # significant!
+
+ # empty line -- doesn't matter
+ hw-id 00:aa:bb:cc:dd:ee # significant!
+ } # doesn't matter
+}
+
+If there were explicit terminators (like we do in VyConf, or like JunOS does),
+the language would be context free. Enter the lexer hack: let's emit newlines only
+when they are significant, so that the parser can use them as terminators.
+
+The informal idea is that a newline is only significant if it follows a leaf node.
+So we need rules for finding out if we are inside a leaf node or not.
+
+These are the formal rules. A newline is significant if and only if
+the preceding token is either
+1. an identifier, or
+2. a quoted string.
+
+We set the vy_inside_node flag to true when we enter a leaf node and reset it when
+we reach the end of it.
+
+*)
+
+(* Lexer state consulted by the newline rule of [token].
+   It is set to true when an identifier or a quoted string is lexed and
+   cleared by braces, comments and by a newline that was reported as NEWLINE.
+   The flag lives at module level and is not cleared when EOF is reached. *)
+let vy_inside_node = ref false
+
+}
+
+(* Characters that separate tokens and carry no meaning of their own. *)
+let blank = [' ' '\t' '\r']
+
+(* A run of characters that are neither whitespace, braces nor quotes. *)
+let bare_word = [^ ' ' '\t' '\n' '\r' '{' '}' '"' ''' ]+
+
+(* Main entry point: produces the next token for the parser.
+
+   ocamllex picks the longest match and, on a tie, the rule listed first,
+   so the order of the cases below must not be changed.
+
+   NOTE(review): a slash-star that is immediately followed by non-blank
+   characters is matched by the bare word rule, because that match is longer
+   than the two-character comment opener. *)
+rule token = parse
+| blank
+    { token lexbuf }
+| '\n'
+    { Lexing.new_line lexbuf;
+      (* Only a newline that directly follows a leaf node is a terminator. *)
+      let ends_leaf = !vy_inside_node in
+      vy_inside_node := false;
+      if ends_leaf then NEWLINE else token lexbuf }
+| '"'
+    { let contents = Buffer.create 16 in
+      vy_inside_node := true;
+      read_string contents lexbuf }
+| '''
+    { let contents = Buffer.create 16 in
+      vy_inside_node := true;
+      read_single_quoted_string contents lexbuf }
+| "/*"
+    { let text = Buffer.create 16 in
+      vy_inside_node := false;
+      read_comment text lexbuf }
+| '{'
+    { vy_inside_node := false;
+      LEFT_BRACE }
+| '}'
+    { vy_inside_node := false;
+      RIGHT_BRACE }
+| "//" [^ '\n']*
+    { (* Line comments are dropped; the newline itself is left in the input. *)
+      token lexbuf }
+| bare_word as s
+    { vy_inside_node := true;
+      IDENTIFIER s }
+| eof
+    { EOF }
+| _
+    { let offset = Lexing.lexeme_start lexbuf in
+      let message = Printf.sprintf "At offset %d: unexpected character.\n" offset in
+      raise (Error message) }
+
+(* Lexes the remainder of a double-quoted string; the opening quote has
+   already been consumed by [token]. Unescaped contents are accumulated in
+   [buf] and returned as one STRING token when the closing quote is reached.
+
+   Raises [Error] on a backslash that does not start a known escape sequence
+   and on end of input before the closing quote. *)
+and read_string buf =
+  parse
+  | '"'       { STRING (Buffer.contents buf) }
+  | '\\' '/'  { Buffer.add_char buf '/'; read_string buf lexbuf }
+  | '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf }
+  | '\\' 'b'  { Buffer.add_char buf '\b'; read_string buf lexbuf }
+  | '\\' 'f'  { Buffer.add_char buf '\012'; read_string buf lexbuf }
+  | '\\' 'n'  { Buffer.add_char buf '\n'; read_string buf lexbuf }
+  | '\\' 'r'  { Buffer.add_char buf '\r'; read_string buf lexbuf }
+  | '\\' 't'  { Buffer.add_char buf '\t'; read_string buf lexbuf }
+  | '\\' '\'' { Buffer.add_char buf '\''; read_string buf lexbuf }
+  | '\\' '"'  { Buffer.add_char buf '"'; read_string buf lexbuf }
+  | '\n'      { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; read_string buf lexbuf }
+  (* Newlines are excluded from this class so that they always reach the
+     dedicated rule above. Otherwise the longest-match rule would swallow them
+     inside a longer run and the line counter would not advance. *)
+  | [^ '"' '\\' '\n']+
+      { Buffer.add_string buf (Lexing.lexeme lexbuf);
+        read_string buf lexbuf
+      }
+  | _ { raise (Error (Printf.sprintf "Illegal string character: %s" (Lexing.lexeme lexbuf))) }
+  | eof { raise (Error ("String is not terminated")) }
+
+(* Lexes the remainder of a single-quoted string; the opening quote has
+   already been consumed by [token]. Unescaped contents are accumulated in
+   [buf] and returned as one STRING token when the closing quote is reached.
+
+   Every case continues in this same rule, so the string can only be closed
+   by a single quote, also after an escape sequence or a newline.
+
+   Raises [Error] on a backslash that does not start a known escape sequence
+   and on end of input before the closing quote. *)
+and read_single_quoted_string buf =
+  parse
+  | '''       { STRING (Buffer.contents buf) }
+  | '\\' '/'  { Buffer.add_char buf '/'; read_single_quoted_string buf lexbuf }
+  | '\\' '\\' { Buffer.add_char buf '\\'; read_single_quoted_string buf lexbuf }
+  | '\\' 'b'  { Buffer.add_char buf '\b'; read_single_quoted_string buf lexbuf }
+  | '\\' 'f'  { Buffer.add_char buf '\012'; read_single_quoted_string buf lexbuf }
+  | '\\' 'n'  { Buffer.add_char buf '\n'; read_single_quoted_string buf lexbuf }
+  | '\\' 'r'  { Buffer.add_char buf '\r'; read_single_quoted_string buf lexbuf }
+  | '\\' 't'  { Buffer.add_char buf '\t'; read_single_quoted_string buf lexbuf }
+  | '\\' '\'' { Buffer.add_char buf '\''; read_single_quoted_string buf lexbuf }
+  | '\\' '"'  { Buffer.add_char buf '"'; read_single_quoted_string buf lexbuf }
+  | '\n'      { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; read_single_quoted_string buf lexbuf }
+  (* Newlines are excluded from this class so that they always reach the
+     dedicated rule above. Otherwise the longest-match rule would swallow them
+     inside a longer run and the line counter would not advance. *)
+  | [^ ''' '\\' '\n']+
+      { Buffer.add_string buf (Lexing.lexeme lexbuf);
+        read_single_quoted_string buf lexbuf
+      }
+  | _ { raise (Error (Printf.sprintf "Illegal string character: %s" (Lexing.lexeme lexbuf))) }
+  | eof { raise (Error ("String is not terminated")) }
+
+(* Lexes the remainder of a block comment; the opening slash-star has already
+   been consumed by [token]. Everything up to the closing star-slash is
+   accumulated in [buf] and returned as one COMMENT token.
+
+   Raises [Error] if the input ends before the comment is closed. *)
+and read_comment buf =
+  parse
+  | "*/"
+      { COMMENT (Buffer.contents buf) }
+  | '\n'
+      { (* Keep the line counter of the lexbuf accurate across multi-line comments. *)
+        Lexing.new_line lexbuf;
+        Buffer.add_char buf '\n';
+        read_comment buf lexbuf
+      }
+  | _
+      { Buffer.add_string buf (Lexing.lexeme lexbuf);
+        read_comment buf lexbuf
+      }
+  | eof
+      { (* Without this case an unterminated comment would surface as the
+           generic empty-token failure of the generated lexer. *)
+        raise (Error ("Comment is not terminated")) }
+
+(*
+
+If you are curious how the original parsers handled the issue: they did not.
+The CStore parser cheated by reading data from command definitions to resolve
+the ambiguities, which made it impossible to use in standalone config
+manipulation programs like migration scripts.
+
+The XorpConfigParser could not tell tag nodes' name and tag from
+a leaf node with a value, which made it impossible to manipulate
+tag nodes or change values properly.
+
+*)