summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniil Baturin <daniil@baturin.org>2018-07-18 03:40:18 +0200
committerDaniil Baturin <daniil@baturin.org>2018-07-18 03:40:18 +0200
commitc7f24109e78115318042428a33b9ad3b98d3b5af (patch)
treedbc79699907f183000b26cb5d42ed54a40040d23
parent976442b5202121e538a5a3e463a0ae7ca8c90950 (diff)
downloadlibvyosconfig-c7f24109e78115318042428a33b9ad3b98d3b5af.tar.gz
libvyosconfig-c7f24109e78115318042428a33b9ad3b98d3b5af.zip
Use a more descriptive name for the lexer hack flag and add an explanation of it.
-rw-r--r--parser/vyos1x_lexer.mll72
1 files changed, 64 insertions, 8 deletions
diff --git a/parser/vyos1x_lexer.mll b/parser/vyos1x_lexer.mll
index f471ec6..1969b91 100644
--- a/parser/vyos1x_lexer.mll
+++ b/parser/vyos1x_lexer.mll
@@ -4,7 +4,50 @@ open Vyos1x_parser
exception Error of string
-let vy_in_string = ref false
+(*
+
+The language of the VyOS 1.x config file has multiple ambiguities that
+make it not even context free.
+
+The main issue is lack of explicit statement separators, so if we ignore whitespace,
+a parser is left to guess if, for example
+
+address dhcp # leaf node with a value
+disable # valueless leaf node
+
+is three valueless nodes, a valueless node followed by a node with a value,
+or a node with a value followed by a valueless node.
+
+The only cue is the newline, which means that newlines are sometimes significant,
+and sometimes they aren't.
+
+interfaces { # doesn't matter
+ ethernet 'eth0' { # doesn't matter
+ address '192.0.2.1/24' # significant!
+ disable # significant!
+
+ # empty line -- doesn't matter
+ hw-id 00:aa:bb:cc:dd:ee # significant!
+ } # doesn't matter
+}
+
+If there were explicit terminators (as there are in VyConf and in JunOS),
+the language would be context free. Enter the lexer hack: let's emit newlines only
+when they are significant, so that the parser can use them as terminators.
+
+The informal idea is that a newline is only significant if it follows a leaf node.
+So we need rules for finding out if we are inside a leaf node or not.
+
+These are the formal rules. A newline is significant if and only if either
+1. the preceding token is an identifier, or
+2. the preceding token is a quoted string
+
+We set the vy_inside_node flag to true when we may be inside a leaf node (after an identifier
+or a quoted string) and reset it when that node ends (at a newline, a brace, or a comment).
+
+*)
+
+let vy_inside_node = ref false
}
@@ -12,19 +55,19 @@ rule token = parse
| [' ' '\t' '\r']
{ token lexbuf }
| '\n'
- { Lexing.new_line lexbuf; if !vy_in_string then (vy_in_string := false; NEWLINE) else token lexbuf }
+ { Lexing.new_line lexbuf; if !vy_inside_node then (vy_inside_node := false; NEWLINE) else token lexbuf }
| '"'
- { vy_in_string := true; read_string (Buffer.create 16) lexbuf }
+ { vy_inside_node := true; read_string (Buffer.create 16) lexbuf }
| '''
- { vy_in_string := true; read_single_quoted_string (Buffer.create 16) lexbuf }
+ { vy_inside_node := true; read_single_quoted_string (Buffer.create 16) lexbuf }
| "/*"
- { vy_in_string := false; read_comment (Buffer.create 16) lexbuf }
+ { vy_inside_node := false; read_comment (Buffer.create 16) lexbuf }
| '{'
- { vy_in_string := false; LEFT_BRACE }
+ { vy_inside_node := false; LEFT_BRACE }
| '}'
- { vy_in_string := false; RIGHT_BRACE }
+ { vy_inside_node := false; RIGHT_BRACE }
| [^ ' ' '\t' '\n' '\r' '{' '}' '[' ']' ';' '#' '"' ''' ]+ as s
- { vy_in_string := true; IDENTIFIER s}
+ { vy_inside_node := true; IDENTIFIER s}
| eof
{ EOF }
| _
@@ -78,3 +121,16 @@ and read_comment buf =
{ Buffer.add_string buf (Lexing.lexeme lexbuf);
read_comment buf lexbuf
}
+
+(*
+
+If you are curious how the original parsers handled the issue: they did not.
+The CStore parser cheated by reading data from command definitions to resolve
+the ambiguities, which made it impossible to use in standalone config
+manipulation programs like migration scripts.
+
+The XorpConfigParser could not tell a tag node's name and tag apart from
+a leaf node with a value, which made it impossible to manipulate
+tag nodes or change values properly.
+
+*)