{

open Util
open Vyos1x_parser

exception Error of string

let lexing_error lexbuf msg =
  let line, column = Util.get_lexing_position lexbuf in
  raise (Syntax_error (Some (line, column), msg))

(*

The language of the VyOS 1.x config file has multiple ambiguities that
make it not even context free.

The main issue is the lack of explicit statement separators: if we ignore whitespace,
a parser is left to guess whether, for example,

address dhcp # leaf node with a value
disable      # valueless leaf node

is three valueless nodes, a valueless node followed by a node with a value,
or a node with a value followed by a valueless node.

The only cue is the newline, which means that newlines are sometimes significant,
and sometimes they aren't.

interfaces { # doesn't matter
  ethernet 'eth0' { # doesn't matter
    address '192.0.2.1/24' # significant!
    disable # significant!

    # empty line -- doesn't matter
    hw-id 00:aa:bb:cc:dd:ee # significant!
  } # doesn't matter
}

If there were explicit terminators (like we do in VyConf, or like JunOS does),
the language would be context free. Enter the lexer hack: let's emit newlines only
when they are significant, so that the parser can use them as terminators.
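
Concretely, ignoring the explanatory # annotations, the example above would lex
into roughly the following token stream (an illustration; the constructors are
the ones declared in Vyos1x_parser):

  IDENTIFIER "interfaces"  LEFT_BRACE
  IDENTIFIER "ethernet"  STRING "eth0"  LEFT_BRACE
  IDENTIFIER "address"  STRING "192.0.2.1/24"  NEWLINE
  IDENTIFIER "disable"  NEWLINE
  IDENTIFIER "hw-id"  IDENTIFIER "00:aa:bb:cc:dd:ee"  NEWLINE
  RIGHT_BRACE  RIGHT_BRACE  EOF

No NEWLINE is emitted after the braces or after the empty line, only after
the leaf nodes.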

The informal idea is that a newline is only significant if it follows a leaf node.
So we need rules for finding out if we are inside a leaf node or not.

These are the formal rules: a newline is significant if and only if
1. the preceding token is an identifier, or
2. the preceding token is a quoted string.

We track this with the vy_inside_node flag: it is set to true when we enter a leaf
node and reset when we reach the end of it. Each lexing rule takes the current flag
as an argument and returns the updated flag together with the token, so the caller
threads it between calls.
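
For reference, a caller could thread the flag between calls roughly like this
(a sketch only, not part of this file; the next_token name is made up):

  let next_token =
    let vy_inside_node = ref false in
    fun lexbuf ->
      let (flag, tok) = token !vy_inside_node lexbuf in
      vy_inside_node := flag;
      tok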

*)

}

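(* Each entry point below returns a pair of the new vy_inside_node value and the
   token itself; the caller is expected to feed the returned flag back into the
   next call. *)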
rule token vy_inside_node = parse
| [' ' '\t' '\r']
    { token vy_inside_node lexbuf }
| '\n'
    { Lexing.new_line lexbuf; if vy_inside_node then (false, NEWLINE) else token vy_inside_node lexbuf }
| '"'
    { read_double_quoted_string true (Buffer.create 16) lexbuf }
| '\''
    { read_single_quoted_string true (Buffer.create 16) lexbuf }
| "/*"
    { read_comment false (Buffer.create 16) lexbuf }
| '{'
    { (false, LEFT_BRACE) }
| '}'
    { (false, RIGHT_BRACE) }
| "//" [^ '\n']*
    { token vy_inside_node lexbuf }
| [^ ' ' '\t' '\n' '\r' '{' '}' '"' '\'' ]+ as s
    { (true, IDENTIFIER s) }
| eof
    { (vy_inside_node, EOF) }
| _ as bad_char
    { lexing_error lexbuf (Printf.sprintf "unexpected character '%c'" bad_char) }

and read_double_quoted_string vy_inside_node buf =
  parse
  | '"'       { (vy_inside_node, STRING (Buffer.contents buf)) }
  | '\\' '/'  { Buffer.add_char buf '/'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' '\\' { Buffer.add_char buf '\\'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'b'  { Buffer.add_char buf '\b'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'f'  { Buffer.add_char buf '\012'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'n'  { Buffer.add_char buf '\n'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'r'  { Buffer.add_char buf '\r'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' 't'  { Buffer.add_char buf '\t'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' '\'' { Buffer.add_char buf '\''; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\\' '"' { Buffer.add_char buf '"'; read_double_quoted_string vy_inside_node buf lexbuf }
  | '\n'      { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; read_double_quoted_string vy_inside_node buf lexbuf }
  | [^ '"' '\\']+
    { Buffer.add_string buf (Lexing.lexeme lexbuf);
      read_double_quoted_string vy_inside_node buf lexbuf
    }
  | eof { lexing_error lexbuf "Quoted string is missing the closing double quote" }

and read_single_quoted_string vy_inside_node buf =
  parse
  | '\''      { (vy_inside_node, STRING (Buffer.contents buf)) }
  | '\\' '/'  { Buffer.add_char buf '/'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' '\\' { Buffer.add_char buf '\\'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'b'  { Buffer.add_char buf '\b'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'f'  { Buffer.add_char buf '\012'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'n'  { Buffer.add_char buf '\n'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' 'r'  { Buffer.add_char buf '\r'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' 't'  { Buffer.add_char buf '\t'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' '\'' { Buffer.add_char buf '\''; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\\' '"' { Buffer.add_char buf '"'; read_single_quoted_string vy_inside_node buf lexbuf }
  | '\n'      { Lexing.new_line lexbuf; Buffer.add_char buf '\n'; read_single_quoted_string vy_inside_node buf lexbuf }
  | [^ '\'' '\\']+
    { Buffer.add_string buf (Lexing.lexeme lexbuf);
      read_single_quoted_string vy_inside_node buf lexbuf
    }
  | eof { lexing_error lexbuf "Quoted string is missing the closing single quote" }

and read_comment vy_inside_node buf =
  parse
  | "*/"
      { (vy_inside_node, COMMENT (Buffer.contents buf)) }
  | _
      { Buffer.add_string buf (Lexing.lexeme lexbuf);
        read_comment vy_inside_node buf lexbuf
      }

(*

If you are curious how the original parsers from Vyatta handled the issue: they did not.

The CStore parser cheated by reading data from command definitions to resolve
the ambiguities, which made it impossible to use in standalone config
manipulation programs like migration scripts.

The XorpConfigParser could not tell a tag node's name and tag from
a leaf node with a value, which made it impossible to manipulate
tag nodes or change values properly.

*)