CBMC
smt2_tokenizer.cpp
Go to the documentation of this file.
1
/*******************************************************************\
2
3
Module:
4
5
Author: Daniel Kroening, kroening@kroening.com
6
7
\*******************************************************************/
8
9
#include "
smt2_tokenizer.h
"
10
11
bool
is_smt2_simple_symbol_character
(
char
ch)
12
{
13
// any non-empty sequence of letters, digits and the characters
14
// ~ ! @ $ % ^ & * _ - + = < > . ? /
15
// that does not start with a digit and is not a reserved word.
16
17
return
isalnum(ch) ||
18
ch==
'~'
|| ch==
'!'
|| ch==
'@'
|| ch==
'$'
|| ch==
'%'
||
19
ch==
'^'
|| ch==
'&'
|| ch==
'*'
|| ch==
'_'
|| ch==
'-'
||
20
ch==
'+'
|| ch==
'='
|| ch==
'<'
|| ch==
'>'
|| ch==
'.'
||
21
ch==
'?'
|| ch==
'/'
;
22
}
23
24
smt2_tokenizert::tokent
smt2_tokenizert::get_simple_symbol
()
25
{
26
// any non-empty sequence of letters, digits and the characters
27
// ~ ! @ $ % ^ & * _ - + = < > . ? /
28
// that does not start with a digit and is not a reserved word.
29
30
buffer
.clear();
31
32
char
ch;
33
while
(
in
->get(ch))
34
{
35
if
(
is_smt2_simple_symbol_character
(ch))
36
{
37
buffer
+=ch;
38
}
39
else
40
{
41
in
->unget();
// put back
42
quoted_symbol
=
false
;
43
return
SYMBOL;
44
}
45
}
46
47
// eof -- this is ok here
48
if
(
buffer
.empty())
49
return
END_OF_FILE;
50
else
51
{
52
quoted_symbol
=
false
;
53
return
SYMBOL;
54
}
55
}
56
57
smt2_tokenizert::tokent
smt2_tokenizert::get_decimal_numeral
()
58
{
59
// we accept any sequence of digits and dots
60
61
buffer
.clear();
62
63
char
ch;
64
while
(
in
->get(ch))
65
{
66
if
(isdigit(ch) || ch==
'.'
)
67
{
68
buffer
+=ch;
69
}
70
else
71
{
72
in
->unget();
// put back
73
return
NUMERAL;
74
}
75
}
76
77
// eof -- this is ok here
78
if
(
buffer
.empty())
79
return
END_OF_FILE;
80
else
81
return
NUMERAL;
82
}
83
84
smt2_tokenizert::tokent
smt2_tokenizert::get_bin_numeral
()
85
{
86
// we accept any sequence of '0' or '1'
87
88
buffer
.clear();
89
buffer
+=
'#'
;
90
buffer
+=
'b'
;
91
92
char
ch;
93
while
(
in
->get(ch))
94
{
95
if
(ch==
'0'
|| ch==
'1'
)
96
{
97
buffer
+=ch;
98
}
99
else
100
{
101
in
->unget();
// put back
102
return
NUMERAL;
103
}
104
}
105
106
// eof -- this is ok here
107
if
(
buffer
.empty())
108
return
END_OF_FILE;
109
else
110
return
NUMERAL;
111
}
112
113
smt2_tokenizert::tokent
smt2_tokenizert::get_hex_numeral
()
114
{
115
// we accept any sequence of '0'-'9', 'a'-'f', 'A'-'F'
116
117
buffer
.clear();
118
buffer
+=
'#'
;
119
buffer
+=
'x'
;
120
121
char
ch;
122
while
(
in
->get(ch))
123
{
124
if
(isxdigit(ch))
125
{
126
buffer
+=ch;
127
}
128
else
129
{
130
in
->unget();
// put back
131
return
NUMERAL;
132
}
133
}
134
135
// eof -- this is ok here
136
if
(
buffer
.empty())
137
return
END_OF_FILE;
138
else
139
return
NUMERAL;
140
}
141
142
smt2_tokenizert::tokent
smt2_tokenizert::get_quoted_symbol
()
143
{
144
// any sequence of printable ASCII characters (including space,
145
// tab, and line-breaking characters) except for the backslash
146
// character \, that starts and ends with | and does not otherwise
147
// contain |
148
149
buffer
.clear();
150
151
char
ch;
152
while
(
in
->get(ch))
153
{
154
if
(ch==
'|'
)
155
{
156
quoted_symbol
=
true
;
157
return
SYMBOL;
// done
158
}
159
160
buffer
+=ch;
161
162
if
(ch==
'\n'
)
163
line_no
++;
164
}
165
166
// Hmpf. Eof before end of quoted symbol. This is an error.
167
throw
error
(
"EOF within quoted symbol"
);
168
}
169
170
smt2_tokenizert::tokent
smt2_tokenizert::get_string_literal
()
171
{
172
buffer
.clear();
173
174
char
ch;
175
while
(
in
->get(ch))
176
{
177
if
(ch==
'"'
)
178
{
179
// quotes may be escaped by repeating
180
if
(
in
->get(ch))
181
{
182
if
(ch==
'"'
)
183
{
184
}
185
else
186
{
187
in
->unget();
188
return
STRING_LITERAL;
// done
189
}
190
}
191
else
192
return
STRING_LITERAL;
// done
193
}
194
buffer
+=ch;
195
}
196
197
// Hmpf. Eof before end of string literal. This is an error.
198
throw
error
(
"EOF within string literal"
);
199
}
200
201
smt2_tokenizert::tokent
smt2_tokenizert::next_token
()
202
{
203
if
(
peeked
)
204
peeked
=
false
;
205
else
206
get_token_from_stream
();
207
208
return
token
;
209
}
210
211
void
smt2_tokenizert::get_token_from_stream
()
212
{
213
char
ch;
214
215
while
(
in
->get(ch))
216
{
217
switch
(ch)
218
{
219
case
'\n'
:
220
line_no
++;
221
break
;
222
223
case
' '
:
224
case
'\r'
:
225
case
'\t'
:
226
case
static_cast<
char
>
(160):
// non-breaking space
227
// skip any whitespace
228
break
;
229
230
case
';'
:
// comment
231
// skip until newline
232
while
(
in
->get(ch))
233
{
234
if
(ch==
'\n'
)
235
{
236
line_no
++;
237
break
;
238
}
239
}
240
break
;
241
242
case
'('
:
243
// produce sub-expression
244
token
= OPEN;
245
return
;
246
247
case
')'
:
248
// done with sub-expression
249
token
= CLOSE;
250
return
;
251
252
case
'|'
:
// quoted symbol
253
token
=
get_quoted_symbol
();
254
return
;
255
256
case
'"'
:
// string literal
257
token
=
get_string_literal
();
258
return
;
259
260
case
':'
:
// keyword
261
token
=
get_simple_symbol
();
262
if
(
token
== SYMBOL)
263
{
264
token
= KEYWORD;
265
return
;
266
}
267
else
268
throw
error
(
"expecting symbol after colon"
);
269
270
case
'#'
:
271
if
(
in
->get(ch))
272
{
273
if
(ch==
'b'
)
274
{
275
token
=
get_bin_numeral
();
276
return
;
277
}
278
else
if
(ch==
'x'
)
279
{
280
token
=
get_hex_numeral
();
281
return
;
282
}
283
else
284
throw
error
(
"unknown numeral token"
);
285
}
286
else
287
throw
error
(
"unexpected EOF in numeral token"
);
288
break
;
289
290
default
:
// likely a simple symbol or a numeral
291
if
(isdigit(ch))
292
{
293
in
->unget();
294
token
=
get_decimal_numeral
();
295
return
;
296
}
297
else
if
(
is_smt2_simple_symbol_character
(ch))
298
{
299
in
->unget();
300
token
=
get_simple_symbol
();
301
return
;
302
}
303
else
304
{
305
// illegal character, error
306
throw
error
() <<
"unexpected character '"
<< ch <<
'\''
;
307
}
308
}
309
}
310
311
token
= END_OF_FILE;
312
}
smt2_tokenizert::get_quoted_symbol
tokent get_quoted_symbol()
Definition:
smt2_tokenizer.cpp:142
smt2_tokenizert::token
tokent token
Definition:
smt2_tokenizer.h:110
smt2_tokenizert::tokent
enum { NONE, END_OF_FILE, STRING_LITERAL, NUMERAL, SYMBOL, KEYWORD, OPEN, CLOSE } tokent
Definition:
smt2_tokenizer.h:66
smt2_tokenizert::get_string_literal
tokent get_string_literal()
Definition:
smt2_tokenizer.cpp:170
smt2_tokenizert::error
smt2_errort error() const
generate an error exception
Definition:
smt2_tokenizer.h:99
smt2_tokenizert::in
std::istream * in
Definition:
smt2_tokenizer.h:105
smt2_tokenizert::peeked
bool peeked
Definition:
smt2_tokenizer.h:109
smt2_tokenizer.h
smt2_tokenizert::get_bin_numeral
tokent get_bin_numeral()
Definition:
smt2_tokenizer.cpp:84
smt2_tokenizert::line_no
unsigned line_no
Definition:
smt2_tokenizer.h:106
smt2_tokenizert::buffer
std::string buffer
Definition:
smt2_tokenizer.h:107
smt2_tokenizert::get_hex_numeral
tokent get_hex_numeral()
Definition:
smt2_tokenizer.cpp:113
smt2_tokenizert::get_simple_symbol
tokent get_simple_symbol()
Definition:
smt2_tokenizer.cpp:24
smt2_tokenizert::get_decimal_numeral
tokent get_decimal_numeral()
Definition:
smt2_tokenizer.cpp:57
is_smt2_simple_symbol_character
bool is_smt2_simple_symbol_character(char ch)
Definition:
smt2_tokenizer.cpp:11
smt2_tokenizert::next_token
tokent next_token()
Definition:
smt2_tokenizer.cpp:201
smt2_tokenizert::quoted_symbol
bool quoted_symbol
Definition:
smt2_tokenizer.h:108
smt2_tokenizert::get_token_from_stream
void get_token_from_stream()
read a token from the input stream and store it in 'token'
Definition:
smt2_tokenizer.cpp:211
src
solvers
smt2
smt2_tokenizer.cpp
Generated by
1.8.17