CBMC
format_strings.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Format String Parser
4 
5 Author: CM Wintersteiger
6 
7 \*******************************************************************/
8 
11 
12 #include "format_strings.h"
13 
14 #include <util/c_types.h>
15 #include <util/exception_utils.h>
16 #include <util/invariant.h>
17 #include <util/std_expr.h>
18 
19 #include <cctype>
20 
22  std::string::const_iterator &it,
23  format_tokent &curtok)
24 {
25  while(*it=='#' || *it=='0' ||
26  *it=='-' || *it==' ' || *it=='+')
27  {
28  switch(*it)
29  {
30  case '#':
31  curtok.flags.push_back(format_tokent::flag_typet::ALTERNATE); break;
32  case '0':
33  curtok.flags.push_back(format_tokent::flag_typet::ZERO_PAD); break;
34  case '-':
35  curtok.flags.push_back(format_tokent::flag_typet::LEFT_ADJUST); break;
36  case ' ':
37  curtok.flags.push_back(format_tokent::flag_typet::SIGNED_SPACE); break;
38  case '+':
39  curtok.flags.push_back(format_tokent::flag_typet::SIGN); break;
40  default:
42  std::string("unsupported format specifier flag: '") + *it + "'");
43  }
44  it++;
45  }
46 }
47 
49  std::string::const_iterator &it,
50  format_tokent &curtok)
51 {
52  if(*it=='*')
53  {
55  it++;
56  }
57 
58  std::string tmp;
59  for( ; isdigit(*it); it++) tmp+=*it;
60  curtok.field_width=string2integer(tmp);
61 }
62 
64  std::string::const_iterator &it,
65  format_tokent &curtok)
66 {
67  if(*it=='.')
68  {
69  it++;
70 
71  if(*it=='*')
72  {
74  it++;
75  }
76  else
77  {
78  std::string tmp;
79  for( ; isdigit(*it); it++) tmp+=*it;
80  curtok.precision=string2integer(tmp);
81  }
82  }
83 }
84 
86  std::string::const_iterator &it,
87  format_tokent &curtok)
88 {
89  if(*it=='h')
90  {
91  it++;
92  if(*it=='h')
93  it++;
95  }
96  else if(*it=='l')
97  {
98  it++;
99  if(*it=='l')
100  it++;
102  }
103  else if(*it=='L')
104  {
105  it++;
107  }
108  else if(*it=='j')
109  {
110  it++;
112  }
113  else if(*it=='t')
114  {
115  it++;
117  }
118 }
119 
121  const std::string &arg_string,
122  std::string::const_iterator &it,
123  format_tokent &curtok)
124 {
125  switch(*it)
126  {
127  case 'd':
128  case 'i':
131  break;
132  case 'o':
135  break;
136  case 'u':
139  break;
140  case 'x':
141  case 'X':
144  break;
145  case 'e':
146  case 'E': curtok.type=format_tokent::token_typet::FLOAT; break;
147  case 'f':
148  case 'F': curtok.type=format_tokent::token_typet::FLOAT; break;
149  case 'g':
150  case 'G': curtok.type=format_tokent::token_typet::FLOAT; break;
151  case 'a':
152  case 'A': curtok.type=format_tokent::token_typet::FLOAT; break;
153  case 'c': curtok.type=format_tokent::token_typet::CHAR; break;
154  case 's': curtok.type=format_tokent::token_typet::STRING; break;
155  case 'p': curtok.type=format_tokent::token_typet::POINTER; break;
156  case '%':
158  curtok.value="%";
159  break;
160  case '[': // pattern matching in, e.g., fscanf.
161  {
162  std::string tmp;
163  it++;
164  if(*it=='^') // if it's there, it must be first
165  {
166  tmp+='^'; it++;
167  if(*it==']') // if it's there, it must be here
168  {
169  tmp+=']'; it++;
170  }
171  }
172 
173  for( ; it!=arg_string.end() && *it!=']'; it++)
174  tmp+=*it;
175 
176  break;
177  }
178 
179  default:
181  std::string("unsupported format conversion specifier: '") + *it + "'");
182  }
183  it++;
184 }
185 
186 format_token_listt parse_format_string(const std::string &arg_string)
187 {
188  format_token_listt token_list;
189 
190  std::string::const_iterator it=arg_string.begin();
191 
192  while(it!=arg_string.end())
193  {
194  if(*it=='%')
195  {
196  token_list.push_back(format_tokent());
197  format_tokent &curtok=token_list.back();
198  it++;
199 
200  parse_flags(it, curtok);
201  parse_field_width(it, curtok);
202  parse_precision(it, curtok);
203  parse_length_modifier(it, curtok);
204  parse_conversion_specifier(arg_string, it, curtok);
205  }
206  else
207  {
208  if(token_list.empty() ||
209  token_list.back().type!=format_tokent::token_typet::TEXT)
210  token_list.push_back(format_tokent(format_tokent::token_typet::TEXT));
211 
212  std::string tmp;
213  for( ; it!=arg_string.end() && *it!='%'; it++)
214  tmp+=*it;
215 
216  INVARIANT(
217  !token_list.empty() &&
218  token_list.back().type == format_tokent::token_typet::TEXT,
219  "must already have a TEXT token at the back of the token list");
220 
221  token_list.back().value=tmp;
222  }
223  }
224 
225  return token_list;
226 }
227 
229 {
230  switch(token.type)
231  {
233  switch(token.length_modifier)
234  {
237  return signed_char_type();
238  else
239  return unsigned_char_type();
240 
243  return signed_short_int_type();
244  else
245  return unsigned_short_int_type();
246 
249  return signed_long_int_type();
250  else
251  return unsigned_long_int_type();
252 
255  return signed_long_long_int_type();
256  else
258 
264  return signed_int_type();
265  else
266  return unsigned_int_type();
267  }
268 
270  switch(token.length_modifier)
271  {
280  return float_type();
281  }
282 
284  switch(token.length_modifier)
285  {
294  return char_type();
295  }
296 
298  return pointer_type(void_type());
299 
301  switch(token.length_modifier)
302  {
304  return array_typet(wchar_t_type(), nil_exprt());
312  return array_typet(char_type(), nil_exprt());
313  }
314 
317  return {};
318  }
319 
320  UNREACHABLE;
321 }
UNREACHABLE
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:503
exception_utils.h
parse_format_string
format_token_listt parse_format_string(const std::string &arg_string)
Definition: format_strings.cpp:186
parse_length_modifier
void parse_length_modifier(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:85
format_tokent::value
irep_idt value
Definition: format_strings.h:87
signed_long_long_int_type
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:97
signed_char_type
signedbv_typet signed_char_type()
Definition: c_types.cpp:152
format_tokent::token_typet::STRING
@ STRING
format_tokent::representationt::UNSIGNED_OCT
@ UNSIGNED_OCT
long_double_type
floatbv_typet long_double_type()
Definition: c_types.cpp:211
unsupported_operation_exceptiont
Thrown when we encounter an instruction, parameters to an instruction etc.
Definition: exception_utils.h:144
string2integer
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
invariant.h
parse_flags
void parse_flags(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:21
format_strings.h
format_tokent::precision
mp_integer precision
Definition: format_strings.h:84
format_tokent::length_modifierst::LEN_undef
@ LEN_undef
format_tokent::field_width
mp_integer field_width
Definition: format_strings.h:83
format_tokent::length_modifier
length_modifierst length_modifier
Definition: format_strings.h:85
unsigned_char_type
unsignedbv_typet unsigned_char_type()
Definition: c_types.cpp:145
void_type
empty_typet void_type()
Definition: c_types.cpp:263
format_tokent::length_modifierst::LEN_h
@ LEN_h
format_tokent::representationt::SIGNED_DEC
@ SIGNED_DEC
unsigned_short_int_type
unsignedbv_typet unsigned_short_int_type()
Definition: c_types.cpp:61
unsigned_long_long_int_type
unsignedbv_typet unsigned_long_long_int_type()
Definition: c_types.cpp:111
format_tokent::token_typet::POINTER
@ POINTER
format_tokent::representationt::UNSIGNED_DEC
@ UNSIGNED_DEC
unsigned_long_int_type
unsignedbv_typet unsigned_long_int_type()
Definition: c_types.cpp:104
parse_field_width
void parse_field_width(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:48
parse_conversion_specifier
void parse_conversion_specifier(const std::string &arg_string, std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:120
format_token_listt
std::list< format_tokent > format_token_listt
Definition: format_strings.h:90
signed_int_type
signedbv_typet signed_int_type()
Definition: c_types.cpp:40
format_tokent::length_modifierst::LEN_hh
@ LEN_hh
format_tokent::token_typet::INT
@ INT
format_tokent::length_modifierst::LEN_t
@ LEN_t
format_tokent::flag_typet::ZERO_PAD
@ ZERO_PAD
nil_exprt
The NIL expression.
Definition: std_expr.h:3025
wchar_t_type
bitvector_typet wchar_t_type()
Definition: c_types.cpp:159
signed_short_int_type
signedbv_typet signed_short_int_type()
Definition: c_types.cpp:47
float_type
floatbv_typet float_type()
Definition: c_types.cpp:195
format_tokent::flags
std::list< flag_typet > flags
Definition: format_strings.h:82
format_tokent::flag_typet::LEFT_ADJUST
@ LEFT_ADJUST
format_tokent::representationt::UNSIGNED_HEX
@ UNSIGNED_HEX
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:253
unsigned_int_type
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:54
format_tokent::length_modifierst::LEN_L
@ LEN_L
format_tokent::length_modifierst::LEN_j
@ LEN_j
format_tokent::flag_typet::ALTERNATE
@ ALTERNATE
optionalt
nonstd::optional< T > optionalt
Definition: optional.h:35
double_type
floatbv_typet double_type()
Definition: c_types.cpp:203
char_type
bitvector_typet char_type()
Definition: c_types.cpp:124
parse_precision
void parse_precision(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:63
format_tokent::flag_typet::SIGN
@ SIGN
format_tokent::token_typet::CHAR
@ CHAR
array_typet
Arrays with given size.
Definition: std_types.h:762
format_tokent::token_typet::UNKNOWN
@ UNKNOWN
format_tokent::length_modifierst::LEN_l
@ LEN_l
format_tokent::type
token_typet type
Definition: format_strings.h:81
format_tokent::representation
representationt representation
Definition: format_strings.h:86
get_type
optionalt< typet > get_type(const format_tokent &token)
Definition: format_strings.cpp:228
signed_long_int_type
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:90
format_tokent
Definition: format_strings.h:24
format_tokent::token_typet::FLOAT
@ FLOAT
format_tokent::token_typet::TEXT
@ TEXT
format_tokent::flag_typet::SIGNED_SPACE
@ SIGNED_SPACE
std_expr.h
c_types.h
format_tokent::flag_typet::ASTERISK
@ ASTERISK
validation_modet::INVARIANT
@ INVARIANT
format_tokent::length_modifierst::LEN_ll
@ LEN_ll