CBMC
type2name.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Type Naming for C
4 
5 Author: Daniel Kroening, kroening@cs.cmu.edu
6 
7 \*******************************************************************/
8 
11 
12 #include "type2name.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/c_types.h>
16 #include <util/invariant.h>
17 #include <util/namespace.h>
19 #include <util/std_expr.h>
20 #include <util/symbol.h>
21 
22 #include <regex>
23 
24 typedef std::unordered_map<irep_idt, std::pair<size_t, bool>> symbol_numbert;
25 
26 static std::string type2name(
27  const typet &type,
28  const namespacet &ns,
29  symbol_numbert &symbol_number);
30 
31 static std::string type2name_tag(
32  const tag_typet &type,
33  const namespacet &ns,
34  symbol_numbert &symbol_number)
35 {
36  const irep_idt &identifier = type.get_identifier();
37 
38  const symbolt *symbol;
39 
40  if(ns.lookup(identifier, symbol))
41  return "SYM#"+id2string(identifier)+"#";
42 
43  assert(symbol && symbol->is_type);
44 
45  if(symbol->type.id()!=ID_struct &&
46  symbol->type.id()!=ID_union)
47  return type2name(symbol->type, ns, symbol_number);
48 
49  // assign each symbol a number when seen for the first time
50  std::pair<symbol_numbert::iterator, bool> entry=
51  symbol_number.insert(std::make_pair(
52  identifier,
53  std::make_pair(symbol_number.size(), true)));
54 
55  std::string result = "SYM" +
57  '#' + std::to_string(entry.first->second.first);
58 
59  // new entry, add definition
60  if(entry.second)
61  {
62  result+="={";
63  result+=type2name(symbol->type, ns, symbol_number);
64  result+='}';
65 
66  entry.first->second.second=false;
67  }
68 
69  return result;
70 }
71 
72 static std::string pointer_offset_bits_as_string(
73  const typet &type,
74  const namespacet &ns)
75 {
76  auto bits = pointer_offset_bits(type, ns);
77  CHECK_RETURN(bits.has_value());
78  return integer2string(*bits);
79 }
80 
81 static std::string type2name(
82  const typet &type,
83  const namespacet &ns,
84  symbol_numbert &symbol_number)
85 {
86  std::string result;
87 
88  // qualifiers first
89  if(type.get_bool(ID_C_constant))
90  result+='c';
91 
92  if(type.get_bool(ID_C_restricted))
93  result+='r';
94 
95  if(type.get_bool(ID_C_volatile))
96  result+='v';
97 
98  if(type.get_bool(ID_C_transparent_union))
99  result+='t';
100 
101  if(type.get_bool(ID_C_noreturn))
102  result+='n';
103 
104  // this isn't really a qualifier, but the linker needs to
105  // distinguish these - should likely be fixed in the linker instead
106  if(!type.source_location().get_function().empty())
107  result+='l';
108 
109  if(type.id().empty())
110  throw "empty type encountered";
111  else if(type.id()==ID_empty)
112  result+='V';
113  else if(type.id()==ID_signedbv)
114  result+="S" + pointer_offset_bits_as_string(type, ns);
115  else if(type.id()==ID_unsignedbv)
116  result+="U" + pointer_offset_bits_as_string(type, ns);
117  else if(type.id()==ID_bool ||
118  type.id()==ID_c_bool)
119  result+='B';
120  else if(type.id()==ID_integer)
121  result+='I';
122  else if(type.id()==ID_real)
123  result+='R';
124  else if(type.id()==ID_complex)
125  result+='C';
126  else if(type.id()==ID_floatbv)
127  result+="F" + pointer_offset_bits_as_string(type, ns);
128  else if(type.id()==ID_fixedbv)
129  result+="X" + pointer_offset_bits_as_string(type, ns);
130  else if(type.id()==ID_natural)
131  result+='N';
132  else if(type.id()==ID_pointer)
133  {
134  if(type.get_bool(ID_C_reference))
135  result+='&';
136  else
137  result+='*';
138  }
139  else if(type.id()==ID_code)
140  {
141  const code_typet &t=to_code_type(type);
142  const code_typet::parameterst parameters=t.parameters();
143  result+=type2name(t.return_type(), ns, symbol_number)+"(";
144 
145  for(code_typet::parameterst::const_iterator
146  it=parameters.begin();
147  it!=parameters.end();
148  it++)
149  {
150  if(it!=parameters.begin())
151  result+='|';
152  result+=type2name(it->type(), ns, symbol_number);
153  }
154 
155  if(t.has_ellipsis())
156  {
157  if(!parameters.empty())
158  result+='|';
159  result+="...";
160  }
161 
162  result+=")->";
163  result+=type2name(t.return_type(), ns, symbol_number);
164  }
165  else if(type.id()==ID_array)
166  {
167  const exprt &size = to_array_type(type).size();
168 
169  if(size.id() == ID_symbol)
170  result += "ARR" + id2string(to_symbol_expr(size).get_identifier());
171  else
172  {
173  const auto size_int = numeric_cast<mp_integer>(size);
174 
175  if(!size_int.has_value())
176  result += "ARR?";
177  else
178  result += "ARR" + integer2string(*size_int);
179  }
180  }
181  else if(
182  type.id() == ID_c_enum_tag || type.id() == ID_struct_tag ||
183  type.id() == ID_union_tag)
184  {
185  result += type2name_tag(to_tag_type(type), ns, symbol_number);
186  }
187  else if(type.id()==ID_struct ||
188  type.id()==ID_union)
189  {
190  const auto &struct_union_type = to_struct_union_type(type);
191 
192  if(struct_union_type.is_incomplete())
193  {
194  if(type.id() == ID_struct)
195  result += "ST?";
196  else if(type.id() == ID_union)
197  result += "UN?";
198  }
199  else
200  {
201  if(type.id() == ID_struct)
202  result += "ST";
203  if(type.id() == ID_union)
204  result += "UN";
205  result += '[';
206  bool first = true;
207  for(const auto &c : struct_union_type.components())
208  {
209  if(!first)
210  result += '|';
211  else
212  first = false;
213 
214  result += type2name(c.type(), ns, symbol_number);
215  const irep_idt &component_name = c.get_name();
216  CHECK_RETURN(!component_name.empty());
217  result += "'" + id2string(component_name) + "'";
218  }
219  result += ']';
220  }
221  }
222  else if(type.id()==ID_c_enum)
223  {
224  const c_enum_typet &t=to_c_enum_type(type);
225 
226  if(t.is_incomplete())
227  result += "EN?";
228  else
229  {
230  result += "EN";
231  const c_enum_typet::memberst &members = t.members();
232  result += '[';
233  for(c_enum_typet::memberst::const_iterator it = members.begin();
234  it != members.end();
235  ++it)
236  {
237  if(it != members.begin())
238  result += '|';
239  result += id2string(it->get_value());
240  result += "'" + id2string(it->get_identifier()) + "'";
241  }
242  }
243  }
244  else if(type.id()==ID_c_bit_field)
245  result+="BF"+pointer_offset_bits_as_string(type, ns);
246  else if(type.id()==ID_vector)
247  result+="VEC"+type.get_string(ID_size);
248  else
249  throw "unknown type '"+type.id_string()+"' encountered";
250 
251  if(type.has_subtype())
252  {
253  result+='{';
254  result +=
255  type2name(to_type_with_subtype(type).subtype(), ns, symbol_number);
256  result+='}';
257  }
258 
259  if(type.has_subtypes())
260  {
261  result+='$';
262  for(const typet &subtype : to_type_with_subtypes(type).subtypes())
263  {
264  result += type2name(subtype, ns, symbol_number);
265  result+='|';
266  }
267  result[result.size()-1]='$';
268  }
269 
270  return result;
271 }
272 
273 std::string type2name(const typet &type, const namespacet &ns)
274 {
275  symbol_numbert symbol_number;
276  return type2name(type, ns, symbol_number);
277 }
278 
283 std::string type_name2type_identifier(const std::string &name)
284 {
285  const auto replace_special_characters = [](const std::string &name) {
286  std::string result{};
287  for(char c : name)
288  {
289  switch(c)
290  {
291  case '*':
292  result += "_ptr_";
293  break;
294  case '{':
295  result += "_start_sub_";
296  break;
297  case '}':
298  result += "_end_sub_";
299  break;
300  default:
301  result += c;
302  break;
303  }
304  }
305  return result;
306  };
307  const auto replace_invalid_characters_with_underscore =
308  [](const std::string &identifier) {
309  static const std::regex non_alpha_numeric{"[^A-Za-z0-9\x80-\xff]+"};
310  return std::regex_replace(identifier, non_alpha_numeric, "_");
311  };
312  const auto strip_leading_digits = [](const std::string &identifier) {
313  static const std::regex identifier_regex{
314  "[A-Za-z\x80-\xff_][A-Za-z0-9_\x80-\xff]*"};
315  std::smatch match_results;
316  bool found = std::regex_search(identifier, match_results, identifier_regex);
317  POSTCONDITION(found);
318  return match_results.str(0);
319  };
320  return strip_leading_digits(replace_invalid_characters_with_underscore(
321  replace_special_characters(name)));
322 }
323 
324 std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
325 {
326  return type_name2type_identifier(type2name(type, ns));
327 }
tag_typet::get_identifier
const irep_idt & get_identifier() const
Definition: std_types.h:410
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:36
pointer_offset_size.h
source_locationt::get_function
const irep_idt & get_function() const
Definition: source_location.h:55
code_typet::has_ellipsis
bool has_ellipsis() const
Definition: std_types.h:611
symbol_numbert
std::unordered_map< irep_idt, std::pair< size_t, bool > > symbol_numbert
Definition: type2name.cpp:24
c_enum_typet
The type of C enums.
Definition: c_types.h:216
c_enum_typet::is_incomplete
bool is_incomplete() const
enum types may be incomplete
Definition: c_types.h:261
arith_tools.h
to_struct_union_type
const struct_union_typet & to_struct_union_type(const typet &type)
Cast a typet to a struct_union_typet.
Definition: std_types.h:214
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
typet
The type of an expression, extends irept.
Definition: type.h:28
code_typet::parameterst
std::vector< parametert > parameterst
Definition: std_types.h:541
typet::has_subtype
bool has_subtype() const
Definition: type.h:82
symbolt::type
typet type
Type of symbol.
Definition: symbol.h:31
c_enum_typet::memberst
std::vector< c_enum_membert > memberst
Definition: c_types.h:253
to_c_enum_type
const c_enum_typet & to_c_enum_type(const typet &type)
Cast a typet to a c_enum_typet.
Definition: c_types.h:302
type2name.h
typet::has_subtypes
bool has_subtypes() const
Definition: type.h:79
invariant.h
to_type_with_subtype
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition: type.h:193
to_type_with_subtypes
const type_with_subtypest & to_type_with_subtypes(const typet &type)
Definition: type.h:237
exprt
Base class for all expressions.
Definition: expr.h:55
to_string
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: string_constraint.cpp:58
namespace.h
type_name2type_identifier
std::string type_name2type_identifier(const std::string &name)
type2name generates strings that aren't valid C identifiers If we want utilities like dump_c to work ...
Definition: type2name.cpp:283
pointer_offset_bits_as_string
static std::string pointer_offset_bits_as_string(const typet &type, const namespacet &ns)
Definition: type2name.cpp:72
array_typet::size
const exprt & size() const
Definition: std_types.h:800
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:90
namespacet::lookup
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:138
struct_union_typet::get_tag
irep_idt get_tag() const
Definition: std_types.h:168
to_code_type
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:744
messaget::result
mstreamt & result() const
Definition: message.h:409
to_tag_type
const tag_typet & to_tag_type(const typet &type)
Cast a typet to a tag_typet.
Definition: std_types.h:434
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
get_identifier
static optionalt< smt_termt > get_identifier(const exprt &expr, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_handle_identifiers, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_identifiers)
Definition: smt2_incremental_decision_procedure.cpp:328
type2name
static std::string type2name(const typet &type, const namespacet &ns, symbol_numbert &symbol_number)
Definition: type2name.cpp:81
typet::source_location
const source_locationt & source_location() const
Definition: type.h:90
pointer_offset_bits
optionalt< mp_integer > pointer_offset_bits(const typet &type, const namespacet &ns)
Definition: pointer_offset_size.cpp:101
irept::id_string
const std::string & id_string() const
Definition: irep.h:399
irept::get_string
const std::string & get_string(const irep_idt &name) const
Definition: irep.h:409
to_symbol_expr
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:222
code_typet
Base type of functions.
Definition: std_types.h:538
symbol.h
Symbol table entry.
irept::id
const irep_idt & id() const
Definition: irep.h:396
dstringt::empty
bool empty() const
Definition: dstring.h:88
tag_typet
A tag-based type, i.e., typet with an identifier.
Definition: std_types.h:395
code_typet::parameters
const parameterst & parameters() const
Definition: std_types.h:655
type2name_tag
static std::string type2name_tag(const tag_typet &type, const namespacet &ns, symbol_numbert &symbol_number)
Definition: type2name.cpp:31
POSTCONDITION
#define POSTCONDITION(CONDITION)
Definition: invariant.h:479
symbolt
Symbol table entry.
Definition: symbol.h:27
symbolt::is_type
bool is_type
Definition: symbol.h:61
to_array_type
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:844
type_to_partial_identifier
std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
Constructs a string describing the given type, which can be used as part of a C identifier.
Definition: type2name.cpp:324
code_typet::return_type
const typet & return_type() const
Definition: std_types.h:645
std_expr.h
c_types.h
irept::get_bool
bool get_bool(const irep_idt &name) const
Definition: irep.cpp:58
c_enum_typet::members
const memberst & members() const
Definition: c_types.h:255
integer2string
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:103