CBMC
c_wrangler.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: C Wrangler
4 
5 Author: Daniel Kroening, dkr@amazon.com
6 
7 \*******************************************************************/
8 
11 
12 #include "c_wrangler.h"
13 
14 #include "c_defines.h"
15 #include "ctokenit.h"
16 #include "mini_c_parser.h"
17 
18 #include <util/cprover_prefix.h>
19 #include <util/exception_utils.h>
20 #include <util/file_util.h>
21 #include <util/json.h>
22 #include <util/optional.h>
23 #include <util/prefix.h>
24 #include <util/run.h>
25 #include <util/string_utils.h>
26 #include <util/suffix.h>
27 #include <util/tempdir.h>
28 
29 #include <fstream>
30 #include <iostream>
31 #include <map>
32 #include <regex>
33 #include <sstream>
34 #include <unordered_map>
35 
37 {
38  // sources and preprocessing
39  std::vector<std::string> source_files;
40  std::vector<std::string> includes;
41  std::vector<std::string> defines;
42 
43  // transformations
45  {
46  std::string clause;
47  std::string content;
48  contract_clauset(std::string _clause, std::string _content)
49  : clause(std::move(_clause)), content(std::move(_content))
50  {
51  }
52  };
53 
55  {
56  std::string loop_type;
57  std::string identifier;
58  std::string content;
60  std::string _loop_type,
61  std::string _identifier,
62  std::string _content)
63  : loop_type(std::move(_loop_type)),
64  identifier(std::move(_identifier)),
65  content(std::move(_content))
66  {
67  }
68  };
69 
70  struct assertiont
71  {
72  std::string identifier;
73  std::string content;
74  assertiont(std::string _identifier, std::string _content)
75  : identifier(std::move(_identifier)), content(std::move(_content))
76  {
77  }
78  };
79 
80  struct functiont
81  {
82  // should be variant to preserve ordering
83  std::vector<contract_clauset> contract;
84  std::vector<loop_invariantt> loop_invariants;
85  std::vector<assertiont> assertions;
87  bool remove_static = false;
88  };
89 
90  using functionst = std::list<std::pair<std::regex, functiont>>;
92 
93  struct objectt
94  {
95  bool remove_static = false;
96  };
97 
98  using objectst = std::list<std::pair<std::regex, objectt>>;
100 
101  // output
102  std::string output;
103 
104  void configure_sources(const jsont &);
105  void configure_functions(const jsont &);
106  void configure_objects(const jsont &);
107  void configure_output(const jsont &);
108 };
109 
111 {
112  auto sources = config["sources"];
113 
114  if(!sources.is_null())
115  {
116  if(!sources.is_array())
117  throw deserialization_exceptiont("sources entry must be sequence");
118 
119  for(const auto &source : to_json_array(sources))
120  {
121  if(!source.is_string())
122  throw deserialization_exceptiont("source must be string");
123 
124  this->source_files.push_back(source.value);
125  }
126  }
127 
128  auto includes = config["includes"];
129 
130  if(!includes.is_null())
131  {
132  if(!includes.is_array())
133  throw deserialization_exceptiont("includes entry must be sequence");
134 
135  for(const auto &include : to_json_array(includes))
136  {
137  if(!include.is_string())
138  throw deserialization_exceptiont("include must be string");
139 
140  this->includes.push_back(include.value);
141  }
142  }
143 
144  auto defines = config["defines"];
145 
146  if(!defines.is_null())
147  {
148  if(!defines.is_array())
149  throw deserialization_exceptiont("defines entry must be sequence");
150 
151  for(const auto &define : to_json_array(defines))
152  {
153  if(!define.is_string())
154  throw deserialization_exceptiont("define must be string");
155 
156  this->defines.push_back(define.value);
157  }
158  }
159 }
160 
162 {
163  auto functions = config["functions"];
164 
165  if(functions.is_null())
166  return;
167 
168  if(!functions.is_array())
169  throw deserialization_exceptiont("functions entry must be sequence");
170 
171  for(const auto &function : to_json_array(functions))
172  {
173  if(!function.is_object())
174  throw deserialization_exceptiont("function entry must be object");
175 
176  for(const auto &function_entry : to_json_object(function))
177  {
178  const auto function_name = function_entry.first;
179  const auto &items = function_entry.second;
180 
181  if(!items.is_array())
182  throw deserialization_exceptiont("function entry must be sequence");
183 
184  this->functions.emplace_back(function_name, functiont{});
185  functiont &function_config = this->functions.back().second;
186 
187  for(const auto &function_item : to_json_array(items))
188  {
189  // These need to start with "ensures", "requires", "assigns",
190  // "invariant", "assert", "stub", "remove"
191  if(!function_item.is_string())
192  throw deserialization_exceptiont("function entry must be string");
193 
194  auto item_string = function_item.value;
195  auto split = split_string(item_string, ' ');
196  if(split.empty())
197  continue;
198 
199  if(
200  split[0] == "ensures" || split[0] == "requires" ||
201  split[0] == "assigns")
202  {
203  std::ostringstream rest;
204  join_strings(rest, split.begin() + 1, split.end(), ' ');
205 
206  function_config.contract.emplace_back(split[0], rest.str());
207  }
208  else if(split[0] == "assert" && split.size() >= 3)
209  {
210  std::ostringstream rest;
211  join_strings(rest, split.begin() + 2, split.end(), ' ');
212 
213  function_config.assertions.emplace_back(split[1], rest.str());
214  }
215  else if(
216  (split[0] == "for" || split[0] == "while" || split[0] == "loop") &&
217  split.size() >= 3 && split[2] == "invariant")
218  {
219  std::ostringstream rest;
220  join_strings(rest, split.begin() + 3, split.end(), ' ');
221 
222  function_config.loop_invariants.emplace_back(
223  split[0], split[1], rest.str());
224  }
225  else if(split[0] == "stub")
226  {
227  std::ostringstream rest;
228  join_strings(rest, split.begin() + 1, split.end(), ' ');
229 
230  function_config.stub = rest.str();
231  }
232  else if(split[0] == "remove")
233  {
234  if(split.size() == 1)
235  throw deserialization_exceptiont("unexpected remove entry");
236 
237  if(split[1] == "static")
238  function_config.remove_static = true;
239  else
241  "unexpected remove entry " + split[1]);
242  }
243  else
245  "unexpected function entry " + split[0]);
246  }
247  }
248  }
249 }
250 
252 {
253  auto objects = config["objects"];
254 
255  if(objects.is_null())
256  return;
257 
258  if(!objects.is_array())
259  throw deserialization_exceptiont("objects entry must be sequence");
260 
261  for(const auto &object : to_json_array(objects))
262  {
263  if(!object.is_object())
264  throw deserialization_exceptiont("object entry must be object");
265 
266  for(const auto &object_entry : to_json_object(object))
267  {
268  const auto &object_name = object_entry.first;
269  const auto &items = object_entry.second;
270 
271  if(!items.is_array())
272  throw deserialization_exceptiont("object entry must be sequence");
273 
274  this->objects.emplace_back(object_name, objectt{});
275  objectt &object_config = this->objects.back().second;
276 
277  for(const auto &object_item : to_json_array(items))
278  {
279  // Needs to start with "remove"
280  if(!object_item.is_string())
281  throw deserialization_exceptiont("object entry must be string");
282 
283  auto item_string = object_item.value;
284  auto split = split_string(item_string, ' ');
285  if(split.empty())
286  continue;
287 
288  if(split[0] == "remove")
289  {
290  if(split.size() == 1)
291  throw deserialization_exceptiont("unexpected remove entry");
292 
293  if(split[1] == "static")
294  object_config.remove_static = true;
295  else
297  "unexpected remove entry " + split[1]);
298  }
299  else
301  "unexpected object entry " + split[0]);
302  }
303  }
304  }
305 }
306 
308 {
309  auto output = config["output"];
310 
311  if(output.is_null())
312  return;
313 
314  if(!output.is_string())
315  throw deserialization_exceptiont("output entry must be string");
316 
317  this->output = output.value;
318 }
319 
320 static std::string
321 preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
322 {
323  std::vector<std::string> argv = {"cc", "-E", source_file};
324 
325  for(const auto &include : c_wrangler.includes)
326  {
327  argv.push_back("-I");
328  argv.push_back(include);
329  }
330 
331  for(const auto &define : c_wrangler.defines)
332  argv.push_back(std::string("-D") + define);
333 
334  std::ostringstream result;
335 
336  auto run_result = run("cc", argv, "", result, "");
337  if(run_result != 0)
338  throw system_exceptiont("preprocessing " + source_file + " has failed");
339 
340  return result.str();
341 }
342 
343 static c_definest
344 get_defines(const std::string &source_file, const c_wranglert &config)
345 {
346  std::vector<std::string> argv = {"cc", "-E", "-dM", source_file};
347 
348  for(const auto &include : config.includes)
349  {
350  argv.push_back("-I");
351  argv.push_back(include);
352  }
353 
354  std::ostringstream result;
355 
356  auto run_result = run("cc", argv, "", result, "");
357  if(run_result != 0)
358  throw system_exceptiont("preprocessing " + source_file + " has failed");
359 
360  c_definest defines;
361  defines.parse(result.str());
362  return defines;
363 }
364 
365 static void mangle_function(
366  const c_declarationt &declaration,
367  const c_definest &defines,
368  const c_wranglert::functiont &function_config,
369  std::ostream &out)
370 {
371  if(function_config.stub.has_value() && declaration.has_body())
372  {
373  // replace by stub
374  out << function_config.stub.value();
375  }
376  else
377  {
378  if(function_config.remove_static)
379  {
380  for(auto &t : declaration.pre_declarator)
381  {
382  if(t.text == "static")
383  {
384  // we replace by white space
385  out << std::string(6, ' ');
386  }
387  else
388  out << t.text;
389  }
390  }
391  else
392  {
393  for(auto &t : declaration.pre_declarator)
394  out << t.text;
395  }
396 
397  for(auto &t : declaration.declarator)
398  out << t.text;
399  for(auto &t : declaration.post_declarator)
400  out << t.text;
401 
402  if(!declaration.has_body())
403  {
404  for(auto &t : declaration.initializer)
405  out << t.text;
406  return;
407  }
408 
409  for(const auto &entry : function_config.contract)
410  out << ' ' << CPROVER_PREFIX << entry.clause << '('
411  << defines(entry.content) << ')';
412 
413  std::map<std::string, std::string> loop_invariants;
414 
415  for(const auto &entry : function_config.loop_invariants)
416  loop_invariants[entry.loop_type + entry.identifier] = entry.content;
417 
418  if(loop_invariants.empty())
419  {
420  for(auto &t : declaration.initializer)
421  out << t.text;
422  }
423  else
424  {
425  std::size_t for_count = 0, while_count = 0;
426  ctokenitt t(declaration.initializer);
427 
428  while(t)
429  {
430  const auto &token = *(t++);
431  out << token.text;
432 
433  if(token == "while")
434  {
435  while_count++;
436  const auto &invariant =
437  loop_invariants.count("while" + std::to_string(while_count))
438  ? loop_invariants["while" + std::to_string(while_count)]
439  : loop_invariants
440  ["loop" + std::to_string(while_count + for_count)];
441 
442  if(!invariant.empty())
443  {
444  auto t_end = match_bracket(t, '(', ')');
445  for(; t != t_end; t++)
446  out << t->text;
447  out << ' ' << CPROVER_PREFIX << "loop_invariant("
448  << defines(invariant) << ')';
449  }
450  }
451  else if(token == "for")
452  {
453  for_count++;
454  const auto &invariant =
455  loop_invariants.count("for" + std::to_string(for_count))
456  ? loop_invariants["for" + std::to_string(for_count)]
457  : loop_invariants
458  ["loop" + std::to_string(while_count + for_count)];
459 
460  if(!invariant.empty())
461  {
462  auto t_end = match_bracket(t, '(', ')');
463  for(; t != t_end; t++)
464  out << t->text;
465  out << ' ' << CPROVER_PREFIX << "loop_invariant("
466  << defines(invariant) << ')';
467  }
468  }
469  }
470  }
471  }
472 }
473 
474 static void mangle_object(
475  const c_declarationt &declaration,
476  const c_definest &defines,
477  const c_wranglert::objectt &object_config,
478  std::ostream &out)
479 {
480  if(object_config.remove_static)
481  {
482  for(auto &t : declaration.pre_declarator)
483  {
484  if(t.text == "static")
485  {
486  // we replace by white space
487  out << std::string(6, ' ');
488  }
489  else
490  out << t.text;
491  }
492  }
493  else
494  {
495  for(auto &t : declaration.pre_declarator)
496  out << t.text;
497  }
498 
499  for(auto &t : declaration.declarator)
500  out << t.text;
501  for(auto &t : declaration.post_declarator)
502  out << t.text;
503  for(auto &t : declaration.initializer)
504  out << t.text;
505 }
506 
507 static void mangle(
508  const c_declarationt &declaration,
509  const c_definest &defines,
510  const c_wranglert &config,
511  std::ostream &out)
512 {
513  auto name_opt = declaration.declared_identifier();
514  if(declaration.is_function() && name_opt.has_value())
515  {
516  for(const auto &entry : config.functions)
517  {
518  if(std::regex_match(name_opt->text, entry.first))
519  {
520  // we are to modify this function
521  mangle_function(declaration, defines, entry.second, out);
522 
523  return;
524  }
525  }
526  }
527  else if(!declaration.is_function() && name_opt.has_value())
528  {
529  for(const auto &entry : config.objects)
530  {
531  if(std::regex_match(name_opt->text, entry.first))
532  {
533  // we are to modify this function
534  mangle_object(declaration, defines, entry.second, out);
535 
536  return;
537  }
538  }
539  }
540 
541  // output
542  out << declaration;
543 }
544 
545 static std::string mangle(
546  const std::string &in,
547  const c_definest &defines,
548  const c_wranglert &config)
549 {
550  std::ostringstream out;
551  std::istringstream in_str(in);
552 
553  auto parsed = parse_c(in_str);
554 
555  for(const auto &declaration : parsed)
556  mangle(declaration, defines, config, out);
557 
558  return out.str();
559 }
560 
561 void c_wrangler(const jsont &config)
562 {
564 
565  c_wrangler.configure_sources(config);
566  c_wrangler.configure_functions(config);
567  c_wrangler.configure_objects(config);
568  c_wrangler.configure_output(config);
569 
570  for(auto &source_file : c_wrangler.source_files)
571  {
572  // first preprocess
573  auto preprocessed = preprocess(source_file, c_wrangler);
574 
575  // get the defines
576  auto defines = get_defines(source_file, c_wrangler);
577 
578  // now mangle
579  auto mangled = mangle(preprocessed, defines, c_wrangler);
580 
581  // now output
582  if(c_wrangler.output == "stdout" || c_wrangler.output.empty())
583  {
584  std::cout << mangled;
585  }
586  else
587  {
588  std::ofstream out(c_wrangler.output);
589  out << mangled;
590  }
591  }
592 }
c_wranglert::contract_clauset::contract_clauset
contract_clauset(std::string _clause, std::string _content)
Definition: c_wrangler.cpp:48
exception_utils.h
c_wranglert::configure_functions
void configure_functions(const jsont &)
Definition: c_wrangler.cpp:161
c_wranglert::contract_clauset
Definition: c_wrangler.cpp:44
c_wranglert
Definition: c_wrangler.cpp:36
c_wranglert::loop_invariantt::loop_type
std::string loop_type
Definition: c_wrangler.cpp:56
c_wranglert::output
std::string output
Definition: c_wrangler.cpp:102
c_wranglert::assertiont
Definition: c_wrangler.cpp:70
mangle_function
static void mangle_function(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::functiont &function_config, std::ostream &out)
Definition: c_wrangler.cpp:365
c_wranglert::includes
std::vector< std::string > includes
Definition: c_wrangler.cpp:40
c_wranglert::functiont
Definition: c_wrangler.cpp:80
file_util.h
string_utils.h
c_wranglert::functiont::contract
std::vector< contract_clauset > contract
Definition: c_wrangler.cpp:83
optional.h
c_wranglert::objects
objectst objects
Definition: c_wrangler.cpp:99
ctokenitt
Definition: ctokenit.h:17
deserialization_exceptiont
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
Definition: exception_utils.h:79
prefix.h
ctokenit.h
run
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
c_wranglert::configure_objects
void configure_objects(const jsont &)
Definition: c_wrangler.cpp:251
to_string
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: string_constraint.cpp:58
run.h
c_defines.h
to_json_array
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
tempdir.h
jsont
Definition: json.h:26
c_wrangler
void c_wrangler(const jsont &config)
Definition: c_wrangler.cpp:561
c_wranglert::configure_output
void configure_output(const jsont &)
Definition: c_wrangler.cpp:307
c_wranglert::functionst
std::list< std::pair< std::regex, functiont > > functionst
Definition: c_wrangler.cpp:90
c_declarationt::is_function
bool is_function() const
Definition: mini_c_parser.cpp:107
c_declarationt::initializer
tokenst initializer
Definition: mini_c_parser.h:30
c_wranglert::source_files
std::vector< std::string > source_files
Definition: c_wrangler.cpp:39
ctokent::text
std::string text
Definition: ctoken.h:40
c_declarationt::declarator
tokenst declarator
Definition: mini_c_parser.h:28
split_string
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
Definition: string_utils.cpp:39
c_wranglert::loop_invariantt::content
std::string content
Definition: c_wrangler.cpp:58
c_definest::parse
void parse(const std::string &)
Definition: c_defines.cpp:21
c_wranglert::assertiont::identifier
std::string identifier
Definition: c_wrangler.cpp:72
c_wranglert::objectst
std::list< std::pair< std::regex, objectt > > objectst
Definition: c_wrangler.cpp:98
c_wrangler.h
c_declarationt::has_body
bool has_body() const
Definition: mini_c_parser.cpp:112
join_strings
Stream & join_strings(Stream &&os, const It b, const It e, const Delimiter &delimiter, TransformFunc &&transform_func)
Prints items to an stream, separated by a constant delimiter.
Definition: string_utils.h:62
parse_c
c_translation_unitt parse_c(std::istream &in)
Definition: mini_c_parser.cpp:392
match_bracket
ctokenitt match_bracket(ctokenitt t, char open, char close)
Definition: ctokenit.cpp:33
c_wranglert::loop_invariantt::loop_invariantt
loop_invariantt(std::string _loop_type, std::string _identifier, std::string _content)
Definition: c_wrangler.cpp:59
c_declarationt::pre_declarator
tokenst pre_declarator
Definition: mini_c_parser.h:27
system_exceptiont
Thrown when some external system fails unexpectedly.
Definition: exception_utils.h:71
c_wranglert::contract_clauset::content
std::string content
Definition: c_wrangler.cpp:47
mangle
static void mangle(const c_declarationt &declaration, const c_definest &defines, const c_wranglert &config, std::ostream &out)
Definition: c_wrangler.cpp:507
cprover_prefix.h
c_wranglert::functiont::stub
optionalt< std::string > stub
Definition: c_wrangler.cpp:86
optionalt
nonstd::optional< T > optionalt
Definition: optional.h:35
c_wranglert::functions
functionst functions
Definition: c_wrangler.cpp:91
c_wranglert::loop_invariantt::identifier
std::string identifier
Definition: c_wrangler.cpp:57
config
configt config
Definition: config.cpp:25
c_wranglert::contract_clauset::clause
std::string clause
Definition: c_wrangler.cpp:46
c_wranglert::defines
std::vector< std::string > defines
Definition: c_wrangler.cpp:41
c_wranglert::objectt
Definition: c_wrangler.cpp:93
c_wranglert::functiont::loop_invariants
std::vector< loop_invariantt > loop_invariants
Definition: c_wrangler.cpp:84
c_wranglert::objectt::remove_static
bool remove_static
Definition: c_wrangler.cpp:95
c_wranglert::assertiont::assertiont
assertiont(std::string _identifier, std::string _content)
Definition: c_wrangler.cpp:74
suffix.h
to_json_object
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
CPROVER_PREFIX
#define CPROVER_PREFIX
Definition: cprover_prefix.h:14
json.h
c_wranglert::functiont::remove_static
bool remove_static
Definition: c_wrangler.cpp:87
c_definest
This class maintains a representation of one assignment to the preprocessor macros in a C program.
Definition: c_defines.h:23
preprocess
static std::string preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
Definition: c_wrangler.cpp:321
get_defines
static c_definest get_defines(const std::string &source_file, const c_wranglert &config)
Definition: c_wrangler.cpp:344
c_wranglert::loop_invariantt
Definition: c_wrangler.cpp:54
c_declarationt::post_declarator
tokenst post_declarator
Definition: mini_c_parser.h:29
c_declarationt
Definition: mini_c_parser.h:22
mini_c_parser.h
c_wranglert::assertiont::content
std::string content
Definition: c_wrangler.cpp:73
c_declarationt::declared_identifier
optionalt< ctokent > declared_identifier() const
Definition: mini_c_parser.cpp:117
c_wranglert::functiont::assertions
std::vector< assertiont > assertions
Definition: c_wrangler.cpp:85
mangle_object
static void mangle_object(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::objectt &object_config, std::ostream &out)
Definition: c_wrangler.cpp:474
c_wranglert::configure_sources
void configure_sources(const jsont &)
Definition: c_wrangler.cpp:110