CBMC
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/cmdline.h>
18 #include <util/expr_initializer.h>
19 #include <util/magic.h>
20 #include <util/pointer_expr.h>
21 #include <util/run.h>
22 #include <util/tempfile.h>
23 
24 #include <json/json_parser.h>
25 
27 
31 
32 #include "compile.h"
33 
35 {
36  if(!cmdline.isset('T'))
37  return 0;
38 
39  auto original_goto_model =
41 
42  if(!original_goto_model.has_value())
43  {
44  log.error() << "Unable to read goto binary for linker script merging"
45  << messaget::eom;
46  return 1;
47  }
48 
49  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
50  std::list<irep_idt> linker_defined_symbols;
51  int fail = get_linker_script_data(
52  linker_defined_symbols,
53  original_goto_model->symbol_table,
54  elf_binary,
55  linker_def_outfile());
56  // ignore linker script parsing failures until the code is tested more widely
57  if(fail!=0)
58  return 0;
59 
60  jsont data;
61  fail = parse_json(linker_def_outfile(), log.get_message_handler(), data);
62  if(fail!=0)
63  {
64  log.error() << "Problem parsing linker script JSON data" << messaget::eom;
65  return fail;
66  }
67 
68  fail=linker_data_is_malformed(data);
69  if(fail!=0)
70  {
71  log.error() << "Malformed linker-script JSON document" << messaget::eom;
72  data.output(log.error());
73  return fail;
74  }
75 
76  linker_valuest linker_values;
77  fail = ls_data2instructions(
78  data,
79  cmdline.get_value('T'),
80  original_goto_model->symbol_table,
81  linker_values);
82  if(fail!=0)
83  {
84  log.error() << "Could not add linkerscript defs to symbol table"
85  << messaget::eom;
86  return fail;
87  }
88  if(
89  original_goto_model->goto_functions.function_map.erase(
90  INITIALIZE_FUNCTION) != 0)
91  {
93  original_goto_model->symbol_table,
94  original_goto_model->symbol_table.lookup_ref(INITIALIZE_FUNCTION)
95  .location);
98  original_goto_model->symbol_table,
99  original_goto_model->goto_functions,
101  original_goto_model->goto_functions.update();
102  }
103 
104  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
105  if(fail!=0)
106  return fail;
107 
108  // The keys of linker_values are exactly the elements of
109  // linker_defined_symbols, so iterate over linker_values from now on.
110 
111  fail = pointerize_linker_defined_symbols(*original_goto_model, linker_values);
112 
113  if(fail!=0)
114  {
115  log.error() << "Could not pointerize all linker-defined expressions"
116  << messaget::eom;
117  return fail;
118  }
119 
121  goto_binary,
122  *original_goto_model,
123  cmdline.isset("validate-goto-model"),
125 
126  if(fail!=0)
127  {
128  log.error() << "Could not write linkerscript-augmented binary"
129  << messaget::eom;
130  }
131 
132  return fail;
133 }
134 
136  const std::string &elf_binary,
137  const std::string &goto_binary,
138  const cmdlinet &cmdline,
139  message_handlert &message_handler)
140  : elf_binary(elf_binary),
141  goto_binary(goto_binary),
142  cmdline(cmdline),
143  log(message_handler),
144  replacement_predicates(
146  "address of array's first member",
147  [](const exprt &expr) -> const symbol_exprt & {
148  return to_symbol_expr(
149  to_index_expr(to_address_of_expr(expr).object()).index());
150  },
151  [](const exprt &expr) {
152  return expr.id() == ID_address_of &&
153  expr.type().id() == ID_pointer &&
154 
155  to_address_of_expr(expr).object().id() == ID_index &&
156  to_address_of_expr(expr).object().type().id() ==
157  ID_unsignedbv &&
158 
159  to_index_expr(to_address_of_expr(expr).object())
160  .array()
161  .id() == ID_symbol &&
162  to_index_expr(to_address_of_expr(expr).object())
163  .array()
164  .type()
165  .id() == ID_array &&
166 
167  to_index_expr(to_address_of_expr(expr).object())
168  .index()
169  .id() == ID_constant &&
170  to_index_expr(to_address_of_expr(expr).object())
171  .index()
172  .type()
173  .id() == ID_signedbv;
174  }),
176  "address of array",
177  [](const exprt &expr) -> const symbol_exprt & {
178  return to_symbol_expr(to_address_of_expr(expr).object());
179  },
180  [](const exprt &expr) {
181  return expr.id() == ID_address_of &&
182  expr.type().id() == ID_pointer &&
183 
184  to_address_of_expr(expr).object().id() == ID_symbol &&
185  to_address_of_expr(expr).object().type().id() == ID_array;
186  }),
188  "address of struct",
189  [](const exprt &expr) -> const symbol_exprt & {
190  return to_symbol_expr(to_address_of_expr(expr).object());
191  },
192  [](const exprt &expr) {
193  return expr.id() == ID_address_of &&
194  expr.type().id() == ID_pointer &&
195 
196  to_address_of_expr(expr).object().id() == ID_symbol &&
197  (to_address_of_expr(expr).object().type().id() == ID_struct ||
198  to_address_of_expr(expr).object().type().id() ==
199  ID_struct_tag);
200  }),
202  "array variable",
203  [](const exprt &expr) -> const symbol_exprt & {
204  return to_symbol_expr(expr);
205  },
206  [](const exprt &expr) {
207  return expr.id() == ID_symbol && expr.type().id() == ID_array;
208  }),
210  "pointer (does not need pointerizing)",
211  [](const exprt &expr) -> const symbol_exprt & {
212  return to_symbol_expr(expr);
213  },
214  [](const exprt &expr) {
215  return expr.id() == ID_symbol && expr.type().id() == ID_pointer;
216  })})
217 {}
218 
220  goto_modelt &goto_model,
221  const linker_valuest &linker_values)
222 {
223  const namespacet ns(goto_model.symbol_table);
224 
225  int ret=0;
226 
227  // Next, find all occurrences of linker-defined symbols that are _values_
228  // of some symbol in the symbol table, and pointerize them too
229  for(const auto &pair : goto_model.symbol_table.symbols)
230  {
231  std::list<symbol_exprt> to_pointerize;
232  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
233 
234  if(to_pointerize.empty())
235  continue;
236  log.debug() << "Pointerizing the symbol-table value of symbol "
237  << pair.first << messaget::eom;
238  int fail = pointerize_subexprs_of(
239  goto_model.symbol_table.get_writeable_ref(pair.first).value,
240  to_pointerize,
241  linker_values);
242  if(to_pointerize.empty() && fail==0)
243  continue;
244  ret=1;
245  for(const auto &sym : to_pointerize)
246  {
247  log.error() << " Could not pointerize '" << sym.get_identifier()
248  << "' in symbol table entry " << pair.first << ". Pretty:\n"
249  << sym.pretty() << "\n";
250  }
251  log.error() << messaget::eom;
252  }
253 
254  // Finally, pointerize all occurrences of linker-defined symbols in the
255  // goto program
256  for(auto &gf : goto_model.goto_functions.function_map)
257  {
258  goto_programt &program=gf.second.body;
259  for(auto &instruction : program.instructions)
260  {
261  std::list<exprt *> expressions = {&instruction.code_nonconst()};
262  if(instruction.has_condition())
263  expressions.push_back(&instruction.condition_nonconst());
264 
265  for(exprt *insts : expressions)
266  {
267  std::list<symbol_exprt> to_pointerize;
268  symbols_to_pointerize(linker_values, *insts, to_pointerize);
269  if(to_pointerize.empty())
270  continue;
271  log.debug() << "Pointerizing a program expression..." << messaget::eom;
272  int fail = pointerize_subexprs_of(*insts, to_pointerize, linker_values);
273  if(to_pointerize.empty() && fail==0)
274  continue;
275  ret=1;
276  for(const auto &sym : to_pointerize)
277  {
278  log.error() << " Could not pointerize '" << sym.get_identifier()
279  << "' in function " << gf.first << ". Pretty:\n"
280  << sym.pretty() << "\n";
281  log.error().source_location = instruction.source_location();
282  }
283  log.error() << messaget::eom;
284  }
285  }
286  }
287  return ret;
288 }
289 
291  exprt &old_expr,
292  const linker_valuest &linker_values,
293  const symbol_exprt &old_symbol,
294  const irep_idt &ident,
295  const std::string &shape)
296 {
297  auto it=linker_values.find(ident);
298  if(it==linker_values.end())
299  {
300  log.error() << "Could not find a new expression for linker script-defined "
301  << "symbol '" << ident << "'" << messaget::eom;
302  return 1;
303  }
304  symbol_exprt new_expr=it->second.first;
305  new_expr.add_source_location()=old_symbol.source_location();
306  log.debug() << "Pointerizing linker-defined symbol '" << ident
307  << "' of shape <" << shape << ">." << messaget::eom;
308  old_expr=new_expr;
309  return 0;
310 }
311 
313  exprt &expr,
314  std::list<symbol_exprt> &to_pointerize,
315  const linker_valuest &linker_values)
316 {
317  int fail=0, tmp=0;
318  for(auto const &pair : linker_values)
319  for(auto const &pattern : replacement_predicates)
320  {
321  if(!pattern.match(expr))
322  continue;
323  // take a copy, expr will be changed below
324  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
325  if(pair.first!=inner_symbol.get_identifier())
326  continue;
327  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
328  pattern.description());
329  fail=tmp?tmp:fail;
330  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
331  inner_symbol);
332  if(result==to_pointerize.end())
333  {
334  fail=1;
335  log.error() << "Too many removals of '" << inner_symbol.get_identifier()
336  << "'" << messaget::eom;
337  }
338  else
339  to_pointerize.erase(result);
340  // If we get here, we've just pointerized this expression. That expression
341  // will be a symbol possibly wrapped in some unary junk, but won't contain
342  // other symbols as subexpressions that might need to be pointerized. So
343  // it's safe to bail out here.
344  return fail;
345  }
346 
347  for(auto &op : expr.operands())
348  {
349  tmp = pointerize_subexprs_of(op, to_pointerize, linker_values);
350  fail=tmp?tmp:fail;
351  }
352  return fail;
353 }
354 
356  const linker_valuest &linker_values,
357  const exprt &expr,
358  std::list<symbol_exprt> &to_pointerize) const
359 {
360  for(const auto &op : expr.operands())
361  {
362  if(op.id()!=ID_symbol)
363  continue;
364  const symbol_exprt &sym_exp=to_symbol_expr(op);
365  const auto &pair=linker_values.find(sym_exp.get_identifier());
366  if(pair!=linker_values.end())
367  to_pointerize.push_back(sym_exp);
368  }
369  for(const auto &op : expr.operands())
370  symbols_to_pointerize(linker_values, op, to_pointerize);
371 }
372 
373 #if 0
374 The current implementation of this function is less precise than the
375  commented-out version below. To understand the difference between these
376  implementations, consider the following example:
377 
378 Suppose we have a section in the linker script, 100 bytes long, where the
379 address of the symbol sec_start is the start of the section (value 4096) and the
380 address of sec_end is the end of that section (value 4196).
381 
382 The current implementation synthesizes the goto-version of the following C:
383 
384  char __sec_array[100];
385  char *sec_start=(&__sec_array[0]);
386  char *sec_end=((&__sec_array[0])+100);
387  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
388  // by __sec_array, not the last element of __sec_array.
389 
390 This is imprecise for the following reason: the actual address of the array and
391 the pointers shall be some random CBMC-internal address, instead of being 4096
392 and 4196. The linker script, on the other hand, would have specified the exact
393 position of the section, and we even know what the actual values of sec_start
394 and sec_end are from the object file (these values are in the `addresses` list
395 of the `data` argument to this function). If the correctness of the code depends
396 on these actual values, then CBMCs model of the code is too imprecise to verify
397 this.
398 
399 The commented-out version of this function below synthesizes the following:
400 
401  char *sec_start=4096;
402  char *sec_end=4196;
403  __CPROVER_allocated_memory(4096, 100);
404 
405 This code has both the actual addresses of the start and end of the section and
406 tells CBMC that the intermediate region is valid. However, the allocated_memory
407 macro does not currently allocate an actual object at the address 4096, so
408 symbolic execution can fail. In particular, the 'allocated memory' is part of
409 __CPROVER_memory, which does not have a bounded size; this means that (for
410 example) calls to memcpy or memset fail, because the first and third arguments
411 do not have know n size. The commented-out implementation should be reinstated
412 once this limitation of __CPROVER_allocated_memory has been fixed.
413 
414 In either case, no other changes to the rest of the code (outside this function)
415 should be necessary. The rest of this file converts expressions containing the
416 linker-defined symbol into pointer types if they were not already, and this is
417 the right behaviour for both implementations.
418 #endif
420  jsont &data,
421  const std::string &linker_script,
422  symbol_tablet &symbol_table,
423  linker_valuest &linker_values)
424 #if 1
425 {
426  std::map<irep_idt, std::size_t> truncated_symbols;
427  for(auto &d : to_json_array(data["regions"]))
428  {
429  bool has_end=d["has-end-symbol"].is_true();
430 
431  std::ostringstream array_name;
432  array_name << CPROVER_PREFIX << "linkerscript-array_"
433  << d["start-symbol"].value;
434  if(has_end)
435  array_name << "-" << d["end-symbol"].value;
436 
437 
438  // Array symbol_exprt
439  mp_integer array_size = string2integer(d["size"].value);
440  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
441  {
442  log.warning() << "Object section '" << d["section"].value << "' of size "
443  << array_size << " is too large to model. Truncating to "
444  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << messaget::eom;
445  array_size=MAX_FLATTENED_ARRAY_SIZE;
446  if(!has_end)
447  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
448  }
449 
450  constant_exprt array_size_expr=from_integer(array_size, size_type());
451  array_typet array_type(char_type(), array_size_expr);
452 
453  source_locationt array_loc;
454  array_loc.set_file(linker_script);
455  std::ostringstream array_comment;
456  array_comment << "Object section '" << d["section"].value << "' of size "
457  << array_size << " bytes";
458  array_loc.set_comment(array_comment.str());
459 
460  namespacet ns(symbol_table);
461  const auto zi = zero_initializer(array_type, array_loc, ns);
462  CHECK_RETURN(zi.has_value());
463 
464  // Add the array to the symbol table.
465  symbolt array_sym;
466  array_sym.is_static_lifetime = true;
467  array_sym.is_lvalue = true;
468  array_sym.is_state_var = true;
469  array_sym.name = array_name.str();
470  array_sym.type = array_type;
471  array_sym.value = *zi;
472  array_sym.location = array_loc;
473 
474  bool failed = symbol_table.add(array_sym);
476 
477  // Array start address
478  index_exprt zero_idx{array_sym.symbol_expr(), from_integer(0, size_type())};
479  address_of_exprt array_start(zero_idx);
480 
481  // Linker-defined symbol_exprt pointing to start address
482  symbolt start_sym;
483  start_sym.is_static_lifetime = true;
484  start_sym.is_lvalue = true;
485  start_sym.is_state_var = true;
486  start_sym.name = d["start-symbol"].value;
487  start_sym.type = pointer_type(char_type());
488  start_sym.value = array_start;
489  linker_values.emplace(
490  d["start-symbol"].value,
491  std::make_pair(start_sym.symbol_expr(), array_start));
492 
493  // Since the value of the pointer will be a random CBMC address, write a
494  // note about the real address in the object file
495  auto it = std::find_if(
496  to_json_array(data["addresses"]).begin(),
497  to_json_array(data["addresses"]).end(),
498  [&d](const jsont &add) {
499  return add["sym"].value == d["start-symbol"].value;
500  });
501  if(it == to_json_array(data["addresses"]).end())
502  {
503  log.error() << "Start: Could not find address corresponding to symbol '"
504  << d["start-symbol"].value << "' (start of section)"
505  << messaget::eom;
506  return 1;
507  }
508  source_locationt start_loc;
509  start_loc.set_file(linker_script);
510  std::ostringstream start_comment;
511  start_comment << "Pointer to beginning of object section '"
512  << d["section"].value << "'. Original address in object file"
513  << " is " << (*it)["val"].value;
514  start_loc.set_comment(start_comment.str());
515  start_sym.location = start_loc;
516 
517  auto start_sym_entry = symbol_table.insert(start_sym);
518  if(!start_sym_entry.second)
519  start_sym_entry.first = start_sym;
520 
521  if(has_end) // Same for pointer to end of array
522  {
523  plus_exprt array_end(array_start, array_size_expr);
524 
525  symbolt end_sym;
526  end_sym.is_static_lifetime = true;
527  end_sym.is_lvalue = true;
528  end_sym.is_state_var = true;
529  end_sym.name = d["end-symbol"].value;
530  end_sym.type = pointer_type(char_type());
531  end_sym.value = array_end;
532  linker_values.emplace(
533  d["end-symbol"].value,
534  std::make_pair(end_sym.symbol_expr(), array_end));
535 
536  auto entry = std::find_if(
537  to_json_array(data["addresses"]).begin(),
538  to_json_array(data["addresses"]).end(),
539  [&d](const jsont &add) {
540  return add["sym"].value == d["end-symbol"].value;
541  });
542  if(entry == to_json_array(data["addresses"]).end())
543  {
544  log.debug() << "Could not find address corresponding to symbol '"
545  << d["end-symbol"].value << "' (end of section)"
546  << messaget::eom;
547  return 1;
548  }
549  source_locationt end_loc;
550  end_loc.set_file(linker_script);
551  std::ostringstream end_comment;
552  end_comment << "Pointer to end of object section '" << d["section"].value
553  << "'. Original address in object file"
554  << " is " << (*entry)["val"].value;
555  end_loc.set_comment(end_comment.str());
556  end_sym.location = end_loc;
557 
558  auto end_sym_entry = symbol_table.insert(end_sym);
559  if(!end_sym_entry.second)
560  end_sym_entry.first = end_sym;
561  }
562  }
563 
564  // We've added every linker-defined symbol that marks the start or end of a
565  // region. But there might be other linker-defined symbols with some random
566  // address. These will have been declared extern too, so we need to give them
567  // a value also. Here, we give them the actual value that they have in the
568  // object file, since we're not assigning any object to them.
569  for(const auto &d : to_json_array(data["addresses"]))
570  {
571  auto it=linker_values.find(irep_idt(d["sym"].value));
572  if(it!=linker_values.end())
573  // sym marks the start or end of a region; already dealt with.
574  continue;
575 
576  // Perhaps this is a size symbol for a section whose size we truncated
577  // earlier.
578  irep_idt symbol_value;
579  const auto &pair=truncated_symbols.find(d["sym"].value);
580  if(pair==truncated_symbols.end())
581  symbol_value=d["val"].value;
582  else
583  {
584  log.debug()
585  << "Truncating the value of symbol " << d["sym"].value << " from "
586  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
587  << " because it corresponds to the size of a too-large section."
588  << messaget::eom;
590  }
591 
592  source_locationt loc;
593  loc.set_file(linker_script);
594  loc.set_comment("linker script-defined symbol: char *"+
595  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
596 
597  constant_exprt rhs(
599  string2integer(id2string(symbol_value)),
600  unsigned_int_type().get_width()),
602 
603  typecast_exprt rhs_tc(rhs, pointer_type(char_type()));
604 
605  symbolt &symbol = symbol_table.get_writeable_ref(d["sym"].value);
606  symbol.is_extern = false;
607  symbol.is_static_lifetime = true;
608  symbol.location = loc;
609  symbol.type = pointer_type(char_type());
610  symbol.value = rhs_tc;
611 
612  linker_values.emplace(
613  irep_idt(d["sym"].value), std::make_pair(symbol.symbol_expr(), rhs_tc));
614  }
615  return 0;
616 }
617 #else
618 {
619  goto_programt::instructionst initialize_instructions=gp.instructions;
620  for(const auto &d : to_json_array(data["regions"]))
621  {
622  unsigned start=safe_string2unsigned(d["start"].value);
623  unsigned size=safe_string2unsigned(d["size"].value);
624  constant_exprt first=from_integer(start, size_type());
625  constant_exprt second=from_integer(size, size_type());
626  const code_typet void_t({}, empty_typet());
628  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
629 
630  source_locationt loc;
631  loc.set_file(linker_script);
632  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
633  d["annot"].value);
634  f.add_source_location()=loc;
635 
637  i.make_function_call(f);
638  initialize_instructions.push_front(i);
639  }
640 
641  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
642  {
643  symbolt sym;
644  sym.name=CPROVER_PREFIX "allocated_memory";
645  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
646  sym.is_lvalue=sym.is_static_lifetime=true;
647  const code_typet void_t({}, empty_typet());
648  sym.type=void_t;
649  symbol_table.add(sym);
650  }
651 
652  for(const auto &d : to_json_array(data["addresses"]))
653  {
654  source_locationt loc;
655  loc.set_file(linker_script);
656  loc.set_comment("linker script-defined symbol: char *"+
657  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
658 
659  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
660 
661  constant_exprt rhs;
663  string2integer(d["val"].value), unsigned_int_type().get_width()));
664  rhs.type()=unsigned_int_type();
665 
666  exprt rhs_tc =
668 
669  linker_values.emplace(
670  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
671 
672  code_assignt assign(lhs, rhs_tc);
673  assign.add_source_location()=loc;
675  assign_i.make_assignment(assign);
676  initialize_instructions.push_front(assign_i);
677  }
678  return 0;
679 }
680 #endif
681 
683  std::list<irep_idt> &linker_defined_symbols,
684  const symbol_tablet &symbol_table,
685  const std::string &out_file,
686  const std::string &def_out_file)
687 {
688  for(auto const &pair : symbol_table.symbols)
689  {
690  if(
691  pair.second.is_extern && pair.second.value.is_nil() &&
692  pair.second.name != CPROVER_PREFIX "memory")
693  {
694  linker_defined_symbols.push_back(pair.second.name);
695  }
696  }
697 
698  std::ostringstream linker_def_str;
699  std::copy(
700  linker_defined_symbols.begin(),
701  linker_defined_symbols.end(),
702  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
703  log.debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
704  << messaget::eom;
705 
706  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
707  std::ofstream linker_def_file(linker_def_infile());
708  linker_def_file << linker_def_str.str();
709  linker_def_file.close();
710 
711  std::vector<std::string> argv=
712  {
713  "ls_parse.py",
714  "--script", cmdline.get_value('T'),
715  "--object", out_file,
716  "--sym-file", linker_def_infile(),
717  "--out-file", def_out_file
718  };
719 
721  argv.push_back("--very-verbose");
723  argv.push_back("--verbose");
724 
725  log.debug() << "RUN:";
726  for(std::size_t i=0; i<argv.size(); i++)
727  log.debug() << " " << argv[i];
728  log.debug() << messaget::eom;
729 
730  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
731  if(rc!=0)
732  log.warning() << "Problem parsing linker script" << messaget::eom;
733 
734  return rc;
735 }
736 
738  const std::list<irep_idt> &linker_defined_symbols,
739  const linker_valuest &linker_values)
740 {
741  int fail=0;
742  for(const auto &sym : linker_defined_symbols)
743  if(linker_values.find(sym)==linker_values.end())
744  {
745  fail=1;
746  log.error() << "Variable '" << sym
747  << "' was declared extern but never given "
748  << "a value, even in a linker script" << messaget::eom;
749  }
750 
751  for(const auto &pair : linker_values)
752  {
753  auto it=std::find(linker_defined_symbols.begin(),
754  linker_defined_symbols.end(), pair.first);
755  if(it==linker_defined_symbols.end())
756  {
757  fail=1;
758  log.error()
759  << "Linker script-defined symbol '" << pair.first << "' was "
760  << "either defined in the C source code, not declared extern in "
761  << "the C source code, or does not appear in the C source code"
762  << messaget::eom;
763  }
764  }
765  return fail;
766 }
767 
769 {
770  if(!data.is_object())
771  return true;
772 
773  const json_objectt &data_object = to_json_object(data);
774  return (
775  !(data_object.find("regions") != data_object.end() &&
776  data_object.find("addresses") != data_object.end() &&
777  data["regions"].is_array() && data["addresses"].is_array() &&
778  std::all_of(
779  to_json_array(data["addresses"]).begin(),
780  to_json_array(data["addresses"]).end(),
781  [](const jsont &j) {
782  if(!j.is_object())
783  return false;
784 
785  const json_objectt &address = to_json_object(j);
786  return address.find("val") != address.end() &&
787  address.find("sym") != address.end() &&
788  address["val"].is_number() && address["sym"].is_string();
789  }) &&
790  std::all_of(
791  to_json_array(data["regions"]).begin(),
792  to_json_array(data["regions"]).end(),
793  [](const jsont &j) {
794  if(!j.is_object())
795  return false;
796 
797  const json_objectt &region = to_json_object(j);
798  return region.find("start") != region.end() &&
799  region.find("size") != region.end() &&
800  region.find("annot") != region.end() &&
801  region.find("commt") != region.end() &&
802  region.find("start-symbol") != region.end() &&
803  region.find("has-end-symbol") != region.end() &&
804  region.find("section") != region.end() &&
805  region["start"].is_number() && region["size"].is_number() &&
806  region["annot"].is_string() &&
807  region["start-symbol"].is_string() &&
808  region["section"].is_string() && region["commt"].is_string() &&
809  ((region["has-end-symbol"].is_true() &&
810  region.find("end-symbol") != region.end() &&
811  region["end-symbol"].is_string()) ||
812  (region["has-end-symbol"].is_false() &&
813  region.find("size-symbol") != region.end() &&
814  region.find("end-symbol") == region.end() &&
815  region["size-symbol"].is_string()));
816  })));
817 }
symbol_table_baset::has_symbol
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Definition: symbol_table_base.h:87
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:36
array_name
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:19
messaget::M_RESULT
@ M_RESULT
Definition: message.h:170
tempfile.h
typecast_exprt::conditional_cast
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2025
symbolt::is_state_var
bool is_state_var
Definition: symbol.h:62
symbol_tablet
The symbol table.
Definition: symbol_table.h:13
mp_integer
BigInt mp_integer
Definition: smt_terms.h:17
source_locationt::set_comment
void set_comment(const irep_idt &comment)
Definition: source_location.h:135
linker_script_merget::linker_valuest
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
Definition: linker_script_merge.h:84
linker_script_merge.h
Merge linker script-defined symbols into a goto-program.
arith_tools.h
cmdlinet::isset
virtual bool isset(char option) const
Definition: cmdline.cpp:30
address_of_exprt::object
exprt & object()
Definition: pointer_expr.h:380
linker_script_merget::get_linker_script_data
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
Definition: linker_script_merge.cpp:682
jsont::output
void output(std::ostream &out) const
Definition: json.h:90
widen_modet::no
@ no
__CPROVER_allocated_memory
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
compilet::write_bin_object_file
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
Definition: compile.cpp:574
to_index_expr
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1478
message_handlert::get_verbosity
unsigned get_verbosity() const
Definition: message.h:54
symbolt::type
typet type
Type of symbol.
Definition: symbol.h:31
goto_programt::instructionst
std::list< instructiont > instructionst
Definition: goto_program.h:584
linker_script_merget::add_linker_script_definitions
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
Definition: linker_script_merge.cpp:34
json_objectt::find
iterator find(const std::string &key)
Definition: json.h:356
string2integer
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
linker_script_merget::ls_data2instructions
int ls_data2instructions(jsont &data, const std::string &linker_script, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into the symbol_table.
Definition: linker_script_merge.cpp:419
MAX_FLATTENED_ARRAY_SIZE
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
goto_model.h
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:946
run
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
exprt
Base class for all expressions.
Definition: expr.h:55
linker_script_merget::replace_expr
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
Definition: linker_script_merge.cpp:290
goto_modelt
Definition: goto_model.h:25
linker_script_merget::log
messaget log
Definition: linker_script_merge.h:96
irep_idt
dstringt irep_idt
Definition: irep.h:37
to_string
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: string_constraint.cpp:58
messaget::eom
static eomt eom
Definition: message.h:297
run.h
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:29
to_json_array
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:112
jsont
Definition: json.h:26
linker_script_merget::pointerize_subexprs_of
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
Definition: linker_script_merge.cpp:312
magic.h
Magic numbers used throughout the codebase.
zero_initializer
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Definition: expr_initializer.cpp:297
symbolt::pretty_name
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
linker_script_merget::goto_binary
const std::string & goto_binary
Definition: linker_script_merge.h:94
json_objectt
Definition: json.h:299
json_parser.h
goto_convert
void goto_convert(const codet &code, symbol_table_baset &symbol_table, goto_programt &dest, message_handlert &message_handler, const irep_idt &mode)
Definition: goto_convert.cpp:1905
replacement_predicatet
Patterns of expressions that should be replaced.
Definition: linker_script_merge.h:27
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:90
linker_script_merget::linker_data_is_malformed
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
Definition: linker_script_merge.cpp:768
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:84
code_function_callt
goto_instruction_codet representation of a function call statement.
Definition: goto_instruction_code.h:271
cmdlinet
Definition: cmdline.h:20
expr_initializer.h
messaget::error
mstreamt & error() const
Definition: message.h:399
empty_typet
The empty type.
Definition: std_types.h:50
integer2bvrep
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
Definition: arith_tools.cpp:380
symbol_table_baset::get_writeable_ref
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
Definition: symbol_table_base.h:121
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
failed
static bool failed(bool error_indicator)
Definition: symtab2gb_parse_options.cpp:39
messaget::mstreamt::source_location
source_locationt source_location
Definition: message.h:247
symbol_exprt::get_identifier
const irep_idt & get_identifier() const
Definition: std_expr.h:142
source_locationt::set_file
void set_file(const irep_idt &file)
Definition: source_location.h:90
symbolt::symbol_expr
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
pointer_expr.h
INITIALIZE_FUNCTION
#define INITIALIZE_FUNCTION
Definition: static_lifetime_init.h:22
cmdlinet::get_value
std::string get_value(char option) const
Definition: cmdline.cpp:48
compile.h
linker_script_merget::cmdline
const cmdlinet & cmdline
Definition: linker_script_merge.h:95
linker_script_merget::symbols_to_pointerize
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
Definition: linker_script_merge.cpp:355
index_exprt::index
exprt & index()
Definition: std_expr.h:1450
symbol_tablet::insert
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
Definition: symbol_table.cpp:17
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:253
index_exprt::array
exprt & array()
Definition: std_expr.h:1440
unsigned_int_type
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:54
to_symbol_expr
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:222
code_typet
Base type of functions.
Definition: std_types.h:538
linker_script_merget::elf_binary
const std::string & elf_binary
Definition: linker_script_merge.h:93
irept::id
const irep_idt & id() const
Definition: irep.h:396
message_handlert
Definition: message.h:27
read_goto_binary.h
safe_string2unsigned
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:16
linker_script_merget::linker_script_merget
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
Definition: linker_script_merge.cpp:135
linker_script_merget::goto_and_object_mismatch
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
Definition: linker_script_merge.cpp:737
char_type
bitvector_typet char_type()
Definition: c_types.cpp:124
source_locationt
Definition: source_location.h:18
symbol_table_baset::add
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Definition: symbol_table_base.cpp:18
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:592
symbolt::value
exprt value
Initial value of symbol.
Definition: symbol.h:34
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:184
json_objectt::end
iterator end()
Definition: json.h:386
array_typet
Arrays with given size.
Definition: std_types.h:762
symbolt::is_extern
bool is_extern
Definition: symbol.h:66
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
to_json_object
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
symbolt::location
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
cmdline.h
symbolt
Symbol table entry.
Definition: symbol.h:27
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:100
static_lifetime_init
static optionalt< codet > static_lifetime_init(const irep_idt &identifier, symbol_tablet &symbol_table)
Definition: static_lifetime_init.cpp:22
symbol_table_baset::symbols
const symbolst & symbols
Read-only field, used to look up symbols given their names.
Definition: symbol_table_base.h:30
jsont::is_array
bool is_array() const
Definition: json.h:61
CPROVER_PREFIX
#define CPROVER_PREFIX
Definition: cprover_prefix.h:14
void_t
typename detail::make_voidt< typest... >::type void_t
Definition: type_traits.h:35
symbolt::is_static_lifetime
bool is_static_lifetime
Definition: symbol.h:65
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:72
read_goto_binary
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
Definition: read_goto_binary.cpp:61
linker_script_merget::replacement_predicates
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
Definition: linker_script_merge.h:105
jsont::is_object
bool is_object() const
Definition: json.h:56
to_address_of_expr
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:408
exprt::operands
operandst & operands()
Definition: expr.h:94
symbolt::is_lvalue
bool is_lvalue
Definition: symbol.h:66
messaget::debug
mstreamt & debug() const
Definition: message.h:429
messaget::M_DEBUG
@ M_DEBUG
Definition: message.h:171
goto_convert_functions.h
index_exprt
Array index operator.
Definition: std_expr.h:1409
static_lifetime_init.h
address_of_exprt
Operator to return the address of an object.
Definition: pointer_expr.h:370
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:216
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2016
size_type
unsignedbv_typet size_type()
Definition: c_types.cpp:68
code_assignt
A goto_instruction_codet representing an assignment in the program.
Definition: goto_instruction_code.h:22
constant_exprt
A constant literal expression.
Definition: std_expr.h:2941
messaget::warning
mstreamt & warning() const
Definition: message.h:404
linker_script_merget::pointerize_linker_defined_symbols
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
Definition: linker_script_merge.cpp:219
parse_json
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
goto_programt::instructiont
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:180
constant_exprt::set_value
void set_value(const irep_idt &value)
Definition: std_expr.h:2955
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:211
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
c_types.h
symbolt::name
irep_idt name
The unique identifier.
Definition: symbol.h:40
temporary_filet
Definition: tempfile.h:23
jsont::value
std::string value
Definition: json.h:132