39 auto original_goto_model =
42 if(!original_goto_model.has_value())
44 log.
error() <<
"Unable to read goto binary for linker script merging"
50 std::list<irep_idt> linker_defined_symbols;
52 linker_defined_symbols,
53 original_goto_model->symbol_table,
55 linker_def_outfile());
80 original_goto_model->symbol_table,
84 log.
error() <<
"Could not add linkerscript defs to symbol table"
89 original_goto_model->goto_functions.function_map.erase(
93 original_goto_model->symbol_table,
98 original_goto_model->symbol_table,
99 original_goto_model->goto_functions,
101 original_goto_model->goto_functions.update();
115 log.
error() <<
"Could not pointerize all linker-defined expressions"
122 *original_goto_model,
128 log.
error() <<
"Could not write linkerscript-augmented binary"
136 const std::string &elf_binary,
137 const std::string &goto_binary,
140 : elf_binary(elf_binary),
141 goto_binary(goto_binary),
143 log(message_handler),
144 replacement_predicates(
146 "address of array's first member",
151 [](
const exprt &expr) {
152 return expr.
id() == ID_address_of &&
153 expr.
type().
id() == ID_pointer &&
161 .
id() == ID_symbol &&
169 .
id() == ID_constant &&
173 .
id() == ID_signedbv;
180 [](
const exprt &expr) {
181 return expr.
id() == ID_address_of &&
182 expr.
type().
id() == ID_pointer &&
192 [](
const exprt &expr) {
193 return expr.
id() == ID_address_of &&
194 expr.
type().
id() == ID_pointer &&
206 [](
const exprt &expr) {
207 return expr.
id() == ID_symbol && expr.
type().
id() == ID_array;
210 "pointer (does not need pointerizing)",
214 [](
const exprt &expr) {
215 return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer;
231 std::list<symbol_exprt> to_pointerize;
234 if(to_pointerize.empty())
236 log.
debug() <<
"Pointerizing the symbol-table value of symbol "
242 if(to_pointerize.empty() && fail==0)
245 for(
const auto &sym : to_pointerize)
247 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
248 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n"
249 << sym.pretty() <<
"\n";
261 std::list<exprt *> expressions = {&instruction.code_nonconst()};
262 if(instruction.has_condition())
263 expressions.push_back(&instruction.condition_nonconst());
265 for(
exprt *insts : expressions)
267 std::list<symbol_exprt> to_pointerize;
269 if(to_pointerize.empty())
273 if(to_pointerize.empty() && fail==0)
276 for(
const auto &sym : to_pointerize)
278 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
279 <<
"' in function " << gf.first <<
". Pretty:\n"
280 << sym.pretty() <<
"\n";
295 const std::string &shape)
297 auto it=linker_values.find(ident);
298 if(it==linker_values.end())
300 log.
error() <<
"Could not find a new expression for linker script-defined "
306 log.
debug() <<
"Pointerizing linker-defined symbol '" << ident
314 std::list<symbol_exprt> &to_pointerize,
318 for(
auto const &pair : linker_values)
321 if(!pattern.match(expr))
324 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
327 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
328 pattern.description());
330 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
332 if(result==to_pointerize.end())
339 to_pointerize.erase(result);
358 std::list<symbol_exprt> &to_pointerize)
const
360 for(
const auto &op : expr.
operands())
362 if(op.id()!=ID_symbol)
366 if(pair!=linker_values.end())
367 to_pointerize.push_back(sym_exp);
369 for(
const auto &op : expr.
operands())
374 The current implementation of
this function is less precise than the
375 commented-out version below. To understand the difference between these
376 implementations, consider the following example:
378 Suppose we have a section in the linker script, 100 bytes long, where the
379 address of the symbol sec_start is the start of the section (value 4096) and the
380 address of sec_end is the end of that section (value 4196).
382 The current implementation synthesizes the
goto-version of the following C:
384 char __sec_array[100];
385 char *sec_start=(&__sec_array[0]);
386 char *sec_end=((&__sec_array[0])+100);
390 This is imprecise
for the following reason: the actual address of the array and
391 the pointers shall be some random CBMC-
internal address, instead of being 4096
392 and 4196. The linker script, on the other hand, would have specified the exact
393 position of the section, and we even know what the actual values of sec_start
394 and sec_end are from the
object file (these values are in the `addresses` list
395 of the `data` argument to
this function). If the correctness of the code depends
396 on these actual values, then CBMCs model of the code is too imprecise to verify
399 The commented-out version of
this function below synthesizes the following:
401 char *sec_start=4096;
405 This code has both the actual addresses of the start and end of the section and
406 tells CBMC that the intermediate region is valid. However, the allocated_memory
407 macro does not currently allocate an actual
object at the address 4096, so
408 symbolic execution can fail. In particular, the
'allocated memory' is part of
409 __CPROVER_memory, which does not have a bounded size;
this means that (
for
410 example) calls to memcpy or memset fail, because the first and third arguments
411 do not have know n size. The commented-out implementation should be reinstated
414 In either
case,
no other changes to the rest of the code (outside
this function)
415 should be necessary. The rest of
this file converts expressions containing the
416 linker-defined symbol into pointer types
if they were not already, and
this is
417 the right behaviour
for both implementations.
421 const std::string &linker_script,
426 std::map<irep_idt, std::size_t> truncated_symbols;
429 bool has_end=d[
"has-end-symbol"].is_true();
433 << d[
"start-symbol"].value;
442 log.warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
443 << array_size <<
" is too large to model. Truncating to "
455 std::ostringstream array_comment;
456 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
457 << array_size <<
" bytes";
470 array_sym.
type = array_type;
471 array_sym.
value = *zi;
474 bool failed = symbol_table.
add(array_sym);
486 start_sym.
name = d[
"start-symbol"].value;
488 start_sym.
value = array_start;
489 linker_values.emplace(
490 d[
"start-symbol"].value,
491 std::make_pair(start_sym.
symbol_expr(), array_start));
495 auto it = std::find_if(
498 [&d](
const jsont &add) {
499 return add[
"sym"].
value == d[
"start-symbol"].value;
503 log.error() <<
"Start: Could not find address corresponding to symbol '"
504 << d[
"start-symbol"].value <<
"' (start of section)"
510 std::ostringstream start_comment;
511 start_comment <<
"Pointer to beginning of object section '"
512 << d[
"section"].value <<
"'. Original address in object file"
513 <<
" is " << (*it)[
"val"].value;
517 auto start_sym_entry = symbol_table.
insert(start_sym);
518 if(!start_sym_entry.second)
519 start_sym_entry.first = start_sym;
523 plus_exprt array_end(array_start, array_size_expr);
529 end_sym.
name = d[
"end-symbol"].value;
531 end_sym.
value = array_end;
532 linker_values.emplace(
533 d[
"end-symbol"].value,
536 auto entry = std::find_if(
539 [&d](
const jsont &add) {
540 return add[
"sym"].
value == d[
"end-symbol"].value;
544 log.debug() <<
"Could not find address corresponding to symbol '"
545 << d[
"end-symbol"].value <<
"' (end of section)"
551 std::ostringstream end_comment;
552 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
553 <<
"'. Original address in object file"
554 <<
" is " << (*entry)[
"val"].value;
558 auto end_sym_entry = symbol_table.
insert(end_sym);
559 if(!end_sym_entry.second)
560 end_sym_entry.first = end_sym;
571 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
572 if(it!=linker_values.end())
579 const auto &pair=truncated_symbols.find(d[
"sym"].value);
580 if(pair==truncated_symbols.end())
581 symbol_value=d[
"val"].value;
585 <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
587 <<
" because it corresponds to the size of a too-large section."
594 loc.
set_comment(
"linker script-defined symbol: char *"+
595 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
610 symbol.
value = rhs_tc;
612 linker_values.emplace(
632 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
634 f.add_source_location()=loc;
637 i.make_function_call(f);
638 initialize_instructions.push_front(i);
649 symbol_table.
add(sym);
656 loc.
set_comment(
"linker script-defined symbol: char *"+
657 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
669 linker_values.emplace(
670 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
673 assign.add_source_location()=loc;
675 assign_i.make_assignment(assign);
676 initialize_instructions.push_front(assign_i);
683 std::list<irep_idt> &linker_defined_symbols,
685 const std::string &out_file,
686 const std::string &def_out_file)
688 for(
auto const &pair : symbol_table.
symbols)
691 pair.second.is_extern && pair.second.value.is_nil() &&
694 linker_defined_symbols.push_back(pair.second.name);
698 std::ostringstream linker_def_str;
700 linker_defined_symbols.begin(),
701 linker_defined_symbols.end(),
702 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
703 log.
debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
707 std::ofstream linker_def_file(linker_def_infile());
708 linker_def_file << linker_def_str.str();
709 linker_def_file.close();
711 std::vector<std::string> argv=
715 "--object", out_file,
716 "--sym-file", linker_def_infile(),
717 "--out-file", def_out_file
721 argv.push_back(
"--very-verbose");
723 argv.push_back(
"--verbose");
726 for(std::size_t i=0; i<argv.size(); i++)
730 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
738 const std::list<irep_idt> &linker_defined_symbols,
742 for(
const auto &sym : linker_defined_symbols)
743 if(linker_values.find(sym)==linker_values.end())
747 <<
"' was declared extern but never given "
751 for(
const auto &pair : linker_values)
753 auto it=std::find(linker_defined_symbols.begin(),
754 linker_defined_symbols.end(), pair.first);
755 if(it==linker_defined_symbols.end())
759 <<
"Linker script-defined symbol '" << pair.first <<
"' was "
760 <<
"either defined in the C source code, not declared extern in "
761 <<
"the C source code, or does not appear in the C source code"
775 !(data_object.
find(
"regions") != data_object.
end() &&
776 data_object.
find(
"addresses") != data_object.
end() &&
786 return address.
find(
"val") != address.end() &&
787 address.find(
"sym") != address.end() &&
788 address[
"val"].is_number() && address[
"sym"].is_string();
798 return region.
find(
"start") != region.end() &&
799 region.find(
"size") != region.end() &&
800 region.find(
"annot") != region.end() &&
801 region.find(
"commt") != region.end() &&
802 region.find(
"start-symbol") != region.end() &&
803 region.find(
"has-end-symbol") != region.end() &&
804 region.find(
"section") != region.end() &&
805 region[
"start"].is_number() && region[
"size"].is_number() &&
806 region[
"annot"].is_string() &&
807 region[
"start-symbol"].is_string() &&
808 region[
"section"].is_string() && region[
"commt"].is_string() &&
809 ((region[
"has-end-symbol"].is_true() &&
810 region.find(
"end-symbol") != region.end() &&
811 region[
"end-symbol"].is_string()) ||
812 (region[
"has-end-symbol"].is_false() &&
813 region.find(
"size-symbol") != region.end() &&
814 region.find(
"end-symbol") == region.end() &&
815 region[
"size-symbol"].is_string()));