CBMC
remove_asm.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove 'asm' statements by compiling them into suitable
4  standard goto program instructions
5 
6 Author: Daniel Kroening
7 
8 Date: December 2014
9 
10 \*******************************************************************/
11 
15 
16 #include "remove_asm.h"
17 
18 #include <util/c_types.h>
19 #include <util/pointer_expr.h>
20 #include <util/std_code.h>
21 #include <util/string_constant.h>
22 
25 
26 #include "assembler_parser.h"
27 
29 {
30 public:
31  remove_asmt(symbol_tablet &_symbol_table, goto_functionst &_goto_functions)
32  : symbol_table(_symbol_table), goto_functions(_goto_functions)
33  {
34  }
35 
36  void operator()()
37  {
38  for(auto &f : goto_functions.function_map)
39  process_function(f.second);
40  }
41 
42 protected:
45 
47 
49  goto_programt::instructiont &instruction,
50  goto_programt &dest);
51 
53 
54  void process_instruction_msc(const code_asmt &, goto_programt &dest);
55 
57  const irep_idt &function_base_name,
58  const code_asm_gcct &code,
59  goto_programt &dest);
60 
62  const irep_idt &function_base_name,
63  const code_asmt &code,
64  goto_programt &dest);
65 };
66 
75  const irep_idt &function_base_name,
76  const code_asm_gcct &code,
77  goto_programt &dest)
78 {
79  irep_idt function_identifier = function_base_name;
80 
82 
83  const typet void_pointer = pointer_type(empty_typet());
84 
85  // outputs
86  forall_operands(it, code.outputs())
87  {
88  if(it->operands().size() == 2)
89  {
90  arguments.push_back(typecast_exprt(
91  address_of_exprt(to_binary_expr(*it).op1()), void_pointer));
92  }
93  }
94 
95  // inputs
96  forall_operands(it, code.inputs())
97  {
98  if(it->operands().size() == 2)
99  {
100  arguments.push_back(typecast_exprt(
101  address_of_exprt(to_binary_expr(*it).op1()), void_pointer));
102  }
103  }
104 
105  code_typet fkt_type({}, empty_typet());
106 
107  symbol_exprt fkt(function_identifier, fkt_type);
108 
109  code_function_callt function_call(std::move(fkt), std::move(arguments));
110 
111  dest.add(
112  goto_programt::make_function_call(function_call, code.source_location()));
113 
114  // do we have it?
115  if(!symbol_table.has_symbol(function_identifier))
116  {
117  symbolt symbol;
118 
119  symbol.name = function_identifier;
120  symbol.type = fkt_type;
121  symbol.base_name = function_base_name;
122  symbol.value = nil_exprt();
123  symbol.mode = ID_C;
124 
125  symbol_table.add(symbol);
126 
127  goto_functions.function_map.emplace(function_identifier, goto_functiont());
128  }
129  else
130  {
132  symbol_table.lookup_ref(function_identifier).type == fkt_type,
133  "function types should match");
134  }
135 }
136 
145  const irep_idt &function_base_name,
146  const code_asmt &code,
147  goto_programt &dest)
148 {
149  irep_idt function_identifier = function_base_name;
150 
151  const typet void_pointer = pointer_type(empty_typet());
152 
153  code_typet fkt_type({}, empty_typet());
154 
155  symbol_exprt fkt(function_identifier, fkt_type);
156 
157  code_function_callt function_call(fkt);
158 
159  dest.add(
160  goto_programt::make_function_call(function_call, code.source_location()));
161 
162  // do we have it?
163  if(!symbol_table.has_symbol(function_identifier))
164  {
165  symbolt symbol;
166 
167  symbol.name = function_identifier;
168  symbol.type = fkt_type;
169  symbol.base_name = function_base_name;
170  symbol.value = nil_exprt();
171  symbol.mode = ID_C;
172 
173  symbol_table.add(symbol);
174 
175  goto_functions.function_map.emplace(function_identifier, goto_functiont());
176  }
177  else
178  {
180  symbol_table.lookup_ref(function_identifier).type == fkt_type,
181  "function types should match");
182  }
183 }
184 
192  goto_programt::instructiont &instruction,
193  goto_programt &dest)
194 {
195  const code_asmt &code = to_code_asm(instruction.get_other());
196 
197  const irep_idt &flavor = code.get_flavor();
198 
199  if(flavor == ID_gcc)
201  else if(flavor == ID_msc)
202  process_instruction_msc(code, dest);
203  else
204  DATA_INVARIANT(false, "unexpected assembler flavor");
205 }
206 
213  const code_asm_gcct &code,
214  goto_programt &dest)
215 {
216  const irep_idt &i_str = to_string_constant(code.asm_text()).get_value();
217 
218  std::istringstream str(id2string(i_str));
220  assembler_parser.in = &str;
222 
223  goto_programt tmp_dest;
224  bool unknown = false;
225  bool x86_32_locked_atomic = false;
226 
227  for(const auto &instruction : assembler_parser.instructions)
228  {
229  if(instruction.empty())
230  continue;
231 
232 #if 0
233  std::cout << "A ********************\n";
234  for(const auto &ins : instruction)
235  {
236  std::cout << "XX: " << ins.pretty() << '\n';
237  }
238 
239  std::cout << "B ********************\n";
240 #endif
241 
242  // deal with prefixes
243  irep_idt command;
244  unsigned pos = 0;
245 
246  if(
247  instruction.front().id() == ID_symbol &&
248  instruction.front().get(ID_identifier) == "lock")
249  {
250  x86_32_locked_atomic = true;
251  pos++;
252  }
253 
254  // done?
255  if(pos == instruction.size())
256  continue;
257 
258  if(instruction[pos].id() == ID_symbol)
259  {
260  command = instruction[pos].get(ID_identifier);
261  pos++;
262  }
263 
264  if(command == "xchg" || command == "xchgl")
265  x86_32_locked_atomic = true;
266 
267  if(x86_32_locked_atomic)
268  {
270 
271  codet code_fence(ID_fence);
272  code_fence.add_source_location() = code.source_location();
273  code_fence.set(ID_WWfence, true);
274  code_fence.set(ID_RRfence, true);
275  code_fence.set(ID_RWfence, true);
276  code_fence.set(ID_WRfence, true);
277 
278  tmp_dest.add(
279  goto_programt::make_other(code_fence, code.source_location()));
280  }
281 
282  if(command == "fstcw" || command == "fnstcw" || command == "fldcw") // x86
283  {
284  gcc_asm_function_call("__asm_" + id2string(command), code, tmp_dest);
285  }
286  else if(
287  command == "mfence" || command == "lfence" || command == "sfence") // x86
288  {
289  gcc_asm_function_call("__asm_" + id2string(command), code, tmp_dest);
290  }
291  else if(command == ID_sync) // Power
292  {
293  codet code_fence(ID_fence);
294  code_fence.add_source_location() = code.source_location();
295  code_fence.set(ID_WWfence, true);
296  code_fence.set(ID_RRfence, true);
297  code_fence.set(ID_RWfence, true);
298  code_fence.set(ID_WRfence, true);
299  code_fence.set(ID_WWcumul, true);
300  code_fence.set(ID_RWcumul, true);
301  code_fence.set(ID_RRcumul, true);
302  code_fence.set(ID_WRcumul, true);
303 
304  tmp_dest.add(
305  goto_programt::make_other(code_fence, code.source_location()));
306  }
307  else if(command == ID_lwsync) // Power
308  {
309  codet code_fence(ID_fence);
310  code_fence.add_source_location() = code.source_location();
311  code_fence.set(ID_WWfence, true);
312  code_fence.set(ID_RRfence, true);
313  code_fence.set(ID_RWfence, true);
314  code_fence.set(ID_WWcumul, true);
315  code_fence.set(ID_RWcumul, true);
316  code_fence.set(ID_RRcumul, true);
317 
318  tmp_dest.add(
319  goto_programt::make_other(code_fence, code.source_location()));
320  }
321  else if(command == ID_isync) // Power
322  {
323  codet code_fence(ID_fence);
324  code_fence.add_source_location() = code.source_location();
325 
326  tmp_dest.add(
327  goto_programt::make_other(code_fence, code.source_location()));
328  // doesn't do anything by itself,
329  // needs to be combined with branch
330  }
331  else if(command == "dmb" || command == "dsb") // ARM
332  {
333  codet code_fence(ID_fence);
334  code_fence.add_source_location() = code.source_location();
335  code_fence.set(ID_WWfence, true);
336  code_fence.set(ID_RRfence, true);
337  code_fence.set(ID_RWfence, true);
338  code_fence.set(ID_WRfence, true);
339  code_fence.set(ID_WWcumul, true);
340  code_fence.set(ID_RWcumul, true);
341  code_fence.set(ID_RRcumul, true);
342  code_fence.set(ID_WRcumul, true);
343 
344  tmp_dest.add(
345  goto_programt::make_other(code_fence, code.source_location()));
346  }
347  else if(command == "isb") // ARM
348  {
349  codet code_fence(ID_fence);
350  code_fence.add_source_location() = code.source_location();
351 
352  tmp_dest.add(
353  goto_programt::make_other(code_fence, code.source_location()));
354  // doesn't do anything by itself,
355  // needs to be combined with branch
356  }
357  else
358  unknown = true; // give up
359 
360  if(x86_32_locked_atomic)
361  {
363 
364  x86_32_locked_atomic = false;
365  }
366  }
367 
368  if(unknown)
369  {
370  // we give up; we should perhaps print a warning
371  }
372  else
373  dest.destructive_append(tmp_dest);
374 }
375 
382  const code_asmt &code,
383  goto_programt &dest)
384 {
385  const irep_idt &i_str = to_string_constant(code.op0()).get_value();
386 
387  std::istringstream str(id2string(i_str));
389  assembler_parser.in = &str;
391 
392  goto_programt tmp_dest;
393  bool unknown = false;
394  bool x86_32_locked_atomic = false;
395 
396  for(const auto &instruction : assembler_parser.instructions)
397  {
398  if(instruction.empty())
399  continue;
400 
401 #if 0
402  std::cout << "A ********************\n";
403  for(const auto &ins : instruction)
404  {
405  std::cout << "XX: " << ins.pretty() << '\n';
406  }
407 
408  std::cout << "B ********************\n";
409 #endif
410 
411  // deal with prefixes
412  irep_idt command;
413  unsigned pos = 0;
414 
415  if(
416  instruction.front().id() == ID_symbol &&
417  instruction.front().get(ID_identifier) == "lock")
418  {
419  x86_32_locked_atomic = true;
420  pos++;
421  }
422 
423  // done?
424  if(pos == instruction.size())
425  continue;
426 
427  if(instruction[pos].id() == ID_symbol)
428  {
429  command = instruction[pos].get(ID_identifier);
430  pos++;
431  }
432 
433  if(command == "xchg" || command == "xchgl")
434  x86_32_locked_atomic = true;
435 
436  if(x86_32_locked_atomic)
437  {
439 
440  codet code_fence(ID_fence);
441  code_fence.add_source_location() = code.source_location();
442  code_fence.set(ID_WWfence, true);
443  code_fence.set(ID_RRfence, true);
444  code_fence.set(ID_RWfence, true);
445  code_fence.set(ID_WRfence, true);
446 
447  tmp_dest.add(
448  goto_programt::make_other(code_fence, code.source_location()));
449  }
450 
451  if(command == "fstcw" || command == "fnstcw" || command == "fldcw") // x86
452  {
453  msc_asm_function_call("__asm_" + id2string(command), code, tmp_dest);
454  }
455  else if(
456  command == "mfence" || command == "lfence" || command == "sfence") // x86
457  {
458  msc_asm_function_call("__asm_" + id2string(command), code, tmp_dest);
459  }
460  else
461  unknown = true; // give up
462 
463  if(x86_32_locked_atomic)
464  {
466 
467  x86_32_locked_atomic = false;
468  }
469  }
470 
471  if(unknown)
472  {
473  // we give up; we should perhaps print a warning
474  }
475  else
476  dest.destructive_append(tmp_dest);
477 }
478 
484  goto_functionst::goto_functiont &goto_function)
485 {
486  bool did_something = false;
487 
488  Forall_goto_program_instructions(it, goto_function.body)
489  {
490  if(it->is_other() && it->get_other().get_statement() == ID_asm)
491  {
492  goto_programt tmp_dest;
493  process_instruction(*it, tmp_dest);
494  it->turn_into_skip();
495  did_something = true;
496 
497  goto_programt::targett next = it;
498  next++;
499 
500  goto_function.body.destructive_insert(next, tmp_dest);
501  }
502  }
503 
504  if(did_something)
505  remove_skip(goto_function.body);
506 }
507 
512 void remove_asm(goto_functionst &goto_functions, symbol_tablet &symbol_table)
513 {
514  remove_asmt rem(symbol_table, goto_functions);
515  rem();
516 }
517 
525 void remove_asm(goto_modelt &goto_model)
526 {
527  remove_asm(goto_model.goto_functions, goto_model.symbol_table);
528 }
Forall_goto_program_instructions
#define Forall_goto_program_instructions(it, program)
Definition: goto_program.h:1234
goto_programt::make_other
static instructiont make_other(const goto_instruction_codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:948
symbol_table_baset::has_symbol
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Definition: symbol_table_base.h:87
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:36
symbol_tablet
The symbol table.
Definition: symbol_table.h:13
code_asm_gcct
codet representation of an inline assembler statement, for the gcc flavor.
Definition: std_code.h:1296
symbol_table_baset::lookup_ref
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
Definition: symbol_table_base.h:104
codet::op0
exprt & op0()
Definition: expr.h:125
code_asm_gcct::outputs
exprt & outputs()
Definition: std_code.h:1315
code_asmt
codet representation of an inline assembler statement.
Definition: std_code.h:1252
goto_programt::instructiont::get_other
const goto_instruction_codet & get_other() const
Get the statement for OTHER.
Definition: goto_program.h:321
pos
literalt pos(literalt a)
Definition: literal.h:194
remove_asmt
Definition: remove_asm.cpp:28
typet
The type of an expression, extends irept.
Definition: type.h:28
remove_asmt::goto_functions
goto_functionst & goto_functions
Definition: remove_asm.cpp:44
assembler_parsert::parse
virtual bool parse()
Definition: assembler_parser.h:44
symbolt::type
typet type
Type of symbol.
Definition: symbol.h:31
remove_skip
void remove_skip(goto_programt &goto_program, goto_programt::targett begin, goto_programt::targett end)
remove unnecessary skip statements
Definition: remove_skip.cpp:87
remove_asm.h
to_string_constant
const string_constantt & to_string_constant(const exprt &expr)
Definition: string_constant.h:40
goto_programt::add
targett add(instructiont &&instruction)
Adds a given instruction at the end.
Definition: goto_program.h:709
goto_model.h
string_constant.h
code_asm_gcct::inputs
exprt & inputs()
Definition: std_code.h:1325
goto_modelt
Definition: goto_model.h:25
symbolt::base_name
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
remove_asmt::process_instruction_msc
void process_instruction_msc(const code_asmt &, goto_programt &dest)
Translates the given inline assembly code (in msc style) to non-assembly goto program instructions.
Definition: remove_asm.cpp:381
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:29
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:112
assembler_parser
assembler_parsert assembler_parser
Definition: assembler_parser.cpp:11
remove_asmt::process_function
void process_function(goto_functionst::goto_functiont &)
Replaces inline assembly instructions in the goto function by non-assembly goto program instructions.
Definition: remove_asm.cpp:483
to_binary_expr
const binary_exprt & to_binary_expr(const exprt &expr)
Cast an exprt to a binary_exprt.
Definition: std_expr.h:660
goto_programt::make_function_call
static instructiont make_function_call(const code_function_callt &_code, const source_locationt &l=source_locationt::nil())
Create a function call instruction.
Definition: goto_program.h:1081
code_function_callt
goto_instruction_codet representation of a function call statement.
Definition: goto_instruction_code.h:271
symbolt::mode
irep_idt mode
Language mode.
Definition: symbol.h:49
empty_typet
The empty type.
Definition: std_types.h:50
parsert::in
std::istream * in
Definition: parser.h:26
DATA_INVARIANT
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:510
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
forall_operands
#define forall_operands(it, expr)
Definition: expr.h:20
irept::set
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:420
assembler_parsert::clear
virtual void clear()
Definition: assembler_parser.h:50
nil_exprt
The NIL expression.
Definition: std_expr.h:3025
pointer_expr.h
remove_asmt::remove_asmt
remove_asmt(symbol_tablet &_symbol_table, goto_functionst &_goto_functions)
Definition: remove_asm.cpp:31
remove_asmt::gcc_asm_function_call
void gcc_asm_function_call(const irep_idt &function_base_name, const code_asm_gcct &code, goto_programt &dest)
Adds a call to a library function that implements the given gcc-style inline assembly statement.
Definition: remove_asm.cpp:74
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:253
goto_programt::make_atomic_begin
static instructiont make_atomic_begin(const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:970
code_typet
Base type of functions.
Definition: std_types.h:538
remove_asmt::process_instruction_gcc
void process_instruction_gcc(const code_asm_gcct &, goto_programt &dest)
Translates the given inline assembly code (in gcc style) to non-assembly goto program instructions.
Definition: remove_asm.cpp:212
goto_functiont
A goto function, consisting of function body (see body) and parameter identifiers (see parameter_iden...
Definition: goto_function.h:23
code_function_callt::argumentst
exprt::operandst argumentst
Definition: goto_instruction_code.h:281
to_code_asm_gcc
code_asm_gcct & to_code_asm_gcc(codet &code)
Definition: std_code.h:1373
assembler_parsert::instructions
std::list< instructiont > instructions
Definition: assembler_parser.h:25
std_code.h
goto_programt::destructive_append
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
Definition: goto_program.h:692
goto_functionst::goto_functiont
::goto_functiont goto_functiont
Definition: goto_functions.h:27
to_code_asm
code_asmt & to_code_asm(codet &code)
Definition: std_code.h:1282
symbol_table_baset::add
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Definition: symbol_table_base.cpp:18
goto_functionst
A collection of goto functions.
Definition: goto_functions.h:24
symbolt::value
exprt value
Initial value of symbol.
Definition: symbol.h:34
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
symbolt
Symbol table entry.
Definition: symbol.h:27
assembler_parser.h
remove_asm
void remove_asm(goto_functionst &goto_functions, symbol_tablet &symbol_table)
Replaces inline assembly instructions in the goto program (i.e., instructions of kind OTHER with a co...
Definition: remove_asm.cpp:512
code_asm_gcct::asm_text
exprt & asm_text()
Definition: std_code.h:1305
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:72
remove_asmt::operator()
void operator()()
Definition: remove_asm.cpp:36
address_of_exprt
Operator to return the address of an object.
Definition: pointer_expr.h:370
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:216
remove_asmt::msc_asm_function_call
void msc_asm_function_call(const irep_idt &function_base_name, const code_asmt &code, goto_programt &dest)
Adds a call to a library function that implements the given msc-style inline assembly statement.
Definition: remove_asm.cpp:144
code_asmt::get_flavor
const irep_idt & get_flavor() const
Definition: std_code.h:1263
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2016
remove_asmt::symbol_table
symbol_tablet & symbol_table
Definition: remove_asm.cpp:43
remove_skip.h
binary_exprt::op1
exprt & op1()
Definition: expr.h:128
goto_programt::instructiont
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:180
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:211
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
c_types.h
symbolt::name
irep_idt name
The unique identifier.
Definition: symbol.h:40
string_constantt::get_value
const irep_idt & get_value() const
Definition: string_constant.h:21
goto_programt::targett
instructionst::iterator targett
Definition: goto_program.h:586
goto_programt::make_atomic_end
static instructiont make_atomic_end(const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:981
remove_asmt::process_instruction
void process_instruction(goto_programt::instructiont &instruction, goto_programt &dest)
Translates the given inline assembly code (which must be in either gcc or msc style) to non-assembly ...
Definition: remove_asm.cpp:191
codet
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:28