CBMC
gdb_api.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: GDB Machine Interface API
4 
5 Author: Malte Mues <mail.mues@gmail.com>
6  Daniel Poetzl
7 
8 \*******************************************************************/
9 
14 
15 #include <cerrno>
16 #include <cstdio>
17 #include <cstring>
18 #include <regex>
19 
20 #include <iostream>
21 
22 #include "gdb_api.h"
23 
24 #include <util/prefix.h>
25 #include <util/string2int.h>
26 #include <util/string_utils.h>
27 
28 #include <sys/wait.h>
29 
30 gdb_apit::gdb_apit(const std::vector<std::string> &args, const bool log)
31  : args(args), log(log), gdb_state(gdb_statet::NOT_CREATED)
32 {
33 }
34 
36 {
// Body of ~gdb_apit(): sends `-gdb-exit` to gdb, closes both stdio streams
// that are connected to it and then waits for a child process to terminate.
// NOTE(review): listing lines 35, 37-39, 41, 48 and 50 are missing from this
// extract, so the condition guarding the `return` below and the code that
// consumes gdb's reply to `-gdb-exit` are not visible here.
40 
42  return;
43 
44  write_to_gdb("-gdb-exit");
45  // we cannot use most_recent_line_has_tag() here as it checks the last line
46  // before the next `(gdb) \n` prompt in the output; however when gdb exits no
47  // next prompt is printed
49 
51 
// NOTE(review): the results of both fclose() calls are ignored
52  fclose(command_stream);
53  fclose(response_stream);
54 
// wait(NULL) blocks until some child of this process has terminated and
// discards its exit status; presumably that child is gdb - confirm
55  wait(NULL);
56 }
57 
58 size_t gdb_apit::query_malloc_size(const std::string &pointer_expr)
59 {
60  const auto maybe_address_string = get_value(pointer_expr);
61  CHECK_RETURN(maybe_address_string.has_value());
62 
63  if(allocated_memory.count(*maybe_address_string) == 0)
64  return 1;
65  else
66  return allocated_memory[*maybe_address_string];
67 }
68 
70 {
// Body of create_gdb_process(): creates two pipes, forks, and in the child
// replaces the process image with `gdb --interpreter=mi --args <args...>`
// whose stdin/stdout/stderr are wired to the pipes; the parent wraps its pipe
// ends in stdio streams and checks gdb's start-up output.
// NOTE(review): listing lines 69, 71, 152, 163 and 167 are missing from this
// extract; statements on those lines are not visible here.
72 
73  command_log.clear();
74 
75  pid_t gdb_process;
76 
77  int pipe_input[2];
78  int pipe_output[2];
79 
80  if(pipe(pipe_input) == -1)
81  {
82  throw gdb_interaction_exceptiont("could not create pipe for stdin");
83  }
84 
// NOTE(review): when this second pipe() fails, the two descriptors of
// pipe_input stay open
85  if(pipe(pipe_output) == -1)
86  {
87  throw gdb_interaction_exceptiont("could not create pipe for stdout");
88  }
89 
90  gdb_process = fork();
91 
// NOTE(review): when fork() fails, all four pipe descriptors stay open
92  if(gdb_process == -1)
93  {
94  throw gdb_interaction_exceptiont("could not create gdb process");
95  }
96 
97  if(gdb_process == 0)
98  {
99  // child process
// close the pipe ends that belong to the parent
100  close(pipe_input[1]);
101  close(pipe_output[0]);
102 
// route stdin from pipe_input and stdout/stderr into pipe_output
// NOTE(review): the results of the dup2() calls are not checked
103  dup2(pipe_input[0], STDIN_FILENO);
104  dup2(pipe_output[1], STDOUT_FILENO);
105  dup2(pipe_output[1], STDERR_FILENO);
106 
// NOTE(review): args.front() requires `args` to be non-empty; no check is
// visible in this extract
107  dprintf(pipe_output[1], "binary name: %s\n", args.front().c_str());
108 
109  std::vector<std::string> exec_cmd;
110  exec_cmd.reserve(args.size() + 3);
111  exec_cmd.push_back("gdb");
112  exec_cmd.push_back("--interpreter=mi");
113  exec_cmd.push_back("--args");
114  exec_cmd.insert(exec_cmd.end(), args.begin(), args.end());
115 
// build the NULL-terminated array of C strings that execvp() expects
// NOTE(review): the malloc() results are used without a null check
116  char **exec_cmd_ptr = static_cast<char **>(malloc(
117  sizeof(char *) * (exec_cmd.size() + 1)));
118  exec_cmd_ptr[exec_cmd.size()] = NULL;
119 
120  for(std::size_t i = 0; i < exec_cmd.size(); i++)
121  {
122  exec_cmd_ptr[i] = static_cast<char *>(malloc(
123  sizeof(char) * (exec_cmd[i].length() + 1)));
124  strcpy(exec_cmd_ptr[i], exec_cmd[i].c_str()); // NOLINT(runtime/printf)
125  }
126 
127  dprintf(pipe_output[1], "Loading gdb...\n");
128  execvp("gdb", exec_cmd_ptr);
129 
130  // Only reachable, if execvp failed
// the error text and a prompt line are written so that the parent, which
// reads up to the next `(gdb) \n` line, can pick up the failure message
// NOTE(review): the exception below is thrown inside the forked child, so it
// propagates through a copy of the caller's stack rather than ending the
// child - confirm this is intended
131  int errno_value = errno;
132  dprintf(pipe_output[1], "Starting gdb failed: %s\n", strerror(errno_value));
133  dprintf(pipe_output[1], "(gdb) \n");
134  throw gdb_interaction_exceptiont("could not run gdb");
135  }
136  else
137  {
138  // parent process
// close the pipe ends that belong to the child
139  close(pipe_input[0]);
140  close(pipe_output[1]);
141 
// NOTE(review): neither fdopen() result is checked for NULL
142  // get stream for reading the gdb output
143  response_stream = fdopen(pipe_output[0], "r");
144 
145  // get stream for writing to gdb
146  command_stream = fdopen(pipe_input[1], "w");
147 
// last line before the first prompt; carries the child's error message when
// execvp() failed
148  std::string line = read_most_recent_line();
149  if(has_prefix(line, "Starting gdb failed:"))
150  throw gdb_interaction_exceptiont(line);
151 
// NOTE(review): listing line 152 is missing here
153 
154  CHECK_RETURN(
155  has_prefix(line, R"(~"done)") ||
156  has_prefix(line, R"(~"Reading)"));
157 
158  if(log)
159  {
160  // logs output to `gdb.txt` in the current directory, input is not logged
161  // hence we log it to `command_log`
162  write_to_gdb("-gdb-set logging on");
// NOTE(review): listing line 163 is missing here
164  }
165 
166  write_to_gdb("-gdb-set max-value-size unlimited");
// NOTE(review): listing line 167 is missing here
168  }
169 }
170 
171 void gdb_apit::write_to_gdb(const std::string &command)
172 {
173  PRECONDITION(!command.empty());
174  PRECONDITION(command.find('\n') == std::string::npos);
175 
176  std::string line(command);
177  line += '\n';
178 
179  if(log)
180  {
181  command_log.push_front(command);
182  }
183 
184  if(fputs(line.c_str(), command_stream) == EOF)
185  {
186  throw gdb_interaction_exceptiont("could not write a command to gdb");
187  }
188 
189  fflush(command_stream);
190 }
191 
193 {
// Body of get_command_log(): returns `command_log`, the list write_to_gdb()
// pushes each command onto when `log` is set. Calling it while `log` is
// false violates the precondition below.
// NOTE(review): the signature (listing line 192) is missing from this extract.
194  PRECONDITION(log);
195  return command_log;
196 }
197 
199 {
200  std::string result;
201 
202  do
203  {
204  const size_t buf_size = 1024;
205  char buf[buf_size]; // NOLINT(runtime/arrays)
206 
207  const char *c = fgets(buf, buf_size, response_stream);
208 
209  if(c == NULL)
210  {
211  if(ferror(response_stream))
212  {
213  throw gdb_interaction_exceptiont("error reading from gdb");
214  }
215 
216  INVARIANT(
217  feof(response_stream),
218  "EOF must have been reached when the error indicator on the stream "
219  "is not set and fgets returned NULL");
220  INVARIANT(
221  result.empty() || result.back() != '\n',
222  "when EOF is reached then either no characters were read or the string"
223  " read does not end in a newline");
224 
225  return result;
226  }
227 
228  std::string chunk(buf);
229  INVARIANT(!chunk.empty(), "chunk cannot be empty when EOF was not reached");
230 
231  result += chunk;
232  } while(result.back() != '\n');
233 
234  return result;
235 }
236 
238 {
239  std::string line;
240  std::string output;
241 
242  do
243  {
244  output = line;
245  line = read_next_line();
246  } while(line != "(gdb) \n");
247 
248  return output;
249 }
250 
252 gdb_apit::get_most_recent_record(const std::string &tag, const bool must_exist)
253 {
254  std::string line = read_most_recent_line();
255  const bool b = has_prefix(line, tag);
256 
257  if(must_exist)
258  {
259  CHECK_RETURN(b);
260  }
261  else if(!b)
262  {
263  throw gdb_interaction_exceptiont("record does not exist");
264  }
265 
266  std::string record = strip_string(line.substr(line.find(',') + 1));
267 
268  return parse_gdb_output_record(record);
269 }
270 
271 bool gdb_apit::most_recent_line_has_tag(const std::string &tag)
272 {
273  const std::string line = read_most_recent_line();
274  return has_prefix(line, tag);
275 }
276 
// Sends gdb the console command `core <corefile>`.
// NOTE(review): listing lines 279, 285 and 287 are missing from this extract,
// so any state check before the command and any handling of gdb's reply
// after it are not visible here.
277 void gdb_apit::run_gdb_from_core(const std::string &corefile)
278 {
280 
281  // there does not seem to be a gdb mi command to run from a core file
// NOTE(review): `corefile` is appended verbatim; a path with spaces would
// reach gdb unquoted
282  const std::string command = "core " + corefile;
283 
284  write_to_gdb(command);
286 
288 }
289 
291 {
292  // this is what the registers look like at the function call entry:
293  //
294  // reg. name hex. value dec. value
295  // 0: rax 0xffffffff 4294967295
296  // 1: rbx 0x20000000 536870912
297  // 2: rcx 0x591 1425
298  // 3: rdx 0x591 1425
299  // 4: rsi 0x1 1
300  // 5: rdi 0x591 1425
301  // ...
302  // rax will eventually contain the return value and
303  // rdi now stores the first (integer) argument
304  // in the machine interface they are referred to by numbers, hence:
305  write_to_gdb("-data-list-register-values d 5");
306  auto record = get_most_recent_record("^done", true);
307  auto allocated_size = safe_string2size_t(get_register_value(record));
308 
309  write_to_gdb("-exec-finish");
310  if(!most_recent_line_has_tag("*running"))
311  {
312  throw gdb_interaction_exceptiont("could not run program");
313  }
314  record = get_most_recent_record("*stopped");
315  auto frame_content = get_value_from_record(record, "frame");
316 
317  // the malloc breakpoint may be inside another malloc function
318  if(frame_content.find("func=\"malloc\"") != std::string::npos)
319  {
320  // so we need to finish the outer malloc as well
321  write_to_gdb("-exec-finish");
322  if(!most_recent_line_has_tag("*running"))
323  {
324  throw gdb_interaction_exceptiont("could not run program");
325  }
326  record = get_most_recent_record("*stopped");
327  }
328 
329  // now we can read the rax register to the the allocated memory address
330  write_to_gdb("-data-list-register-values x 0");
331  record = get_most_recent_record("^done", true);
332  allocated_memory[get_register_value(record)] = allocated_size;
333 }
334 
// Inserts a breakpoint at `malloc_name` and one at `breakpoint`, starts the
// program and keeps continuing while the stop is a hit of the malloc
// breakpoint. Returns true when the program finally stops with reason
// `breakpoint-hit` and false when it stops with `exited-normally`; failures
// to set the breakpoint or to run are reported as gdb_interaction_exceptiont.
// NOTE(review): listing lines 337, 367, 390 and 399 are missing from this
// extract; statements on those lines are not visible here.
335 bool gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint)
336 {
338 
// the malloc breakpoint is requested first; a refusal is tolerated and only
// remembered in `malloc_is_known`
339  write_to_gdb("-break-insert " + malloc_name);
340  bool malloc_is_known = was_command_accepted();
341 
342  std::string command("-break-insert");
343  command += " " + breakpoint;
344 
345  write_to_gdb(command);
346  if(!was_command_accepted())
347  {
348  throw gdb_interaction_exceptiont("could not set breakpoint");
349  }
350 
351  write_to_gdb("-exec-run");
352 
353  if(!most_recent_line_has_tag("*running"))
354  {
355  throw gdb_interaction_exceptiont("could not run program");
356  }
357 
358  gdb_output_recordt record = get_most_recent_record("*stopped");
359 
360  // malloc function is known, i.e. present among the symbols
361  if(malloc_is_known)
362  {
363  // stop at every entry into malloc call
364  while(hit_malloc_breakpoint(record))
365  {
366  // and store the information about the allocated memory
// NOTE(review): listing line 367 is missing here; presumably it is the call
// that records the allocation - confirm
368  write_to_gdb("-exec-continue");
369  if(!most_recent_line_has_tag("*running"))
370  {
371  throw gdb_interaction_exceptiont("could not run program");
372  }
373  record = get_most_recent_record("*stopped");
374  }
375 
// NOTE(review): the malloc breakpoint is addressed by the fixed number 1,
// which assumes it was the first breakpoint created in this gdb session
376  write_to_gdb("-break-delete 1");
377  if(!was_command_accepted())
378  {
379  throw gdb_interaction_exceptiont("could not delete breakpoint at malloc");
380  }
381  }
382 
383  const auto it = record.find("reason");
384  CHECK_RETURN(it != record.end());
385 
386  const std::string &reason = it->second;
387 
388  if(reason == "breakpoint-hit")
389  {
// NOTE(review): listing line 390 is missing here
391  return true;
392  }
393  else if(reason == "exited-normally")
394  {
395  return false;
396  }
397  else
398  {
// NOTE(review): listing line 399 is missing; the string below is presumably
// the argument of a `throw` that starts on it
400  "gdb stopped for unhandled reason `" + reason + "`");
401  }
402 
403  UNREACHABLE;
404 }
405 
// Evaluates `expr` in gdb through a temporary variable object named `tmp`
// and returns the text stored under the `value` key of gdb's `^done` reply.
// NOTE(review): listing lines 412 and 420 are missing from this extract.
406 std::string gdb_apit::eval_expr(const std::string &expr)
407 {
408  write_to_gdb("-var-create tmp * " + expr);
409 
410  if(!was_command_accepted())
411  {
// NOTE(review): listing line 412 is missing; the string below is presumably
// the argument of a `throw` that starts on it
413  "could not create variable for expression `" + expr + "`");
414  }
415 
416  write_to_gdb("-var-evaluate-expression tmp");
417  gdb_output_recordt record = get_most_recent_record("^done", true);
418 
419  write_to_gdb("-var-delete tmp");
// NOTE(review): listing line 420 is missing; whatever consumes gdb's reply
// to `-var-delete` is not visible here
421 
422  const auto it = record.find("value");
423  CHECK_RETURN(it != record.end());
424 
425  const std::string value = it->second;
426 
// a trailing quote is only acceptable when it is preceded by a backslash,
// i.e. when it is an escaped quote that belongs to the value itself
// NOTE(review): `value.back()` is called without an emptiness check; an
// empty value would make both invariants below undefined behaviour
427  INVARIANT(
428  value.back() != '"' ||
429  (value.length() >= 2 && value[value.length() - 2] == '\\'),
430  "quotes should have been stripped off from value");
431  INVARIANT(value.back() != '\n', "value should not end in a newline");
432 
433  return value;
434 }
435 
// Evaluates `expr` with eval_expr() and matches the result against a pattern
// built from r_hex_addr, r_id, r_char and r_string. On a match the captured
// pieces are packed into a pointer_valuet; otherwise a default-constructed
// pointer_valuet is returned.
// NOTE(review): listing lines 438 and 445 are missing from this extract.
436 gdb_apit::pointer_valuet gdb_apit::get_memory(const std::string &expr)
437 {
439 
440  std::string value;
441  try
442  {
443  value = eval_expr(expr);
444  }
// NOTE(review): listing line 445 is missing; it presumably holds the `catch`
// clause that the block below belongs to
446  {
447  return pointer_valuet{};
448  }
449 
// an address, optionally followed by ` <id>` and by ` <char or string>`
// NOTE(review): the capture-group numbering used further down depends on the
// definitions of the r_* members, which are not part of this file
450  std::regex regex(
451  r_hex_addr + r_opt(' ' + r_id) + r_opt(' ' + r_or(r_char, r_string)));
452 
453  std::smatch result;
454  const bool b = regex_match(value, result, regex);
455  if(!b)
456  return pointer_valuet{};
457 
458  optionalt<std::string> opt_string;
459  const std::string string = result[4];
460 
461  if(!string.empty())
462  {
463  const std::size_t len = string.length();
464 
// the matched text still carries its escaped quotes (\" ... \") at both
// ends; they are verified here and cut off below
465  INVARIANT(
466  len >= 4,
467  "pointer-string should be: backslash, quotes, .., backslash, quotes");
468  INVARIANT(
469  string[0] == '\\',
470  "pointer-string should be: backslash, quotes, .., backslash, quotes");
471  INVARIANT(
472  string[1] == '"',
473  "pointer-string should be: backslash, quotes, .., backslash, quotes");
474  INVARIANT(
475  string[len - 2] == '\\',
476  "pointer-string should be: backslash, quotes, .., backslash, quotes");
477  INVARIANT(
478  string[len - 1] == '"',
479  "pointer-string should be: backslash, quotes, .., backslash, quotes");
480 
481  opt_string = string.substr(2, len - 4);
482  }
483 
484  return pointer_valuet(result[1], result[2], result[3], opt_string, true);
485 }
486 
// Evaluates `expr` with eval_expr(). When the result has the form
// `<token> '<chars>'` only the text between the single quotes is returned;
// otherwise the result is returned unchanged.
// NOTE(review): listing lines 489 and 496 are missing from this extract.
487 optionalt<std::string> gdb_apit::get_value(const std::string &expr)
488 {
490 
491  std::string value;
492  try
493  {
494  value = eval_expr(expr);
495  }
// NOTE(review): listing line 496 is missing; it presumably holds the `catch`
// clause that the block below belongs to
497  {
498  return {};
499  }
500 
501  // Get char value
502  {
503  // matches e.g. 99 'c' and extracts c
504  std::regex regex(R"([^ ]+ '([^']+)')");
505 
506  std::smatch result;
507  const bool b = regex_match(value, result, regex);
508 
509  if(b)
510  {
511  return std::string{result[1]};
512  }
513  }
514 
515  // return raw value
516  return value;
517 }
518 
520 gdb_apit::parse_gdb_output_record(const std::string &s)
521 {
522  PRECONDITION(s.back() != '\n');
523 
524  gdb_output_recordt result;
525 
526  std::size_t depth = 0;
527  std::string::size_type start = 0;
528 
529  const std::string::size_type n = s.length();
530 
531  for(std::string::size_type i = 0; i < n; i++)
532  {
533  const char c = s[i];
534 
535  if(c == '{' || c == '[')
536  {
537  depth++;
538  }
539  else if(c == '}' || c == ']')
540  {
541  depth--;
542  }
543 
544  if(depth == 0 && (c == ',' || i == n - 1))
545  {
546  const std::string item =
547  i == n - 1 ? s.substr(start) : s.substr(start, i - start);
548 
549  // Split on first `=`
550  std::string::size_type j = item.find('=');
551  CHECK_RETURN(j != std::string::npos);
552  CHECK_RETURN(j > 0);
553  CHECK_RETURN(j < s.length());
554 
555  const std::string key = strip_string(item.substr(0, j));
556  std::string value = strip_string(item.substr(j + 1));
557 
558  const char first = value.front();
559  const char last = value.back();
560 
561  INVARIANT(first == '"' || first == '{' || first == '[', "");
562  INVARIANT(first != '"' || last == '"', "");
563  INVARIANT(first != '{' || last == '}', "");
564  INVARIANT(first != '[' || last == ']', "");
565 
566  // Remove enclosing `"` for primitive values
567  if(first == '"')
568  {
569  value = value.substr(1, value.length() - 2);
570  }
571 
572  auto r = result.insert(std::make_pair(key, value));
573  CHECK_RETURN(r.second);
574 
575  start = i + 1;
576  }
577  }
578 
579  return result;
580 }
581 
583 {
// Body of was_command_accepted(): consumes gdb output up to the next prompt
// and reports whether the last line before it starts with `^done`.
// NOTE(review): the signature (listing line 582) is missing from this extract.
584  return most_recent_line_has_tag("^done");
585 }
586 
588 {
// Body of check_command_accepted(): like was_command_accepted(), but a
// rejected command is treated as an invariant violation instead of being
// reported to the caller.
// NOTE(review): the result is stored in a local before it is checked,
// presumably so that gdb's reply is still consumed in builds where
// CHECK_RETURN does not evaluate its argument - confirm against invariant.h.
// NOTE(review): the signature (listing line 587) is missing from this extract.
589  bool was_accepted = was_command_accepted();
590  CHECK_RETURN(was_accepted);
591 }
592 
593 std::string gdb_apit::r_opt(const std::string &regex)
594 {
595  return R"((?:)" + regex + R"()?)";
596 }
597 
598 std::string
599 gdb_apit::r_or(const std::string &regex_left, const std::string &regex_right)
600 {
601  return R"((?:)" + regex_left + '|' + regex_right + R"())";
602 }
603 
605  const gdb_output_recordt &record,
606  const std::string &value_name)
607 {
// Body of get_value_from_record(): returns a copy of the entry stored under
// `value_name` in `record`; a missing key is an invariant violation.
// NOTE(review): the first line of the signature (listing line 604) is
// missing from this extract.
608  const auto it = record.find(value_name);
609  CHECK_RETURN(it != record.end());
610  const auto value = it->second;
611 
// a trailing quote is only acceptable when it is preceded by a backslash,
// i.e. when it is an escaped quote that belongs to the value itself
// NOTE(review): `value.back()` is called without an emptiness check; an
// empty value would make both invariants below undefined behaviour
612  INVARIANT(
613  value.back() != '"' ||
614  (value.length() >= 2 && value[value.length() - 2] == '\\'),
615  "quotes should have been stripped off from value");
616  INVARIANT(value.back() != '\n', "value should not end in a newline");
617 
618  return value;
619 }
620 
621 bool gdb_apit::hit_malloc_breakpoint(const gdb_output_recordt &stopped_record)
622 {
623  const auto it = stopped_record.find("reason");
624  CHECK_RETURN(it != stopped_record.end());
625 
626  if(it->second != "breakpoint-hit")
627  return false;
628 
629  return safe_string2size_t(get_value_from_record(stopped_record, "bkptno")) ==
630  1;
631 }
632 
633 std::string gdb_apit::get_register_value(const gdb_output_recordt &record)
634 {
635  // we expect the record of form:
636  // {[register-values]->[name=name_string, value=\"value_string\"],..}
637  auto record_value = get_value_from_record(record, "register-values");
638  std::string value_eq_quotes = "value=\"";
639  auto value_eq_quotes_size = value_eq_quotes.size();
640 
641  auto starting_pos = record_value.find(value_eq_quotes) + value_eq_quotes_size;
642  auto ending_pos = record_value.find('\"', starting_pos);
643  auto value_length = ending_pos - starting_pos;
644  return std::string{record_value, starting_pos, value_length};
645 }
gdb_apit::r_or
static std::string r_or(const std::string &regex_left, const std::string &regex_right)
UNREACHABLE
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:503
gdb_apit::hit_malloc_breakpoint
bool hit_malloc_breakpoint(const gdb_output_recordt &stopped_record)
Check if the breakpoint we hit is inside a malloc.
gdb_apit::collect_malloc_calls
void collect_malloc_calls()
Intercepts the gdb-analysis at the malloc call-site to add the corresponding information into allocat...
Definition: gdb_api.cpp:290
gdb_apit::r_opt
static std::string r_opt(const std::string &regex)
gdb_apit::query_malloc_size
size_t query_malloc_size(const std::string &pointer_expr)
Get the exact allocated size for a pointer pointer_expr.
Definition: gdb_api.cpp:58
gdb_interaction_exceptiont
Definition: gdb_api.h:229
gdb_apit::pointer_valuet
Data associated with the value of a pointer, i.e.
Definition: gdb_api.h:77
gdb_apit::args
std::vector< std::string > args
Definition: gdb_api.h:144
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
string_utils.h
gdb_apit::gdb_statet::NOT_CREATED
@ NOT_CREATED
gdb_apit::commandst
std::forward_list< std::string > commandst
Definition: gdb_api.h:33
gdb_apit::get_memory
pointer_valuet get_memory(const std::string &expr)
Get the value of a pointer associated with expr.
irept::find
const irept & find(const irep_idt &name) const
Definition: irep.cpp:106
gdb_apit::parse_gdb_output_record
static gdb_output_recordt parse_gdb_output_record(const std::string &s)
gdb_apit::most_recent_line_has_tag
bool most_recent_line_has_tag(const std::string &tag)
Definition: gdb_api.cpp:271
gdb_apit::gdb_state
gdb_statet gdb_state
Definition: gdb_api.h:159
prefix.h
gdb_apit::gdb_apit
gdb_apit(const std::vector< std::string > &args, const bool log=false)
Create a gdb_apit object.
Definition: gdb_api.cpp:30
gdb_apit::command_stream
FILE * command_stream
Definition: gdb_api.h:147
gdb_apit::eval_expr
std::string eval_expr(const std::string &expr)
Definition: gdb_api.cpp:406
safe_string2size_t
std::size_t safe_string2size_t(const std::string &str, int base)
Definition: string2int.cpp:23
gdb_apit::command_log
commandst command_log
Definition: gdb_api.h:150
gdb_api.h
string2int.h
gdb_apit::gdb_statet::STOPPED
@ STOPPED
strip_string
std::string strip_string(const std::string &s)
Remove all whitespace characters from either end of a string.
Definition: string_utils.cpp:21
gdb_apit::r_hex_addr
const std::string r_hex_addr
Definition: gdb_api.h:211
has_prefix
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
gdb_apit::response_stream
FILE * response_stream
Definition: gdb_api.h:146
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
gdb_apit::get_most_recent_record
gdb_output_recordt get_most_recent_record(const std::string &tag, const bool must_exist=false)
Definition: gdb_api.cpp:252
gdb_apit::write_to_gdb
void write_to_gdb(const std::string &command)
Definition: gdb_api.cpp:171
gdb_apit::~gdb_apit
~gdb_apit()
Terminate the gdb process and close open streams (for reading from and writing to gdb)
Definition: gdb_api.cpp:35
gdb_apit::read_next_line
std::string read_next_line()
Definition: gdb_api.cpp:198
gdb_apit::get_command_log
const commandst & get_command_log()
Return the vector of commands that have been written to gdb so far.
Definition: gdb_api.cpp:192
gdb_apit::malloc_name
const std::string malloc_name
Definition: gdb_api.h:226
gdb_apit::allocated_memory
std::map< std::string, size_t > allocated_memory
track the allocated size for each malloc call maps hexadecimal address to the number of bytes
Definition: gdb_api.h:163
gdb_apit::r_id
const std::string r_id
Definition: gdb_api.h:215
gdb_apit::get_value
optionalt< std::string > get_value(const std::string &expr)
Get the memory address pointed to by the given pointer expression.
gdb_apit::get_register_value
std::string get_register_value(const gdb_output_recordt &record)
Parse the record produced by listing register value.
gdb_apit::log
const bool log
Definition: gdb_api.h:149
optionalt
nonstd::optional< T > optionalt
Definition: optional.h:35
gdb_apit::was_command_accepted
bool was_command_accepted()
gdb_apit::get_value_from_record
std::string get_value_from_record(const gdb_output_recordt &record, const std::string &value_name)
Locate and return the value for a given name.
gdb_apit::create_gdb_process
void create_gdb_process()
Create a new gdb process for analysing the binary indicated by the first element in args
Definition: gdb_api.cpp:69
gdb_apit::r_string
const std::string r_string
Definition: gdb_api.h:223
gdb_apit::check_command_accepted
void check_command_accepted()
gdb_apit::gdb_statet
gdb_statet
Definition: gdb_api.h:152
gdb_apit::run_gdb_to_breakpoint
bool run_gdb_to_breakpoint(const std::string &breakpoint)
Run gdb to the given breakpoint.
Definition: gdb_api.cpp:335
gdb_apit::run_gdb_from_core
void run_gdb_from_core(const std::string &corefile)
Run gdb with the given core file.
Definition: gdb_api.cpp:277
r
static int8_t r
Definition: irep_hash.h:60
gdb_apit::r_char
const std::string r_char
Definition: gdb_api.h:219
size_type
unsignedbv_typet size_type()
Definition: c_types.cpp:68
gdb_apit::gdb_statet::CREATED
@ CREATED
validation_modet::INVARIANT
@ INVARIANT
gdb_apit::read_most_recent_line
std::string read_most_recent_line()
Definition: gdb_api.cpp:237
gdb_apit::gdb_output_recordt
std::map< std::string, std::string > gdb_output_recordt
Definition: gdb_api.h:165