CBMC
osx_fat_reader.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Read Mach-O
4 
5 Author:
6 
7 \*******************************************************************/
8 
11 
#include "osx_fat_reader.h"

#include <util/exception_utils.h>
#include <util/invariant.h>

#include <cstring>
16 
17 // we define file-type magic values for all platforms to detect when we find a
18 // file that we might not be able to process
19 #define CPROVER_FAT_MAGIC 0xcafebabe
20 #define CPROVER_FAT_CIGAM 0xbebafeca
21 #define CPROVER_MH_MAGIC 0xfeedface
22 #define CPROVER_MH_CIGAM 0xcefaedfe
23 #define CPROVER_MH_MAGIC_64 0xfeedfacf
24 #define CPROVER_MH_CIGAM_64 0xcffaedfe
25 
26 #ifdef __APPLE__
27 # include <architecture/byte_order.h>
28 # include <mach-o/fat.h>
29 # include <mach-o/loader.h>
30 # include <mach-o/swap.h>
31 
32 # if(CPROVER_FAT_MAGIC != FAT_MAGIC) || (CPROVER_FAT_CIGAM != FAT_CIGAM) || \
33  (CPROVER_MH_MAGIC != MH_MAGIC) || (CPROVER_MH_CIGAM != MH_CIGAM) || \
34  (CPROVER_MH_MAGIC_64 != MH_MAGIC_64) || \
35  (CPROVER_MH_CIGAM_64 != MH_CIGAM_64)
36 # error "Mach-O magic has inconsistent value"
37 # endif
38 #endif
39 
40 #include <util/run.h>
41 
/// The first two 32-bit fields of an OSX fat binary header, laid out exactly
/// as they appear at the start of the file. Both fields hold the raw on-disk
/// bytes; convert them with u32_to_native_endian before comparing.
struct fat_header_prefixt
{
  /// File-type magic
  uint32_t magic;
  /// Number of architecture slices that follow the header
  uint32_t n_architectures;
};
47 
/// Decode a 32-bit value whose in-memory bytes are in big-endian order.
///
/// The bytes of \p input are read in memory order and combined most
/// significant byte first, so the result does not depend on the byte order of
/// the host.
/// \param input: a 32-bit value holding bytes copied verbatim from a
///   big-endian source
/// \return the decoded value in the host's native representation
static uint32_t u32_to_native_endian(uint32_t input)
{
  // inspecting an object through unsigned char is always permitted
  const unsigned char *bytes =
    reinterpret_cast<const unsigned char *>(&input);

  return (static_cast<uint32_t>(bytes[0]) << 24) |
         (static_cast<uint32_t>(bytes[1]) << 16) |
         (static_cast<uint32_t>(bytes[2]) << 8) |
         static_cast<uint32_t>(bytes[3]);
}
56 
57 bool is_osx_fat_header(char header_bytes[8])
58 {
59  struct fat_header_prefixt *header =
60  reinterpret_cast<struct fat_header_prefixt *>(header_bytes);
61 
62  // Unfortunately for us, both Java class files and Mach fat binaries use the
63  // magic number 0xCAFEBABE. Therefore we must also check the second field,
64  // number of architectures, is in a sensible range (I use at 1 <= archs < 20,
65  // the same criterion used by `GNU file`).
66  // Luckily the class file format stores the file version here, which cannot
67  // fall in this range.
68  uint32_t n_architectures_native =
70  return u32_to_native_endian(header->magic) == CPROVER_FAT_MAGIC &&
71  n_architectures_native >= 1 && n_architectures_native < 20;
72 }
73 
75  std::ifstream &in,
76  message_handlert &message_handler)
77  : log(message_handler), has_gb_arch(false)
78 {
79 #ifdef __APPLE__
80  // NOLINTNEXTLINE(readability/identifiers)
81  struct fat_header fh;
82  // NOLINTNEXTLINE(readability/identifiers)
83  in.read(reinterpret_cast<char*>(&fh), sizeof(struct fat_header));
84 
85  if(!in)
86  throw system_exceptiont("failed to read OSX fat header");
87 
88  if(!is_osx_fat_header(reinterpret_cast<char *>(&(fh.magic))))
89  throw deserialization_exceptiont("OSX fat header malformed");
90 
91  static_assert(
92  sizeof(fh.nfat_arch) == 4, "fat_header::nfat_arch is of type uint32_t");
93  unsigned narch = u32_to_native_endian(fh.nfat_arch);
94 
95  for(unsigned i=0; !has_gb_arch && i<narch; ++i)
96  {
97  // NOLINTNEXTLINE(readability/identifiers)
98  struct fat_arch fa;
99  // NOLINTNEXTLINE(readability/identifiers)
100  in.read(reinterpret_cast<char*>(&fa), sizeof(struct fat_arch));
101 
102  static_assert(
103  sizeof(fa.cputype) == 4 && sizeof(fa.cpusubtype) == 4 &&
104  sizeof(fa.size) == 4,
105  "This requires a specific fat architecture");
106  int cputype = u32_to_native_endian(fa.cputype);
107  int cpusubtype = u32_to_native_endian(fa.cpusubtype);
108  unsigned size = u32_to_native_endian(fa.size);
109 
110  has_gb_arch=cputype==CPU_TYPE_HPPA &&
111  cpusubtype==CPU_SUBTYPE_HPPA_7100LC &&
112  size > 0;
113  }
114 #else
115  (void)in; // unused parameter
116 
117  log.warning() << "Cannot read OSX fat archive on this platform"
118  << messaget::eom;
119 #endif
120 }
121 
123  const std::string &source,
124  const std::string &dest) const
125 {
127 
128  return run(
129  "lipo", {"lipo", "-thin", "hppa7100LC", "-output", dest, source}) !=
130  0;
131 }
132 
133 // guided by https://lowlevelbits.org/parsing-mach-o-files/
134 bool is_osx_mach_object(char hdr[4])
135 {
136  uint32_t *magic = reinterpret_cast<uint32_t *>(hdr);
137 
138  switch(*magic)
139  {
140  case CPROVER_MH_MAGIC:
141  case CPROVER_MH_CIGAM:
142  case CPROVER_MH_MAGIC_64:
143  case CPROVER_MH_CIGAM_64:
144  return true;
145  }
146 
147  return false;
148 }
149 
150 void osx_mach_o_readert::process_sections_32(uint32_t nsects, bool need_swap)
151 {
152 #ifdef __APPLE__
153  for(uint32_t i = 0; i < nsects; ++i)
154  {
155  // NOLINTNEXTLINE(readability/identifiers)
156  struct section s;
157  in.read(reinterpret_cast<char *>(&s), sizeof(s));
158 
159  if(!in)
160  throw deserialization_exceptiont("failed to read Mach-O section");
161 
162  if(need_swap)
163  swap_section(&s, 1, NXHostByteOrder());
164 
165  sections.emplace(s.sectname, sectiont(s.sectname, s.offset, s.size));
166  }
167 #else
168  // unused parameters
169  (void)nsects;
170  (void)need_swap;
171 #endif
172 }
173 
174 void osx_mach_o_readert::process_sections_64(uint32_t nsects, bool need_swap)
175 {
176 #ifdef __APPLE__
177  for(uint32_t i = 0; i < nsects; ++i)
178  {
179  // NOLINTNEXTLINE(readability/identifiers)
180  struct section_64 s;
181  in.read(reinterpret_cast<char *>(&s), sizeof(s));
182 
183  if(!in)
184  throw deserialization_exceptiont("failed to read 64-bit Mach-O section");
185 
186  if(need_swap)
187  swap_section_64(&s, 1, NXHostByteOrder());
188 
189  sections.emplace(s.sectname, sectiont(s.sectname, s.offset, s.size));
190  }
191 #else
192  // unused parameters
193  (void)nsects;
194  (void)need_swap;
195 #endif
196 }
197 
199  uint32_t ncmds,
200  std::size_t offset,
201  bool need_swap)
202 {
203 #ifdef __APPLE__
204  for(uint32_t i = 0; i < ncmds; ++i)
205  {
206  in.seekg(offset);
207 
208  // NOLINTNEXTLINE(readability/identifiers)
209  struct load_command lc;
210  in.read(reinterpret_cast<char *>(&lc), sizeof(lc));
211 
212  if(!in)
213  throw deserialization_exceptiont("failed to read Mach-O command");
214 
215  if(need_swap)
216  swap_load_command(&lc, NXHostByteOrder());
217 
218  // we may need to re-read the command once we have figured out its type; in
219  // particular, segment commands contain additional information that we have
220  // now just read a prefix of
221  in.seekg(offset);
222 
223  switch(lc.cmd)
224  {
225  case LC_SEGMENT:
226  {
227  // NOLINTNEXTLINE(readability/identifiers)
228  struct segment_command seg;
229  in.read(reinterpret_cast<char *>(&seg), sizeof(seg));
230 
231  if(!in)
232  throw deserialization_exceptiont("failed to read Mach-O segment");
233 
234  if(need_swap)
235  swap_segment_command(&seg, NXHostByteOrder());
236 
237  process_sections_32(seg.nsects, need_swap);
238  break;
239  }
240  case LC_SEGMENT_64:
241  {
242  // NOLINTNEXTLINE(readability/identifiers)
243  struct segment_command_64 seg;
244  in.read(reinterpret_cast<char *>(&seg), sizeof(seg));
245 
246  if(!in)
247  throw deserialization_exceptiont("failed to read Mach-O segment");
248 
249  if(need_swap)
250  swap_segment_command_64(&seg, NXHostByteOrder());
251 
252  process_sections_64(seg.nsects, need_swap);
253  break;
254  }
255  default:
256  break;
257  }
258 
259  offset += lc.cmdsize;
260  }
261 #else
262  // unused parameters
263  (void)ncmds;
264  (void)offset;
265  (void)need_swap;
266 #endif
267 }
268 
270  std::istream &_in,
271  message_handlert &message_handler)
272  : log(message_handler), in(_in)
273 {
274  // read magic
275  uint32_t magic;
276  in.read(reinterpret_cast<char *>(&magic), sizeof(magic));
277 
278  if(!in)
279  throw deserialization_exceptiont("failed to read Mach-O magic");
280 
281 #ifdef __APPLE__
282  bool is_64 = false, need_swap = false;
283  switch(magic)
284  {
285  case CPROVER_MH_CIGAM:
286  need_swap = true;
287  break;
288  case CPROVER_MH_MAGIC:
289  break;
290  case CPROVER_MH_CIGAM_64:
291  need_swap = true;
292  is_64 = true;
293  break;
294  case CPROVER_MH_MAGIC_64:
295  is_64 = true;
296  break;
297  default:
298  throw deserialization_exceptiont("no Mach-O magic");
299  }
300 
301  uint32_t ncmds = 0;
302  std::size_t offset = 0;
303 
304  // re-read from the beginning, now reading the full header
305  in.seekg(0);
306 
307  if(!is_64)
308  {
309  // NOLINTNEXTLINE(readability/identifiers)
310  struct mach_header mh;
311  in.read(reinterpret_cast<char *>(&mh), sizeof(mh));
312 
313  if(!in)
314  throw deserialization_exceptiont("failed to read 32-bit Mach-O header");
315 
316  if(need_swap)
317  swap_mach_header(&mh, NXHostByteOrder());
318 
319  ncmds = mh.ncmds;
320  offset = sizeof(mh);
321  }
322  else
323  {
324  // NOLINTNEXTLINE(readability/identifiers)
325  struct mach_header_64 mh;
326  in.read(reinterpret_cast<char *>(&mh), sizeof(mh));
327 
328  if(!in)
329  throw deserialization_exceptiont("failed to read 64-bit Mach-O header");
330 
331  if(need_swap)
332  swap_mach_header_64(&mh, NXHostByteOrder());
333 
334  ncmds = mh.ncmds;
335  offset = sizeof(mh);
336  }
337 
338  process_commands(ncmds, offset, need_swap);
339 #else
340  log.warning() << "Cannot read OSX Mach-O on this platform" << messaget::eom;
341 #endif
342 }
exception_utils.h
osx_mach_o_readert::process_sections_32
void process_sections_32(uint32_t nsects, bool need_swap)
Definition: osx_fat_reader.cpp:150
deserialization_exceptiont
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
Definition: exception_utils.h:79
invariant.h
osx_fat_reader.h
run
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
fat_header_prefixt
Definition: osx_fat_reader.cpp:42
messaget::eom
static eomt eom
Definition: message.h:297
run.h
CPROVER_MH_MAGIC_64
#define CPROVER_MH_MAGIC_64
Definition: osx_fat_reader.cpp:23
osx_mach_o_readert::sectiont
Definition: osx_fat_reader.h:47
is_osx_mach_object
bool is_osx_mach_object(char hdr[4])
Definition: osx_fat_reader.cpp:134
osx_fat_readert::log
messaget log
Definition: osx_fat_reader.h:36
osx_mach_o_readert::in
std::istream & in
Definition: osx_fat_reader.h:69
osx_mach_o_readert::log
messaget log
Definition: osx_fat_reader.h:68
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
system_exceptiont
Thrown when some external system fails unexpectedly.
Definition: exception_utils.h:71
osx_mach_o_readert::sections
sectionst sections
Definition: osx_fat_reader.h:60
CPROVER_MH_CIGAM_64
#define CPROVER_MH_CIGAM_64
Definition: osx_fat_reader.cpp:24
message_handlert
Definition: message.h:27
osx_mach_o_readert::process_sections_64
void process_sections_64(uint32_t nsects, bool need_swap)
Definition: osx_fat_reader.cpp:174
u32_to_native_endian
static uint32_t u32_to_native_endian(uint32_t input)
Definition: osx_fat_reader.cpp:48
osx_fat_readert::extract_gb
bool extract_gb(const std::string &source, const std::string &dest) const
Definition: osx_fat_reader.cpp:122
fat_header_prefixt::magic
uint32_t magic
Definition: osx_fat_reader.cpp:44
fat_header_prefixt::n_architectures
uint32_t n_architectures
Definition: osx_fat_reader.cpp:45
CPROVER_FAT_MAGIC
#define CPROVER_FAT_MAGIC
Definition: osx_fat_reader.cpp:19
osx_fat_readert::has_gb_arch
bool has_gb_arch
Definition: osx_fat_reader.h:37
is_osx_fat_header
bool is_osx_fat_header(char header_bytes[8])
Definition: osx_fat_reader.cpp:57
osx_fat_readert::osx_fat_readert
osx_fat_readert(std::ifstream &, message_handlert &)
Definition: osx_fat_reader.cpp:74
CPROVER_MH_CIGAM
#define CPROVER_MH_CIGAM
Definition: osx_fat_reader.cpp:22
osx_mach_o_readert::process_commands
void process_commands(uint32_t ncmds, std::size_t offset, bool need_swap)
Definition: osx_fat_reader.cpp:198
messaget::warning
mstreamt & warning() const
Definition: message.h:404
CPROVER_MH_MAGIC
#define CPROVER_MH_MAGIC
Definition: osx_fat_reader.cpp:21
osx_mach_o_readert::osx_mach_o_readert
osx_mach_o_readert(std::istream &, message_handlert &)
Definition: osx_fat_reader.cpp:269