36 bool parse()
override;
70 error() <<
"invalid constant pool index (" << index <<
")" <<
eom;
105 void rbytecode(std::vector<instructiont> &);
117 for(std::size_t i=0; i<bytes; i++)
121 error() <<
"unexpected end of bytecode file" <<
eom;
128 template <
typename T>
132 std::is_unsigned<T>::value,
"T should be an unsigned integer");
133 const constexpr
size_t bytes =
sizeof(T);
135 for(
size_t i = 0; i < bytes; i++)
139 error() <<
"unexpected end of bytecode file" <<
eom;
145 return narrow_cast<T>(
result);
// Numeric tags identifying the kind of a constant pool entry. Each entry's
// tag is read from the class file as a u1, and these names are used as
// `case` labels when the entries are turned into expressions; a tag with no
// matching name is reported as "unknown constant pool entry".
// NOTE(review): the values presumably mirror the constant pool tag table of
// the JVM class-file specification; the list is not in numeric order and
// leaves gaps (e.g. 2, 13, 14, 17) -- confirm whether newer tags are needed.
151 #define CONSTANT_Class 7
152 #define CONSTANT_Fieldref 9
153 #define CONSTANT_Methodref 10
154 #define CONSTANT_InterfaceMethodref 11
155 #define CONSTANT_String 8
156 #define CONSTANT_Integer 3
157 #define CONSTANT_Float 4
158 #define CONSTANT_Long 5
159 #define CONSTANT_Double 6
160 #define CONSTANT_NameAndType 12
161 #define CONSTANT_Utf8 1
162 #define CONSTANT_MethodHandle 15
163 #define CONSTANT_MethodType 16
164 #define CONSTANT_InvokeDynamic 18
// Tag values for verification type info entries, numbered consecutively
// from 0 (TOP) to 8 (UNINIT).
// NOTE(review): presumably compared against the u1 tag read while parsing
// StackMapTable frames, where an unmatched tag raises "unknown verification
// type info encountered" -- the comparison itself is not visible here;
// confirm against the verification-type parsing routine.
166 #define VTYPE_INFO_TOP 0
167 #define VTYPE_INFO_INTEGER 1
168 #define VTYPE_INFO_FLOAT 2
169 #define VTYPE_INFO_LONG 3
170 #define VTYPE_INFO_DOUBLE 4
171 #define VTYPE_INFO_ITEM_NULL 5
172 #define VTYPE_INFO_UNINIT_THIS 6
173 #define VTYPE_INFO_OBJECT 7
174 #define VTYPE_INFO_UNINIT 8
283 "name_and_typeindex did not correspond to a name_and_type in the "
386 catch(
const char *message)
392 catch(
const std::string &message)
// Single-bit masks for the u2 access-flag words read from the class file.
// A flag is tested with `(flags & ACC_x) != 0`; the `u` suffix keeps the
// mask unsigned so the bitwise AND is done on unsigned operands.
// Several of these names are #defined a second time further down in this
// file with the same values, so any change here must be mirrored there.
// NOTE(review): 0x0080u is not named in this group (the later group assigns
// it to ACC_VARARGS / ACC_TRANSIENT) -- confirm that is intentional.
407 #define ACC_PUBLIC 0x0001u
408 #define ACC_PRIVATE 0x0002u
409 #define ACC_PROTECTED 0x0004u
410 #define ACC_STATIC 0x0008u
411 #define ACC_FINAL 0x0010u
412 #define ACC_SYNCHRONIZED 0x0020u
413 #define ACC_BRIDGE 0x0040u
414 #define ACC_NATIVE 0x0100u
415 #define ACC_INTERFACE 0x0200u
416 #define ACC_ABSTRACT 0x0400u
417 #define ACC_STRICT 0x0800u
418 #define ACC_SYNTHETIC 0x1000u
419 #define ACC_ANNOTATION 0x2000u
420 #define ACC_ENUM 0x4000u
422 #define UNUSED_u2(x) \
424 const u2 x = read<u2>(); \
433 const u4 magic = read<u4>();
435 const u2 major_version = read<u2>();
437 if(magic!=0xCAFEBABE)
445 error() <<
"unexpected major version" <<
eom;
453 const u2 access_flags = read<u2>();
454 const u2 this_class = read<u2>();
455 const u2 super_class = read<u2>();
482 const u2 attributes_count = read<u2>();
484 for(std::size_t j=0; j<attributes_count; j++)
518 if(field.signature.has_value())
540 for(
const auto ¶meter_annotations : method.parameter_annotations)
543 if(method.signature.has_value())
558 for(
const auto &var : method.local_variable_table)
560 if(var.signature.has_value())
580 if(src.
id()==ID_code)
588 else if(src.
id() == ID_struct_tag)
596 else if(src.
id()==ID_struct)
602 else if(src.
id()==ID_pointer)
609 const std::vector<annotationt> &annotations)
611 for(
const auto &annotation : annotations)
614 for(
const auto &element_value_pair : annotation.element_value_pairs)
624 if(
const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
626 const irep_idt &value_id = symbol_expr->get_identifier();
629 else if(
const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
642 const u2 constant_pool_count = read<u2>();
643 if(constant_pool_count==0)
645 error() <<
"invalid constant_pool_count" <<
eom;
654 it->tag = read<u1>();
659 it->ref1 = read<u2>();
667 it->ref1 = read<u2>();
668 it->ref2 = read<u2>();
673 it->ref1 = read<u2>();
678 it->number = read<u4>();
683 it->number = read<u8>();
687 error() <<
"invalid double entry" <<
eom;
696 const u2 bytes = read<u2>();
706 it->ref1 = read<u1>();
707 it->ref2 = read<u2>();
711 error() <<
"unknown constant pool entry (" << it->tag <<
")"
721 [&](constant_poolt::value_type &entry) {
726 const std::string &s = id2string(pool_entry(entry.ref1).s);
727 entry.expr = type_exprt(java_classname(s));
731 case CONSTANT_Fieldref:
733 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
734 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
735 const pool_entryt &class_entry = pool_entry(entry.ref1);
736 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
737 typet type=type_entry(nameandtype_entry.ref2);
739 auto class_tag = java_classname(id2string(class_name_entry.s));
741 fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
743 entry.expr = fieldref;
747 case CONSTANT_Methodref:
748 case CONSTANT_InterfaceMethodref:
750 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
751 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
752 const pool_entryt &class_entry = pool_entry(entry.ref1);
753 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
754 typet type=type_entry(nameandtype_entry.ref2);
756 auto class_tag = java_classname(id2string(class_name_entry.s));
758 irep_idt mangled_method_name =
759 id2string(name_entry.s) +
":" +
760 id2string(pool_entry(nameandtype_entry.ref2).s);
762 irep_idt class_id = class_tag.get_identifier();
764 entry.expr = class_method_descriptor_exprt{
765 type, mangled_method_name, class_id, name_entry.s};
769 case CONSTANT_String:
772 entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
776 case CONSTANT_Integer:
777 entry.expr = from_integer(entry.number, java_int_type());
782 ieee_floatt value(ieee_float_spect::single_precision());
783 value.unpack(entry.number);
784 entry.expr = value.to_expr();
789 entry.expr = from_integer(entry.number, java_long_type());
792 case CONSTANT_Double:
794 ieee_floatt value(ieee_float_spect::double_precision());
795 value.unpack(entry.number);
796 entry.expr = value.to_expr();
800 case CONSTANT_NameAndType:
802 entry.expr.id(
"nameandtype");
806 case CONSTANT_MethodHandle:
808 entry.expr.id(
"methodhandle");
812 case CONSTANT_MethodType:
814 entry.expr.id(
"methodtype");
818 case CONSTANT_InvokeDynamic:
820 entry.expr.id(
"invokedynamic");
821 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
822 typet type=type_entry(nameandtype_entry.ref2);
823 type.set(ID_java_lambda_method_handle_index, entry.ref1);
824 entry.expr.type() = type;
833 const u2 interfaces_count = read<u2>();
835 for(std::size_t i=0; i<interfaces_count; i++)
837 constant(read<u2>()).type().get(ID_C_base_name));
842 const u2 fields_count = read<u2>();
844 for(std::size_t i=0; i<fields_count; i++)
848 const u2 access_flags = read<u2>();
849 const u2 name_index = read<u2>();
850 const u2 descriptor_index = read<u2>();
851 const u2 attributes_count = read<u2>();
862 const auto flags = (field.
is_public ? 1 : 0) +
865 DATA_INVARIANT(flags<=1,
"at most one of public, protected, private");
867 for(std::size_t j=0; j<attributes_count; j++)
883 const u4 code_length = read<u4>();
886 size_t bytecode_index=0;
888 for(address=0; address<code_length; address++)
890 bool wide_instruction=
false;
891 u4 start_of_instruction=address;
893 u1 bytecode = read<u1>();
897 wide_instruction=
true;
899 bytecode = read<u1>();
906 std::string(
"Unexpected wide instruction: ") +
910 instructions.emplace_back();
913 instruction.
address=start_of_instruction;
942 const s1 c = read<u1>();
950 const s2 offset = read<u2>();
953 instruction.
args.push_back(
961 const s4 offset = read<u4>();
964 instruction.
args.push_back(
974 const u2 v = read<u2>();
980 const u1 v = read<u1>();
992 const u2 v = read<u2>();
994 const s2 c = read<u2>();
1000 const u1 v = read<u1>();
1002 const s1 c = read<u1>();
1010 const u2 c = read<u2>();
1012 const u1 b1 = read<u1>();
1014 const u1 b2 = read<u1>();
1022 u4 base_offset=address;
1025 while(((address + 1u) & 3u) != 0)
1032 const s4 default_value = read<u4>();
1035 instruction.
args.push_back(
1040 const u4 npairs = read<u4>();
1043 for(std::size_t i=0; i<npairs; i++)
1045 const s4 match = read<u4>();
1046 const s4 offset = read<u4>();
1047 instruction.
args.push_back(
1051 instruction.
args.push_back(
1060 size_t base_offset=address;
1063 while(((address + 1u) & 3u) != 0)
1070 const s4 default_value = read<u4>();
1071 instruction.
args.push_back(
1076 const s4 low_value = read<u4>();
1080 const s4 high_value = read<u4>();
1084 for(
s4 i=low_value; i<=high_value; i++)
1086 s4 offset = read<u4>();
1090 instruction.
args.push_back(
1099 const u2 c = read<u2>();
1101 const u1 dimensions = read<u1>();
1102 instruction.
args.push_back(
1119 case T_INT: t.
id(ID_int);
break;
1130 const s2 s = read<u2>();
1137 throw "unknown JVM bytecode instruction";
1142 if(address!=code_length)
1144 error() <<
"bytecode length mismatch" <<
eom;
1151 const u2 attribute_name_index = read<u2>();
1152 const u4 attribute_length = read<u4>();
1156 if(attribute_name ==
"Code")
1166 const u2 exception_table_length = read<u2>();
1173 for(std::size_t e = 0; e < exception_table_length; e++)
1175 const u2 start_pc = read<u2>();
1176 const u2 end_pc = read<u2>();
1182 "The start_pc must be less than the end_pc as this is the range the "
1183 "exception is active");
1185 const u2 handler_pc = read<u2>();
1186 const u2 catch_type = read<u2>();
1196 u2 attributes_count = read<u2>();
1198 for(std::size_t j=0; j<attributes_count; j++)
1211 if(!instruction.source_location.get_line().empty())
1212 line_number = instruction.source_location.get_line();
1213 else if(!line_number.
empty())
1214 instruction.source_location.set_line(line_number);
1215 instruction.source_location.set_function(
1220 const auto it = std::find_if(
1224 return !instruction.source_location.get_line().empty();
1229 else if(attribute_name==
"Signature")
1231 const u2 signature_index = read<u2>();
1234 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1235 attribute_name==
"RuntimeVisibleAnnotations")
1240 attribute_name ==
"RuntimeInvisibleParameterAnnotations" ||
1241 attribute_name ==
"RuntimeVisibleParameterAnnotations")
1243 const u1 parameter_count = read<u1>();
1251 for(
u2 param_no = 0; param_no < parameter_count; ++param_no)
1254 else if(attribute_name ==
"Exceptions")
1264 const u2 attribute_name_index = read<u2>();
1265 const u4 attribute_length = read<u4>();
1269 if(attribute_name==
"Signature")
1271 const u2 signature_index = read<u2>();
1274 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1275 attribute_name==
"RuntimeVisibleAnnotations")
1285 const u2 attribute_name_index = read<u2>();
1286 const u4 attribute_length = read<u4>();
1290 if(attribute_name==
"LineNumberTable")
1292 std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1294 instruction_map.emplace(instruction.address, instruction);
1296 const u2 line_number_table_length = read<u2>();
1298 for(std::size_t i=0; i<line_number_table_length; i++)
1300 const u2 start_pc = read<u2>();
1301 const u2 line_number = read<u2>();
1304 auto it = instruction_map.find(start_pc);
1306 if(it!=instruction_map.end())
1307 it->second.get().source_location.set_line(line_number);
1310 else if(attribute_name==
"LocalVariableTable")
1312 const u2 local_variable_table_length = read<u2>();
1316 for(std::size_t i=0; i<local_variable_table_length; i++)
1318 const u2 start_pc = read<u2>();
1319 const u2 length = read<u2>();
1320 const u2 name_index = read<u2>();
1321 const u2 descriptor_index = read<u2>();
1322 const u2 index = read<u2>();
1332 else if(attribute_name==
"LocalVariableTypeTable")
1336 else if(attribute_name==
"StackMapTable")
1338 const u2 stack_map_entries = read<u2>();
1342 for(
size_t i=0; i<stack_map_entries; i++)
1344 const u1 frame_type = read<u1>();
1351 else if(64<=frame_type && frame_type<=127)
1361 else if(frame_type==247)
1368 const u2 offset_delta = read<u2>();
1373 else if(248<=frame_type && frame_type<=250)
1378 const u2 offset_delta = read<u2>();
1381 else if(frame_type==251)
1387 const u2 offset_delta = read<u2>();
1390 else if(252<=frame_type && frame_type<=254)
1392 size_t new_locals = frame_type - 251;
1396 const u2 offset_delta = read<u2>();
1398 for(
size_t k=0; k<new_locals; k++)
1407 else if(frame_type==255)
1410 const u2 offset_delta = read<u2>();
1412 const u2 number_locals = read<u2>();
1414 for(
size_t k=0; k<(size_t) number_locals; k++)
1422 const u2 number_stack_items = read<u2>();
1424 for(
size_t k=0; k<(size_t) number_stack_items; k++)
1434 throw "error: unknown stack frame type encountered";
1444 const u1 tag = read<u1>();
1477 throw "error: unknown verification type info encountered";
1482 std::vector<annotationt> &annotations)
1484 const u2 num_annotations = read<u2>();
1486 for(
u2 number=0; number<num_annotations; number++)
1490 annotations.push_back(annotation);
1497 const u2 type_index = read<u2>();
1505 const u2 num_element_value_pairs = read<u2>();
1506 element_value_pairs.resize(num_element_value_pairs);
1508 for(
auto &element_value_pair : element_value_pairs)
1510 const u2 element_name_index = read<u2>();
1511 element_value_pair.element_name=
pool_entry(element_name_index).
s;
1524 const u1 tag = read<u1>();
1538 const u2 class_info_index = read<u2>();
1553 const u2 num_values = read<u2>();
1555 values.reserve(num_values);
1556 for(std::size_t i=0; i<num_values; i++)
1565 const u2 const_value_index = read<u2>();
1571 const u2 const_value_index = read<u2>();
1572 return constant(const_value_index);
1589 const u4 &attribute_length)
1592 std::string name = parsed_class.
name.
c_str();
1593 const u2 number_of_classes = read<u2>();
1594 const u4 number_of_bytes_to_be_read = number_of_classes * 8 + 2;
1596 number_of_bytes_to_be_read == attribute_length,
1597 "The number of bytes to be read for the InnerClasses attribute does not "
1598 "match the attribute length.");
1600 const auto pool_entry_lambda = [
this](
u2 index) ->
pool_entryt & {
1603 const auto remove_separator_char = [](std::string str,
char ch) {
1604 str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1608 for(
int i = 0; i < number_of_classes; i++)
1610 const u2 inner_class_info_index = read<u2>();
1611 const u2 outer_class_info_index = read<u2>();
1612 const u2 inner_name_index = read<u2>();
1613 const u2 inner_class_access_flags = read<u2>();
1615 std::string inner_class_info_name =
1618 bool is_private = (inner_class_access_flags &
ACC_PRIVATE) != 0;
1619 bool is_public = (inner_class_access_flags &
ACC_PUBLIC) != 0;
1620 bool is_protected = (inner_class_access_flags &
ACC_PROTECTED) != 0;
1621 bool is_static = (inner_class_access_flags &
ACC_STATIC) != 0;
1626 bool is_inner_class = remove_separator_char(
id2string(parsed_class.
name),
'.') ==
1627 remove_separator_char(inner_class_info_name,
'/');
1633 if(inner_name_index == 0)
1636 parsed_class.
inner_name = pool_entry_lambda(inner_name_index).s;
1639 if(outer_class_info_index == 0)
1647 std::string outer_class_info_name =
1666 const u2 number_of_exceptions = read<u2>();
1668 std::vector<irep_idt> exceptions;
1669 for(
size_t i = 0; i < number_of_exceptions; i++)
1671 const u2 exception_index_table = read<u2>();
1674 exceptions.push_back(exception_name);
1683 const u2 attribute_name_index = read<u2>();
1684 const u4 attribute_length = read<u4>();
1688 if(attribute_name==
"SourceFile")
1690 const u2 sourcefile_index = read<u2>();
1694 size_t last_index = fqn.find_last_of(
'.');
1695 if(last_index==std::string::npos)
1699 std::string package_name=fqn.substr(0, last_index+1);
1700 std::replace(package_name.begin(), package_name.end(),
'.',
'/');
1701 const std::string &full_file_name=
1703 sourcefile_name=full_file_name;
1706 for(
auto &method : parsed_class.
methods)
1708 method.source_location.set_file(sourcefile_name);
1709 for(
auto &instruction : method.instructions)
1711 if(!instruction.source_location.get_line().empty())
1712 instruction.source_location.set_file(sourcefile_name);
1716 else if(attribute_name==
"Signature")
1718 const u2 signature_index = read<u2>();
1724 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1725 attribute_name==
"RuntimeVisibleAnnotations")
1729 else if(attribute_name ==
"BootstrapMethods")
1735 "only one BootstrapMethods argument is allowed in a class file");
1741 else if(attribute_name ==
"InnerClasses")
1751 const u2 methods_count = read<u2>();
1753 for(std::size_t j=0; j<methods_count; j++)
// Second group of access-flag bit masks. ACC_PUBLIC, ACC_PRIVATE,
// ACC_PROTECTED, ACC_STATIC, ACC_FINAL, ACC_INTERFACE, ACC_ABSTRACT,
// ACC_SYNTHETIC, ACC_ANNOTATION and ACC_ENUM repeat the definitions made
// earlier in this file with identical values; the replacement lists must
// stay identical for the redefinition to remain well-formed.
// New in this group: ACC_VARARGS, ACC_SUPER, ACC_VOLATILE, ACC_TRANSIENT.
// Some bits carry more than one name: 0x0080u is both ACC_VARARGS and
// ACC_TRANSIENT, 0x0020u is ACC_SUPER here and ACC_SYNCHRONIZED earlier,
// 0x0040u is ACC_VOLATILE here and ACC_BRIDGE earlier.
// NOTE(review): presumably the meaning of a shared bit depends on whether
// the flag word belongs to a class, a field or a method -- confirm at each
// use site that the right name is tested.
1757 #define ACC_PUBLIC 0x0001u
1758 #define ACC_PRIVATE 0x0002u
1759 #define ACC_PROTECTED 0x0004u
1760 #define ACC_STATIC 0x0008u
1761 #define ACC_FINAL 0x0010u
1762 #define ACC_VARARGS 0x0080u
1763 #define ACC_SUPER 0x0020u
1764 #define ACC_VOLATILE 0x0040u
1765 #define ACC_TRANSIENT 0x0080u
1766 #define ACC_INTERFACE 0x0200u
1767 #define ACC_ABSTRACT 0x0400u
1768 #define ACC_SYNTHETIC 0x1000u
1769 #define ACC_ANNOTATION 0x2000u
1770 #define ACC_ENUM 0x4000u
1776 const u2 access_flags = read<u2>();
1777 const u2 name_index = read<u2>();
1778 const u2 descriptor_index = read<u2>();
1795 const auto flags = (method.
is_public ? 1 : 0) +
1798 DATA_INVARIANT(flags<=1,
"at most one of public, protected, private");
1799 const u2 attributes_count = read<u2>();
1801 for(std::size_t j=0; j<attributes_count; j++)
1806 std::istream &istream,
1809 bool skip_instructions)
1812 java_bytecode_parser.
in=&istream;
1815 bool parser_result=java_bytecode_parser.
parse();
1824 return std::move(java_bytecode_parser.
parse_tree);
1828 const std::string &file,
1831 bool skip_instructions)
1841 in, class_name, message_handler, skip_instructions);
1849 const u2 local_variable_type_table_length = read<u2>();
1853 "Local variable type table cannot have more elements "
1854 "than the local variable table.");
1855 for(std::size_t i=0; i<local_variable_type_table_length; i++)
1857 const u2 start_pc = read<u2>();
1858 const u2 length = read<u2>();
1859 const u2 name_index = read<u2>();
1860 const u2 signature_index = read<u2>();
1861 const u2 index = read<u2>();
1867 if(lvar.index==index &&
1869 lvar.start_pc==start_pc &&
1870 lvar.length==length)
1879 "Entry in LocalVariableTypeTable must be present in LVT");
1892 switch(java_handle_kind)
1955 std::string descriptor = name_and_type.
get_descriptor(pool_entry_lambda);
1963 method_type, mangled_method_name, class_name, method_name};
1972 const u2 num_bootstrap_methods = read<u2>();
1973 for(
size_t bootstrap_method_index = 0;
1974 bootstrap_method_index < num_bootstrap_methods;
1975 ++bootstrap_method_index)
1977 const u2 bootstrap_methodhandle_ref = read<u2>();
1982 const u2 num_bootstrap_arguments = read<u2>();
1983 debug() <<
"INFO: parse BootstrapMethod handle " << num_bootstrap_arguments
1987 std::vector<u2> u2_values(num_bootstrap_arguments);
1988 for(
size_t i = 0; i < num_bootstrap_arguments; i++)
1989 u2_values[i] = read<u2>();
2021 if(num_bootstrap_arguments < 3)
2025 <<
"format of BootstrapMethods entry not recognized: too few arguments"
2030 u2 interface_type_index = u2_values[0];
2031 u2 method_handle_index = u2_values[1];
2032 u2 method_type_index = u2_values[2];
2038 bool recognized =
true;
2039 for(
size_t i = 3; i < num_bootstrap_arguments; i++)
2041 u2 skipped_argument = u2_values[i];
2047 debug() <<
"format of BootstrapMethods entry not recognized: extra "
2048 "arguments of wrong type"
2064 debug() <<
"format of BootstrapMethods entry not recognized: arguments "
2071 debug() <<
"INFO: parse lambda handle" <<
eom;
2077 debug() <<
"format of BootstrapMethods entry not recognized: method "
2078 "handle not recognised"
2088 debug() <<
"lambda function reference "
2090 .base_method_name())
2092 <<
"\n interface type is "
2094 <<
"\n method type is "
2105 size_t bootstrap_method_index)