CBMC
java_string_literals.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Java string literal processing
4 
5 Author: Chris Smowton, chris.smowton@diffblue.com
6 
7 \*******************************************************************/
8 
9 #include "java_string_literals.h"
10 #include "java_root_class.h"
12 #include "java_types.h"
13 #include "java_utils.h"
14 
16 #include <util/arith_tools.h>
17 #include <util/expr_initializer.h>
18 #include <util/namespace.h>
19 #include <util/string_utils.h>
20 #include <util/symbol_table_base.h>
21 #include <util/unicode.h>
22 
23 #include <sstream>
24 
28 static array_exprt utf16_to_array(const std::wstring &in)
29 {
30  const auto jchar=java_char_type();
31  array_exprt ret(
32  {}, array_typet(jchar, from_integer(in.length(), java_int_type())));
33  for(const auto c : in)
34  ret.copy_to_operands(from_integer(c, jchar));
35  return ret;
36 }
37 
39  const java_string_literal_exprt &string_expr,
40  symbol_table_baset &symbol_table,
41  bool string_refinement_enabled)
42 {
43  const irep_idt value = string_expr.value();
44  const struct_tag_typet string_type("java::java.lang.String");
45 
46  const std::string escaped_symbol_name = escape_non_alnum(id2string(value));
47  const std::string escaped_symbol_name_with_prefix =
48  JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name;
49 
50  auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix);
51  if(findit != symbol_table.symbols.end())
52  return findit->second.symbol_expr();
53 
54 #ifndef CPROVER_INVARIANT_DO_NOT_CHECK
55  const auto initialize_function = symbol_table.lookup(INITIALIZE_FUNCTION);
56  INVARIANT(
57  !initialize_function || initialize_function->value.is_nil(),
58  "Cannot create more string literals after making " INITIALIZE_FUNCTION);
59 #endif
60 
61  // Create a new symbol:
62  symbolt new_symbol;
63  new_symbol.name = escaped_symbol_name_with_prefix;
64  new_symbol.type = string_type;
65  new_symbol.type.set(ID_C_constant, true);
66  new_symbol.base_name = escaped_symbol_name;
67  new_symbol.pretty_name = value;
68  new_symbol.mode = ID_java;
69  new_symbol.is_type = false;
70  new_symbol.is_lvalue = true;
71  new_symbol.is_state_var = true;
72  new_symbol.is_static_lifetime = true;
73 
74  namespacet ns(symbol_table);
75 
76  // Regardless of string refinement setting, at least initialize
77  // the literal with @clsid = String
78  struct_tag_typet jlo_symbol("java::java.lang.Object");
79  const auto &jlo_struct = to_struct_type(ns.follow(jlo_symbol));
80  struct_exprt jlo_init({}, jlo_symbol);
81  const auto &jls_struct = to_struct_type(ns.follow(string_type));
82  java_root_class_init(jlo_init, jlo_struct, "java::java.lang.String");
83 
84  // If string refinement *is* around, populate the actual
85  // contents as well:
86  if(string_refinement_enabled)
87  {
88  const array_exprt data =
90 
91  struct_exprt literal_init({}, new_symbol.type);
92  literal_init.operands().resize(jls_struct.components().size());
93  const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object");
94  literal_init.operands()[jlo_nb] = jlo_init;
95 
96  const std::size_t length_nb = jls_struct.component_number("length");
97  const typet &length_type = jls_struct.components()[length_nb].type();
98  const exprt length = from_integer(data.operands().size(), length_type);
99  literal_init.operands()[length_nb] = length;
100 
101  // Initialize the string with a constant utf-16 array:
102  symbolt array_symbol;
103  array_symbol.name = escaped_symbol_name_with_prefix + "_constarray";
104  array_symbol.base_name = escaped_symbol_name + "_constarray";
105  array_symbol.pretty_name = value;
106  array_symbol.mode = ID_java;
107  array_symbol.is_type = false;
108  array_symbol.is_lvalue = true;
109  // These are basically const global data:
110  array_symbol.is_static_lifetime = true;
111  array_symbol.is_state_var = true;
112  array_symbol.value = data;
113  array_symbol.type = array_symbol.value.type();
114  array_symbol.type.set(ID_C_constant, true);
115 
116  if(symbol_table.add(array_symbol))
117  throw "failed to add constarray symbol to symbol table";
118 
119  const symbol_exprt array_expr = array_symbol.symbol_expr();
120  const address_of_exprt array_pointer(
121  index_exprt(array_expr, from_integer(0, java_int_type())));
122 
123  const std::size_t data_nb = jls_struct.component_number("data");
124  literal_init.operands()[data_nb] = array_pointer;
125 
126  // Associate array with pointer
127  symbolt return_symbol;
128  return_symbol.name = escaped_symbol_name_with_prefix + "_return_value";
129  return_symbol.base_name = escaped_symbol_name + "_return_value";
130  return_symbol.pretty_name =
131  escaped_symbol_name.length() > 10
132  ? escaped_symbol_name.substr(0, 10) + "..._return_value"
133  : escaped_symbol_name + "_return_value";
134  return_symbol.mode = ID_java;
135  return_symbol.is_type = false;
136  return_symbol.is_lvalue = true;
137  return_symbol.is_static_lifetime = true;
138  return_symbol.is_state_var = true;
139  return_symbol.value = make_function_application(
140  ID_cprover_associate_array_to_pointer_func,
141  {array_symbol.value, array_pointer},
142  java_int_type(),
143  symbol_table);
144  return_symbol.type = return_symbol.value.type();
145  return_symbol.type.set(ID_C_constant, true);
146  if(symbol_table.add(return_symbol))
147  throw "failed to add return symbol to symbol table";
148 
149  // Initialise the literal structure with
150  // (ABC_return_value, { ..., .length = N, .data = &ABC_constarray }),
151  // using a C-style comma expression to indicate that we need the
152  // _return_value global for its side-effects.
153  exprt init_comma_expr(ID_comma);
154  init_comma_expr.type() = literal_init.type();
155  init_comma_expr.add_to_operands(
156  return_symbol.symbol_expr(), std::move(literal_init));
157  new_symbol.value = init_comma_expr;
158  }
159  else if(jls_struct.components().size()>=1 &&
160  jls_struct.components()[0].get_name()=="@java.lang.Object")
161  {
162  // Case where something defined java.lang.String, so it has
163  // a proper base class (always java.lang.Object in practical
164  // JDKs seen so far)
165  struct_exprt literal_init({std::move(jlo_init)}, new_symbol.type);
166  for(const auto &comp : jls_struct.components())
167  {
168  if(comp.get_name()=="@java.lang.Object")
169  continue;
170  // Other members of JDK's java.lang.String we don't understand
171  // without string-refinement. Just zero-init them; consider using
172  // test-gen-like nondet object trees instead.
173  const auto init =
174  zero_initializer(comp.type(), string_expr.source_location(), ns);
175  CHECK_RETURN(init.has_value());
176  literal_init.copy_to_operands(*init);
177  }
178  new_symbol.value = literal_init;
179  }
180  else if(to_java_class_type(jls_struct).get_is_stub())
181  {
182  // Case where java.lang.String was stubbed, and so directly defines
183  // @class_identifier
184  new_symbol.value = jlo_init;
185  new_symbol.value.type() = string_type;
186  }
187 
188  bool add_failed = symbol_table.add(new_symbol);
189  INVARIANT(
190  !add_failed,
191  "string literal symbol was already checked not to be "
192  "in the symbol table, so adding it should succeed");
193 
194  return new_symbol.symbol_expr();
195 }
196 
198  const irep_idt &string_value,
199  symbol_table_baset &symbol_table,
200  bool string_refinement_enabled)
201 {
203  java_string_literal_exprt{string_value},
204  symbol_table,
205  string_refinement_enabled);
206 }
exprt::copy_to_operands
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:155
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:36
java_root_class_init
void java_root_class_init(struct_exprt &jlo, const struct_typet &root_type, const irep_idt &class_identifier)
Adds members for an object of the root class (usually java.lang.Object).
Definition: java_root_class.cpp:41
symbolt::is_state_var
bool is_state_var
Definition: symbol.h:62
java_root_class.h
utf16_to_array
static array_exprt utf16_to_array(const std::wstring &in)
Convert UCS-2 or UTF-16 to an array expression.
Definition: java_string_literals.cpp:28
arith_tools.h
to_struct_type
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:308
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
escape_non_alnum
std::string escape_non_alnum(const std::string &to_escape)
Replace non-alphanumeric characters with _xx escapes, where xx are hex digits.
Definition: string_utils.cpp:153
string_utils.h
java_string_literals.h
typet
The type of an expression, extends irept.
Definition: type.h:28
symbolt::type
typet type
Type of symbol.
Definition: symbol.h:31
make_function_application
exprt make_function_application(const irep_idt &function_name, const exprt::operandst &arguments, const typet &range, symbol_table_baset &symbol_table)
Create a (mathematical) function application expression.
Definition: java_utils.cpp:387
java_string_literal_expr.h
exprt
Base class for all expressions.
Definition: expr.h:55
symbolt::base_name
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
struct_tag_typet
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:448
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:112
get_or_create_string_literal_symbol
symbol_exprt get_or_create_string_literal_symbol(const java_string_literal_exprt &string_expr, symbol_table_baset &symbol_table, bool string_refinement_enabled)
Creates or gets an existing constant global symbol for a given string literal.
Definition: java_string_literals.cpp:38
namespace.h
zero_initializer
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Definition: expr_initializer.cpp:297
symbolt::pretty_name
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
struct_exprt
Struct constructor from list of elements.
Definition: std_expr.h:1818
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:90
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:84
expr_initializer.h
symbolt::mode
irep_idt mode
Language mode.
Definition: symbol.h:49
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
irept::set
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:420
symbol_table_baset
The symbol table base class interface.
Definition: symbol_table_base.h:21
symbolt::symbol_expr
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
INITIALIZE_FUNCTION
#define INITIALIZE_FUNCTION
Definition: static_lifetime_init.h:22
java_int_type
signedbv_typet java_int_type()
Definition: java_types.cpp:31
utf8_to_utf16_native_endian
std::wstring utf8_to_utf16_native_endian(const std::string &in)
Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
Definition: unicode.cpp:191
symbol_table_baset::add
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Definition: symbol_table_base.cpp:18
symbolt::value
exprt value
Initial value of symbol.
Definition: symbol.h:34
array_typet
Arrays with given size.
Definition: std_types.h:762
namespace_baset::follow
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:49
symbolt
Symbol table entry.
Definition: symbol.h:27
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:100
symbol_table_baset::symbols
const symbolst & symbols
Read-only field, used to look up symbols given their names.
Definition: symbol_table_base.h:30
java_char_type
unsignedbv_typet java_char_type()
Definition: java_types.cpp:61
symbolt::is_type
bool is_type
Definition: symbol.h:61
exprt::add_to_operands
void add_to_operands(const exprt &expr)
Add the given argument to the end of exprt's operands.
Definition: expr.h:162
unicode.h
symbolt::is_static_lifetime
bool is_static_lifetime
Definition: symbol.h:65
symbol_table_baset::lookup
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
Definition: symbol_table_base.h:95
java_string_literal_exprt
Definition: java_string_literal_expr.h:17
exprt::operands
operandst & operands()
Definition: expr.h:94
symbolt::is_lvalue
bool is_lvalue
Definition: symbol.h:66
to_java_class_type
const java_class_typet & to_java_class_type(const typet &type)
Definition: java_types.h:582
index_exprt
Array index operator.
Definition: std_expr.h:1409
static_lifetime_init.h
address_of_exprt
Operator to return the address of an object.
Definition: pointer_expr.h:370
INVARIANT
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
java_types.h
symbol_table_base.h
Author: Diffblue Ltd.
java_utils.h
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:211
array_exprt
Array constructor from list of elements.
Definition: std_expr.h:1562
JAVA_STRING_LITERAL_PREFIX
#define JAVA_STRING_LITERAL_PREFIX
Definition: java_utils.h:104
symbolt::name
irep_idt name
The unique identifier.
Definition: symbol.h:40
java_string_literal_exprt::value
irep_idt value() const
Definition: java_string_literal_expr.h:26