CBMC
unicode.h
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #ifndef CPROVER_UTIL_UNICODE_H
10 #define CPROVER_UTIL_UNICODE_H
11 
12 #include <algorithm>
13 #include <string>
14 #include <vector>
15 
16 // we follow the ideas suggested at
17 // http://www.utf8everywhere.org/
18 
19 std::string narrow(const wchar_t *s);
20 std::wstring widen(const char *s);
21 std::string narrow(const std::wstring &s);
22 std::wstring widen(const std::string &s);
23 
24 std::string
25 utf32_native_endian_to_utf8(const std::basic_string<unsigned int> &s);
26 
29 std::u32string utf8_to_utf32(const std::string &utf8_str);
30 
31 std::wstring utf8_to_utf16_native_endian(const std::string &in);
32 std::string utf16_native_endian_to_java(const char16_t ch);
33 std::string utf16_native_endian_to_java(const std::wstring &in);
34 std::string utf16_native_endian_to_java_string(const std::wstring &in);
35 
36 std::vector<std::string> narrow_argv(int argc, const wchar_t **argv_wide);
37 
41 std::string utf16_native_endian_to_utf8(char16_t utf16_char);
42 
45 std::string utf16_native_endian_to_utf8(const std::u16string &utf16_str);
46 
51 char16_t codepoint_hex_to_utf16_native_endian(const std::string &hex);
52 
56 std::string codepoint_hex_to_utf8(const std::string &hex);
57 
/// Build a null-terminated array of C-string pointers from a range of
/// std::string objects.
/// \tparam It: iterator type whose dereferenced value binds to
///   `const std::string &`
/// \param b: iterator to the first string of the range
/// \param e: iterator one past the last string of the range
/// \return a vector holding one `c_str()` pointer per string in [b, e), in
///   range order, followed by a single trailing `nullptr`. The pointers are
///   not owned by the vector: they point into the strings of the input range
///   and are only usable while those strings are alive and unmodified.
template <typename It>
std::vector<const char *> to_c_str_array(It b, It e)
{
  // Assumes that walking the range will be faster than repeated allocation
  // The extra slot (pre-filled with nullptr) is the array terminator.
  std::vector<const char *> ret(std::distance(b, e) + 1, nullptr);
  // Overwrite the first distance(b, e) slots; the final nullptr stays intact.
  std::transform(
    b, e, std::begin(ret), [](const std::string &s) { return s.c_str(); });
  return ret;
}
67 
68 #endif // CPROVER_UTIL_UNICODE_H
codepoint_hex_to_utf16_native_endian
char16_t codepoint_hex_to_utf16_native_endian(const std::string &hex)
Definition: unicode.cpp:378
utf16_native_endian_to_java_string
std::string utf16_native_endian_to_java_string(const std::wstring &in)
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.
Definition: unicode.cpp:350
transform
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
Definition: abstract_value_object.cpp:159
utf16_native_endian_to_utf8
std::string utf16_native_endian_to_utf8(char16_t utf16_char)
Definition: unicode.cpp:359
narrow_argv
std::vector< std::string > narrow_argv(int argc, const wchar_t **argv_wide)
Definition: unicode.cpp:148
narrow
std::string narrow(const wchar_t *s)
Definition: unicode.cpp:32
widen
std::wstring widen(const char *s)
Definition: unicode.cpp:48
to_c_str_array
std::vector< const char * > to_c_str_array(It b, It e)
Definition: unicode.h:59
codepoint_hex_to_utf8
std::string codepoint_hex_to_utf8(const std::string &hex)
Definition: unicode.cpp:384
utf16_native_endian_to_java
std::string utf16_native_endian_to_java(const char16_t ch)
Definition: unicode.cpp:335
utf8_to_utf16_native_endian
std::wstring utf8_to_utf16_native_endian(const std::string &in)
Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
Definition: unicode.cpp:191
utf32_native_endian_to_utf8
std::string utf32_native_endian_to_utf8(const std::basic_string< unsigned int > &s)
Definition: unicode.cpp:136
utf8_to_utf32
std::u32string utf8_to_utf32(const std::string &utf8_str)
Convert UTF8-encoded string to UTF-32 with architecture-native endianness.
Definition: unicode.cpp:205