I am working with Unicode in C++11, and I am currently unable to convert a std::string to a std::u32string.
My code is as follows:
#include <iostream>
#include <string>
#include <locale>
#include "unicode/unistr.h"
#include "unicode/ustream.h"
// Reproduction of the problem: builds a std::u32string from a std::string,
// wraps it in an icu::UnicodeString, and prints the string, its code point
// count, and its individual characters.
int main()
{
// An empty locale name is used for both the C locale and the C++ global locale.
constexpr char locale_name[] = "";
setlocale( LC_ALL, locale_name );
std::locale::global(std::locale(locale_name));
std::ios_base::sync_with_stdio(false);
// The wide streams are imbued with the global locale but are not used below.
std::wcin.imbue(std::locale());
std::wcout.imbue(std::locale());
std::string str="hello☺😆";
// The iterator-range constructor copies every char of str into its own
// char32_t element; no multibyte decoding takes place here.
// NOTE(review): if the literal is stored as UTF-8, "hello" is 5 chars, ☺ is 3
// and 😆 is 4, which would explain the size of 12 reported in the output.
std::u32string s(str.begin(),str.end());
// NOTE(review): the reinterpret_cast assumes char32_t and UChar32 have the
// same size and representation — confirm for the target platform.
icu::UnicodeString ustr = icu::UnicodeString::fromUTF32(reinterpret_cast<const UChar32 *>(s.c_str()), s.size());
std::cout << "Unicode string is: " << ustr << std::endl;
std::cout << "Size of unicode string = " << ustr.countChar32() << std::endl;
std::cout << "Individual characters of the string are:" << std::endl;
// NOTE(review): i counts code points here, whereas ICU documents the argument
// of char32At() as a UTF-16 code unit offset — verify the two agree for the
// strings being printed.
for(int i=0; i < ustr.countChar32(); i++)
std::cout << icu::UnicodeString(ustr.char32At(i)) << std::endl;
return 0;
}
On execution, the output is the following, which is not what I expected:
Unicode string is: hello�������
Size of unicode string = 12
Individual characters of the string are:
h
e
l
l
o
�
�
�
�
�
�
�
Please suggest whether an ICU library function exists for this conversion.
Thanks everybody for help!
Using these 2 links, I was able to find some relevant functions:
I tried using codecvt
functions, but I got the error:
fatal error: codecvt: No such file or directory
#include <codecvt>
^
compilation terminated.
So I skipped that and, on further searching, found the mbrtoc32()
function, which works :)
This is the working code:
#include <iostream>
#include <string>
#include <locale>
#include "unicode/unistr.h"
#include "unicode/ustream.h"
#include <cassert>
#include <cwchar>
#include <uchar.h>
int main()
{
constexpr char locale_name[] = "";
setlocale( LC_ALL, locale_name );
std::locale::global(std::locale(locale_name));
std::ios_base::sync_with_stdio(false);
std::wcin.imbue(std::locale());
std::wcout.imbue(std::locale());
std::string str;
std::cin >> str;
//For example, the input string is "hello☺😆"
std::mbstate_t state{}; // zero-initialized to initial state
char32_t c32;
const char *ptr = str.c_str(), *end = str.c_str() + str.size() + 1;
icu::UnicodeString ustr;
while(std::size_t rc = mbrtoc32(&c32, ptr, end - ptr, &state))
{
icu::UnicodeString temp((UChar32)c32);
ustr+=temp;
assert(rc != (std::size_t)-3); // no surrogates in UTF-32
if(rc == (std::size_t)-1) break;
if(rc == (std::size_t)-2) break;
ptr+=rc;
}
std::cout << "Unicode string is: " << ustr << std::endl;
std::cout << "Size of unicode string = " << ustr.countChar32() << std::endl;
std::cout << "Individual characters of the string are:" << std::endl;
for(int i=0; i < ustr.countChar32(); i++)
std::cout << icu::UnicodeString(ustr.char32At(i)) << std::endl;
return 0;
}
The output on entering the input hello☺😆
is as expected:
Unicode string is: hello☺😆
Size of unicode string = 7
Individual characters of the string are:
h
e
l
l
o
☺
😆