codeator codeator - 13 days ago 5
C++ Question

Unicode to UTF-8 Conversion

I am trying to convert a Unicode (UTF-16) string to a UTF-8 string:

#include <stdio.h>
#include <string>
#include <atlconv.h>
#include <atlstr.h>

using namespace std;

// Converts a wide (UTF-16) string to a UTF-8 encoded CStringA.
//
// uni: the wide string to convert.
// Returns the UTF-8 bytes of `uni`, or an empty string when `uni` is empty
// or the conversion fails.
CStringA ConvertUnicodeToUTF8(const CStringW& uni)
{
    if (uni.IsEmpty()) return "";

    CStringA utf8;

    // With a source length of -1 the reported size INCLUDES the terminating
    // NUL, so a result of 1 means "terminator only" and 0 means failure.
    const int required = WideCharToMultiByte(CP_UTF8, 0, uni, -1, NULL, 0, NULL, NULL);
    if (required <= 1) return utf8;

    char *buf = utf8.GetBuffer(required);

    // The destination size must match what the sizing call reported
    // (terminator included); passing a smaller size makes the call fail
    // with ERROR_INSUFFICIENT_BUFFER.
    const int written = buf
        ? WideCharToMultiByte(CP_UTF8, 0, uni, -1, buf, required, NULL, NULL)
        : 0;

    // Set the string length explicitly (excluding the terminator) instead of
    // relying on a strlen scan of the buffer.
    utf8.ReleaseBuffer(written > 0 ? written - 1 : 0);
    return utf8;
}

int main(void)
{
string u8str = ConvertUnicodeToUTF8(L"gökhan");

printf("%d\n", u8str.size());

return 0;
}


My question is: should u8str.size() return 6? It currently prints 7.

Answer

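7 is correct. The non-ASCII character ö is encoded as two bytes in UTF-8, and size() counts bytes rather than characters, so the six characters of "gökhan" occupy 5 + 2 = 7 bytes.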