I have problem working with unicode strings, wchar_t type.
In my program I'm getting input as wchar_t and I'm supposed to XOR it and write it to file and read it back and print it to command line.
This is my code,
const unsigned int XORKey = 0xff;
size_t XORit(const wchar_t* value, wchar_t* xorred)
{
size_t length = wcslen(value);
for (int i = 0; i < length; i++)
xorred[i] = ((char)(value[i] ^ XORKey));
return length;
}
int main()
{
setlocale(LC_ALL, "en_US.UTF-8");
// XOR it
wchar_t sample[] = { L"TEST1自己人" };
int samplelen = wcslen(sample);
printf("%ls", sample);
printf("\n");
printf("Plain:\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%02X ", sample[i]);
}
printf("\n");
wchar_t* xorred = (wchar_t*)malloc(samplelen);
if (xorred == NULL) return -1;
memset(xorred, 0, samplelen);
XORit(sample, xorred);
printf("XOR'ed\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%02X ", xorred[i]);
}
printf("\n");
// Write to file
FILE* fpW = _wfopen(L"logon.bin", L"wb");
fwrite(xorred, sizeof(wchar_t), samplelen, fpW);
fclose(fpW);
// Read from file
FILE* fpR = fopen("logon.bin", "rb");
fseek(fpR, 0, SEEK_END);
int filesize = ftell(fpR);
wchar_t* unxorred = (wchar_t*)malloc(filesize + sizeof(wchar_t));
if (unxorred == NULL) return -1;
rewind(fpR);
fread(unxorred, sizeof(wchar_t), filesize, fpW);
fclose(fpW);
printf("Reading\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%02X ", unxorred[i]);
}
printf("\n");
printf("Un-XOR'ed\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%02X ", unxorred[i] ^ XORKey);
}
printf("\n");
printf("%ls", unxorred);
return 0;
}
The values I'm reading back from the file doesn't match to what I wrote! :( I'm new to C programming, I did my best to get this right, please forgive any noob mistakes in my understanding of the issue and implementing it.
Thanks in advance
I fixed the code based on the comments,
const unsigned int XORKey = 0xff;
size_t XORit(const wchar_t* value, wchar_t* xorred)
{
size_t length = wcslen(value);
for (int i = 0; i < length; i++)
xorred[i] = (value[i] ^ XORKey);
return length;
}
int main()
{
setlocale(LC_ALL, "en_US.UTF-8");
// XOR it
wchar_t sample[] = { L"Test1自己人自己人A" };
int samplelen = wcslen(sample);
printf("%ls", sample);
printf("\n");
printf("Plain:\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%0*X ", (int)sizeof(wchar_t), (unsigned int)sample[i]);
}
printf("\n");
wchar_t* xorred = (wchar_t*)malloc(samplelen);
if (xorred == NULL) return -1;
memset(xorred, 0, samplelen);
XORit(sample, xorred);
printf("XOR'ed\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%0*X ", (int)sizeof(wchar_t), (unsigned int)xorred[i]);
}
printf("\n");
// Write to file
FILE* fpW = _wfopen(L"logon.bin", L"wb");
fwrite(xorred, sizeof(wchar_t), samplelen, fpW);
fclose(fpW);
// Read from file
FILE* fpR = fopen("logon.bin", "rb");
fseek(fpR, 0, SEEK_END);
int filesize = ftell(fpR);
int whattoread = (filesize / sizeof(wchar_t));
wchar_t* ReadXOR = (wchar_t*)malloc(filesize + 1);
if (ReadXOR == NULL) return -1;
memset(ReadXOR, 0, filesize + 1);
rewind(fpR);
fread(ReadXOR, sizeof(wchar_t), whattoread, fpR);
fclose(fpW);
printf("Reading\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%0*X ", (int)sizeof(wchar_t), (unsigned int)ReadXOR[i]);
}
printf("\n");
wchar_t* unxorred = (wchar_t*)malloc(whattoread);
if (unxorred == NULL) return -1;
memset(unxorred, 0, whattoread);
printf("Un-XOR'ed\n\t");
for (int i = 0; i < whattoread; i++)
{
unxorred[i] = ReadXOR[i] ^ 0xff;
printf("%0*X ", (int)sizeof(wchar_t), (unsigned int)unxorred[i]);
}
printf("\n");
printf("%ls\n", unxorred);
printf("%ls\n", sample);
return 0;
The output looks like below,
Test1自己人自己人A
Plain:
54 65 73 74 31 81EA 5DF1 4EBA 81EA 5DF1 4EBA 41
XOR'ed
AB 9A 8C 8B CE 8115 5D0E 4E45 8115 5D0E 4E45 BE
Reading
AB 9A 8C 8B CE 8115 5D0E 4E45 8115 5D0E 4E45 BE
Un-XOR'ed
54 65 73 74 31 81EA 5DF1 4EBA 81EA 5DF1 4EBA 41
Test1自己人自己人A粘蹊?言?萉?
Test1自己人自己人A
When I modified the unicode string I got wrong text in the output!
Here ...
printf("Un-XOR'ed\n\t");
for (int i = 0; i < samplelen; i++)
{
printf("%02X ", unxorred[i] ^ XORKey);
}
printf("\n");
... you print out the decoded values without storing them.
When you then ...
printf("%ls", unxorred);
... you are printing the data as read back from the file, not the decoded string corresponding to the previously-printed code sequence.
Additionally,
here ...
int filesize = ftell(fpR);
wchar_t* unxorred = (wchar_t*)malloc(filesize);
if (unxorred == NULL) return -1;
rewind(fpR);
fread(unxorred, sizeof(wchar_t), filesize, fpW);
... you are attempting to read back sizeof(wchar_t) * filesize
bytes from the file, which is more than it actually contains or that you have allocated for (unless sizeof(wchar_t)
is 1, which is possible, but unlikely, and is anyway not your case).
You do not allocate space for a (wide) string terminator or add one to the read-back data, yet you pass it to printf()
is if it were a wide string. This is erroneous.
Your approach to printing out the bytes of the wide strings is flawed. The conversion specifier X
requires a corresponding unsigned int
argument, and wchar_t
might neither be the same as unsigned int
nor promote to unsigned int
via the default argument promotions. Additionally, you get varying-length outputs because your wchar_t
is at least 16 bits wide, and your 02
only guarantees 2 hex digits. Better would be, for example:
for (int i = 0; i < samplelen; i++) {
printf("%0*X ", (int) sizeof(wchar_t), (unsigned int) xorred[i]);
}
The *
for a width says that the minimum field width will be passed as an argument of type int
. The casts match the arguments to the types required by the format.