Search code examples
c++python-3.xdllctypeswchar-t

ctypes wintypes WCHAR String Additional White Spaces


Why is every character followed by a white space in the following?

C++ DLL

test.h:

#ifndef TEST_DLL_H
#define TEST_DLL_H
#define EXPORT __declspec(dllexport) __stdcall 

#include <iostream>
#include <Windows.h>

namespace Test_DLL
{
    struct Simple
    {
        TCHAR a[1024];
    };

    extern "C"
    {
        int EXPORT simple(Simple* a);
    }
};

#endif

test.cpp:

#include "test.h"

// Writes the text buffer held by `a` to std::wcout, followed by std::endl
// (newline plus flush). Always returns 0.
int EXPORT Test_DLL::simple(Simple* a)
{
    const auto& text = a->a;
    std::wcout << text << std::endl;
    return 0;
}

Python

test.py:

import ctypes
from ctypes import wintypes


class MyStructure(ctypes.Structure):
    """ctypes counterpart of the DLL's ``Simple`` struct: one 1024-element wide-char array."""

    _fields_ = [
        ("a", wintypes.WCHAR * 1024),
    ]


a = "Hello, world!"
hDLL = ctypes.LibraryLoader(ctypes.WinDLL)
hDLL_Test = hDLL.LoadLibrary(r"...\test.dll")
simple = hDLL_Test.simple
# Declare the prototype explicitly so ctypes checks the argument and converts
# the return value instead of relying on its defaults.
simple.argtypes = [ctypes.POINTER(MyStructure)]
simple.restype = ctypes.c_int
mystruct = MyStructure(a=a)
ret = simple(ctypes.byref(mystruct))

The result:

H e l l o ,   w o r l d ! 

Is the problem on the C++ DLL side? Or am I missing something on the Python side?


Solution

  • At the beginning I thought that it's some minor problem in your code. When debugging I discovered that it isn't quite so. Starting from your example, I developed another one that illustrates some key points.

    test.h:

    #if !defined(TEST_DLL_H)
    #define TEST_DLL_H
    
    
    // Platform configuration for the exported API.
    #if defined(_WIN32)
    // TEST_EXPORTS is defined by the translation unit that builds the DLL
    // (test.cpp); every other includer gets the dllimport flavor.
    #  if defined(TEST_EXPORTS)
    #    define TEST_API __declspec(dllexport)
    #  else
    #    define TEST_API __declspec(dllimport)
    #  endif
    #  define CALLING_CONVENTION __cdecl
    #else
    // Outside Windows: provide a TEXT() stand-in that turns a narrow string
    // literal into a wide one, and make the decoration macros expand to nothing.
    #  define __TEXT(X) L##X
    #  define TEXT(X) __TEXT(X)
    #  define TEST_API
    #  define CALLING_CONVENTION
    #endif
    
    
    // Public interface of the test library; definitions live in test.cpp.
    namespace TestDll {
        // Structure passed across the DLL boundary: a fixed-size wide-character buffer.
        typedef struct Simple_ {
            wchar_t a[1024];
        } Simple;
    
        // Declared with C linkage so the exports can be looked up by plain name.
        extern "C" {
            // Prints pSimple->a with wprintf; returns wprintf's result for the string.
            TEST_API int CALLING_CONVENTION simple(Simple *pSimple);
            // Prints a narrow string with printf; returns printf's result for the string.
            TEST_API int CALLING_CONVENTION printStr(char *pStr);
            // Prints a wide string plus a hex dump of its bytes; returns wprintf's result for the string.
            TEST_API int CALLING_CONVENTION wprintWstr(wchar_t *pWstr);
            // Returns a malloc'ed copy of a fixed wide string; release it with clearWstr.
            TEST_API wchar_t* CALLING_CONVENTION wstr();
            // Frees a buffer with free().
            TEST_API void CALLING_CONVENTION clearWstr(wchar_t *pWstr);
        }
    };
    
    #endif  // TEST_DLL_H
    

    test.cpp:

    #define TEST_EXPORTS
    #include "test.h"
    #if defined(_WIN32)
    #  include <Windows.h>
    #else
    #  include <wchar.h>
    #  define __FUNCTION__ "function"
    #endif
    #include <stdio.h>
    //#include <iostream>
    
    // Narrow trace line: source file, line number and enclosing function.
    #define PRINT_MSG_0() printf("From C: - [%s] (%d) - [%s]\n", __FILE__, __LINE__, __FUNCTION__)
    // Wide counterpart of PRINT_MSG_0. Uses %ls because a bare %s inside wprintf
    // means "wide string" only on the Microsoft CRT; %ls is wide on every CRT.
    #define WPRINT_MSG_0() wprintf(L"From C: - [%ls] (%d) - [%ls]\n", TEXT(__FILE__), __LINE__, TEXT(__FUNCTION__))
    
    // Text returned (as a heap copy) by wstr().
    #define DUMMY_TEXT_W L"Dummy text."
    
    
    //using namespace std;
    
    
    // Prints a trace line followed by the wide string stored in pSimple->a.
    // Returns the value wprintf reports for the string itself (the trailing
    // newline is not counted).
    int TestDll::simple(Simple *pSimple) {
        //std::wcout << pSimple->a << std::endl;
        WPRINT_MSG_0();
        // %ls is the portable wide-string conversion; a bare %s in wprintf is
        // only interpreted as wide by the Microsoft CRT.
        const int ret = wprintf(L"%ls", pSimple->a);
        wprintf(L"\n");
        return ret;
    }
    
    
    // Emits a narrow trace line, then the given narrow string and a newline.
    // The result is what printf reported for the string alone.
    int TestDll::printStr(char *pStr) {
        PRINT_MSG_0();
        const int written = printf("%s", pStr);
        printf("\n");
        return written;
    }
    
    
    // Prints a trace line, the wide string itself, and a hex dump of the bytes
    // that make up its characters (terminator excluded).
    // Returns the value wprintf reports for the string itself.
    int TestDll::wprintWstr(wchar_t *pWstr) {
        WPRINT_MSG_0();
        // %ls is the portable wide-string conversion; a bare %s in wprintf is
        // only interpreted as wide by the Microsoft CRT.
        const int ret = wprintf(L"%ls", pWstr);
        wprintf(L"\n");
        const int len = static_cast<int>(wcslen(pWstr));
        // Inspect the storage as unsigned bytes: with plain char, any byte
        // >= 0x80 would be sign-extended and printed as FFFFFFxx.
        const unsigned char *bytes = reinterpret_cast<const unsigned char*>(pWstr);
        const size_t byteCount = static_cast<size_t>(len) * sizeof(wchar_t);
        wprintf(L"Hex (%d): ", len);
        for (size_t i = 0; i < byteCount; i++)
            wprintf(L"%02X ", static_cast<unsigned int>(bytes[i]));
        wprintf(L"\n");
        return ret;
    }
    
    
    // Returns a heap-allocated copy of DUMMY_TEXT_W, or NULL when the
    // allocation fails. The caller releases it with clearWstr().
    wchar_t *TestDll::wstr() {
        const size_t charCount = wcslen(DUMMY_TEXT_W) + 1;  // + 1 for the terminator
        wchar_t *ret = static_cast<wchar_t*>(malloc(charCount * sizeof(wchar_t)));
        if (ret == NULL)
            return NULL;  // do not copy into a failed allocation
        wcscpy(ret, DUMMY_TEXT_W);
        return ret;
    }
    
    
    // Releases a buffer with free(). Counterpart of wstr(), which allocates
    // its result with malloc().
    void TestDll::clearWstr(wchar_t *pWstr) {
        free(pWstr);
    }
    

    main.cpp:

    #include "test.h"
    #include <stdio.h>
    #if defined(_WIN32)
    #  include <Windows.h>
    #endif
    
    
    // Calls each printing export once and reports its return value.
    int main() {
        // Writable array: printStr takes a non-const char*, and a string
        // literal may not be bound to one in standard C++11 and later.
        char text[] = "Hello, world!";
        // Plain wide literal, so the initializer does not depend on whether
        // UNICODE is defined (TEXT() would yield a narrow literal without it).
        TestDll::Simple s = { L"Hello, world!" };
        // The unqualified call compiles because of argument-dependent lookup:
        // the argument type TestDll::Simple* adds namespace TestDll to the
        // set of scopes searched for `simple`.
        int ret = simple(&s);
        printf("\"simple\" returned %d\n", ret);
        ret = TestDll::printStr(text);
        printf("\"printStr\" returned %d\n", ret);
        ret = TestDll::wprintWstr(s.a);
        printf("\"wprintWstr\" returned %d\n", ret);
        return 0;
    }
    

    code.py:

    #!/usr/bin/env python3
    
    import sys
    import ctypes
    
    
    # Path of the shared library loaded by main() (identifier spelling kept, since main() refers to it).
    DLL_NMAME = "./test.dll"
    # Sample text handed to the library's printing functions.
    DUMMY_TEXT = "Hello, world!"
    
    
    # ctypes array type matching the C field `wchar_t a[1024]`.
    WCharArr1024 = ctypes.c_wchar * 1024
    
    class SimpleStruct(ctypes.Structure):
        """ctypes view of the library's ``Simple`` struct (a single wide-char array field)."""

        _fields_ = [("a", WCharArr1024)]
    
    
    def main():
        """Load the test library, declare each export's prototype and call it once.

        Prints the value returned by every call. The buffer allocated by
        ``wstr`` is copied into a Python string and then released through
        ``clearWstr``.
        """
    
        test_dll = ctypes.CDLL(DLL_NMAME)
    
        simple_func = test_dll.simple
        simple_func.argtypes = [ctypes.POINTER(SimpleStruct)]
        simple_func.restype = ctypes.c_int
        struct_obj = SimpleStruct(a=DUMMY_TEXT)
    
        print_str_func = test_dll.printStr
        print_str_func.argtypes = [ctypes.c_char_p]
        print_str_func.restype = ctypes.c_int
    
        wprint_wstr_func = test_dll.wprintWstr
        wprint_wstr_func.argtypes = [ctypes.c_wchar_p]
        wprint_wstr_func.restype = ctypes.c_int
    
        wstr_func = test_dll.wstr
        wstr_func.argtypes = []
        # A c_wchar_p restype is converted to a Python str immediately, which
        # discards the C pointer and makes the malloc'ed buffer impossible to
        # free. Receive the raw address instead.
        wstr_func.restype = ctypes.c_void_p
    
        clear_wstr_func = test_dll.clearWstr
        clear_wstr_func.argtypes = [ctypes.c_void_p]
        clear_wstr_func.restype = None
    
        #print("From PY: [{:s}]".format(struct_obj.a))
        ret = simple_func(ctypes.byref(struct_obj))
        print("\"{:s}\" returned {:d}".format(simple_func.__name__, ret))
        ret = print_str_func(DUMMY_TEXT.encode())
        print("\"{:s}\" returned {:d}".format(print_str_func.__name__, ret))
        #ret = wprint_wstr_func(ctypes.cast(DUMMY_TEXT.encode(), ctypes.c_wchar_p))
        ret = wprint_wstr_func(DUMMY_TEXT)
        print("\"{:s}\" returned {:d}".format(wprint_wstr_func.__name__, ret))
        wstr_ptr = wstr_func()
        if not wstr_ptr:
            raise MemoryError("wstr() returned NULL")
        try:
            # Copy the NUL-terminated wide string out of the C buffer...
            s = ctypes.wstring_at(wstr_ptr)
        finally:
            # ...and hand the buffer back to the library that allocated it.
            clear_wstr_func(wstr_ptr)
        print("\"{:s}\" returned \"{:s}\"".format(wstr_func.__name__, s))
    
    
    # Run main() only when this file is executed as a script.
    if __name__ == "__main__":
        #print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
        main()
    

    Changes:

    • Removed the C++ layer (to exclude as many variables as possible) and only rely on C
    • Adapted the code to be *Nix compatible (I've run it on Ubuntu, but I encountered other issues that I'm not going to discuss)
    • Added more functions (this was a debugging process), to gather as much intel as possible
    • Did some renames, refactorings and other unimportant changes
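    • While investigating, I discovered a funny problem (the comment from main.cpp). Apparently the simple function compiles even if I don't prepend the namespace in which it's declared. This doesn't apply to the other functions. After some quick tries, I realized that it's because of the Simple argument (which is also part of the namespace). Anyway, I didn't spend too much time on it back then; it turns out to be argument-dependent lookup (ADL): for an unqualified call, the compiler also searches the namespaces associated with the argument types, so simple is found through its TestDll::Simple* argument, while printStr and wprintWstr only take built-in pointer types and therefore need the qualifier. So it's well-defined behavior, not Undefined Behavior (and it doesn't work because of dumb luck)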
    • The narrow and wide functions are mixed, that's a NO - NO, and is only for debugging / demonstrating purposes

    Output:

    e:\Work\Dev\StackOverflow\q054269984>"c:\Install\x86\Microsoft\Visual Studio Community\2015\vc\vcvarsall.bat" x64
    
    e:\Work\Dev\StackOverflow\q054269984>dir /b
    code.py
    main.cpp
    test.cpp
    test.h
    
    e:\Work\Dev\StackOverflow\q054269984>cl /nologo /DDLL /DUNICODE /MD /EHsc test.cpp  /link /NOLOGO /DLL /OUT:test.dll
    test.cpp
       Creating library test.lib and object test.exp
    
    e:\Work\Dev\StackOverflow\q054269984>cl /nologo /DUNICODE /MD /EHsc main.cpp  /link /NOLOGO /OUT:main.exe test.lib
    main.cpp
    
    e:\Work\Dev\StackOverflow\q054269984>dir /b
    code.py
    main.cpp
    main.exe
    main.obj
    test.cpp
    test.dll
    test.exp
    test.h
    test.lib
    test.obj
    
    e:\Work\Dev\StackOverflow\q054269984>main.exe
    From C: - [test.cpp] (23) - [TestDll::simple]
    Hello, world!
    "simple" returned 13
    From C: - [test.cpp] (31) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    From C: - [test.cpp] (39) - [TestDll::wprintWstr]
    Hello, world!
    Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
    "wprintWstr" returned 13
    
    e:\Work\Dev\StackOverflow\q054269984>"e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py
    Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
    
    F r o m   C :   -   [ t e s t . c p p ]   ( 2 3 )   -   [ T e s t D l l : : s i m p l e ]
     H e l l o ,   w o r l d !
     "simple" returned 13
    From C: - [test.cpp] (31) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    F r o m   C :   -   [ t e s t . c p p ]   ( 3 9 )   -   [ T e s t D l l : : w p r i n t W s t r ]
     H e l l o ,   w o r l d !
     H e x   ( 1 3 ) :   4 8   0 0   6 5   0 0   6 C   0 0   6 C   0 0   6 F   0 0   2 C   0 0   2 0   0 0   7 7   0 0   6 F   0 0   7 2   0 0   6 C   0 0   6 4   0 0   2 1   0 0
     "wprintWstr" returned 13
    "wstr" returned "Dummy text."
    
    • It seems to be Python related
    • The strings themselves are not messed up (their lengths and wprintf return value are correct). It's more like stdout is the culprit

    Then, I went further:

    e:\Work\Dev\StackOverflow\q054269984>for /f %f in ('dir /b "e:\Work\Dev\VEnvs\py_064*"') do ("e:\Work\Dev\VEnvs\%f\Scripts\python.exe" code.py)
    
    e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_02.07.15_test0\Scripts\python.exe" code.py )
    Python 2.7.15 (v2.7.15:ca079a3ea3, Apr 30 2018, 16:30:26) [MSC v.1500 64 bit (AMD64)] on win32
    
    From C: - [test.cpp] (23) - [TestDll::simple]
    Hello, world!
    "simple" returned 13
    From C: - [test.cpp] (31) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    From C: - [test.cpp] (39) - [TestDll::wprintWstr]
    Hello, world!
    Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
    "wprintWstr" returned 13
    "wstr" returned "Dummy text."
    
    e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.04.04_test0\Scripts\python.exe" code.py )
    Python 3.4.4 (v3.4.4:737efcadf5a6, Dec 20 2015, 20:20:57) [MSC v.1600 64 bit (AMD64)] on win32
    
    From C: - [test.cpp] (23) - [TestDll::simple]
    Hello, world!
    "simple" returned 13
    From C: - [test.cpp] (31) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    From C: - [test.cpp] (39) - [TestDll::wprintWstr]
    Hello, world!
    Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
    "wprintWstr" returned 13
    "wstr" returned "Dummy text."
    
    e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.05.04_test0\Scripts\python.exe" code.py )
    Python 3.5.4 (v3.5.4:3f56838, Aug  8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
    
    F r o m   C :   -   [ t e s t . c p p ]   ( 2 3 )   -   [ T e s t D l l : : s i m p l e ]
     H e l l o ,   w o r l d !
     "simple" returned 13
    From C: - [test.cpp] (31) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    F r o m   C :   -   [ t e s t . c p p ]   ( 3 9 )   -   [ T e s t D l l : : w p r i n t W s t r ]
     H e l l o ,   w o r l d !
     H e x   ( 1 3 ) :   4 8   0 0   6 5   0 0   6 C   0 0   6 C   0 0   6 F   0 0   2 C   0 0   2 0   0 0   7 7   0 0   6 F   0 0   7 2   0 0   6 C   0 0   6 4   0 0   2 1   0 0
     "wprintWstr" returned 13
    "wstr" returned "Dummy text."
    
    e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py )
    Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
    
    F r o m   C :   -   [ t e s t . c p p ]   ( 2 3 )   -   [ T e s t D l l : : s i m p l e ]
     H e l l o ,   w o r l d !
     "simple" returned 13
    From C: - [test.cpp] (31) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    F r o m   C :   -   [ t e s t . c p p ]   ( 3 9 )   -   [ T e s t D l l : : w p r i n t W s t r ]
     H e l l o ,   w o r l d !
     H e x   ( 1 3 ) :   4 8   0 0   6 5   0 0   6 C   0 0   6 C   0 0   6 F   0 0   2 C   0 0   2 0   0 0   7 7   0 0   6 F   0 0   7 2   0 0   6 C   0 0   6 4   0 0   2 1   0 0
     "wprintWstr" returned 13
    "wstr" returned "Dummy text."
    
    e:\Work\Dev\StackOverflow\q054269984>("e:\Work\Dev\VEnvs\py_064_03.07.02_test0\Scripts\python.exe" code.py )
    Python 3.7.2 (tags/v3.7.2:9a3ffc0492, Dec 23 2018, 23:09:28) [MSC v.1916 64 bit (AMD64)] on win32
    
    F r o m   C :   -   [ t e s t . c p p ]   ( 2 3 )   -   [ T e s t D l l : : s i m p l e ]
     H e l l o ,   w o r l d !
     "simple" returned 13
    From C: - [test.cpp] (31) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    F r o m   C :   -   [ t e s t . c p p ]   ( 3 9 )   -   [ T e s t D l l : : w p r i n t W s t r ]
     H e l l o ,   w o r l d !
     H e x   ( 1 3 ) :   4 8   0 0   6 5   0 0   6 C   0 0   6 C   0 0   6 F   0 0   2 C   0 0   2 0   0 0   7 7   0 0   6 F   0 0   7 2   0 0   6 C   0 0   6 4   0 0   2 1   0 0
     "wprintWstr" returned 13
    "wstr" returned "Dummy text."
    

    As seen, the behavior is reproducible starting with Python 3.5.

    I thought it was because of [Python]: PEP 529 -- Change Windows filesystem encoding to UTF-8, but that's only available from version 3.6.

    Then I started reading, (I even tried to do a diff between Python 3.4 and Python 3.5) but with not much success. Some articles that I went through:

    Then I noticed [SO]: Output unicode strings in Windows console app (@DuckMaestro's answer) and started to play with [MS.Docs]: _setmode.

    Adding:

    #include <io.h>
    #include <fcntl.h>
    
    
    // Flushes whatever is pending on stdout, then switches stdout's
    // translation mode to `mode`. Hands back _setmode's result unchanged.
    static int set_stdout_mode(int mode) {
        fflush(stdout);
        return _setmode(_fileno(stdout), mode);
    }
    

    and calling it like int stdout_mode = set_stdout_mode(_O_TEXT); in test.cpp before outputting anything from C (and C++: std::wcout line uncommented), yielded:

    e:\Work\Dev\StackOverflow\q054269984>"e:\Work\Dev\VEnvs\py_064_03.06.08_test0\Scripts\python.exe" code.py
    Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64 bit (AMD64)] on win32
    
    Hello, world!
    From C: - [test.cpp] (32) - [TestDll::simple]
    Hello, world!
    "simple" returned 13
    From C: - [test.cpp] (40) - [TestDll::printStr]
    Hello, world!
    "printStr" returned 13
    From C: - [test.cpp] (48) - [TestDll::wprintWstr]
    Hello, world!
    Hex (13): 48 00 65 00 6C 00 6C 00 6F 00 2C 00 20 00 77 00 6F 00 72 00 6C 00 64 00 21 00
    "wprintWstr" returned 13
    "wstr" returned "Dummy text."
    
    • Although it works, I do not know why. It could be Undefined Behavior
      • Printing _setmode's return value, revealed that Python 3.4 and also main.exe automatically set the mode to _O_TEXT (0x4000), while newer Python versions (those that don't work) set it to _O_BINARY (0x8000) - which apparently seems to be the cause (might be related: [Python]: Issue #16587 - Py_Initialize breaks wprintf on Windows)
      • Trying to set it to any of the wide related constants (_O_U16TEXT, _O_U8TEXT, _O_WTEXT) crashes the program when calling printf or std::cout (even if restoring the original mode when done with wide functions - before the narrow ones)
    • Trying to output real Unicode chars, won't work (most likely)
    • You could achieve the same goal on Python side: msvcrt.setmode(sys.stdout.fileno(), 0x4000)