Search code examples
perlwinapi

Handling wide char values returned by Win32::API


The answer provided in wide char and win32::api works for passing utf-16 to the Win API. But how do I convert utf16 strings returned by the Win API? (I am trying to use GetCommandLineW).

I have tried both Unicode::String and Encode::decode without success. I'm guessing that perhaps the data needs to be packed or unpacked first, but how?

After that, the next problem is how to deal with a pointer-to-pointer-to-utf16 like the one returned by CommandLineToArgvW.


Solution

  • When you specify the return value is a string, Win32::API assumes it's a terminated by a byte with value 0, but bytes with that value are common in UTF-16le text.

    As Win32::API suggests, you should use the N type (or Q on 64-bit builds) to get the pointer as a number, then read the pointed memory yourself. Win32::API's provides ReadMemory to read memory, but it requires knowing how much memory to read. That's not useful for NUL-terminated strings and wide NUL-terminated strings.

    For wide NUL-terminated strings, Win32::API provides SafeReadWideCString. But SafeReadWideCString can return a string unrelated to the input on error, so I use my own decode_LPCWSTR instead.

    use strict;
    use warnings;
    use feature qw( say state );
    
    use open ':std', ':encoding('.do { require Win32; "cp".Win32::GetConsoleOutputCP() }.')';
    
    use Config     qw( %Config );
    use Encode     qw( decode encode );
    use Win32::API qw( ReadMemory );
    
    use constant PTR_SIZE => $Config{ptrsize};
    
    use constant PTR_PACK_FORMAT =>
         PTR_SIZE == 8 ? 'Q'
       : PTR_SIZE == 4 ? 'L'
       : die("Unrecognized ptrsize\n");
    
    use constant PTR_WIN32API_TYPE =>
         PTR_SIZE == 8 ? 'Q'
       : PTR_SIZE == 4 ? 'N'
       : die("Unrecognized ptrsize\n");
    
        
    sub lstrlenW {
       my ($ptr) = @_;
    
       state $lstrlenW = Win32::API->new('kernel32', 'lstrlenW', PTR_WIN32API_TYPE, 'i')
          or die($^E);
    
       return $lstrlenW->Call($ptr);
    }
    
    
    sub decode_LPCWSTR {
       my ($ptr) = @_;
       return undef if !$ptr;
    
       my $num_chars = lstrlenW($ptr)
          or return '';
    
       return decode('UTF-16le', ReadMemory($ptr, $num_chars * 2));
    }
    
    
    # Returns true on success. Returns false and sets $^E on error.
    sub LocalFree {
       my ($ptr) = @_;
    
       state $LocalFree = Win32::API->new('kernel32', 'LocalFree', PTR_WIN32API_TYPE, PTR_WIN32API_TYPE)
          or die($^E);
    
       return $LocalFree->Call($ptr) == 0;
    }
    
    
    sub GetCommandLine {
       state $GetCommandLine = Win32::API->new('kernel32', 'GetCommandLineW', '', PTR_WIN32API_TYPE)
          or die($^E);
    
       return decode_LPCWSTR($GetCommandLine->Call());
    }
    
    
    # Returns a reference to an array on success. Returns undef and sets $^E on error.
    sub CommandLineToArgv {
       my ($cmd_line) = @_;
    
       state $CommandLineToArgv = Win32::API->new('shell32', 'CommandLineToArgvW', 'PP', PTR_WIN32API_TYPE)
          or die($^E);
    
       my $cmd_line_encoded = encode('UTF-16le', $cmd_line."\0");
       my $num_args_buf = pack('i', 0);  # Allocate space for an "int".
    
       my $arg_ptrs_ptr = $CommandLineToArgv->Call($cmd_line_encoded, $num_args_buf)
          or return undef;
    
       my $num_args = unpack('i', $num_args_buf);
       my @args =
          map { decode_LPCWSTR($_) }
             unpack PTR_PACK_FORMAT.'*',
                ReadMemory($arg_ptrs_ptr, PTR_SIZE * $num_args);
    
       LocalFree($arg_ptrs_ptr);
       return \@args;
    }
    
    
    {
       my $cmd_line = GetCommandLine();
    
       say $cmd_line;
    
       my $args = CommandLineToArgv($cmd_line)
          or die("CommandLineToArgv: $^E\n");
    
       for my $arg (@$args) {
          say "<$arg>";
       }
    }