Search code examples
perlstrawberry-perl

Why does the -e file existence test always return false for a filename containing non-ASCII characters?


I am doing an existence check for a file containing non-ASCII characters in its name, using Perl. Even though the file exists, the check always returns false. I am using Strawberry Perl v5.24.0 on a Windows 10 machine.

Here is my code:

use strict;
use warnings;

use Encode;

# Path of the file to test; the file name contains non-ASCII characters.
# NOTE(review): this script has no `use utf8;`, so the literal is presumably
# held as the raw bytes of the source file rather than decoded characters --
# confirm against the encoding the file was saved in.
my $file = '<SOME DIR PATH> / áéíóú.mov';
# -e is true when the path exists; print the outcome either way.
if (-e $file) {
  print "$file exists";
} else {
  print " $file does not exists" ;
}

I also changed the code page in the cmd shell by running chcp 65001. cmd was then able to display the characters, but the script still always reports that the file does not exist.

How can I fix this?


Solution

  • use strict;
    use warnings;
    
    # Properly decode source code, which is expected to be UTF-8.
    # This allows non-ASCII characters in the source.
    use utf8;
    
    # Properly decode text received from STDIN.
    # Properly encode text sent to STDOUT and STDERR.
    #
    # The variables are declared outside the BEGIN block so that code after it
    # can still see them (e.g. $enc_syscall when encoding a file name), while
    # the assignments and binmode calls run at compile time. The declared names
    # must match the names assigned below, otherwise "use strict" rejects the
    # script.
    use Win32 qw( );
    my ( $enc_input, $enc_output, $enc_syscall );
    BEGIN {
       # Each Win32 call returns a code page number; prefixing it with "cp"
       # yields an encoding name such as "cp1252".
       $enc_input   = 'cp'.Win32::GetConsoleCP();        # console input code page
       $enc_output  = 'cp'.Win32::GetConsoleOutputCP();  # console output code page
       $enc_syscall = 'cp'.Win32::GetACP();              # ANSI code page
    
       binmode STDIN,  ":encoding($enc_input)";
       binmode STDOUT, ":encoding($enc_output)";
       binmode STDERR, ":encoding($enc_output)";
    }
    
    use Encode qw( encode );
    
    my $file = 'áéíóú.mov';
    
    # Encode the file name into the $enc_syscall encoding before handing it
    # to -e.
    #   FB_CROAK  - die if the name contains a character that encoding cannot
    #               represent, instead of substituting something else.
    #   LEAVE_SRC - leave $file itself unmodified so it can be printed below.
    my $check        = Encode::FB_CROAK | Encode::LEAVE_SRC;
    my $encoded_path = encode($enc_syscall, $file, $check);
    
    # Three outcomes: the file exists, it definitely does not (ENOENT), or
    # the test failed for some other reason, which is reported via $!.
    my $exists = -e $encoded_path;
    if ($exists) {
       print("$file exists\n");
    }
    elsif ($!{ENOENT}) {
       print("$file doesn't exist\n");
    }
    else {
       die("Can't determine if \"$file\" exists: $!\n");
    }
    

    or

    use strict;
    use warnings;
    
    # Properly decode source code, which is expected to be UTF-8.
    # This allows non-ASCII characters in the source.
    use utf8;
    
    # Properly decode text received from STDIN.
    # Properly encode text sent to STDOUT and STDERR.
    #
    # The variables are declared outside the BEGIN block so that code after it
    # can still see them, while the assignments and binmode calls run at
    # compile time. The declared names must match the names assigned below,
    # otherwise "use strict" rejects the script.
    use Win32 qw( );
    my ( $enc_input, $enc_output, $enc_syscall );
    BEGIN {
       # Each Win32 call returns a code page number; prefixing it with "cp"
       # yields an encoding name such as "cp1252".
       $enc_input   = 'cp'.Win32::GetConsoleCP();        # console input code page
       $enc_output  = 'cp'.Win32::GetConsoleOutputCP();  # console output code page
       $enc_syscall = 'cp'.Win32::GetACP();              # ANSI code page
    
       binmode STDIN,  ":encoding($enc_input)";
       binmode STDOUT, ":encoding($enc_output)";
       binmode STDERR, ":encoding($enc_output)";
    }
    
    # statW is imported from Win32::Unicode::File, which is not a core module.
    # NOTE(review): presumably statW accepts the decoded (character) path
    # directly, which is why no encode() call appears here -- confirm against
    # the module's documentation.
    use Win32::Unicode::File qw( statW );
    
    my $file = 'áéíóú.mov';
    
    # Three outcomes: statW succeeds (the file exists), it fails with ENOENT
    # (the file does not exist), or it fails for some other reason.
    if (statW($file)) {
       print("$file exists\n");
    }
    elsif ($!{ENOENT}) {
       print("$file doesn't exist\n");
    }
    else {
       # NOTE(review): the branch above inspects $! while this message reports
       # $^E -- confirm that statW sets both on failure.
       die("Can't determine if \"$file\" exists: $^E\n");
    }
    

    The latter isn't limited to paths containing characters of the machine's ANSI charset.