Search code examples
perlcharacter-encodingdb2dbi

DB2: should I decode fetched data with utf8, regardless of the database code set?


Is utf-8 the correct encoding for decoding data retrieved from a DB2 database, regardless of the database's code set?

#!/usr/bin/env perl
use strict;
use warnings;
use DBI;
my ( $db, $dbh, $sth, $value );
my $table = 'mytablename';
my $user = 'user';
my $password = 'passwd';
my $char = "\x{20ac}"; # €

# ISODB code set = ISO885915
$db = 'ISODB';
$dbh = DBI->connect( "dbi:DB2:$db", $user, $password ) or die DBI->errstr;
$dbh->do( "DROP TABLE $table" );
$dbh->do( "CREATE TABLE $table (Name VARCHAR(32))" );
$sth = $dbh->prepare( "INSERT INTO $table (Name) VALUES(?)" );
$sth->execute( $char );
( $value ) = $dbh->selectrow_array( "SELECT Name FROM $table" );
printf "%v02X\n", $value;
$sth->finish;
$dbh->disconnect();

# UTFDB code set = utf-8
$db = 'UTFDB';
$dbh = DBI->connect( "dbi:DB2:$db", $user, $password ) or die DBI->errstr;
$dbh->do( "DROP TABLE $table" );
$dbh->do( "CREATE TABLE $table (Name VARCHAR(32))" );
$sth = $dbh->prepare( "INSERT INTO $table (Name) VALUES(?)" );
$sth->execute( $char );
( $value ) = $dbh->selectrow_array( "SELECT Name FROM $table" );
printf "%v02X\n", $value;
$sth->finish;
$dbh->disconnect;
# E2.82.AC
# E2.82.AC

Solution

  • The encoding used by DBD::DB2 is determined from the environment.

    You can query the encoding of the database and the encoding of the connection using $dbh->get_info.

    use strict;
    use warnings;
    
    use DBI qw( );
    
    use constant {
       SQL_DATABASE_CODEPAGE    => 2519,
       SQL_APPLICATION_CODEPAGE => 2520,
    };
    
    {
       my $dbh = ...;
    
       printf( "SQL_DATABASE_CODEPAGE:    %s\n", $dbh->get_info( SQL_DATABASE_CODEPAGE    ) );
       printf( "SQL_APPLICATION_CODEPAGE: %s\n", $dbh->get_info( SQL_APPLICATION_CODEPAGE ) );
    
       my $val = $dbh->selectrow_array( "SELECT * FROM ( VALUES( CHR( 201 ) ) )" );
       printf( "%v02X\n", $val );
    }
    
    $ enctest
    SQL_DATABASE_CODEPAGE:    819
    SQL_APPLICATION_CODEPAGE: 819   # ISO-8859-1
    C9
    
    $ DB2CODEPAGE=1208 enctest
    SQL_DATABASE_CODEPAGE:    819
    SQL_APPLICATION_CODEPAGE: 1208  # UTF-8
    C3.E9
    

    You should either query the SQL_APPLICATION_CODEPAGE, or ensure that a known code page is used.