Search code examples
windows delphi delphi-xe7

Is Writeln capable of supporting Unicode?


Consider this program:

{$APPTYPE CONSOLE}

// Minimal reproduction: send a non-ASCII string literal through plain Writeln.
const
  Sample = 'АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ';

begin
  Writeln(Sample);
end.

The output on my console which uses the Consolas font is:

????????Z??????????????????????????????????????

The Windows console is quite capable of supporting Unicode as evidenced by this program:

{$APPTYPE CONSOLE}

uses
  Winapi.Windows;

const
  // Sample string that is handed to the console API unchanged.
  Text = 'АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ';

var
  ConsoleOut: THandle;   // standard output handle of this process
  CharsWritten: DWORD;   // receives the count reported by WriteConsole

begin
  // Bypass Writeln entirely and call the console API directly.
  ConsoleOut := GetStdHandle(STD_OUTPUT_HANDLE);
  WriteConsole(ConsoleOut, PChar(Text), Length(Text), CharsWritten, nil);
end.

for which the output is:

АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ

Can Writeln be persuaded to respect Unicode, or is it inherently crippled?


Solution

  • Just set the console output codepage through the SetConsoleOutputCP() routine with codepage cp_UTF8.

    program Project1;

    {$APPTYPE CONSOLE}

    // Demonstrates Unicode console output two ways: first through a direct
    // WriteConsole call, then through WriteLn after the console output code
    // page has been switched to UTF-8.

    uses
      System.SysUtils,Windows;
    const
      Text =  'АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ';
    var
      NumWritten: DWORD;
      SavedOutputCP: UINT;  // console output code page in effect at startup
    begin
      ReadLn;  // Make sure Consolas font is selected
      // The output code page is a property of the console and outlives this
      // process, so remember it and put it back before exiting.
      SavedOutputCP := GetConsoleOutputCP;
      try
        try
          // Direct console API call, made before the code page is changed.
          WriteConsole(GetStdHandle(STD_OUTPUT_HANDLE), PChar(Text), Length(Text), NumWritten, nil);
          // Win32Check raises an OS error if the switch fails, so the handler
          // below reports it instead of WriteLn silently printing garbage.
          Win32Check(SetConsoleOutputCP(CP_UTF8));
          WriteLn;
          // First WriteLn happens only after the switch to UTF-8.
          WriteLn(Text);
        except
          on E: Exception do
            Writeln(E.ClassName, ': ', E.Message);
        end;
        ReadLn;  // Keep the window open until the user presses Enter
      finally
        // Best-effort restore; nothing useful can be done if this fails.
        SetConsoleOutputCP(SavedOutputCP);
      end;
    end.
    

    Outputs:

    АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ
    АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ
    

    WriteLn() internally converts UTF-16 Unicode strings to the selected console output code page (here CP_UTF8).


    Update:

    The above works in Delphi-XE2 and above. In Delphi-XE you need an explicit conversion to UTF-8 to make it work properly.

    // Delphi XE: convert the literal to UTF8String explicitly before writing it.
    WriteLn(UTF8String('АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ'));
    

    Addendum:

    If anything is written to the console in another code page before SetConsoleOutputCP(CP_UTF8) is called, the OS will not output subsequent text correctly as UTF-8. This can be fixed by closing and reopening the stdout handle.

    Another option is to declare a new text output file for UTF-8.

    var
      // Separate text file variable used for the UTF-8 output.
      toutUTF8: TextFile;
    ...
    // Select UTF-8 as the console output code page.
    SetConsoleOutputCP(CP_UTF8);
    // Empty file name plus an explicit UTF-8 code page; presumably the empty
    // name binds the file to standard output — confirm.
    AssignFile(toutUTF8,'',cp_UTF8);  // Works in XE2 and above
    Rewrite(toutUTF8);
    // NOTE(review): no CloseFile(toutUTF8) is visible in this fragment —
    // confirm the file is closed (and thus flushed) elsewhere.
    WriteLn(toutUTF8,'АБВГДЕЖЅZЗИІКЛМНОПҀРСТȢѸФХѾЦЧШЩЪЫЬѢѤЮѦѪѨѬѠѺѮѰѲѴ');