Search code examples
delphi, character-encoding, ascii, utf-16, delphi-2006

How do I get Delphi 2006 TStringList.LoadFromFile to load UTF-16 files?


I have a Delphi 2006 app that I am adding code to process some generated CSV data files. TStringList.LoadFromFile was giving strange results and I have just worked out the files are UTF-16 encoded.

Upgrading to XE is planned but not an option at the moment.

What's the simplest route to handle these files with D2006? I'm assuming they can be mapped to 8-bit ASCII without any issues - they are "pure" CSV - just digits and commas, etc. and I don't think there will be any issues with characters that are not in the 8-bit set.


Solution

  • TStringList does not support UTF-16 in D2006, so you will have to load and decode the file data manually before putting the output into your TStringList. For example:

    // Loads a UTF-16 text file into a TStringList.
    // On compilers where D2009_OR_LATER is defined, the RTL does the decoding.
    // Otherwise the file is read raw, an optional BOM is skipped, and the text is
    // converted to the system ANSI code page with WideCharToMultiByte.
    // NOTE(review): D2009_OR_LATER is not a symbol shown being defined here -
    // presumably the project defines it itself (e.g. from the compiler version).
    var
      sl: TStringList;
      {$IFNDEF D2009_OR_LATER}
      ms: TMemoryStream;    // raw bytes of the file
      ptr: PWideChar;       // start of the UTF-16 text (after any BOM)
      s: AnsiString;        // converted ANSI text
      dlen, slen: Integer;  // dlen: number of WideChars; slen: ANSI bytes required
      {$ENDIF}
    begin
      ...
      {$IFDEF D2009_OR_LATER}
      sl.LoadFromFile('...', TEncoding.Unicode);
      {$ELSE}
      ms := TMemoryStream.Create;
      try
        ms.LoadFromFile('...');
        ptr := PWideChar(ms.Memory);
        // Whole WideChars only; a trailing odd byte is ignored.
        dlen := ms.Size div SizeOf(WideChar);
        // Skip a leading byte-order mark. Only the native (little-endian) BOM is
        // recognised; big-endian UTF-16 input is not handled by this code.
        if (dlen >= 1) and (PWord(ptr)^ = $FEFF) then
        begin
          Inc(ptr);
          Dec(dlen);
        end;
        // First call with a nil buffer only asks how many bytes are needed.
        slen := WideCharToMultiByte(CP_ACP, 0, ptr, dlen, nil, 0, nil, nil);
        if slen > 0 then
        begin
          SetLength(s, slen);
          // Second call performs the actual conversion into s.
          WideCharToMultiByte(CP_ACP, 0, ptr, dlen, PAnsiChar(s), slen, nil, nil);
        end;
        // s stays empty when the file is empty or the conversion fails,
        // which leaves the list with no lines.
        sl.Text := s;
      finally
        ms.Free;
      end;
      {$ENDIF}
      ...
    end;