Using Delphi 11.3 and an Oracle DB using UniDac, I have the problem that there are many old apps (written in Cobol) which do not support Unicode, so the data for text fields is stored as Ansi text using the (Windows) client's codepage (125x). I have to replace them step by step and use "new" tables where I store the data in Unicode (UTF-16).
As long as the old apps are still in use, I have to "translate" the Ansi strings into UTF (and back for writing).
I have written 2 functions:
function AnsiToUTF(Value: AnsiString; codepage: word): string;
and
function UTFToAnsi(Value: string; codepage word): AnsiString
which are working.
I searched this forum and it seems to be possible to make changes to the TStringField
class, but I have no experience in doing so.
It would be great if I could apply a new property AsUTF
to the TStringField
class with my 2 functions embedded in SetAsUTF()
and GetAsUTF()
to that I can call them in any data-aware component using TFields
. For example:
MyString := UniQuery1.FieldByName('TEXT').AsUTF(1252);
and
UniQuery1.FieldByName('TEXT').AsUTF(1252) := MyString;
Would that be possible (or is there a better solution)?
#EDIT: I was told to give a reproducible example for it, so here it is:
unit UTFStringField;
TUTFStringField = class(TWideStringField)
protected
procedure SetAsUTF(UTF: string; Codepage: word);
function GetAsUTF(Codepage: word): string;
constructor Create; override;
destructor Destroy; override;
public
function UTFToAnsi(txt: string; GCodePage: word): Ansistring;
function AnsiToUTF(txt: Ansistring; GCodepage: word): string;
end;
implementation
procedure TUTFStringField.SetAsUTF(UTF: string; Codepage: word);
begin
SetAsAnsiString(UTFToAnsi(UTF,Codepage));
end;
function TUTFStringField.GetAsUTF(CodePage: word): string;
begin
Result := AnsiToUTF(GetAsAnsiString,CodePage);
end;
constructor TUTFStringField.Create;
begin
inherited Create;
DefaultFieldClasses[ftWideString] := TUTFStringField;
end;
destructor TUTFStringField.Destroy;
begin
DefaultFieldClasses[ftWideString] := TWideStringField;
inherited destroy;
end;
function TUTFStringField.AnsiToUTF(txt: Ansistring; GCodepage: word): string;
var
NewStr: string;
OldChar: AnsiChar;
NewChar: Char;
i: integer;
begin
NewStr := '';
case GCodepage of
1250: begin
for i := 1 to Length(txt) do
begin
OldChar := txt[i];
NewChar := Char(OldChar);
case Ord(OldChar) of
$80: NewChar := Char($20AC); // #EURO SIGN
$82: NewChar := Char($201A); // #SINGLE LOW-9
$84: NewChar := Char($201E); // #DOUBLE LOW-9
$85: NewChar := Char($2026); // #HORIZONTAL ELLIPSIS
....
end;
NewStr := NewStr + NewChar;
end;
end;
1251: begin
for i := 1 to Length(txt) do
begin
OldChar := AnsiChar(txt[i]);
NewChar := Char(OldChar);
case Ord(OldChar) of
$80: NewChar := Char($0402); // #CYRILLIC CAPITAL LETTER
$81: NewChar := Char($0403); // #CYRILLIC CAPITAL LETTER
$82: NewChar := Char($201A); // #SINGLE LOW-9 QUOTATION
...
end;
NewStr := NewStr + NewChar;
end;
end;
end;
Result := NewStr;
end;
function TUTFStringField.UTFToAnsi(txt: string; GCodepage: word): Ansistring;
var
NewStr: Ansistring;
OldChar: Char;
NewChar: AnsiChar;
i: integer;
begin
NewStr := '';
case GCodepage of
1250: begin
for i := 1 to Length(txt) do
begin
OldChar := Copy(txt,i,1);
NewChar := AnsiChar(OldChar);
case Ord(OldChar) of
$20AC: NewChar := AnsiChar($80);
$201A: NewChar := AnsiChar($82);
$201E: NewChar := AnsiChar($84); // DOUBLE LOW-9
$2026: NewChar := AnsiChar($85); // HORIZONTAL ELLIPSIS
$2020: NewChar := AnsiChar($86); // DAGGER
....
end;
NewStr := NewStr + NewChar;
end;
end;
1251: begin
for i := 1 to Length(txt) do
begin
OldChar := Char(txt[i]);
NewChar := AnsiChar(OldChar);
case Ord(OldChar) of
$0402: NewChar := AnsiChar($80); // CYRILLIC CAPITAL
$0403: NewChar := AnsiChar($81); // CYRILLIC CAPITAL
$201A: NewChar := AnsiChar($82); // SINGLE LOW-9
$0453: NewChar := AnsiChar($83); // CYRILLIC SMALL
$201E: NewChar := AnsiChar($84); // DOUBLE LOW-9
end;
NewStr := NewStr + NewChar;
end;
end;
end;
interface
RegisterClass(TUTFStringField);
end.
Surely the constructor / destructor is wrong, but I have no idea, how and where to introduce my new TUTFStringField
class, so that it is always used in the moment, for example when I drop an UniQuery component on my form.
Oh, and by the way: I set "UniCode"
in the Oracle Uni provider to true, as my new apps should use Unicode as default (the database charset is UTF-16).
FYI, your translation functions are unnecessary. The RTL has its own ways of converting to/from Ansi strings with codepages, such as System.LocaleCharsFromUnicode()
and System.UnicodeFromLocaleChars()
, or SysUtils.TEncoding
, or RawByteString
with System.SetCodePage()
.
You should use the built-in functionality instead of rolling your own. especially since you are not handling UTF-16 surrogates, whereas the RTL will.
Try something more like this instead:
unit UTFStringField;
interface
uses
Data.DB;
type
TUTFStringField = helper class for TField
public
procedure SetAsUTF(const UTF: string; Codepage: Word);
function GetAsUTF(Codepage: Word): string;
end;
implementation
procedure TUTFStringField.SetAsUTF(const UTF: string; Codepage: Word);
var
NewStr: AnsiString;
begin
SetLength(NewStr, LocaleCharsFromUnicode(Codepage, 0, PChar(UTF), Length(UTF), nil, 0, nil, nil));
LocaleCharsFromUnicode(Codepage, 0, PChar(UTF), Length(UTF), PAnsiChar(NewStr), Length(NewStr), nil, nil);
SetCodePage(PRawByteString(@NewStr)^, Codepage, False);
{ alternatively:
var enc: TEncoding := TEncoding.GetEncoding(Codepage);
try
SetLength(NewStr, enc.GetByteCount(UTF));
enc.GetBytes(PChar(UTF), Length(UTF), PByte(PAnsiChar(NewStr)), Length(NewStr));
finally
enc.Free;
end;
SetCodePage(PRawByteString(@NewStr)^, Codepage, False);
}
{ alternatively:
var raw: RawByteString := PRawByteString(@UTF)^;
SetCodePage(raw, Codepage, True);
NewStr := PAnsiString(@raw)^;
}
Self.AsAnsiString := NewStr;
end;
function TUTFStringField.GetAsUTF(Codepage: Word): string;
var
txt: AnsiString;
begin
txt := Self.AsAnsiString;
SetLength(Result, UnicodeFromLocaleChars(Codepage, 0, PAnsiChar(txt), Length(txt), nil, 0));
UnicodeFromLocaleChars(Codepage, 0, PAnsiChar(txt), Length(txt), PWideChar(Result), Length(Result));
{ alternatively:
var enc: TEncoding := TEncoding.GetEncoding(Codepage);
try
SetLength(Result, enc.GetCharCount(PByte(PAnsiChar(txt)), Length(txt)));
enc.GetChars(PByte(PAnsiChar(txt)), Length(txt), PChar(Result), Length(Result));
finally
enc.Free;
end;
}
{ alternatively:
SetCodePage(PRawByteString(@txt)^, Codepage, False);
Result := string(txt);
}
end;
end.
And then, you can call them like this (just make sure UTFStringField
is in the uses
clause):
MyString := UniQuery1.FieldByName('TEXT').GetAsUTF(1252);
UniQuery1.FieldByName('TEXT').SetAsUTF(MyString, 1252);