Search code examples
delphi delphi-2010

encode string from json utf-8


I have a JSON file {"data":[{"clientName":"Is Такой текст"}]} that is UTF-8 encoded, and I am trying to see the text in the tag clientName using the ShowMessage() function.

Instead of normal text, I get "Is strangeText". I understand that the problem is in the encoding, but I don't understand how to fix it.

{ Loads 'clientOrders.json', parses it as JSON, takes the array stored in the
  first top-level pair and shows every value of every object in that array
  with ShowMessage. Raises Exception when the parse result is nil. }
procedure TForm1.jsonTest;
var
  JSONData, JSON: TJSONObject;
  jArr: TJSONArray;
  s: TStringList;
  i, j: Integer;
  jValue: TJSonValue;      // never assigned anywhere in this procedure
  JsonArray: TJSONArray;   // declared but not used
  jPair: TJSONPair;
begin
  // This instance is overwritten by the ParseJSONValue result below without
  // being freed first.
  json := TJSONObject.Create;
  s := TStringList.Create;
  try
    // No encoding is passed here, so the file is not explicitly read as UTF-8.
    S.LoadFromFile('clientOrders.json');
    // NOTE(review): `text` is not declared in this procedure's var section;
    // presumably it resolves to a member of the form - confirm.
    text := S.Text;
    // BytesOf() is called without an encoding; the text is turned back into
    // bytes before being handed to the parser.
    JSON := TJSONObject.ParseJSONValue(BytesOf(text),0) as TJSONObject;
    if JSON <> nil then
    begin
      // The first pair of the root object is expected to hold an array.
      jPair := JSON.Get(0);
      jArr := jPair.JsonValue as TJSONArray;

      for I := 0 to jArr.Size-1 do
      begin
        // Each array element is expected to be a JSON object.
        JSONData := jArr.Get(I) as TJSONObject;

        // Show the value of every pair in the element.
        for j := 0 to JSONData.Size - 1 do
        begin
          ShowMessage(JSONData.Get(j).JsonValue.ToString);
        end;
      end;
    end
    else
      raise Exception.Create('This is not a JSON');
  finally
    json.Free;
    s.Free;
    // NOTE(review): jValue was never assigned above, so this frees an
    // uninitialised reference.
    jValue.Free;
  end;
end;

Solution

  • Assuming you are running this code on Windows, then the problem is two-fold:

    • you are not telling TStringList.LoadFromFile() what the encoding of the file is. So, unless the file begins with a UTF-8 BOM (which is unlikely with a JSON file), it will be decoded as ANSI, not as UTF-8, thus corrupting any non-ASCII characters.

    • you are converting the decoded text back into bytes without specifying an encoding. The overload of ParseJSONValue() you are using expects UTF-8 encoded bytes, but BytesOf() will encode to ANSI, not to UTF-8, thus corrupting non-ASCII characters even further.

    That is why you are getting garbage text from the JSON.

    There are other problems with your code, too. Namely, a memory leak and a double-free, due to you mismanaging the initial TJSONObject.

    Try this instead.

    { Reads 'clientOrders.json' as UTF-8 text, parses it as JSON and shows, via
      ShowMessage, every value of every object contained in the array held by
      the first top-level pair.

      Raises Exception('This is not a JSON') when the parser returns nil.
      The `as` casts raise EInvalidCast when the document does not have the
      expected object -> array -> object shape. }
    procedure TForm1.jsonTest;
    var
      JSONData, JSON: TJSONObject;
      jArr: TJSONArray;
      s: TStringList;
      i, j: Integer;
      jValue: TJSonValue;
      data: string;
    begin
      // Decode the file explicitly as UTF-8 instead of relying on the default.
      s := TStringList.Create;
      try
        s.LoadFromFile('clientOrders.json', TEncoding.UTF8);
        data := s.Text;
      finally
        s.Free;
      end;
      { Alternatively:
      data := IOUtils.TFile.ReadAllText('clientOrders.json', TEncoding.UTF8);
      }

      // Hand the parser UTF-8 bytes: encode the string explicitly as UTF-8.
      jValue := TJSONObject.ParseJSONValue(TEncoding.UTF8.GetBytes(data), 0);
      if jValue = nil then
        raise Exception.Create('This is not a JSON');

      // jValue is the only object that has to be freed here; JSON, jArr and
      // JSONData are just references into the tree it owns.
      try
        JSON := jValue as TJSONObject;
        jArr := JSON.Get(0).JsonValue as TJSONArray;
        for i := 0 to jArr.Size - 1 do
        begin
          JSONData := jArr.Get(i) as TJSONObject;
          for j := 0 to JSONData.Size - 1 do
          begin
            ShowMessage(JSONData.Get(j).JsonValue.ToString);
          end;
        end;
      finally
        jValue.Free;
      end;
    end;
    

    Alternatively, don't decode the file bytes into a string only to convert them back into bytes; load them as-is into ParseJSONValue() instead, e.g.:

    { Variant that feeds the raw file bytes to the JSON parser through a
      TBytesStream, without decoding them into a string first.
      The `...` lines stand for code elided from this snippet. }
    procedure TForm1.jsonTest;
    var
      ...
      jValue: TJSonValue;
      data: TBytesStream;
    begin
      data := TBytesStream.Create;
      try
        // Read the file bytes as they are; no text decoding happens here.
        data.LoadFromFile('clientOrders.json');
        // NOTE(review): TBytesStream.Bytes may be longer than data.Size
        // (buffer capacity) - confirm the parser tolerates trailing bytes.
        jValue := TJSONObject.ParseJSONValue(data.Bytes, 0);
        ...
      finally
        // The stream is released even if parsing or the elided code raises.
        data.Free;
      end;
    end;
    

    Or:

    { Variant that reads the whole file into a TBytes array and passes it
      directly to the JSON parser, without decoding it into a string first.
      The `...` lines stand for code elided from this snippet. }
    procedure TForm1.jsonTest;
    var
      ...
      jValue: TJSonValue;
      data: TBytes;
    begin
      // Load the file content as raw bytes; no text decoding happens here.
      data := IOUtils.TFile.ReadAllBytes('clientOrders.json');
      // Parse starting at byte offset 0 of the loaded data.
      jValue := TJSONObject.ParseJSONValue(data, 0);
      ...
    end;