Search code examples
arraysdelphidistinctdelphi-xe7

How to make array distinct


I have an array of integer and string fields. To make it distinct I currently copy line by line into new array and with each record I check if the record already exists in new, if not I copy it. At the end I copy new array back to original.

It works, but is slow. Is there any faster, easier way to do this?

TArrayMixed = record
    Field1: integer;
    Field2: integer;
    Field3: integer;
    Field4: string;
    Field5: string;
    Field6: string;
 end;

procedure TForm1.Button10Click(Sender: TObject);
var
  ArrayMixed, ArrayMixed_tmp: array of TArrayMixed;
  i, j, vIdx: integer;
  vExists: boolean;
begin
  SetLength(ArrayMixed, 100000);
  for i := 0 to 99999 do
  begin
    ArrayMixed[i].Field1 := 1 + Random(5);
    ArrayMixed[i].Field2 := 1 + Random(5);
    ArrayMixed[i].Field3 := 1 + Random(5);
    ArrayMixed[i].Field4 := 'String';
    ArrayMixed[i].Field5 := 'Another string';
    ArrayMixed[i].Field6 := 'New string';
  end;

  // Sort
  TArray.Sort<TArrayMixed > (ArrayMixed, TComparer<TArrayMixed > .Construct(function(const Left, Right: TArrayMixed): Integer
    begin
      Result := MyCompareAMixed(Left, Right);
    end
    ));

  // Distinct
  SetLength(ArrayMixed_tmp, Length(ArrayMixed));
  vIdx := 0;
  for i := Low(ArrayMixed) to High(ArrayMixed) do
  begin
    vExists := False;
    for j := Low(ArrayMixed_tmp) to vIdx - 1 do
      if (ArrayMixed_tmp[j].Field1 = ArrayMixed[i].Field1) and
        (ArrayMixed_tmp[j].Field2 = ArrayMixed[i].Field2) and
        (ArrayMixed_tmp[j].Field3 = ArrayMixed[i].Field3) and
        (ArrayMixed_tmp[j].Field4 = ArrayMixed[i].Field4) and
        (ArrayMixed_tmp[j].Field5 = ArrayMixed[i].Field5) and
        (ArrayMixed_tmp[j].Field6 = ArrayMixed[i].Field6) then
      begin
        vExists := True;
        Break;
      end;

    if not vExists then
    begin
      ArrayMixed_tmp[vIdx] := ArrayMixed[i];
      Inc(vIdx);
    end;
  end;
  SetLength(ArrayMixed_tmp, vIdx);

  // now copy back to original array
  SetLength(ArrayMixed, 0);
  SetLength(ArrayMixed, Length(ArrayMixed_tmp));
  for i := Low(ArrayMixed_tmp) to High(ArrayMixed_tmp) do
    ArrayMixed[i] := ArrayMixed_tmp[i];

  sleep(0);

end;

Edit:

Since in real data the strings are not all the same, the part where it makes distinct array is slower when original array is filled like this:

Edit #2: (copied wrong code in Edit #1)

for i := 0 to 999999 do
  begin
    ArrayMixed[i].Field1 := 1 + Random(5);
    ArrayMixed[i].Field2 := 1 + Random(5);
    ArrayMixed[i].Field3 := 1 + Random(5);
    ArrayMixed[i].Field4 := 'String'+IntToStr(i mod 5);
    ArrayMixed[i].Field5 := 'Another string'+IntToStr(i mod 5);
    ArrayMixed[i].Field6 := 'New string'+IntToStr( i mod 5);
  end;

Edit #3: Publishing code for sorting - only first 3 fields are sorted!

TMyArray3 = array[1..3] of Integer;

    function CompareIntegerArray3(const lhs, rhs: TMyArray3): Integer;
    var
      i: Integer;
    begin
      Assert(Length(lhs) = Length(rhs));
      for i := low(lhs) to high(lhs) do
        if lhs[i] < rhs[i] then
          exit(-1)
        else if lhs[i] > rhs[i] then
          exit(1);

      exit(0);
    end;

    function GetMyArrayAMixed(const Value: TArrayMixed): TMyArray3;
    begin
      Result[1] := Value.Field1;
      Result[2] := Value.Field2;
      Result[3] := Value.Field3;
    end;

    function MyCompareAMixed(const lhs, rhs: TArrayMixed): Integer;
    begin
      Result := CompareIntegerArray3(GetMyArrayAMixed(lhs), GetMyArrayAMixed(rhs));
    end;

Solution

  • Just check for duplicates next to the prior index, since the array is sorted. Here is the sorting comparer reused as well.

    function RemoveDuplicates(const anArray: array of TArrayMixed): TArray<TArrayMixed>;
    var
      j, vIdx: integer;
    begin
      // Sort
      TArray.Sort<TArrayMixed > (anArray, TComparer<TArrayMixed >.Construct(function(const Left, Right: TArrayMixed): Integer
        begin
          Result := MyCompareAMixed(Left, Right);
        end
        ));
    
      // Distinct
      SetLength(Result, Length(anArray));
      vIdx := 0;
      j := 0;
      while (j <= High(anArray) do
      begin
        Result[vIdx] := anArray[j];
        Inc(j);
        While (j <= High(anArray)) and (MyCompareAMixed(Result[vIdx],anArray[j]) = 0) do
          Inc(j);
       Inc(vIdx);
      end;
      SetLength(Result, vIdx);
    end;
    

    Update:

    In an update to the question it is stated that the array is only partially sorted. One way to reduce the number of iterations to remove duplicates would then be to:

    • Find start and stop index to items that share the first sorting criteria.
    • Iterate among them to sort out duplicates.