Search code examples
delphifull-text-searchsavetdictionary

TDictionary save to file


I have a lot of files (about 160,000) and I need to keep track of the positions of individual words in those files (full-text search). So I used a dictionary like this:

WordDict : TDictionary<string, TDictionary<string, TIntegerDynArray>>;

Now I know that WORD1 is in FILE1, FILE3 and FILE100, and its positions in each file are <1,3,5>, <2,8,35>, etc. I can fill it and I can use it - it's very fast. But I don't know how to store the dictionary to a file efficiently.

EDIT: by "effectively" I mean that saving/loading should be quick and the resulting file should be small.


Solution

  • You can use Delphi's streaming system to write a proprietary stream format. If file size matters more than speed, you can additionally zip the stream. Here is some code:

    type
      // Word positions inside a single file.
      TFilePos = TArray<Integer>;
      // Maps a file name to the positions of one word inside that file.
      TFileDict = TDictionary<string, TFilePos>;
      // Maps a word to the per-file position table for that word and adds
      // persistence to a plain file or to a zip archive.
      // NOTE(review): the base class is TDictionary, not TObjectDictionary, so
      // the nested TFileDict instances are presumably not freed automatically
      // on Clear/Destroy - confirm who owns them.
      TWordDict = class (TDictionary<string, TFileDict>)
      private
        // Replaces the current content with the data read from stream.
        procedure LoadFromStream(stream: TStream);
        // Serializes the whole dictionary into stream.
        procedure SaveToStream(stream: TStream);
      public
        // Loads the dictionary from the 'worddict' entry of a zip archive.
        procedure LoadFromZip(const AFileName: string);
        // Loads the dictionary from an uncompressed file.
        procedure LoadFromFile(const AFileName: string);
        // Saves the dictionary as the 'worddict' entry of a zip archive.
        procedure SaveToZip(const AFileName: string);
        // Saves the dictionary to an uncompressed file.
        procedure SaveToFile(const AFileName: string);
      end;
    
    // Opens the zip archive AFileName for reading, extracts the entry named
    // 'worddict' and rebuilds the dictionary from it via LoadFromStream.
    procedure TWordDict.LoadFromZip(const AFileName: string);
    var
      archive: TZipFile;
      entryHeader: TZipHeader;
      entryData: TStream;
    begin
      archive := TZipFile.Create;
      try
        archive.Open(AFileName, zmRead);
        // entryData is assigned by Read, so it is only protected (and freed)
        // once Read has returned successfully.
        archive.Read('worddict', entryData, entryHeader);
        try
          LoadFromStream(entryData);
        finally
          entryData.Free;
        end;
        archive.Close;
      finally
        archive.Free;
      end;
    end;
    
    // Serializes the dictionary into an in-memory buffer and stores that buffer
    // as the entry 'worddict' in the zip archive AFileName.
    procedure TWordDict.SaveToZip(const AFileName: string);
    var
      archive: TZipFile;
      buffer: TStream;
    begin
      buffer := TMemoryStream.Create;
      try
        SaveToStream(buffer);
        // Rewind so the archive reads the serialized data from its beginning.
        buffer.Position := 0;

        archive := TZipFile.Create;
        try
          archive.Open(AFileName, zmWrite);
          archive.Add(buffer, 'worddict');
          archive.Close;
        finally
          archive.Free;
        end;
      finally
        buffer.Free;
      end;
    end;
    
    // Writes the dictionary to stream using TWriter.
    // Layout: list( word, list( fileName, count, count * position ... ) ... ).
    procedure TWordDict.SaveToStream(stream: TStream);
    var
      wordEntry: System.Generics.Collections.TPair<string, TFileDict>;
      fileEntry: System.Generics.Collections.TPair<string, TFilePos>;
      positions: TFilePos;
      idx: Integer;
      output: TWriter;
    begin
      output := TWriter.Create(stream, 4096);
      try
        output.WriteListBegin;
        for wordEntry in Self do
        begin
          output.WriteString(wordEntry.Key);

          // Inner list: one record per file that contains the word.
          output.WriteListBegin;
          for fileEntry in wordEntry.Value do
          begin
            positions := fileEntry.Value;
            output.WriteString(fileEntry.Key);
            // The count is stored first so the reader knows how many
            // integers belong to this file.
            output.WriteInteger(Length(positions));
            for idx := Low(positions) to High(positions) do
              output.WriteInteger(positions[idx]);
          end;
          output.WriteListEnd;
        end;
        output.WriteListEnd;
      finally
        output.Free;
      end;
    end;
    
    // Replaces the current content of the dictionary with the data read from
    // stream. The expected layout is the one produced by SaveToStream:
    // list( word, list( fileName, count, count * position ... ) ... ).
    //
    // Any exception raised by the reader (malformed data) or by Add
    // (duplicate key) is propagated to the caller; the per-word dictionary
    // that was being built at that moment is freed before re-raising.
    // Entries added before the failure stay in the dictionary.
    procedure TWordDict.LoadFromStream(stream: TStream);
    var
      sFiles: TFileDict;
      aPosi: TFilePos;
      size: Integer;
      i: Integer;
      sWord: string;
      reader: TReader;
      sFile: string;
    begin
      Clear;
      reader := TReader.Create(stream, 1024);
      try
        reader.ReadListBegin;
        while not reader.EndOfList do
        begin
          sWord := reader.ReadString;
          sFiles := TFileDict.Create;
          try
            reader.ReadListBegin;
            while not reader.EndOfList do
            begin
              sFile := reader.ReadString;
              size := reader.ReadInteger;
              // Drop the reference to the previous array first, so SetLength
              // allocates a fresh array instead of resizing one that is
              // already stored in the dictionary. This makes an extra
              // Copy() of every array unnecessary.
              aPosi := nil;
              SetLength(aPosi, size);
              for i := 0 to size - 1 do
                aPosi[i] := reader.ReadInteger;
              sFiles.Add(sFile, aPosi);
            end;
            reader.ReadListEnd;
            // Ownership of sFiles passes to the dictionary only here.
            Add(sWord, sFiles);
          except
            // Reading or adding failed: sFiles is not referenced by the
            // dictionary, so it must be released here to avoid a leak.
            sFiles.Free;
            raise;
          end;
        end;
        reader.ReadListEnd;
      finally
        reader.Free;
      end;
    end;
    
    // Loads the dictionary from the uncompressed file AFileName, replacing the
    // current content. Exceptions from opening or parsing the file propagate
    // to the caller.
    procedure TWordDict.LoadFromFile(const AFileName: string);
    var
      stream: TStream;
    begin
      // fmShareDenyWrite lets other processes read the file concurrently;
      // fmOpenRead on its own requests exclusive access on Windows, which makes
      // a read-only load fail whenever another reader has the file open.
      stream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
      try
        LoadFromStream(stream);
      finally
        stream.Free;
      end;
    end;
    
    // Writes the dictionary to the uncompressed file AFileName. The file is
    // opened with fmCreate and filled by SaveToStream.
    procedure TWordDict.SaveToFile(const AFileName: string);
    var
      target: TStream;
    begin
      target := TFileStream.Create(AFileName, fmCreate);
      try
        SaveToStream(target);
      finally
        // Release the file handle even if serialization raised.
        target.Free;
      end;
    end;