Search code examples
Tags: delphi, streamreader, delphi-10.3-rio

Fast way to get the total number of lines in a large file


I'm dealing with large text files (bigger than 100MB). I need the total number of lines as fast as possible. I'm currently using the code below (update: added try-finally):

var
  Reader: TStreamReader;
  totallines: int64;
begin
  // Open the file as UTF-8 text; the reader is released in the finally
  // section even if reading raises an exception.
  Reader := TStreamReader.Create(myfilename, TEncoding.UTF8);
  try
    totallines := 0;
    // Walk the file one line at a time; only the number of lines matters,
    // so the text returned by ReadLine is discarded.
    while not Reader.EndOfStream do
    begin
      Reader.ReadLine;
      Inc(totallines);
    end;
  finally
    Reader.Free;
  end;
end;

Is there any faster way to get totallines?


Solution

  • Program LineCount;
    
    {$APPTYPE CONSOLE}
    {$WEAKLINKRTTI ON}
    {$RTTI EXPLICIT METHODS([]) PROPERTIES([]) FIELDS([])}
    {$SetPEFlags 1}
    
    { Compile with XE8 or above... }
    
    USES
      SysUtils,
      BufferedFileStream;
    
    VAR
      LineCnt: Int64;
      Ch: Char;
      BFS: TReadOnlyCachedFileStream;
    
    function Komma(const S: string; const C: Char = ','): string;
    { Returns S with the separator C inserted between every group of three
      characters, counted from the right: Komma('1234567') = '1,234,567'.

      S is expected to be an integer written as plain digits.  A single leading
      '+' or '-' is kept in front of the first group and is not counted as a
      digit, so Komma('-123') = '-123' rather than '-,123'.  Strings of three
      characters or fewer (including the empty string) are returned unchanged.

      Original author's note: about 4 times faster than Comma. }
    var
      I: Integer;          // 1-based position where the next separator goes
      FirstDigit: Integer; // 1-based position of the first digit in S
      Lead: string;        // first character of S, or '' when S is empty
    begin
      Result := S;

      // Copy is always 1-based and is safe on an empty string.
      Lead := Copy(S, 1, 1);
      if (Lead = '-') or (Lead = '+') then
        FirstDigit := 2
      else
        FirstDigit := 1;

      // Work from right to left so that earlier insertions do not shift the
      // positions still to be processed.
      I := Length(S) - 2;
      while I > FirstDigit do
      begin
        Insert(C, Result, I);
        Dec(I, 3);
      end;
    end; {Komma}
    
    { Main program.
      Expects exactly one command-line argument: the path of the file to scan.
      The file is read one byte at a time through the buffered stream and its
      lines are counted.  A line ends at LF, at CR, or at a CR LF pair (counted
      once); a final line without a terminator is counted as well, so the total
      matches what a ReadLine loop would report.  A '.' is printed each time the
      count reaches a multiple of one million lines. }
    BEGIN
      writeln('LineCount - Copyright (C) 2020 by Walter L. Chester.');
      writeln('Counts lines in the given textfile.');
      if ParamCount <> 1 then
        begin
          writeln('USAGE:  LineCount <filename>');
          writeln;
          writeln('No file size limit!  Counts lines: takes 4 minutes on a 16GB file.');
          Halt;
        end;
      if not FileExists(ParamStr(1)) then
        begin
          writeln('File not found!');
          halt;
        end;
      writeln('Counting lines in file...');
      BFS := TReadOnlyCachedFileStream.Create(ParamStr(1), fmOpenRead);
      try
        LineCnt := 0;
        { Seed Ch with a terminator so that an empty file is not reported as
          having an unterminated last line.  Ch is the Char declared in the VAR
          section; Read below fills only one byte of it, and this assignment
          also clears the remaining byte so the comparisons are reliable. }
        Ch := #10;
        while BFS.Read(Ch, 1) = 1 do
          begin
            { The inner loop re-tests the byte that was fetched while looking
              for the LF half of a CR LF pair, in case it is itself a CR. }
            while (Ch = #13) or (Ch = #10) do
              begin
                Inc(LineCnt);
                { Progress is reported only when the count actually changes;
                  testing it on every byte would print a dot per byte while
                  the count sits on 0 or on a multiple of one million. }
                if (LineCnt mod 1000000) = 0 then
                  write('.');
                if Ch = #10 then
                  Break;
                { Ch is CR: stop at end of file, or swallow the LF of a CR LF
                  pair so the pair counts as a single line. }
                if (BFS.Read(Ch, 1) <> 1) or (Ch = #10) then
                  Break;
              end;
          end;
        { Ch now holds the last byte read (assumes a Read that returns 0 leaves
          Ch untouched).  Anything other than a terminator means the file ends
          with a line that has no line break; count that line too. }
        if (Ch <> #10) and (Ch <> #13) then
          Inc(LineCnt);
        writeln;
        writeln('Total Lines: ' + Komma(LineCnt.ToString));
      finally
        BFS.Free;
      end;
    END.