Search code examples
matlabcell-arraytextreader

Import file into MATLAB cell array with space delimiter


I have an .asc file, 'test.asc', whose lines differ in length and content.

  my name is blalala
  This is my home and I live in here
  12 13 10 is he
  he is my brother 12 13 14

How can I import the contents of the file into a MATLAB cell array in which each row holds one line of the file, split into words at the space delimiter?

  resultCellarray={
    'my'   'name' 'is'  'blalala' []    []   []     []   []
    'This' 'is'   'my'  'home'    'and' 'I' 'live' 'in' 'here'
    '12'   '13'   '10'  'is'      'he'  []   []     []   []
    'he'   'is'   'my'  'brother' '12' '13'  '14'   []   []
    }

I have tried inserting each line as one cell:

   % Read the file with newline as the only delimiter and no whitespace
   % characters, so that each line of the file ends up in one cell of 'content'.
   content = textread('test.asc','%s','delimiter','\n','whitespace','');     

and then splitting each cell into several columns, using the approach from "separating cell array into several columns MATLAB", but this takes a lot of time when the file is large. What is the fastest way to do this?


Solution

  • This code should run very fast (split 1M characters in 0.2sec):

    %Split a text file into a cell array: one row per line, one word per cell.
    %Lines with fewer words than the longest line are padded with [] cells.
    %Words are separated by one or more spaces/tabs; leading and trailing
    %blanks on a line are ignored and a blank line gives a row of [] cells.
    %
    %generate random file (optional, for timing tests)
    % w=[10,13,32*ones(1,10),97:122,97:122];
    % FILE_LENGTH=10*1000*1000;mytext=char(w(randi(length(w),1,FILE_LENGTH))); 
    % fileID = fopen('z:\mytest.asc','w');fprintf(fileID,'%s',mytext);fclose(fileID);
    clear
    tic
    %settings
    Filename='z:\test.asc';
    LineDelimiter=newline;%=char(10)
    WordDelimiter=' ';
    
    %read file
    [fid,errmsg]=fopen(Filename,'r');
    if fid<0
        %fail here with the real reason instead of inside fread
        error('ImportWords:cannotOpen','Cannot open "%s": %s',Filename,errmsg);
    end
    text=reshape(fread(fid,'*char'),1,[]); %1-by-N row, also for an empty file
    fclose(fid);
    
    %fix text
    text(text==char(9))=WordDelimiter; %replace tab with space
    text(text==char(13))=[];%remove '\r'
    if ~isempty(text) && text(end)~=LineDelimiter
        text(end+1)=LineDelimiter; %add eol if needed (never for an empty file)
    end
    
    %locate words: a word is a maximal run of non-delimiter characters, so
    %repeated, leading or trailing spaces can never produce an empty word
    IsEol=(text==LineDelimiter);
    IsDelim=IsEol|(text==WordDelimiter);
    Edges=diff(int8([false,~IsDelim,false])); %+1 where a word starts, -1 just after it ends
    WordStart=find(Edges==1);
    WordEnd=find(Edges==-1)-1;
    NumOfWords=numel(WordStart);
    
    %count words per line
    IdxNewline=find(IsEol);
    NumOfLines=numel(IdxNewline); %2eol=2lines
    WordsPerLine=zeros(1,NumOfLines);
    iword=1;
    for i=1:NumOfLines
        %words are in file order, so one forward pass assigns them to lines
        while iword<=NumOfWords && WordStart(iword)<IdxNewline(i)
            WordsPerLine(i)=WordsPerLine(i)+1;
            iword=iword+1;
        end
    end
    MaxWords=max([0,WordsPerLine]); %the leading 0 keeps this a scalar when there are no lines
    
    %split
    Output=cell(NumOfLines,MaxWords);
    iword=0;
    for i=1:NumOfLines
        for j=1:WordsPerLine(i)
            iword=iword+1;
            Output{i,j}=text(WordStart(iword):WordEnd(iword));
        end
    end
    toc
    
    % disp(Output)