I am reading one column of data (column 3) out of a big CSV file, and I want to know whether there is a more efficient way of doing this. Right now I read each entire row and then pick out my column.
// Reads the CSV file named by myCSV into memory, then splits each of the
// first rows on commas and picks one field out of the split result.
String S;
// Hold the list in a smart pointer so it is released when it goes out of
// scope, including when LoadFromFile() or the parsing below throws.
std::auto_ptr<TStringList> List(new TStringList);
List->LoadFromFile(myCSV); // load the CSV file
std::auto_ptr<TStringList> pListA(new TStringList);
// For testing, only the first 400 rows are processed; the extra Count check
// keeps files with fewer rows from indexing past the end of the list.
for (int i = 0; i < 400 && i < List->Count; i++) {
    // Replace every comma with a line break so that assigning to Text
    // splits the row into one entry per field.
    pListA->Text = StringReplace(List->Strings[i], ",", "\r\n", TReplaceFlags() << rfReplaceAll);
    // NOTE(review): Strings[] is zero-based, so index 3 is the fourth field
    // of the row -- confirm this is the intended column.
    S = pListA->Strings[3];
    // do something with this value from column 3
}
This code works fine, but it has to read the entire row of CSV data just to let me get that one column. Below is a sample of the first 2 rows of the CSV data:
1,0,-6856,12830,8458,-16666,98717,-10718,-80874,49999,-99998,98569,99998,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,8948,-6856,12830,8458,-16666,39641,63125,-99973,99998,-99998,39385,99998,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
UPDATE: Based on Remy's code, I ended up with the code below.
TStreamReader* Reader;
Reader = new TStreamReader(myCSV);
while (!Reader->EndOfStream)
{
String S = Reader->ReadLine();
int start = PosEx(_D(","), S, 1);
start = PosEx(_D(","), S, start+1) + 1;
int end = PosEx(_D(","), S, start);
S = S.SubString1(start, end-start);
// use S
}
Reader->Close();
delete Reader->BaseStream;
delete Reader;
Instead of using `TStringList` at all, you could use `TStreamReader` to read each line, and simply parse each line using `Strutils::PosEx()` and `String::SubString1()` to extract just the 3rd column value.
// Reads the CSV file named by myCSV line by line and extracts the third
// comma-separated field of every row.
// NOTE: in C++11 and later, use std::unique_ptr instead!
std::auto_ptr<TStreamReader> Reader(new TStreamReader(myCSV)); // load the CSV file
while (!Reader->EndOfStream)
{
    String S = Reader->ReadLine();
    // Positions are 1-based; PosEx() returns 0 when no further comma exists.
    const int comma1 = PosEx(_D(","), S, 1);
    const int comma2 = (comma1 > 0) ? PosEx(_D(","), S, comma1 + 1) : 0;
    if (comma2 > 0)
    {
        const int start = comma2 + 1;
        int end = PosEx(_D(","), S, start);
        if (end == 0)
            end = S.Length() + 1; // no trailing comma: the field runs to the end of the line
        S = S.SubString1(start, end - start);
    }
    else
    {
        // The row has fewer than three fields, so there is no third column;
        // yield an empty value instead of a slice of an earlier field.
        S = String();
    }
    // do something with this value from column 3
}