I need to parse a specific lines in a text file in which to begin by a specific words as in the following image :
I need to parse the lines that begins by "Level" only and extract the value of "Row" and "Col" only. Note that the text file will contain 6 groups of such data where each group starts by
---------------- chromosome : # ------------------
See the following sample : Sample
I need to save each group's row and columns in a separated list for each!!. Is there any way to perform this ?
I tried the following method :
public List<int[,]> getValuesFromTextFile(String filePath ) {
IEnumerable<string> allLines = Enumerable.Empty<string>();
List<int[,]> path = new List<int[,]>();
int[,] item = new int[2,1];
if (File.Exists(filePath))
{
//Read all content of the files and store it to the list split with new line
allLines = File.ReadLines(filePath);
}
//all Level lines
IEnumerable<string> levelLines = allLines.Where(d => d.StartsWith("Level", StringComparison.CurrentCultureIgnoreCase));
foreach(string line in levelLines)
{
string[] values= line.Split(':');//either space or tab or others as your file contain seperator
for(int i=1; i <values.Length;i++) {
string value = values[i];// skip index 0,it contains label, remaining are point data
if (i == 3) item[1,0] = Int32.Parse(value);
if (i == 5 && item[1,0] != null ) { item[0,0] = Int32.Parse(value);
path.Add(item);
}
}
}
return path;
}
I got the following error at line( if (i == 3) item[1,0] = Int32.Parse(value);
) :
Input string was not in a correct format.
When I put a break point at this line I saw that the value of string "value" equal to null!!.
When I added a break point to see all lines list contents, I got as in the following image :
The above method needs to parse each group of levels separately!!.
You can use a named group Regex to parse the lines and build out a list of records.
void Main()
{
var file = File.ReadLines(@"C:\TreeParser.txt");
var groupRegex = new Regex(@"Chromosome : (?<Chromosome>[0-9])");
var recordRegex = new Regex(@"Level : '(?<Level>[0-9])', Row : '(?<Row>[0-9])', Col : '(?<Col>[0-9])'");
var groups = new List<Group>();
foreach (var line in file)
{
var groupMatch = groupRegex.Match(line);
if (groupMatch.Success)
{
groups.Add(new Group
{
Chromosome = int.Parse(groupMatch.Groups["Chromosome"].Value),
Records = new List<Record>()
});
}
var recordMatch = recordRegex.Match(line);
if (!recordMatch.Success)
{
// No match was found
continue;
}
var level = new Record
{
Level = int.Parse(recordMatch.Groups["Level"].Value),
Row = int.Parse(recordMatch.Groups["Row"].Value),
Col = int.Parse(recordMatch.Groups["Col"].Value)
};
groups.Last().Records.Add(level);
}
// groups now contains a list of each section from the file with a list of records
}
public class Record
{
public int Level { get; set; }
public int Row { get; set; }
public int Col { get; set; }
}
public class Group
{
public int Chromosome { get; set; }
public List<Record> Records { get; set; }
}