Search code examples
c#csvcsvhelper

Reading csv file that is organized differently in 2 different places


My csv file looks like

Metals:,E10
Al,0.1906
Ca,0.1132
Co,0.01951
Cu,0.5824
Cu,0.02383
Fe,0.03828
K,0.09577
Li,0.03024
Mg,0.007145
Na,0.1833
Ni,0.3236
Pb,0.0005787
Ti,0.4931
Tl,0.001887
Zn,0.07644

GLot,id,Slot,Scribe,Diameter,MPD,SResistivity,SThickness,TTV,LTV,Warp,Bow,S_U_A,Ep,Epi_L,Epi_Layer,Epi_Layer_2,EThick,E2thick,E2Dope,E2DopeT,E2DopeMax,E2DopeMin
31075046-001,XFB-LE00674.CP10023+001-12,1,22C1285,149.98,0,0.0217,334.71,1.91,1.03,5.35,-0.91,99.590582,1.0,1.0E18,9.8,1.12,9.9,9.6,9926193600000000,4.5574,10834500800000000,9551876800000000

My code looks like this:

namespace CsvHelperTest
{
    // Console test harness: opens a CSV file with CsvHelper and maps rows onto EpiDataNames by column index.
    class CsvHelperTester
    {
        // Configures the reader, consumes one row, then materializes every remaining row in the file.
        static void Main(string[] args)
        {
            var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
            {
                // No header row is declared; EpiDataNames is mapped by [Index] instead of by name.
                HasHeaderRecord = false,
                HeaderValidated = null,
                // Blank lines are ignored, so the empty line between the two blocks is not visible to the read loop.
                IgnoreBlankLines = true,
                MissingFieldFound = null,
                AllowComments = true,
                Comment = ';',
                Delimiter = ",",
                TrimOptions = TrimOptions.Trim, 
                // NOTE(review): with HasHeaderRecord = false this callback presumably never runs — confirm.
                PrepareHeaderForMatch = header => Regex.Replace(header.Header, ",", "\n"),
            };

            using (var streamReader = new StreamReader("C:\\Users\\eyoung\\Desktop\\parse test files\\XFB-1C2002A_62152_CoA.csv"))
            {
                using (var csvReader = new CsvReader(streamReader, csvConfig))
                {
                    // Runs exactly once (i = 0 only): a single Read() call consumes the first row.
                    for (var i = 0; i < 1; i++)
                    {
                        csvReader.Read();
                    }

                    // Reads ALL remaining rows, not just the first block; only columns 0 and 1 of each row are mapped.
                    var records = csvReader.GetRecords<EpiDataNames>().ToList();

                    // First remaining record is pulled out and removed from the list.
                    var table = records[0];

                    records.RemoveAt(0);

                    // Same list instance as 'records' (no copy is made).
                    var columns = records;

                    // The writer body below is entirely commented out, so nothing is written to the console.
                    using (var writer = new CsvWriter(Console.Out, CultureInfo.InvariantCulture))
                    {
                        //writer.WriteField(records[0].Type);
                        //writer.NextRecord();

                        //records.RemoveAt(0);
                        //foreach (var item in records.Select(r => r.Type))
                        //{
                        //    writer.WriteField(item);
                        //}
                        //writer.NextRecord();
                        //foreach (var item in records.Select(r => r.Value))
                        //{
                        //    writer.WriteField(item);
                        //}
                        //writer.NextRecord(); 
                    }
                }
            }
        }

        // Two-column record: column 0 goes to Type, column 1 goes to Value.
        public class EpiDataNames
        {
            [Index(0)]
            public string Type { get; set; }
            [Index(1)]
            public string Value { get; set; }
        }
    }
}

This works well for the first set of data: it maps the rows into two columns, 'Type' and 'Value'. The issue arises when the second set of data is reached. Is there a way I can read only the first block of data? When I try to leave out those last headers, it acts weird and removes the first block of data.

// Executes exactly once (only i = 0 satisfies i < 1), i.e. a single Read() call.
for (var i = 0; i < 1; i++)
{
    csvReader.Read(); //this skips the first line of data
}

// NOTE(review): i starts at 0 and the condition is i > 18, which is false on the very first check,
// so the loop body never executes and no Read() call is made here.
for (var i = 0; i > 18; i++)
{
    csvReader.Read(); //I thought this would skip the last lines of data, but it doesn't.
}

The issue with the second block of headers is that the headers are reading like

Type Value
GLot Id

when it should be,

Type Value
GLot 31075046-001

Any ideas? I'm pretty lost on this one. I should also mention that I have no control over this CSV file, so I cannot edit it beforehand.


Solution

  • If you only want to read the first block of data, your code can be pretty simple.

    // Reads only the first block of the file. No header row is declared,
    // so EpiDataNames is populated by column index.
    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        HasHeaderRecord = false
    };
    
    using (var reader = new StreamReader("C:\\Users\\eyoung\\Desktop\\parse test files\\XFB-1C2002A_62152_CoA.csv"))
    using (var csv = new CsvReader(reader, config))
    {
        var records = new List<EpiDataNames>();
        
        // Stop at the header row of the second block ("GLot,id,...").
        // BOTH cells must match before the loop ends, so a data row that merely has
        // "GLot" in column 0 or "id" in column 1 does not cut the first block short.
        while (csv.Read() && !(csv.GetField(0) == "GLot" && csv.GetField(1) == "id"))
        {
            records.Add(csv.GetRecord<EpiDataNames>());
        }
        
        // NOTE(review): Dump() is presumably the LINQPad output helper — replace with your own output if not running there.
        records.Dump();
    }
    
    

    You also have the option of setting IgnoreBlankLines = false and then using the blank line as your break point. With either solution, after you have read the first block of data, you can go on to read the second block.

    The only potential issue I see with reading the second block concerns HasHeaderRecord: if you leave it at the default of true, you lose the first line of data (Metals:,E10) because it is treated as a header, but if you set it to false, you can't read the header for the second block of data.

    // Script entry point: parses an inline two-block CSV sample.
    // Block 1 (up to the blank line) is read row by row into EpiDataNames;
    // block 2 (header + rows after the blank line) is read into MyOtherClass.
    void Main()
    {
        var data = @"Metals:,E10
    Al,0.1906
    Ca,0.1132
    Co,0.01951
    Cu,0.5824
    
    GLot,id,Slot,Scribe
    31075046-001,XFB-LE00674.CP10023+001-12,1,22C1285";
    
        // Blank lines are kept so the empty row can act as the separator between the blocks.
        var options = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            IgnoreBlankLines = false
        };
    
        using (var text = new StringReader(data))
        using (var csv = new CsvReader(text, options))
        {
            var firstBlock = new List<EpiDataNames>();
    
            while (csv.Read())
            {
                // A row whose fields are all empty marks the end of the first block.
                if (csv.Parser.Record.All(field => field == string.Empty))
                {
                    break;
                }
    
                firstBlock.Add(csv.GetRecord<EpiDataNames>());
            }
            
            // Advance onto the second block's header row, then register it as the header.
            csv.Read(); // Remove this line if using the first solution
            csv.ReadHeader();
            
            var secondBlock = csv.GetRecords<MyOtherClass>().ToList();
    
            firstBlock.Dump();
            secondBlock.Dump();
        }
    }
    
    /// <summary>
    /// A row of the first data block, bound by column position rather than by header name.
    /// </summary>
    public class EpiDataNames
    {
        /// <summary>Value of the column at index 0.</summary>
        [Index(0)] public string Type { get; set; }

        /// <summary>Value of the column at index 1.</summary>
        [Index(1)] public string Value { get; set; }
    }
    
    /// <summary>
    /// A row of the second data block (header: GLot,id,Slot,Scribe).
    /// </summary>
    public class MyOtherClass
    {
        public string GLot { get; set; }

        /// <summary>Carries the explicit column name "id" (lower-case in the header row).</summary>
        [Name("id")] public string Id { get; set; }

        public string Slot { get; set; }

        public string Scribe { get; set; }
    }