Search code examples
c#csvdictionarycsvhelper

Mapping data to a list when I have the position index


(I'm using CSVHelper package)

Hi,

I have an array of sbyte[] that holds the position of each header column in a CSV file. The array is defined as follows,

public sbyte[] ColumnIndex = new sbyte[Enum.GetNames(typeof(MyEnum)).Length];

and say I have a CSV file that looks like this:

col1,col2,col3,col4
name1,empId1,241682-27638-USD-CIGGNT ,1
name2,empId2,241682-27638-USD-OCGGINT ,1
name3,empId3,241942-37190-USD-GGDIV ,2
name4,empId4,241942-37190-USD-CHYOF ,1
name5,empId5,241942-37190-USD-EQPL ,1
name6,empId6,241942-37190-USD-INT ,1
name7,empId7,242066-15343-USD-CYJOF ,3
name8,empId8,242066-15343-USD-CYJOF ,3
name9,empId9,242066-15343-USD-CYJOF ,3
name10,empId10,241942-37190-USD-GGDIV ,2

Now calling the byte array and passing the Enum index would give me back where that position in the header row:

int conversion = (int)MyEnum.col3;

ColumnIndex[conversion]);

returns 2

Now this all works fine, but I seem to struggle to figure out how to map each column with its info to separate List<string> based off the position index I have.

I tried to put all fields in a single List<string> using this code:

 public List<string> ParseEntire(aliasType type, string PathToFile) {

            List<string> result = new List<string>();
            using (TextReader fileReader = File.OpenText(PathToFile)) {
                var csv = new CsvReader(fileReader, CultureInfo.InvariantCulture);

                string value;
                while (csv.Read()) {
                    for (int i = 0; csv.TryGetField<string>(i, out value); i++) {

                        result.Add(value);
                    }
                }
            }
            return result;
        }

However, that's useless because I won't be able to know the position of every header. I feel like there is an easier way to do it using the CSVHelper package and I'm just complicating a simple task. Any Help would be appreciated.

EDIT: For the following Emun

  public enum aliasType {
            col5,
            col4,
            col3,
            col2,
            col1
        }

After executing the code to map the position indices to ColumnIndex, it would look like this:

ColumnIndex  {sbyte[4]}
[0]             [-1]
[1]             [3] 
[2]             [2]
[3]             [1]
[4]             [0]

I'm mainly doing this because I do not know what the header row contains. So, I extract as much info as I can and when I get back an index of -1, I know that that specific field doesn't exist.

UPDATE:

the following code uses the Intersect to compare against different aliases for the same header column I'm trying to extract.

 public List<string> HeaderColumnParser(aliasType type, string PathToFile) {

            List<string> result = new List<string>();
            using (TextReader fileReader = File.OpenText(PathToFile)) {
                var csv = new CsvReader(fileReader, CultureInfo.InvariantCulture);
                CSVBOM extract = new CSVBOM("", CSVBOM.BOMFileType.csv);

                csv.Read();
                csv.ReadHeader();
                string[] header = csv.Context.HeaderRecord;
                IEnumerable<string> CommonHeaders;

                foreach (aliasType foo in Enum.GetValues(typeof(aliasType))) {
                    int res = Convert.ToInt32(foo);

                    switch (res) {
                        case 0:
                            // get matching string
                            CommonHeaders = header.Intersect(ReferenceDesignatorAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        case 1:
                            CommonHeaders = header.Intersect(ManufacturersPartNumberAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        case 2:
                            CommonHeaders = header.Intersect(ValueAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        case 3:
                            CommonHeaders = header.Intersect(DescriptionShortAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        case 4:
                            CommonHeaders = header.Intersect(DescriptionLongAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        case 5:
                            CommonHeaders = header.Intersect(ManufacturerAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        case 6:
                            CommonHeaders = header.Intersect(DNIAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        case 7:
                            CommonHeaders = header.Intersect(DataSheetAliases);
                            ColumnIndex[res] = extract.ExtractIndexHeader(CommonHeaders, header);
                            break;
                        default:
                            throw new Exception("Alias type is not recognized");
                    }
                }

            }
            return result;
        }

My Enum and array of strings for the aliases:

   public enum aliasType {
            ReferenceDesignatorAliases,
            ManufacturersPartNumberAliases,
            ValueAliases,
            DescriptionShortAliases,
            DescriptionLongAliases,
            ManufacturerAliases,
            DNIAliases,
            DataSheetAliases
        }

        //Returns -1 meaning not found
        public sbyte[] ColumnIndex = new sbyte[Enum.GetNames(typeof(aliasType)).Length];

        public string[] ReferenceDesignatorAliases = { "Reference Designator", "RefDes", "Designator", "Annotation" };
        public string[] ManufacturersPartNumberAliases = { "Manufacturer's Part Number", "MPN", "PN", "part Number" };
        public string[] ValueAliases = { "Value" };
        public string[] DescriptionShortAliases = { "Description Short", "Description" };
        public string[] DescriptionLongAliases = { "Description Long" };
        public string[] ManufacturerAliases = { "Manufacturer", "MF" };
        public string[] DNIAliases = { "DNI", "Do Not Install" };
        public string[] DataSheetAliases = { "DataSheet", "Data Sheet" };

Solution

  • I believe I have a little better idea of what you are trying to accomplish. Let me know if this solves your issue.

    public static void Main(string[] args)
    {
        using (MemoryStream stream = new MemoryStream())
        using (StreamWriter writer = new StreamWriter(stream))
        using (StreamReader reader = new StreamReader(stream))
        using (CsvReader csv = new CsvReader(reader, CultureInfo.InvariantCulture))
        {
            writer.WriteLine("MF,RefDes,MPN,Value");
            writer.WriteLine("name1,empId1,241682-27638-USD-CIGGNT ,1");
            writer.WriteLine("name2,empId2,241682-27638-USD-OCGGINT ,1");
            writer.WriteLine("name3,empId3,241942-37190-USD-GGDIV ,2");
            writer.WriteLine("name4,empId4,241942-37190-USD-CHYOF ,1");
            writer.Flush();
            stream.Position = 0;
    
            string[] ReferenceDesignatorAliases = { "Reference Designator", "RefDes", "Designator", "Annotation" };        
    
            csv.Read();
            csv.ReadHeader();
    
            var result = new List<string>();
    
            if (csv.Context.HeaderRecord.Intersect(ReferenceDesignatorAliases).Count() > 0)
            {
                while (csv.Read())
                {
                    if (csv.TryGetField(csv.GetFieldIndex(ReferenceDesignatorAliases), out string value))
                    {
                        result.Add(value);
                    }
                }
            }
        }
    
        Console.ReadKey();
    }
    

    Here is another option that gets all of the columns at once and then you can split them up into the individual column lists.

    public class Program
    {
        public static void Main(string[] args)
        {
            List<Foo> records;
    
            using (MemoryStream stream = new MemoryStream())
            using (StreamWriter writer = new StreamWriter(stream))
            using (StreamReader reader = new StreamReader(stream))
            using (CsvReader csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                writer.WriteLine("MF,RefDes,MPN,Value");
                writer.WriteLine("name1,empId1,241682-27638-USD-CIGGNT ,1");
                writer.WriteLine("name2,empId2,241682-27638-USD-OCGGINT ,1");
                writer.WriteLine("name3,empId3,241942-37190-USD-GGDIV ,2");
                writer.WriteLine("name4,empId4,241942-37190-USD-CHYOF ,1");
                writer.Flush();
                stream.Position = 0;
    
                csv.Configuration.RegisterClassMap<FooClassMap>();
    
                records = csv.GetRecords<Foo>().ToList();                
            }
    
            if (!records.All(r => r.ReferenceDesignator == null))
            {
                var ReferenceResult = records.Select(r => r.ReferenceDesignator).ToList();
            }
    
            if (!records.All(r => r.Manufacturer == null))
            {
                var ManufacturerResult = records.Select(r => r.Manufacturer).ToList();
            }
    
            Console.ReadKey();
        }
    }
    
    public class Foo
    {
        public string ReferenceDesignator { get; set; }
        public string ManufacturersPartNumber { get; set; }
        public int? Value { get; set; }
        public string DescriptionShort { get; set; }
        public string DescriptionLong { get; set; }
        public string Manufacturer { get; set; }
        public string Dni { get; set; }
        public string DataSheet { get; set; }
    }
    
    public class FooClassMap : ClassMap<Foo>
    {
        public FooClassMap()
        {
            Map(m => m.ReferenceDesignator).Optional().Name("Reference Designator", "RefDes", "Designator", "Annotation");
            Map(m => m.ManufacturersPartNumber).Optional().Name("Manufacturer's Part Number", "MPN", "PN", "part Number");
            Map(m => m.Value).Optional();
            Map(m => m.DescriptionShort).Optional().Name("Description Short", "Description");
            Map(m => m.DescriptionLong).Optional().Name("Description Long");
            Map(m => m.Manufacturer).Optional().Name("Manufacturer", "MF");
            Map(m => m.Dni).Optional().Name("DNI", "Do Not Install");
            Map(m => m.DataSheet).Optional().Name("DataSheet", "Data Sheet");
        }
    }