Search code examples
c#excelopenxml-sdk

Reading Excel files using OpenXML


I am trying to read Excel files via OpenXML and looking to output into a CSV. Currently it reads each cell in a separate line (due to writeline) or a single line (when using write). What is the best way to read and outputing in a tabular format like in Excel? Is there a built in feature in OpenXML which I can leverage for this?

static void Main(string[] args)
{
        String xlDocName = @"C:\Users\xlp111\source.xlsx";

        using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(xlDocName, false))
        {
            WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
            string cellValue = string.Empty;
            foreach(WorksheetPart worksheetPart in workbookPart.WorksheetParts)
            {
                OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);

                while (reader.Read())
                {
                if (reader.ElementType == typeof(Row))
                {
                    reader.ReadFirstChild();

                    do
                    {
                    if (reader.ElementType == typeof(Cell))
                    {
                        Cell c = (Cell)reader.LoadCurrentElement();

                        if (c.DataType != null && c.DataType == CellValues.SharedString)
                        {
                        SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
                        cellValue = ssi.Text.Text;
                        Console.WriteLine(cellValue);
                        }
                    }
                    }                      
                    while (reader.ReadNextSibling());
                }
                }
            }
            Console.ReadLine();
        }
    }
}

Solution

  • Edit:

    Using Open XML SDK for Microsoft Office

    install V2 from :https://www.microsoft.com/en-eg/download/details.aspx?id=5124&wa=wsignin1.0

    (or V2.5)

    The following class convert excel sheet to CSV file with delimeter

     //reference library
     using DocumentFormat.OpenXml.Packaging;
     using DocumentFormat.OpenXml.Spreadsheet;
    
    
    public class OpenXmlExcel
    {
    public void ExcelToCsv(string source, string target, string delimiter = ";", bool firstRowIsHeade = true)
    {
        var dt = ReadExcelSheet(source, firstRowIsHeade);
        DatatableToCsv(dt, target, delimiter);
    
    }
    
    private void DatatableToCsv(DataTable dt, string fname, string delimiter = ";")
    {
    
        using (StreamWriter writer = new StreamWriter(fname))
        {
            foreach (DataRow row in dt.AsEnumerable())
            {
                writer.WriteLine(string.Join(delimiter, row.ItemArray.Select(x => x.ToString())) + delimiter);
            }
        }
    
    }
    
    List<string> Headers = new List<string>();
    
    
    private DataTable ReadExcelSheet(string fname, bool firstRowIsHeade)
    {
    
        DataTable dt = new DataTable();
        using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fname, false))
        {
            //Read the first Sheets 
            Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
            Worksheet worksheet = (doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart).Worksheet;
            IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();
    
            foreach (Row row in rows)
            {
                //Read the first row as header
                if (row.RowIndex.Value == 1)
                {
                    var j = 1;
                    foreach (Cell cell in row.Descendants<Cell>())
                    {
                        var colunmName = firstRowIsHeade ? GetCellValue(doc, cell) : "Field" + j++;
                        Console.WriteLine(colunmName);
                        Headers.Add(colunmName);
                        dt.Columns.Add(colunmName);
                    }
                }
                else
                {
                    dt.Rows.Add();
                    int i = 0;
                    foreach (Cell cell in row.Descendants<Cell>())
                    {
                        dt.Rows[dt.Rows.Count - 1][i] = GetCellValue(doc, cell);
                        i++;
                    }
                }
            }
    
        }
        return dt;
    }
    
    private string GetCellValue(SpreadsheetDocument doc, Cell cell)
    {
        string value = cell.CellValue.InnerText;
        if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
        {
            return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(int.Parse(value)).InnerText;
        }
        return value;
     }
    }
    

    How to use:

    new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv",";",true);
    or
    //use default: separator=";" ,first row is header
    new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv");