I have the following piece of code in a much larger OpenXML Excel reader. This reader grabs the information assigns to a dataset and then is displayed in a datagridview:
public static DataTable ExtractExcelSheetValuesToDataTable(string xlsxFilePath, string sheetName, int startingRow) {
DataTable dt = new DataTable();
using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, true)) {
//Access the main Workbook part, which contains data
WorkbookPart workbookPart = myWorkbook.WorkbookPart;
WorksheetPart worksheetPart = null;
if (!string.IsNullOrEmpty(sheetName)) {
Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
} else {
worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
}
SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;
if (worksheetPart != null) {
Row lastRow = worksheetPart.Worksheet.Descendants<Row>().LastOrDefault();
#region ColumnCreation
//Returns the columns - come back to this later - may be able to modify this to have
//A checkbox "Column names in first row"
Row firstRow = worksheetPart.Worksheet.Descendants<Row>().FirstOrDefault();
int columnInt = 0;
//if (firstRow != null)
//{
foreach (Cell c in firstRow.ChildElements)
{
string value = GetValue(c, stringTablePart);
dt.Columns.Add(columnInt + ": " + value);
columnInt++;
}
//}
#endregion
#region Create Rows
//if (lastRow != null)
//{
//lastRow.RowIndex;
for (int i = startingRow; i <= 150000; i++)
{
DataRow dr = dt.NewRow();
bool empty = true;
Row row = worksheetPart.Worksheet.Descendants<Row>().Where(r => i == r.RowIndex).FirstOrDefault();
int j = 0;
if (row != null)
{
foreach (Cell c in row.ChildElements)
{
//Get cell value
string value = GetValue(c, stringTablePart);
if (!string.IsNullOrEmpty(value) && value != "")
empty = false;
dr[j] = value;
j++;
if (j == dt.Columns.Count)
break;
}
if (empty)
break;
dt.Rows.Add(dr);
}
}
}
#endregion
}
// }
return dt;
}
public static string GetValue(Cell cell, SharedStringTablePart stringTablePart) {
if (cell.ChildElements.Count == 0) return null;
//get cell value
string value = cell.ElementAt(0).InnerText;//CellValue.InnerText;
//Look up real value from shared string table
if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
return value;
}
public void GetSheetInfo(string fileName)
{
Sheets theSheets = null;
// Open file as read-only.
using (SpreadsheetDocument mySpreadsheet = SpreadsheetDocument.Open(fileName, false))
{
S sheets = mySpreadsheet.WorkbookPart.Workbook.Sheets;
WorkbookPart wbPart = mySpreadsheet.WorkbookPart;
theSheets = wbPart.Workbook.Sheets;
foreach (Sheet item in theSheets)
{
cmbSheetSelect.Items.Add(item.Name);
}
}
}
This has worked for basic spreadsheets but as I try to read more advanced ones I get a problem or two.
Firstly, I have a worksheet that has 5 columns:see here
However when I run my program it only returns the first 4 columns and not column E and all its data.
My second question would is it possible using that code (or a variation of it) to be able to specify the line I want the program to read as the datagridview column heading?
In case anyone needs this I found that changing:
Row firstRow = worksheetPart.Worksheet.Descendants<Row>().FirstOrDefault();
To
Row firstRow = worksheetPart.Worksheet.Descendants<Row>().ElementAtOrDefault(columnIndex)
Worked. With columnIndex being a variable I can change based on the sheet selected.