I have to consume some xlsx
documents. I've read Reading a date from xlsx using open xml sdk and http://www.dotnetperls.com/fromoadate. Most of my columns are texts (shared strings), but there are some numbers (integer numbers), and I have also some dates and date-times.
I'm using OpenXML SDK 2.5.
My problem is that I don't know how to distinguish the actual numbers from the dates. Both of them has DataType
of null
, and the textual number representation is in the Text
property of the cell.
Some code:
using (var xlsxStream = assembly.GetManifestResourceStream("Checklist.xlsx"))
using (var spreadsheetDocument = SpreadsheetDocument.Open(xlsxStream, false))
{
var workbookPart = spreadsheetDocument.WorkbookPart;
var sharedStringTable = workbookPart.SharedStringTablePart.SharedStringTable;
var worksheetPart = workbookPart.WorksheetParts.First();
var sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
string text;
foreach (Row r in sheetData.Elements<Row>())
{
foreach (Cell c in r.Elements<Cell>())
{
if (c.CellValue != null)
{
text = c.CellValue.Text;
if (c.DataType != null)
{
if (c.DataType.Value == CellValues.SharedString)
{
int tableIndex = int.Parse(text);
text = sharedStringTable.ChildElements[tableIndex].InnerText;
}
// note: the date cells do not have c.DataType.Value == CellValues.Date
// Their c.DataType is null, if they are OLE Automation date numbers
}
// So here I am, and I'd need to know if the number supposed to be an
// OLE Automation date or a number, so I can transform it if needed.
//if (it's a date) // <- ?????
//{
// double dateDouble = double.Parse(text);
// DateTime dateTime = DateTime.FromOADate(dateDouble);
// text = dateTime.ToShortDateString();
//}
Console.Write(text + " ");
}
else
{
Console.Write("NULL" + " ");
}
}
Console.WriteLine();
}
Console.WriteLine();
Console.ReadKey();
I just came across similar issue and it is not easy to check whether a cell contains date/time value, see Using cell format to determine a cell contains date/time value, but the issue doesn't end with built-in number formats, I needed to handle custom formats too. There are no utilities in OpenXML SDK 2.5 to help, so I had to write my own (doesn't support Thai date/time formats).
public class ExcelHelper
{
static uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 };
static Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id });
static Regex dateTimeFormatRegex = new Regex(@"((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*)", RegexOptions.Compiled);
public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart workbookPart)
{
var dateNumberFormats = workbookPart.WorkbookStylesPart.Stylesheet.NumberingFormats
.Descendants<NumberingFormat>()
.Where(nf => dateTimeFormatRegex.Match(nf.FormatCode.Value).Success)
.ToDictionary(nf => nf.NumberFormatId.Value);
var cellFormats = workbookPart.WorkbookStylesPart.Stylesheet.CellFormats
.Descendants<CellFormat>();
var dateCellFormats = new Dictionary<uint, NumberingFormat>();
uint styleIndex = 0;
foreach (var cellFormat in cellFormats)
{
if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value)
{
if (dateNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
{
dateCellFormats.Add(styleIndex, dateNumberFormats[cellFormat.NumberFormatId.Value]);
}
else if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
{
dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
}
}
styleIndex++;
}
return dateCellFormats;
}
// Usage Example
public static bool IsDateTimeCell(WorkbookPart workbookPart, Cell cell)
{
if (cell.StyleIndex == null)
return false;
var dateTimeCellFormats = ExcelHelper.GetDateTimeCellFormats(workbookPart);
return dateTimeCellFormats.ContainsKey(cell.StyleIndex);
}
}