I am coding in C# .NET Core 2.2. I am trying to replace Excel Interop with EPPlusCore in my code for reliability and for portablilty. I have a series of invoices I am opening with EPPlus, but some of them throw a "Null Reference" exception when accessing the Workbook property of the Excel package.
This only happens when running the code without debugging it. When debugging, if I hover over the ExcelPackage item, it refreshes the reference to the Workbook and I am able to run the rest of the code.
public object[,] GetExcelDataEpplus(string filePath, int index,
bool sheetByName = false, string name = null, string password = null)
{
var remoteFileInfo = new FileInfo(filePath);
if (!remoteFileInfo.Exists)
{
throw new Exception("File does not exist: " + filePath);
}
var currentPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "temp-excel");
if (!Directory.Exists(currentPath))
{
Directory.CreateDirectory(currentPath);
}
var localFileInfo = new FileInfo(Path.Combine(currentPath, remoteFileInfo.Name));
if (!localFileInfo.Exists)
{
File.Copy(filePath, localFileInfo.FullName);
}
object[,] values = null;
try
{
if (!File.Exists(localFileInfo.FullName))
{
_logger.LogInformation(DbLog, "Cannot find file : " + localFileInfo.FullName);
return null;
}
else
{
_logger.LogInformation(DbLog, "Found file : " + localFileInfo.FullName);
}
_logger.LogInformation(DbLog, "Initializing EPPlus...");
using (var package = string.IsNullOrEmpty(password)
? new ExcelPackage(localFileInfo)
: new ExcelPackage(localFileInfo, password))
{
_logger.LogInformation(DbLog, "Opening Workbook...");
//todo Error Thrown Here
try
{
package.Workbook.FormulaParserManager.LoadFunctionModule(new ImporterFunctionModule());
}
catch (Exception e)
{
_logger.LogWarning(DbLog, e, $"Could not load workbook : Loading file again...");
try
{
System.Threading.Thread.Sleep(1000);
package.Workbook.FormulaParserManager.LoadFunctionModule(new ImporterFunctionModule());
}
catch (Exception ex)
{
_logger.LogError(DbLog, ex, "Could not load workbook");
throw;
}
}
var workbook = package.Workbook;
_logger.LogInformation(DbLog, $"Calculating formulas...");
workbook.Calculate();
_logger.LogInformation(DbLog, "Finding Worksheet...");
var worksheet = sheetByName ? workbook.Worksheets.FirstOrDefault(x => x.Name == name) : workbook.Worksheets[index];
if (worksheet == null)
{
throw new Exception($"Could not find worksheet : {name}");
}
_logger.LogInformation(DbLog, $"Reading from worksheet : {worksheet.Name}...");
var start = worksheet.Dimension.Start;
var end = worksheet.Dimension.End;
values = worksheet.Cells[start.Row, start.Column, end.Row, end.Column].ToMultiDimensionalArray();
}
}
catch (Exception e)
{
_logger.LogError(DbLog, e, $"GetExcelInvoiceDataEpplus from {filePath} ({localFileInfo.FullName})"); //todo propogate error
}
finally
{
File.Delete(localFileInfo.FullName);
}
var rowCount = values?.GetLength(0) ?? 0;
_logger.LogInformation(DbLog, $"EPPLus found {rowCount} rows in the spreadsheet");
return values;
}
On most files, this works correctly, and I get a multidimensional array of the values from the specified worksheet tab. However, on some files, it does not work and I am at a loss as to why.
The closest similar problem I've been able to find is this: https://github.com/JanKallman/EPPlus/issues/416
But if this is accurate, how would I know what worksheet names have bad references without accessing the workbook first?
I found the solution. The excel files that were having problems were ones that were larger than average.
The solution was just to wait longer for them.
try
{
package.Workbook.FormulaParserManager.LoadFunctionModule(new ImporterFunctionModule());
}
catch (Exception e)
{
for (var i=1; i < 6; i++)
{
_logger.LogWarning(DbLog, e, $"Could not load workbook : Loading file again (Attempt #{i})...");
try
{
System.Threading.Thread.Sleep(2000);
package.Workbook.FormulaParserManager.LoadFunctionModule(new ImporterFunctionModule());
break;
}
catch (Exception ex)
{
if (i < 5) continue;
_logger.LogError(DbLog, ex, "Could not load workbook after 5 attempts");
throw;
}
}
}