b^3
New member
- Joined
- Apr 6, 2020
- Messages
- 4
- Programming Experience
- 10+
New to C# and my first project is for reading Excel (.xlsx) files.
I am reading every cell from a spreadsheet and then formatting to output as a CSV file. Currently, I am just writing to STDOUT.
My first attempt just pulled all cell entries and processed .ToString(). This, understandably, returns formulas, when, what I really want is the value created by the formula.
Therefore, I put additional code to test whether the entry was a value type and then processed through the .ToString() as mentioned above, otherwise used .NumericCellValue.ToString() instead.
However, once this check was added, the processing time increased by 8x (e.g. <2min to ~15min). Is that a reasonable expectation or can this be made more efficient?
Here is my code with comments:
Any feedback is greatly appreciated.
I am reading every cell from a spreadsheet and then formatting to output as a CSV file. Currently, I am just writing to STDOUT.
My first attempt just pulled all cell entries and processed .ToString(). This, understandably, returns formulas, when, what I really want is the value created by the formula.
Therefore, I put additional code to test whether the entry was a value type and then processed through the .ToString() as mentioned above, otherwise used .NumericCellValue.ToString() instead.
However, once this check was added, the processing time increased by 8x (e.g. <2min to ~15min). Is that a reasonable expectation or can this be made more efficient?
Here is my code with comments:
C#:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using NPOI.XSSF.UserModel;
using NPOI.HSSF.UserModel;
using NPOI.SS.UserModel;
using System.IO;
namespace ExcelReader
{
class ReadDataNPOI
{
IWorkbook Book;
ISheet DesiredSheet;
const string Delimiter = ",";
DateTime StartTime = DateTime.Now;
StringBuilder RowContent = new StringBuilder(String.Empty);
int LNoRow = 8, LNoCol = 1, CustDRow = 7, CustDCol = 1;
string LNo, CustD, IntD = "DRDZ-050-120-C_HEC", GlobalD = "GlobalD62941.4Z"; //Need to get the IntD and GlobalD from MES.
string Filename = "";
string LInfo = "";
public ReadDataNPOI(string path, int Sheet)
{
try
{
FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
Filename = path;
try
{
Book = new XSSFWorkbook(fs);
}
catch
{
Book = null;
}
// If reading fails, try to read workbook as XLS:
if (Book == null)
{
Book = new HSSFWorkbook(fs);
}
}
catch (Exception ex)
{
Console.WriteLine(ex.Message, "Excel read error");
return;
}
try
{
DesiredSheet = Book.GetSheetAt(Sheet);
}
catch (Exception ex) {
Console.WriteLine(ex.Message, "ERROR: Sheet read error or not available.");
return;
}
}
public string ReadWorkbook(int i, int j)
{
//Get L number from spreadsheet.
if (null != DesiredSheet.GetRow(LNoRow).GetCell(LNoCol) && !string.IsNullOrEmpty(DesiredSheet.GetRow(LNoRow).GetCell(LNoCol).ToString()))
{
LNo = DesiredSheet.GetRow(LNoRow).GetCell(LNoCol).ToString();
}
else
{
if (null != DesiredSheet.GetRow(LNoRow - 1).GetCell(LNoCol) && !string.IsNullOrEmpty(DesiredSheet.GetRow(LNoRow - 1).GetCell(LNoCol).ToString()))
LNo = DesiredSheet.GetRow(LNoRow - 1).GetCell(LNoCol).ToString();
}
if (LNo == null)
{
Console.WriteLine("No L number recorded in file:" + Filename);
return "";
}
//Get CustD from spreadsheet.
//This checks to see if the L Number is in its proper cell; if so that implies the CustD will also
//be in its proper cell. This will hopefully handle the older files and the newer files with L number location correction.
if (null != DesiredSheet.GetRow(LNoRow).GetCell(LNoCol) && !string.IsNullOrEmpty(DesiredSheet.GetRow(LNoRow).GetCell(LNoCol).ToString()))
{
CustD = DesiredSheet.GetRow(CustDRow).GetCell(CustDCol).ToString();
}
else
{
CustD = "";
}
//Console.WriteLine(LNo + Delimiter + IntD + Delimiter + GlobalD + Delimiter + CustD + System.Environment.NewLine);
LInfo = LNo + Delimiter + IntD + Delimiter + GlobalD + Delimiter + CustD + Delimiter;
for (int Row=30; Row <= DesiredSheet.LastRowNum; Row++)
// NOTE: This is hard coded for now. Will later be defined in a global variable.
// This is where the main data section begins in all workbooks to be processed.
{
RowContent.Append(LInfo);
if (DesiredSheet.GetRow(Row) != null)
{
for (int Col = 0; Col <= DesiredSheet.GetRow(Row).LastCellNum; Col++)
{
if (null != DesiredSheet.GetRow(Row).GetCell(Col) && !string.IsNullOrEmpty(DesiredSheet.GetRow(Row).GetCell(Col).ToString()))
{
CurrentCell = DesiredSheet.GetRow(Row).GetCell(Col);
// Before checking for IsValueType
// RowContent.Append(CurrentCell.NumericCellValue.ToString());
// This is now the exception to IsValueType.
if (CurrentCell.GetType().IsValueType)
{
RowContent.Append(CurrentCell.ToString());
}
else
{
RowContent.Append(CurrentCell.NumericCellValue.ToString());
}
if (Col < DesiredSheet.GetRow(Row).LastCellNum-1)
{
RowContent.Append(Delimiter);
}
//The next else was used to place all data in one long string. Took too long.
//else
//{
// Console.WriteLine(DesiredSheet.GetRow(Row).LastCellNum);
// RowContent.Append(System.Environment.NewLine);
//}
}
}
Console.WriteLine(RowContent);
RowContent.Clear();
}
}
Book.Close();
Console.WriteLine("Start: " + StartTime);
DateTime EndTime = DateTime.Now;
Console.WriteLine("End: " + EndTime);
return String.Empty;
}
}
}
Any feedback is greatly appreciated.
Last edited: