Steve listens and quickly understands requirements. He delivers a superior product on time, on budget and beyond the ask. I definitely recommend Steve's work!
Friday, 02 September 2011 22:47 Last Updated on Monday, 23 July 2012 16:51
There are two ways to change cell values in an Excel spreadsheet: the wrong way and the fast way. The wrong way takes the form of a loop where each cell is individually accessed in a for loop. Each time you assign a value, your add-in makes a COM call into Excel. Calling COM object methods from C# or anything for that matter, is a slow process. It might be 10ms or so (just a guess) but if you were looping through 200k cells that becomes 2000 seconds of COM calls.
Exhibit 1: The slow way of doing things in Excel.
int i = 1;
foreach(Excel.Range cell in cells)
{
cell.Value2 = foo(i);
}
The fast way is to work with arrays and assign values to many cells at once in one COM call. Instead of assigning a value to each cell in a loop we can assign a values to an array then assign the value of the entire range to the array. In exhibit 2 we create a two dimensional array buffer[1 .. 200000, 1 .. 1] then calculate the values in the for loop. The reason this is faster is that the for loop is not calling Excel, it’s all .Net and once we’ve calculated all the values we call Excel once to set the values of the cells.
Exhibit 2: The fast way.
int i = 1;
int bufferSize = cells.Rows.Count;int[] lowerBounds = new int[] {1,1};
int[] lengths = new int[] {bufferSize, 1};
object[,] buffer = Array.CreateInstance(typeof(object), lengths, lowerBounds) as object[,];
for(i = 1; i <= bufferSize; i++)
{
buffer[i,1] = foo(i);
}
cells.Value2 = buffer;
Now the problem with this is that the buffer is potentially using a lot of memory (imagine if it were text values). Instead of reducing it to one call we can compromise and use a buffer so instead of doing one big write, we do 10 instead for instance. To make things easier we can wrap up the logic in a utility class shown in exhibit 3.
Exhibit 3: The ExcelColumnBuffer class.
/*
Copyright (c) 2012, Steve Powell
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Neither the name of the Excel Advisor nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL XLSPOT.COM BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
using System;
using System.Collections.Generic;
using System.Text;
using Excel = Microsoft.Office.Interop.Excel;
using System.Runtime.InteropServices;
namespace xlspot.Tools
{
public enum ExcelBufferMode
{
Write = 1, /* Doesn't affect unchanged cells during flush. */
Overwrite = 2 /* All cells in buffer will be null unless they are written to. */
}
public class ExcelColumnBuffer
{
#region Private Members
private int top = 0;
private int left = 0;
private int bufferSize = 0;
private object[,] buffer = null;
private ExcelBufferMode mode = ExcelBufferMode.Overwrite;
private Excel._Worksheet sheet = null;
#endregion
#region Constructors
///
/// Creates a buffer of a fixed size.
///
///
/// Defaults to Write - existing values won't be overwritten implicitely.
public ExcelColumnBuffer(int nColumnSize, ExcelBufferMode bufferMode = ExcelBufferMode.Write)
{
bufferSize = nColumnSize;
int[] lowerBounds = new int[] {1,1};
int[] lengths = new int[] {bufferSize,1};
buffer = Array.CreateInstance(typeof(object), lengths, lowerBounds) as object[,];
mode = bufferMode;
}
#endregion
#region Public Methods
public void SetSheet(Excel._Worksheet sh)
{
sheet = sh;
}
public object Read(int y, int x)
{
int oy = 1;
Excel.Range cells = null;
List<object> comObjs = new List<object>();
if (x != left || !Translate(y, out oy))
{
Flush();
top = y;
left = x;
try
{
comObjs.Add(cells = sheet.Cells);
comObjs.Add(cells = cells[top, left] as Excel.Range);
comObjs.Add(cells = cells.Resize[bufferSize]);
buffer = cells.Value as object[,];
oy = 1; // it's now the top of the buffer.
}
finally
{
foreach (object o in comObjs)
{
if (o != null)
Marshal.ReleaseComObject(o);
}
}
}
return buffer[oy,1];
}
public void Write(int y, int x, object value)
{
int oy;
if (x != left || !Translate(y, out oy))
{
if (mode == ExcelBufferMode.Write)
{
Read(y, x); // moves the buffer.
}
else if (mode == ExcelBufferMode.Overwrite)
{
Flush();
top = y;
left = x;
}
Translate(y, out oy);
}
buffer[oy,1] = value;
}
public void Flush()
{
if (left == 0)
return;
Excel.Range cells = null;
List<object> comObjs = new List<object>();
try
{
comObjs.Add(cells = sheet.Cells);
comObjs.Add(cells = cells[top, left] as Excel.Range);
comObjs.Add(cells = cells.Resize[bufferSize]);
cells.Value = buffer;
}
finally
{
foreach (object o in comObjs)
{
if (o != null)
Marshal.ReleaseComObject(o);
}
}
if (mode == ExcelBufferMode.Overwrite)
{
Array.Clear(buffer, 1, bufferSize);
}
}
#endregion
#region Private Helper Methods
private bool Translate(int y, out int oy)
{
oy = y - top + 1;
if (oy >= 1 && oy <= bufferSize)
return true;
else
return false;
}
#endregion
#region Indexer
public object this[int y, int x]
{
get
{
return Read(y, x);
}
set
{
Write(y, x, value);
}
}
#endregion
}
}
The buffer class is column based so to use it effectively access the spreadsheet top to bottom first, then sideways see exhibit 4. The example in exhibit 5 uses the buffer class to determine if a range contains part of a Fibonacci sequence. I’m not a mathematician so I’m sure there’s a better way to do it, but then again this very fast.
Exhibit 4: Use this pattern to access cells through the buffer. If you can’t do that then you need a different buffer class.
Exhibit 5: An example of using the ExcelColumnBuffer class to verify that the selected range contains part of a Fibonacci sequence.
private bool IsSelectionFibonacci()
{
Excel.Range selection = null, cells = null;
double d = 0.0, d1 = 0.0, d2 = 0.0, ds = 0.0;
double v;
int y, x, h, w, right, bottom;
List<object> comObjs = new List<object>();
try
{
comObjs.Add(selection = ExcelApp.Selection as Excel.Range);
if (selection != null)
{
ExcelColumnBuffer buffer = new ExcelColumnBuffer(100);
buffer.SetSheet(ExcelApp.ActiveSheet as Excel._Worksheet);
y = selection.Row;
x = selection.Column;
comObjs.Add(cells = selection.Rows);
h = cells.Count;
comObjs.Add(cells = selection.Columns);
w = cells.Count;
v = (double)buffer.Read(y, x);
right = x + w - 1;
bottom = y + h - 1;
ds = v;
if (v > 1)
{
d = 1.0;
d1 = 1.0;
d2 = 0.0;
while (d < v)
{
d = d1 + d2;
d2 = d1;
d1 = d;
}
if (d != v)
{
return false;
}
}
else
{
d1 = v;
}
++y;
for (; x <= right; x++)
{
for (; y <= bottom; y++)
{
v = (double)buffer[y, x];
d = d1 + d2;
if (v == d)
{
d2 = d1;
d1 = d;
}
else if (v == 1.0 && d1 == 0.0 && ds == 0.0)
{
d2 = d1;
d1 = v;
}
else if (v == 2.0 && d1 == 1.0 && ds == 1.0)
{
// we're on 2.0 and it started on the second 1.0 in the sequence
d2 = d1;
d1 = v;
}
else
{
// doesn't match the sequence
return false;
}
}
y = selection.Row;
}
return true;
}
return false;
}
finally
{
foreach (object o in comObjs)
{
if (o != null)
Marshal.ReleaseComObject(o);
}
}
}
Although this buffer class is limited to column based access, it’s possible to extend it to use a row based buffer, or a two dimensional buffer. For applications that access many cell values this makes a huge difference in performance.