Search code examples
performancegoogle-sheetsgoogle-apps-script

Very strange performance issue on Google Apps Script


I have a strange problem on Apps Script, which I cannot understand by myself, and I have therefore decided to ask for help…

I have a Google Sheets file with 3 tabs, each containing a list of item names within a single row.

My script parses each list within its tab and, for each item seen, does the following:

Checks in a table whether the item has been seen before (regardless of which tab if was found in):

  • If it’s been seen before, it increases the related counter by one
  • If it’s never been seen before, it adds it to the tracking table and initialises the relevant counters.

The main storage method is a multidimensional table with position 0 of the first index being the table where item names are added, and each of the following positions being tables containing the values being incremented when an item is seen (so first index shows which on the 3 tabs it was found in and second index refers to the position of this item is index 0.

Now it’s nothing complicated (one could hardly think of a less complicated algorithm), and I am running this on lists that barely contain 10000 rows. So I was expecting this to run within seconds, but the results were staggering…

I have added a tracking counter within the script which records how long the script takes to process each 100 rows within the input lists. As you can see in the results tabs (A, B, C, etc…), there is a point at which the processing of every 100 rows sky-rockets from approximately 500ms to over 15 seconds !!!! So it’s a 30-fold increase appearing in the middle of the processing, for no apparent reason that I can think of, and I really have no idea what’s going on. Does anyone have any idea ???

function Analyse_data() {

const TABS_to_PROCESS = [1,2,3];
const TAB_NAMES = ['Tab1','Tab2','Tab3'];
const NAME_COLUMN = 1;
const ROW_COLUMN = 1;
const TIME_COLUMN = 2;
const START_ROW = 1;
const OUTPUT_TAB = 'Output';
const OFFSET_STATS = 3;

var spreadsheet = SpreadsheetApp.getActive();
var timestampedlog = new Array();

var items_found = new Array(TABS_to_PROCESS.length+1); // items_found[0] will store item names, items_found[x] (with x>=1) the nbr of occurrences in each processed tab
for(i=0; i<items_found.length; i++) { items_found[i] = new Array(); }

/* A "unique" value is a value that's never been seen before, on the tab being processed OR on other tabs processed previously */
var NbrOfUniqueValues = 0;
var TotalNbrOfValues = 0;
var TotalRowsProcessed = 0;

TABS_to_PROCESS.forEach((tab_being_processed) => {

  var NbrOfUniqueValuesInTab = 0;
  var TotalNbrOfValuesInTab = 0;
  var row_being_processed=START_ROW;
  var processed_item_name;

  while (spreadsheet.getSheetByName(TAB_NAMES[tab_being_processed-1]).getRange(row_being_processed,NAME_COLUMN).getValue() != '') {
    // ******************* This section for debug only *********************************************** START
    /*   */  if ((row_being_processed % 100) == 0) {
    /*   */    timestampedlog.push(row_being_processed);
    /*   */    timestampedlog.push(Date.now());
    /*   */  }
    // ******************* This section for debug only *********************************************** END
    // Reading the next item name
    processed_item_name = spreadsheet.getSheetByName(TAB_NAMES[tab_being_processed-1]).getRange(row_being_processed,NAME_COLUMN).getValue();
    // Checking if item name is already in the table (i.e. in items_found[0])
    if (items_found[0].includes(processed_item_name)) {
      // if we've found it, we increment by one the value corresponding to the current tab, at the index position of that specific item
      (items_found[tab_being_processed][items_found[0].indexOf(processed_item_name)])++;
    } else {
      // if we've NOT found it, we add the item name to items_found[0]... 
      items_found[0].unshift(processed_item_name);
      // ...then we initialize the corresponding values at index position 0:
      items_found[tab_being_processed].unshift(1);
      // ...and to finish we initialize the counting cells for the item just added:
      for(i=1; i<items_found.length; i++) {
         if (i == tab_being_processed) {
           items_found[i].unshift(1); // initialization to 1 for the tab being processed
         } else { 
           items_found[i].unshift(0); // initialization to 0 for the other tabs
         }
      }
      NbrOfUniqueValuesInTab++;
    }
    TotalNbrOfValuesInTab++;
    TotalRowsProcessed++;
    row_being_processed++;
  }
  /* User-friendly messages sent to console during processing */
  console.log('   - Finished processing ' +  TAB_NAMES[tab_being_processed-1] + ': ' + TotalNbrOfValuesInTab + ' input row(s), containing ' + NbrOfUniqueValuesInTab + ' unique values');
  NbrOfUniqueValues = NbrOfUniqueValues + NbrOfUniqueValuesInTab;
  TotalNbrOfValues = TotalNbrOfValues + TotalNbrOfValuesInTab;
})

// Displaying item data
for(i=1; i<items_found[0].length; i++) { // The variable i will parse every value of 'name' stored in items_found[0]
  spreadsheet.getSheetByName(OUTPUT_TAB).getRange(i+1,NAME_COLUMN).setValue(items_found[0][i]);
  TABS_to_PROCESS.forEach((tab_being_processed) => {
    spreadsheet.getSheetByName(OUTPUT_TAB).getRange(1,NAME_COLUMN+tab_being_processed).setValue(TAB_NAMES[tab_being_processed-1]);
    spreadsheet.getSheetByName(OUTPUT_TAB).getRange(i+1,NAME_COLUMN+tab_being_processed).setValue(items_found[tab_being_processed][i]);
  })
}
for(j=1; j<=TABS_to_PROCESS.length+1; j++) { 
  spreadsheet.getSheetByName(OUTPUT_TAB).getRange(i+1,j).setValue('--- END ---');
}

// Displaying stats & time-stamped log
spreadsheet.getSheetByName(OUTPUT_TAB).getRange(1,TABS_to_PROCESS.length + OFFSET_STATS).setValue(NbrOfUniqueValues + ' unique values found altogether over ' + TABS_to_PROCESS.length + ' input tabs ' + '(' + TotalRowsProcessed + ' rows parsed)');
for(i=0; i<timestampedlog.length; i=i+2) {
  spreadsheet.getSheetByName(OUTPUT_TAB).getRange(i/2+OFFSET_STATS,TABS_to_PROCESS.length + OFFSET_STATS).setValue(timestampedlog[i]);
  spreadsheet.getSheetByName(OUTPUT_TAB).getRange(i/2+OFFSET_STATS,TABS_to_PROCESS.length + OFFSET_STATS+1).setValue(timestampedlog[i+1]);
}
spreadsheet.getSheetByName(OUTPUT_TAB).getRange(i/2+OFFSET_STATS,TABS_to_PROCESS.length + OFFSET_STATS).setValue('--- END ---');
spreadsheet.getSheetByName(OUTPUT_TAB).getRange(i/2+OFFSET_STATS,TABS_to_PROCESS.length + OFFSET_STATS+1).setValue('--- END ---');

}

The issue is within the loop that starts with this:

while (spreadsheet.getSheetByName(TAB_NAMES[tab_being_processed-1]).getRange(row_being_processed,NAME_COLUMN).getValue() != '')

Processing of 100 iterations take roughly 500 ms with "small" input, but when I run it with 3 input tabs containing a few thousand rows each (for example 9000, 9000 and 2000), then perf decreases to an unbreable level in a very sudden manner, like this:

Rows processed in tab 3 Time to execute (ms)
300 658
400 675
500 16151
600 19210

Here is the link to a read-only sheet containing the Apps Script code, 3 input tabs, 2 good results, A & B (with input tabs containing lists < 3000 rows) and one "awful performance" output, Z, with input lists containing about 9000 rows.


Solution

  • The code is slow because there are API calls within the inner loop.

    You can improve performance considerably by switching to batch operations, like this:

    function analyseData() {
      const start = new Date().getTime();
      const sheetNames = ['Tab1', 'Tab2', 'Tab3',];
      const ss = SpreadsheetApp.getActive();
      const namesInSheets = sheetNames.map(sheetName => ss.getRange(`${sheetName}!A2:A`).getDisplayValues().flat().filter(String));
      const allNames = namesInSheets.flat();
      const uniqueNames = allNames.filter((name, index) => allNames.indexOf(name) === index).sort();
      const result = uniqueNames.map(name => [name, ...namesInSheets.map(a => a.filter(v => v === name).length),]);
      result.unshift(['Device name', ...sheetNames,]);
      const outputSheet = ss.insertSheet(`Output ${new Date().toISOString()}`);
      outputSheet.getRange('A2').offset(0, 0, result.length, result[0].length).setValues(result);
      outputSheet.getRange('A1').setValue(new Date().getTime() - start);
    }
    

    This function will create a new sheet and put the counts there starting on row 2. It will also put the execution time in number of milliseconds in cell A1.

    It runs fast because it reads all the data at the start, processes the data and puts the results in an array, and finally writes the results in one go, instead of writing individual results one by one. Also see DRY.

    If you need the results to update automatically, it is probably easier to use a plain vanilla spreadsheet formula, like this:

    =let( 
      stackSheets_, lambda(sn, ra, reduce(tocol(æ,2),sn,lambda(rs,s,let(r,indirect(s&"!"&ra),l,max(index(row(r)*iferror(r<>""))),d,filter(r,row(r)<=l),r_,lambda(v,byrow(d,lambda(_,v))),if(counta(iferror(d)),vstack(rs,hstack(r_(s),d)),rs))))),
    
      query( 
        stackSheets_({ "Tab1", "Tab2", "Tab3" }, "A2:A"), 
        "select Col2, count(Col2) 
         where Col2 is not null 
         group by Col2 
         pivot Col1", 
        0 
      ) 
    )