Search code examples
google-apps-scriptgoogle-sheetsgoogle-drive-apigoogle-docs

Ways to Speed Up This Script for Returning Google Docs Files Modified in Current Year


I'm currently using the following script to return only Google Docs files that were modified in the current year. It recursively runs through folders and sub folders within a Shared Drive and returns the results in a Google Sheets file. It does the job, but it runs somewhat slow. I'm just curious if there's anything I could do to modify this script to make it run quicker. Thanks for your help!

function listGoogleDocsModifiedInCurrentYear() {
  var folderId = "SHARED FOLDER ID HERE"; 
  var folder = DriveApp.getFolderById(folderId);
  var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
  var currentYear = new Date().getFullYear();
  
  var sheetYear = getOrCreateSheet(spreadsheet, currentYear + " Docs");
  sheetYear.clearContents(); // Clear existing data in the sheet
  
  var headersRange = sheetYear.getRange("A1:F1");
  var headersValues = headersRange.getValues();
  var headersApplied = headersValues[0][0] === "Last Modified";
  
  if (!headersApplied) {
    sheetYear.getRange("A1:F1").setValues([["Last Modified", "Title", "Type", "Link", "Last Modifier", "Folder Name"]]);
    sheetYear.getRange("A1:F1").setFontWeight("bold");
    sheetYear.getRange("A1:F1").setHorizontalAlignment("left");
    sheetYear.setFrozenRows(1);
  }
  
  var processedFiles = new Set();
  
  listGoogleDocsModifiedInCurrentYearRecursive(folder, sheetYear, processedFiles, function(fileDataArray) {
    if (fileDataArray.length > 0) {
      
      sheetYear.getRange(sheetYear.getLastRow() + 1, 1, fileDataArray.length, fileDataArray[0].length).setValues(fileDataArray);
    }
  });
}

function listGoogleDocsModifiedInCurrentYearRecursive(folder, sheetYear, processedFiles, callback) {
  var files = folder.getFilesByType(MimeType.GOOGLE_DOCS);
  var folderFileNames = new Set(); // Set to store file names within the folder
  
  var existingData = sheetYear.getDataRange().getValues();
  for (var i = 1; i < existingData.length; i++) {
    folderFileNames.add(existingData[i][1]);
  }
  
  while (files.hasNext()) {
    var file = files.next();
    var fileId = file.getId();
    var fileName = file.getName();
  
    if (processedFiles.has(fileId) || folderFileNames.has(fileName)) {
      continue;
    }
    
    var fileLink = "https://drive.google.com/file/d/" + fileId + "/view";
    var fileDate = file.getLastUpdated();
    
    var currentYear = new Date().getFullYear();
    if (fileDate.getFullYear() === currentYear) {
      var lastModifierEmail = getLastModifierEmail(fileId);
      var folderName = (folder.getName() === "root") ? getSharedDriveName(file) : folder.getName();
      var fileType = "Google Doc";
      
      var fileDataArray = [
        [fileDate, fileName, fileType, fileLink, lastModifierEmail, folderName]
      ];
      callback(fileDataArray);
      
      processedFiles.add(fileId);
      folderFileNames.add(fileName);
    }
  }
  
  var subfolders = folder.getFolders();
  while (subfolders.hasNext()) {
    var subfolder = subfolders.next();
    listGoogleDocsModifiedInCurrentYearRecursive(subfolder, sheetYear, processedFiles, callback);
  }
}

function getLastModifierEmail(fileId) {
  var revisions = Drive.Revisions.list(fileId);
  
  if (revisions.items && revisions.items.length > 0) {
    for (var i = revisions.items.length - 1; i >= 0; i--) {
      var lastModifier = revisions.items[i].lastModifyingUser;
      if (lastModifier && lastModifier.emailAddress) {
        return lastModifier.emailAddress;
      }
    }
  }
  
  return "No modifier";
}

Solution

  • I believe your goal is as follows.

    • You want to reduce the process cost of your script.

    Modification points:

    • In this case, how about retrieving the file list using the search query instead of getFilesByType? By this, the Google Document files updated this year can be directly retrieved.

    • In order to retrieve the modified user's email, how about using the batch request? When the batch request is used, the process cost can be reduced a little. Ref

    • In your script, at sheetYear.clearContents(); // Clear existing data in the sheet, it seems that the sheet is cleared. In this case, I thought that if (!headersApplied) {} might not be able to be used.

    • sheetYear.getRange("A1:F1") can be used one time.

    When these points are reflected in your script, how about the following modification?

    Usage:

    1. Install Google Apps Script library

    In this case, in order to use the batch request, please install the Google Apps Script library. Ref (Author: me)

    You can see how to install this at here.

    2. Enable Drive API

    Please enable Drive API at Advanced Google services.

    3. Sample script

    function myFunction() {
      // Please set your folder ID.
      const folderId = "SHARED FOLDER ID HERE"; 
    
    
      // Retrieve file list.
      const getDocFiles_ = (folder, q, obj = new Map()) => {
        if (folder) {
          const files = folder.searchFiles(q);
          while (files.hasNext()) {
            const file = files.next();
            const name = file.getName();
            if (!obj.has(name)) {
              const fileId = file.getId();
              let folderName = file.getParents().next().getName();
              // folderName = (folderName === "root") ? getSharedDriveName(file) : folderName; // I'm not sure whether you use this line.
              obj.set(name, { value: [file.getLastUpdated(), name, "Google Docs", `https://drive.google.com/file/d/${fileId}/view`, "", folderName], fileId });
            }
          }
          const subFolders = folder.getFolders();
          while (subFolders.hasNext()) {
            getDocFiles_(subFolders.next(), q, obj);
          }
        }
        return obj;
      }
      const folder = DriveApp.getFolderById(folderId);
      const thisYear = new Date();
      thisYear.setMonth(0);
      thisYear.setDate(1);
      thisYear.setHours(0, 0, 0, 0);
      thisYearStr = thisYear.toISOString();
      const q = `mimeType='${MimeType.GOOGLE_DOCS}' and trashed=false and modifiedDate >= '${thisYearStr}'`;
      const values = [...getDocFiles_(folder, q).values()];
      if (values.length == 0) return;
      const requests = {
        batchPath: "batch/drive/v3",
        requests: values.map(({ fileId }) => ({
          method: "GET",
          endpoint: `https://www.googleapis.com/drive/v3/files/${fileId}?supportsAllDrives=true&fields=lastModifyingUser(emailAddress)`
        }))
      };
      const result = BatchRequest.EDo(requests); // Using this library
      const res = values.map(({ value }, i) => (value[4] = result[i].lastModifyingUser?.emailAddress || null, value));
    
      // Put values to the sheet.
    
      // This is from your script.
      var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
      var currentYear = new Date().getFullYear();
      var sheetYear = getOrCreateSheet(spreadsheet, currentYear + " Docs");
    
      sheetYear.getRange("A1:F1").setValues([["Last Modified", "Title", "Type", "Link", "Last Modifier", "Folder Name"]]).setFontWeight("bold").setHorizontalAlignment("left");
      sheetYear.setFrozenRows(1);
      sheetYear.getRange(2, 1, res.length, res[0].length).setValues(res);
    }
    

    Testing

    When I tested this modified script, I confirmed the same result as your showing script. In my test, the process cost could be reduced by about 40 %.

    References: