Search code examples
google-apps-scriptgoogle-sheetspdfmerge

JavaScript runtime error while merging docx files into pdf


This is the continuation of my previous question where Tanaike proposed a solution to merge docx files into a pdf. The following code merges docx files into pdf:

async function newMain() {
  // Retrieve PDF data.
  var destinationFolder = DriveApp.getFolderById("your-folder-id")
  var urls = sheet.getRange(2, 2).getValue().toString().split(",");  //split the urls joined by commas 
    const ids = urls.map((url) => {
    const matches = url.match(/\/file\/d\/([^\/]+)\/edit/); //get file id from urls
    return matches ? [matches[1]] : [];
  });

    const data = ids.flatMap(([id]) => {
    const file = DriveApp.getFileById(id);
    const mimeType = file.getMimeType();
    if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
      var blob = file.getBlob();
      var tempfile = Drive.Files.insert({}, blob, { convert: true });
      const pdfData = DriveApp.getFileById(tempfile.id).getBlob().getBytes();
      Drive.Files.remove(tempfile.id);
      return new Uint8Array(pdfData);
    } else if (mimeType === 'application/pdf') {
      return new Uint8Array(file.getBlob().getBytes());
    }
    return [];
  });



  // Load pdf-lib
  const cdnjs = "https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js";
  eval(UrlFetchApp.fetch(cdnjs).getContentText().replace(/setTimeout\(.*?,.*?(\d*?)\)/g, "Utilities.sleep($1);return t();"));

  // Merge PDFs.
  const pdfDoc = await PDFLib.PDFDocument.create();
  for (let i = 0; i < data.length; i++) {
    const pdfData = await PDFLib.PDFDocument.load(data[i]);
    for (let j = 0; j < pdfData.getPageCount(); j++) {
      const [page] = await pdfDoc.copyPages(pdfData, [j]);
      pdfDoc.addPage(page);
    }
  }
  const bytes = await pdfDoc.save();

  // Create a PDF file.
  var file = DriveApp.createFile(Utilities.newBlob([...new Int8Array(bytes)], MimeType.PDF, "sample2.pdf"));
  file.moveTo(destinationFolder);
}

However, when I plug the following URLs:

https://docs.google.com/document/d/1-yFHs6DJfRJA9dLzy0yed3uLW4_XdNxD/edit,https://drive.google.com/file/d/16-nyhNGEUpLzT1p1--MseDRO5uzAPdRa/edit,https://drive.google.com/file/d/15VHdNbEJpsEPMRIvzwUJNRA5sZkIdoTO/edit,https://drive.google.com/file/d/1dNHeu-kncSC6f6Dm4DscCnjNejh-6FT1/edit

It shows the following error message:

The JavaScript runtime exited unexpectedly.

I believe this occurs when the following line is executed:

var file = DriveApp.createFile(Utilities.newBlob([...new Int8Array(bytes)], MimeType.PDF, "sample2.pdf"));

Any guidance to solve this issue is much appreciated.


Solution

  • Modification points:

    • In your showing script, the URL like https://drive.google.com/file/d/{fileId}/edit is retrieved. In your showing URLs, https://docs.google.com/document/d/{fileId}/edit is included. In this case, your script cannot retrieve the file ID from the URL. I think that this is the 1st issue of your script.

    • When the large PDF data is merged, it seems that the showing script for merging PDF data cannot be used. In this case, it is required to also modify the script for merging PDF data. Ref (Author: me) I think that this is the 2nd issue of your script.

    When these points are reflected in your script, how about the following modification?

    Modified script:

    Please set sheet and your folder ID.

    async function newMain() {
      var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Sheet1"); // Please declare the variable of "sheet".
    
      // Retrieve PDF data.
      var destinationFolder = DriveApp.getFolderById("your-folder-id")
      var urls = sheet.getRange(2, 2).getValue().toString().split(",");
    
      const ids = urls.map(url => [url.split("/")[5]]); // Modified
    
      const data = ids.flatMap(([id]) => {
        const file = DriveApp.getFileById(id);
        const mimeType = file.getMimeType();
        if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
          var blob = file.getBlob();
          var tempfile = Drive.Files.insert({}, blob, { convert: true });
          const pdfData = DriveApp.getFileById(tempfile.id).getBlob().getBytes();
          Drive.Files.remove(tempfile.id);
          return new Uint8Array(pdfData);
        } else if (mimeType === 'application/pdf') {
          return new Uint8Array(file.getBlob().getBytes());
        }
        return [];
      });
    
      // Load pdf-lib
      const cdnjs = "https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js";
      eval(UrlFetchApp.fetch(cdnjs).getContentText().replace(/setTimeout\(.*?,.*?(\d*?)\)/g, "Utilities.sleep($1);return t();"));
    
      // Modified
      // Merge PDFs.
      const pdfDoc = await PDFLib.PDFDocument.create();
      for (let i = 0; i < data.length; i++) {
        const pdfData = await PDFLib.PDFDocument.load(data[i]);
        const pages = await pdfDoc.copyPages(pdfData, pdfData.getPageIndices());
        pages.forEach(page => pdfDoc.addPage(page));
      }
      const bytes = await pdfDoc.save();
    
      // Create a PDF file.
      var file = DriveApp.createFile(Utilities.newBlob([...new Int8Array(bytes)], MimeType.PDF, "sample2.pdf"));
      file.moveTo(destinationFolder);
    }
    
    • When I tested this modified script using your showing URLs of https://docs.google.com/document/d/1-yFHs6DJfRJA9dLzy0yed3uLW4_XdNxD/edit,https://drive.google.com/file/d/16-nyhNGEUpLzT1p1--MseDRO5uzAPdRa/edit,https://drive.google.com/file/d/15VHdNbEJpsEPMRIvzwUJNRA5sZkIdoTO/edit,https://drive.google.com/file/d/1dNHeu-kncSC6f6Dm4DscCnjNejh-6FT1/edit in cell "B2", I confirmed that a PDF file including 330 pages is created.