Search code examples
google-apps-script, google-drive-api, google-docs

Prevent Google Drive Service API from extracting text when converting images


I have a script that processes files uploaded to Google Drive: if a file is not a PDF, the script converts it to a Google Doc and then converts that Google Doc to PDF. This all works as required.

The problem is that when the file being converted to Google Docs is an image, the new Google Docs file contains not only the image but also text extracted from it (via OCR, I assume). I've tried setting the `ocr` option to `false`, to no avail.

Please help, all I want is to convert image files to pdf without extracting any text from the images.

/**
 * Post-processes uploaded Drive files and collects the files in the scans
 * folder whose title matches `suid`.
 *
 * For every uploaded file the description is tagged "Scanned". A file that is
 * not a PDF and has not been tagged "Converted" is converted to a Google Doc
 * through the advanced Drive service, the original and the intermediate Doc
 * are moved into the "Processed" subfolder, and a PDF rendering of the Doc is
 * created in `scans_folder`.
 *
 * @param {Array} form_upload_items - Drive file IDs (each is passed to
 *     DriveApp.getFileById).
 * @param {string} suid - Text searched for in file titles; also the first
 *     part of the returned file name prefix.
 * @param {string} lname - Second part of the returned file name prefix.
 * @param {Object} scans_folder - Drive folder holding the scans. It must
 *     contain a "Processed" subfolder (when there is anything to process) and
 *     have at least one parent folder.
 * @returns {{file_name_prefix: string, pdf_files: string[], documents_folder_id: string}}
 *     The name prefix, the IDs of the matching files, and the ID of the first
 *     parent of `scans_folder`.
 */
function afterUpload(form_upload_items, suid, lname, scans_folder) {
  // Looked up on the first iteration only; the folder does not change between items.
  let processed_folder;

  form_upload_items.forEach((form_upload_item_id) => {
    const uploaded_file = DriveApp.getFileById(form_upload_item_id);
    const uft = uploaded_file.getMimeType();
    Logger.log("item has name of '%s' and is of type '%s'", uploaded_file.getName(), uft);

    if (processed_folder === undefined) {
      processed_folder = scans_folder.getFoldersByName("Processed").next();
    }

    let file_description = uploaded_file.getDescription();
    if (file_description == null) {
      file_description = " ";
    }

    if (file_description.includes("Scanned")) {
      Logger.log("Already scanned '%s'", uploaded_file.getName());
    } else {
      uploaded_file.setDescription("Scanned");
      file_description = uploaded_file.getDescription();
    }

    if (file_description.includes("Processed")) {
      Logger.log("Already processed %s", uploaded_file.getName());
      return;
    }

    const needs_conversion = uft !== MimeType.PDF && !file_description.includes("Converted");
    if (!needs_conversion) {
      return;
    }

    // Step 1: convert the upload to a Google Doc (Drive API v2 via Advanced Google services).
    // NOTE(review): the accompanying report says `ocr: false` does not stop text
    // being extracted when the source is an image; any stripping of that text
    // would have to happen on the converted Doc before the PDF export below.
    const file_blob = uploaded_file.getBlob();
    const options = {
      ocr: false,
      supportsAllDrives: true,
    };
    const res = Drive.Files.insert(
      {
        "mimeType": "application/vnd.google-apps.document",
        "title": uploaded_file.getName() + "_gdocs_conv",
      },
      file_blob,
      options
    );

    uploaded_file.moveTo(processed_folder);
    uploaded_file.setDescription(file_description + ", Converted");

    const converted_file = DriveApp.getFileById(res.id);
    converted_file.moveTo(processed_folder);

    // Step 2: render the Google Doc as a PDF inside the scans folder.
    const blobPDF = converted_file.getAs(MimeType.PDF);
    const pdf_file = scans_folder.createFile(blobPDF);
    Logger.log("Converted %s to pdf", pdf_file.getName());
  });

  // Backslashes and single quotes must be escaped inside a Drive query string
  // literal, otherwise an identifier such as O'Brien produces an invalid query.
  const escaped_suid = String(suid).replace(/\\/g, "\\\\").replace(/'/g, "\\'");
  const query = "title contains '" + escaped_suid + "'";
  const scanned_files = scans_folder.searchFiles(query);

  const pdf_file_ids = [];
  while (scanned_files.hasNext()) {
    const scanned_file = scanned_files.next();
    Logger.log("%s %s", scanned_file.getName(), scanned_file.getDescription());
    pdf_file_ids.push(scanned_file.getId());
  }

  const documents_folder = scans_folder.getParents().next();

  // The result object was previously assembled but never handed back to the caller.
  return {
    file_name_prefix: suid + " " + lname,
    pdf_files: pdf_file_ids,
    documents_folder_id: documents_folder.getId(),
  };
}

Solution

    1. Open the file that was created by the "Drive" service with DocumentApp.
    2. Search the body of the document for the paragraph that contains the image (Google Docs has already resized the image correctly).
    3. Save a copy of that paragraph in memory.
    4. Clear the body of the document.
    5. Finally, append the saved paragraph back to the body.