Search code examples
amazon-web-services google-apps-script aws-lambda aws-sdk-js amazon-textract

"InvalidParameterType" error for image files sent as blob to AWS Textract from external source


CURRENTLY

I am trying to get AWS Textract working for images supplied by a function in Google Apps Script and sent to a Lambda resolver. I am following the documentation at https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Textract.html#analyzeDocument-property

My Google Scripts code:

/**
 * Looks up a Drive file by ID, logs its MIME type, and PUTs a JSON payload
 * containing the file's blob to the resolver endpoint.
 *
 * @param {string} id - File ID handed to DriveApp.getFileById.
 */
function googleFunction(id) {
  
  let file = DriveApp.getFileById(id);
  console.log("File is a " + file.getMimeType());
  let blob = file.getBlob();

  // The Blob object itself (not its bytes) is placed in the payload and is
  // JSON-serialized below.
  // NOTE(review): a Blob is unlikely to serialize to usable file content — confirm
  // what actually arrives on the receiving side.
  let params = {
    doc: blob,
  };

  // NOTE(review): "Content-Type" is given as a top-level option key; the
  // UrlFetchApp reference names this option `contentType` — confirm.
  var options = {
    method: "PUT",
    "Content-Type": "application/json",
    payload: JSON.stringify(params),
  };

  // The response is stored but never inspected or returned.
  let response = UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}

My Lambda resolver code:

"use strict";

const AWS = require("aws-sdk");

/**
 * Lambda entry point: parses the request body as JSON and passes its `doc`
 * field to Textract's analyzeDocument, logging either the error or the result.
 *
 * @param {object} event - Invocation event; `event.body` is parsed as a JSON string.
 */
exports.handler = async (event) => {
  let params = JSON.parse(event.body);
  console.log("Parse as document...");
  let textract = new AWS.Textract();
  // `doc` is forwarded exactly as JSON.parse produced it, with no conversion.
  let doc = params["doc"];
  let config = {
    Document: {
      Bytes: doc,
      // NOTE(review): FeatureTypes is nested inside Document here; the Textract
      // API reference lists it as a top-level request parameter — confirm.
      FeatureTypes: ["TABLES"],
    }
  };
  // NOTE(review): callback-style call inside an async handler; nothing awaits
  // it, so the handler's promise settles without waiting for this callback.
  textract.analyzeDocument(config, function (err, data) {
    console.log("analyzing...");
    if (err) {
      console.log(err, err.stack);
    }
    // an error occurred
    else {
      // NOTE(review): `JSON.stringfy` looks like a typo for `JSON.stringify`;
      // as written this line would throw a TypeError when reached.
      console.log("data:" + JSON.stringfy(data));
    } // successful response
  });
};

ISSUE

File is successfully sent from Google Scripts to Lambda, but the following error is returned:

"errorType": "InvalidParameterType",
"errorMessage": "Expected params.Document.Bytes to be a string, Buffer, Stream, Blob, or typed array object"

Questions

  • Is there a way of verifying what the format of the doc variable is, to ensure it meets AWS Textract's requirements?
  • Can anyone see a possible cause for the errors being returned?

NOTES

  • Textract works fine when the same file is uploaded to an S3 bucket and supplied in the config using: S3Object: { Bucket: 'bucket_name', Name: 'file_name' }
  • I have confirmed the file is a JPEG

Solution

  • Got it working with 2 changes:

    1. added getBytes() to Google side code
    2. added Buffer.from() to AWS side code

    My Google Scripts code:

    /**
     * Sends the raw bytes of a Drive file to the Lambda resolver as JSON.
     *
     * The request body has the shape `{ "doc": [<byte>, ...] }`, where each
     * entry is one byte of the file as returned by `Blob.getBytes()`. The
     * receiving side is expected to rebuild a binary buffer from that array.
     *
     * @param {string} id - ID of the Drive file to send.
     */
    function googleFunction(id) {
      const file = DriveApp.getFileById(id);
      console.log("File is a " + file.getMimeType());

      // getBytes() yields a plain array of byte values, which survives
      // JSON.stringify; the Blob object itself does not carry its content
      // through serialization.
      const bytes = file.getBlob().getBytes();

      const options = {
        method: "PUT",
        // UrlFetchApp takes the MIME type from `contentType`; a top-level
        // "Content-Type" key is not a recognized option.
        contentType: "application/json",
        payload: JSON.stringify({ doc: bytes }),
      };

      const response = UrlFetchApp.fetch("https://api-path/prod/resolver", options);
      // Surface the outcome so a failed upload is visible in the execution log.
      console.log("Resolver responded with HTTP " + response.getResponseCode());
    }
    

    My Lambda resolver code:

    "use strict";
    
    const AWS = require("aws-sdk");
    
    exports.handler = async (event) => {
      let params = JSON.parse(event.body);
      console.log("Parse as document...");
      let textract = new AWS.Textract();
      let doc = params["doc"];
      let config = {
        Document: {
          Bytes: Buffer.from(doc),
          FeatureTypes: ["TABLES"],
        }
      };
      textract.analyzeDocument(config, function (err, data) {
        console.log("analyzing...");
        if (err) {
          console.log(err, err.stack);
        }
        // an error occurred
        else {
          console.log("data:" + JSON.stringfy(data));
        } // successful response
      });
    };