Search code examples
node.jsamazon-s3aws-lambdanode-crypto

nodejs crypto.createhash('sha256') produces different hash for the same S3 object


Every time I test my code it produces a different hash even though it is the same file/object from S3. Here is my code:

"use strict";
var crypto = require('crypto');
let AWS    = require("aws-sdk/global");
AWS.config.update({region: "us-east-1"});
let S3 = require("aws-sdk/clients/s3");
let s3 = new S3();

const successResponse = { statusCode: 200, body: JSON.stringify('Processed File Metadata successfully')};

// Main Lambda entry point
exports.handler = (event, context, callback) => {
  context.callbackWaitsForEmptyEventLoop = false;

  var bucketName = event.Records[0].s3.bucket.name;
  var objectKey  = event.Records[0].s3.object.key;

  getS3ObjectHash(bucketName, objectKey, function(err, s3ObjectHash){
    if(err){
      console.log("Error getting S3ObjectHash"+err.message);
      return callback(err);
    }else{
      return callback(null, successResponse);
    }
  });
};

async function getS3ObjectHash(srcBucket, srcKey, callback){
  let hashResult;
  try {
    console.log("Bucket "+srcBucket);
    console.log("Key "+srcKey);
    const params = {
      Bucket: srcBucket,
      Key: srcKey
    };
      // Creating Hash to be used as the Key for S3 Object
    let hash = crypto.createHash('sha256');
    
    let stream = s3.getObject(params, function(err, data){
      if(err){
        console.log(err);
        return;
      }
    }).createReadStream();
    stream.on('data', function(data){
      hash.update(data, 'binary');
    });
    stream.on('end', function(){
      hashResult = hash.digest('hex');
      console.log("**** Result hash "+ hashResult);
    return callback(null, hashResult);
    });
  } catch (error) {
      console.log("Caught error "+ error.message);
      return callback(error);
  } 
}

Here is the test data that I'm using. It is the S3 trigger event when creating an object :

{
  "Records": [
    {
      "eventVersion": "2.1",
      "eventSource": "aws:s3",
      "awsRegion": "us-east-2",
      "eventTime": "2019-09-03T19:37:27.192Z",
      "eventName": "ObjectCreated:Put",
      "userIdentity": {
        "principalId": "AWS:PRINCIPAL-ID"
      },
      "requestParameters": {
        "sourceIPAddress": "205.255.255.255"
      },
      "responseElements": {
        "x-amz-request-id": "D82B88E5F771F645",
        "x-amz-id-2":  "vlR7PnpV2Ce81l0PRw6jlUpck7Jo5ZsQjryTjKlc5aLWGVHPZLj5NeC6qMa0emYBDXOo6QBU0Wo="
      },
      "s3": {
        "s3SchemaVersion": "1.0",
        "configurationId": "828aa6fc-f7b5-4305-8584-487c791949c1",
        "bucket": {
          "name": "BUCKET-NAME",
          "ownerIdentity": {
            "principalId": "PRINCIPAL-ID"
      },
          "arn": "arn:aws:s3:::BUCKET-NAME"
        },
        "object": {
          "key": "utp/1185481445975.nrcs143_023308.pdf",
          "size": 1305107,
          "eTag": "b21b84d653bb07b05b1e6b33684dc11b",
          "sequencer": "0C0F6F405D6ED209E1"
        }
      }
    }
  ]
}

Here is the output from two consecutive tests using the same above test data

2020-06-21 INFO Key utp/1185481445975.nrcs143_023308.pdf
2020-06-21 INFO **** Result hash ea41e18defef3f4e44743d847ed804aa818afb33765d7ea83d0d2d92138e5946

2020-06-21 INFO Key utp/1185481445975.nrcs143_023308.pdf
2020-06-21 INFO **** Result hash b226d051e637f7627acbab588ab171eebefad412c797a7e7cf66e92f924e578f

As you can see the Result Hash is different for each test.

I know that the test is the same.

I know that the S3 object is the same.

I know I'm not so good with Nodejs. Could someone please tell me what I'm doing wrong?


Solution

  • I give credit to @keithRozario. His comment made me give it a try.

    Once I only hashed the body the hash remained constant. Here is the code:

    async function getS3ObjectHash(srcBucket, srcKey, callback){
      let hashResult;
      try {const params = {Bucket: srcBucket,Key: srcKey};
        // getting the object so we get the hash
        s3.getObject(params, function(err, data){
          if(err){console.log(err);
            return;
          }
          let hash = crypto.createHash("sha256");
          hashResult = hash.update(data.Body).digest("hex");
          console.log("**** Result hash "+ hashResult);
          return callback(null, hashResult);
        });
      } catch (error) {
          console.log("Caught error "+ error.message);
          return callback(error);
      } 
    }