Search code examples
node.jsamazon-web-servicesamazon-s3aws-lambda

Running S3-put-triggered Lambda function on existing S3 objects?


I have a Lambda function in Node.js that processes new images added to my bucket. I want to run the function for all existing objects. How can I do this? I figured the easiest way is to "re-put" each object, to trigger the function, but I'm not sure how to do this.

To be clear - I want to run, one-time, on each of the existing objects. The trigger is already working for new objects, I just need to run it on the objects that were inserted before the lambda function was created.


Solution

  • The following Lambda function will do what you require.

    It iterates over the objects in your target S3 bucket and, for each one, invokes the desired Lambda function with a payload that emulates an S3 put event.

    You will probably want to configure a very long timeout (execution time allowance) for this function.

    // Bucket whose existing objects should be replayed.
    const TARGET_BUCKET = "my-bucket-goes-here";

    // Name of the Lambda function that normally handles the S3 put trigger.
    const TARGET_LAMBDA_FUNCTION_NAME = "TestFunct";

    // Template for a simulated "ObjectCreated:Put" S3 event notification.
    // The bucket name and object key are filled in per object before the
    // event is sent; every other field is a fixed placeholder value.
    const S3_PUT_SIMULATION_PARAMS = {
      Records: [
        {
          eventVersion: "2.0",
          eventTime: "1970-01-01T00:00:00.000Z",
          requestParameters: { sourceIPAddress: "127.0.0.1" },
          s3: {
            configurationId: "testConfigRule",
            object: {
              eTag: "0123456789abcdef0123456789abcdef",
              sequencer: "0A1B2C3D4E5F678901",
              key: "HappyFace.jpg",
              size: 1024
            },
            bucket: {
              arn: "arn:aws:s3:::mybucket",
              name: "sourcebucket",
              ownerIdentity: { principalId: "EXAMPLE" }
            },
            s3SchemaVersion: "1.0"
          },
          responseElements: {
            "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH",
            "x-amz-request-id": "EXAMPLE123456789"
          },
          awsRegion: "us-east-1",
          eventName: "ObjectCreated:Put",
          userIdentity: { principalId: "EXAMPLE" },
          eventSource: "aws:s3"
        }
      ]
    };
    
    // AWS SDK entry point, plus one client per service this script talks to:
    // Lambda for invoking the target function, S3 for listing the bucket.
    const aws = require('aws-sdk');
    const lambda = new aws.Lambda();
    const s3 = new aws.S3();
    
    
    exports.handler = (event, context, callback) => {
        retrieveS3BucketContents(TARGET_BUCKET, function(s3Objects){
            simulateS3PutOperation(TARGET_BUCKET, s3Objects, simulateS3PutOperation, function(){ 
                console.log("complete."); 
            });
        });
    };
    
    /**
     * Lists every object in a bucket and hands the combined listing to `callback`.
     *
     * A single listObjectsV2 call returns at most one page of keys, so this
     * follows the continuation token until the listing is no longer truncated.
     *
     * @param {string} bucket - Bucket to list; falls back to TARGET_BUCKET when
     *   omitted.
     * @param {function(Array<object>)} callback - Receives the accumulated
     *   `Contents` entries from all pages (an empty array for an empty bucket).
     * @throws The error reported by listObjectsV2, rethrown from the SDK
     *   callback, so a failed listing is not mistaken for an empty bucket.
     */
    function retrieveS3BucketContents(bucket, callback){
        var s3Objects = [];

        function listPage(continuationToken){
            var params = { Bucket: bucket || TARGET_BUCKET };
            if (continuationToken) {
                params.ContinuationToken = continuationToken;
            }

            s3.listObjectsV2(params, function(err, data) {
                if (err) {
                    // Surface the real S3 error rather than crashing on `data.Contents`.
                    throw err;
                }

                s3Objects = s3Objects.concat(data.Contents || []);

                if (data.IsTruncated) {
                    listPage(data.NextContinuationToken);
                } else {
                    callback(s3Objects);
                }
            });
        }

        listPage();
    }
    
    /**
     * Pops one object off `s3ObjectStack`, invokes the target Lambda function
     * with a simulated S3 put event for it, then hands control to `callback`
     * to process the rest. Calls `callbackEmpty` once the stack is empty.
     *
     * @param {string} bucket - Bucket name written into the simulated event.
     * @param {Array<object>} s3ObjectStack - Listing entries still to process
     *   (each with `Key`, and optionally `Size` and `ETag`); consumed in place.
     * @param {function} callback - Continuation called as
     *   callback(bucket, s3ObjectStack, callback, callbackEmpty) after each
     *   invocation; normally this function itself.
     * @param {function} callbackEmpty - Called with no arguments when nothing
     *   is left to process.
     */
    function simulateS3PutOperation(bucket, s3ObjectStack, callback, callbackEmpty){
        if (!s3ObjectStack || s3ObjectStack.length === 0) {
            callbackEmpty();
            return;
        }

        var s3Obj = s3ObjectStack.pop();

        // Work on a deep copy so the shared module-level template is never
        // mutated (it is plain JSON data, so a JSON round trip is lossless).
        var payload = JSON.parse(JSON.stringify(S3_PUT_SIMULATION_PARAMS));
        var record = payload.Records[0];

        record.s3.bucket.name = bucket;
        record.s3.bucket.arn = "arn:aws:s3:::" + bucket;

        // Real S3 notifications carry the key URL-encoded with spaces as "+",
        // and handlers written for them decode it; send the same form.
        record.s3.object.key = s3Obj.Key
            .split("/")
            .map(function(segment){ return encodeURIComponent(segment); })
            .join("/")
            .replace(/%20/g, "+");

        // Replace the placeholder size/eTag with the listed values when known.
        if (typeof s3Obj.Size === "number") {
            record.s3.object.size = s3Obj.Size;
        }
        if (typeof s3Obj.ETag === "string") {
            // Listings wrap the ETag in double quotes; events do not.
            record.s3.object.eTag = s3Obj.ETag.replace(/"/g, "");
        }

        var params = {
          FunctionName: TARGET_LAMBDA_FUNCTION_NAME,
          Payload: JSON.stringify(payload, null, 2)
        };

        lambda.invoke(params, function(err, data) {
            if (err) {
                console.log("Invocation failed for key " + s3Obj.Key, err, err.stack);
            } else if (data && data.FunctionError) {
                // The invoke call succeeded but the target function itself failed.
                console.log("Target function reported an error for key " + s3Obj.Key, data.Payload);
            }
            // Always continue: one failure must not abandon the remaining
            // objects or prevent callbackEmpty from ever being called.
            callback(bucket, s3ObjectStack, callback, callbackEmpty);
        });
    }
    

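    Below is the policy that your Lambda function's execution role will need in order to run this code. It allows read/write access to CloudWatch Logs and permission to list the S3 bucket (s3:ListBucket). Because the code calls lambda.invoke, the role also needs lambda:InvokeFunction permission on the target function. Fill in your own details wherever you see an upper-case placeholder such as MY-BUCKET-GOES-HERE.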

    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "Stmt1477382207000",
                "Effect": "Allow",
                "Action": [
                    "s3:ListBucket"
                ],
                "Resource": [
                    "arn:aws:s3:::MY-BUCKET-GOES-HERE"
                ]
            },
            {
                "Effect": "Allow",
                "Action": [
                    "lambda:InvokeFunction"
                ],
                "Resource": [
                    "arn:aws:lambda:*:*:function:MY-FUNCTION-NAME-GOES-HERE"
                ]
            },
            {
                "Effect": "Allow",
                "Action": [
                    "logs:CreateLogGroup",
                    "logs:CreateLogStream",
                    "logs:PutLogEvents"
                ],
                "Resource": "arn:aws:logs:*:*:*"
            }
        ]
    }