I'm trying to launch an EMR cluster from a lambda that gets triggered from an event in an S3 bucket.
The trigger works fine, and I then create an AWSRequest with runJobFlow,
but no events appear in the EMR console and the cluster is never started.
Everything, including the AWSRequest that was created, is logged to CloudWatch, but no error is logged.
It simply doesn't do anything.
Here is the code:
const aws = require('aws-sdk');
// Options for the shared EMR service client: a pinned API version and a fixed region.
const emrClientOptions = {
  region: 'us-east-1',
  apiVersion: '2009-03-31',
};

// Single EMR client instance, created once at module load and used by the handler.
const emr = new aws.EMR(emrClientOptions);
/**
 * Builds the parameter object that the handler passes to `emr.runJobFlow`.
 *
 * Every setting is hard-coded except the two S3 paths, which are appended as
 * the final arguments of the single spark-submit step.
 *
 * @param s3_input_path - input location forwarded to the Spark job (the handler passes an S3 URI string)
 * @param s3_output_path - output location forwarded to the Spark job (the handler passes an S3 URI string)
 * @returns the cluster/job-flow configuration object
 */
const emrClusterConfig = (s3_input_path, s3_output_path) => {
const ret = {
Name:`cluster-for-job`,
ServiceRole: 'EMR_DefaultRole',
JobFlowRole: 'EMR_EC2_DefaultRole',
VisibleToAllUsers: true,
ScaleDownBehavior: 'TERMINATE_AT_TASK_COMPLETION',
LogUri: 's3n://log-uri/elasticmapreduce/',
ReleaseLabel: 'emr-5.29.0',
// Instance layout plus networking / lifecycle settings for the cluster.
Instances:{
InstanceGroups: [
{
// Master group: one on-demand m5.xlarge with two 32 GB gp2 volumes per instance.
Name: 'Master Instance Group',
Market: 'ON_DEMAND',
InstanceRole: 'MASTER',
InstanceType: 'm5.xlarge',
InstanceCount: 1,
EbsConfiguration: {
EbsBlockDeviceConfigs: [
{
VolumeSpecification: {
SizeInGB: 32,
VolumeType: 'gp2',
},
VolumesPerInstance: 2
},
]
},
},
{
Name: 'Core Instance Group',
// NOTE(review): the next line is an elided placeholder from the original post and is
// not valid JavaScript; replace it with the real core-group settings before running.
{... similar to master ...}
}
],
Ec2KeyName: 'my-keys',
Ec2SubnetId: 'my-subnet-id',
EmrManagedSlaveSecurityGroup:'sg-slave-security-group',
EmrManagedMasterSecurityGroup:'sg-master-security-group',
// NOTE(review): presumably makes the cluster shut down once its steps finish — confirm against the EMR API docs.
KeepJobFlowAliveWhenNoSteps: false,
TerminationProtected: false
},
Applications:[
{
'Name': 'Spark'
},
],
// "spark" classification with no property overrides.
Configurations:[{
"Classification":"spark",
"Properties":{}
}],
// One step: spark-submit in cluster deploy mode; a step failure terminates the cluster.
Steps:[{
'Name': 'step',
'ActionOnFailure': 'TERMINATE_CLUSTER',
'HadoopJarStep': {
'Jar': 's3n://elasticmapreduce/libs/script-runner/script-runner.jar',
'Args': [
"/usr/bin/spark-submit", "--deploy-mode", "cluster",
// Application jar followed by the caller-supplied input and output paths.
's3://path-to-a-very-humble.jar', s3_input_path, s3_output_path
]
}
}],
}
return ret
}
exports.handler = async (event, context) => {
const record = event.Records[0];
const eventName = record.eventName;
if(eventName === 'ObjectCreated:Put' || eventName === 'ObjectCreated:Post' || eventName === 'ObjectCreated:CompleteMultipartUpload' || eventName === 'ObjectCreated:Copy'){
const s3_inputPath = 's3n://in-bucket/key';
const s3_outputPath = 's3n://out-bucket/key';
try{
const cluster_config = emrClusterConfig(s3_inputPath,s3_outputPath);
const AWS_EMRJobRequest = emr.runJobFlow(cluster_config)
AWS_EMRJobRequest
.on('success', function(response){ console.log("success => " + response)})
.on('error', function(response){ console.log("error => " + response)})
.on('complete', function(response){ console.log("complete => " + response)})
.send( function(err, data){
if (err) console.log(err, err.stack); // an error occurred
else console.log(data); // successful response
context.done(null,'λ Completed');
});
console.log('Finished Launching EMR cluster: ', AWS_EMRJobRequest)
}
catch(err){
console.log(err);
}
}
else{
console.log(`:: not interested in event ${eventName}`);
}
context.done(null, 'λ Completed');
};
I have set up these clusters before manually and they work fine. I copied the cluster configuration from the information in the AWS CLI Export, to match the settings of my existing clusters.
This just doesn't do anything: it only logs "Finished Launching EMR cluster" at the end, along with the request object, and nothing else happens.
AWS is terminating the function before the response is received, because AWSRequest sends the request asynchronously. Since you're using an async handler, you can use AWS.Request.promise(). It immediately starts the service call and returns a promise that is either fulfilled with the response's data property or rejected with the response's error property.
let AWS_EMRJobRequest = emr.runJobFlow(cluster_config);
return AWS_EMRJobRequest.promise();
Refer to the documentation for more information.