After hosting an application on Fargate, we're trying to switch to EC2 instances with a custom capacity provider. For some reason, the capacity provider we created won't run more than one task per instance.
The configuration below works well if we only want one task, but as soon as we switch to 10 tasks, 5 instances are created and each of them has only one task running.
{
"CapacityProvider": {
"Type": "AWS::ECS::CapacityProvider",
"DeletionPolicy": "Retain",
"Properties": {
"AutoScalingGroupProvider": {
"AutoScalingGroupArn": {
"Ref": "AutoScaleGroup"
},
"ManagedDraining": "ENABLED",
"ManagedScaling": {
"InstanceWarmupPeriod": 0,
"MaximumScalingStepSize": 1,
"MinimumScalingStepSize": 1,
"Status": "ENABLED",
"TargetCapacity": 100
}
},
"Name": {
"Fn::Sub": "${ServiceName}-${Env}-CapacityProvider"
}
}
},
"AutoScaleGroup": {
"Type": "AWS::AutoScaling::AutoScalingGroup",
"Properties": {
"ServiceLinkedRoleARN": {
"Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling"
},
"CapacityRebalance": true,
"Cooldown": "300",
"AvailabilityZones": [
"eu-central-1a",
"eu-central-1b",
"eu-central-1c"
],
"DesiredCapacity": "2",
"HealthCheckGracePeriod": 300,
"MetricsCollection": [],
"InstanceMaintenancePolicy": {
"MinHealthyPercentage": 90,
"MaxHealthyPercentage": 100
},
"MaxSize": "5",
"NewInstancesProtectedFromScaleIn": false,
"MinSize": "2",
"TerminationPolicies": [
"Default"
],
"AutoScalingGroupName": {
"Fn::Sub": "${ServiceName}-auto-scaling-group"
},
"MixedInstancesPolicy": {
"LaunchTemplate": {
"LaunchTemplateSpecification": {
"Version": {
"Fn::GetAtt": [
"LaunchTemplate",
"LatestVersionNumber"
]
},
"LaunchTemplateId": {
"Ref": "LaunchTemplate"
}
},
"Overrides": [
{
"InstanceType": "m5.large",
"WeightedCapacity": "1"
},
{
"InstanceType": "m4.large",
"WeightedCapacity": "1"
},
{
"InstanceType": "m5.xlarge",
"WeightedCapacity": "1"
},
{
"InstanceType": "m4.xlarge",
"WeightedCapacity": "1"
},
{
"InstanceType": "m5.2xlarge",
"WeightedCapacity": "1"
}
]
},
"InstancesDistribution": {
"OnDemandAllocationStrategy": "prioritized",
"OnDemandBaseCapacity": 0,
"SpotAllocationStrategy": "price-capacity-optimized",
"OnDemandPercentageAboveBaseCapacity": 0
}
},
"VPCZoneIdentifier": [
{
"Ref": "Subnet1"
},
{
"Ref": "Subnet2"
},
{
"Ref": "Subnet3"
}
],
"DesiredCapacityType": "units",
"Tags": [],
"HealthCheckType": "ELB"
}
}
}
And the application configuration:
{
"AWSTemplateFormatVersion": "2010-09-09",
"Parameters": {
"Env": {
"Description": "An environment name",
"Type": "String",
"AllowedValues": [
"dev",
"test",
"prod"
],
"ConstraintDescription": "Allowed values is dev/test/prod."
},
"ServiceName": {
"Type": "String",
"Default": ""
},
"VPCID": {
"Type": "String",
"Default": ""
},
"Subnet1": {
"Type": "String",
"Default": ""
},
"Subnet2": {
"Type": "String",
"Default": ""
},
"Subnet3": {
"Type": "String",
"Default": ""
},
"Cluster": {
"Type": "String",
"Default": ""
},
"LoadBalancer": {
"Type": "String",
"Default": ""
},
"Listener": {
"Type": "String",
"Default": ""
},
"Version": {
"Type": "String",
"Default": "1.0"
},
"Registry": {
"Type": "String",
"Default": ""
},
"Domain": {
"Type": "String",
"Default": ""
}
},
"Resources": {
"Certificate": {
"Type": "AWS::CertificateManager::Certificate",
"Properties": {
"DomainName": {
"Ref": "Domain"
},
"ValidationMethod": "DNS"
}
},
"CertificateListener": {
"Type": "AWS::ElasticLoadBalancingV2::ListenerCertificate",
"Properties": {
"Certificates": [
{
"CertificateArn": {
"Ref": "Certificate"
}
}
],
"ListenerArn": {
"Ref": "Listener"
}
}
},
"LogGroup": {
"Type": "AWS::Logs::LogGroup",
"Properties": {
"RetentionInDays": 14
}
},
"Filesystem": {
"Type": "AWS::EFS::FileSystem",
"Properties": {
"Encrypted": true,
"KmsKeyId": {
"Ref": "KMS"
},
"FileSystemTags": [
{
"Key": "Name",
"Value": {
"Fn::Sub": "${ServiceName}-${Env}"
}
}
]
}
},
"MountTarget": {
"Type": "AWS::EFS::MountTarget",
"Properties": {
"FileSystemId": {
"Ref": "Filesystem"
},
"SubnetId": {
"Ref": "Subnet1"
},
"SecurityGroups": [
{
"Fn::GetAtt": [
"SecurityGroup",
"GroupId"
]
}
]
}
},
"MountTarget2": {
"Type": "AWS::EFS::MountTarget",
"Properties": {
"FileSystemId": {
"Ref": "Filesystem"
},
"SubnetId": {
"Ref": "Subnet2"
},
"SecurityGroups": [
{
"Fn::GetAtt": [
"SecurityGroup",
"GroupId"
]
}
]
}
},
"MountTarget3": {
"Type": "AWS::EFS::MountTarget",
"Properties": {
"FileSystemId": {
"Ref": "Filesystem"
},
"SubnetId": {
"Ref": "Subnet3"
},
"SecurityGroups": [
{
"Fn::GetAtt": [
"SecurityGroup",
"GroupId"
]
}
]
}
},
"KMS": {
"Type": "AWS::KMS::Key",
"Properties": {
"Enabled": true,
"EnableKeyRotation": false,
"KeyPolicy": {
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Enable IAM User Permissions",
"Effect": "Allow",
"Principal": {
"AWS": {
"Fn::Join": [
"",
[
"arn:aws:iam::",
{
"Ref": "AWS::AccountId"
},
":root"
]
]
}
},
"Action": "kms:*",
"Resource": "*"
},
{
"Sid": "Allow use of the key",
"Effect": "Allow",
"Principal": {
"AWS": "*"
},
"Action": [
"kms:Sign",
"kms:Verify",
"kms:DescribeKey",
"kms:List*"
],
"Resource": "*"
}
]
}
}
},
"ShopECSService": {
"Type": "AWS::ECS::Service",
"Properties": {
"Cluster": {
"Ref": "Cluster"
},
"ServiceName": {
"Fn::Sub": "${ServiceName}-${Env}"
},
"DesiredCount": 1,
"CapacityProviderStrategy": [
{
"CapacityProvider": "shop-prod-CapacityProvider",
"Weight": 1
}
],
"DeploymentConfiguration": {
"MaximumPercent": 200,
"MinimumHealthyPercent": 100,
"DeploymentCircuitBreaker": {
"Enable": true,
"Rollback": true
}
},
"LoadBalancers": [
{
"ContainerName": "nginx",
"ContainerPort": 80,
"TargetGroupArn": {
"Ref": "TargetGroup"
}
}
],
"NetworkConfiguration": {
"AwsvpcConfiguration": {
"AssignPublicIp": "DISABLED",
"SecurityGroups": [
{
"Fn::GetAtt": [
"SecurityGroup",
"GroupId"
]
}
],
"Subnets": [
{
"Ref": "Subnet1"
},
{
"Ref": "Subnet2"
},
{
"Ref": "Subnet3"
}
]
}
},
"TaskDefinition": {
"Ref": "TaskDefinition"
}
}
},
"Role": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Version": "2008-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ecs-tasks.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
},
"ManagedPolicyArns": [
"arn:aws:iam::aws:policy/AmazonEC2FullAccess",
"arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
"arn:aws:iam::aws:policy/service-role/AWSLambdaSQSQueueExecutionRole",
"arn:aws:iam::aws:policy/SecretsManagerReadWrite"
]
}
},
"TaskDefinition": {
"Type": "AWS::ECS::TaskDefinition",
"Properties": {
"Family": {
"Fn::Sub": "task-${ServiceName}-${Env}"
},
"ExecutionRoleArn": {
"Ref": "Role"
},
"Volumes": [
{
"Name": "efs",
"EFSVolumeConfiguration": {
"FilesystemId": {
"Ref": "Filesystem"
}
}
}
],
"NetworkMode": "awsvpc",
"RequiresCompatibilities": [
"EC2"
],
"ContainerDefinitions": [
{
"Name": "app",
"Image": {
"Fn::Sub": "${Registry}:${Version}-app"
},
"MemoryReservation": "512",
"MountPoints": [
{
"ContainerPath": "/var/www/html/user",
"SourceVolume": "efs"
}
],
"Secrets": [],
"LogConfiguration": {
"LogDriver": "awslogs",
"Options": {
"awslogs-group": {
"Ref": "LogGroup"
},
"awslogs-region": {
"Ref": "AWS::Region"
},
"awslogs-stream-prefix": "ecs"
}
},
"Environment": [],
"Essential": true,
"PortMappings": [
{
"ContainerPort": 9000,
"Protocol": "tcp",
"AppProtocol": "http"
}
]
},
{
"Name": "nginx",
"Image": {
"Fn::Sub": "${Registry}:${Version}-nginx"
},
"MemoryReservation": "512",
"LogConfiguration": {
"LogDriver": "awslogs",
"Options": {
"awslogs-group": {
"Ref": "LogGroup"
},
"awslogs-region": {
"Ref": "AWS::Region"
},
"awslogs-stream-prefix": "ecs"
}
},
"Essential": true,
"PortMappings": [
{
"ContainerPort": 80,
"Protocol": "tcp",
"AppProtocol": "http"
}
]
}
]
}
},
"SecurityGroup": {
"Type": "AWS::EC2::SecurityGroup",
"Properties": {
"GroupDescription": "ECS security group",
"VpcId": {
"Ref": "VPCID"
},
"SecurityGroupIngress": [
{
"IpProtocol": "tcp",
"FromPort": 80,
"ToPort": 9000,
"CidrIp": "0.0.0.0/0"
},
{
"IpProtocol": "tcp",
"FromPort": 9000,
"ToPort": 9000,
"CidrIp": "0.0.0.0/0"
}
]
}
},
"TargetGroup": {
"Type": "AWS::ElasticLoadBalancingV2::TargetGroup",
"Properties": {
"Port": 80,
"Protocol": "HTTP",
"VpcId": {
"Ref": "VPCID"
},
"TargetType": "ip"
}
},
"ListenerRule": {
"Type": "AWS::ElasticLoadBalancingV2::ListenerRule",
"Properties": {
"Priority": 1,
"ListenerArn": {
"Ref": "Listener"
},
"Actions": [
{
"Type": "forward",
"TargetGroupArn": {
"Ref": "TargetGroup"
}
}
],
"Conditions": [
{
"Field": "host-header",
"HostHeaderConfig": {
"Values": [
{
"Ref": "Domain"
}
]
}
}
]
}
}
}
}
If more info is needed I'd be happy to supply it.
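The problem became apparent as soon as we had instance types other than m4 running: an m4 instance cannot run multiple tasks on the same port. (Presumably this is because, in `awsvpc` network mode, every task gets its own elastic network interface, so the number of tasks an instance can host is capped by how many network interfaces its instance type supports.)
At some point we had an m5 instance with multiple tasks running and an m4 instance with just a single task.
Somewhere deep in the documentation of the m4 and the m5 instance types we read about the difference, and removing the m4 types as options from the launch template overrides did the trick.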