I currently have a web app that I can run locally using Docker. I need to pass my AWS credentials at runtime for it to work: `docker run --env-file=.env -p 8501:8501 web-app`. The web app saves a file to S3 and also reads data from an RDS database.
Now I'm trying to deploy it on ECS with Fargate through terraform. I've been following the instructions in this article: https://medium.com/avmconsulting-blog/how-to-deploy-a-dockerised-node-js-application-on-aws-ecs-with-terraform-3e6bceb48785 I've replaced the port from 3000 to 8501 to match my app's port and also replaced the region to match mine.
However, it does not work. The health checks fail and the containers keep draining. I don't know how to debug it. Do I need to somehow pass my AWS credentials on the web-app for it to run as it did when running docker locally?
Docker file:
# NOTE(review): the ECS error "exec /usr/local/bin/streamlit: exec format error"
# typically means the image was built for a different CPU architecture than the
# Fargate task (e.g. built on an Apple Silicon / arm64 machine while the task
# runs on amd64). Pinning the platform here — or building with
# `docker buildx build --platform linux/amd64` — fixes that mismatch.
FROM --platform=linux/amd64 python:3.10

WORKDIR /Fenix

# Install a pinned Poetry release so builds are reproducible.
ARG POETRY_VERSION=1.3.1
RUN curl -sSL https://install.python-poetry.org | python - --version $POETRY_VERSION
ENV PATH=/root/.local/bin:$PATH
RUN poetry --version

# Install dependencies into the system interpreter (no virtualenv inside the
# container). Copying only the lock files first lets Docker cache this layer
# across app-code changes.
RUN poetry config virtualenvs.create false
COPY pyproject.toml poetry.lock ./
RUN poetry install --no-root --no-interaction --no-ansi

COPY . .

EXPOSE 8501

# Bind explicitly to 0.0.0.0:8501 in headless mode so the ALB health check can
# reach the app from outside the container.
CMD ["streamlit", "run", "app/main.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true"]
terraform iam.tf:
# Execution role: used by the ECS agent itself (pulling the image from ECR,
# writing logs) — not by the application code.
resource "aws_iam_role" "ecsTaskExecutionRole" {
  name               = "ecsTaskExecutionRole"
  assume_role_policy = data.aws_iam_policy_document.assume_role_policy.json
}
# Trust policy allowing ECS tasks to assume the execution role.
data "aws_iam_policy_document" "assume_role_policy" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ecs-tasks.amazonaws.com"]
    }
  }
}
# Attach the AWS-managed execution policy (ECR pull + CloudWatch Logs write).
resource "aws_iam_role_policy_attachment" "ecsTaskExecutionRole_policy" {
  role       = aws_iam_role.ecsTaskExecutionRole.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}
# Task role: assumed by the application code running inside the container.
# This is what replaces the local `--env-file=.env` credentials — the AWS SDK
# picks up the task's temporary credentials automatically.
resource "aws_iam_role" "task_role" {
  name = "example-task-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect    = "Allow"
      Action    = "sts:AssumeRole"
      Principal = { Service = "ecs-tasks.amazonaws.com" }
    }]
  })
}
# Runtime permissions for the application (Secrets Manager, S3, RDS).
# NOTE(review): Resource = "*" is broader than necessary; scope each statement
# to the specific secret, bucket, and database ARNs once they are known.
resource "aws_iam_policy" "task_policy" {
  name        = "example-task-policy"
  description = "Policy to allow access to Secrets Manager, S3, and RDS"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid      = "SecretsManagerAccess"
        Effect   = "Allow"
        Action   = ["secretsmanager:GetSecretValue"]
        Resource = "*"
      },
      {
        Sid      = "S3Access"
        Effect   = "Allow"
        Action   = ["s3:GetObject", "s3:PutObject"]
        Resource = "*"
      },
      {
        Sid    = "RDSAccess"
        Effect = "Allow"
        Action = [
          "rds:DescribeDBInstances",
          "rds:DescribeDBClusters",
          "rds-data:ExecuteStatement",
        ]
        Resource = "*"
      },
    ]
  })
}
# Bind the runtime permissions to the task role.
resource "aws_iam_role_policy_attachment" "task_policy_attachment" {
  role       = aws_iam_role.task_role.name
  policy_arn = aws_iam_policy.task_policy.arn
}
terraform deploy.tf:
# Container registry the task definition pulls the application image from.
resource "aws_ecr_repository" "fenix_ecr_repo" {
  name = "fenix-ecr-repo"
}
# Logical grouping for the Fargate service.
resource "aws_ecs_cluster" "fenix_cluster" {
  name = "fenix-cluster"
}
# Providing a reference to our default VPC
# Adopt the account's default VPC and two of its subnets (one per AZ) so the
# service and the load balancer can span multiple availability zones.
resource "aws_default_vpc" "default_vpc" {
}

resource "aws_default_subnet" "default_subnet_a" {
  availability_zone = "eu-north-1a"
}

resource "aws_default_subnet" "default_subnet_b" {
  availability_zone = "eu-north-1b"
}
# Fargate task definition. jsonencode() is used instead of a raw heredoc so
# Terraform validates the container definition — the original heredoc contained
# an unquoted `<aws-region>` placeholder, which is invalid JSON and prevents
# the task definition from registering.
resource "aws_ecs_task_definition" "fenix_task" {
  family = "my-first-task"

  container_definitions = jsonencode([
    {
      name  = "my-first-task"
      image = "${aws_ecr_repository.fenix_ecr_repo.repository_url}:${var.image_tag}"

      # Ship container stdout/stderr to CloudWatch Logs so startup failures
      # (e.g. "exec format error") are visible. The log group must already
      # exist; consider managing it with an aws_cloudwatch_log_group resource.
      logConfiguration = {
        logDriver = "awslogs"
        options = {
          "awslogs-region"        = "eu-north-1" # must match the provider region
          "awslogs-group"         = "fenix_deployment"
          "awslogs-stream-prefix" = "fenix-container"
        }
      }

      essential = true
      portMappings = [
        {
          containerPort = 8501
          hostPort      = 8501
        }
      ]
      memory = 512
      cpu    = 256
    }
  ])

  requires_compatibilities = ["FARGATE"] # Fargate launch type
  network_mode             = "awsvpc"    # required for Fargate
  memory                   = 512
  cpu                      = 256
  execution_role_arn       = aws_iam_role.ecsTaskExecutionRole.arn # agent permissions: pull image, write logs
  task_role_arn            = aws_iam_role.task_role.arn            # app permissions: S3, RDS, Secrets Manager
}
# Internet-facing application load balancer spanning both default subnets.
resource "aws_alb" "application_load_balancer" {
  name               = "test-lb-tf"
  load_balancer_type = "application"

  subnets = [
    aws_default_subnet.default_subnet_a.id,
    aws_default_subnet.default_subnet_b.id,
  ]

  security_groups = [aws_security_group.load_balancer_security_group.id]
}
# Creating a security group for the load balancer:
# Load balancer security group: HTTP in from anywhere, anything out.
resource "aws_security_group" "load_balancer_security_group" {
  ingress {
    from_port   = 80 # accept plain HTTP from any source
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  egress {
    from_port   = 0    # any outbound port
    to_port     = 0
    protocol    = "-1" # any protocol
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# Fargate service running two copies of the task behind the load balancer.
resource "aws_ecs_service" "fenix_service" {
  name            = "fenix-service"
  cluster         = aws_ecs_cluster.fenix_cluster.id
  task_definition = aws_ecs_task_definition.fenix_task.arn
  launch_type     = "FARGATE"
  desired_count   = 2 # number of task copies to keep running

  load_balancer {
    target_group_arn = aws_lb_target_group.target_group.arn
    container_name   = aws_ecs_task_definition.fenix_task.family # must match the "name" in container_definitions
    container_port   = 8501
  }

  network_configuration {
    subnets          = [aws_default_subnet.default_subnet_a.id, aws_default_subnet.default_subnet_b.id]
    assign_public_ip = true # public IPs so tasks can reach ECR/S3 without a NAT gateway
    security_groups  = [aws_security_group.service_security_group.id]
  }
}
# Target group the ALB forwards to. With target_type = "ip", ECS registers each
# task's private IP on the container port (8501), so `port` here is only a
# default for manually registered targets.
resource "aws_lb_target_group" "target_group" {
  name        = "target-group"
  port        = 80
  protocol    = "HTTP"
  target_type = "ip"
  vpc_id      = aws_default_vpc.default_vpc.id

  health_check {
    # Streamlit exposes a dedicated lightweight health endpoint; probing it
    # instead of "/" avoids false failures from the app's own page rendering.
    path    = "/_stcore/health"
    matcher = "200"
  }
}
# HTTP listener forwarding all port-80 traffic to the target group.
resource "aws_lb_listener" "listener" {
  load_balancer_arn = aws_alb.application_load_balancer.arn
  port              = "80"
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.target_group.arn
  }
}
# Service security group: only the load balancer may reach the tasks.
resource "aws_security_group" "service_security_group" {
  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    # restrict inbound traffic to the load balancer's security group
    security_groups = [aws_security_group.load_balancer_security_group.id]
  }

  egress {
    from_port   = 0    # any outbound port
    to_port     = 0
    protocol    = "-1" # any protocol
    cidr_blocks = ["0.0.0.0/0"]
  }
}
UPDATE: Following Mark B's suggestion I added a task_role_arn and a logConfiguration to the ecs_task_definition. The logs now show that the containers fail with the error:
exec /usr/local/bin/streamlit: exec format error
This error typically means the image was built for a different CPU architecture (e.g. arm64 on an Apple Silicon machine) than the one the Fargate task runs on (amd64 by default).
The execution_role_arn gives the ECS service the permissions it needs in your AWS account to deploy your Fargate task (such as pulling the image and writing logs). The task_role_arn gives the code running inside your container permission to access your AWS account. You are currently missing the task_role_arn, so your code does not have permission to do what it needs to do, such as accessing S3.
Also note that you are having trouble debugging this because you are not saving your Fargate container's logs anywhere. You need to add a logConfiguration to your container_definitions that sends the container logs to CloudWatch Logs, so you can see the error messages being reported inside your container.