Tags: amazon-web-services, docker, terraform, amazon-ecs, aws-fargate

Deploying a dockerised web-app on AWS ECS Fargate with Terraform


I currently have a web-app that I can run locally using Docker. I need to pass AWS credentials at runtime for it to work: docker run --env-file=.env -p 8501:8501 web-app. The web-app saves a file to S3 and also reads data from an RDS database.

Now I'm trying to deploy it on ECS with Fargate through Terraform. I've been following the instructions in this article: https://medium.com/avmconsulting-blog/how-to-deploy-a-dockerised-node-js-application-on-aws-ecs-with-terraform-3e6bceb48785. I've changed the port from 3000 to 8501 to match my app's port and also changed the region to match mine.

However, it does not work. The health checks fail and the containers keep draining, and I don't know how to debug it. Do I need to somehow pass my AWS credentials to the web-app for it to run as it did when running Docker locally?

Dockerfile:

FROM python:3.10

WORKDIR /Fenix

ARG POETRY_VERSION=1.3.1

RUN curl -sSL https://install.python-poetry.org | python - --version $POETRY_VERSION
ENV PATH /root/.local/bin:$PATH

RUN poetry --version

RUN poetry config virtualenvs.create false

COPY pyproject.toml poetry.lock ./

RUN poetry install --no-root --no-interaction --no-ansi

COPY . .

EXPOSE 8501

CMD ["streamlit", "run", "app/main.py"]

Terraform iam.tf:

resource "aws_iam_role" "ecsTaskExecutionRole" {
  name               = "ecsTaskExecutionRole"
  assume_role_policy = "${data.aws_iam_policy_document.assume_role_policy.json}"
}

data "aws_iam_policy_document" "assume_role_policy" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ecs-tasks.amazonaws.com"]
    }
  }
}

resource "aws_iam_role_policy_attachment" "ecsTaskExecutionRole_policy" {
  role       = "${aws_iam_role.ecsTaskExecutionRole.name}"
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

resource "aws_iam_role" "task_role" {
  name = "example-task-role"

  assume_role_policy = jsonencode({
    Version   = "2012-10-17"
    Statement = [
      {
        Action    = "sts:AssumeRole"
        Effect    = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

resource "aws_iam_policy" "task_policy" {
  name        = "example-task-policy"
  description = "Policy to allow access to Secrets Manager, S3, and RDS"

  policy = jsonencode({
    Version   = "2012-10-17"
    Statement = [
      {
        Sid       = "SecretsManagerAccess"
        Effect    = "Allow"
        Action    = [
          "secretsmanager:GetSecretValue"
        ]
        Resource  = "*"
      },
      {
        Sid       = "S3Access"
        Effect    = "Allow"
        Action    = [
          "s3:GetObject",
          "s3:PutObject"
        ]
        Resource  = "*"
      },
      {
        Sid       = "RDSAccess"
        Effect    = "Allow"
        Action    = [
          "rds:DescribeDBInstances",
          "rds:DescribeDBClusters",
          "rds-data:ExecuteStatement",
        ]
        Resource  = "*"
      }
    ]
  })
}


resource "aws_iam_role_policy_attachment" "task_policy_attachment" {
  policy_arn = aws_iam_policy.task_policy.arn
  role       = aws_iam_role.task_role.name
}

Terraform deploy.tf:

resource "aws_ecr_repository" "fenix_ecr_repo" {
  name = "fenix-ecr-repo" # Naming my repository
}

resource "aws_ecs_cluster" "fenix_cluster" {
  name = "fenix-cluster" # Naming the cluster
}

# Providing a reference to our default VPC
resource "aws_default_vpc" "default_vpc" {
}

# Providing a reference to our default subnets
resource "aws_default_subnet" "default_subnet_a" {
  availability_zone = "eu-north-1a"
}

resource "aws_default_subnet" "default_subnet_b" {
  availability_zone = "eu-north-1b"
}

resource "aws_ecs_task_definition" "fenix_task" {
  family                   = "my-first-task" # Naming our first task
  container_definitions    = <<DEFINITION
  [
    {
      "name": "my-first-task",
      "image": "${aws_ecr_repository.fenix_ecr_repo.repository_url}:${var.image_tag}",
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-region": <aws-region>,
          "awslogs-group": "fenix_deployment",
          "awslogs-stream-prefix": "fenix-container"
        }
      },
      "essential": true,
      "portMappings": [
        {
          "containerPort": 8501,
          "hostPort": 8501
        }
      ],
      "memory": 512,
      "cpu": 256
    }
  ]
  DEFINITION
  requires_compatibilities = ["FARGATE"] # Stating that we are using ECS Fargate
  network_mode             = "awsvpc"    # Using awsvpc as our network mode as this is required for Fargate
  memory                   = 512         # Specifying the memory our container requires
  cpu                      = 256         # Specifying the CPU our container requires
  execution_role_arn       = "${aws_iam_role.ecsTaskExecutionRole.arn}"
  task_role_arn            = "${aws_iam_role.task_role.arn}"
}
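
# NOTE (assumption): the container definition above sends logs to a group named
# "fenix_deployment", but no matching resource appears in these files. Unless that
# group already exists in the account, tasks will fail to start once the log
# configuration is in place. A minimal sketch of the corresponding resource; the
# retention period is only an example:
resource "aws_cloudwatch_log_group" "fenix_deployment" {
  name              = "fenix_deployment" # Must match the "awslogs-group" option above
  retention_in_days = 14                 # Example value; adjust as needed
}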

resource "aws_alb" "application_load_balancer" {
  name               = "test-lb-tf" # Naming our load balancer
  load_balancer_type = "application"
  subnets = [ # Referencing the default subnets
    "${aws_default_subnet.default_subnet_a.id}",
    "${aws_default_subnet.default_subnet_b.id}",
  ]
  # Referencing the security group
  security_groups = ["${aws_security_group.load_balancer_security_group.id}"]
}

# Creating a security group for the load balancer:
resource "aws_security_group" "load_balancer_security_group" {
  ingress {
    from_port   = 80 # Allowing traffic in from port 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"] # Allowing traffic in from all sources
  }

  egress {
    from_port   = 0 # Allowing all ports
    to_port     = 0
    protocol    = "-1" # Allowing any outgoing protocol
    cidr_blocks = ["0.0.0.0/0"] # Allowing traffic out to all IP addresses
  }
}

resource "aws_ecs_service" "fenix_service" {
  name            = "fenix-service"                             # Naming our first service
  cluster         = "${aws_ecs_cluster.fenix_cluster.id}"             # Referencing our created Cluster
  task_definition = "${aws_ecs_task_definition.fenix_task.arn}" # Referencing the task our service will spin up
  launch_type     = "FARGATE"
  desired_count   = 2 # Setting the number of containers we want deployed to 2

  load_balancer {
    target_group_arn = "${aws_lb_target_group.target_group.arn}" # Referencing our target group
    container_name   = "${aws_ecs_task_definition.fenix_task.family}"
    container_port   = 8501 # Specifying the container port
  }

  network_configuration {
    subnets          = ["${aws_default_subnet.default_subnet_a.id}", "${aws_default_subnet.default_subnet_b.id}"]
    assign_public_ip = true # Providing our containers with public IPs
    security_groups  = ["${aws_security_group.service_security_group.id}"] # Setting the security group
  }
}

resource "aws_lb_target_group" "target_group" {
  name        = "target-group"
  port        = 80
  protocol    = "HTTP"
  target_type = "ip"
  vpc_id      = "${aws_default_vpc.default_vpc.id}" # Referencing the default VPC
  health_check {
    matcher = "200,301,302"
    path = "/"
  }
}
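
# NOTE (assumption): if the health check on "/" keeps failing even after the container
# starts cleanly, recent Streamlit releases also expose a lightweight health endpoint
# that the target group can probe instead, e.g.:
#
#   health_check {
#     matcher = "200"
#     path    = "/_stcore/health" # older Streamlit versions used "/healthz"
#   }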

resource "aws_lb_listener" "listener" {
  load_balancer_arn = "${aws_alb.application_load_balancer.arn}" # Referencing our load balancer
  port              = "80"
  protocol          = "HTTP"
  default_action {
    type             = "forward"
    target_group_arn = "${aws_lb_target_group.target_group.arn}" # Referencing our target group
  }
}

resource "aws_security_group" "service_security_group" {
  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    # Only allowing traffic in from the load balancer security group
    security_groups = ["${aws_security_group.load_balancer_security_group.id}"]
  }

  egress {
    from_port   = 0 # Allowing all ports
    to_port     = 0
    protocol    = "-1" # Allowing any outgoing protocol
    cidr_blocks = ["0.0.0.0/0"]
  }
}

UPDATE: Following Mark B's suggestion, I added a task_role_arn and a logConfiguration to the ecs_task_definition. Now I can see from the logs that the containers fail with the error:

exec /usr/local/bin/streamlit: exec format error
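
That error usually points to a CPU architecture mismatch: an image built on an Apple Silicon (arm64) machine will not run on Fargate, which launches tasks as X86_64 unless told otherwise. Assuming that is the cause here, either rebuild the image for linux/amd64 or declare the architecture on the task definition, roughly like this:

resource "aws_ecs_task_definition" "fenix_task" {
  # ... existing arguments from deploy.tf ...

  # Match this to the architecture the image was actually built for.
  runtime_platform {
    operating_system_family = "LINUX"
    cpu_architecture        = "ARM64" # or keep the default X86_64 and rebuild the image for linux/amd64
  }
}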

Solution

  • The execution_role_arn gives the ECS service itself permission to act in your AWS account while it deploys your Fargate task (for example, pulling the image from ECR and writing logs).

    The task_role_arn gives the code running inside your container permission to access your AWS account.

    You are currently missing the task_role_arn, so your code does not have permission to do what it needs to do, such as access S3.


    Also note that you are having trouble debugging this because you are not saving your Fargate container's logs anywhere. You need to add a logConfiguration to your container_definitions that sends the container logs to CloudWatch Logs, so you can see the error messages being reported by your container, as sketched below.
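
    A minimal sketch of where the two roles and the log configuration go, reusing the resource names already defined in the question (the region is left as a placeholder):

    resource "aws_ecs_task_definition" "fenix_task" {
      # ... family, cpu, memory, network_mode, requires_compatibilities ...

      execution_role_arn = aws_iam_role.ecsTaskExecutionRole.arn # lets ECS pull the image and write logs
      task_role_arn      = aws_iam_role.task_role.arn            # lets the app itself reach S3, RDS and Secrets Manager

      container_definitions = jsonencode([
        {
          name         = "my-first-task"
          image        = "${aws_ecr_repository.fenix_ecr_repo.repository_url}:${var.image_tag}"
          essential    = true
          portMappings = [{ containerPort = 8501, hostPort = 8501 }]
          logConfiguration = {
            logDriver = "awslogs"
            options = {
              "awslogs-group"         = "fenix_deployment"
              "awslogs-region"        = "<aws-region>"
              "awslogs-stream-prefix" = "fenix-container"
            }
          }
        }
      ])
    }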