Search code examples
amazon-web-serviceskubernetesterraformterraform-provider-awsamazon-eks

Error deploying EKS node-group with terraform


I have a problem deploying with Terraform a node group in an EKS cluster. The error looks like one plugin is having problems but I do not know how to resolve it.

If I see the EC2 in the AWS console (web), I can see the instance of the cluster but I have this error in the cluster.

The error was shown in my pipeline:

Error: waiting for EKS Node Group (UNIR-API-REST-CLUSTER-DEV:node_sping_boot) creation: NodeCreationFailure: Instances failed to join the kubernetes cluster. Resource IDs: [i-05ed58f8101240dc8]
on EKS.tf line 17, in resource "aws_eks_node_group" "nodes":
17: resource "aws_eks_node_group" "nodes"
2020-06-01T00:03:50.576Z [DEBUG] plugin: plugin process exited: path=/home/ubuntu/.jenkins/workspace/shop_infraestucture_generator_pipline/shop-proyect-dev/.terraform/plugins/linux_amd64/terraform-provider-aws_v2.64.0_x4 pid=13475
2020-06-01T00:03:50.576Z [DEBUG] plugin: plugin exited

And the error is printed in AWS console:

Link

This is the code in Terraform I use to create my project:

EKS.tf for creating the cluster and de nodes

resource "aws_eks_cluster" "CLUSTER" {
  name     = "UNIR-API-REST-CLUSTER-${var.SUFFIX}"
  role_arn = "${aws_iam_role.eks_cluster_role.arn}"
  vpc_config {
    subnet_ids = [
      "${aws_subnet.unir_subnet_cluster_1.id}","${aws_subnet.unir_subnet_cluster_2.id}"
    ]
  }
  depends_on = [
    "aws_iam_role_policy_attachment.AmazonEKSWorkerNodePolicy",
    "aws_iam_role_policy_attachment.AmazonEKS_CNI_Policy",
    "aws_iam_role_policy_attachment.AmazonEC2ContainerRegistryReadOnly",
  ]
}


resource "aws_eks_node_group" "nodes" {
  cluster_name    = "${aws_eks_cluster.CLUSTER.name}"
  node_group_name = "node_sping_boot"
  node_role_arn   = "${aws_iam_role.eks_nodes_role.arn}"
  subnet_ids      = [
      "${aws_subnet.unir_subnet_cluster_1.id}","${aws_subnet.unir_subnet_cluster_2.id}"
  ]
  scaling_config {
    desired_size = 1
    max_size     = 5
    min_size     = 1
  }
# instance_types is mediumt3 by default
# Ensure that IAM Role permissions are created before and deleted after EKS Node Group handling.
# Otherwise, EKS will not be able to properly delete EC2 Instances and Elastic Network Interfaces.
  depends_on = [
    "aws_iam_role_policy_attachment.AmazonEKSWorkerNodePolicy",
    "aws_iam_role_policy_attachment.AmazonEKS_CNI_Policy",
    "aws_iam_role_policy_attachment.AmazonEC2ContainerRegistryReadOnly",
  ]
}

output "eks_cluster_endpoint" {
  value = "${aws_eks_cluster.CLUSTER.endpoint}"
}

output "eks_cluster_certificat_authority" {
    value = "${aws_eks_cluster.CLUSTER.certificate_authority}"
}

securityAndGroups.tf

resource "aws_iam_role" "eks_cluster_role" {
  name = "eks-cluster-${var.SUFFIX}"

  assume_role_policy = <<POLICY
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "eks.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
POLICY
}


resource "aws_iam_role" "eks_nodes_role" {
  name = "eks-node-${var.SUFFIX}"

  assume_role_policy = <<POLICY
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
POLICY
}


resource "aws_iam_role_policy_attachment" "AmazonEKSClusterPolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
  role       = "${aws_iam_role.eks_cluster_role.name}"
}

resource "aws_iam_role_policy_attachment" "AmazonEKSServicePolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSServicePolicy"
  role       = "${aws_iam_role.eks_cluster_role.name}"
}

resource "aws_iam_role_policy_attachment" "AmazonEKSWorkerNodePolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
  role       = "${aws_iam_role.eks_nodes_role.name}"
}

resource "aws_iam_role_policy_attachment" "AmazonEKS_CNI_Policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
  role       = "${aws_iam_role.eks_nodes_role.name}"
}

resource "aws_iam_role_policy_attachment" "AmazonEC2ContainerRegistryReadOnly" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
  role       = "${aws_iam_role.eks_nodes_role.name}"
}

VPCAndRouting.tf to create my routing, VPC, and Subnets

resource "aws_vpc" "unir_shop_vpc_dev" {
  cidr_block = "${var.NET_CIDR_BLOCK}"
  enable_dns_hostnames = true
  enable_dns_support = true
  tags = {
    Name = "UNIR-VPC-SHOP-${var.SUFFIX}"
    Environment = "${var.SUFFIX}"
  }
}
resource "aws_route_table" "route" {
  vpc_id = "${aws_vpc.unir_shop_vpc_dev.id}"
  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = "${aws_internet_gateway.unir_gat_shop_dev.id}"
  }
  tags = {
    Name = "UNIR-RoutePublic-${var.SUFFIX}"
    Environment = "${var.SUFFIX}"
  }
}

data "aws_availability_zones" "available" {
  state = "available"
}
resource "aws_subnet" "unir_subnet_aplications" {
  vpc_id = "${aws_vpc.unir_shop_vpc_dev.id}"
  cidr_block = "${var.SUBNET_CIDR_APLICATIONS}"
  availability_zone = "${var.ZONE_SUB}"
  depends_on = ["aws_internet_gateway.unir_gat_shop_dev"]
  map_public_ip_on_launch = true
  tags = {
    Name = "UNIR-SUBNET-APLICATIONS-${var.SUFFIX}"
    Environment = "${var.SUFFIX}"
  }
}

resource "aws_subnet" "unir_subnet_cluster_1" {
  vpc_id = "${aws_vpc.unir_shop_vpc_dev.id}"
  cidr_block = "${var.SUBNET_CIDR_CLUSTER_1}"
  map_public_ip_on_launch = true
  availability_zone = "${var.ZONE_SUB_CLUSTER_2}"
  tags = {
    "kubernetes.io/cluster/UNIR-API-REST-CLUSTER-${var.SUFFIX}" = "shared"
  }
}

resource "aws_subnet" "unir_subnet_cluster_2" {
  vpc_id = "${aws_vpc.unir_shop_vpc_dev.id}"
  cidr_block = "${var.SUBNET_CIDR_CLUSTER_2}"
  availability_zone = "${var.ZONE_SUB_CLUSTER_1}"
  map_public_ip_on_launch = true
  tags = {
    "kubernetes.io/cluster/UNIR-API-REST-CLUSTER-${var.SUFFIX}" = "shared"
  }

}

resource "aws_internet_gateway" "unir_gat_shop_dev" {
  vpc_id = "${aws_vpc.unir_shop_vpc_dev.id}"
  tags = {
    Environment = "${var.SUFFIX}"
    Name = "UNIR-publicGateway-${var.SUFFIX}"
  }
}

My variables:

SUFFIX="DEV"
ZONE="eu-west-1"
TERRAFORM_USER_ID=
TERRAFORM_USER_PASS=
ZONE_SUB="eu-west-1b"
ZONE_SUB_CLUSTER_1="eu-west-1a"
ZONE_SUB_CLUSTER_2="eu-west-1c"
NET_CIDR_BLOCK="172.15.0.0/24"
SUBNET_CIDR_APLICATIONS="172.15.0.0/27"
SUBNET_CIDR_CLUSTER_1="172.15.0.32/27"
SUBNET_CIDR_CLUSTER_2="172.15.0.64/27"
SUBNET_CIDR_CLUSTER_3="172.15.0.128/27"
SUBNET_CIDR_CLUSTER_4="172.15.0.160/27"
SUBNET_CIDR_CLUSTER_5="172.15.0.192/27"
SUBNET_CIDR_CLUSTER_6="172.15.0.224/27"
MONGO_SSH_KEY=
KIBANA_SSH_KEY=
CLUSTER_SSH_KEY=

Will be more logs necesary?


Solution

  • According to the AWS documentation:

    If you receive the error "Instances failed to join the kubernetes cluster" in the AWS Management Console, ensure that either the cluster's private endpoint access is enabled, or that you have correctly configured CIDR blocks for public endpoint access. For more information, see Amazon EKS cluster endpoint access control.

    I noticed that you are switching the availability zones for your subnets:

    resource "aws_subnet" "unir_subnet_cluster_1" {
      vpc_id = "${aws_vpc.unir_shop_vpc_dev.id}"
      cidr_block = "${var.SUBNET_CIDR_CLUSTER_1}"
      map_public_ip_on_launch = true
      availability_zone = "${var.ZONE_SUB_CLUSTER_2}"
    

    You have assigned var.ZONE_SUB_CLUSTER_2 to unir_subnet_cluster_1 and var.ZONE_SUB_CLUSTER_1 to unir_subnet_cluster_2. Maybe this could be a cause for the misconfiguration.