skip to Main Content

I have the following Terraform code that configures a gateway service on AWS ECS Fargate. Services in the private network that are not behind a load balancer work as expected; however, the gateway, which has a load balancer in front of it, is failing its health check, and every 2–3 minutes its task is deprovisioned and a new one is provisioned. The Dockerfile exposes the service on port 3000.

Here’s the Terraform configuration that is failing:

# Values shared by the gateway task definition and the ECS service.
locals {
  # TCP port the gateway container listens on; used for the container port
  # mapping, the PORT environment variable and the service's load_balancer block.
  gateway_port = 3000

  # Image tag pulled from ECR; also passed to the container as VERSION.
  gateway_version = "1.0.0"
}

## VPC

# VPC for the environment, built with the terraform-aws-modules/vpc module:
# two availability zones with two public, two private and two ElastiCache
# subnets, IPv6 enabled, and NAT enabled in single-gateway mode.
module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "3.11.0"

  name = "${var.env}-vpc"

  # NOTE(review): 20.0.0.0/16 lies outside the RFC 1918 private ranges —
  # confirm this is intentional.
  cidr = "20.0.0.0/16"

  # NOTE(review): the AZs are hard-coded to eu-central-1 while other resources
  # take their region from var.aws-region — confirm the two always agree.
  azs = ["eu-central-1a", "eu-central-1b"]

  public_subnets      = ["20.0.1.0/24", "20.0.2.0/24"]
  private_subnets     = ["20.0.86.0/24", "20.0.172.0/24"]
  elasticache_subnets = ["20.0.31.0/24", "20.0.32.0/24"]

  enable_ipv6        = true
  enable_nat_gateway = true
  single_nat_gateway = true

  tags = {
    Terraform = "true"
  }
}

## Security Groups

# Default security group; it is one of the two groups attached to the gateway
# service. Every named ingress rule below is open to 0.0.0.0/0 and all egress
# is allowed.
# NOTE(review): none of the named rules mentions the gateway port (3000), and
# the database/broker/cache rules are reachable from any address — confirm
# both are intended.
module "sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  vpc_id      = module.vpc.vpc_id
  name        = "${var.env}-sg-default"
  description = "Default service security group"

  egress_rules = ["all-all"]

  ingress_cidr_blocks = ["0.0.0.0/0"]
  ingress_rules = [
    "all-icmp",
    "http-80-tcp",
    "https-443-tcp",
    "mysql-tcp",
    "rabbitmq-4369-tcp",
    "rabbitmq-5671-tcp",
    "rabbitmq-5672-tcp",
    "rabbitmq-15672-tcp",
    "rabbitmq-25672-tcp",
    "redis-tcp"
  ]
}

# Security group attached to the application load balancer: HTTP (80) and ICMP
# in from any address, all traffic out. Its id is also the only ingress source
# allowed by aws_security_group.service_security_group.
module "security_group" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  vpc_id      = module.vpc.vpc_id
  name        = "${var.env}-sg-lb"
  description = "Security group for ALB"

  egress_rules        = ["all-all"]
  ingress_rules       = ["http-80-tcp", "all-icmp"]
  ingress_cidr_blocks = ["0.0.0.0/0"]
}


# Security group that lets the load balancer reach the gateway tasks: any
# protocol and port is accepted, but only from members of the ALB security
# group. Outbound traffic is unrestricted.
resource "aws_security_group" "service_security_group" {
  name   = "${var.env}-lb-connection"
  vpc_id = module.vpc.vpc_id

  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    protocol  = "-1"
    from_port = 0
    to_port   = 0
    # Only allowing traffic in from the load balancer security group
    security_groups = [module.security_group.security_group_id]
  }
}

## ECS Cluster

# ECS cluster that the gateway service runs in; named after the environment.
resource "aws_ecs_cluster" "default" {
  name = "${var.env}-cluster"
}

## ECR

# Looks up the ECR repository "gateway-<env>"; its repository_url is used to
# build the container image reference in the gateway task definition.
data "aws_ecr_repository" "gateway_ecr" {
  name = "gateway-${var.env}"
}

## ECS Task Definition

# Fargate task definition for the gateway: a single essential container that
# listens on local.gateway_port and logs to CloudWatch through the awslogs
# driver.
#
# The container definitions are built with jsonencode() instead of a
# hand-written heredoc so that quoting and number/string types are produced by
# Terraform rather than by string interpolation.
resource "aws_ecs_task_definition" "gateway_task" {
  family                   = "${var.env}-gateway-task"
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"
  cpu                      = 256
  memory                   = 512
  task_role_arn            = aws_iam_role.gateway_task_definition_role.arn
  execution_role_arn       = aws_iam_role.gateway_task_execution_role.arn

  container_definitions = jsonencode([
    {
      # The ECS service's load_balancer block refers to this container by
      # name; it is deliberately identical to the task family.
      name      = "${var.env}-gateway-task"
      image     = "${data.aws_ecr_repository.gateway_ecr.repository_url}:${local.gateway_version}"
      essential = true
      cpu       = 256
      memory    = 512

      # "networkMode" is a task-level setting (network_mode above), not a
      # container-definition field, so it is not repeated here.

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          "awslogs-group"         = aws_cloudwatch_log_group.gateway_logs.name
          "awslogs-stream-prefix" = "ecs"
          "awslogs-region"        = var.aws-region
        }
      }

      portMappings = [
        {
          containerPort = local.gateway_port
          hostPort      = local.gateway_port
        }
      ]

      # Environment values must be JSON strings, hence tostring() on the port.
      environment = [
        { name = "AWS_REGION", value = var.aws-region },
        { name = "PORT", value = tostring(local.gateway_port) },
        { name = "STAGE", value = var.env },
        { name = "NODE_ENV", value = "development" },
        { name = "VERSION", value = local.gateway_version }
      ]
    }
  ])
}

## ECS Service

# ECS service that keeps one gateway task running on Fargate and registers it
# with the ALB target group on local.gateway_port.
resource "aws_ecs_service" "gateway_service" {
  name            = "${var.env}-gateway-service"
  cluster         = aws_ecs_cluster.default.id
  task_definition = aws_ecs_task_definition.gateway_task.arn
  launch_type     = "FARGATE"
  desired_count   = 1

  force_new_deployment = true

  # The target group must already be attached to the load balancer (through
  # the listener) when the service is created; the service only references the
  # target group, so the ordering has to be stated explicitly.
  depends_on = [aws_lb_listener.listener]

  network_configuration {
    # NOTE(review): tasks may be placed in either public or private subnets
    # and are always given a public IP — confirm this mix is intended.
    subnets = concat(
      module.vpc.public_subnets,
      module.vpc.private_subnets,
    )
    security_groups = [
      module.sg.security_group_id,
      aws_security_group.service_security_group.id
    ]
    assign_public_ip = true
  }

  lifecycle {
    # desired_count is not reset by Terraform after the initial create.
    ignore_changes = [desired_count]
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.target_group.arn
    # Works because the container in the task definition is named exactly
    # like the task family.
    container_name = aws_ecs_task_definition.gateway_task.family
    container_port = local.gateway_port
  }
}

## Cloudwatch Log Group

# CloudWatch log group that the gateway container's awslogs driver writes to
# (referenced by name from the task definition's logConfiguration).
resource "aws_cloudwatch_log_group" "gateway_logs" {
  name = "${var.env}-gateway-log-group"


  tags = {
    Name = "${var.env}-gateway-log-group"
  }
}

## IAM Roles

# IAM role passed to the gateway task definition as task_role_arn.
# It can be assumed by ecs-tasks.amazonaws.com; no permissions policy is
# attached to it in the configuration shown here.
resource "aws_iam_role" "gateway_task_definition_role" {
  name               = "${var.env}-gateway-task-definition-role"
  assume_role_policy = data.aws_iam_policy_document.gateway_assume_role_policy.json

  tags = {
    Name = "${var.env}-gateway-task-definition-role"
  }
}

# IAM role passed to the gateway task definition as execution_role_arn.
# It can be assumed by ecs-tasks.amazonaws.com; its permissions come from the
# aws_iam_role_policy.gateway_exec inline policy.
resource "aws_iam_role" "gateway_task_execution_role" {
  name               = "${var.env}-gateway-task-execution-role"
  assume_role_policy = data.aws_iam_policy_document.gateway_assume_role_policy.json

  tags = {
    Name = "${var.env}-gateway-task-execution-role"
  }
}

# Trust policy shared by both gateway IAM roles: allows the
# ecs-tasks.amazonaws.com service principal to call sts:AssumeRole.
data "aws_iam_policy_document" "gateway_assume_role_policy" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ecs-tasks.amazonaws.com"]
    }
  }
}

# Inline policy that attaches the gateway_exec_policy document to the task
# execution role.
resource "aws_iam_role_policy" "gateway_exec" {
  name   = "${var.env}-gateway-execution-role-policy"
  role   = aws_iam_role.gateway_task_execution_role.id
  policy = data.aws_iam_policy_document.gateway_exec_policy.json
}

# Permissions for the task execution role: ECR authorization and image
# download actions plus creating log streams and writing log events.
# NOTE(review): the statement applies to all resources ("*") — consider
# scoping it to the gateway repository and log group.
data "aws_iam_policy_document" "gateway_exec_policy" {
  statement {
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "ecr:GetAuthorizationToken",
      "ecr:BatchCheckLayerAvailability",
      "ecr:GetDownloadUrlForLayer",
      "ecr:BatchGetImage",
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]
  }
}

## ALB

# Application load balancer in front of the gateway, placed in the VPC's
# public subnets and protected by the ALB security group.
resource "aws_lb" "alb" {
  load_balancer_type = "application"
  name               = "${var.env}-lb"

  security_groups = [module.security_group.security_group_id]
  subnets         = module.vpc.public_subnets
}

# Target group that the ECS service registers gateway task IPs into.
# Health: GET /health every 120 s with a 30 s timeout; 200, 301 and 302 count
# as healthy.
# NOTE(review): port is 80 while the container listens on 3000; for
# ECS-managed "ip" targets the registered port presumably comes from the
# service's container_port — verify against the registered targets.
resource "aws_lb_target_group" "target_group" {
  name   = "target-group"
  vpc_id = module.vpc.vpc_id

  target_type = "ip"
  protocol    = "HTTP"
  port        = 80

  health_check {
    path     = "/health"
    matcher  = "200,301,302"
    interval = 120
    timeout  = 30
  }
}

# HTTP listener on port 80 that forwards every request to the gateway target
# group.
resource "aws_lb_listener" "listener" {
  # The load balancer is declared as resource type "aws_lb" (aws_lb.alb), so
  # it must be referenced by that type; "aws_alb.alb" is not declared here.
  load_balancer_arn = aws_lb.alb.arn
  port              = 80
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.target_group.arn
  }
}

This is the error:

Task failed ELB health checks in (target-group arn:aws:elasticloadbalancing:eu-central-1:129228585726:targetgroup/target-group/5853904c0d3ad322)

After it’s deployed, I can see that the ECS service has started and is working; however, I don’t see any requests checking its health.

3

Answers


  1. Chosen as BEST ANSWER

    The target group was not the issue; the issue was a wrong security group, which didn't allow traffic to reach port 3000.


  2. Your target group uses port = 80, but your ECS task definition specifies port 3000. So this is likely the reason why your ALB can’t connect to your containers.

    Login or Signup to reply.
  3. The load balancer tries to check if it is able to reach the application on the specified target port. In your case it is 3000.

    Update your target group resource to use the application port so that the LB health checks pass.

    # Same target group as in the question, except that port is set to 3000
    # (the application port) instead of 80.
    resource "aws_lb_target_group" "target_group" {
      name        = "target-group"
      port        = 3000
      protocol    = "HTTP"
      target_type = "ip"
      vpc_id      = module.vpc.vpc_id
    
      health_check {
        matcher = "200,301,302"
        path    = "/health"
        interval = 120
        timeout = 30
      }
    }
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search