skip to Main Content

I am trying to start using Fargate, but my Terraform setup seems to be wrong, since the public DNS of the ALB returns 503 Service Temporarily Unavailable. The task never gets started and its health status is unknown.

Infra setup

Public ALB->ECS service in private subnets with Fargate

I have also created the necessary VPC endpoints.

NAT gateway : 1 per AZ

ECS

# Fargate service that runs the demo frontend container in the private subnets
# and registers it with the first target group exposed by the ALB module.
module "ecs_service" {
  source = "terraform-aws-modules/ecs/aws//modules/service"

  name        = var.ecs_service_name
  cluster_arn = module.ecs_cluster.arn

  # Task-level sizing; the container-level cpu/memory below must fit inside it.
  cpu    = 1024
  memory = 4096

  runtime_platform = {
    cpu_architecture        = "X86_64"
    operating_system_family = "LINUX"
  }

  # Container definition(s)
  # The map key `ecs-sample` equals the value shown for var.ecs_service_name,
  # which is what load_balancer.container_name refers to further down.
  container_definitions = {
    ecs-sample = {
      image     = "public.ecr.aws/aws-containers/ecsdemo-frontend:latest"
      essential = true
      cpu       = 512
      memory    = 1024

      port_mappings = [
        {
          name          = var.ecs_service_name
          protocol      = "tcp"
          containerPort = var.ecs_service_port
          hostPort      = var.ecs_service_port
        }
      ]
    }
  }

  # Networking: tasks live in the private subnets and reuse an externally
  # managed security group instead of one created by this module.
  subnet_ids            = module.vpc.private_subnets
  security_group_ids    = [module.ecs-container-sg.security_group_id]
  create_security_group = false

  load_balancer = {
    service = {
      target_group_arn = module.alb.target_group_arns[0]
      container_name   = var.ecs_service_name
      container_port   = var.ecs_service_port
    }
  }

  autoscaling_min_capacity = 1
  autoscaling_max_capacity = 5

  # Explicit ordering in addition to the implicit references above.
  depends_on = [
    module.alb,
    module.ecs-container-sg
  ]
}

vars.tf

# Name shared by the ECS service, the port mapping and the load balancer binding.
ecs_service_name = "ecs-sample"
# Port used for the container port mapping, the target group and its health check.
ecs_service_port = 3000

ALB

 module "alb" {
  # Application Load Balancer placed in the public subnets; a single HTTP :80
  # listener forwards to the one IP-type target group defined below.
  source  = "terraform-aws-modules/alb/aws"
  version = "~> 8.0"

  name               = "${var.project_name}-alb"
  load_balancer_type = "application"

  vpc_id          = module.vpc.vpc_id
  subnets         = module.vpc.public_subnets
  security_groups = [module.alb-webtraffic-sg.security_group_id]

  # Target group at index 0, referenced by the listener via target_group_index.
  target_groups = [
    {
      name                              = "${var.ecs_service_name}-blue-tg"
      target_type                       = "ip"
      backend_protocol                  = "HTTP"
      backend_port                      = var.ecs_service_port
      deregistration_delay              = 10
      load_balancing_cross_zone_enabled = false

      health_check = {
        enabled             = true
        protocol            = "HTTP"
        port                = var.ecs_service_port
        path                = "/"
        matcher             = "200"
        interval            = 5
        timeout             = 3
        healthy_threshold   = 2
        unhealthy_threshold = 3
      }
    }
  ]

  http_tcp_listeners = [
    {
      port               = 80
      protocol           = "HTTP"
      target_group_index = 0
    }
  ]
}

Security Groups

# Security group for the public ALB: HTTP/HTTPS in from anywhere, everything out.
module "alb-webtraffic-sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  vpc_id      = module.vpc.vpc_id
  name        = "${var.project_name}-alb-webtraffic-sg"
  description = "Allow Inbound traffic port 80 & 443 from anywhere"

  # Inbound: named rules applied to the CIDR list below.
  ingress_rules       = ["http-80-tcp", "https-443-tcp"]
  ingress_cidr_blocks = ["0.0.0.0/0"]

  # Outbound: unrestricted, so the ALB can reach its targets.
  egress_rules = ["all-all"]

  tags = {
    Name = "${var.project_name}-sg-webtraffic"
  }
}

# Security group attached to the Fargate tasks.
#   Inbound:  any TCP port, but only from the ALB security group.
#   Outbound: unrestricted.
module "ecs-container-sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  name        = "${var.project_name}-ecs-container-sg"
  description = "Allow Inbound traffic from ALB Security Group"
  vpc_id      = module.vpc.vpc_id

  ingress_with_source_security_group_id = [
    {
      description              = "Allow Inbound traffic from ALB Security Group"
      rule                     = "all-tcp"
      source_security_group_id = module.alb-webtraffic-sg.security_group_id
    }
  ]

  # The security-group module creates NO egress rules unless they are requested
  # (its `egress_rules` input defaults to an empty list). Without this line the
  # tasks cannot open any outbound connection, so the image pull from
  # public.ecr.aws times out ("dial tcp ...:443: i/o timeout") even though the
  # private subnets route through a NAT gateway.
  egress_rules = ["all-all"]

  tags = {
    # Previously copy-pasted from the ALB security group ("-sg-webtraffic").
    Name = "${var.project_name}-ecs-container-sg"
  }
}

# Security group used by the interface VPC endpoints: accepts all traffic that
# originates inside the VPC CIDR and allows all outbound traffic.
module "vpc-sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  vpc_id      = module.vpc.vpc_id
  name        = "vpc-sg"
  description = "Allow all traffic within the VPC"

  # Inbound is limited to the VPC's own address range.
  ingress_rules       = ["all-all"]
  ingress_cidr_blocks = [var.main_vpc_prefix]

  egress_rules = ["all-all"]

  tags = {
    Name = "${var.project_name}-vpc-sg"
  }
}

VPC endpoints

# VPC endpoints for the private subnets: gateway endpoints for DynamoDB and S3,
# interface endpoints for ECR (API + Docker registry) and CloudWatch Logs.
#
# NOTE(review): the ECR interface endpoints serve private ECR repositories.
# The image used by the service is hosted on public.ecr.aws, which presumably
# still needs outbound internet access (NAT gateway) -- confirm against the
# Amazon ECR VPC endpoint documentation.
module "endpoints" {
  source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints"

  vpc_id             = module.vpc.vpc_id
  security_group_ids = [module.vpc-sg.security_group_id]

  endpoints = {
    dynamodb = {
      service         = "dynamodb"
      service_type    = "Gateway"
      route_table_ids = module.vpc.private_route_table_ids
      tags            = { Name = "dynamodb-vpc-endpoint" }
    },
    s3 = {
      service         = "s3"
      service_type    = "Gateway"
      route_table_ids = module.vpc.private_route_table_ids
      tags            = { Name = "s3-vpc-endpoint" }
    },
    ecr = {
      # interface endpoint (ECR API)
      service_name        = "com.amazonaws.${data.aws_region.current.name}.ecr.api"
      subnet_ids          = module.vpc.private_subnets
      private_dns_enabled = true
      # Was "logs-vpc-endpoint" (copy-paste); give each endpoint a distinct name.
      tags = { Name = "ecr-api-vpc-endpoint" }
    },
    dkr = {
      # interface endpoint (ECR Docker registry)
      service_name        = "com.amazonaws.${data.aws_region.current.name}.ecr.dkr"
      subnet_ids          = module.vpc.private_subnets
      private_dns_enabled = true
      # Was "logs-vpc-endpoint" (copy-paste); give each endpoint a distinct name.
      tags = { Name = "ecr-dkr-vpc-endpoint" }
    },
    logs = {
      # interface endpoint (CloudWatch Logs)
      service_name        = "com.amazonaws.${data.aws_region.current.name}.logs"
      subnet_ids          = module.vpc.private_subnets
      private_dns_enabled = true
      tags                = { Name = "logs-vpc-endpoint" }
    }
  }
}

I don’t see any logs being generated in CloudWatch either.

Edit
It seems that the task has been stopped for the following reason:

CannotPullContainerError: pull image manifest has been retried 5 time(s): failed to resolve ref public.ecr.aws/aws-containers/ecsdemo-frontend:latest: failed to do request: Head "https://public.ecr.aws/v2/aws-containers/ecsdemo-frontend/manifests/latest": dial tcp 99.83.145.10:443: i/o timeout

I do have a NAT gateway so it should work as expected.

# VPC with public, private, database and ElastiCache subnets and one NAT
# gateway per availability zone.
module "vpc" {
  source = "terraform-aws-modules/vpc/aws"

  name = var.main_vpc_name
  cidr = var.main_vpc_prefix
  azs  = var.availability_zones

  # Subnet tiers
  public_subnets      = var.public_subnets
  private_subnets     = var.private_subnets
  database_subnets    = var.database_subnets
  elasticache_subnets = var.elasticache_subnets

  # Outbound internet for the private subnets: one NAT gateway in every AZ.
  enable_nat_gateway     = true
  one_nat_gateway_per_az = true
  single_nat_gateway     = false

  enable_vpn_gateway = false

  # DNS support and hostnames are both switched on.
  enable_dns_support   = true
  enable_dns_hostnames = true

  # Database tier
  # NOTE(review): the internet gateway route presumably makes the database
  # subnets publicly routable -- confirm this is intended.
  create_database_subnet_group           = true
  create_database_subnet_route_table     = true
  create_database_internet_gateway_route = true
}

Edit 2

I have added the extra permission to the ecs task execution role but pulling the ecr image still fails

# Attaches the AWS-managed AmazonECSTaskExecutionRolePolicy to the task
# execution role exported by the ECS service module.
resource "aws_iam_role_policy_attachment" "task_exec_ecs" {
  # Bare reference instead of the interpolation-only form "${...}", which has
  # been deprecated since Terraform 0.12.
  role       = module.ecs_service.task_exec_iam_role_name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

2

Answers


  1. Make sure that your ecsTaskExecutionRole has the correct permissions for ECR.

    Here’s a good reference: Amazon ECS task execution IAM role

    There is a managed policy AmazonECSTaskExecutionRolePolicy that you may attach to your role.

    Login or Signup to reply.
  2. I ran into this problem when I needed a static IP for a Fargate container and I finally managed to solve it.

    When you put a Fargate task in a private subnet (I had to do this so that ECS would not automatically assign a dynamic IP), it does not have internet access. In order to pull an image from the ECR repository, the task needs internet access from its network.

    So I created a NAT Gateway (which provides internet access) and associated it with the Fargate container's VPC and subnet, using a route table entry with destination 0.0.0.0/0, as in the image.

    Note: only one NAT gateway is needed.

    I hope it helps you.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search