I am trying to start using Fargate, but my Terraform setup seems to be wrong: the public DNS name of the ALB returns 503 Service Temporarily Unavailable.
The task never gets started and the health status is unknown.
Infra setup
Public ALB->ECS service in private subnets with Fargate
I have also created the necessary VPC endpoints
NAT gateway : 1 per AZ
ECS
# ECS service for the sample frontend container. Tasks are placed in the
# private subnets and registered with the first target group of the ALB.
module "ecs_service" {
  source = "terraform-aws-modules/ecs/aws//modules/service"

  name        = var.ecs_service_name
  cluster_arn = module.ecs_cluster.arn

  runtime_platform = {
    operating_system_family = "LINUX"
    cpu_architecture        = "X86_64"
  }

  # Task-level sizing; the container below reserves a share of it.
  cpu    = 1024
  memory = 4096

  # Container definition(s)
  container_definitions = {
    # The key is the container name. It is derived from the same variable that
    # load_balancer.container_name uses below, so the two cannot drift apart
    # when var.ecs_service_name changes (previously the key was hard-coded).
    (var.ecs_service_name) = {
      cpu       = 512
      memory    = 1024
      essential = true
      image     = "public.ecr.aws/aws-containers/ecsdemo-frontend:latest"

      port_mappings = [
        {
          name          = var.ecs_service_name
          containerPort = var.ecs_service_port
          hostPort      = var.ecs_service_port
          protocol      = "tcp"
        }
      ]
    }
  }

  # Networking: reuse the externally managed security group instead of letting
  # the module create one.
  subnet_ids            = module.vpc.private_subnets
  security_group_ids    = [module.ecs-container-sg.security_group_id]
  create_security_group = false

  autoscaling_min_capacity = 1
  autoscaling_max_capacity = 5

  load_balancer = {
    service = {
      target_group_arn = module.alb.target_group_arns[0]
      container_name   = var.ecs_service_name
      container_port   = var.ecs_service_port
    }
  }

  # Explicit ordering: wait for the whole ALB module (not only the target
  # group referenced above) and for the security group module.
  depends_on = [
    module.ecs-container-sg,
    module.alb
  ]
}
vars.tf
# Name used for the ECS service, its port mapping, the load-balancer
# container_name, and the prefix of the ALB target group name.
ecs_service_name = "ecs-sample"
# Port used for the container/host port mapping, the load-balancer
# container_port, and the ALB target group backend and health-check port.
ecs_service_port = 3000
ALB
# Application Load Balancer in the public subnets. A single HTTP listener on
# port 80 forwards to one IP-type target group that fronts the ECS service.
module "alb" {
  source  = "terraform-aws-modules/alb/aws"
  version = "~> 8.0"

  name               = "${var.project_name}-alb"
  load_balancer_type = "application"

  # Placement
  vpc_id          = module.vpc.vpc_id
  subnets         = module.vpc.public_subnets
  security_groups = [module.alb-webtraffic-sg.security_group_id]

  # Index 0 below is the target group that the listener forwards to.
  target_groups = [
    {
      name             = "${var.ecs_service_name}-blue-tg"
      target_type      = "ip"
      backend_protocol = "HTTP"
      backend_port     = var.ecs_service_port

      deregistration_delay              = 10
      load_balancing_cross_zone_enabled = false

      # Probe "/" on the service port and expect a 200 response.
      health_check = {
        enabled             = true
        protocol            = "HTTP"
        port                = var.ecs_service_port
        path                = "/"
        matcher             = "200"
        interval            = 5
        timeout             = 3
        healthy_threshold   = 2
        unhealthy_threshold = 3
      }
    }
  ]

  http_tcp_listeners = [
    {
      port               = 80
      protocol           = "HTTP"
      target_group_index = 0
    }
  ]
}
Security Groups
# Security group for the public ALB: HTTP/HTTPS in from any IPv4 address,
# all traffic out.
module "alb-webtraffic-sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  vpc_id      = module.vpc.vpc_id
  name        = "${var.project_name}-alb-webtraffic-sg"
  description = "Allow Inbound traffic port 80 & 443 from anywhere"

  # Inbound
  ingress_rules       = ["http-80-tcp", "https-443-tcp"]
  ingress_cidr_blocks = ["0.0.0.0/0"]

  # Outbound
  egress_rules = ["all-all"]

  tags = { Name = "${var.project_name}-sg-webtraffic" }
}
# Security group attached to the ECS tasks: accepts TCP traffic from the ALB
# security group and allows all outbound traffic.
module "ecs-container-sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  name        = "${var.project_name}-ecs-container-sg"
  description = "Allow Inbound traffic from ALB Security Group"
  vpc_id      = module.vpc.vpc_id

  ingress_with_source_security_group_id = [
    {
      description              = "Allow Inbound traffic from ALB Security Group"
      rule                     = "all-tcp"
      source_security_group_id = module.alb-webtraffic-sg.security_group_id
    }
  ]

  # This module adds no egress rules unless they are requested. Without this
  # line the tasks cannot open any outbound connection, so the image pull from
  # public.ecr.aws times out (CannotPullContainerError ... i/o timeout) even
  # though a NAT gateway route exists.
  egress_rules = ["all-all"]

  tags = {
    # Previously a copy of the ALB group's "sg-webtraffic" tag, which made the
    # two groups indistinguishable by Name.
    Name = "${var.project_name}-ecs-container-sg"
  }
}
# Security group that admits all traffic originating from the VPC CIDR and
# allows all outbound traffic. It is attached to the VPC endpoints.
module "vpc-sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 4.0"

  vpc_id      = module.vpc.vpc_id
  name        = "vpc-sg"
  description = "Allow all traffic within the VPC"

  # Inbound: every protocol/port, limited to the VPC's own address range.
  ingress_rules       = ["all-all"]
  ingress_cidr_blocks = [var.main_vpc_prefix]

  # Outbound
  egress_rules = ["all-all"]

  tags = { Name = "${var.project_name}-vpc-sg" }
}
VPC endpoints
# VPC endpoints for the private subnets: gateway endpoints for DynamoDB and S3,
# interface endpoints for the ECR API, the ECR Docker registry and CloudWatch
# Logs.
# NOTE(review): the task image in this setup is pulled from public.ecr.aws;
# confirm whether that registry is served by the ECR endpoints below or still
# needs the NAT gateway path.
module "endpoints" {
  source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints"

  vpc_id             = module.vpc.vpc_id
  security_group_ids = [module.vpc-sg.security_group_id]

  endpoints = {
    # Gateway endpoints are attached to the private route tables.
    dynamodb = {
      service         = "dynamodb"
      service_type    = "Gateway"
      route_table_ids = module.vpc.private_route_table_ids
      tags            = { Name = "dynamodb-vpc-endpoint" }
    },
    s3 = {
      service         = "s3"
      service_type    = "Gateway"
      route_table_ids = module.vpc.private_route_table_ids
      tags            = { Name = "s3-vpc-endpoint" }
    },

    # Interface endpoints are placed in the private subnets with private DNS.
    ecr = {
      service_name        = "com.amazonaws.${data.aws_region.current.name}.ecr.api"
      subnet_ids          = module.vpc.private_subnets
      private_dns_enabled = true
      # Was "logs-vpc-endpoint" (copy-paste from the logs endpoint).
      tags = { Name = "ecr-api-vpc-endpoint" }
    },
    dkr = {
      service_name        = "com.amazonaws.${data.aws_region.current.name}.ecr.dkr"
      subnet_ids          = module.vpc.private_subnets
      private_dns_enabled = true
      # Was "logs-vpc-endpoint" (copy-paste from the logs endpoint).
      tags = { Name = "ecr-dkr-vpc-endpoint" }
    },
    logs = {
      service_name        = "com.amazonaws.${data.aws_region.current.name}.logs"
      subnet_ids          = module.vpc.private_subnets
      private_dns_enabled = true
      tags                = { Name = "logs-vpc-endpoint" }
    }
  }
}
I don’t see any logs being generated in CloudWatch either.
Edit
It seems that the task was stopped for the following reason:
CannotPullContainerError: pull image manifest has been retried 5 time(s): failed to resolve ref public.ecr.aws/aws-containers/ecsdemo-frontend:latest: failed to do request: Head "https://public.ecr.aws/v2/aws-containers/ecsdemo-frontend/manifests/latest": dial tcp 99.83.145.10:443: i/o timeout
I do have a NAT gateway so it should work as expected.
# Main VPC with private, public, database and ElastiCache subnets and one NAT
# gateway per availability zone.
module "vpc" {
  source = "terraform-aws-modules/vpc/aws"

  name = var.main_vpc_name
  cidr = var.main_vpc_prefix
  azs  = var.availability_zones

  # Subnet tiers
  public_subnets      = var.public_subnets
  private_subnets     = var.private_subnets
  database_subnets    = var.database_subnets
  elasticache_subnets = var.elasticache_subnets

  # Outbound internet access: one NAT gateway in each AZ.
  enable_nat_gateway     = true
  one_nat_gateway_per_az = true
  single_nat_gateway     = false
  enable_vpn_gateway     = false

  # DNS
  enable_dns_support   = true
  enable_dns_hostnames = true

  # Database tier
  # NOTE(review): the internet gateway route flag presumably makes the
  # database subnets publicly routable; confirm that this is intended.
  create_database_subnet_group           = true
  create_database_subnet_route_table     = true
  create_database_internet_gateway_route = true
}
Edit 2
I have added the extra permission to the ecs task execution role but pulling the ecr image still fails
# Attaches the AWS-managed ECS task execution policy to the task execution
# role created by the ecs_service module.
resource "aws_iam_role_policy_attachment" "task_exec_ecs" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = module.ecs_service.task_exec_iam_role_name
}
2
Answers
Make sure that your
ecsTaskExecutionRole
has the correct permissions for ECR. Here’s a good reference: Amazon ECS task execution IAM role
There is a managed policy
AmazonECSTaskExecutionRolePolicy
that you may attach to your role.
I ran into this problem when I needed a static IP for a Fargate container, and I finally managed to solve it.
When you put a Fargate task in a private subnet (I had to do this so that ECS would not automatically assign a dynamic IP), it has no internet access; to pull an image from the ECR repository, the network must have internet access.
So I created a NAT gateway (which provides internet access) and associated it with the Fargate container's VPC and subnet by adding a route with destination 0.0.0.0/0 to the route table, as shown in the image.
Note: only one NAT gateway is needed.
I hope it helps you.