
I’m trying to deploy a self-managed EKS cluster with Terraform. While I can deploy the cluster with its addons, VPC, subnets, and all other resources, it always fails at the Helm releases:

Error: Kubernetes cluster unreachable: the server has asked for the client to provide credentials
with module.eks-ssp-kubernetes-addons.module.ingress_nginx[0].helm_release.nginx[0]
on .terraform/modules/eks-ssp-kubernetes-addons/modules/kubernetes-addons/ingress-nginx/main.tf line 19, in resource "helm_release" "nginx":
resource "helm_release" "nginx" {

The same error repeats for metrics_server, lb_ingress, and argocd, while cluster-autoscaler throws:

Warning: Helm release "cluster-autoscaler" was created but has a failed status.
with module.eks-ssp-kubernetes-addons.module.cluster_autoscaler[0].helm_release.cluster_autoscaler[0]
on .terraform/modules/eks-ssp-kubernetes-addons/modules/kubernetes-addons/cluster-autoscaler/main.tf line 1, in resource "helm_release" "cluster_autoscaler":
resource "helm_release" "cluster_autoscaler" {

My main.tf looks like this:

terraform {

  backend "remote" {}

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 3.66.0"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = ">= 2.7.1"
    }
    helm = {
      source  = "hashicorp/helm"
      version = ">= 2.4.1"
    }
  }
}

data "aws_eks_cluster" "cluster" {
  name = module.eks-ssp.eks_cluster_id
}

data "aws_eks_cluster_auth" "cluster" {
  name = module.eks-ssp.eks_cluster_id
}

provider "aws" {
  access_key = "xxx"
  secret_key = "xxx"
  region     = "xxx"
  assume_role {
    role_arn = "xxx"
  }
}

provider "kubernetes" {
  host                   = data.aws_eks_cluster.cluster.endpoint
  cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data)
  token                  = data.aws_eks_cluster_auth.cluster.token
}

provider "helm" {
  kubernetes {
    host                   = data.aws_eks_cluster.cluster.endpoint
    token                  = data.aws_eks_cluster_auth.cluster.token
    cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data)
  }
}

My eks.tf looks like this:

module "eks-ssp" {
    source = "github.com/aws-samples/aws-eks-accelerator-for-terraform"

    # EKS CLUSTER
    tenant            = "DevOpsLabs2b"
    environment       = "dev-test"
    zone              = ""
    terraform_version = "Terraform v1.1.4"

    # EKS Cluster VPC and Subnet mandatory config
    vpc_id             = "xxx"
    private_subnet_ids = ["xxx","xxx", "xxx", "xxx"]

    # EKS CONTROL PLANE VARIABLES
    create_eks         = true
    kubernetes_version = "1.19"

  # EKS SELF MANAGED NODE GROUPS
    self_managed_node_groups = {
    self_mg = {
      node_group_name        = "DevOpsLabs2b"
      subnet_ids             = ["xxx","xxx", "xxx", "xxx"]
      create_launch_template = true
      launch_template_os     = "bottlerocket"       # amazonlinux2eks  or bottlerocket or windows
      custom_ami_id          = "xxx"
      public_ip              = true                   # Enable only for public subnets
      pre_userdata           = <<-EOT
            yum install -y amazon-ssm-agent 
            systemctl enable amazon-ssm-agent && systemctl start amazon-ssm-agent 
        EOT

      disk_size     = 10
      instance_type = "t2.small"
      desired_size  = 2
      max_size      = 10
      min_size      = 0
      capacity_type = "" # Optional; use only for Spot capacity, e.g. capacity_type = "spot"

      k8s_labels = {
        Environment = "dev-test"
        Zone        = ""
        WorkerType  = "SELF_MANAGED_ON_DEMAND"
      }

      additional_tags = {
        ExtraTag    = "t2x-on-demand"
        Name        = "t2x-on-demand"
        subnet_type = "public"
      }
      create_worker_security_group = false # Creates a dedicated sec group for this Node Group
    },
  }
}

module "eks-ssp-kubernetes-addons" {
    source = "github.com/aws-samples/aws-eks-accelerator-for-terraform//modules/kubernetes-addons"

    eks_cluster_id = module.eks-ssp.eks_cluster_id

    # EKS Managed Add-ons
    enable_amazon_eks_vpc_cni = true
    amazon_eks_vpc_cni_config = {
        addon_name               = "vpc-cni"
        addon_version            = "v1.7.5-eksbuild.2"
        service_account          = "aws-node"
        resolve_conflicts        = "OVERWRITE"
        namespace                = "kube-system"
        additional_iam_policies  = []
        service_account_role_arn = ""
        tags                     = {}
    }

    enable_amazon_eks_kube_proxy = true
    amazon_eks_kube_proxy_config = {
        addon_name               = "kube-proxy"
        addon_version            = "v1.19.8-eksbuild.1"
        service_account          = "kube-proxy"
        resolve_conflicts        = "OVERWRITE"
        namespace                = "kube-system"
        additional_iam_policies  = []
        service_account_role_arn = ""
        tags                     = {}
    }

    # K8s Add-ons
    enable_aws_load_balancer_controller = true
    enable_metrics_server               = true
    enable_cluster_autoscaler           = true
    enable_aws_for_fluentbit            = true
    enable_argocd                       = true
    enable_ingress_nginx                = true

    depends_on = [module.eks-ssp.self_managed_node_groups]
}

3 Answers


  1. I solved this error by adding dependencies to the Helm installations.
    The depends_on will wait for the referenced step to complete successfully, and only then does the Helm module run.

    module "nginx-ingress" {
      depends_on      = [module.eks, module.aws-load-balancer-controller]
      source          = "terraform-module/release/helm"
    ...}

    module "aws-load-balancer-controller" {
      depends_on      = [module.eks]
      source          = "terraform-module/release/helm"
    ...}

    module "helm_autoscaler" {
      depends_on      = [module.eks]
      source          = "terraform-module/release/helm"
    ...}
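
    For the module layout in the question, the same idea would look roughly like the sketch below (module names taken from the question; treat it as an illustration, not a drop-in fix): make the addons module wait for the whole cluster module instead of only its node group output.

    # Sketch: the addons module waits for the complete EKS cluster module.
    module "eks-ssp-kubernetes-addons" {
      source = "github.com/aws-samples/aws-eks-accelerator-for-terraform//modules/kubernetes-addons"

      eks_cluster_id = module.eks-ssp.eks_cluster_id

      enable_ingress_nginx      = true
      enable_cluster_autoscaler = true

      # Wait for the cluster and node groups before any helm_release is attempted.
      depends_on = [module.eks-ssp]
    }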
    
  2. The OP confirmed in the comments that the problem was resolved:

    Of course. I think I found the issue. Doing "kubectl get svc" throws: "An error occurred (AccessDenied) when calling the AssumeRole operation: User: arn:aws:iam::xxx:user/terraform_deploy is not authorized to perform: sts:AssumeRole on resource: arn:aws:iam::xxx:user/terraform_deploy"

    Solved it by using my actual role, that’s crazy. No idea why it was calling itself.

    For a similar problem, see also this issue.
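
    In provider terms, the fix boils down to pointing assume_role at a real IAM role ARN (or removing the block entirely when the caller's credentials already belong to the right principal). A minimal sketch with placeholder values:

    # Sketch with placeholder ARNs: assume_role must reference a role
    # (arn:aws:iam::<account>:role/...), not the IAM user running Terraform.
    provider "aws" {
      region = "eu-west-1"   # hypothetical region
      assume_role {
        role_arn = "arn:aws:iam::111122223333:role/terraform-deploy"   # hypothetical role ARN
      }
    }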

  3. For me, it helped to explicitly provide the config file location to Helm with --kubeconfig ~/.kube/config, even though that is the default config path.
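
    When the releases are managed through the Terraform helm provider rather than the helm CLI, the equivalent is to point the provider's kubernetes block at the kubeconfig file. A sketch (replacing the endpoint/token attributes used in the question):

    # Sketch: authenticate the helm provider via the local kubeconfig
    # instead of an aws_eks_cluster_auth token.
    provider "helm" {
      kubernetes {
        config_path = "~/.kube/config"
      }
    }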
