terraform-aws-eks
Karpenter helm_release timeout due to karpenter pods being un-schedulable
Description
When using the Karpenter example, the helm_release encounters a timeout. Upon further investigation, it appears the Karpenter pods cannot be scheduled. Detailed outputs are below.
- [x] I have searched the open/closed issues and my issue is not listed.
Versions
- Module version: 19.21.0
Terraform v1.6.6
on darwin_arm64
+ provider registry.terraform.io/gavinbunney/kubectl v1.14.0
+ provider registry.terraform.io/hashicorp/aws v5.31.0
+ provider registry.terraform.io/hashicorp/cloudinit v2.3.3
+ provider registry.terraform.io/hashicorp/helm v2.12.1
+ provider registry.terraform.io/hashicorp/kubernetes v2.24.0
+ provider registry.terraform.io/hashicorp/null v3.2.2
+ provider registry.terraform.io/hashicorp/time v0.10.0
+ provider registry.terraform.io/hashicorp/tls v4.0.5
Reproduction Code [Required]
provider "aws" {
region = "us-east-1"
alias = "virginia"
}
provider "kubernetes" {
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed
args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
}
}
provider "helm" {
kubernetes {
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed
args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
}
}
}
provider "kubectl" {
apply_retry_count = 5
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
load_config_file = false
exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed
args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
}
}
data "aws_availability_zones" "available" {}
data "aws_ecrpublic_authorization_token" "token" {
provider = aws.virginia
}
locals {
name = "shared-services"
cluster_version = "1.28"
vpc_cidr = "10.0.0.0/16"
azs = slice(data.aws_availability_zones.available.names, 0, 3)
}
################################################################################
# EKS Module
################################################################################
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "19.21.0"
cluster_name = local.name
cluster_version = local.cluster_version
cluster_endpoint_public_access = true
cluster_addons = {
kube-proxy = {}
vpc-cni = {}
coredns = {
configuration_values = jsonencode({
computeType = "Fargate"
# Ensure that we fully utilize the minimum amount of resources that are supplied by
# Fargate https://docs.aws.amazon.com/eks/latest/userguide/fargate-pod-configuration.html
# Fargate adds 256 MB to each pod's memory reservation for the required Kubernetes
# components (kubelet, kube-proxy, and containerd). Fargate rounds up to the following
# compute configuration that most closely matches the sum of vCPU and memory requests in
# order to ensure pods always have the resources that they need to run.
resources = {
limits = {
cpu = "0.25"
# We are targeting the smallest Task size of 512Mb, so we subtract 256Mb from the
# request/limit to ensure we can fit within that task
memory = "256M"
}
requests = {
cpu = "0.25"
# We are targeting the smallest Task size of 512Mb, so we subtract 256Mb from the
# request/limit to ensure we can fit within that task
memory = "256M"
}
}
})
}
}
vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
control_plane_subnet_ids = module.vpc.intra_subnets
# Fargate profiles use the cluster primary security group so these are not utilized
create_cluster_security_group = false
create_node_security_group = false
manage_aws_auth_configmap = true
aws_auth_roles = [
# We need to add in the Karpenter node IAM role for nodes launched by Karpenter
{
rolearn = module.karpenter.role_arn
username = "system:node:{{EC2PrivateDNSName}}"
groups = [
"system:bootstrappers",
"system:nodes",
]
},
]
fargate_profiles = {
karpenter = {
selectors = [
{ namespace = "karpenter" }
]
}
kube-system = {
selectors = [
{ namespace = "kube-system" }
]
}
}
tags = {
# NOTE - if creating multiple security groups with this module, only tag the
# security group that Karpenter should utilize with the following tag
# (i.e. - at most, only one security group should have this tag in your account)
"karpenter.sh/discovery" = local.name
}
}
################################################################################
# Karpenter
################################################################################
module "karpenter" {
source = "terraform-aws-modules/eks/aws//modules/karpenter"
cluster_name = module.eks.cluster_name
irsa_oidc_provider_arn = module.eks.oidc_provider_arn
# In v0.32.0/v1beta1, Karpenter now creates the IAM instance profile
# so we disable the Terraform creation and add the necessary permissions for Karpenter IRSA
enable_karpenter_instance_profile_creation = true
# Used to attach additional IAM policies to the Karpenter node IAM role
iam_role_additional_policies = {
AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}
}
resource "helm_release" "karpenter" {
namespace = "karpenter"
create_namespace = true
name = "karpenter"
repository = "oci://public.ecr.aws/karpenter"
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password
chart = "karpenter"
version = "v0.33.0"
values = [
<<-EOT
settings:
clusterName: ${module.eks.cluster_name}
clusterEndpoint: ${module.eks.cluster_endpoint}
interruptionQueueName: ${module.karpenter.queue_name}
serviceAccount:
annotations:
eks.amazonaws.com/role-arn: ${module.karpenter.irsa_arn}
EOT
]
}
resource "kubectl_manifest" "karpenter_node_class" {
yaml_body = <<-YAML
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
name: default
spec:
amiFamily: AL2
role: ${module.karpenter.role_name}
subnetSelectorTerms:
- tags:
karpenter.sh/discovery: ${module.eks.cluster_name}
securityGroupSelectorTerms:
- tags:
karpenter.sh/discovery: ${module.eks.cluster_name}
tags:
karpenter.sh/discovery: ${module.eks.cluster_name}
YAML
depends_on = [
helm_release.karpenter
]
}
resource "kubectl_manifest" "karpenter_node_pool" {
yaml_body = <<-YAML
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
name: default
spec:
template:
spec:
nodeClassRef:
name: default
requirements:
- key: "karpenter.k8s.aws/instance-category"
operator: In
values: ["c", "m", "r"]
- key: "karpenter.k8s.aws/instance-cpu"
operator: In
values: ["4", "8", "16",]
- key: "karpenter.k8s.aws/instance-hypervisor"
operator: In
values: ["nitro"]
- key: "karpenter.k8s.aws/instance-generation"
operator: Gt
values: ["2"]
limits:
cpu: 1000
disruption:
consolidationPolicy: WhenEmpty
consolidateAfter: 30s
YAML
depends_on = [
kubectl_manifest.karpenter_node_class
]
}
# Example deployment using the [pause image](https://www.ianlewis.org/en/almighty-pause-container)
# and starts with zero replicas
resource "kubectl_manifest" "karpenter_example_deployment" {
yaml_body = <<-YAML
apiVersion: apps/v1
kind: Deployment
metadata:
name: inflate
spec:
replicas: 0
selector:
matchLabels:
app: inflate
template:
metadata:
labels:
app: inflate
spec:
terminationGracePeriodSeconds: 0
containers:
- name: inflate
image: public.ecr.aws/eks-distro/kubernetes/pause:3.7
resources:
requests:
cpu: 1
YAML
depends_on = [
helm_release.karpenter
]
}
################################################################################
# Supporting Resources
################################################################################
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"
name = local.name
cidr = local.vpc_cidr
azs = local.azs
private_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 4, k)]
public_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 48)]
intra_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 52)]
enable_nat_gateway = true
single_nat_gateway = true
public_subnet_tags = {
"kubernetes.io/role/elb" = 1
}
private_subnet_tags = {
"kubernetes.io/role/internal-elb" = 1
# Tags subnets for Karpenter auto-discovery
"karpenter.sh/discovery" = local.name
}
}
Expected behavior
Expected the Karpenter pods to be scheduled successfully.
Actual behavior
The Karpenter pods are stuck in Pending and the Helm release fails to finish installing.
Terminal Output Screenshot(s)
Output of terraform apply:
Warning: Helm release "karpenter" was created but has a failed status. Use the `helm` command to investigate the error, correct it, then run Terraform again.
Error: context deadline exceeded
with module.eks.helm_release.karpenter,
on eks/main.tf line 162, in resource "helm_release" "karpenter":
162: resource "helm_release" "karpenter"
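To dig into the failed release before re-running Terraform, the release can be inspected directly with Helm and kubectl (a rough sketch; the release and namespace names come from the configuration above):
helm list --all --namespace karpenter
helm status karpenter --namespace karpenter
kubectl get pods --namespace karpenter --output wide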
Karpenter pod events:
Events:
  Type     Reason            Age                  From               Message
  ----     ------            ----                 ----               -------
  Warning  FailedScheduling  26m                  default-scheduler  no nodes available to schedule pods
  Warning  FailedScheduling  23m                  default-scheduler  0/1 nodes are available: 1 node(s) had untolerated taint {eks.amazonaws.com/compute-type: fargate}. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling..
  Warning  FailedScheduling  2m41s (x4 over 18m)  default-scheduler  0/2 nodes are available: 2 node(s) had untolerated taint {eks.amazonaws.com/compute-type: fargate}. preemption: 0/2 nodes are available: 2 Preemption is not helpful for scheduling..
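Since the default-scheduler is the one reporting these events, and the only nodes it can see carry the eks.amazonaws.com/compute-type: fargate taint, it looks like the Karpenter pods were never picked up by the karpenter Fargate profile: Fargate only admits pods whose namespace, and any labels listed in the selector, match a profile. A quick cross-check of the profile's selectors against the pending pods (a hedged sketch; the profile name is assumed here to be karpenter, and list-fargate-profiles shows the actual name):
aws eks list-fargate-profiles --cluster-name shared-services
aws eks describe-fargate-profile --cluster-name shared-services --fargate-profile-name karpenter
kubectl describe pods --namespace karpenter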
This happened to me also. I solved it by adding a label selector to the karpenter Fargate profile and giving the Karpenter pods the same label via the chart's podLabels value.
My Fargate profile configuration:
fargate_profiles = {
  karpenter = {
    name = "karpenter"
    selectors = [
      {
        namespace = "karpenter"
        labels = {
          "k8s-app" = "karpenter"
        }
      }
    ]
  }
  kube-system = {
    name = "kube-system"
    selectors = [
      {
        namespace = "kube-system"
        labels = {
          "k8s-app" = "kube-dns"
        }
      }
    ]
  }
}
My Karpenter helm_release configuration (pod requests/limits as recommended here):
resource "helm_release" "karpenter" {
namespace = "karpenter"
create_namespace = true
name = "karpenter"
repository = "oci://public.ecr.aws/karpenter"
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password
chart = "karpenter"
version = "v0.32.1"
values = [
<<-EOT
settings:
clusterName: ${module.cluster.cluster_name}
clusterEndpoint: ${module.cluster.cluster_endpoint}
interruptionQueueName: ${module.karpenter.queue_name}
serviceAccount:
annotations:
eks.amazonaws.com/role-arn: ${module.karpenter.irsa_arn}
controller:
resources:
requests:
cpu: 1
memory: 1Gi
limits:
cpu: 1
memory: 1Gi
podLabels:
k8s-app: karpenter
EOT
]
}
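After applying, one way to confirm the pods actually landed on Fargate is to check which nodes they were placed on (Fargate-backed nodes typically show up with a fargate- prefix in the node name):
kubectl get pods --namespace karpenter --output wide
kubectl get nodes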
With this configuration change things worked properly. @jonathan-d-palumbo, can you try it and verify that it works for you as well?
Closing since this doesn't appear to be a module issue - please correct me if I am wrong.
I'm going to lock this issue because it has been closed for 30 days ⏳. This helps our maintainers find and focus on the active issues. If you have found a problem that seems similar to this, please open a new issue and complete the issue template so we can capture all the details necessary to investigate further.