terraform-aws-eks
self_managed_node_group: /etc/eks/bootstrap.sh user data script fails with "--b64-cluster-ca: command not found"
Description
I am trying to deploy an EKS cluster with a self_managed_node_group and install some services via the bootstrap user data (pre_bootstrap_user_data / post_bootstrap_user_data), but some of the pre-defined bootstrap commands fail, so the installation never runs.
This is my self managed node group config:
provider "aws" {
region = local.region
}
provider "kubernetes" {
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed
args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
}
}
data "aws_caller_identity" "current" {}
data "aws_availability_zones" "available" {}
locals {
name = "eks"
cluster_version = "1.26"
region = "eu-west-1" # NEEDS TO BE CHANGED
app-node-group = "app"
vpc_id = "vpc-XXXX" # NEEDS TO BE CHANGED
existing_k8s_route_table = "rtb-XXXX" # NEEDS TO BE CHANGED
tags = {
cluster-name = local.name
"k8s.io/cluster-autoscaler/${local.name}" = "owned"
"k8s.io/cluster-autoscaler/enabled" = true
}
}
################################################################################
# EKS Module
################################################################################
module "eks" {
source = "../.."
cluster_name = local.name
cluster_version = local.cluster_version
cluster_endpoint_public_access = true
cluster_endpoint_private_access = true
cluster_addons = {
coredns = {
resolve_conflicts = "OVERWRITE"
}
kube-proxy = {
most_recent = true
}
vpc-cni = {
resolve_conflicts = "OVERWRITE"
}
}
cluster_encryption_policy_name = "eks-secret"
cluster_encryption_config = { # NEEDS TO BE CHANGED
resources = ["secrets"]
}
vpc_id = local.vpc_id
subnet_ids = ["subnet-XXXX", "subnet-XXXXX"]
# Self managed node groups will not automatically create the aws-auth configmap so we need to
create_aws_auth_configmap = true
manage_aws_auth_configmap = true
self_managed_node_group_defaults = {
# enable discovery of autoscaling groups by cluster-autoscaler
autoscaling_group_tags = {
"k8s.io/cluster-autoscaler/enabled" : true,
"k8s.io/cluster-autoscaler/${local.name}" : "owned",
}
}
self_managed_node_groups = {
"${local.app-node-group}-node" = {
###########
### EC2 ###
###########
name = local.app-node-group
ami_id = "ami-0608033dd04759ae5" # NEEDS TO BE CHANGED
instance_type = "t3.medium" # NEEDS TO BE CHANGED
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
delete_on_termination = true
encrypted = true
volume_size = 50
volume_type = "gp3"
kms_key_id = aws_kms_key.ebs.arn
}
}
}
use_name_prefix = false
enable_monitoring = true
metadata_options = {
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 2
instance_metadata_tags = "disabled"
}
####################
### SSH Key Pair ###
####################
key_name = "XXXXXXXX" # NEEDS TO BE CHANGED
#####################
#### BOOTSTRAPING ###
#####################
bootstrap_extra_args = <<-EOT
--kubelet-extra-args '--max-pods=50 --node-labels=apps=true'
EOT
pre_bootstrap_user_data = <<-EOT
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=true
EOT
post_bootstrap_user_data = <<-EOT
sudo yum install -y htop
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.26.0/crictl-v1.26.0-linux-amd64.tar.gz
tar zxvf crictl-v1.26.0-linux-amd64.tar.gz -C /usr/bin/
EOT
#################################
### AUTO SCALING GROUP CONFIG ###
#################################
min_size = 1
max_size = 4
desired_size = 1
##############################################
### LAUNCH TEMPLATE FOR AUTO SCALING GROUP ###
##############################################
launch_template_name = "${local.app-node-group}-lt"
launch_template_use_name_prefix = true
launch_template_description = "${local.app-node-group} self managed node group launch template"
ebs_optimized = true
vpc_security_group_ids = ["sg-XXXXX"] # NEEDS TO BE CHANGED
#############
### IAM ###
#############
create_iam_role = true # NEEDS TO BE CHANGED
iam_role_name = "${local.app-node-group}-eu-west-1"
iam_role_use_name_prefix = false
iam_role_description = "${local.app-node-group} self managed node group role"
iam_role_tags = {
Purpose = "${local.app-node-group} protector of the kubelet"
}
iam_role_additional_policies = {
AmazonEC2ContainerRegistryReadOnly = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
AmazonSSMPatchAssociation = "arn:aws:iam::aws:policy/AmazonSSMPatchAssociation",
additional = aws_iam_policy.additional.arn
}
#################
### NETWORK ###
#################
subnet_ids = ["subnet-XXXXX"]
create_security_group = true
security_group_name = "${local.app-node-group}-sg"
security_group_use_name_prefix = false
security_group_description = "${local.app-node-group} security group"
security_group_rules = {
nginx_ingress_port_1 = {
description = "nginx ingress port. allow api server to call nginx ingress"
protocol = "tcp"
from_port = 8443
to_port = 8443
type = "ingress"
source_cluster_security_group = true # it will add master sg as source
}
nginx_ingress_port_2 = {
description = "nginx ingress port. allow api server to call nginx ingress"
protocol = "tcp"
from_port = 10254
to_port = 10254
type = "ingress"
source_cluster_security_group = true # it will add master sg as source
}
additonal = {
from_port = 22
to_port = 22
protocol = "tcp"
type = "ingress"
cidr_blocks = [
"10.0.0.0/8",
"172.16.0.0/12",
"192.168.0.0/16",
]
}
}
security_group_tags = {
Purpose = "${local.app-node-group} protector of the kubelet"
}
#################
### TIMEOUTS ####
#################
timeouts = {
create = "80m"
update = "80m"
delete = "80m"
}
#############
### TAGS ####
#############
tags = {
type = "production"
node-type = local.app-node-group
}
}
}
}
################################################################################
# Supporting Resources
################################################################################
resource "tls_private_key" "this" {
algorithm = "RSA"
}
resource "aws_key_pair" "this" {
key_name = local.name
public_key = tls_private_key.this.public_key_openssh
}
resource "aws_kms_key" "ebs" {
description = "Customer managed key to encrypt self managed node group volumes"
deletion_window_in_days = 30
policy = data.aws_iam_policy_document.ebs.json
enable_key_rotation = false
}
resource "aws_kms_alias" "cluster_nodegroup_ebs_volume_ecncryption" {
name = "alias/${local.name}-cluster-nodegroup-ebs-volume-ecncryption"
target_key_id = aws_kms_key.ebs.arn
}
# This policy is required for the KMS key used for EKS root volumes, so the cluster is allowed to enc/dec/attach encrypted EBS volumes
data "aws_iam_policy_document" "ebs" {
# Copy of default KMS policy that lets you manage it
statement {
sid = "Enable IAM User Permissions"
actions = ["kms:*"]
resources = ["*"]
principals {
type = "AWS"
identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
}
}
# Required for EKS
statement {
sid = "Allow service-linked role use of the CMK"
actions = [
"kms:Encrypt",
"kms:Decrypt",
"kms:ReEncrypt*",
"kms:GenerateDataKey*",
"kms:DescribeKey"
]
resources = ["*"]
principals {
type = "AWS"
identifiers = [
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling", # required for the ASG to manage encrypted volumes for nodes
module.eks.cluster_iam_role_arn, # required for the cluster / persistentvolume-controller to create encrypted PVCs
]
}
}
statement {
sid = "Allow attachment of persistent resources"
actions = ["kms:CreateGrant"]
resources = ["*"]
principals {
type = "AWS"
identifiers = [
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling", # required for the ASG to manage encrypted volumes for nodes
module.eks.cluster_iam_role_arn, # required for the cluster / persistentvolume-controller to create encrypted PVCs
]
}
condition {
test = "Bool"
variable = "kms:GrantIsForAWSResource"
values = ["true"]
}
}
}
resource "aws_iam_policy" "additional" {
name = "${local.name}-additional"
description = "Example usage of node additional policy"
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = [
"ec2:Describe*",
]
Effect = "Allow"
Resource = "*"
},
]
})
tags = local.tags
}
I used the above manifest to provision the cluster. Once it was up, I checked the nodes to see whether the utilities had been installed; they had not.
I then checked the cloud-init logs (/var/log/cloud-init-output.log) and found the following:
Slice=runtime.slice
‘/etc/eks/containerd/containerd-config.toml’ -> ‘/etc/containerd/config.toml’
‘/etc/eks/containerd/sandbox-image.service’ -> ‘/etc/systemd/system/sandbox-image.service’
‘/etc/eks/containerd/kubelet-containerd.service’ -> ‘/etc/systemd/system/kubelet.service’
Created symlink from /etc/systemd/system/multi-user.target.wants/kubelet.service to /etc/systemd/system/kubelet.service.
2023-05-29T12:29:51+0000 [eks-bootstrap] INFO: complete!
/var/lib/cloud/instance/scripts/part-001: line 23: --b64-cluster-ca: command not found
May 29 12:29:51 cloud-init[2415]: util.py[WARNING]: Failed running /var/lib/cloud/instance/scripts/part-001 [127]
May 29 12:29:51 cloud-init[2415]: cc_scripts_user.py[WARNING]: Failed to run module scripts-user (scripts in /var/lib/cloud/instance/scripts)
May 29 12:29:51 cloud-init[2415]: util.py[WARNING]: Running module scripts-user (<module 'cloudinit.config.cc_scripts_user' from '/usr/lib/python2.7/site-packages/cloudinit/config/cc_scripts_user.pyc'>) failed
Cloud-init v. 19.3-46.amzn2 finished at Mon, 29 May 2023 12:29:51 +0000. Datasource DataSourceEc2. Up 47.47 seconds
The content of /var/lib/cloud/instance/scripts/part-001:
#!/bin/bash
set -e
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=true
B64_CLUSTER_CA=XXXXXXXXX
API_SERVER_URL=XXXXXXXXX
/etc/eks/bootstrap.sh eks --kubelet-extra-args '--max-pods=50 --node-labels=apps=true'
--b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
sudo yum install -y htop
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.26.0/crictl-v1.26.0-linux-amd64.tar.gz
tar zxvf crictl-v1.26.0-linux-amd64.tar.gz -C /usr/bin/
- [x] ✋ I have searched the open/closed issues and my issue is not listed.
Versions
- Module version [Required]: v19.15.0
- Terraform version: Terraform v1.4.6
- Provider version(s):
  - registry.terraform.io/hashicorp/aws v4.67.0
  - registry.terraform.io/hashicorp/cloudinit v2.3.2
  - registry.terraform.io/hashicorp/kubernetes v2.20.0
  - registry.terraform.io/hashicorp/time v0.9.1
  - registry.terraform.io/hashicorp/tls v4.0.4
Reproduction Code [Required]
Provision an EKS cluster using the manifest given above. Once the nodes are available, check whether the utilities (htop, crictl) were installed on them.
Expected behavior
A user should be able to install services using the user data script.
Actual behavior
The user data script fails with the following error:
/var/lib/cloud/instance/scripts/part-001: line 23: --b64-cluster-ca: command not found
Because of this error, none of the subsequent commands run.
Terminal Output Screenshot(s)
Already provided
Additional context
Your bootstrap_extra_args setting ends with a newline character because of the heredoc string. When that value is interpolated into the bootstrap command, the trailing newline pushes the module-appended flags onto a new line, so the shell tries to run --b64-cluster-ca ... as its own command, which is exactly the "command not found" error on line 23 of part-001:
bootstrap_extra_args = <<-EOT
  --kubelet-extra-args '--max-pods=50 --node-labels=apps=true'
EOT
You can see the use of this setting in the template file here: https://github.com/terraform-aws-modules/terraform-aws-eks/blob/master/templates/linux_user_data.tpl#L12
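As a quick illustration (the local name below is hypothetical, not part of your config), you can see the trailing newline by evaluating the heredoc, e.g. in terraform console:
locals {
  # The indented heredoc (<<-) strips the leading indentation but keeps the final newline
  bootstrap_extra_args_example = <<-EOT
    --kubelet-extra-args '--max-pods=50 --node-labels=apps=true'
  EOT
  # => "--kubelet-extra-args '--max-pods=50 --node-labels=apps=true'\n"
}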
To fix this, I'd recommend using a plain string instead, i.e.:
bootstrap_extra_args = "--kubelet-extra-args '--max-pods=50 --node-labels=apps=true'"
Alternatively, you could use the trimspace or chomp functions to remove the offending newline character, e.g.:
bootstrap_extra_args = chomp(
  <<-EOT
    --kubelet-extra-args '--max-pods=50 --node-labels=apps=true'
  EOT
)
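Putting it together, here's a minimal sketch of how the bootstrap-related settings from your node group could look after the fix. Only bootstrap_extra_args needs to change; the pre/post bootstrap user data heredocs are whole script lines, so their trailing newlines should be harmless:
bootstrap_extra_args = "--kubelet-extra-args '--max-pods=50 --node-labels=apps=true'"

pre_bootstrap_user_data = <<-EOT
  export CONTAINER_RUNTIME="containerd"
  export USE_MAX_PODS=true
EOT

post_bootstrap_user_data = <<-EOT
  sudo yum install -y htop
  wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.26.0/crictl-v1.26.0-linux-amd64.tar.gz
  tar zxvf crictl-v1.26.0-linux-amd64.tar.gz -C /usr/bin/
EOT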
See the comments above; we also have examples that demonstrate how to pass in custom/additional user data. This does not appear to be a module issue, so I am closing it out for now.
Thank you @bdalpe and @bryantbiggs!
I'm going to lock this issue because it has been closed for 30 days ⏳. This helps our maintainers find and focus on the active issues. If you have found a problem that seems similar to this, please open a new issue and complete the issue template so we can capture all the details necessary to investigate further.