terraform-aws-eks
terraform-aws-eks copied to clipboard
Karpenter cannot create new nodes on v1 Policy
Description
When updating the module from 20.17.2 to 20.24.0 (Terraform 1.9.5, AWS provider 5.64.0) and setting enable_v1_permissions = true, I get the plan below:
# module.k8s.module.karpenter.aws_iam_policy.controller[0] will be updated in-place
~ resource "aws_iam_policy" "controller" {
id = "arn:aws:iam::******:policy/KarpenterController-2024022809571951390000000f"
name = "KarpenterController-2024022809571951390000000f"
~ policy = jsonencode(
~ {
~ Statement = [
~ {
~ Resource = [
- "arn:aws:ec2:*::snapshot/*",
- "arn:aws:ec2:*::image/*",
- "arn:aws:ec2:*:*:subnet/*",
- "arn:aws:ec2:*:*:spot-instances-request/*",
- "arn:aws:ec2:*:*:security-group/*",
- "arn:aws:ec2:*:*:launch-template/*",
+ "arn:aws:ec2:eu-west-1::snapshot/*",
+ "arn:aws:ec2:eu-west-1::image/*",
+ "arn:aws:ec2:eu-west-1:*:subnet/*",
+ "arn:aws:ec2:eu-west-1:*:security-group/*",
]
~ Sid = "AllowScopedEC2InstanceActions" -> "AllowScopedEC2InstanceAccessActions"
# (2 unchanged attributes hidden)
},
~ {
~ Action = [
"ec2:RunInstances",
- "ec2:CreateLaunchTemplate",
"ec2:CreateFleet",
]
~ Condition = {
~ StringEquals = {
- "aws:RequestTag/kubernetes.io/cluster/******" = "owned"
+ "aws:ResourceTag/kubernetes.io/cluster/******" = "owned"
}
~ StringLike = {
- "aws:RequestTag/karpenter.sh/nodepool" = "*"
+ "aws:ResourceTag/karpenter.sh/nodepool" = "*"
}
}
~ Resource = [
- "arn:aws:ec2:*:*:volume/*",
- "arn:aws:ec2:*:*:spot-instances-request/*",
- "arn:aws:ec2:*:*:network-interface/*",
- "arn:aws:ec2:*:*:launch-template/*",
- "arn:aws:ec2:*:*:instance/*",
- "arn:aws:ec2:*:*:fleet/*",
] -> "arn:aws:ec2:eu-west-1:*:launch-template/*"
~ Sid = "AllowScopedEC2InstanceActionsWithTags" -> "AllowScopedEC2LaunchTemplateAccessActions"
# (1 unchanged attribute hidden)
},
~ {
~ Action = "ec2:CreateTags" -> [
+ "ec2:RunInstances",
+ "ec2:CreateLaunchTemplate",
+ "ec2:CreateFleet",
]
~ Condition = {
~ StringEquals = {
+ "aws:RequestTag/eks:eks-cluster-name" = "******"
- "ec2:CreateAction" = [
- "RunInstances",
- "CreateFleet",
- "CreateLaunchTemplate",
]
# (1 unchanged attribute hidden)
}
# (1 unchanged attribute hidden)
}
~ Resource = [
~ "arn:aws:ec2:*:*:volume/*" -> "arn:aws:ec2:eu-west-1:*:volume/*",
~ "arn:aws:ec2:*:*:spot-instances-request/*" -> "arn:aws:ec2:eu-west-1:*:spot-instances-request/*",
~ "arn:aws:ec2:*:*:network-interface/*" -> "arn:aws:ec2:eu-west-1:*:network-interface/*",
~ "arn:aws:ec2:*:*:launch-template/*" -> "arn:aws:ec2:eu-west-1:*:launch-template/*",
~ "arn:aws:ec2:*:*:instance/*" -> "arn:aws:ec2:eu-west-1:*:instance/*",
~ "arn:aws:ec2:*:*:fleet/*" -> "arn:aws:ec2:eu-west-1:*:fleet/*",
]
~ Sid = "AllowScopedResourceCreationTagging" -> "AllowScopedEC2InstanceActionsWithTags"
# (1 unchanged attribute hidden)
},
~ {
~ Condition = {
- "ForAllValues:StringEquals" = {
- "aws:TagKeys" = [
- "karpenter.sh/nodeclaim",
- "Name",
]
}
~ StringEquals = {
+ "aws:RequestTag/eks:eks-cluster-name" = "******"
+ "aws:RequestTag/kubernetes.io/cluster/******" = "owned"
- "aws:ResourceTag/kubernetes.io/cluster/******" = "owned"
+ "ec2:CreateAction" = [
+ "RunInstances",
+ "CreateFleet",
+ "CreateLaunchTemplate",
]
}
~ StringLike = {
+ "aws:RequestTag/karpenter.sh/nodepool" = "*"
- "aws:ResourceTag/karpenter.sh/nodepool" = "*"
}
}
~ Resource = "arn:aws:ec2:*:*:instance/*" -> [
+ "arn:aws:ec2:eu-west-1:*:volume/*",
+ "arn:aws:ec2:eu-west-1:*:spot-instances-request/*",
+ "arn:aws:ec2:eu-west-1:*:network-interface/*",
+ "arn:aws:ec2:eu-west-1:*:launch-template/*",
+ "arn:aws:ec2:eu-west-1:*:instance/*",
+ "arn:aws:ec2:eu-west-1:*:fleet/*",
]
~ Sid = "AllowScopedResourceTagging" -> "AllowScopedResourceCreationTagging"
# (2 unchanged attributes hidden)
},
~ {
~ Action = [
- "ec2:TerminateInstances",
- "ec2:DeleteLaunchTemplate",
] -> "ec2:CreateTags"
~ Condition = {
+ "ForAllValues:StringEquals" = {
+ "aws:TagKeys" = [
+ "eks:eks-cluster-name",
+ "karpenter.sh/nodeclaim",
+ "Name",
]
}
+ StringEqualsIfExists = {
+ "aws:RequestTag/eks:eks-cluster-name" = "****"
}
# (2 unchanged attributes hidden)
}
~ Resource = [
- "arn:aws:ec2:*:*:launch-template/*",
- "arn:aws:ec2:*:*:instance/*",
] -> "arn:aws:ec2:eu-west-1:*:instance/*"
~ Sid = "AllowScopedDeletion" -> "AllowScopedResourceTagging"
# (1 unchanged attribute hidden)
},
+ {
+ Action = [
+ "ec2:TerminateInstances",
+ "ec2:DeleteLaunchTemplate",
]
+ Condition = {
+ StringEquals = {
+ "aws:ResourceTag/kubernetes.io/cluster/******" = "owned"
}
+ StringLike = {
+ "aws:ResourceTag/karpenter.sh/nodepool" = "*"
}
}
+ Effect = "Allow"
+ Resource = [
+ "arn:aws:ec2:eu-west-1:*:launch-template/*",
+ "arn:aws:ec2:eu-west-1:*:instance/*",
]
+ Sid = "AllowScopedDeletion"
},
{
Action = [
"ec2:DescribeSubnets",
"ec2:DescribeSpotPriceHistory",
"ec2:DescribeSecurityGroups",
"ec2:DescribeLaunchTemplates",
"ec2:DescribeInstances",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstanceTypeOfferings",
"ec2:DescribeImages",
"ec2:DescribeAvailabilityZones",
]
Condition = {
StringEquals = {
"aws:RequestedRegion" = "eu-west-1"
}
}
Effect = "Allow"
Resource = "*"
Sid = "AllowRegionalReadActions"
},
# (1 unchanged element hidden)
{
Action = "pricing:GetProducts"
Effect = "Allow"
Resource = "*"
Sid = "AllowPricingReadActions"
},
~ {
~ Action = [
# (1 unchanged element hidden)
"sqs:GetQueueUrl",
- "sqs:GetQueueAttributes",
"sqs:DeleteMessage",
]
# (3 unchanged attributes hidden)
},
{
Action = "iam:PassRole"
Condition = {
StringEquals = {
"iam:PassedToService" = "ec2.amazonaws.com"
}
}
Effect = "Allow"
Resource = "arn:aws:iam::******::role/*****"
Sid = "AllowPassingInstanceRole"
},
~ {
~ Condition = {
~ StringEquals = {
+ "aws:RequestTag/eks:eks-cluster-name" = "******"
# (2 unchanged attributes hidden)
}
# (1 unchanged attribute hidden)
}
~ Resource = "*" -> "arn:aws:iam::******::instance-profile/*"
# (3 unchanged attributes hidden)
},
~ {
~ Condition = {
~ StringEquals = {
+ "aws:RequestTag/eks:eks-cluster-name" = "****"
+ "aws:RequestTag/topology.kubernetes.io/region" = "eu-west-1"
# (3 unchanged attributes hidden)
}
# (1 unchanged attribute hidden)
}
~ Resource = "*" -> "arn:aws:iam::******::instance-profile/*"
# (3 unchanged attributes hidden)
},
~ {
~ Resource = "*" -> "arn:aws:iam::******::instance-profile/*"
# (4 unchanged attributes hidden)
},
~ {
~ Resource = "*" -> "arn:aws:iam::******:instance-profile/*"
# (3 unchanged attributes hidden)
},
{
Action = "eks:DescribeCluster"
Effect = "Allow"
Resource = "arn:aws:eks:************"
Sid = "AllowAPIServerEndpointDiscovery"
},
]
# (1 unchanged attribute hidden)
}
)
tags = {
"Environment" = "revproxy"
"ManagedBy" = "Terraform"
"Team" = "devops"
}
# (7 unchanged attributes hidden)
}
Plan: 0 to add, 1 to change, 0 to destroy.
After applying and updating Karpenter to v1.0.2, it cannot spawn nodes from NodeClaims. This is the error on the NodeClaim resource:
message: >-
creating instance, getting launch template configs, getting launch
templates, creating launch template, UnauthorizedOperation: You are not
authorized to perform this operation. User:
arn:aws:sts::756979186773:assumed-role/tf-eks-devops-revproxy-karpenter-controller/1726052365915287551
is not authori...
Rolling back to Karpenter 0.37.2 and the non-v1 policy works (still on module version 20.24.0, just setting enable_v1_permissions = false).
Did I miss anything?
Thanks, Adrian
@adrianmiron Attaching my plan after enabling v1 policy flag plan.txt
I think this is what does the trick:
~ StringEquals = {
- "aws:RequestTag/kubernetes.io/cluster/mgmt" = "owned"
+ "aws:ResourceTag/kubernetes.io/cluster/mgmt" = "owned"
}
~ StringLike = {
- "aws:RequestTag/karpenter.sh/nodepool" = "*"
+ "aws:ResourceTag/karpenter.sh/nodepool" = "*"
}
the change from RequestTag to ResourceTag
Yeah, I had to edit quite a few Condition blocks for it to be able to spawn nodes... But it is still unable to properly delete 0.37.2 NodeClaims, because even though 1.x should support both CRD types, it does not here. The NodeClaim is seen as invalid, so it cannot even be deleted. I will push this back a few months; 0.37.2 is good enough for now.
Hi @bryantbiggs, this is not really a question but a blocking issue which is preventing us from updating Karpenter from v0.37 to v1.
Hi @bryantbiggs, this is not really a question but a blocking issue which is preventing us from updating Karpenter from v0.37 to v1.
While I agree it's blocking, the root cause lies with Karpenter, and we need an update there.
Hello, @adrianmiron ,
I agree with you since the v1 policy in the module is perfectly aligned with the one of the v1 migration guide of Karpenter. So the issue is coming from Karpenter.
Could you please share the full Unauthorized error message you are receiving from the controller? I would like to see whether you are getting the same error as I am.
Thank you,
It seems the latest version bumps (0.37.5 and 1.0.6) generate no more errors during the upgrade — none related to the policy, anyway (just be sure to enable the webhooks on 0.37.5).
I have had the webhook activated since 0.37.0. Our issue is indeed not directly related to the policy: we implemented an explicit deny on the creation of public IPs in our AWS accounts, and the Karpenter controller — even though it creates nodes in subnets without auto-assign public IPv4 enabled — receives an explicit deny, as if it were trying to create a public IP on a network interface. This is fixed by explicitly setting associatePublicIPAddress to false, but that is not the expected behavior IMO.
We mentioned this here: https://github.com/aws/karpenter-provider-aws/issues/6847#issuecomment-2395991560
I activated the webhook since 0.37.0. Our issue is not related to the policy directly indeed: However we implemented an explicit deny of the creation of public IPs on our AWS accounts and the karpenter controller even though creating nodes on subnets without the auto assign public IPV4 enabled receive an explicit deny like if it was trying to create a public IP on a network interface ... / this is fixed by explicitly defining
associatePublicIPAddress to false, but it is not the expected behavior IMO. We mentioned this here: aws/karpenter-provider-aws#6847 (comment)
Weird... it must perform a check or something, because it does not create a public IP for our nodes (they are in private subnets).
Yes, that is really weird... 0.37.5 was not causing this, and from my reading the policy is definitely not the issue.
Could you set up an explicit deny to try to reproduce the error on your side, or is that too risky? (I cannot remove the explicit deny on my side, even on one AWS account.)
This issue has been automatically marked as stale because it has been open 30 days with no activity. Remove stale label or comment or this issue will be closed in 10 days
I have the same issue — any recommendation on how this can be resolved?
This issue has been automatically marked as stale because it has been open 30 days with no activity. Remove stale label or comment or this issue will be closed in 10 days
This issue was automatically closed because of stale in 10 days
I'm going to lock this issue because it has been closed for 30 days β³. This helps our maintainers find and focus on the active issues. If you have found a problem that seems similar to this, please open a new issue and complete the issue template so we can capture all the details necessary to investigate further.