terraform-provider-rancher2
ClusterRegistrationToken not being created by rancher2_cluster
Hello!
I'm creating an RKE Azure cluster, which has previously worked without issues:
# ...
provider "rancher2" {
api_url = "<snip>"
access_key = "<snip>"
secret_key = "<snip>"
insecure = true
}
resource "rancher2_cluster" "my-cluster" {
name = "my-cluster"
rke_config {
kubernetes_version = "v1.18.14-rancher1-1"
network {
plugin = "calico"
}
cloud_provider {
azure_cloud_provider {
aad_client_id = "<snip>"
aad_client_secret = "<snip>"
subscription_id = "<snip>"
tenant_id = "<snip>"
cloud_provider_backoff = false
cloud_provider_backoff_duration = 0
cloud_provider_backoff_exponent = 0
cloud_provider_backoff_jitter = 0
cloud_provider_backoff_retries = 0
cloud_provider_rate_limit = false
cloud_provider_rate_limit_bucket = 0
cloud_provider_rate_limit_qps = 0
maximum_load_balancer_rule_count = 0
use_instance_metadata = false
use_managed_identity_extension = false
}
name = "azure"
}
ingress {
provider = "none"
}
services {
etcd {
extra_args = {
cipher-suites = "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305"
}
}
}
}
cluster_auth_endpoint {
enabled = false
}
enable_cluster_monitoring = true
}
# ...
But now I'm getting the following error:
Error: Bad response statusCode [404]. Status [404 Not Found]. Body: [baseType=error, code=NotFound, message=namespaces "c-l9dvb" not found] from [<snip>/v3/clusterregistrationtokens]
on TFDEPLOYMENT_main_202102092224.tf line 4156, in resource "rancher2_cluster" "my-cluster":
4156: resource "rancher2_cluster" "my-cluster" {
The cluster is created fine, but the ClusterRegistrationToken is nowhere to be found. Once I "edit" the cluster in the web UI, it is generated.
I'm not sure why a new cluster would even look for namespaces. I tried looking at the source code but wasn't able to find anything, and the API doesn't seem to use namespaces to create the tokens (in fact, namespaceId seems to be null for all tokens -- is that normal?).
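For reference, this is roughly how I checked the existing tokens via the API (the <snip> placeholders stand in for my Rancher URL and an admin API token):
# List the existing ClusterRegistrationTokens on the v3 API and pull out their namespaceId fields.
curl -sk -H 'Authorization: Bearer <snip>' \
  'https://<snip>/v3/clusterregistrationtokens' \
  | grep -o '"namespaceId":[^,]*'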
The versions I'm using are Terraform 0.12.26 and the rancher2 provider 1.11.0.
Please let me know if more information is needed, and thank you for your time!
Hello @sentry-gun, yes, that sounds weird; the tfp is not managing namespaceId to create the ClusterRegistrationToken. What Rancher version are you using? Have you updated the Rancher server?
Rancher is on 2.4.8, and no, we haven't updated recently.
I tested your config using the same Rancher and tfp versions but was unable to reproduce the issue; the cluster and cluster registration token are both created properly.
Are you using the same user in the Rancher UI and for the tfp? Have you made any changes/updates on the tf side? Maybe something is wrong in the tfstate?
I'm using the local admin account for the UI and an API token from that account for Terraform. Just to verify, I tried creating a cluster & CRT through the API with the same token and curl; it works fine.
No changes on the Terraform side; we've been on 0.12.26 since the start and will probably update to 0.13 or 0.14 at some point. The TF state for the cluster resource is below. I assume the provider pulls the information from Rancher after creating the cluster, so fields being null seems normal to me if it's failing to find it:
{
"mode": "managed",
"type": "rancher2_cluster",
"name": "my-cluster",
"provider": "provider.rancher2",
"instances": [
{
"status": "tainted",
"schema_version": 1,
"attributes": {
"aks_config": [],
"annotations": null,
"ca_cert": null,
"cluster_auth_endpoint": [
{
"ca_certs": "",
"enabled": false,
"fqdn": ""
}
],
"cluster_monitoring_input": [],
"cluster_registration_token": null,
"cluster_template_answers": null,
"cluster_template_id": null,
"cluster_template_questions": null,
"cluster_template_revision_id": null,
"default_pod_security_policy_template_id": null,
"default_project_id": null,
"description": null,
"desired_agent_image": null,
"desired_auth_image": null,
"docker_root_dir": null,
"driver": null,
"eks_config": [],
"eks_config_v2": [],
"enable_cluster_alerting": null,
"enable_cluster_istio": null,
"enable_cluster_monitoring": false,
"enable_network_policy": null,
"gke_config": [],
"id": "c-l9dvb",
"istio_enabled": null,
"k3s_config": null,
"kube_config": null,
"labels": null,
"name": "my-cluster",
"oke_config": [],
"rke_config": [
{
"addon_job_timeout": 0,
"addons": "",
"addons_include": null,
"authentication": [],
"authorization": [],
"bastion_host": [],
"cloud_provider": [
{
"aws_cloud_provider": [],
"azure_cloud_provider": [
{
"aad_client_cert_password": "",
"aad_client_cert_path": "",
"aad_client_id": "<snip>",
"aad_client_secret": "<snip>",
"cloud": "",
"cloud_provider_backoff": false,
"cloud_provider_backoff_duration": 0,
"cloud_provider_backoff_exponent": 0,
"cloud_provider_backoff_jitter": 0,
"cloud_provider_backoff_retries": 0,
"cloud_provider_rate_limit": false,
"cloud_provider_rate_limit_bucket": 0,
"cloud_provider_rate_limit_qps": 0,
"load_balancer_sku": "basic",
"location": "",
"maximum_load_balancer_rule_count": 0,
"primary_availability_set_name": "",
"primary_scale_set_name": "",
"resource_group": "",
"route_table_name": "",
"security_group_name": "",
"subnet_name": "",
"subscription_id": "<snip>",
"tenant_id": "<snip>",
"use_instance_metadata": false,
"use_managed_identity_extension": false,
"vm_type": "",
"vnet_name": "",
"vnet_resource_group": ""
}
],
"custom_cloud_provider": "",
"name": "azure",
"openstack_cloud_provider": [],
"vsphere_cloud_provider": []
}
],
"dns": [],
"ignore_docker_version": true,
"ingress": [
{
"dns_policy": "",
"extra_args": {},
"node_selector": {},
"options": {},
"provider": "none"
}
],
"kubernetes_version": "v1.18.14-rancher1-1",
"monitoring": [],
"network": [
{
"calico_network_provider": [],
"canal_network_provider": [],
"flannel_network_provider": [],
"mtu": 0,
"options": {},
"plugin": "calico",
"weave_network_provider": []
}
],
"nodes": [],
"prefix_path": "",
"private_registries": [],
"services": [
{
"etcd": [
{
"backup_config": [],
"ca_cert": "",
"cert": "",
"creation": "",
"external_urls": null,
"extra_args": {
"cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305"
},
"extra_binds": null,
"extra_env": null,
"gid": 0,
"image": "",
"key": "",
"path": "",
"retention": "",
"snapshot": false,
"uid": 0
}
],
"kube_api": [],
"kube_controller": [],
"kubelet": [],
"kubeproxy": [],
"scheduler": []
}
],
"ssh_agent_auth": false,
"ssh_cert_path": "",
"ssh_key_path": "",
"upgrade_strategy": []
}
],
"scheduled_cluster_scan": null,
"system_project_id": null,
"timeouts": null,
"windows_prefered_cluster": false
},
"private": "<snip>",
"dependencies": [
<snip>
]
}
]
}
I'm not able to reproduce the issue using your config and versions; it's working fine for me. Could it be something related to your installation? Have you tested the same tf config on another Rancher server or with a new tfstate?
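For example, something like this would rule out the existing tfstate without touching it (paths and file names here are only illustrative):
# Copy the provider + cluster config into a scratch directory and apply it with a brand-new state.
mkdir -p /tmp/rancher2-repro
cp main.tf /tmp/rancher2-repro/   # illustrative file name; copy whichever .tf holds the rancher2 config
cd /tmp/rancher2-repro
terraform init
terraform apply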
Tested on a different Rancher server (Terraform 0.12.26, Rancher 2.4.8): worked fine. Tested with clean installs of Terraform 0.12.26, 0.12.30, 0.13.6, and 0.14.6 against the same server: all failed.
I can't really wrap my head around this issue. Is there some debug mode for the provider specifically? I tried TF_LOG=TRACE but it didn't really show me anything new:
[...]
2021-02-12T13:35:43.424Z [DEBUG] plugin.terraform-provider-rancher2_v1.11.0: 2021/02/12 13:35:43 [INFO] Creating Cluster azure-test-cluster
2021-02-12T13:35:45.727Z [DEBUG] plugin.terraform-provider-rancher2_v1.11.0: 2021/02/12 13:35:45 [DEBUG] Waiting for state to become: [provisioning]
2021-02-12T13:35:47.392Z [DEBUG] plugin.terraform-provider-rancher2_v1.11.0: 2021/02/12 13:35:47 [INFO] Refreshing Cluster ID c-6zxdq
2021/02/12 13:35:47 [TRACE] dag/walk: vertex "provider.rancher2 (close)" is waiting for "rancher2_cluster.azure-test-cluster"
2021/02/12 13:35:47 [TRACE] dag/walk: vertex "meta.count-boundary (EachMode fixup)" is waiting for "rancher2_cluster.azure-test-cluster"
2021/02/12 13:35:47 [TRACE] dag/walk: vertex "root" is waiting for "meta.count-boundary (EachMode fixup)"
2021/02/12 13:35:47 [DEBUG] rancher2_cluster.azure-test-cluster: apply errored, but we're indicating that via the Error pointer rather than returning it: Bad response statusCode [404]. Status [404 Not Found]. Body: [baseType=error, code=NotFound, message=namespaces "c-6zxdq" not found] from [https://<snip>/v3/clusterregistrationtokens]
[...]
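In case it helps, a minimal sketch of capturing the full trace to a file so the provider's lines can be grepped afterwards (TF_LOG and TF_LOG_PATH are standard Terraform environment variables; the log path is just an example):
# Write the complete Terraform trace (core + provider plugin) to a file instead of the console.
export TF_LOG=TRACE
export TF_LOG_PATH=./terraform-trace.log   # example path
terraform apply
# Afterwards, filter for the rancher2 provider's own log lines:
grep 'terraform-provider-rancher2' ./terraform-trace.log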
I'd really like to stick with the provider as it is better in basically every way, so if you have any more ideas I'll keep testing them.
The server that's failing is a huge production server, so rebuilding it is probably out of the question. Considering API calls with curl work fine, at least there's a workaround if all else fails.
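For completeness, the manual workaround looks roughly like this (the <snip> placeholders stand in for the Rancher URL and an admin API token; the cluster ID is the one from the 404 error above):
# Create the missing ClusterRegistrationToken for an existing cluster directly on the v3 API.
curl -sk -X POST \
  -H 'Authorization: Bearer <snip>' \
  -H 'Content-Type: application/json' \
  -d '{"type":"clusterRegistrationToken","clusterId":"c-l9dvb"}' \
  'https://<snip>/v3/clusterregistrationtokens'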
Thank you
Having a similar issue with RKE on AWS: rancher2_cluster.this.cluster_registration_token is randomly just empty... After destroying and re-creating the cluster it works.
Rancher 2.6.2, provider version 1.21.0, Terraform 1.0.8.
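In case it's useful to others, a quick sketch of forcing only the cluster resource to be destroyed and re-created (the resource address rancher2_cluster.this matches my config; adjust it to yours):
# Recreate just the cluster resource so the provider reads a fresh registration token.
# -replace is available in Terraform >= 0.15.2; taint is the older equivalent.
terraform apply -replace='rancher2_cluster.this'
# or, on older Terraform versions:
# terraform taint rancher2_cluster.this && terraform apply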
@pgonin Assigning this to the Highlander team since it's a TF RKE cluster on Azure. I have another community user who is seeing this issue as far back as Rancher 2.6.13. I'd try to reproduce this on the latest Rancher v2.7 and v2.8; hopefully it has already been fixed in the latest version.
I can't seem to reproduce the problem with Rancher 2.7.6 (local cluster: v1.25.13+k3s1, downstream cluster: v1.24.17-rancher1-1):
{
"mode": "managed",
"type": "rancher2_cluster",
"name": "cluster_az",
"provider": "provider[\"registry.terraform.io/rancher/rancher2\"]",
"instances": [
{
"schema_version": 1,
"attributes": {
"agent_env_vars": [],
"aks_config": [],
"aks_config_v2": [],
"annotations": {
"authz.management.cattle.io/creator-role-bindings": "{\"created\":[\"cluster-owner\"],\"required\":[\"cluster-owner\"]}"
},
"ca_cert": null,
"cluster_auth_endpoint": [
{
"ca_certs": "",
"enabled": false,
"fqdn": ""
}
],
"cluster_monitoring_input": [],
"cluster_registration_token": [
{
"annotations": {},
"cluster_id": "c-gjh5n",
"command": "kubectl apply -f https://XXXXXXXXXXXXXXX/v3/import/dslj58xbhmzb5wnxpm5sbh56f8ztfxmznxxznts4ptmqqfw9gcvp2r_c-gjh5n.yaml",
"id": "c-gjh5n:default-token",
"insecure_command": "curl --insecure -sfL https://XXXXXXXXXXX/v3/import/dslj58xbhmzb5wnxpm5sbh56f8ztfxmznxxznts4ptmqqfw9gcvp2r_c-gjh5n.yaml | kubectl apply -f -",
"insecure_node_command": "",
"insecure_windows_node_command": "",
"labels": {},
"manifest_url": "https://XXXXXXXXXXXXX/v3/import/dslj58xbhmzb5wnxpm5sbh56f8ztfxmznxxznts4ptmqqfw9gcvp2r_c-gjh5n.yaml",
"name": "default-token",
"node_command": "sudo docker run -d --privileged --restart=unless-stopped --net=host -v /etc/kubernetes:/etc/kubernetes -v /var/run:/var/run rancher/rancher-agent:v2.7.6 --server https://XXXXXXXXXXXXXX --token XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX --ca-checksum b7dc23c3341cf74d75a9033229bc29ba37147138c65a86c5de08c2dbc66ab405",
"token": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"windows_node_command": "PowerShell -NoLogo -NonInteractive -Command \"\u0026 {docker run -v c:\\:c:\\host rancher/rancher-agent:v2.7.6 bootstrap --server https://XXXXXXXXXXXXXXXXXXX --token dslj58xbhmzb5wnxpm5sbh56f8ztfxmznxxznts4ptmqqfw9gcvp2r --ca-checksum b7dc23c3341cf74d75a9033229bc29ba37147138c65a86c5de08c2dbc66ab405 | iex}\""
}
],
"cluster_template_answers": [
{
I see the ClusterRegistrationTokens are created, and you can see the same in the tfstate:
ip-10-0-0-35:~ # kubectl get clusterregistrationtokens -A
NAMESPACE NAME AGE
local default-token 55m
c-gjh5n default-token 10m
c-gjh5n system 7m46s
ip-10-0-0-35:~ #
ip-10-0-0-35:~ # kubectl get clusters -A
NAMESPACE NAME READY KUBECONFIG
fleet-local local true local-kubeconfig
fleet-default c-gjh5n true c-gjh5n-kubeconfig
ip-10-0-0-35:~ #
I've attempted to reproduce the issue using Rancher version 2.6.13, but it remains non-reproducible. For the downstream cluster I tried with both RKE2 and k3s:
{
"mode": "managed",
"type": "rancher2_cluster",
"name": "cluster_az",
"provider": "provider[\"registry.terraform.io/rancher/rancher2\"]",
"instances": [
{
"schema_version": 1,
"attributes": {
"agent_env_vars": [],
"aks_config": [],
"aks_config_v2": [],
"annotations": {
"authz.management.cattle.io/creator-role-bindings": "{\"created\":[\"cluster-owner\"],\"required\":[\"cluster-owner\"]}"
},
"ca_cert": null,
"cluster_auth_endpoint": [
{
"ca_certs": "",
"enabled": false,
"fqdn": ""
}
],
"cluster_monitoring_input": [],
"cluster_registration_token": [
{
"annotations": {},
"cluster_id": "c-zgxzf",
"command": "kubectl apply -f https://XXXXXXXXXXXXXXXXXXXXX/v3/import/XXXXXXXXXX_c-zgxzf.yaml",
"id": "c-zgxzf:default-token",
"insecure_command": "curl --insecure -sfL https://XXXXXXXXXXXXXXXXXXXXX/v3/import/XXXXXXXXXX_c-zgxzf.yaml | kubectl apply -f -",
"insecure_node_command": "",
"insecure_windows_node_command": "",
"labels": {},
"manifest_url": "https://XXXXXXXXXXXXXXXXXXXXX/v3/import/XXXXXXXXXX_c-zgxzf.yaml",
"name": "default-token",
"node_command": "sudo docker run -d --privileged --restart=unless-stopped --net=host -v /etc/kubernetes:/etc/kubernetes -v /var/run:/var/run rancher/rancher-agent:v2.6.13 --server https://XXXXXXXXXXXXXXXXXXXXX --token XXXXXXXXXX --ca-checksum ea166fd2278025b8d7ae0eb55c3a20e550a57822cb7a7013b1c732c9992e12af",
"token": "XXXXXXXXXX",
"windows_node_command": "PowerShell -NoLogo -NonInteractive -Command \"\u0026 {docker run -v c:\\:c:\\host rancher/rancher-agent:v2.6.13 bootstrap --server https://XXXXXXXXXXXXXXXXXXXXX --token XXXXXXXXXX --ca-checksum ea166fd2278025b8d7ae0eb55c3a20e550a57822cb7a7013b1c732c9992e12af | iex}\""
}
],
"cluster_template_answers": [
{
"cluster_id": null,
"project_id": null,
"values": null
}
],
"cluster_template_id": "cattle-global-data:ct-9whf7",
"cluster_template_questions": null,
"cluster_template_revision_id": "cattle-global-data:ctr-jv5jn",
"default_pod_security_policy_template_id": null,
"default_project_id": "c-zgxzf:p-jg9k7",
"description": "Terraform",
"desired_agent_image": null,
"desired_auth_image": null,
"docker_root_dir": "/var/lib/docker",
"driver": "",
"eks_config": [],
"eks_config_v2": null,
"enable_cluster_alerting": false,
"enable_cluster_istio": null,
"enable_cluster_monitoring": false,
"enable_network_policy": false,
"fleet_workspace_name": "fleet-default",
"gke_config": [],
"gke_config_v2": [],
"id": "c-zgxzf",
"istio_enabled": false,
Given @skanakal's comments above, it looks like the issue is no longer reproducible. Due to the age of the issue, I'm closing it as likely fixed indirectly over the last two years.
@sentry-gun @riuvshyn, please reopen if it's still an issue for you.