boundary
boundary copied to clipboard
TLS handshake error from Calico in Controller Logs
Describe the bug: I am seeing TLS handshake errors in the Controller logs. The source IP listed in each error belongs to a Calico pod.
{"id":"t2UZHLchKp","source":"https://hashicorp.com/boundary/boundary-controller-d6f7bc666-lfjzm/controller","specversion":"1.0","type":"observation","data":{"latency-ms":75.353934,"request_info":{"id":"gtraceid_twMvakO7uv5kr0g1hGVO","method":"GET","path":"/v1/auth-tokens/at_IFDg03Yv2G","public_id":"at_IFDg03Yv2G","client_ip":"10.17.249.7"},"start":"2024-05-31T15:29:04.672662867Z","status":200,"stop":"2024-05-31T15:29:04.748016801Z","version":"v0.1"},"datacontentype":"application/cloudevents","time":"2024-05-31T15:29:04.748078801Z"} {"id":"ubbpl5j7Vm","source":"https://hashicorp.com/boundary/boundary-controller-d6f7bc666-lfjzm/controller","specversion":"1.0","type":"observation","data":{"latency-ms":86.611384,"request_info": {"id":"gtraceid_HxnioEmRiq3FVsS1tY7c","method":"GET","path":"/v1/users/u_lXr23TPO94:list-resolvable-aliases","public_id":"at_IFDg03Yv2G","client_ip":"10.17.249.6"},"start":"2024-05-31T15:29:04.843414423Z","status":200,"stop":"2024-05-31T15:29:04.930025907Z","version":"v0.1"},"datacontentype":"application/cloudevents","time":"2024-05-31T15:29:04.930091007Z"} {"id":"J8WdRGOzbA","source":"https://hashicorp.com/boundary/boundary-controller-d6f7bc666-lfjzm/controller","specversion":"1.0","type":"observation","data":{"latency-ms":153.125578,"request_info":{"id":"gtraceid_gVSaxuA2CazdHm3EhEIl","method":"GET","path":"/v1/targets?list_token=Be9MGxCtJuWGF5rvg3UgaAw5nTF1R4GqToPjeJwjn2XuFJJSQSBxWdnWDAndK7XohvViEA43\u0026recursive=true\u0026scope_id=global","public_id":"at_IFDg03Yv2G","client_ip":"10.17.249.5"},"start":"2024-05-31T15:29:05.038347087Z","status":200,"stop":"2024-05-31T15:29:05.191472765Z","version":"v0.1"},"datacontentype":"application/cloudevents","time":"2024-05-31T15:29:05.191547465Z"}
{"@level":"info","@message":"http: TLS handshake error from 10.17.249.7:4026: EOF","@module":"controller","@timestamp":"2024-05-31T15:29:05.241575Z"}{"id":"nzzAum4suM","source":"https://hashicorp.com/boundary/boundary-controller-d6f7bc666-lfjzm/controller","specversion":"1.0","type":"observation","data":{"latency-ms":230.22182,"request_info":{"id":"gtraceid_oslt8YMbAbFrUeFEkviX","method":"GET","path":"/v1/sessions?include_terminated=true\u0026list_token=GnJNyACsonPozNu7UDniHh2RJbsrhoJJFogc52zjcqrPjfrX1tepd8dEUK4madNEM45PG2zcpebr\u0026recursive=true\u0026scope_id=global","public_id":"at_IFDg03Yv2G","client_ip":"10.17.249.7"},"start":"2024-05-31T15:29:05.292514013Z","status":200,"stop":"2024-05-31T15:29:05.522735733Z","version":"v0.1"},"datacontentype":"application/cloudevents","time":"2024-05-31T15:29:05.522766533Z"}
{"@level":"info","@message":"http: TLS handshake error from 10.17.249.5:39099: EOF","@module":"controller","@timestamp":"2024-05-31T15:29:06.101070Z"}
{"@level":"info","@message":"http: TLS handshake error from 10.17.249.6:9206: EOF","@module":"controller","@timestamp":"2024-05-31T15:29:10.376638Z"}
{"@level":"info","@message":"http: TLS handshake error from 10.17.249.7:59121: EOF","@module":"controller","@timestamp":"2024-05-31T15:29:11.242132Z"}
To Reproduce: Both the Controller and the Worker are running in Azure, in the same subscription and the same subnet/VNet. The controller and the worker are each fronted by an Azure load balancer.
Here is the Controller HCL
disable_mlock = true enable_worker_auth_debugging = true controller { enable_worker_auth_debugging = true name = "gpam-boundary-controller" description = "GPAM Boundary Controller" database { url = "env://BOUNDARY_PG_URL"
}
public_cluster_addr = "gpam-cluster-np1.[XYZ].net:443" #This Loadbalancer/DNS resolves to the cluster (port) on the controller
}
listener "tcp" {
address = "0.0.0.0:9200"
purpose = "api"
tls_disable = false
tls_cert_file = "/etc/boundary.d/tls/boundary-cert.pem"
tls_key_file = "/etc/boundary.d/tls/boundary-key.pem"
}
listener "tcp" {
address = "0.0.0.0:9201"
purpose = "cluster"
}
listener "tcp" {
address = "0.0.0.0:9203"
tls_disable = false
tls_cert_file = "/etc/boundary.d/tls/boundary-cert.pem"
tls_key_file = "/etc/boundary.d/tls/boundary-key.pem"
purpose = "ops"
}
Here is the Worker HCL
disable_mlock = true listener "tcp" { address = "0.0.0.0:9202" purpose = "proxy" } worker { name = "[az np east] 10.24.218.15" #This is the IP of the worker Loadbalancer resolving to targetPort 9202 public_addr = "10.24.218.15:443" #This is the IP of the worker Loadbalancer resolving to targetPort 9202 initial_upstreams = ["gpam-cluster-np1.[XYZ].net:443"] #This Loadbalancer/DNS resolves to the cluster (port) on the controller tags { env = ["np"] region = ["azure-np-east"] } } kms "aead" { purpose = "worker-auth" aead_type = "aes-gcm" key = "8fZBjCUfN0TzjEGLQldGY4+iE9AkOvCfjh7+p0GtRBQ=" key_id = "global_worker-auth" }
Expected behavior: I would like to understand where the TLS handshake errors are coming from and how we can prevent them. The worker pod is registered with the controller and can establish remote sessions, yet I still see the TLS handshake errors. Earlier this morning I noticed a "status error grace period has expired" message in the logs due to TLS; I restarted the worker pod and it seems to have registered again.