zookeeper-operator
zookeeper-operator copied to clipboard
Helm Zookeeper install fails
Description
Installing Zookeeper using Helm fails. The post-install-upgrade hook job fails because the ZookeeperCluster information is incorrect. The installation is being performed on a MacBook Pro with 64GB RAM using Minikube.
Importance
Blocker
Location
https://github.com/pravega/zookeeper-operator/blob/master/charts/zookeeper/templates/post-install-upgrade-hooks.yaml#L66
Suggestions for an improvement
It appears the ZookeeperCluster information for the zookeeper PODs is not updated properly causing key checks to fail in the post-install-upgrade-hook script.
- Check the status of the PODs - they are not ready, as you can see below. In addition, status.currentVersion is NOT set.
> kubectl get ZookeeperCluster -n default zookeeper -o json
{
"apiVersion": "zookeeper.pravega.io/v1beta1",
"kind": "ZookeeperCluster",
"metadata": {
"annotations": {
"meta.helm.sh/release-name": "zookeeper",
"meta.helm.sh/release-namespace": "default"
},
"creationTimestamp": "2021-12-16T16:03:26Z",
"generation": 2,
"labels": {
"app.kubernetes.io/managed-by": "Helm",
"app.kubernetes.io/name": "zookeeper",
"app.kubernetes.io/version": "0.2.13",
"helm.sh/chart": "zookeeper-0.2.13"
},
"name": "zookeeper",
"namespace": "default",
"resourceVersion": "68085",
"uid": "0941179f-8180-4ae7-b8a8-754f0b80aa8a"
},
"spec": {
"adminServerService": {},
"clientService": {},
"config": {
"autoPurgePurgeInterval": 1,
"autoPurgeSnapRetainCount": 3,
"commitLogCount": 500,
"globalOutstandingLimit": 1000,
"initLimit": 10,
"maxClientCnxns": 60,
"maxSessionTimeout": 40000,
"minSessionTimeout": 4000,
"preAllocSize": 16384,
"snapCount": 10000,
"snapSizeLimitInKb": 4194304,
"syncLimit": 2,
"tickTime": 2000
},
"headlessService": {},
"image": {
"pullPolicy": "IfNotPresent",
"repository": "harbor.tools.com/catapult/zookeeper",
"tag": "0.2.13"
},
"kubernetesClusterDomain": "cluster.local",
"labels": {
"app": "zookeeper",
"release": "zookeeper"
},
"persistence": {
"reclaimPolicy": "Retain",
"spec": {
"accessModes": [
"ReadWriteOnce"
],
"resources": {
"requests": {
"storage": "8Gi"
}
}
}
},
"pod": {
"affinity": {
"podAntiAffinity": {
"preferredDuringSchedulingIgnoredDuringExecution": [
{
"podAffinityTerm": {
"labelSelector": {
"matchExpressions": [
{
"key": "app",
"operator": "In",
"values": [
"zookeeper"
]
}
]
},
"topologyKey": "kubernetes.io/hostname"
},
"weight": 20
}
]
}
},
"labels": {
"app": "zookeeper",
"release": "zookeeper"
},
"resources": {},
"serviceAccountName": "zookeeper",
"terminationGracePeriodSeconds": 30
},
"ports": [
{
"containerPort": 2181,
"name": "client"
},
{
"containerPort": 2888,
"name": "quorum"
},
{
"containerPort": 3888,
"name": "leader-election"
},
{
"containerPort": 7000,
"name": "metrics"
},
{
"containerPort": 8080,
"name": "admin-server"
}
],
"probes": {
"livenessProbe": {
"failureThreshold": 3,
"initialDelaySeconds": 10,
"periodSeconds": 10,
"successThreshold": 0,
"timeoutSeconds": 10
},
"readinessProbe": {
"failureThreshold": 3,
"initialDelaySeconds": 10,
"periodSeconds": 10,
"successThreshold": 1,
"timeoutSeconds": 10
}
},
"replicas": 1,
"storageType": "persistence"
},
"status": {
"conditions": [
{
"status": "False",
"type": "PodsReady"
},
{
"status": "False",
"type": "Upgrading"
},
{
"status": "False",
"type": "Error"
}
],
"externalClientEndpoint": "N/A",
"internalClientEndpoint": "10.106.174.190:2181",
"members": {
"unready": [
"zookeeper-0"
]
},
"replicas": 1
}
}
- Check running PODs
> kubectl get all
NAME READY STATUS RESTARTS AGE
pod/client-tools-8448f964c4-sxw5c 1/1 Running 0 19h
pod/zookeeper-0 1/1 Running 0 30s
pod/zookeeper-operator-f579bfcf5-srpl8 1/1 Running 0 68m
pod/zookeeper-post-install-upgrade--1-f6qcm 1/1 Running 0 29s
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 2d22h
service/zookeeper-admin-server ClusterIP 10.100.65.88 <none> 8080/TCP 30s
service/zookeeper-client ClusterIP 10.106.174.190 <none> 2181/TCP 30s
service/zookeeper-headless ClusterIP None <none> 2181/TCP,2888/TCP,3888/TCP,7000/TCP,8080/TCP 30s
NAME READY UP-TO-DATE AVAILABLE AGE
deployment.apps/client-tools 1/1 1 1 19h
deployment.apps/zookeeper-operator 1/1 1 1 68m
NAME DESIRED CURRENT READY AGE
replicaset.apps/client-tools-8448f964c4 1 1 1 19h
replicaset.apps/zookeeper-operator-f579bfcf5 1 1 1 68m
NAME READY AGE
statefulset.apps/zookeeper 1/1 30s
NAME COMPLETIONS DURATION AGE
job.batch/zookeeper-post-install-upgrade 0/1 29s 29s
- Check the ZookeeperCluster information again and you see the output below, which does not make sense: readyReplicas is 1, but the member is still listed under the unready section. Also, we can see status.currentVersion is not set. Overall, many checks fail in the post-install-upgrade-hook script, causing the installation to fail.
"members": {
"unready": [
"zookeeper-0"
]
},
"readyReplicas": 1,
"replicas": 1
> kubectl get ZookeeperCluster -n default zookeeper -o json
{
"apiVersion": "zookeeper.pravega.io/v1beta1",
"kind": "ZookeeperCluster",
"metadata": {
"annotations": {
"meta.helm.sh/release-name": "zookeeper",
"meta.helm.sh/release-namespace": "default"
},
"creationTimestamp": "2021-12-16T16:03:26Z",
"generation": 2,
"labels": {
"app.kubernetes.io/managed-by": "Helm",
"app.kubernetes.io/name": "zookeeper",
"app.kubernetes.io/version": "0.2.13",
"helm.sh/chart": "zookeeper-0.2.13"
},
"name": "zookeeper",
"namespace": "default",
"resourceVersion": "68141",
"uid": "0941179f-8180-4ae7-b8a8-754f0b80aa8a"
},
"spec": {
"adminServerService": {},
"clientService": {},
"config": {
"autoPurgePurgeInterval": 1,
"autoPurgeSnapRetainCount": 3,
"commitLogCount": 500,
"globalOutstandingLimit": 1000,
"initLimit": 10,
"maxClientCnxns": 60,
"maxSessionTimeout": 40000,
"minSessionTimeout": 4000,
"preAllocSize": 16384,
"snapCount": 10000,
"snapSizeLimitInKb": 4194304,
"syncLimit": 2,
"tickTime": 2000
},
"headlessService": {},
"image": {
"pullPolicy": "IfNotPresent",
"repository": "harbor.tools.com/catapult/zookeeper",
"tag": "0.2.13"
},
"kubernetesClusterDomain": "cluster.local",
"labels": {
"app": "zookeeper",
"release": "zookeeper"
},
"persistence": {
"reclaimPolicy": "Retain",
"spec": {
"accessModes": [
"ReadWriteOnce"
],
"resources": {
"requests": {
"storage": "8Gi"
}
}
}
},
"pod": {
"affinity": {
"podAntiAffinity": {
"preferredDuringSchedulingIgnoredDuringExecution": [
{
"podAffinityTerm": {
"labelSelector": {
"matchExpressions": [
{
"key": "app",
"operator": "In",
"values": [
"zookeeper"
]
}
]
},
"topologyKey": "kubernetes.io/hostname"
},
"weight": 20
}
]
}
},
"labels": {
"app": "zookeeper",
"release": "zookeeper"
},
"resources": {},
"serviceAccountName": "zookeeper",
"terminationGracePeriodSeconds": 30
},
"ports": [
{
"containerPort": 2181,
"name": "client"
},
{
"containerPort": 2888,
"name": "quorum"
},
{
"containerPort": 3888,
"name": "leader-election"
},
{
"containerPort": 7000,
"name": "metrics"
},
{
"containerPort": 8080,
"name": "admin-server"
}
],
"probes": {
"livenessProbe": {
"failureThreshold": 3,
"initialDelaySeconds": 10,
"periodSeconds": 10,
"successThreshold": 0,
"timeoutSeconds": 10
},
"readinessProbe": {
"failureThreshold": 3,
"initialDelaySeconds": 10,
"periodSeconds": 10,
"successThreshold": 1,
"timeoutSeconds": 10
}
},
"replicas": 1,
"storageType": "persistence"
},
"status": {
"conditions": [
{
"status": "False",
"type": "PodsReady"
},
{
"status": "False",
"type": "Upgrading"
},
{
"status": "False",
"type": "Error"
}
],
"externalClientEndpoint": "N/A",
"internalClientEndpoint": "10.106.174.190:2181",
"members": {
"unready": [
"zookeeper-0"
]
},
"readyReplicas": 1,
"replicas": 1
}
}
This is a follow-up to a comment I posted on the already-closed ticket: https://github.com/pravega/zookeeper-operator/issues/417
@RangaSamudrala the pod should not appear among the unready members. Are you using the latest version of zookeeper-operator?
Yes, I am using v0.2.13