zookeeper-operator icon indicating copy to clipboard operation
zookeeper-operator copied to clipboard

Helm Zookeeper install fails

Open RangaSamudrala opened this issue 3 years ago • 3 comments

Description

Installing Zookeeper using Helm fails. The post-install-upgrade hook job fails because the ZookeeperCluster information is incorrect. The installation is being performed on a MacBook Pro with 64GB RAM using Minikube.

Importance

Blocker

Location

https://github.com/pravega/zookeeper-operator/blob/master/charts/zookeeper/templates/post-install-upgrade-hooks.yaml#L66

Suggestions for an improvement

It appears the ZookeeperCluster information for the zookeeper PODs is not updated properly, causing key checks to fail in the post-install-upgrade-hook script.

  1. Check the status of the PODs - the pod is not ready, as you can see below. In addition, status.currentVersion is NOT set.
> kubectl get ZookeeperCluster -n default zookeeper -o json
{
    "apiVersion": "zookeeper.pravega.io/v1beta1",
    "kind": "ZookeeperCluster",
    "metadata": {
        "annotations": {
            "meta.helm.sh/release-name": "zookeeper",
            "meta.helm.sh/release-namespace": "default"
        },
        "creationTimestamp": "2021-12-16T16:03:26Z",
        "generation": 2,
        "labels": {
            "app.kubernetes.io/managed-by": "Helm",
            "app.kubernetes.io/name": "zookeeper",
            "app.kubernetes.io/version": "0.2.13",
            "helm.sh/chart": "zookeeper-0.2.13"
        },
        "name": "zookeeper",
        "namespace": "default",
        "resourceVersion": "68085",
        "uid": "0941179f-8180-4ae7-b8a8-754f0b80aa8a"
    },
    "spec": {
        "adminServerService": {},
        "clientService": {},
        "config": {
            "autoPurgePurgeInterval": 1,
            "autoPurgeSnapRetainCount": 3,
            "commitLogCount": 500,
            "globalOutstandingLimit": 1000,
            "initLimit": 10,
            "maxClientCnxns": 60,
            "maxSessionTimeout": 40000,
            "minSessionTimeout": 4000,
            "preAllocSize": 16384,
            "snapCount": 10000,
            "snapSizeLimitInKb": 4194304,
            "syncLimit": 2,
            "tickTime": 2000
        },
        "headlessService": {},
        "image": {
            "pullPolicy": "IfNotPresent",
            "repository": "harbor.tools.com/catapult/zookeeper",
            "tag": "0.2.13"
        },
        "kubernetesClusterDomain": "cluster.local",
        "labels": {
            "app": "zookeeper",
            "release": "zookeeper"
        },
        "persistence": {
            "reclaimPolicy": "Retain",
            "spec": {
                "accessModes": [
                    "ReadWriteOnce"
                ],
                "resources": {
                    "requests": {
                        "storage": "8Gi"
                    }
                }
            }
        },
        "pod": {
            "affinity": {
                "podAntiAffinity": {
                    "preferredDuringSchedulingIgnoredDuringExecution": [
                        {
                            "podAffinityTerm": {
                                "labelSelector": {
                                    "matchExpressions": [
                                        {
                                            "key": "app",
                                            "operator": "In",
                                            "values": [
                                                "zookeeper"
                                            ]
                                        }
                                    ]
                                },
                                "topologyKey": "kubernetes.io/hostname"
                            },
                            "weight": 20
                        }
                    ]
                }
            },
            "labels": {
                "app": "zookeeper",
                "release": "zookeeper"
            },
            "resources": {},
            "serviceAccountName": "zookeeper",
            "terminationGracePeriodSeconds": 30
        },
        "ports": [
            {
                "containerPort": 2181,
                "name": "client"
            },
            {
                "containerPort": 2888,
                "name": "quorum"
            },
            {
                "containerPort": 3888,
                "name": "leader-election"
            },
            {
                "containerPort": 7000,
                "name": "metrics"
            },
            {
                "containerPort": 8080,
                "name": "admin-server"
            }
        ],
        "probes": {
            "livenessProbe": {
                "failureThreshold": 3,
                "initialDelaySeconds": 10,
                "periodSeconds": 10,
                "successThreshold": 0,
                "timeoutSeconds": 10
            },
            "readinessProbe": {
                "failureThreshold": 3,
                "initialDelaySeconds": 10,
                "periodSeconds": 10,
                "successThreshold": 1,
                "timeoutSeconds": 10
            }
        },
        "replicas": 1,
        "storageType": "persistence"
    },
    "status": {
        "conditions": [
            {
                "status": "False",
                "type": "PodsReady"
            },
            {
                "status": "False",
                "type": "Upgrading"
            },
            {
                "status": "False",
                "type": "Error"
            }
        ],
        "externalClientEndpoint": "N/A",
        "internalClientEndpoint": "10.106.174.190:2181",
        "members": {
            "unready": [
                "zookeeper-0"
            ]
        },
        "replicas": 1
    }
}
  1. Check running PODs
>  kubectl get all
NAME                                          READY   STATUS    RESTARTS   AGE
pod/client-tools-8448f964c4-sxw5c             1/1     Running   0          19h
pod/zookeeper-0                               1/1     Running   0          30s
pod/zookeeper-operator-f579bfcf5-srpl8        1/1     Running   0          68m
pod/zookeeper-post-install-upgrade--1-f6qcm   1/1     Running   0          29s

NAME                             TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)                                        AGE
service/kubernetes               ClusterIP   10.96.0.1        <none>        443/TCP                                        2d22h
service/zookeeper-admin-server   ClusterIP   10.100.65.88     <none>        8080/TCP                                       30s
service/zookeeper-client         ClusterIP   10.106.174.190   <none>        2181/TCP                                       30s
service/zookeeper-headless       ClusterIP   None             <none>        2181/TCP,2888/TCP,3888/TCP,7000/TCP,8080/TCP   30s

NAME                                 READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/client-tools         1/1     1            1           19h
deployment.apps/zookeeper-operator   1/1     1            1           68m

NAME                                           DESIRED   CURRENT   READY   AGE
replicaset.apps/client-tools-8448f964c4        1         1         1       19h
replicaset.apps/zookeeper-operator-f579bfcf5   1         1         1       68m

NAME                         READY   AGE
statefulset.apps/zookeeper   1/1     30s

NAME                                       COMPLETIONS   DURATION   AGE
job.batch/zookeeper-post-install-upgrade   0/1           29s        29s
  1. Check the ZookeeperCluster information again and you see the output below, which does not make sense: readyReplicas is 1, but the member is still listed under the unready section. Also, we can see status.currentVersion is not set. Overall, many checks fail in the post-install-upgrade-hook script, causing the installation to fail.
        "members": {
            "unready": [
                "zookeeper-0"
            ]
        },
        "readyReplicas": 1,
        "replicas": 1
>  kubectl get ZookeeperCluster -n default zookeeper -o json
{
    "apiVersion": "zookeeper.pravega.io/v1beta1",
    "kind": "ZookeeperCluster",
    "metadata": {
        "annotations": {
            "meta.helm.sh/release-name": "zookeeper",
            "meta.helm.sh/release-namespace": "default"
        },
        "creationTimestamp": "2021-12-16T16:03:26Z",
        "generation": 2,
        "labels": {
            "app.kubernetes.io/managed-by": "Helm",
            "app.kubernetes.io/name": "zookeeper",
            "app.kubernetes.io/version": "0.2.13",
            "helm.sh/chart": "zookeeper-0.2.13"
        },
        "name": "zookeeper",
        "namespace": "default",
        "resourceVersion": "68141",
        "uid": "0941179f-8180-4ae7-b8a8-754f0b80aa8a"
    },
    "spec": {
        "adminServerService": {},
        "clientService": {},
        "config": {
            "autoPurgePurgeInterval": 1,
            "autoPurgeSnapRetainCount": 3,
            "commitLogCount": 500,
            "globalOutstandingLimit": 1000,
            "initLimit": 10,
            "maxClientCnxns": 60,
            "maxSessionTimeout": 40000,
            "minSessionTimeout": 4000,
            "preAllocSize": 16384,
            "snapCount": 10000,
            "snapSizeLimitInKb": 4194304,
            "syncLimit": 2,
            "tickTime": 2000
        },
        "headlessService": {},
        "image": {
            "pullPolicy": "IfNotPresent",
            "repository": "harbor.tools.com/catapult/zookeeper",
            "tag": "0.2.13"
        },
        "kubernetesClusterDomain": "cluster.local",
        "labels": {
            "app": "zookeeper",
            "release": "zookeeper"
        },
        "persistence": {
            "reclaimPolicy": "Retain",
            "spec": {
                "accessModes": [
                    "ReadWriteOnce"
                ],
                "resources": {
                    "requests": {
                        "storage": "8Gi"
                    }
                }
            }
        },
        "pod": {
            "affinity": {
                "podAntiAffinity": {
                    "preferredDuringSchedulingIgnoredDuringExecution": [
                        {
                            "podAffinityTerm": {
                                "labelSelector": {
                                    "matchExpressions": [
                                        {
                                            "key": "app",
                                            "operator": "In",
                                            "values": [
                                                "zookeeper"
                                            ]
                                        }
                                    ]
                                },
                                "topologyKey": "kubernetes.io/hostname"
                            },
                            "weight": 20
                        }
                    ]
                }
            },
            "labels": {
                "app": "zookeeper",
                "release": "zookeeper"
            },
            "resources": {},
            "serviceAccountName": "zookeeper",
            "terminationGracePeriodSeconds": 30
        },
        "ports": [
            {
                "containerPort": 2181,
                "name": "client"
            },
            {
                "containerPort": 2888,
                "name": "quorum"
            },
            {
                "containerPort": 3888,
                "name": "leader-election"
            },
            {
                "containerPort": 7000,
                "name": "metrics"
            },
            {
                "containerPort": 8080,
                "name": "admin-server"
            }
        ],
        "probes": {
            "livenessProbe": {
                "failureThreshold": 3,
                "initialDelaySeconds": 10,
                "periodSeconds": 10,
                "successThreshold": 0,
                "timeoutSeconds": 10
            },
            "readinessProbe": {
                "failureThreshold": 3,
                "initialDelaySeconds": 10,
                "periodSeconds": 10,
                "successThreshold": 1,
                "timeoutSeconds": 10
            }
        },
        "replicas": 1,
        "storageType": "persistence"
    },
    "status": {
        "conditions": [
            {
                "status": "False",
                "type": "PodsReady"
            },
            {
                "status": "False",
                "type": "Upgrading"
            },
            {
                "status": "False",
                "type": "Error"
            }
        ],
        "externalClientEndpoint": "N/A",
        "internalClientEndpoint": "10.106.174.190:2181",
        "members": {
            "unready": [
                "zookeeper-0"
            ]
        },
        "readyReplicas": 1,
        "replicas": 1
    }
}

RangaSamudrala avatar Dec 16 '21 16:12 RangaSamudrala

This is a follow-up to a comment I left on the already-closed ticket: https://github.com/pravega/zookeeper-operator/issues/417

RangaSamudrala avatar Dec 16 '21 16:12 RangaSamudrala

@RangaSamudrala the pod should not appear in the unready members. Are you using the latest version of zookeeper-operator?

anishakj avatar Jan 10 '22 06:01 anishakj

Yes, I am using v0.2.13

RangaSamudrala avatar Jan 11 '22 13:01 RangaSamudrala