kubeblocks icon indicating copy to clipboard operation
kubeblocks copied to clipboard

[BUG] redis-cluster: after a restart ops, all pod roles are primary

Open JashBook opened this issue 8 months ago • 1 comments

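Describe the bug: After a Restart ops on a sharded redis-cluster, both pods of a shard report the `primary` role (see shard-jfs and shard-qqs in the `list-instances` output below). The restarted pod comes up as a standalone master whose `cluster nodes` output lists only itself, instead of rejoining its shard as a replica.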

To Reproduce Steps to reproduce the behavior:

  1. Create a Redis cluster
kubectl apply -f -<<EOF
apiVersion: apps.kubeblocks.io/v1alpha1
kind: Cluster
metadata:
  name: rcluster-cluster
  namespace: default
spec:
  terminationPolicy: Delete
  shardingSpecs:
    - name: shard
      shards: 3
      template:
        name: redis
        componentDef: redis-cluster-7
        replicas: 2
        switchPolicy:
          type: Noop
        resources:
          limits:
            cpu: 100m
            memory: 0.5Gi
          requests:
            cpu: 100m
            memory: 0.5Gi
        volumeClaimTemplates:
          - name: data
            spec:
              accessModes:
                - ReadWriteOnce
              resources:
                 requests:
                  storage: 1Gi
EOF
kubectl get cluster    
NAME               CLUSTER-DEFINITION   VERSION   TERMINATION-POLICY   STATUS    AGE
rcluster-cluster                                  Delete               Running   52s

➜  ~ kbcli cluster list-instances rcluster-cluster
NAME                           NAMESPACE   CLUSTER            COMPONENT   STATUS    ROLE        ACCESSMODE   AZ              CPU(REQUEST/LIMIT)   MEMORY(REQUEST/LIMIT)   STORAGE   NODE                                                              CREATED-TIME                 
rcluster-cluster-shard-42p-0   default     rcluster-cluster   shard-42p   Running   primary     <none>       us-central1-a   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-7l88/10.128.0.44   Jun 21,2024 12:52 UTC+0800   
rcluster-cluster-shard-42p-1   default     rcluster-cluster   shard-42p   Running   secondary   <none>       us-central1-b   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8/10.128.0.55   Jun 21,2024 12:52 UTC+0800   
rcluster-cluster-shard-jfs-0   default     rcluster-cluster   shard-jfs   Running   primary     <none>       us-central1-b   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8/10.128.0.55   Jun 21,2024 12:52 UTC+0800   
rcluster-cluster-shard-jfs-1   default     rcluster-cluster   shard-jfs   Running   secondary   <none>       us-central1-a   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-7l88/10.128.0.44   Jun 21,2024 12:52 UTC+0800   
rcluster-cluster-shard-qqs-0   default     rcluster-cluster   shard-qqs   Running   primary     <none>       us-central1-b   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8/10.128.0.55   Jun 21,2024 12:52 UTC+0800   
rcluster-cluster-shard-qqs-1   default     rcluster-cluster   shard-qqs   Running   secondary   <none>       us-central1-a   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-7l88/10.128.0.44   Jun 21,2024 12:52 UTC+0800   

kubectl get pod -l app.kubernetes.io/instance=rcluster-cluster -o wide
NAME                           READY   STATUS    RESTARTS   AGE   IP             NODE                                                  NOMINATED NODE   READINESS GATES
rcluster-cluster-shard-42p-0   3/3     Running   0          78s   10.116.44.7    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-7l88   <none>           <none>
rcluster-cluster-shard-42p-1   3/3     Running   0          78s   10.116.45.28   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8   <none>           <none>
rcluster-cluster-shard-jfs-0   3/3     Running   0          76s   10.116.45.29   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8   <none>           <none>
rcluster-cluster-shard-jfs-1   3/3     Running   0          76s   10.116.44.9    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-7l88   <none>           <none>
rcluster-cluster-shard-qqs-0   3/3     Running   0          78s   10.116.45.30   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8   <none>           <none>
rcluster-cluster-shard-qqs-1   3/3     Running   0          78s   10.116.44.8    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-7l88   <none>           <none>
➜  ~ 
  2. Restart the cluster
kbcli cluster restart rcluster-cluster --auto-approve 

kubectl get ops     
NAME                             TYPE      CLUSTER            STATUS    PROGRESS   AGE
rcluster-cluster-restart-6kg9g   Restart   rcluster-cluster   Succeed   6/6        8m47s

kubectl get cluster 
NAME               CLUSTER-DEFINITION   VERSION   TERMINATION-POLICY   STATUS    AGE
rcluster-cluster                                  Delete               Running   10m
kubectl get pod -l app.kubernetes.io/instance=rcluster-cluster -o wide
NAME                           READY   STATUS    RESTARTS     AGE     IP             NODE                                                  NOMINATED NODE   READINESS GATES
rcluster-cluster-shard-42p-0   3/3     Running   0            2m46s   10.116.8.46    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-xx58   <none>           <none>
rcluster-cluster-shard-42p-1   3/3     Running   0            9m17s   10.116.45.36   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8   <none>           <none>
rcluster-cluster-shard-jfs-0   3/3     Running   0            8m54s   10.116.45.38   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8   <none>           <none>
rcluster-cluster-shard-jfs-1   2/3     Running   1 (6s ago)   2m46s   10.116.25.66   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-9w0d   <none>           <none>
rcluster-cluster-shard-qqs-0   3/3     Running   0            8m55s   10.116.45.37   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8   <none>           <none>
rcluster-cluster-shard-qqs-1   2/3     Running   1 (2s ago)   2m47s   10.116.25.67   gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-9w0d   <none>           <none>
  3. See the error: all pod roles are primary
kubectl get pod
NAME                           READY   STATUS      RESTARTS   AGE
rcluster-cluster-shard-42p-0   0/3     Completed   0          5m45s
rcluster-cluster-shard-42p-1   3/3     Running     0          6m9s
rcluster-cluster-shard-jfs-0   3/3     Running     0          5m46s
rcluster-cluster-shard-jfs-1   0/3     Completed   1          6m7s
rcluster-cluster-shard-qqs-0   3/3     Running     0          5m47s
rcluster-cluster-shard-qqs-1   0/3     Completed   1          6m9s

kubectl get pod
NAME                           READY   STATUS    RESTARTS   AGE
rcluster-cluster-shard-42p-0   3/3     Running   0          93s
rcluster-cluster-shard-42p-1   3/3     Running   0          8m4s
rcluster-cluster-shard-jfs-0   3/3     Running   0          7m41s
rcluster-cluster-shard-jfs-1   3/3     Running   0          93s
rcluster-cluster-shard-qqs-0   3/3     Running   0          7m42s
rcluster-cluster-shard-qqs-1   3/3     Running   0          94s
➜  ~ 

kbcli cluster list-instances rcluster-cluster
NAME                           NAMESPACE   CLUSTER            COMPONENT   STATUS    ROLE        ACCESSMODE   AZ              CPU(REQUEST/LIMIT)   MEMORY(REQUEST/LIMIT)   STORAGE   NODE                                                                CREATED-TIME                 
rcluster-cluster-shard-42p-0   default     rcluster-cluster   shard-42p   Running   primary     <none>       us-central1-a   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-xx58/10.128.15.204   Jun 21,2024 13:00 UTC+0800   
rcluster-cluster-shard-42p-1   default     rcluster-cluster   shard-42p   Running   secondary   <none>       us-central1-b   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8/10.128.0.55     Jun 21,2024 12:54 UTC+0800   
rcluster-cluster-shard-jfs-0   default     rcluster-cluster   shard-jfs   Running   primary     <none>       us-central1-b   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8/10.128.0.55     Jun 21,2024 12:54 UTC+0800   
rcluster-cluster-shard-jfs-1   default     rcluster-cluster   shard-jfs   Running   primary     <none>       us-central1-a   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-9w0d/10.128.0.77     Jun 21,2024 13:00 UTC+0800   
rcluster-cluster-shard-qqs-0   default     rcluster-cluster   shard-qqs   Running   primary     <none>       us-central1-b   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-219d1488-cwn8/10.128.0.55     Jun 21,2024 12:54 UTC+0800   
rcluster-cluster-shard-qqs-1   default     rcluster-cluster   shard-qqs   Running   primary     <none>       us-central1-a   100m / 100m          512Mi / 512Mi           <none>    gke-cicd-gke-q3zypql-cicd-gke-q3zypql-771d13bc-9w0d/10.128.0.77     Jun 21,2024 13:00 UTC+0800   

logs pod-0 redis-cluster

kubectl logs rcluster-cluster-shard-jfs-0 redis-cluster
+ parse_redis_cluster_advertised_svc_if_exist rcluster-cluster-shard-jfs-0
+ local pod_name=rcluster-cluster-shard-jfs-0
+ [[ -z '' ]]
+ echo 'Environment variable REDIS_CLUSTER_ADVERTISED_PORT and REDIS_CLUSTER_ADVERTISED_BUS_PORT not found. Ignoring.'
+ return 0
+ build_redis_conf
+ load_redis_template_conf
+ echo 'include /etc/conf/redis.conf'
Environment variable REDIS_CLUSTER_ADVERTISED_PORT and REDIS_CLUSTER_ADVERTISED_BUS_PORT not found. Ignoring.
+ build_redis_cluster_service_port
+ service_port=6379
+ cluster_bus_port=16379
+ '[' -n 6379 ']'
+ service_port=6379
+ '[' -n 16379 ']'
+ cluster_bus_port=16379
redis use kb pod fqdn rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc to announce
redis use kb pod fqdn rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc to announce
+ echo 'port 6379'
+ echo 'cluster-port 16379'
+ build_announce_ip_and_port
+ '[' -n '' ']'
+ kb_pod_fqdn=rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc
+ echo 'redis use kb pod fqdn rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc to announce'
+ echo 'replica-announce-ip rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc'
+ build_cluster_announce_info
+ kb_pod_fqdn=rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc
+ '[' -n '' ']'
+ echo 'redis use kb pod fqdn rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc to announce'
+ echo 'cluster-announce-ip 10.116.45.38'
+ echo 'cluster-announce-hostname rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc'
+ echo 'cluster-preferred-endpoint-type hostname'
+ rebuild_redis_acl_file
+ '[' -f /data/users.acl ']'
+ sed -i '/user default on/d' /data/users.acl
+ sed -i '/user kbreplicator on/d' /data/users.acl
+ sed -i '/user  on/d' /data/users.acl
+ build_redis_default_accounts
+ '[' -n O3605v7HsS ']'
+ echo 'masteruser kbreplicator'
+ echo 'masterauth O3605v7HsS'
+ echo 'user kbreplicator on +psync +replconf +ping >O3605v7HsS'
+ '[' '!' -z O3605v7HsS ']'
+ echo 'protected-mode yes'
+ echo 'user default on >O3605v7HsS ~* &* +@all '
+ echo 'aclfile /data/users.acl'
+ start_redis_server
+ scale_redis_cluster_replica
+ exec_cmd='exec redis-server /etc/redis/redis.conf'
+ '[' -f /opt/redis-stack/lib/redisearch.so ']'
+ '[' -n O3605v7HsS ']'
+ retry redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping
+ local max_attempts=20
+ local attempt=1
+ redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so '
+ '[' -f /opt/redis-stack/lib/redistimeseries.so ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so '
+ '[' -f /opt/redis-stack/lib/rejson.so ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so '
+ '[' -f /opt/redis-stack/lib/redisbloom.so ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so '
+ '[' -f /opt/redis-stack/lib/redisgraph.so ']'
+ '[' -f /opt/redis-stack/lib/rediscompat.so ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so'
+ '[' -f /opt/redis-stack/lib/redisgears.so ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so '
Starting redis server cmd: exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so 
+ echo 'Starting redis server cmd: exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so '
+ eval 'exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so '
++ exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so --loadmodule /opt/redis-stack/lib/redistimeseries.so --loadmodule /opt/redis-stack/lib/rejson.so --loadmodule /opt/redis-stack/lib/redisbloom.so --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
Could not connect to Redis at 127.0.0.1:6379: Connection refused
+ '[' 1 -eq 20 ']'
+ echo 'Command '\''redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping'\'' failed. Attempt 1 of 20. Retrying in 5 seconds...'
+ attempt=2
+ sleep 3
Command 'redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping' failed. Attempt 1 of 20. Retrying in 5 seconds...
+ redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
PONG
+ '[' 2 -eq 20 ']'
+ current_pod_name=rcluster-cluster-shard-jfs-0
+ current_pod_fqdn=rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc
+ '[' -n rcluster-cluster-shard-jfs-1 ']'
+ target_node_fqdn=rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc
+ get_current_comp_nodes_for_scale_out_replica rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc 6379
+ local cluster_node=rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc
+ local cluster_node_port=6379
+ '[' -z O3605v7HsS ']'
++ redis-cli -h rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc -p 6379 -a O3605v7HsS cluster nodes
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
+ cluster_nodes_info='542fd78cd8d275d3a9e8e6f60b95279c91f5875c 10.116.44.10:6379@16379,rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc myself,master - 0 1718945660054 0 connected'
+ current_comp_primary_node=()
+ current_comp_other_nodes=()
++ echo '542fd78cd8d275d3a9e8e6f60b95279c91f5875c 10.116.44.10:6379@16379,rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc myself,master - 0 1718945660054 0 connected'
++ wc -l
Cluster nodes info contains only one line, returning...
current_comp_primary_node is empty, skip scale out replica
+ '[' 1 -eq 1 ']'
+ echo 'Cluster nodes info contains only one line, returning...'
+ return
+ '[' 0 -eq 0 ']'
+ echo 'current_comp_primary_node is empty, skip scale out replica'
+ exit 0

logs pod-1 redis-cluster

kubectl logs rcluster-cluster-shard-jfs-1 redis-cluster
+ parse_redis_cluster_advertised_svc_if_exist rcluster-cluster-shard-jfs-1
+ local pod_name=rcluster-cluster-shard-jfs-1
+ [[ -z '' ]]
+ echo 'Environment variable REDIS_CLUSTER_ADVERTISED_PORT and REDIS_CLUSTER_ADVERTISED_BUS_PORT not found. Ignoring.'
+ return 0
+ build_redis_conf
+ load_redis_template_conf
+ echo 'include /etc/conf/redis.conf'
+ build_redis_cluster_service_port
+ service_port=6379
+ cluster_bus_port=16379
+ '[' -n 6379 ']'
+ service_port=6379
+ '[' -n 16379 ']'
+ cluster_bus_port=16379
+ echo 'port 6379'
+ echo 'cluster-port 16379'
+ build_announce_ip_and_port
+ '[' -n '' ']'
+ kb_pod_fqdn=rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc
+ echo 'redis use kb pod fqdn rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc to announce'
+ echo 'replica-announce-ip rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc'
+ build_cluster_announce_info
+ kb_pod_fqdn=rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc
+ '[' -n '' ']'
+ echo 'redis use kb pod fqdn rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc to announce'
+ echo 'cluster-announce-ip 10.116.25.66'
Environment variable REDIS_CLUSTER_ADVERTISED_PORT and REDIS_CLUSTER_ADVERTISED_BUS_PORT not found. Ignoring.
redis use kb pod fqdn rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc to announce
redis use kb pod fqdn rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc to announce
+ echo 'cluster-announce-hostname rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc'
+ echo 'cluster-preferred-endpoint-type hostname'
+ rebuild_redis_acl_file
+ '[' -f /data/users.acl ']'
+ sed -i '/user default on/d' /data/users.acl
+ sed -i '/user kbreplicator on/d' /data/users.acl
+ sed -i '/user  on/d' /data/users.acl
+ build_redis_default_accounts
+ '[' -n O3605v7HsS ']'
+ echo 'masteruser kbreplicator'
+ echo 'masterauth O3605v7HsS'
+ echo 'user kbreplicator on +psync +replconf +ping >O3605v7HsS'
+ '[' '!' -z O3605v7HsS ']'
+ echo 'protected-mode yes'
+ echo 'user default on >O3605v7HsS ~* &* +@all '
+ echo 'aclfile /data/users.acl'
+ start_redis_server
+ exec_cmd='exec redis-server /etc/redis/redis.conf'
+ '[' -f /opt/redis-stack/lib/redisearch.so ']'
Starting redis server cmd: exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so 
+ scale_redis_cluster_replica
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so '
+ '[' -f /opt/redis-stack/lib/redistimeseries.so ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so '
+ '[' -f /opt/redis-stack/lib/rejson.so ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so '
+ '[' -f /opt/redis-stack/lib/redisbloom.so ']'
+ '[' -n O3605v7HsS ']'
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so '
+ retry redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping
+ '[' -f /opt/redis-stack/lib/redisgraph.so ']'
+ '[' -f /opt/redis-stack/lib/rediscompat.so ']'
+ local max_attempts=20
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so'
+ local attempt=1
+ '[' -f /opt/redis-stack/lib/redisgears.so ']'
+ redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping
+ exec_cmd='exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so '
+ echo 'Starting redis server cmd: exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so '
+ eval 'exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so  --loadmodule /opt/redis-stack/lib/redistimeseries.so  --loadmodule /opt/redis-stack/lib/rejson.so  --loadmodule /opt/redis-stack/lib/redisbloom.so  --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so '
++ exec redis-server /etc/redis/redis.conf --loadmodule /opt/redis-stack/lib/redisearch.so --loadmodule /opt/redis-stack/lib/redistimeseries.so --loadmodule /opt/redis-stack/lib/rejson.so --loadmodule /opt/redis-stack/lib/redisbloom.so --loadmodule /opt/redis-stack/lib/rediscompat.so --loadmodule /opt/redis-stack/lib/redisgears.so v8-plugin-path /opt/redis-stack/lib/libredisgears_v8_plugin.so
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
Could not connect to Redis at 127.0.0.1:6379: Connection refused
+ '[' 1 -eq 20 ']'
+ echo 'Command '\''redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping'\'' failed. Attempt 1 of 20. Retrying in 5 seconds...'
+ attempt=2
+ sleep 3
Command 'redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping' failed. Attempt 1 of 20. Retrying in 5 seconds...
+ redis-cli -h 127.0.0.1 -p 6379 -a O3605v7HsS ping
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
PONG
+ '[' 2 -eq 20 ']'
+ current_pod_name=rcluster-cluster-shard-jfs-1
+ current_pod_fqdn=rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc
+ '[' -n rcluster-cluster-shard-jfs-1 ']'
+ target_node_fqdn=rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc
+ get_current_comp_nodes_for_scale_out_replica rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc 6379
+ local cluster_node=rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc
+ local cluster_node_port=6379
+ '[' -z O3605v7HsS ']'
++ redis-cli -h rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc -p 6379 -a O3605v7HsS cluster nodes
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
+ cluster_nodes_info='542fd78cd8d275d3a9e8e6f60b95279c91f5875c 10.116.25.66:6379@16379,rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc myself,master - 0 1718946199239 0 connected'
+ current_comp_primary_node=()
+ current_comp_other_nodes=()
++ echo '542fd78cd8d275d3a9e8e6f60b95279c91f5875c 10.116.25.66:6379@16379,rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc myself,master - 0 1718946199239 0 connected'
++ wc -l
+ '[' 1 -eq 1 ']'
+ echo 'Cluster nodes info contains only one line, returning...'
+ return
+ '[' 0 -eq 0 ']'
+ echo 'current_comp_primary_node is empty, skip scale out replica'
+ exit 0
Cluster nodes info contains only one line, returning...
current_comp_primary_node is empty, skip scale out replica

logs pod-0 lorry

kubectl logs rcluster-cluster-shard-jfs-0  lorry
2024-06-21T04:54:36Z	INFO	Initialize DB manager
2024-06-21T04:54:36Z	INFO	KB_WORKLOAD_TYPE ENV not set
2024-06-21T04:54:36Z	INFO	Volume-Protection	succeed to init volume protection	{"pod": "rcluster-cluster-shard-jfs-0", "spec": {"highWatermark":"0","volumes":[]}}
2024-06-21T04:54:36Z	INFO	HTTPServer	Starting HTTP Server
2024-06-21T04:54:36Z	INFO	HTTPServer	API route path	{"method": "POST", "path": ["/v1.0/checkrunning", "/v1.0/rebuild", "/v1.0/grantuserrole", "/v1.0/unlockinstance", "/v1.0/switchover", "/v1.0/revokeuserrole", "/v1.0/exec", "/v1.0/deleteuser", "/v1.0/volumeprotection", "/v1.0/getlag", "/v1.0/leavemember", "/v1.0/joinmember", "/v1.0/createuser", "/v1.0/lockinstance", "/v1.0/postprovision", "/v1.0/preterminate", "/v1.0/datadump", "/v1.0/dataload"]}
2024-06-21T04:54:36Z	INFO	HTTPServer	API route path	{"method": "GET", "path": ["/v1.0/query", "/v1.0/describeuser", "/v1.0/listsystemaccounts", "/v1.0/checkrole", "/v1.0/getrole", "/v1.0/healthycheck", "/v1.0/listusers"]}
2024-06-21T04:54:36Z	INFO	cronjobs	env is not set	{"env": "KB_CRON_JOBS"}
2024-06-21T04:54:46Z	INFO	Redis	DB startup ready
2024-06-21T04:54:46Z	INFO	DCS-K8S	pod selector: app.kubernetes.io/instance=rcluster-cluster,app.kubernetes.io/managed-by=kubeblocks,apps.kubeblocks.io/component-name=shard-jfs
2024-06-21T04:54:46Z	INFO	DCS-K8S	podlist: 2
2024-06-21T04:54:46Z	INFO	DCS-K8S	Leader configmap is not found	{"configmap": "rcluster-cluster-shard-jfs-leader"}
2024-06-21T04:54:46Z	INFO	DCS-K8S	pod selector: app.kubernetes.io/instance=rcluster-cluster,app.kubernetes.io/managed-by=kubeblocks,apps.kubeblocks.io/component-name=shard-jfs
2024-06-21T04:54:46Z	INFO	DCS-K8S	podlist: 2
2024-06-21T04:54:46Z	DEBUG	checkrole	check member	{"member": "rcluster-cluster-shard-jfs-0", "role": ""}
2024-06-21T04:54:46Z	DEBUG	checkrole	check member	{"member": "rcluster-cluster-shard-jfs-1", "role": "primary"}
2024-06-21T04:54:46Z	INFO	checkrole	there is a another leader	{"member": "rcluster-cluster-shard-jfs-1"}
2024-06-21T04:54:46Z	INFO	checkrole	another leader's lorry is online, just ignore	{"member": "rcluster-cluster-shard-jfs-1"}
2024-06-21T04:54:46Z	INFO	event	send event: map[event:Success operation:checkRole originalRole:waitForStart role:{"term":"1718945686592801","PodRoleNamePairs":[{"podName":"rcluster-cluster-shard-jfs-0","roleName":"primary","podUid":"200c9c9a-f17f-41a8-9223-9ee87e73b490"}]}]
2024-06-21T04:54:46Z	INFO	event	send event success	{"message": "{\"event\":\"Success\",\"operation\":\"checkRole\",\"originalRole\":\"waitForStart\",\"role\":\"{\\\"term\\\":\\\"1718945686592801\\\",\\\"PodRoleNamePairs\\\":[{\\\"podName\\\":\\\"rcluster-cluster-shard-jfs-0\\\",\\\"roleName\\\":\\\"primary\\\",\\\"podUid\\\":\\\"200c9c9a-f17f-41a8-9223-9ee87e73b490\\\"}]}\"}"}

logs pod-1 lorry

kubectl logs rcluster-cluster-shard-jfs-1  lorry
2024-06-21T05:01:04Z	INFO	Initialize DB manager
2024-06-21T05:01:04Z	INFO	KB_WORKLOAD_TYPE ENV not set
2024-06-21T05:01:04Z	INFO	Volume-Protection	succeed to init volume protection	{"pod": "rcluster-cluster-shard-jfs-1", "spec": {"highWatermark":"0","volumes":[]}}
2024-06-21T05:01:04Z	INFO	HTTPServer	Starting HTTP Server
2024-06-21T05:01:04Z	INFO	HTTPServer	API route path	{"method": "POST", "path": ["/v1.0/leavemember", "/v1.0/exec", "/v1.0/volumeprotection", "/v1.0/switchover", "/v1.0/lockinstance", "/v1.0/postprovision", "/v1.0/dataload", "/v1.0/preterminate", "/v1.0/unlockinstance", "/v1.0/checkrunning", "/v1.0/joinmember", "/v1.0/rebuild", "/v1.0/revokeuserrole", "/v1.0/datadump", "/v1.0/getlag", "/v1.0/deleteuser", "/v1.0/grantuserrole", "/v1.0/createuser"]}
2024-06-21T05:01:04Z	INFO	HTTPServer	API route path	{"method": "GET", "path": ["/v1.0/listsystemaccounts", "/v1.0/checkrole", "/v1.0/describeuser", "/v1.0/getrole", "/v1.0/healthycheck", "/v1.0/query", "/v1.0/listusers"]}
2024-06-21T05:01:04Z	INFO	cronjobs	env is not set	{"env": "KB_CRON_JOBS"}
2024-06-21T05:01:12Z	INFO	Redis	DB startup ready
2024-06-21T05:01:12Z	INFO	DCS-K8S	pod selector: app.kubernetes.io/instance=rcluster-cluster,app.kubernetes.io/managed-by=kubeblocks,apps.kubeblocks.io/component-name=shard-jfs
2024-06-21T05:01:12Z	INFO	DCS-K8S	podlist: 2
2024-06-21T05:01:12Z	INFO	DCS-K8S	Leader configmap is not found	{"configmap": "rcluster-cluster-shard-jfs-leader"}
2024-06-21T05:01:12Z	INFO	DCS-K8S	pod selector: app.kubernetes.io/instance=rcluster-cluster,app.kubernetes.io/managed-by=kubeblocks,apps.kubeblocks.io/component-name=shard-jfs
2024-06-21T05:01:12Z	INFO	DCS-K8S	podlist: 2
2024-06-21T05:01:12Z	DEBUG	checkrole	check member	{"member": "rcluster-cluster-shard-jfs-0", "role": "primary"}
2024-06-21T05:01:12Z	INFO	checkrole	there is a another leader	{"member": "rcluster-cluster-shard-jfs-0"}
2024-06-21T05:01:12Z	INFO	checkrole	another leader's lorry is online, just ignore	{"member": "rcluster-cluster-shard-jfs-0"}
2024-06-21T05:01:12Z	DEBUG	checkrole	check member	{"member": "rcluster-cluster-shard-jfs-1", "role": ""}
2024-06-21T05:01:12Z	INFO	event	send event: map[event:Success operation:checkRole originalRole:waitForStart role:{"term":"1718946072918100","PodRoleNamePairs":[{"podName":"rcluster-cluster-shard-jfs-1","roleName":"primary","podUid":"545d1043-b09a-4df5-9702-d57a9a0a6ac7"}]}]
2024-06-21T05:01:12Z	INFO	event	send event success	{"message": "{\"event\":\"Success\",\"operation\":\"checkRole\",\"originalRole\":\"waitForStart\",\"role\":\"{\\\"term\\\":\\\"1718946072918100\\\",\\\"PodRoleNamePairs\\\":[{\\\"podName\\\":\\\"rcluster-cluster-shard-jfs-1\\\",\\\"roleName\\\":\\\"primary\\\",\\\"podUid\\\":\\\"545d1043-b09a-4df5-9702-d57a9a0a6ac7\\\"}]}\"}"}
2024-06-21T05:03:19Z	ERROR	Redis	Role query error	{"error": "dial tcp 127.0.0.1:6379: connect: connection refused"}
github.com/apecloud/kubeblocks/pkg/lorry/engines/redis.(*Manager).GetReplicaRole
	/src/pkg/lorry/engines/redis/get_replica_role.go:44
github.com/apecloud/kubeblocks/pkg/lorry/operations/replica.(*CheckRole).Do
	/src/pkg/lorry/operations/replica/checkrole.go:144
github.com/apecloud/kubeblocks/pkg/lorry/httpserver.(*api).RegisterOperations.OperationWrapper.func1
	/src/pkg/lorry/httpserver/apis.go:119
github.com/fasthttp/router.(*Router).Handler
	/go/pkg/mod/github.com/fasthttp/[email protected]/router.go:420
github.com/apecloud/kubeblocks/pkg/lorry/httpserver.(*server).StartNonBlocking.(*server).apiLogger.func2
	/src/pkg/lorry/httpserver/server.go:120
github.com/valyala/fasthttp.(*Server).serveConn
	/go/pkg/mod/github.com/valyala/[email protected]/server.go:2359
github.com/valyala/fasthttp.(*workerPool).workerFunc
	/go/pkg/mod/github.com/valyala/[email protected]/workerpool.go:224
github.com/valyala/fasthttp.(*workerPool).getCh.func1
	/go/pkg/mod/github.com/valyala/[email protected]/workerpool.go:196
2024-06-21T05:03:19Z	INFO	checkrole	executing checkRole error	{"error": "dial tcp 127.0.0.1:6379: connect: connection refused"}
2024-06-21T05:03:19Z	INFO	checkrole	role checks failed continuously	{"times": 0}
2024-06-21T05:03:19Z	INFO	event	send event: map[operation:checkRole originalRole:primary]
2024-06-21T05:03:19Z	INFO	event	send event success	{"message": "{\"operation\":\"checkRole\",\"originalRole\":\"primary\"}"}

kubectl exec -it rcluster-cluster-shard-jfs-0 bash

root@rcluster-cluster-shard-jfs-0:/# redis-cli -a O3605v7HsS

cluster nodes
ec893952e27517db9ee0094815fb0f0ef4633820 10.116.45.30:6379@16379,rcluster-cluster-shard-qqs-0.rcluster-cluster-shard-qqs-headless.default.svc master,fail? - 1718945677694 1718945676686 1 connected 0-5460
6dedd0fbec884b96bc6c650f339b3fa59bd51c0b 10.116.44.7:6379@16379,rcluster-cluster-shard-42p-0.rcluster-cluster-shard-42p-headless.default.svc master,fail? - 1718945678698 1718945676686 2 connected 5461-10922
b81837847cdf48c1f6c33befcf95c3979d2293ad 10.116.45.38:6379@16379,rcluster-cluster-shard-jfs-0.rcluster-cluster-shard-jfs-headless.default.svc myself,master - 0 1718945676686 3 connected 10923-16383

kubectl exec -it rcluster-cluster-shard-jfs-1 bash
redis-cli -a O3605v7HsS
cluster nodes
542fd78cd8d275d3a9e8e6f60b95279c91f5875c 10.116.25.66:6379@16379,rcluster-cluster-shard-jfs-1.rcluster-cluster-shard-jfs-headless.default.svc myself,master - 0 1718946199239 0 connected



Expected behavior: After the restart, each shard should again have exactly one `primary` and one `secondary` pod, as it did before the restart.

Screenshots If applicable, add screenshots to help explain your problem.

Desktop (please complete the following information):

  • OS: [e.g. iOS]
  • Browser [e.g. chrome, safari]
  • Version [e.g. 22]
kbcli version
Kubernetes: v1.27.13-gke.1070000
KubeBlocks: 0.9.0-beta.34
kbcli: 0.9.0-beta.27

Additional context Add any other context about the problem here.

JashBook avatar Jun 20 '24 14:06 JashBook