kubeblocks
kubeblocks copied to clipboard
[BUG] promote: the switchover Job pod doesn't tolerate the kb-data or kb-controller node taints
➜ ~ kbcli version Kubernetes: v1.24.6-aliyun.1 KubeBlocks: 0.8.1 kbcli: 0.8.1
➜ ~ k describe node | grep Taint
Taints: kb-controller=true:NoSchedule
Taints: kb-data=true:NoSchedule
Taints: kb-data=true:NoSchedule
➜ ~ kbcli cluster describe mysqltest
Name: mysqltest Created Time: Feb 05,2024 14:41 UTC+0800
NAMESPACE CLUSTER-DEFINITION VERSION STATUS TERMINATION-POLICY
default mysql mysql-5.7.42 Running WipeOut
Endpoints:
COMPONENT MODE INTERNAL EXTERNAL
mysql ReadWrite mysqltest-mysql.default.svc.cluster.local:3306 <none>
Topology:
COMPONENT INSTANCE ROLE STATUS AZ NODE CREATED-TIME
mysql mysqltest-mysql-0 primary Running cn-zhangjiakou-c cn-zhangjiakou.192.168.36.134/192.168.36.134 Feb 05,2024 14:47 UTC+0800
mysql mysqltest-mysql-1 secondary Running cn-zhangjiakou-c cn-zhangjiakou.192.168.36.133/192.168.36.133 Feb 05,2024 14:51 UTC+0800
Resources Allocation:
COMPONENT DEDICATED CPU(REQUEST/LIMIT) MEMORY(REQUEST/LIMIT) STORAGE-SIZE STORAGE-CLASS
mysql false 2100m / 2100m 4402341478400m / 4402341478400m data:20Gi kb-default-sc
Images:
COMPONENT TYPE IMAGE
mysql mysql docker.io/mysql:5.7.42
Data Protection:
BACKUP-REPO AUTO-BACKUP BACKUP-SCHEDULE BACKUP-METHOD BACKUP-RETENTION
Show cluster events: kbcli cluster list-events -n default mysqltest
➜ ~ kbcli cluster promote mysqltest --auto-approve --component mysql
OpsRequest mysqltest-custom-d6jkl created successfully, you can view the progress:
kbcli cluster describe-ops mysqltest-custom-d6jkl -n default
➜ ~ k get pod
NAME READY STATUS RESTARTS AGE
mysqltest-custom-d6jkl-mysql-0-vn4d6 0/1 Pending 0 8s
mysqltest-mysql-0 4/4 Running 0 4m9s
mysqltest-mysql-1 4/4 Running 0 50s
➜ ~ k describe pod mysqltest-custom-d6jkl-mysql-0-vn4d6
Name: mysqltest-custom-d6jkl-mysql-0-vn4d6
Namespace: default
Priority: 0
Node: <none>
Labels: controller-uid=649bd49d-2b48-4de3-b7b4-5f11a7088bab
job-name=mysqltest-custom-d6jkl-mysql-0
Annotations: <none>
Status: Pending
IP:
IPs: <none>
Controlled By: Job/mysqltest-custom-d6jkl-mysql-0
Containers:
switchover:
Image: docker.io/apecloud/kubeblocks-tools:latest
Port: <none>
Host Port: <none>
Command:
sh
-c
set -e
# do switchover
url="http://${TARGET_POD_IP}:${LORRY_HTTP_PORT}/v1.0/switchover"
params="{\"parameters\": {\"primary\":\"${primary}\",\"candidate\":\"${candidate}\"}}"
echo "curl ${url}, parameters: ${params}"
res=`curl -s -X POST -H 'Content-Type: application/json' "${url}" -d "${params}"`
echo "curl result: ${res}"
# check if switchover successfully.
echo "INFO: start to check if switchover successfully, timeout is 60s"
executedUnix=$(date +%s)
while true; do
sleep 5
if [ ! -z ${candidate} ]; then
# if candidate specified, only check it
role=$(kubectl get pod ${candidate} -ojson | jq -r '.metadata.labels["kubeblocks.io/role"]')
if [ "$role" == "primary" ] || [ "$role" == "leader" ] || [ "$role" == "master" ]; then
echo "INFO: switchover successfully, ${candidate} is ${role}"
exit 0
fi
else
# check if the candidate instance has been promote to primary
pods=$(kubectl get pod -l apps.kubeblocks.io/component-name=${KB_COMP_NAME},app.kubernetes.io/instance=${KB_CLUSTER_NAME} | awk 'NR > 1 {print $1}')
for podName in ${pods}; do
if [ "${podName}" != "${primary}" ];then
role=$(kubectl get pod ${podName} -ojson | jq -r '.metadata.labels["kubeblocks.io/role"]')
if [ "$role" == "primary" ] || [ "$role" == "leader" ] || [ "$role" == "master" ]; then
echo "INFO: switchover successfully, ${podName} is ${role}"
exit 0
fi
fi
done
fi
currentUnix=$(date +%s)
diff_time=$((${currentUnix}-${executedUnix}))
if [ ${diff_time} -ge 60 ]; then
echo "ERROR: switchover failed."
exit 1
fi
done
Limits:
cpu: 0
memory: 0
Requests:
cpu: 0
memory: 0
Environment:
KB_CLUSTER_NAME: mysqltest
KB_COMP_NAME: mysql
KB_CLUSTER_COMP_NAME: mysqltest-mysql
KB_COMP_REPLICAS: 2
KB_COMP_HEADLESS_SVC_NAME: mysqltest-mysql-headless
TARGET_POD_IP: 10.114.0.143
LORRY_HTTP_PORT: 3501
primary: mysqltest-mysql-0
candidate:
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-blkj8 (ro)
Conditions:
Type Status
PodScheduled False
Volumes:
kube-api-access-blkj8:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: BestEffort
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 16s default-scheduler 0/3 nodes are available: 1 node(s) had untolerated taint {kb-controller: true}, 2 node(s) had untolerated taint {kb-data: true}. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling.,
This issue has been marked as stale because it has been open for 30 days with no activity.