#!/bin/bash
set -euo pipefail
# ========================== Global configuration and helper functions ==========================
# ANSI colour sequences. They are stored as literal backslash text and only
# become escape codes when printed with `echo -e` / `printf '%b'`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Commands recorded while the script runs (shown in the final summary).
declare -a EXECUTED_COMMANDS=()
# Timestamped log file on the machine that runs this script.
LOG_FILE="/var/log/k8s-deploy-$(date +%Y%m%d-%H%M%S).log"
# Print helpers
#######################################
# Print an informational message.
# Globals:   YELLOW, NC (read), LOG_FILE (appended to)
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   "[INFO] <message>" on stdout and appended to $LOG_FILE
#######################################
info() {
  printf '%b\n' "${YELLOW}[INFO]${NC} $1" | tee -a "$LOG_FILE"
}
#######################################
# Print a success message.
# Globals:   GREEN, NC (read), LOG_FILE (appended to)
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   "[SUCCESS] <message>" on stdout and appended to $LOG_FILE
#######################################
success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1" | tee -a "$LOG_FILE"
}
#######################################
# Print an error message and terminate the script.
# Globals:   RED, NC (read), LOG_FILE (appended to)
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   "[ERROR] <message>" on stderr and appended to $LOG_FILE
# Returns:   never returns; exits the shell with status 1
#######################################
error() {
  # Diagnostics go to stderr so they never mix with captured stdout.
  printf '%b\n' "${RED}[ERROR]${NC} $1" | tee -a "$LOG_FILE" >&2
  exit 1
}
#######################################
# Print a section banner, preceded by a blank line.
# Globals:   BLUE, NC (read), LOG_FILE (appended to)
# Arguments: $1 - step title (backslash escapes are interpreted)
# Outputs:   "===== <title> =====" on stdout and appended to $LOG_FILE
#######################################
step() {
  printf '%b\n' "\n${BLUE}===== $1 =====${NC}" | tee -a "$LOG_FILE"
}
#######################################
# Run a command on a remote host over SSH and record it.
# Globals:   REMOTE_USER, LOG_FILE, YELLOW, NC (read)
#            EXECUTED_COMMANDS (appended to)
# Arguments: $1 - remote host IP
#            $2 - command line to run remotely
#            $3 - optional description for the log (default "执行命令")
# Outputs:   remote stdout/stderr are appended to $LOG_FILE
# Returns:   0 on success; calls error (which exits) when ssh fails
#######################################
remote_exec() {
  local ip=$1
  local cmd=$2
  local desc=${3:-"执行命令"}

  info "在 $ip 上$desc: ${YELLOW}$cmd${NC}"
  EXECUTED_COMMANDS+=("在 $ip 上: $cmd")

  # -n keeps the remote command from reading (and swallowing) this script's
  # stdin; all remote output is captured in the log file.
  if ! ssh -n -o StrictHostKeyChecking=no -o ConnectTimeout=10 \
    "${REMOTE_USER}@${ip}" "$cmd" >>"$LOG_FILE" 2>&1; then
    error "在 $ip 上执行命令失败: $cmd ,请查看日志: $LOG_FILE"
  fi
}
# Fixed paths (adjust to the actual environment)
KUBEADM_CONF="/opt/k8s-install-conf/kubeadm-conf.yaml"            # kubeadm config file on the master nodes
INIT_RESULT_FILE="/opt/k8s-install-conf/kubeadm-init-result.txt"  # file that stores the kubeadm init output
# NOTE(review): confirm this versioned URL is still served; the manifest is
# fetched from it verbatim.
CALICO_YAML_OFFICIAL="https://docs.projectcalico.org/v3.25/manifests/calico.yaml"
LOCAL_CALICO_YAML="/tmp/calico-v3.25-modified.yaml"
SWR_CALICO_PREFIX="swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/calico"
REMOTE_USER="root"  # login user on the remote nodes
# ========================== Argument parsing ==========================
MASTER_IPS=()  # the 3 master node IPs (order: Master1, Master2, Master3)
NODE_IPS=()    # worker node IP list
#######################################
# Parse command-line options into MASTER_IPS / NODE_IPS.
# Globals:   MASTER_IPS, NODE_IPS (written)
#            MASTER1_IP, MASTER2_IP, MASTER3_IP (written and exported)
# Arguments: the script's arguments:
#              --master-ips IP1,IP2,IP3   required, exactly three IPs
#              --node-ips   IP1,IP2,...   optional
#              --help                     print usage and exit 0
# Returns:   0 on success; calls error (which exits) on invalid input
#######################################
parse_args() {
  local -a ips=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --master-ips)
        # Check the value exists first; otherwise "$2" trips `set -u`
        # with an unhelpful "unbound variable" message.
        [[ $# -ge 2 ]] || error "参数 $1 缺少取值 (执行 $0 --help查看用法)"
        IFS=',' read -ra ips <<< "$2"
        if [[ ${#ips[@]} -ne 3 ]]; then
          error "--master-ips必须包含3个IP( 逗号分隔) , 当前提供 ${#ips[@]} 个"
        fi
        MASTER_IPS=("${ips[@]}")
        shift 2
        ;;
      --node-ips)
        [[ $# -ge 2 ]] || error "参数 $1 缺少取值 (执行 $0 --help查看用法)"
        IFS=',' read -ra ips <<< "$2"
        # The ${arr[@]+...} form keeps an empty list from tripping `set -u`
        # on bash versions older than 4.4.
        NODE_IPS=(${ips[@]+"${ips[@]}"})
        shift 2
        ;;
      --help)
        cat <<EOF
用法: $0 --master-ips <IP1,IP2,IP3> [--node-ips <IP1,IP2,...>]
功能: 部署K8s集群, 包含3个Master节点和可选的Node节点
参数说明:
  --master-ips  必选, 3个Master节点IP( 顺序: Master1(初始化节点), Master2, Master3)
  --node-ips    可选, Node节点IP列表( 逗号分隔, 如不指定则仅部署Master集群)
示例:
  $0 --master-ips 192.168.61.10,192.168.61.11,192.168.61.12 --node-ips 192.168.61.20,192.168.61.21
EOF
        exit 0
        ;;
      *)
        error "未知参数: $1 (执行 $0 --help查看用法)"
        ;;
    esac
  done

  # Validate the required option.
  if [[ ${#MASTER_IPS[@]} -eq 0 ]]; then
    error "缺少必选参数--master-ips( 需指定3个Master节点IP)"
  fi

  # Expose the individual master IPs.
  export MASTER1_IP="${MASTER_IPS[0]}"
  export MASTER2_IP="${MASTER_IPS[1]}"
  export MASTER3_IP="${MASTER_IPS[2]}"

  info "已识别节点:"
  info "  Master1( 初始化) : $MASTER1_IP"
  info "  Master2: $MASTER2_IP"
  info "  Master3: $MASTER3_IP"
  # A real if/else: with `A && B || C`, C would also run whenever B failed.
  if [[ ${#NODE_IPS[@]} -gt 0 ]]; then
    info "  Node节点: ${NODE_IPS[*]}"
  else
    info "未指定Node节点, 仅部署Master集群"
  fi
}
# ========================== Passwordless SSH check ==========================
#######################################
# Verify key-based (BatchMode) SSH login to every master and worker node.
# All hosts are probed before failing, so one run reports every host that
# still needs ssh-copy-id.
# Globals:   MASTER_IPS, NODE_IPS, REMOTE_USER (read)
# Returns:   0 when every host is reachable; otherwise calls error (exits)
#######################################
check_ssh_access() {
  step "前置检查: SSH免密登录验证"

  # ${NODE_IPS[@]+...} tolerates an empty worker list under `set -u`
  # on bash versions older than 4.4.
  local -a all_ips=("${MASTER_IPS[@]}" ${NODE_IPS[@]+"${NODE_IPS[@]}"})
  local -a failed_ips=()
  local ip

  for ip in "${all_ips[@]}"; do
    info "验证SSH免密登录: $REMOTE_USER@$ip"
    if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o BatchMode=yes \
      "${REMOTE_USER}@${ip}" "echo 'SSH连通正常'" >/dev/null 2>&1; then
      info "$ip SSH免密登录验证通过"
    else
      # Only record the failure here: calling error would exit on the first
      # bad host and make the summary below unreachable.
      info "$ip SSH免密登录验证失败, 请先执行: ssh-copy-id $REMOTE_USER@$ip 配置免密"
      failed_ips+=("$ip")
    fi
  done

  if [[ ${#failed_ips[@]} -gt 0 ]]; then
    error "以下节点SSH免密登录配置失败: ${failed_ips[*]}"
  fi
  success "所有节点SSH免密登录验证通过"
}
# ========================== Step 1: initialise Master1 ==========================
#######################################
# Run `kubeadm init` on Master1 and keep its output locally.
# Globals:   REMOTE_USER, MASTER1_IP, KUBEADM_CONF, INIT_RESULT_FILE (read)
#            EXECUTED_COMMANDS (appended to)
# Outputs:   streams the kubeadm output to stdout and to $INIT_RESULT_FILE,
#            then prints the first 20 lines of that file
# Returns:   0 on success; calls error (exits) when the config file is
#            missing on Master1 or the init pipeline fails
#######################################
init_master1() {
  step "Step 1: Master1节点初始化($MASTER1_IP)"

  # The kubeadm config must already be present on Master1.
  if ! ssh "${REMOTE_USER}@${MASTER1_IP}" "test -f $KUBEADM_CONF"; then
    error "Master1的kubeadm配置文件不存在: $MASTER1_IP:$KUBEADM_CONF (请先执行配置分发脚本)"
  fi

  local init_cmd="kubeadm init --upload-certs --config $KUBEADM_CONF 2>&1"
  info "执行初始化命令: $init_cmd"
  info "初始化结果将保存到: $INIT_RESULT_FILE"

  # The result file lives on the local machine; make sure its directory exists.
  mkdir -p -- "$(dirname -- "$INIT_RESULT_FILE")" \
    || error "无法创建初始化结果目录: $(dirname -- "$INIT_RESULT_FILE")"

  # Run the pipeline in a subshell with pipefail and collect its status via
  # `||`. A bare `ssh | tee` under `set -e -o pipefail` would abort the script
  # before the status check below could report the failure.
  local init_exit_code=0
  (
    set -o pipefail
    ssh "${REMOTE_USER}@${MASTER1_IP}" "$init_cmd" | tee "$INIT_RESULT_FILE"
  ) || init_exit_code=$?

  if [[ $init_exit_code -ne 0 ]]; then
    error "Master1初始化失败! 查看详情: cat $INIT_RESULT_FILE"
  fi

  EXECUTED_COMMANDS+=("在 $MASTER1_IP 上初始化: $init_cmd")
  success "Master1初始化完成, 结果已保存到: $INIT_RESULT_FILE"
  info "初始化输出预览( 前20行) : "
  head -n 20 "$INIT_RESULT_FILE"
}
# ========================== Step 2: configure kubeconfig on Master1 ==========================
#######################################
# Install admin.conf as the login user's kubeconfig on Master1 and verify
# that kubectl works.
# Globals:   MASTER1_IP (read)
# Returns:   0 on success; remote_exec exits the script on any failure
#######################################
config_master1_kubeconfig() {
  step "Step 2: 配置Master1的kubeconfig"

  # Single quotes are deliberate: $HOME and $(id ...) must expand on the
  # remote host, not locally.
  # - `cp -f` replaces `cp -i`, which would prompt for confirmation in a
  #   non-interactive session when the target already exists.
  # - The profile line is appended only when missing, so re-runs do not
  #   pile up duplicates.
  # - The former `source ~/.bash_profile` step is gone: every command runs in
  #   its own ssh session, so sourcing there had no lasting effect.
  local -a kube_cmds=(
    'mkdir -p $HOME/.kube'
    'cp -f /etc/kubernetes/admin.conf $HOME/.kube/config'
    'chown $(id -u):$(id -g) $HOME/.kube/config'
    "grep -qxF 'export KUBECONFIG=/etc/kubernetes/admin.conf' ~/.bash_profile 2>/dev/null || echo 'export KUBECONFIG=/etc/kubernetes/admin.conf' >> ~/.bash_profile"
  )

  local cmd
  for cmd in "${kube_cmds[@]}"; do
    remote_exec "$MASTER1_IP" "$cmd" "配置kubeconfig"
  done

  # Confirm kubectl is usable.
  remote_exec "$MASTER1_IP" "kubectl get nodes" "验证kubectl"
  success "Master1 kubeconfig配置完成"
}
# ========================== Step 3: extract the join commands ==========================
#######################################
# Print, as one line, the command that follows a header line in a file.
# After the first line containing the header text, blank lines are skipped,
# then lines are collected for as long as each one ends with a backslash
# continuation. Continuations are removed and whitespace is collapsed.
# Arguments: $1 - literal text identifying the header line
#            $2 - file to read
# Outputs:   the joined command on stdout (nothing if not found)
#######################################
_extract_command_after() {
  local header=$1
  local file=$2
  awk -v header="$header" '
    !found { if (index($0, header)) found = 1; next }
    !started { if ($0 ~ /^[ \t]*$/) next; started = 1 }
    {
      line = $0
      continued = sub(/[ \t]*\\[ \t]*$/, "", line)
      gsub(/^[ \t]+/, "", line)
      gsub(/[ \t]+/, " ", line)
      joined = (joined == "" ? line : joined " " line)
      if (!continued) exit
    }
    END { if (joined != "") print joined }
  ' "$file"
}

#######################################
# Extract the control-plane and worker join commands from the saved
# kubeadm init output.
# Globals:   INIT_RESULT_FILE, KUBEADM_CONF, YELLOW, NC (read)
#            MASTER_JOIN_CMD, NODE_JOIN_CMD (written and exported)
#            EXECUTED_COMMANDS (appended to)
# Returns:   0 on success; calls error (exits) when the file or either
#            command is missing
#######################################
extract_join_commands() {
  step "Step 3: 从初始化结果中提取join命令"

  [[ -f "$INIT_RESULT_FILE" ]] || error "初始化结果文件不存在: $INIT_RESULT_FILE"

  # Control-plane join command. Only the stable prefix of the header is
  # matched, so any line the longer original text matched still matches.
  # The whole continued command is read rather than a fixed `grep -A`
  # window, which could cut off the last flags.
  info "提取Master节点join命令"
  local master_join_cmd
  master_join_cmd=$(_extract_command_after \
    "You can now join any number of" "$INIT_RESULT_FILE") || master_join_cmd=""
  if [[ "$master_join_cmd" != "kubeadm join "* ]]; then
    error "未从初始化结果中提取到Master join命令"
  fi

  # Append --config.
  # NOTE(review): verify that the kubeadm release in use accepts --config
  # together with the token/discovery flags of the generated command.
  master_join_cmd="$master_join_cmd --config $KUBEADM_CONF"
  export MASTER_JOIN_CMD="$master_join_cmd"
  EXECUTED_COMMANDS+=("Master节点加入命令: $MASTER_JOIN_CMD")
  success "Master join命令提取完成: "
  echo -e "${YELLOW}${MASTER_JOIN_CMD}${NC}"

  # Worker join command.
  info "提取Node节点join命令"
  local node_join_cmd
  node_join_cmd=$(_extract_command_after \
    "Then you can join any number of worker nodes" "$INIT_RESULT_FILE") || node_join_cmd=""
  if [[ "$node_join_cmd" != "kubeadm join "* ]]; then
    error "未从初始化结果中提取到Node join命令"
  fi

  export NODE_JOIN_CMD="$node_join_cmd"
  EXECUTED_COMMANDS+=("Node节点加入命令: $NODE_JOIN_CMD")
  success "Node join命令提取完成: "
  echo -e "${YELLOW}${NODE_JOIN_CMD}${NC}"
}
# ========================== Step 4: join the remaining masters ==========================
#######################################
# Join Master2 and Master3 to the cluster as control-plane nodes and set up
# their kubeconfig.
# Globals:   MASTER1_IP, MASTER2_IP, MASTER3_IP, REMOTE_USER, KUBEADM_CONF,
#            MASTER_JOIN_CMD (read)
# Returns:   0 on success; error / remote_exec exit the script on failure
#######################################
join_other_masters() {
  step "Step 4: 远程Master2($MASTER2_IP) 和Master3($MASTER3_IP)加入集群"

  local -a other_masters=("$MASTER2_IP" "$MASTER3_IP")
  # Single quotes are deliberate: expansion must happen on the remote host.
  # `cp -f` avoids the confirmation prompt `cp -i` raises when the target
  # already exists.
  local -a kube_config_cmds=(
    'mkdir -p $HOME/.kube'
    'cp -f /etc/kubernetes/admin.conf $HOME/.kube/config'
    'chown $(id -u):$(id -g) $HOME/.kube/config'
  )
  local master_ip cmd

  for master_ip in "${other_masters[@]}"; do
    info "处理Master节点: $master_ip"

    # The kubeadm config must already exist on that master.
    # ${master_ip} is braced so the following multibyte text can never be
    # read as part of the variable name.
    if ! ssh "${REMOTE_USER}@${master_ip}" "test -f $KUBEADM_CONF"; then
      error "Master ${master_ip}的配置文件不存在: $KUBEADM_CONF"
    fi

    remote_exec "$master_ip" "$MASTER_JOIN_CMD" "执行加入集群命令"

    for cmd in "${kube_config_cmds[@]}"; do
      remote_exec "$master_ip" "$cmd" "配置kubeconfig"
    done

    success "Master ${master_ip}加入集群并配置完成"
  done

  # Also match "control-plane", so the check does not fail (and abort via
  # grep's exit status) on nodes that carry only that role name.
  remote_exec "$MASTER1_IP" "kubectl get nodes | grep -E 'master|control-plane'" "验证Master节点状态"
  success "所有Master节点加入完成"
}
# ========================== Step 5: join the worker nodes ==========================
#######################################
# Join every worker node listed in NODE_IPS to the cluster.
# Globals:   NODE_IPS, REMOTE_USER, NODE_JOIN_CMD, MASTER1_IP (read)
# Returns:   0 on success or when no workers were given; error /
#            remote_exec exit the script on failure
#######################################
join_nodes() {
  if [[ ${#NODE_IPS[@]} -eq 0 ]]; then
    info "未指定Node节点, 跳过Node加入步骤"
    return
  fi

  step "Step 5: 远程Node节点加入集群( 共 ${#NODE_IPS[@]} 个)"

  # Prerequisite check run on the worker. Plain `>/dev/null 2>&1` is used
  # because `&>` is a bashism: a POSIX remote shell would parse it as `&`
  # (background) followed by a redirect.
  local check_cmd="command -v kubeadm >/dev/null 2>&1 && command -v kubelet >/dev/null 2>&1 && systemctl is-active --quiet containerd"
  local node_ip

  for node_ip in "${NODE_IPS[@]}"; do
    info "处理Node节点: $node_ip"

    # ${node_ip} is braced so the following multibyte text can never be
    # read as part of the variable name.
    if ! ssh "${REMOTE_USER}@${node_ip}" "$check_cmd"; then
      error "Node ${node_ip}未安装基础组件 ( 需先安装kubeadm、kubelet、containerd)"
    fi

    remote_exec "$node_ip" "$NODE_JOIN_CMD" "执行加入集群命令"
    success "Node ${node_ip}加入集群完成"
  done

  # Show the state of all nodes.
  remote_exec "$MASTER1_IP" "kubectl get nodes" "验证所有节点状态"
  success "所有Node节点加入完成"
}
# ========================== Step 6: deploy the Calico network plugin ==========================
#######################################
# Download the Calico v3.25 manifest, point its images at the Huawei Cloud
# SWR mirror, upload it to Master1, apply it and wait for the pods.
# Globals:   CALICO_YAML_OFFICIAL, LOCAL_CALICO_YAML, SWR_CALICO_PREFIX,
#            REMOTE_USER, MASTER1_IP (read)
#            EXECUTED_COMMANDS (appended to)
# Returns:   0 on success; calls error (exits) on download/upload failure
#            or when the pods are not all Running within 5 minutes
#######################################
deploy_calico() {
  step "Step 6: 部署Calico v3.25( 华为云SWR镜像)"

  # Download the manifest to the local machine.
  info "下载Calico v3.25官方YAML: $CALICO_YAML_OFFICIAL"
  if command -v wget >/dev/null 2>&1; then
    wget -q -O "$LOCAL_CALICO_YAML" "$CALICO_YAML_OFFICIAL" || error "wget下载Calico YAML失败"
  elif command -v curl >/dev/null 2>&1; then
    # -f: treat HTTP errors as failures instead of saving the error page;
    # -L: follow redirects; -sS: quiet, but still print real errors.
    curl -fsSL -o "$LOCAL_CALICO_YAML" "$CALICO_YAML_OFFICIAL" || error "curl下载Calico YAML失败"
  else
    error "请先安装wget或curl"
  fi
  # -s rather than -f: a failed download can leave an empty file behind.
  [[ -s "$LOCAL_CALICO_YAML" ]] || error "Calico YAML下载失败, 文件不存在"
  success "Calico YAML下载完成( 本地路径: $LOCAL_CALICO_YAML)"

  # Rewrite every Calico image reference to the SWR mirror.
  info "替换镜像地址为华为云SWR: $SWR_CALICO_PREFIX"
  local image
  local -a sed_args=()
  for image in cni node kube-controllers; do
    sed_args+=(-e "s#docker.io/calico/${image}:v3.25.0#${SWR_CALICO_PREFIX}/${image}:v3.25.0#g")
  done
  sed -i "${sed_args[@]}" "$LOCAL_CALICO_YAML"

  # Check the result. `grep -m 10` limits the preview without a
  # `grep | head` pipeline, which can fail with SIGPIPE under pipefail.
  info "验证镜像替换结果( 查看前10行含镜像的内容)"
  grep -n -m 10 "image:" "$LOCAL_CALICO_YAML" || error "Calico YAML中未找到镜像配置, 替换失败"
  if ! grep -qF -- "$SWR_CALICO_PREFIX" "$LOCAL_CALICO_YAML"; then
    info "警告: YAML中未发现SWR镜像前缀, 镜像版本可能与v3.25.0不一致, 请检查"
  fi
  success "Calico镜像地址替换完成"

  # Upload the modified manifest to Master1.
  local remote_calico_yaml="/opt/k8s-install-conf/calico-v3.25-swr.yaml"
  info "上传修改后的YAML到Master1: $remote_calico_yaml"
  scp -o StrictHostKeyChecking=no "$LOCAL_CALICO_YAML" \
    "${REMOTE_USER}@${MASTER1_IP}:${remote_calico_yaml}" || error "YAML上传失败"
  EXECUTED_COMMANDS+=("上传Calico配置到 $MASTER1_IP: scp $LOCAL_CALICO_YAML $REMOTE_USER@$MASTER1_IP:$remote_calico_yaml")
  success "YAML上传完成"

  # Apply the manifest on Master1.
  local deploy_cmd="kubectl apply -f $remote_calico_yaml"
  remote_exec "$MASTER1_IP" "$deploy_cmd" "执行Calico部署"

  # Wait for the Calico pods. The calico.yaml manifest install creates its
  # workloads in kube-system (calico-system belongs to the operator-based
  # install), so the pods are selected there by their k8s-app label.
  info "等待Calico Pod启动( 最多5分钟) ..."
  local pods_selector="-n kube-system -l 'k8s-app in (calico-node,calico-kube-controllers)'"
  local pods_query="kubectl get pods ${pods_selector} -o jsonpath='{.items[*].status.phase}' 2>/dev/null"
  local all_running_re='^(Running )*Running$'
  local wait_time=0
  local pod_status
  while true; do
    # A failed poll counts as "not ready yet" instead of aborting via set -e.
    pod_status=$(ssh "${REMOTE_USER}@${MASTER1_IP}" "$pods_query") || pod_status=""
    if [[ "$pod_status" =~ $all_running_re ]]; then
      break
    fi
    if ((wait_time >= 300)); then
      error "Calico Pod启动超时( 5分钟) , 请手动检查: kubectl get pods -n kube-system | grep calico"
    fi
    sleep 10
    wait_time=$((wait_time + 10))
    info "已等待 ${wait_time} 秒, Calico Pod状态: $pod_status"
  done

  # Show the final state.
  remote_exec "$MASTER1_IP" "kubectl get pods ${pods_selector}" "验证Calico状态"
  remote_exec "$MASTER1_IP" "kubectl get nodes -o wide | grep -E 'STATUS|Ready'" "验证节点网络状态"
  success "Calico v3.25( 华为云SWR) 部署完成, 集群网络已就绪"

  # Remove the local temporary manifest.
  info "清理本地临时文件: $LOCAL_CALICO_YAML"
  rm -f -- "$LOCAL_CALICO_YAML" || info "本地文件清理失败,可手动删除"
}
# ========================== Step 7: show every executed command ==========================
#######################################
# Print a numbered list of the commands recorded during the run.
# Globals:   EXECUTED_COMMANDS, YELLOW, NC (read)
# Outputs:   the numbered list on stdout, framed by separator lines
#######################################
show_executed_commands() {
  local separator="----------------------------------------------------------------------"
  local count=1
  local cmd

  step "Step 7: 执行命令汇总"
  printf '%b\n' "${YELLOW}以下是部署过程中执行的关键命令: ${NC}"
  printf '%s\n' "$separator"
  # ${arr[@]+...} keeps an empty list from tripping `set -u` on bash
  # versions older than 4.4.
  for cmd in ${EXECUTED_COMMANDS[@]+"${EXECUTED_COMMANDS[@]}"}; do
    # %s prints the recorded command literally, without interpreting
    # backslashes it may contain.
    printf '%s\n' "${count}. ${cmd}"
    count=$((count + 1))
  done
  printf '%s\n' "$separator"
}
# ========================== Main flow ==========================
#######################################
# Run the whole deployment: parse arguments, check SSH access, run the
# deployment steps in order, then print the command list and a summary.
# Globals:   GREEN, NC, REMOTE_USER, MASTER1_IP, INIT_RESULT_FILE,
#            LOG_FILE (read)
# Arguments: the script's command-line arguments (passed to parse_args)
# Outputs:   progress from each step and a final summary on stdout
#######################################
main() {
  # Record the start time. Declaration and assignment are separate so a
  # failing command substitution is not masked by `local`.
  local start_time end_time duration
  start_time=$(date +%s)

  # 1. Parse arguments.
  parse_args "$@"
  # 2. Check passwordless SSH.
  check_ssh_access
  # 3. Core steps, in order.
  init_master1
  config_master1_kubeconfig
  extract_join_commands
  join_other_masters
  join_nodes
  deploy_calico
  # 4. Show the executed commands.
  show_executed_commands

  # 5. Final summary.
  step "集群部署完成总结"
  end_time=$(date +%s)
  duration=$((end_time - start_time))

  echo -e "${GREEN}========================================"
  echo -e "K8s集群部署全流程完成! "
  echo -e "部署耗时: $((duration / 60)) 分 $((duration % 60)) 秒"
  echo -e "关键信息:"
  echo -e "  1. Calico版本: v3.25( 镜像源: 华为云SWR)"
  echo -e "  2. 集群节点状态: ssh $REMOTE_USER@$MASTER1_IP 'kubectl get nodes'"
  # The calico.yaml manifest install runs its pods in kube-system.
  echo -e "  3. Calico状态: ssh $REMOTE_USER@$MASTER1_IP 'kubectl get pods -n kube-system | grep calico'"
  echo -e "  4. 初始化结果: $INIT_RESULT_FILE"
  echo -e "  5. 部署日志: $LOG_FILE"
  # Reset the colour so the green does not leak into the user's terminal.
  echo -e "========================================${NC}"
}
# Start the main flow. "$@" (no padding inside the quotes) forwards every
# command-line argument unchanged, one word per argument.
main "$@"