ansible-devops/scripts/k8s-cluster-deploy.sh

356 lines
12 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Strict mode: stop on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail
# -------------------------- Global variables --------------------------
# Comma-separated IP lists, filled in from --master-ips / --node-ips.
master_ips=
node_ips=
# Directory that holds kubeadm-conf.yaml and the generated files.
target_dir=/opt/k8s-install-conf
# Certificate key and base join command, set by generate_join_scripts.
CERT_KEY=
JOIN_CMD_BASE=
# -------------------------- Basic utility functions --------------------------
# Print the major section divider: a single line of '=' characters.
print_separator() {
  local -r divider="========================================================================"
  printf '%s\n' "$divider"
}
# Print the minor (sub-section) divider: a single line of '-' characters.
print_sub_separator() {
  local -r divider="------------------------------------------------------------------------"
  printf '%s\n' "$divider"
}
# Pause for five seconds between steps instead of asking for manual
# confirmation. Prints a notice before sleeping and a blank line afterwards.
auto_wait() {
  local -r pause_seconds=5
  printf '%s\n' "等待5秒后继续下一步..."
  sleep "$pause_seconds"
  printf '\n'
}
# -------------------------- Step: argument parsing and validation --------------------------
# Parse the command line and run the pre-flight checks.
# Globals:
#   master_ips, node_ips (written) - raw comma-separated lists from the options
#   target_dir (read)              - directory expected to contain kubeadm-conf.yaml
# Arguments:
#   The script's own arguments. Accepted options, each in "--opt=value" or
#   "--opt value" form: --master-ips, --node-ips.
# Outputs:
#   Progress and error messages on stdout.
# Exits with status 1 when: an option is unknown, an option is missing its
#   value, neither list is given, kubeadm-conf.yaml is absent from target_dir,
#   or the script is not running as root.
parse_and_validate_args() {
  local -r usage="使用方式:$0 --master-ips=192.168.61.131,192.168.61.132 --node-ips=192.168.61.134"
  local -r kubeadm_conf="${target_dir}/kubeadm-conf.yaml"

  print_separator
  echo "【步骤1/7】解析命令行参数与前置校验"
  print_sub_separator

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --master-ips=*)
        master_ips="${1#*=}"
        echo "已指定Master节点IP列表$master_ips"
        shift 1
        ;;
      --master-ips)
        # Without this guard "$2" is unset and `set -u` aborts the script
        # with an "unbound variable" message that does not name the option.
        if [[ $# -lt 2 ]]; then
          echo "错误:--master-ips 缺少参数值"
          echo "$usage"
          exit 1
        fi
        master_ips="$2"
        echo "已指定Master节点IP列表$master_ips"
        shift 2
        ;;
      --node-ips=*)
        node_ips="${1#*=}"
        echo "已指定Node节点IP列表$node_ips"
        shift 1
        ;;
      --node-ips)
        if [[ $# -lt 2 ]]; then
          echo "错误:--node-ips 缺少参数值"
          echo "$usage"
          exit 1
        fi
        node_ips="$2"
        echo "已指定Node节点IP列表$node_ips"
        shift 2
        ;;
      *)
        echo "错误:未知参数 $1"
        echo "$usage"
        exit 1
        ;;
    esac
  done

  # At least one of the two lists must be provided.
  if [[ -z "$master_ips" && -z "$node_ips" ]]; then
    echo "错误:必须指定 --master-ips 或 --node-ips"
    exit 1
  fi

  # The kubeadm configuration is looked up in target_dir (not the current
  # directory), so the error message names that path.
  echo "校验kubeadm-conf.yaml配置文件..."
  if [[ ! -f "$kubeadm_conf" ]]; then
    echo "错误:未找到配置文件 ${kubeadm_conf}"
    exit 1
  fi
  echo "✓ 已找到kubeadm-conf.yaml配置文件"

  # The script must run as root (uid 0).
  echo "校验执行权限..."
  if [[ "$(id -u)" -ne 0 ]]; then
    echo "错误脚本需以root权限执行"
    exit 1
  fi
  echo "✓ root权限校验通过"

  print_separator
  auto_wait
}
# -------------------------- Step: initialise the first master --------------------------
# Run `kubeadm init` on the local host and set up kubeconfig for the current
# user.
# Globals:
#   target_dir (read) - directory containing kubeadm-conf.yaml
#   HOME (read)       - kubeconfig is copied to $HOME/.kube/config
#   KUBECONFIG (exported) - set to /etc/kubernetes/admin.conf
# Outputs:
#   Progress messages on stdout.
# Side effects:
#   Writes $HOME/.kube/config and ensures $HOME/.profile exports KUBECONFIG.
init_first_master() {
  local -r kubeadm_conf="${target_dir}/kubeadm-conf.yaml"
  local -r admin_conf="/etc/kubernetes/admin.conf"
  local -r user_kubeconfig="$HOME/.kube/config"
  local -r profile_file="$HOME/.profile"
  local -r export_line="export KUBECONFIG=${admin_conf}"

  print_separator
  echo "【步骤2/7】初始化第一台Master节点"
  print_sub_separator

  echo "执行集群初始化命令kubeadm init --config ${kubeadm_conf} --upload-certs"
  kubeadm init --config "$kubeadm_conf" --upload-certs
  echo "✓ kubeadm init 执行完成"
  print_sub_separator

  echo "配置当前用户kubeconfig..."
  echo "命令1mkdir -p $HOME/.kube"
  mkdir -p "$HOME/.kube"
  # -f instead of -i: this script runs unattended, so an interactive
  # overwrite prompt would stall it; a config left by an earlier cluster is
  # stale after a fresh `kubeadm init` anyway.
  echo "命令2cp -f ${admin_conf} ${user_kubeconfig}"
  cp -f "$admin_conf" "$user_kubeconfig"
  echo "命令3chown $(id -u):$(id -g) ${user_kubeconfig}"
  chown "$(id -u):$(id -g)" "$user_kubeconfig"
  echo "命令4export KUBECONFIG=${admin_conf}"
  export KUBECONFIG="$admin_conf"
  echo "命令5echo '${export_line}' >> ${profile_file}"
  # Append only when the exact line is absent, so re-running the script does
  # not pile up duplicate export lines.
  if ! grep -qxF -- "$export_line" "$profile_file" 2>/dev/null; then
    echo "$export_line" >> "$profile_file"
  fi
  echo "✓ kubeconfig配置完成已永久写入环境变量"

  print_separator
  auto_wait
}
# -------------------------- Step: install the Calico network plugin --------------------------
# Download the Calico manifest into target_dir and apply it to the cluster.
# Globals:
#   target_dir (read)            - destination directory for calico.yaml
#   CALICO_MANIFEST_URL (read)   - optional override of the manifest URL
# Outputs:
#   Progress and error messages on stdout.
# Exits with status 1 when the download fails or yields an empty file.
#
# NOTE(review): the default URL is plain HTTP to a bare IP and the manifest is
# applied without any integrity check - consider HTTPS and/or a checksum.
install_calico() {
  local -r calico_url="${CALICO_MANIFEST_URL:-http://116.205.97.109/scripts/calico.yaml}"
  local -r manifest="$target_dir/calico.yaml"
  local tmp_manifest

  print_separator
  echo "【步骤3/7】安装Calico网络插件"
  print_sub_separator

  echo "创建配置文件目录mkdir -p $target_dir"
  mkdir -p "$target_dir"
  echo "✓ 目录创建成功"
  print_sub_separator

  # Download to a temporary file and move it into place only on success.
  # The previous `wget -c -O` resumed onto whatever calico.yaml was left by an
  # earlier run, which could produce a stale or corrupted manifest.
  echo "下载Calico配置文件wget -q -O $manifest $calico_url --show-progress"
  tmp_manifest=$(mktemp "$target_dir/calico.yaml.XXXXXX")
  if ! wget -q -O "$tmp_manifest" "$calico_url" --show-progress \
    || [ ! -s "$tmp_manifest" ]; then
    rm -f -- "$tmp_manifest"
    echo "错误:Calico配置文件下载失败 $calico_url"
    exit 1
  fi
  # mktemp creates the file with mode 0600; restore ordinary file permissions.
  chmod 0644 "$tmp_manifest"
  mv -f -- "$tmp_manifest" "$manifest"
  echo "✓ Calico配置文件下载完成"
  print_sub_separator

  echo "部署Calico网络插件kubectl apply -f $manifest"
  kubectl apply -f "$manifest"
  echo "✓ Calico配置已提交至集群"
  print_sub_separator

  # Nodes are listed immediately after the apply; they may not be Ready yet.
  echo "当前节点状态网络插件部署中状态可能为NotReady"
  kubectl get node
  echo
  echo "提示Calico组件需2-5分钟部署完成请耐心等待"
  echo "部署完成后可执行kubectl get node 验证节点状态目标状态为Ready"

  print_separator
  auto_wait
}
# -------------------------- Step: generate the node join scripts --------------------------
# Obtain a certificate key and a join command from kubeadm and write
# add_master.sh / add_node.sh into target_dir.
# Globals:
#   target_dir (read)        - directory the two scripts are written to
#   CERT_KEY (written)       - certificate key extracted from kubeadm output
#   JOIN_CMD_BASE (written)  - output of `kubeadm token create --print-join-command`
# Outputs:
#   Progress and error messages on stdout.
# Exits with status 1 when either value cannot be obtained.
# Side effects:
#   Creates/overwrites the two scripts with mode 0700, because they embed the
#   join token and the certificate key.
generate_join_scripts() {
  local -r master_script="$target_dir/add_master.sh"
  local -r node_script="$target_dir/add_node.sh"
  local previous_umask

  print_separator
  echo "【步骤4/7】生成Master/Node节点加入脚本"
  print_sub_separator

  echo "生成集群加入核心参数..."
  echo "命令1kubeadm init phase upload-certs --upload-certs刷新证书"
  # `|| true`: with errexit + pipefail a non-matching grep (or failing
  # kubeadm) would abort the script inside the assignment, before the
  # explicit check below could print its error message.
  CERT_KEY=$(kubeadm init phase upload-certs --upload-certs 2>/dev/null \
    | grep -E '^[0-9a-f]{64,}$' \
    | tail -n 1) || true
  if [ -z "$CERT_KEY" ]; then
    echo "错误生成certificate-key失败"
    exit 1
  fi
  echo "✓ certificate-key 生成完成"

  echo "命令2kubeadm token create --print-join-command生成基础加入命令"
  JOIN_CMD_BASE=$(kubeadm token create --print-join-command 2>/dev/null) || true
  if [ -z "$JOIN_CMD_BASE" ]; then
    echo "错误生成join命令失败"
    exit 1
  fi
  echo "✓ 基础join命令含token+CA哈希生成完成"
  print_sub_separator

  # Create the files owner-only from the start; they contain secrets.
  previous_umask=$(umask)
  umask 077

  # Unquoted heredoc delimiter: $JOIN_CMD_BASE / $CERT_KEY expand now, while
  # the escaped \$... expressions are evaluated later on the remote host.
  echo "生成Master节点加入脚本$master_script"
  cat > "$master_script" <<EOF
#!/bin/bash
set -euo pipefail
# 执行Master节点加入
$JOIN_CMD_BASE --control-plane --certificate-key $CERT_KEY
# 配置kubeconfig
mkdir -p "\$HOME/.kube"
cp -f /etc/kubernetes/admin.conf "\$HOME/.kube/config"
chown "\$(id -u):\$(id -g)" "\$HOME/.kube/config"
export KUBECONFIG=/etc/kubernetes/admin.conf
grep -qxF 'export KUBECONFIG=/etc/kubernetes/admin.conf' "\$HOME/.profile" 2>/dev/null || echo 'export KUBECONFIG=/etc/kubernetes/admin.conf' >> "\$HOME/.profile"
echo "Master节点加入集群并配置完成"
EOF
  echo "✓ add_master.sh 生成完成"
  print_sub_separator

  echo "生成Node节点加入脚本$node_script"
  cat > "$node_script" <<EOF
#!/bin/bash
set -euo pipefail
# 执行Node节点加入
$JOIN_CMD_BASE
echo "Node节点加入集群完成"
EOF
  echo "✓ add_node.sh 生成完成"
  print_sub_separator

  umask "$previous_umask"

  # 0700 rather than +x: executable for the owner, unreadable for others
  # (also tightens files that already existed with wider permissions).
  echo "赋予脚本执行权限chmod 700 $master_script $node_script"
  chmod 700 "$master_script" "$node_script"
  echo "✓ 脚本执行权限配置完成"

  print_separator
  auto_wait
}
# -------------------------- Helpers: remote operations --------------------------
# Verify that key-based (passwordless) SSH login to a host works.
# Arguments:
#   $1 - IP/hostname to test
# Outputs:
#   Progress and error messages on stdout (plus the remote 'auth_ok' echo).
# Exits with status 1 when the SSH connection or authentication fails.
#
# NOTE(review): StrictHostKeyChecking=no accepts unknown/changed host keys
# without verification; kept as in the rest of the script.
check_ssh_auth() {
  local ip="$1"
  echo "验证 $ip 免密登录..."
  # BatchMode=yes disables password/passphrase prompts, so the check fails
  # fast when key authentication is not set up instead of asking the operator
  # for a password (which would defeat the purpose of this check).
  if ! ssh -o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
    "$ip" "echo 'auth_ok'"; then
    echo "错误:$ip 免密登录失败请先配置SSH免密登录"
    exit 1
  fi
  echo "$ip 免密登录验证通过"
}
# Copy a local script into /tmp/ on a remote host via scp.
# Arguments:
#   $1 - IP/hostname of the remote host
#   $2 - local path of the script to copy
# Outputs:
#   Progress messages on stdout.
copy_remote_script() {
  local host="$1" local_path="$2"
  # Declaration and assignment intentionally stay on one line, as before.
  local file_name=$(basename "$local_path")
  printf '%s\n' "拷贝 ${file_name}${host}:/tmp/..."
  scp -o StrictHostKeyChecking=no "$local_path" "${host}:/tmp/"
  printf '%s\n' "${file_name} 拷贝完成"
}
# Run a script previously copied to /tmp on a remote host, then delete it.
# Arguments:
#   $1 - IP/hostname of the remote host
#   $2 - file name of the script inside the remote /tmp
# Outputs:
#   Progress and error messages on stdout.
# Returns:
#   0 on success; the remote script's exit status if it failed; otherwise the
#   cleanup command's status if only the cleanup failed. Under `set -e` a
#   non-zero return aborts the calling script, as before.
run_remote_script() {
  local ip="$1"
  local script_name="$2"
  local run_rc=0
  local cleanup_rc=0

  echo "$ip 执行脚本bash /tmp/$script_name"
  # Capture the status instead of letting errexit abort here: the script
  # contains the join token / certificate key and must be removed from the
  # remote /tmp even when it fails.
  ssh -o StrictHostKeyChecking=no "$ip" "bash /tmp/$script_name" || run_rc=$?

  echo "清理 $ip 临时脚本rm -f /tmp/$script_name"
  ssh -o StrictHostKeyChecking=no "$ip" "rm -f /tmp/$script_name" || cleanup_rc=$?

  if [ "$run_rc" -ne 0 ]; then
    echo "错误:$ip 脚本执行失败退出码 $run_rc"
    return "$run_rc"
  fi
  echo "$ip 脚本执行完成"

  if [ "$cleanup_rc" -ne 0 ]; then
    echo "错误:$ip 临时脚本清理失败退出码 $cleanup_rc"
    return "$cleanup_rc"
  fi
  echo "$ip 临时脚本清理完成"
}
# -------------------------- Step: deploy the remote master nodes --------------------------
# Join every host listed in master_ips to the cluster as a control-plane node
# by copying and running add_master.sh on it.
# Globals:
#   master_ips (read) - comma-separated IP list; spaces are ignored
#   target_dir (read) - directory containing add_master.sh
# Outputs:
#   Progress and error messages on stdout.
# Returns immediately when master_ips is empty.
# Exits with status 1 when master_ips contains no usable entry.
process_remote_masters() {
  if [ -z "$master_ips" ]; then
    return
  fi

  local -a raw_entries=()
  local -a master_ip_arr=()
  local ip

  print_separator
  echo "【步骤5/7】部署远程Master节点"
  print_sub_separator

  # Split on commas after stripping spaces, then drop empty entries (from
  # "a,,b" or a trailing comma) so no empty host is ever passed to ssh/scp.
  IFS=',' read -ra raw_entries <<< "${master_ips// /}"
  for ip in ${raw_entries[@]+"${raw_entries[@]}"}; do
    if [ -n "$ip" ]; then
      master_ip_arr+=("$ip")
    fi
  done

  if [ "${#master_ip_arr[@]}" -eq 0 ]; then
    echo "错误:--master-ips 未包含有效的IP地址"
    exit 1
  fi

  # Count the entries that will actually be processed.
  echo "待部署Master节点数量${#master_ip_arr[@]}"
  print_sub_separator

  for ip in "${master_ip_arr[@]}"; do
    printf '\n--- 开始处理Master节点%s ---\n' "$ip"
    check_ssh_auth "$ip"
    copy_remote_script "$ip" "$target_dir/add_master.sh"
    run_remote_script "$ip" "add_master.sh"
    echo "--- Master节点 $ip 部署完成 ---"
    auto_wait # pause after each node
  done

  printf '\n%s\n' "✓ 所有Master节点部署完成"
  print_separator
  auto_wait
}
# -------------------------- Step: deploy the remote worker nodes --------------------------
# Join every host listed in node_ips to the cluster as a worker node by
# copying and running add_node.sh on it.
# Globals:
#   node_ips (read)   - comma-separated IP list; spaces are ignored
#   target_dir (read) - directory containing add_node.sh
# Outputs:
#   Progress and error messages on stdout.
# Returns immediately when node_ips is empty.
# Exits with status 1 when node_ips contains no usable entry.
process_remote_nodes() {
  if [ -z "$node_ips" ]; then
    return
  fi

  local -a raw_entries=()
  local -a node_ip_arr=()
  local ip

  print_separator
  echo "【步骤6/7】部署远程Node节点"
  print_sub_separator

  # Split on commas after stripping spaces, then drop empty entries (from
  # "a,,b" or a trailing comma) so no empty host is ever passed to ssh/scp.
  IFS=',' read -ra raw_entries <<< "${node_ips// /}"
  for ip in ${raw_entries[@]+"${raw_entries[@]}"}; do
    if [ -n "$ip" ]; then
      node_ip_arr+=("$ip")
    fi
  done

  if [ "${#node_ip_arr[@]}" -eq 0 ]; then
    echo "错误:--node-ips 未包含有效的IP地址"
    exit 1
  fi

  # Count the entries that will actually be processed.
  echo "待部署Node节点数量${#node_ip_arr[@]}"
  print_sub_separator

  for ip in "${node_ip_arr[@]}"; do
    printf '\n--- 开始处理Node节点%s ---\n' "$ip"
    check_ssh_auth "$ip"
    copy_remote_script "$ip" "$target_dir/add_node.sh"
    run_remote_script "$ip" "add_node.sh"
    echo "--- Node节点 $ip 部署完成 ---"
    auto_wait # pause after each node
  done

  printf '\n%s\n' "✓ 所有Node节点部署完成"
  print_separator
  auto_wait
}
# -------------------------- Step: print the final hints --------------------------
# Print the post-deployment verification commands the operator should run.
# Outputs:
#   The hint text on stdout; nothing is executed against the cluster here.
print_final_tips() {
  print_separator
  echo "【步骤7/7】部署完成 - 后续验证建议"
  print_sub_separator
  echo "1. 查看集群节点状态所有节点最终状态需为Ready"
  echo " kubectl get nodes"
  echo
  echo "2. 查看控制平面组件状态需全部Running"
  # A comma in a label selector means AND, and a bare name tests for the
  # existence of a label key, so "component=a,b,c" matches no pod. The
  # set-based form "component in (a,b,c)" selects any of the three.
  echo " kubectl get pods -n kube-system -l 'component in (kube-apiserver,kube-controller-manager,kube-scheduler)'"
  echo
  echo "3. 查看Calico网络组件状态需全部Running"
  echo " kubectl get pods -n kube-system -l k8s-app=calico-node"
  echo
  echo "4. 测试集群可用性创建测试Pod"
  echo " kubectl run test-pod --image=nginx:alpine && kubectl delete pod test-pod"
  print_separator
  echo "🎉 Kubernetes集群部署全流程完成"
  print_separator
}
# -------------------------- Main: run every step in order --------------------------
# Parse the arguments first, then run the remaining steps one after another.
# Arguments:
#   The script's command-line arguments, forwarded to parse_and_validate_args.
main() {
  local step
  parse_and_validate_args "$@"
  for step in \
    init_first_master \
    install_calico \
    generate_join_scripts \
    process_remote_masters \
    process_remote_nodes \
    print_final_tips; do
    "$step"
  done
}
# Entry point: start main, forwarding all command-line arguments.
main "$@"