ansible-devops/scripts/install-nfs-storageclass-pr...

405 lines
16 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
set -euo pipefail
# ===================== Global constants (package URL is hard-coded) =====================
# ANSI colour sequences. They are stored as literal backslash text and only
# turn into real escape characters when printed through `echo -e` / printf %b.
readonly RED="\033[31m"
readonly GREEN="\033[32m"
readonly YELLOW="\033[33m"
readonly BLUE="\033[34m"
readonly RESET="\033[0m"
# Fixed configuration (the package URL is not a command-line option).
# Marked readonly so no later step can silently repoint the download or the
# deployment directory.
# NOTE(review): the package is fetched over plain HTTP with no checksum and is
# then applied to the cluster as root — consider HTTPS plus a pinned digest.
readonly NFS_TAR_URL="http://116.205.97.109/scripts/nfs-subdir-external-provisioner.tgz"
readonly TARGET_DIR="/opt/k8s-install-conf"
readonly NFS_DEPLOY_DIR="${TARGET_DIR}/nfs-subdir-external-provisioner"
readonly DEPLOY_FILE_PATH="${NFS_DEPLOY_DIR}/deploy/deployment.yaml"
readonly DOWNLOAD_TEMP_PATH="/tmp/nfs-subdir-external-provisioner.tgz"
# Required command-line arguments; populated by parse_args, so they must stay
# writable.
NFS_SERVER=""
SHARE_DIR=""
# ===================== 函数封装 - 日志输出 =====================
info() {
  # Print an informational message to stdout, prefixed with a blue INFO tag.
  # Arguments: $1 - message text (backslash escapes are interpreted).
  local message="$1"
  local tag="[${BLUE}INFO${RESET}]"
  echo -e "${tag} ${message}"
}
success() {
  # Print a success message to stdout, prefixed with a green SUCCESS tag.
  # Arguments: $1 - message text (backslash escapes are interpreted).
  local message="$1"
  local tag="[${GREEN}SUCCESS${RESET}]"
  echo -e "${tag} ${message}"
}
warning() {
  # Print a warning message to stdout, prefixed with a yellow WARNING tag.
  # Arguments: $1 - message text (backslash escapes are interpreted).
  local message="$1"
  local tag="[${YELLOW}WARNING${RESET}]"
  echo -e "${tag} ${message}"
}
error() {
  # Print an error message to stderr with a red ERROR tag, then terminate the
  # current shell with status 1.
  # Arguments: $1 - message text (backslash escapes are interpreted).
  local message="$1"
  local tag="[${RED}ERROR${RESET}]"
  echo -e "${tag} ${message}" 1>&2
  exit 1
}
# ===================== 函数封装 - 参数解析 =====================
parse_args() {
info "开始解析命令行参数..."
# 仅支持必选参数和帮助
while [[ $# -gt 0 ]]; do
case "$1" in
--nfs-server)
NFS_SERVER="$2"
shift 2
;;
--share-dirs)
SHARE_DIR="$2"
shift 2
;;
--help)
print_usage
exit 0
;;
*)
error "不支持的参数:$1,使用--help查看帮助"
;;
esac
done
# 必传参数校验
if [[ -z "${NFS_SERVER}" || -z "${SHARE_DIR}" ]]; then
error "必传参数缺失!请指定 --nfs-server <NFS服务器IP> 和 --share-dirs <共享目录>"
fi
# NFS服务器IP格式校验
if ! [[ "${NFS_SERVER}" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
error "NFS服务器IP格式非法${NFS_SERVER}请输入合法IPv4地址"
fi
# 共享目录格式校验(必须是绝对路径)
if ! [[ "${SHARE_DIR}" =~ ^/ ]]; then
error "共享目录必须是绝对路径:${SHARE_DIR}(示例:/data/nfs/share"
fi
success "参数解析完成:"
echo -e " NFS服务器IP${GREEN}${NFS_SERVER}${RESET}"
echo -e " NFS共享目录${GREEN}${SHARE_DIR}${RESET}"
echo -e " 部署包地址(固定):${GREEN}${NFS_TAR_URL}${RESET}"
}
# ===================== 函数封装 - 用法说明 =====================
print_usage() {
  # Write the help text to stdout: purpose, synopsis, option descriptions,
  # the fixed package URL (NFS_TAR_URL) and two example invocations.
  local rule="=================================================="
  local self="$0"
  cat << USAGE
${rule}
NFS存储类自动化部署脚本企业级专业版
${rule}
功能:自动下载/解压部署文件、替换NFS配置、按顺序部署组件、验证结果
用法:${self} --nfs-server <NFS_IP> --share-dirs <SHARE_DIR>
参数说明:
--nfs-server 必选 NFS服务器IPv4地址示例192.168.1.100
--share-dirs 必选 NFS共享目录绝对路径示例/data/nfs/k8s
--help 可选 查看帮助信息
部署包固定地址:${NFS_TAR_URL}
示例:
${self} --nfs-server 192.168.1.100 --share-dirs /data/nfs/k8s
${self} --nfs-server 10.0.0.200 --share-dirs /ifs/k8s-prod
${rule}
USAGE
}
# ===================== 函数封装 - 前置环境检查 =====================
#######################################
# Verify that the host can run the deployment before anything is changed.
# Checks, in order: running as root, kubectl installed, cluster reachable,
# wget/tar/sed present (attempting a package install when one is missing),
# and that the package URL answers a `wget --spider` probe.
# Globals:   NFS_TAR_URL (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   exits 1 (via error) on the first failed check
#######################################
pre_check() {
  info "开始前置环境检查..."

  # 1. Must run as root.
  if [[ "$(id -u)" -ne 0 ]]; then
    error "请使用root用户执行脚本sudo -i 切换后重试)"
  fi

  # 2. kubectl must be installed.
  if ! command -v kubectl &> /dev/null; then
    error "未安装kubectl命令请先配置Kubernetes客户端环境"
  fi

  # 3. kubectl must be able to reach the cluster.
  if ! kubectl cluster-info &> /dev/null; then
    error "kubectl无法连接Kubernetes集群请检查kubeconfig配置或集群状态"
  fi

  # 4. Helper tools; the package name is assumed to equal the command name.
  local tool # keep the loop variable out of the global scope
  local -a dependencies=("wget" "tar" "sed")
  for tool in "${dependencies[@]}"; do
    if command -v "${tool}" &> /dev/null; then
      continue
    fi
    warning "未找到${tool},尝试自动安装..."
    if [[ -f /etc/debian_version ]]; then
      # apt-get rather than apt: apt's CLI is documented as not stable for
      # use in scripts.
      if ! { apt-get update -qq && apt-get install -y -qq "${tool}"; }; then
        error "Debian系系统安装${tool}失败"
      fi
    elif [[ -f /etc/redhat-release ]]; then
      yum install -y -q "${tool}" || error "RHEL系系统安装${tool}失败"
    else
      error "不支持的系统,无法自动安装${tool},请手动安装后重试"
    fi
    # A package manager can report success without putting the command on
    # PATH; fail here instead of at first use.
    if ! command -v "${tool}" &> /dev/null; then
      error "${tool}安装后仍不可用,请手动安装后重试"
    fi
  done

  # 5. Package URL reachability. Bounded so an unreachable host cannot stall
  #    the script for wget's default timeout/retry budget.
  info "检查部署包可达性:${NFS_TAR_URL}"
  if ! wget --spider --timeout=10 --tries=2 "${NFS_TAR_URL}" &> /dev/null; then
    error "部署包地址不可访问请检查1) 地址是否正确 2) 服务器网络是否通畅 3) 文件是否存在"
  fi

  success "前置环境检查通过"
}
# ===================== 函数封装 - 下载与解压 =====================
download_and_extract() {
  # Fetch the deployment archive from NFS_TAR_URL into DOWNLOAD_TEMP_PATH,
  # unpack it under TARGET_DIR and confirm that every manifest the later
  # steps rely on is present. On any failure after the download started the
  # temporary archive is removed before exiting through error().
  info "开始下载并解压部署包..."

  # Target directory
  if ! mkdir -p "${TARGET_DIR}"; then
    error "创建目标目录失败:${TARGET_DIR}"
  fi

  # Download
  info "从固定地址下载:${NFS_TAR_URL}"
  if ! wget -q --show-progress -O "${DOWNLOAD_TEMP_PATH}" "${NFS_TAR_URL}"; then
    rm -f "${DOWNLOAD_TEMP_PATH}"
    error "部署包下载失败!请检查网络或文件地址"
  fi

  # Sanity check: the archive must exist and be at least 1 KiB
  if [[ ! -f "${DOWNLOAD_TEMP_PATH}" ]] \
    || [[ $(stat -c %s "${DOWNLOAD_TEMP_PATH}") -lt 1024 ]]; then
    rm -f "${DOWNLOAD_TEMP_PATH}"
    error "下载的压缩包损坏或为空"
  fi

  # Unpack over any existing tree, keeping the archived permissions
  info "解压到目标目录:${TARGET_DIR}"
  if ! tar -zxf "${DOWNLOAD_TEMP_PATH}" -C "${TARGET_DIR}" --preserve-permissions; then
    rm -f "${DOWNLOAD_TEMP_PATH}"
    error "解压压缩包失败"
  fi

  # Every manifest used by the deployment steps must have been extracted
  local -a must_exist=("class.yaml" "rbac.yaml" "deployment.yaml" "test-claim.yaml" "test-pod.yaml")
  local file_path
  for file in "${must_exist[@]}"; do
    file_path="${NFS_DEPLOY_DIR}/deploy/${file}"
    if [[ -f "${file_path}" ]]; then
      continue
    fi
    rm -f "${DOWNLOAD_TEMP_PATH}"
    error "解压后缺失核心文件:${file_path}"
  done

  success "下载解压完成,部署文件路径:${NFS_DEPLOY_DIR}/deploy"
}
# ===================== 函数封装 - 替换NFS配置 =====================
#######################################
# Rewrite deployment.yaml so it points at the requested NFS export.
# Replaces the stock server address 10.3.243.101 with NFS_SERVER and the
# stock export /ifs/kubernetes with SHARE_DIR. A timestamped backup is taken
# first and restored if a substitution or the final verification fails.
# Globals:   DEPLOY_FILE_PATH, NFS_SERVER, SHARE_DIR, GREEN, YELLOW, RESET (read)
# Arguments: none
# Outputs:   progress and an old -> new summary on stdout
# Returns:   exits 1 (via error) when the file is missing, the backup fails,
#            a substitution fails, or the new values are not in the result
#######################################
replace_nfs_config() {
  info "开始替换deployment.yaml中的NFS配置..."

  if [[ ! -f "${DEPLOY_FILE_PATH}" ]]; then
    error "未找到配置文件:${DEPLOY_FILE_PATH}"
  fi

  # Back up the pristine file so a failed edit can be rolled back.
  # Declared separately so a failing `date` is not masked by `local`.
  local backup_file
  backup_file="${DEPLOY_FILE_PATH}.bak.$(date +%Y%m%d%H%M%S)"
  cp -f "${DEPLOY_FILE_PATH}" "${backup_file}" || error "备份原配置文件失败"
  info "已备份原配置文件:${backup_file}"

  # The values land in the replacement half of `s#...#...#`, where `&`
  # (whole match), `\` (escape) and `#` (our delimiter) are special. Escape
  # them so a path such as /data/a&b is inserted literally.
  local server_repl share_repl
  server_repl=$(printf '%s' "${NFS_SERVER}" | sed 's/[\\&#]/\\&/g')
  share_repl=$(printf '%s' "${SHARE_DIR}" | sed 's/[\\&#]/\\&/g')

  # Replace the stock NFS server; dots are escaped so only the literal
  # address matches.
  if ! sed -i.bak "s#10\.3\.243\.101#${server_repl}#g" "${DEPLOY_FILE_PATH}"; then
    mv -f "${backup_file}" "${DEPLOY_FILE_PATH}"
    rm -f "${DEPLOY_FILE_PATH}.bak"
    error "替换NFS_SERVER失败已回滚原配置"
  fi

  # Replace the stock export path.
  if ! sed -i.bak "s#/ifs/kubernetes#${share_repl}#g" "${DEPLOY_FILE_PATH}"; then
    mv -f "${backup_file}" "${DEPLOY_FILE_PATH}"
    rm -f "${DEPLOY_FILE_PATH}.bak"
    error "替换共享目录失败,已回滚原配置"
  fi

  # Drop sed's own scratch backup; the timestamped one is kept.
  rm -f "${DEPLOY_FILE_PATH}.bak"

  # Verify with fixed-string matching so dots and other regex characters in
  # the values are compared literally.
  if ! grep -qF -- "${NFS_SERVER}" "${DEPLOY_FILE_PATH}" \
    || ! grep -qF -- "${SHARE_DIR}" "${DEPLOY_FILE_PATH}"; then
    mv -f "${backup_file}" "${DEPLOY_FILE_PATH}"
    error "配置替换验证失败,已回滚原配置"
  fi

  success "NFS配置替换完成"
  echo -e " 原NFS服务器${YELLOW}10.3.243.101${RESET} → 新地址:${GREEN}${NFS_SERVER}${RESET}"
  echo -e " 原共享目录:${YELLOW}/ifs/kubernetes${RESET} → 新目录:${GREEN}${SHARE_DIR}${RESET}"
}
# ===================== 函数封装 - 部署核心组件 =====================
deploy_core_components() {
  # Apply the core manifests from ${NFS_DEPLOY_DIR}/deploy in dependency
  # order (RBAC, then StorageClass, then the provisioner Deployment) and poll
  # the provisioner pod's phase every 5 seconds, for up to 30 seconds, until
  # it reports Running. A pod that is still not Running only produces a
  # warning. Changes the working directory to the deploy directory.
  info "开始部署NFS存储类核心组件..."

  local manifests_dir="${NFS_DEPLOY_DIR}/deploy"
  if ! cd "${manifests_dir}"; then
    error "进入部署目录失败:${manifests_dir}"
  fi

  for file in "rbac.yaml" "class.yaml" "deployment.yaml"; do
    info "部署 ${file}..."
    if ! kubectl apply -f "${file}"; then
      error "${file} 部署失败"
    fi
  done

  info "核心组件部署完成等待Provisioner Pod启动最多30秒..."
  local waited phase
  for ((waited = 0; ; waited += 5)); do
    # kubectl exits non-zero while no pod exists yet; that must not abort
    # the script under `set -e`, hence the `|| true`.
    phase=$(kubectl get pods -l app=nfs-subdir-external-provisioner -o jsonpath='{.items[0].status.phase}' 2> /dev/null) || true
    if [[ "${phase}" == "Running" ]]; then
      break
    fi
    if ((waited >= 30)); then
      warning "Provisioner Pod未在30秒内启动继续部署测试资源后续需手动检查"
      break
    fi
    sleep 5
  done

  success "核心组件部署流程完成"
}
# ===================== 函数封装 - 部署测试资源 =====================
deploy_test_resources() {
  # Apply the sample PVC and the sample pod from ${NFS_DEPLOY_DIR}/deploy.
  # Both are best-effort: a failed apply only logs a warning, a successful
  # one is followed by a 5-second pause.
  info "开始部署测试资源PVC+Pod..."
  local manifests="${NFS_DEPLOY_DIR}/deploy"

  # Sample PVC
  if kubectl apply -f "${manifests}/test-claim.yaml"; then
    info "测试PVCtest-claim部署成功等待绑定..."
    sleep 5
  else
    warning "测试PVC部署失败不影响核心功能可后续手动排查"
  fi

  # Sample pod
  if kubectl apply -f "${manifests}/test-pod.yaml"; then
    info "测试Podtest-pod部署成功等待启动..."
    sleep 5
  else
    warning "测试Pod部署失败不影响核心功能可后续手动排查"
  fi

  success "测试资源部署流程完成"
}
# ===================== 函数封装 - 部署验证 =====================
#######################################
# Check the deployed resources and report an overall verdict.
# Core checks (they decide the verdict): the nfs-client StorageClass exists
# and a provisioner pod exists and is Running. The sample PVC and sample pod
# are reported but do not change the verdict.
# Verdict: SUCCESS when both core checks pass; PARTIAL_SUCCESS when the only
# problem is a provisioner pod that is not Running yet; FAILED otherwise.
# A FAILED result is never downgraded by a later check.
# Globals:   GREEN, YELLOW, RESET (read)
# Arguments: none
# Outputs:   one line per check plus the verdict on stdout (errors on stderr)
# Returns:   exits 1 (via error) when the verdict is FAILED
#######################################
verify_deployment() {
  info "开始部署结果验证(核心指标)..."
  local verify_status="SUCCESS"

  # 1. StorageClass
  info "1. 存储类验证:"
  if kubectl get sc nfs-client &> /dev/null; then
    echo -e " ✅ nfs-client存储类已创建"
  else
    echo -e " ❌ nfs-client存储类未创建"
    verify_status="FAILED"
  fi

  # 2. Provisioner pod
  info "2. Provisioner Pod验证"
  # kubectl exits non-zero when no pod matches; `|| true` keeps `set -e`
  # from aborting so the empty result can be reported below.
  local provisioner_pod=""
  provisioner_pod=$(kubectl get pods -l app=nfs-subdir-external-provisioner -o jsonpath='{.items[0].metadata.name}' 2> /dev/null) || true
  if [[ -z "${provisioner_pod}" ]]; then
    echo -e " ❌ 未找到Provisioner Pod"
    verify_status="FAILED"
  else
    local pod_status=""
    pod_status=$(kubectl get pod "${provisioner_pod}" -o jsonpath='{.status.phase}' 2> /dev/null) || true
    if [[ "${pod_status}" == "Running" ]]; then
      echo -e " ✅ Pod名称${provisioner_pod},状态:${GREEN}Running${RESET}"
    else
      echo -e " ⚠️ Pod名称${provisioner_pod},状态:${YELLOW}${pod_status}${RESET}期望Running"
      # Only soften a clean run. Overwriting unconditionally would hide an
      # earlier FAILED (e.g. a missing StorageClass) behind a warning.
      if [[ "${verify_status}" == "SUCCESS" ]]; then
        verify_status="PARTIAL_SUCCESS"
      fi
    fi
  fi

  # 3. Sample PVC (informational)
  info "3. 测试PVC验证"
  if kubectl get pvc test-claim -o jsonpath='{.status.phase}' 2> /dev/null | grep -q "Bound"; then
    echo -e " ✅ 测试PVCtest-claim绑定成功"
  else
    echo -e " ⚠️ 测试PVC未绑定可能是NFS服务器连接失败或权限问题"
  fi

  # 4. Sample pod (informational)
  info "4. 测试Pod验证"
  if kubectl get pod test-pod -o jsonpath='{.status.phase}' 2> /dev/null | grep -q "Running"; then
    echo -e " ✅ 测试Podtest-pod运行正常"
  else
    echo -e " ⚠️ 测试Pod未正常运行可通过kubectl logs test-pod排查"
  fi

  # Final verdict
  echo -e "\n=================================================="
  if [[ "${verify_status}" == "SUCCESS" ]]; then
    success "NFS存储类部署完全成功"
  elif [[ "${verify_status}" == "PARTIAL_SUCCESS" ]]; then
    warning "NFS存储类核心功能部署成功但部分组件状态异常不影响基本使用"
  else
    error "NFS存储类部署失败请根据上述验证结果排查问题"
  fi
  echo -e "=================================================="
}
# ===================== 函数封装 - 打印维护命令 =====================
#######################################
# Print a cheat sheet of kubectl/NFS commands for monitoring, operating and
# troubleshooting the deployment.
# Globals:   BLUE, RESET, NFS_DEPLOY_DIR, NFS_SERVER, SHARE_DIR (read)
# Arguments: none
# Outputs:   the command list on stdout
#######################################
print_maintenance_commands() {
  info "输出常用维护命令(按场景分类)..."

  # The colour variables hold backslash text (e.g. \033[34m) that only
  # `echo -e`/printf %b interpret. `cat` prints a here-document verbatim, so
  # convert them to real escape characters first; values that already are
  # escape characters pass through unchanged.
  local blue reset
  blue=$(printf '%b' "${BLUE}")
  reset=$(printf '%b' "${RESET}")

  # NOTE(review): storageClassName is a spec field rather than a label, so
  # the `-l storageClassName=nfs-client` selectors printed below probably
  # match nothing — confirm against a live cluster and adjust the text.
  cat << EOF
${blue}### 一、状态监控命令 ###${reset}
1. 查看存储类列表kubectl get sc
2. 查看NFS存储类详情kubectl describe sc nfs-client
3. 查看Provisioner Pod状态kubectl get pods -l app=nfs-subdir-external-provisioner
4. 查看Provisioner实时日志kubectl logs -f \$(kubectl get pods -l app=nfs-subdir-external-provisioner -o jsonpath='{.items[0].metadata.name}')
5. 查看所有NFS类型PVCkubectl get pvc -l storageClassName=nfs-client
6. 查看NFS自动创建的PVkubectl get pv -l storageClassName=nfs-client
${blue}### 二、资源操作命令 ###${reset}
1. 删除测试资源生产环境推荐kubectl delete pod test-pod && kubectl delete pvc test-claim
2. 重启Provisionerkubectl rollout restart deployment nfs-subdir-external-provisioner
3. 重新部署核心组件cd ${NFS_DEPLOY_DIR}/deploy && kubectl apply -f rbac.yaml -f class.yaml -f deployment.yaml
4. 完全卸载NFS存储类
kubectl delete pod test-pod --ignore-not-found
kubectl delete pvc test-claim --ignore-not-found
kubectl delete pv -l storageClassName=nfs-client --ignore-not-found
kubectl delete sc nfs-client --ignore-not-found
kubectl delete deployment nfs-subdir-external-provisioner --ignore-not-found
kubectl delete -f ${NFS_DEPLOY_DIR}/deploy/rbac.yaml --ignore-not-found
${blue}### 三、问题排查命令 ###${reset}
1. 检查Provisioner Pod详情启动失败kubectl describe pods -l app=nfs-subdir-external-provisioner
2. 排查PVC绑定失败kubectl describe pvc <PVC_NAME>
3. 测试NFS服务器连通性mount -t nfs ${NFS_SERVER}:${SHARE_DIR} /tmp/test-nfs-mount测试后umount /tmp/test-nfs-mount
4. 查看NFS服务器共享配置showmount -e ${NFS_SERVER}
EOF
}
# ===================== 主函数(流程控制) =====================
#######################################
# Entry point: print the banner, then run every deployment stage in order.
# Stages: parse_args -> pre_check -> download_and_extract ->
#         replace_nfs_config -> deploy_core_components ->
#         deploy_test_resources -> verify_deployment ->
#         print_maintenance_commands.
# Globals:   GREEN, RESET, NFS_TAR_URL, DOWNLOAD_TEMP_PATH (read)
# Arguments: the script's command-line arguments, forwarded to parse_args
# Outputs:   banner and progress on stdout
# Returns:   0 when every stage succeeds; a failing stage exits the script
#######################################
main() {
  echo -e "${GREEN}==================================================${RESET}"
  echo -e "${GREEN}NFS存储类自动化部署脚本企业级专业版${RESET}"
  echo -e "${GREEN}==================================================${RESET}"
  echo -e "执行时间:$(date +'%Y-%m-%d %H:%M:%S')"
  echo -e "脚本版本v1.0.1"
  echo -e "部署包地址:${NFS_TAR_URL}"
  echo -e "${GREEN}==================================================${RESET}\n"

  # Stages that do not touch the temporary archive.
  parse_args "$@"
  pre_check

  # From here on a downloaded archive may exist. Remove it on every exit
  # path, so a failing later stage does not leave it behind in /tmp.
  # Single quotes: the path is expanded when the trap fires.
  trap 'rm -f -- "${DOWNLOAD_TEMP_PATH}"' EXIT

  download_and_extract
  replace_nfs_config
  deploy_core_components
  deploy_test_resources
  verify_deployment
  print_maintenance_commands

  # Explicit clean-up on the success path (the trap covers the rest).
  info "清理临时文件:${DOWNLOAD_TEMP_PATH}"
  rm -f "${DOWNLOAD_TEMP_PATH}"

  echo -e "\n${GREEN}==================================================${RESET}"
  success "部署流程全部完成!"
  echo -e "${GREEN}==================================================${RESET}"
}
# ===================== Script entry point =====================
# Run main with the script's own command-line arguments, unchanged.
main "$@"