#!/bin/bash
#
# Automated deployment script: syncs files, then runs the various install
# scripts one after another.

# Abort on the first failing command (-e), on expansion of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail). With plain `set -e`
# only the last stage of a pipeline is checked, so a failed `wget` feeding
# `bash` would go unnoticed.
set -euo pipefail

# ========== Logging ==========
# Fixed log path. Earlier runs are not overwritten because tee appends (-a).
LOG_FILE="/opt/deploy.log"

# Route stdout and stderr through tee: everything is shown on the terminal
# and appended to the log file at the same time.
exec > >(tee -a "$LOG_FILE") 2>&1
# ========== Colored output helpers ==========

#######################################
# Print an informational message in green, prefixed with "[INFO]" and the
# current local time.
# Arguments: $1 - message text (printed verbatim; backslash sequences in the
#                 message are NOT interpreted)
# Outputs:   one line on stdout
#######################################
green_echo() {
  local message=${1-}
  local stamp
  stamp=$(date +'%Y-%m-%d %H:%M:%S')
  # Only the format string carries escape sequences; the message goes through
  # %s so text such as "C:\new" is not mangled into a newline.
  printf '\033[32m[INFO] %s %s\033[0m\n' "$stamp" "$message"
}
#######################################
# Print an error message in red, prefixed with "[ERROR]" and the current
# local time.
# Arguments: $1 - message text (printed verbatim; backslash sequences in the
#                 message are NOT interpreted)
# Outputs:   one line on stderr
#######################################
red_echo() {
  local message=${1-}
  local stamp
  stamp=$(date +'%Y-%m-%d %H:%M:%S')
  # Diagnostics belong on stderr; the message is passed through %s so it is
  # never treated as part of the format string.
  printf '\033[31m[ERROR] %s %s\033[0m\n' "$stamp" "$message" >&2
}
# ========== Steps ==========
# 1. Sync the remote /opt directory to the local machine.

# sshpass supplies the SSH password to rsync without a prompt.
green_echo "开始安装sshpass..."
# apt-get instead of apt: apt's command-line interface is not stable for
# use in scripts.
if apt-get install -y sshpass; then
  green_echo "sshpass安装成功!"
else
  red_echo "sshpass安装失败,请检查网络或软件源配置"
  exit 1
fi

green_echo "开始同步远程服务器172.51.4.158的/opt目录..."

# The password is handed to sshpass through the SSHPASS environment variable
# (-e) rather than -p, so it does not show up in the process list. A value
# already present in the environment takes precedence.
# NOTE(review): the fallback credential is still embedded in this file; rotate
# it and supply SSHPASS from the environment or a secret store instead.
default_sync_password='Zp5#tr6#xm9'
sync_password="${SSHPASS:-$default_sync_password}"

# NOTE(review): StrictHostKeyChecking=no accepts any host key without
# verification; confirm this is acceptable on this network.
rsync_opts=(-avzP -e "ssh -o StrictHostKeyChecking=no")

# Output of this script is being appended to LOG_FILE by tee. If the remote
# host has a file of the same name directly under /opt, rsync would replace
# the local one mid-run and the rest of the log would be lost, so skip it.
if [[ "${LOG_FILE:-}" == /opt/* && "${LOG_FILE#/opt/}" != */* ]]; then
  rsync_opts+=(--exclude="/${LOG_FILE##*/}")
fi

# The source is quoted so the glob is expanded on the remote side only.
if ! SSHPASS="$sync_password" sshpass -e \
  rsync "${rsync_opts[@]}" 'root@172.51.4.158:/opt/*' /opt/; then
  red_echo "目录同步失败,请检查网络或远程服务器172.51.4.158"
  exit 1
fi
green_echo "目录同步完成!"
# Base URL the install scripts are downloaded from. A value already present in
# the environment takes precedence (e.g. to point at a mirror).
# NOTE(review): the scripts are fetched over plain HTTP and executed without
# any integrity check; consider HTTPS or checksum verification.
SCRIPT_BASE_URL="${SCRIPT_BASE_URL:-http://116.205.97.109/scripts}"

#######################################
# Download one install script and run it with bash from /opt.
# The script is downloaded completely before anything is executed, so a
# failed download aborts the deployment instead of feeding empty or partial
# input to bash.
# Globals:   SCRIPT_BASE_URL (read)
# Arguments: $1   - script file name below SCRIPT_BASE_URL
#            $2.. - arguments passed through to the downloaded script
# Returns:   0 on success; on any failure logs an error and exits the whole
#            script with status 1
#######################################
run_remote_script() {
  local script_name=$1
  shift
  local script_url="${SCRIPT_BASE_URL}/${script_name}"
  local script_body

  if ! script_body=$(wget -qO- "$script_url"); then
    red_echo "脚本下载失败:${script_url}"
    exit 1
  fi
  if [[ -z "$script_body" ]]; then
    red_echo "脚本内容为空:${script_url}"
    exit 1
  fi

  cd /opt/ || exit 1
  # The script text is supplied on stdin, as with the former `wget | bash`.
  if ! bash -s -- "$@" <<<"$script_body"; then
    red_echo "脚本执行失败:${script_name}"
    exit 1
  fi
}

# 2. System optimization script.
green_echo "执行系统优化脚本..."
run_remote_script system_optimize.sh
green_echo "系统优化完成!"

# 3. IB driver installation.
green_echo "安装IB驱动(版本24.10-2.1.8.0,Ubuntu22.04)..."
run_remote_script ib-drive.sh --install --version "24.10-2.1.8.0" --distro "ubuntu22.04"
green_echo "IB驱动安装完成!"

# 4. IB configuration.
green_echo "执行IB配置脚本..."
run_remote_script ib.sh --install
green_echo "IB配置完成!"

# 5. NVIDIA driver installation.
green_echo "安装NVIDIA驱动(版本570.124.06)..."
run_remote_script nvidia-driver.sh --install --version '570.124.06'
green_echo "NVIDIA驱动安装完成!"

# 6. NVIDIA Fabric Manager installation.
green_echo "安装NVIDIA Fabric Manager(版本570.124.06-1,Ubuntu22.04)..."
run_remote_script nvidia-fabricmanager.sh --install --distro ubuntu22.04 --version 570_570.124.06-1
green_echo "Fabric Manager安装完成!"

# 7. CUDA installation.
green_echo "安装CUDA(版本12.8.1_570.124.06)..."
run_remote_script cuda.sh --install --version '12.8.1_570.124.06'
green_echo "CUDA安装完成!"

# 8. NVIDIA DCGM installation.
green_echo "安装NVIDIA DCGM..."
run_remote_script nvidia-dcgm.sh --install
green_echo "DCGM安装完成!"

# 9. DCGM-Exporter installation.
green_echo "安装DCGM-Exporter..."
run_remote_script dcgm-exporter.sh --install
green_echo "DCGM-Exporter安装完成!"

# 10. Node-Exporter installation.
green_echo "安装Node-Exporter..."
run_remote_script node-exporter.sh --install
green_echo "Node-Exporter安装完成!"

# 11. GPU monitoring deployment.
green_echo "部署GPU监控..."
run_remote_script deploy_gpu_monitor.sh --install
green_echo "GPU监控部署完成!"

# (Optional) Docker and the NVIDIA container toolkit; uncomment to enable.
# green_echo "安装Docker(版本5:20.10.13~3-0~ubuntu-jammy)..."
# run_remote_script docker.sh --install --version '5:20.10.13~3-0~ubuntu-jammy'
# green_echo "Docker安装完成!"

# green_echo "安装NVIDIA容器工具包(版本1.17.8-1)..."
# run_remote_script nvidia-container-toolkit.sh --install --version '1.17.8-1'
# green_echo "NVIDIA容器工具包安装完成!"

green_echo "所有任务执行完毕!日志文件已保存至:$LOG_FILE"