#!/bin/bash
#
# Automated deployment script: syncs /opt from a remote server, then downloads
# and runs a series of installer scripts (system tuning, IB driver, NVIDIA
# driver, Fabric Manager, CUDA, DCGM, exporters, GPU monitoring).
#
# Environment:
#   SSHPASS  Optional. Password for root on the remote sync host. When unset,
#            the legacy built-in default below is used.
#
# All stdout/stderr output is shown on the console and appended to LOG_FILE.

# -e: abort on unhandled errors; -E: let functions inherit the ERR trap;
# -u: treat unset variables as errors; pipefail: a pipeline fails if any
# stage fails.
set -Eeuo pipefail

# ========== Configuration ==========
# Fixed log path; output is appended (tee -a), not overwritten.
readonly LOG_FILE="/opt/deploy.log"
readonly REMOTE_HOST="172.51.4.158"
# NOTE(review): installer scripts are fetched over plain HTTP and executed as
# the current user; consider serving them over HTTPS and verifying checksums.
readonly SCRIPT_BASE_URL="http://116.205.97.109/scripts"
readonly WORK_DIR="/opt/"
# NOTE(review): hard-coded credential kept only so existing invocations keep
# working. Prefer exporting SSHPASS (or switching to SSH keys) and rotate
# this password.
readonly DEFAULT_REMOTE_PASSWORD='Zp5#tr6#xm9'

# ========== Colored output helpers ==========

#######################################
# Print a green, timestamped [INFO] line.
# Arguments: $1 - message text
# Outputs:   writes the line to stdout
#######################################
green_echo() {
  printf '\033[32m[INFO] %s %s\033[0m\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$1"
}

#######################################
# Print a red, timestamped [ERROR] line.
# Arguments: $1 - message text
# Outputs:   writes the line to stderr (merged into the log by main)
#######################################
red_echo() {
  printf '\033[31m[ERROR] %s %s\033[0m\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$1" >&2
}

#######################################
# Download an installer script and run it with bash from WORK_DIR.
# The script is fully downloaded to a temp file first, so a failed or
# truncated download is never executed. It is still fed to bash on stdin
# via `bash -s --`, as the original `wget | bash -s --` pipeline did.
# Globals:   SCRIPT_BASE_URL (read), WORK_DIR (read)
# Arguments: $1 - script file name under SCRIPT_BASE_URL
#            $2.. - arguments passed through to the script
# Returns:   0 on success; non-zero if cd, the download, or the script fails
#######################################
run_remote_script() {
  local script_name=$1
  shift

  cd "$WORK_DIR" || return

  local script_url="${SCRIPT_BASE_URL}/${script_name}"
  local tmp_script
  tmp_script=$(mktemp) || return

  if ! wget -qO "$tmp_script" "$script_url"; then
    rm -f -- "$tmp_script"
    red_echo "下载脚本失败:${script_url}"
    return 1
  fi

  # Capture the status without tripping set -e so the temp file is removed.
  local rc=0
  bash -s -- "$@" <"$tmp_script" || rc=$?
  rm -f -- "$tmp_script"
  return "$rc"
}

#######################################
# Install sshpass and rsync the remote /opt/* into the local /opt/.
# Globals:   REMOTE_HOST (read), DEFAULT_REMOTE_PASSWORD (read),
#            SSHPASS (read, optional)
# Returns:   exits 1 if sshpass cannot be installed; non-zero if rsync fails
#######################################
sync_remote_opt() {
  green_echo "开始同步远程服务器${REMOTE_HOST}的/opt目录..."

  green_echo "开始安装sshpass..."
  # apt-get is used instead of apt: apt's CLI is not meant for scripts.
  if apt-get install -y sshpass; then
    green_echo "sshpass安装成功!"
  else
    red_echo "sshpass安装失败,请检查网络或软件源配置"
    exit 1
  fi

  # Pass the password through the environment (sshpass -e) instead of argv so
  # it does not show up in the process list. The remote glob is quoted so the
  # local shell never expands it.
  SSHPASS="${SSHPASS:-$DEFAULT_REMOTE_PASSWORD}" \
    sshpass -e rsync -avzP -e "ssh -o StrictHostKeyChecking=no" \
    "root@${REMOTE_HOST}:/opt/*" /opt/
  green_echo "目录同步完成!"
}

#######################################
# Run every deployment step in order; any failing step aborts the script.
# Globals:   LOG_FILE (read)
#######################################
main() {
  # Send stdout and stderr to the console and append them to the log file.
  exec > >(tee -a "$LOG_FILE") 2>&1

  # Report where the script died instead of exiting silently under set -e.
  trap 'red_echo "第 ${LINENO} 行命令执行失败(退出码 $?),部署终止"' ERR

  # 1. Sync the remote /opt directory to the local machine
  sync_remote_opt

  # 2. System optimization script
  green_echo "执行系统优化脚本..."
  run_remote_script system_optimize.sh
  green_echo "系统优化完成!"

  # 3. IB driver installation
  green_echo "安装IB驱动(版本24.10-2.1.8.0,Ubuntu22.04)..."
  run_remote_script ib-drive.sh --install --version "24.10-2.1.8.0" --distro "ubuntu22.04"
  green_echo "IB驱动安装完成!"

  # 4. IB configuration
  green_echo "执行IB配置脚本..."
  run_remote_script ib.sh --install
  green_echo "IB配置完成!"

  # 5. NVIDIA driver installation
  green_echo "安装NVIDIA驱动(版本570.124.06)..."
  run_remote_script nvidia-driver.sh --install --version '570.124.06'
  green_echo "NVIDIA驱动安装完成!"

  # 6. NVIDIA Fabric Manager installation
  green_echo "安装NVIDIA Fabric Manager(版本570.124.06-1,Ubuntu22.04)..."
  run_remote_script nvidia-fabricmanager.sh --install --distro ubuntu22.04 --version 570_570.124.06-1
  green_echo "Fabric Manager安装完成!"

  # 7. CUDA installation
  green_echo "安装CUDA(版本12.8.1_570.124.06)..."
  run_remote_script cuda.sh --install --version '12.8.1_570.124.06'
  green_echo "CUDA安装完成!"

  # 8. NVIDIA DCGM installation
  green_echo "安装NVIDIA DCGM..."
  run_remote_script nvidia-dcgm.sh --install
  green_echo "DCGM安装完成!"

  # 9. DCGM-Exporter installation
  green_echo "安装DCGM-Exporter..."
  run_remote_script dcgm-exporter.sh --install
  green_echo "DCGM-Exporter安装完成!"

  # 10. Node-Exporter installation
  green_echo "安装Node-Exporter..."
  run_remote_script node-exporter.sh --install
  green_echo "Node-Exporter安装完成!"

  # 11. GPU monitoring deployment
  green_echo "部署GPU监控..."
  run_remote_script deploy_gpu_monitor.sh --install
  green_echo "GPU监控部署完成!"

  # (Optional) Docker and the NVIDIA container toolkit; uncomment to enable.
  # green_echo "安装Docker(版本5:20.10.13~3-0~ubuntu-jammy)..."
  # run_remote_script docker.sh --install --version '5:20.10.13~3-0~ubuntu-jammy'
  # green_echo "Docker安装完成!"
  # green_echo "安装NVIDIA容器工具包(版本1.17.8-1)..."
  # run_remote_script nvidia-container-toolkit.sh --install --version '1.17.8-1'
  # green_echo "NVIDIA容器工具包安装完成!"

  green_echo "所有任务执行完毕!日志文件已保存至:$LOG_FILE"
}

main "$@"