#!/bin/bash
# Automated deployment script: mirror /opt from a reference host, then run a
# fixed sequence of installer scripts fetched from an internal HTTP server.
#
# Optional environment overrides (defaults reproduce the original behavior):
#   SYNC_HOST        host whose /opt/* is mirrored into the local /opt
#   SSHPASS          SSH password for root@SYNC_HOST (consumed by `sshpass -e`)
#   SCRIPT_BASE_URL  base URL the installer scripts are downloaded from
#
# -e        abort on the first failing command
# -E        let the ERR trap fire inside functions too
# -u        treat unset variables as errors
# pipefail  a pipeline fails if any stage fails, not only the last one
set -Eeuo pipefail

# ========== Logging ==========
# Timestamped log file so successive runs never overwrite each other.
LOG_FILE="/opt/deploy_$(date +%Y%m%d_%H%M%S).log"
readonly LOG_FILE
# Send stdout and stderr through tee: shown on the terminal and appended to
# the log file at the same time.
exec > >(tee -a "$LOG_FILE") 2>&1

# ========== Colored output helpers ==========
# Print an informational message ($1) in green with a timestamp.
green_echo() {
  echo -e "\033[32m[INFO] $(date +'%Y-%m-%d %H:%M:%S') $1\033[0m"
}

# Print an error message ($1) in red with a timestamp.
red_echo() {
  echo -e "\033[31m[ERROR] $(date +'%Y-%m-%d %H:%M:%S') $1\033[0m"
}

# Log an error message ($1) and abort the deployment with status 1.
die() {
  red_echo "$1"
  exit 1
}

# Any command that fails without explicit handling is reported before
# `set -e` terminates the script, so the log always shows where it stopped.
trap 'red_echo "第 ${LINENO} 行命令执行失败(退出码 $?),部署中止"' ERR

# ========== Configuration ==========
SYNC_HOST="${SYNC_HOST:-172.51.4.158}"
SCRIPT_BASE_URL="${SCRIPT_BASE_URL:-http://116.205.97.109/scripts}"
readonly SYNC_HOST SCRIPT_BASE_URL

# SECURITY(review): the default password is kept only for backward
# compatibility; it should be removed from the repository and supplied via
# the SSHPASS environment variable instead. It is handed to sshpass through
# the environment (-e) rather than argv (-p) so that it does not show up in
# process listings.
SSHPASS="${SSHPASS:-Zp5#tr6#xm9}"
export SSHPASS

# Scratch directory for downloaded installers; removed on every exit path.
WORK_DIR="$(mktemp -d)"
readonly WORK_DIR
trap 'rm -rf -- "${WORK_DIR:?}"' EXIT

#######################################
# Download one installer script and execute it from /opt.
# The script is fetched completely before it is run, so a failed or
# interrupted download can never execute an empty or truncated installer.
# It is still fed to bash on stdin (`bash -s --`), exactly as the former
# `wget | bash -s -- ...` form did.
# NOTE(review): scripts are fetched over plain HTTP and executed without any
# integrity check; consider HTTPS and a checksum.
# Globals:   SCRIPT_BASE_URL, WORK_DIR (read)
# Arguments: $1 - script file name under SCRIPT_BASE_URL
#            $2... - arguments passed through to the script
# Returns:   does not return on failure (logs the error and exits 1)
#######################################
run_remote_script() {
  local name=$1
  shift
  local script_file="${WORK_DIR}/${name}"

  cd /opt/ || die "无法进入 /opt 目录"
  wget -q -O "$script_file" "${SCRIPT_BASE_URL}/${name}" \
    || die "下载 ${name} 失败(退出码 $?)"
  [[ -s "$script_file" ]] || die "下载的 ${name} 为空"
  bash -s -- "$@" <"$script_file" || die "${name} 执行失败(退出码 $?)"
}

# ========== Steps ==========
# 1. Mirror the remote /opt directory to the local machine.
# NOTE(review): StrictHostKeyChecking=no disables SSH host key verification.
green_echo "开始同步远程服务器${SYNC_HOST}的/opt目录..."
sshpass -e rsync -avzP -e "ssh -o StrictHostKeyChecking=no" \
  "root@${SYNC_HOST}:/opt/*" /opt/
# Keep the password out of the environment of the installers that follow.
unset SSHPASS
green_echo "目录同步完成!"

# 2. System optimization script
green_echo "执行系统优化脚本..."
run_remote_script system_optimize.sh
green_echo "系统优化完成!"

# 3. IB driver installation
green_echo "安装IB驱动(版本24.10-2.1.8.0,Ubuntu22.04)..."
run_remote_script ib-drive.sh --install --version "24.10-2.1.8.0" --distro "ubuntu22.04"
green_echo "IB驱动安装完成!"

# 4. IB configuration
green_echo "执行IB配置脚本..."
run_remote_script ib.sh --install
green_echo "IB配置完成!"

# 5. NVIDIA driver installation
green_echo "安装NVIDIA驱动(版本570.124.06)..."
run_remote_script nvidia-driver.sh --install --version '570.124.06'
green_echo "NVIDIA驱动安装完成!"

# 6. NVIDIA Fabric Manager installation
green_echo "安装NVIDIA Fabric Manager(版本570.124.06-1,Ubuntu22.04)..."
run_remote_script nvidia-fabricmanager.sh --install --distro ubuntu22.04 --version 570_570.124.06-1
green_echo "Fabric Manager安装完成!"

# 7. CUDA installation
green_echo "安装CUDA(版本12.8.1_570.124.06)..."
run_remote_script cuda.sh --install --version '12.8.1_570.124.06'
green_echo "CUDA安装完成!"

# 8. NVIDIA DCGM installation
green_echo "安装NVIDIA DCGM..."
run_remote_script nvidia-dcgm.sh --install
green_echo "DCGM安装完成!"

# 9. DCGM-Exporter installation
green_echo "安装DCGM-Exporter..."
run_remote_script dcgm-exporter.sh --install
green_echo "DCGM-Exporter安装完成!"

# 10. Node-Exporter installation
green_echo "安装Node-Exporter..."
run_remote_script node-exporter.sh --install
green_echo "Node-Exporter安装完成!"

# 11. GPU monitoring deployment
green_echo "部署GPU监控..."
run_remote_script deploy_gpu_monitor.sh --install
green_echo "GPU监控部署完成!"

# (Optional) Docker and the NVIDIA container toolkit — uncomment to enable.
# green_echo "安装Docker(版本5:20.10.13~3-0~ubuntu-jammy)..."
# run_remote_script docker.sh --install --version '5:20.10.13~3-0~ubuntu-jammy'
# green_echo "Docker安装完成!"

# green_echo "安装NVIDIA容器工具包(版本1.17.8-1)..."
# run_remote_script nvidia-container-toolkit.sh --install --version '1.17.8-1'
# green_echo "NVIDIA容器工具包安装完成!"

green_echo "所有任务执行完毕!日志文件已保存至:$LOG_FILE"