ansible-devops/scripts/all-install.sh

93 lines
3.7 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Automated deployment script: sync files, then run the individual install scripts.
#
# Strict mode:
#   -e          abort as soon as a command fails
#   -u          treat use of an unset variable as an error
#   -o pipefail make a pipeline fail when ANY stage fails; without it a failed
#               download in `wget ... | bash` is masked, because bash exits 0
#               when it is fed empty input.
set -euo pipefail
# ========== Logging ==========
# Fixed log path. The file name carries no timestamp; earlier runs are kept
# because tee is invoked in append mode (-a) below.
LOG_FILE="/opt/deploy.log"
# Route both stdout and stderr through tee, so everything is shown on the
# terminal and appended to the log file at the same time.
exec > >(tee -a "$LOG_FILE") 2>&1
# ========== 颜色输出函数 ==========
#######################################
# Print an informational message in green, prefixed with "[INFO]" and the
# current local time (YYYY-MM-DD HH:MM:SS).
# The message is passed to printf as data (%s), so backslash sequences in it
# are printed literally instead of being interpreted.
# Arguments: $1 - message text
# Outputs:   one colored line on stdout
#######################################
green_echo() {
  printf '\033[32m[INFO] %s %s\033[0m\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${1-}"
}
#######################################
# Print an error message in red, prefixed with "[ERROR]" and the current
# local time (YYYY-MM-DD HH:MM:SS).
# The message is passed to printf as data (%s), so backslash sequences in it
# are printed literally instead of being interpreted.
# Arguments: $1 - message text
# Outputs:   one colored line on stdout
#######################################
red_echo() {
  printf '\033[31m[ERROR] %s %s\033[0m\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${1-}"
}
# ========== Steps ==========
# Host whose /opt directory is mirrored locally, and the base URL that serves
# the individual install scripts.
readonly REMOTE_HOST='172.51.4.158'
readonly SCRIPT_BASE_URL='http://116.205.97.109/scripts'

#######################################
# Download one install script and run it from /opt with the given arguments.
# The script is fetched completely into a temporary file before it is run, so
# a failed, empty or truncated download is never executed. It is still fed to
# bash on stdin (`bash -s --`), matching the former `wget ... | bash -s --`
# invocation.
# NOTE(review): the scripts are fetched over plain HTTP and executed without
# any integrity check — consider HTTPS and/or checksum verification.
# Globals:   SCRIPT_BASE_URL (read)
# Arguments: $1   - script file name below SCRIPT_BASE_URL
#            $2.. - arguments passed through to the script
# Outputs:   an error line via red_echo on failure
# Returns:   0 on success; non-zero if the download, the cd to /opt/ or the
#            script itself fails (the script's own exit status is propagated)
#######################################
run_remote_script() {
  local script_name=$1
  shift
  local url="${SCRIPT_BASE_URL}/${script_name}"
  local tmp_script
  local rc=0

  tmp_script=$(mktemp) || return 1
  if ! wget -qO "$tmp_script" "$url" || [[ ! -s "$tmp_script" ]]; then
    red_echo "脚本下载失败:${url}"
    rc=1
  elif ! cd /opt/; then
    red_echo "无法进入目录:/opt/"
    rc=1
  else
    # Capture the status instead of letting `set -e` abort here, so the
    # temporary file is always removed below.
    bash -s -- "$@" <"$tmp_script" || rc=$?
    if ((rc != 0)); then
      red_echo "脚本执行失败(退出码 ${rc}):${url}"
    fi
  fi
  rm -f -- "$tmp_script"
  return "$rc"
}

# 1. Sync the remote /opt directory to the local machine
green_echo "开始同步远程服务器172.51.4.158的/opt目录..."
# Install sshpass (needed for password-based rsync over ssh).
# apt-get is used instead of apt because apt does not offer a stable
# command-line interface for scripts.
green_echo "开始安装sshpass..."
if apt-get install -y sshpass; then
  green_echo "sshpass安装成功"
else
  red_echo "sshpass安装失败请检查网络或软件源配置"
  exit 1
fi
# The password is handed to sshpass through the SSHPASS environment variable
# (-e) rather than on the command line, where it would be visible in `ps`.
# A caller-supplied SSHPASS takes precedence.
sync_password="${SSHPASS:-}"
if [[ -z "$sync_password" ]]; then
  # TODO(security): hard-coded fallback kept only for backward compatibility;
  # remove it and supply SSHPASS (or use key-based auth) instead.
  sync_password='Zp5#tr6#xm9'
fi
# NOTE(review): StrictHostKeyChecking=no disables host key verification.
# The remote glob is quoted so it is expanded on the remote side only.
if ! SSHPASS="$sync_password" sshpass -e rsync -avzP \
  -e "ssh -o StrictHostKeyChecking=no" \
  "root@${REMOTE_HOST}:/opt/*" /opt/; then
  red_echo "目录同步失败"
  exit 1
fi
unset sync_password
green_echo "目录同步完成!"
# 2. System optimization script
green_echo "执行系统优化脚本..."
run_remote_script system_optimize.sh
green_echo "系统优化完成!"
# 3. IB driver installation
green_echo "安装IB驱动版本24.10-2.1.8.0Ubuntu22.04..."
run_remote_script ib-drive.sh --install --version "24.10-2.1.8.0" --distro "ubuntu22.04"
green_echo "IB驱动安装完成"
# 4. IB configuration
green_echo "执行IB配置脚本..."
run_remote_script ib.sh --install
green_echo "IB配置完成"
# 5. NVIDIA driver installation
green_echo "安装NVIDIA驱动版本570.124.06..."
run_remote_script nvidia-driver.sh --install --version '570.124.06'
green_echo "NVIDIA驱动安装完成"
# 6. NVIDIA Fabric Manager installation
green_echo "安装NVIDIA Fabric Manager版本570.124.06-1Ubuntu22.04..."
run_remote_script nvidia-fabricmanager.sh --install --distro ubuntu22.04 --version 570_570.124.06-1
green_echo "Fabric Manager安装完成"
# 7. CUDA installation
green_echo "安装CUDA版本12.8.1_570.124.06..."
run_remote_script cuda.sh --install --version '12.8.1_570.124.06'
green_echo "CUDA安装完成"
# 8. NVIDIA DCGM installation
green_echo "安装NVIDIA DCGM..."
run_remote_script nvidia-dcgm.sh --install
green_echo "DCGM安装完成"
# 9. DCGM-Exporter installation
green_echo "安装DCGM-Exporter..."
run_remote_script dcgm-exporter.sh --install
green_echo "DCGM-Exporter安装完成"
# 10. Node-Exporter installation
green_echo "安装Node-Exporter..."
run_remote_script node-exporter.sh --install
green_echo "Node-Exporter安装完成"
# 11. GPU monitoring deployment
green_echo "部署GPU监控..."
run_remote_script deploy_gpu_monitor.sh --install
green_echo "GPU监控部署完成"
# Optional: Docker and the NVIDIA container toolkit (uncomment to enable)
# green_echo "安装Docker版本5:20.10.13~3-0~ubuntu-jammy..."
# run_remote_script docker.sh --install --version '5:20.10.13~3-0~ubuntu-jammy'
# green_echo "Docker安装完成"
# green_echo "安装NVIDIA容器工具包版本1.17.8-1..."
# run_remote_script nvidia-container-toolkit.sh --install --version '1.17.8-1'
# green_echo "NVIDIA容器工具包安装完成"
green_echo "所有任务执行完毕!日志文件已保存至:$LOG_FILE"