更新 scripts/README.md

This commit is contained in:
joy 2025-07-16 17:02:46 +08:00
parent c8e4db1ad3
commit c08fcf5c7e
1 changed files with 26 additions and 0 deletions

View File

@ -182,3 +182,29 @@ tail -f /opt/gpu-manager.log
(2) 超威B200:再生龙镜像:10.102.35.99/nfs/clone.iso 备份路径: /nfs/chaowei-B200-1.7T-img #注意:超威机型对再生龙引导镜像的版本有要求,最新版本无法引导。
(3) 技嘉A100:再生龙镜像:10.101.0.86:/nfs/ 备份路径: /nfs/2025-07-15-03-Jijia-A100-960G-img #技嘉A100-磁盘960G-CX7
```
**ubuntu2404:**
```bash
# Ubuntu 24.04 setup: download the installers, adapt the repo scripts, then install each component.
# Work from /opt/; wget saves the downloads below into this directory.
cd /opt/
wget https://content.mellanox.com/ofed/MLNX_OFED-24.10-2.1.8.0/MLNX_OFED_LINUX-24.10-2.1.8.0-ubuntu24.04-x86_64.tgz # [ubuntu24.04]
wget https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2404/x86_64/nvidia-fabricmanager-570_570.124.06-1_amd64.deb # [ubuntu24.04]
wget https://cn.download.nvidia.com/tesla/570.124.06/NVIDIA-Linux-x86_64-570.124.06.run # [no version requirement]
wget https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_570.124.06_linux.run # [no version requirement]
# Fetch the repository that contains the install scripts used below.
cd /opt/ && git clone http://116.205.97.109:3000/yindun/ansible-devops.git
cd /opt/ansible-devops/scripts/
# ----- Temporary substitution to adapt the scripts to Ubuntu 24.04.
# The dots are escaped so sed matches only the literal version strings;
# an unescaped "." is a regex wildcard and could rewrite unrelated text.
sed -i -e 's/5\.8-6\.0\.4\.2/24.10-2.1.8.0/g' -e 's/22\.04/24.04/g' ib-drive.sh && sed -i 's/2204/2404/g' nvidia-fabricmanager.sh
bash system_optimize.sh --install
bash ib-drive.sh --install --version "24.10-2.1.8.0"
bash nvidia-driver.sh --install --version '570.124.06'
bash nvidia-fabricmanager.sh --install --version "570_570.124.06-1"
bash cuda.sh --install --version "12.8.1_570.124.06"
# Install the exporters.
# NOTE(review): these scripts are fetched over plain HTTP and piped straight into bash;
# confirm the host is trusted, or download and inspect the scripts before running them.
cd /opt/ && wget -qO- http://116.205.97.109/scripts/nvidia-dcgm.sh | bash -s -- --install
cd /opt/ && wget -qO- http://116.205.97.109/scripts/dcgm-exporter.sh | bash -s -- --install
cd /opt/ && wget -qO- http://116.205.97.109/scripts/node-exporter.sh | bash -s -- --install
# Hostname change, kernel version pinning and root partition expansion are already integrated into the initialization script; there is no need to run them again.
```