ansible-devops/scripts/get-gpu-ib-sn.sh

86 lines
2.5 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Strict mode: abort on a failing command (errexit), on expansion of an
# unset variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail
# Emit a horizontal rule on stdout so the report sections are easy to tell apart.
# Arguments: none
# Outputs:   one line made of '=' characters
print_sep() {
  printf '%s\n' "======================================"
}
# --------------------------
# Print the index, PCI bus ID and serial number of every NVIDIA GPU.
#
# Globals:   none (all working variables are local)
# Arguments: none
# Outputs:   a section title, a tab-separated table (or a diagnostic
#            message) and a closing separator line, all on stdout
# Returns:   0 when the table was printed or no GPU was reported;
#            1 when nvidia-smi is missing or its query fails
# --------------------------
get_gpu_info() {
  local gpu_rows idx bus sn

  echo "=== GPU 设备信息 ==="

  if ! command -v nvidia-smi > /dev/null 2>&1; then
    echo "错误:未找到 nvidia-smi 命令请确认已安装NVIDIA驱动"
    print_sep
    return 1
  fi

  # Query index, pci.bus_id and serial as headerless CSV. The call is wrapped
  # in `if !` so that a failing nvidia-smi is reported here and the section is
  # closed properly, instead of errexit killing the script without a message.
  if ! gpu_rows=$(nvidia-smi --query-gpu=index,pci.bus_id,serial --format=csv,noheader,nounits); then
    echo "错误:nvidia-smi 查询失败"
    print_sep
    return 1
  fi

  if [[ -z "$gpu_rows" ]]; then
    echo "未检测到NVIDIA GPU设备"
    print_sep
    return 0
  fi

  # Table header.
  printf '序号\tBus号PCI地址\tSN号\n'
  printf '%s\t%s\t%s\n' '----' '----------------' '-----'

  # One row per GPU; the index is the one reported by nvidia-smi.
  # IFS holds the comma plus blanks, so `read` both splits the CSV fields and
  # strips the padding around them without forking a trimming helper per field.
  while IFS=$', \t' read -r idx bus sn; do
    [[ -n "${idx}${bus}${sn}" ]] || continue
    printf '%s\t%s\t%s\n' "$idx" "$bus" "$sn"
  done <<< "$gpu_rows"

  print_sep
}
# --------------------------
# Print a running index, the PCI address and the serial number of every
# Mellanox NIC listed by lspci, skipping lines that contain "Infi", "X-5"
# or "0.1" (the same filter the original pipeline applied).
#
# Globals:   none (all working variables are local)
# Arguments: none
# Outputs:   a section title, a tab-separated table (or a diagnostic
#            message) and a closing separator line, all on stdout
# Returns:   0 when the table was printed or no matching NIC exists;
#            1 when lspci is missing or the device listing fails
# --------------------------
get_ib_info() {
  local pci_list pci serial
  local -a pci_addrs=()
  local idx=0

  echo "=== IB网卡 设备信息 ==="

  if ! command -v lspci > /dev/null 2>&1; then
    echo "错误:未找到 lspci 命令, 请确认已安装pciutils"
    print_sep
    return 1
  fi

  # Select the PCI addresses with a single awk filter. Unlike a grep chain,
  # awk exits 0 when nothing matches, so under `set -o pipefail` + errexit an
  # empty result reaches the "no device" branch below instead of aborting the
  # script; a genuine lspci failure is still detected via pipefail.
  if ! pci_list=$(lspci | awk '/Mellanox/ && !/Infi|X-5|0\.1/ { print $1 }'); then
    echo "错误:lspci 执行失败"
    print_sep
    return 1
  fi

  if [[ -z "$pci_list" ]]; then
    echo "未找到符合条件的Mellanox IB网卡设备"
    print_sep
    return 0
  fi

  # Table header.
  printf '序号\tPCI地址\tSN号\n'
  printf '%s\t%s\t%s\n' '----' '-------' '-----'

  # One address per array element; avoids unquoted word splitting.
  mapfile -t pci_addrs <<< "$pci_list"

  # Rows are numbered from 0 in lspci listing order.
  for pci in "${pci_addrs[@]}"; do
    # Take the text after the first ": " on every line mentioning
    # "serial number" (case-insensitive), collapse whitespace and join the
    # hits with single spaces. awk exits 0 when there is no hit, and a failed
    # query for one device only blanks that serial, so the placeholder below
    # is printed and the remaining devices are still reported.
    serial=$(lspci -vvv -s "$pci" | awk -F': ' '
      tolower($0) ~ /serial number/ {
        n = split($2, words, " ")
        for (i = 1; i <= n; i++) {
          joined = (joined == "" ? words[i] : joined " " words[i])
        }
      }
      END { print joined }
    ') || serial=""

    printf '%s\t%s\t%s\n' "$idx" "$pci" "${serial:-未找到SN号}"
    idx=$((idx + 1))
  done

  print_sep
}
# Main flow: collect and print the GPU section, then the IB NIC section.
# Each collector is invoked through `||` so that a failure in one (for example
# a host without nvidia-smi) does not trigger errexit and prevent the other
# from running; the failure is remembered and reflected in the exit status.
echo "开始收集GPU和IB网卡详细信息..."
print_sep
exit_code=0
get_gpu_info || exit_code=1
get_ib_info || exit_code=1
echo "信息收集完成!"
exit "$exit_code"