#!/bin/bash
#
# Prints a report of NVIDIA GPU devices (queried through nvidia-smi) and
# Mellanox network adapters (queried through lspci) to stdout.
#
# Strict mode: exit on unhandled errors (-e), treat unset variables as
# errors (-u), and make a pipeline fail when any of its stages fails
# (pipefail).
set -euo pipefail

# Print a separator line to stdout to make the report easier to read.
# Arguments: none
# Outputs:   one line of '=' characters
print_sep() {
  printf '%s\n' "======================================"
}

# --------------------------
# Helper: strip leading and trailing whitespace from $1 and write the
# result to stdout. Uses parameter expansion only, so (unlike xargs) quotes
# and backslashes in the value are passed through untouched.
# --------------------------
_gpu_trim() {
  local text=$1
  text="${text#"${text%%[![:space:]]*}"}"
  text="${text%"${text##*[![:space:]]}"}"
  printf '%s' "$text"
}

# --------------------------
# Report GPU information: index, PCI bus ID and serial number.
# Globals:   none
# Arguments: none
# Outputs:   a tab-separated table (or a status message) on stdout,
#            followed by a separator line from print_sep
# Returns:   0 when the table was printed or no GPU was listed;
#            1 when nvidia-smi is missing or the query itself fails
# --------------------------
get_gpu_info() {
  local gpu_info idx bus sn

  echo "=== GPU 设备信息 ==="

  if ! command -v nvidia-smi &> /dev/null; then
    echo "错误:未找到 nvidia-smi 命令,请确认已安装NVIDIA驱动"
    print_sep
    return 1
  fi

  # Query fields: index, pci.bus_id, serial (CSV, no header row).
  # The status is checked explicitly: a failing query must neither abort the
  # whole script through `set -e` nor be mistaken for "no GPU present" when
  # the caller runs this function in a context where errexit is suppressed.
  if ! gpu_info=$(nvidia-smi --query-gpu=index,pci.bus_id,serial --format=csv,noheader,nounits); then
    echo "错误:nvidia-smi 查询失败,请检查NVIDIA驱动状态"
    # Show whatever the tool wrote to stdout so the diagnostic is not lost.
    if [[ -n "$gpu_info" ]]; then
      printf '%s\n' "$gpu_info"
    fi
    print_sep
    return 1
  fi

  if [[ -z "$gpu_info" ]]; then
    echo "未检测到NVIDIA GPU设备"
    print_sep
    return 0
  fi

  # Table header.
  printf '%s\t%s\t%s\n' "序号" "Bus号(PCI地址)" "SN号"
  printf '%s\t%s\t%s\n' "----" "----------------" "-----"

  # One row per GPU; the index column is the index reported by nvidia-smi.
  # Fields are split on ',' and then trimmed because the CSV output may pad
  # values with spaces. printf '%s' keeps backslashes in the data literal.
  while IFS=',' read -r idx bus sn; do
    printf '%s\t%s\t%s\n' \
      "$(_gpu_trim "$idx")" "$(_gpu_trim "$bus")" "$(_gpu_trim "$sn")"
  done <<< "$gpu_info"

  print_sep
}

# --------------------------
# Report IB NIC information: running index, PCI address and serial number.
# Device filter: lspci lines that mention "Mellanox" and do not match any
# of the patterns "Infi", "X-5" or "0.1".
# Globals:   none
# Arguments: none
# Outputs:   a tab-separated table (or a status message) on stdout,
#            followed by a separator line from print_sep
# Returns:   0 when the table was printed or no device matched;
#            1 when lspci is missing or listing the devices fails
# --------------------------
get_ib_info() {
  local pci_devices pci serial
  local idx=0

  echo "=== IB网卡 设备信息 ==="

  if ! command -v lspci &> /dev/null; then
    echo "错误:未找到 lspci 命令,请确认已安装pciutils"
    print_sep
    return 1
  fi

  # A single awk does the filtering. Unlike a `grep | grep -v` chain it exits
  # 0 when nothing matches, so under `set -o pipefail` an empty result reaches
  # the "no device" branch below instead of aborting the script.
  if ! pci_devices=$(lspci | awk '/Mellanox/ && !/Infi|X-5|0\.1/ { print $1 }'); then
    echo "错误:lspci 执行失败"
    print_sep
    return 1
  fi

  if [[ -z "$pci_devices" ]]; then
    echo "未找到符合条件的Mellanox IB网卡设备"
    print_sep
    return 0
  fi

  # Table header.
  printf '%s\t%s\t%s\n' "序号" "PCI地址" "SN号"
  printf '%s\t%s\t%s\n' "----" "-------" "-----"

  # One row per device, numbered from 0.
  while IFS= read -r pci; do
    # Take the text after the first ": " on every line of the verbose lspci
    # output that contains "serial number" (case-insensitive), collapse
    # whitespace, and join multiple hits with a single space. A device
    # without such a line yields an empty string, which selects the
    # placeholder text below; a failing lookup is treated the same way.
    serial=$(lspci -vvv -s "$pci" | awk -F': ' '
      tolower($0) ~ /serial number/ {
        value = $2
        gsub(/[ \t]+/, " ", value)
        sub(/^ /, "", value)
        sub(/ $/, "", value)
        if (value != "") {
          joined = (joined == "" ? value : joined " " value)
        }
      }
      END { print joined }
    ') || serial=""

    printf '%s\t%s\t%s\n' "$idx" "$pci" "${serial:-未找到SN号}"
    idx=$((idx + 1))
  done <<< "$pci_devices"

  print_sep
}

# Main flow: print the GPU section, then the IB NIC section.
echo "开始收集GPU和IB网卡详细信息..."
print_sep

# Each section is invoked with `|| exit_status=$?` so that a non-zero return
# from one section (e.g. nvidia-smi not installed) does not make `set -e`
# abort the script before the other section has been printed. The last
# non-zero status seen is kept and becomes the script's exit status.
exit_status=0
get_gpu_info || exit_status=$?
get_ib_info || exit_status=$?

echo "信息收集完成!"
exit "$exit_status"