#!/bin/bash
#
# Install or uninstall the MLNX_OFED driver bundle.
#
# Usage:
#   <script> --install   [--version VERSION] [--force]
#   <script> --uninstall [--version VERSION]
#
# --force is parsed into FORCE, but the only check that consulted it is
# currently commented out in install_driver.

# Abort as soon as a command fails without being handled explicitly.
set -e

# ANSI color escape sequences, stored unexpanded; the log helpers expand
# them when printing.
readonly GREEN='\033[1;32m'
readonly RED='\033[1;31m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # reset color

# Colored log output helpers
# Logging helpers. Each prints "<color>[LEVEL] message<reset>" on one line.
# Only the color variables are escape-expanded (%b); the message itself is
# printed verbatim (%s), so backslashes in paths or versions stay intact.
# Globals:   GREEN, RED, YELLOW, NC (read)
# Arguments: the message text (all arguments are joined with spaces)

# Informational message on stdout.
log_info() { printf '%b[INFO] %s%b\n' "$GREEN" "$*" "$NC"; }

# Error message on stderr, then terminate the script with status 1.
log_error() {
  printf '%b[ERROR] %s%b\n' "$RED" "$*" "$NC" >&2
  exit 1
}

# Warning message on stderr; execution continues.
log_warning() { printf '%b[WARNING] %s%b\n' "$YELLOW" "$*" "$NC" >&2; }

# Default settings
# Global settings. parse_args overrides ACTION, DRIVER_VERSION and FORCE;
# DISTRO and ARCH are fixed here and only feed the package/directory names.
ACTION=""                    # "install" or "uninstall"; empty until parsed
DRIVER_VERSION="5.8-6.0.4.2" # version component of the package name
DISTRO="ubuntu22.04"         # distribution component of the package name
ARCH="x86_64"                # architecture component of the package name
FORCE=0                      # set to 1 by --force

# Build the package name and paths
# Derive the package file name, local paths and download URLs from the
# current version/distro/architecture settings. Must be re-run whenever
# one of those settings changes.
# Globals:   DRIVER_VERSION, DISTRO, ARCH (read)
#            DRIVER_PACKAGE, PACKAGE_PATH, DRIVER_DIR,
#            INTERNAL_URL, OFFICIAL_URL (written)
# Arguments: none
generate_package_info() {
  local stem="MLNX_OFED_LINUX-${DRIVER_VERSION}-${DISTRO}-${ARCH}"

  DRIVER_PACKAGE="${stem}.tgz"
  PACKAGE_PATH="/opt/${DRIVER_PACKAGE}"
  DRIVER_DIR="/opt/${stem}"
  INTERNAL_URL="http://10.101.0.51:5588/mlnx-ofed/${DRIVER_PACKAGE}"
  # Placeholder host: replace with the real vendor download address.
  OFFICIAL_URL="https://example.com/${DRIVER_PACKAGE}"
}

# Parse command-line arguments
# Parse the command-line options into the global settings.
#
# Recognised options:
#   --install          select the install action
#   --uninstall        select the uninstall action
#   --version VERSION  override DRIVER_VERSION and rebuild the package info
#   --force            set FORCE=1
# If both --install and --uninstall are given, the last one wins.
#
# Globals:   ACTION, DRIVER_VERSION, FORCE (written)
# Arguments: the script's command-line arguments
# Exits:     through log_error on an unknown option, on a missing --version
#            value, or when no action was selected
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --install)
        ACTION="install"
        shift
        ;;
      --uninstall)
        ACTION="uninstall"
        shift
        ;;
      --version)
        # Reject a missing value, and also a following option that would
        # otherwise be swallowed as the version (e.g. "--version --force").
        if [[ -z "${2:-}" || "$2" == --* ]]; then
          log_error "请指定版本号,如: --version 5.8-6.0.4.2"
        fi
        DRIVER_VERSION="$2"
        # Paths and URLs embed the version, so they must be regenerated.
        generate_package_info
        shift 2
        ;;
      --force)
        FORCE=1
        shift
        ;;
      *)
        log_error "未知参数: $1"
        ;;
    esac
  done

  if [[ -z "$ACTION" ]]; then
    log_error "请指定操作: --install 或 --uninstall"
  fi
}

# Download the driver package
# Make sure the driver package exists at PACKAGE_PATH.
#
# A non-empty local file is used as is. Otherwise the package is fetched
# from the internal mirror, falling back to the official URL. Downloads go
# to "<PACKAGE_PATH>.part" and are renamed only on success, because
# "wget -O" creates the output file even when the transfer fails; writing
# straight to PACKAGE_PATH would leave a truncated file that the next run
# would accept as a valid local package.
#
# Globals:   DRIVER_PACKAGE, PACKAGE_PATH, INTERNAL_URL, OFFICIAL_URL (read)
# Arguments: none
# Exits:     through log_error when both downloads fail
download_driver() {
  local partial="${PACKAGE_PATH}.part"

  log_info "开始下载驱动包: $DRIVER_PACKAGE"

  # -s rather than -f: an empty leftover file is not a usable package.
  if [[ -s "$PACKAGE_PATH" ]]; then
    log_info "使用本地驱动包: $PACKAGE_PATH"
    return 0
  fi

  log_info "本地包不存在,尝试从内网下载"
  if wget -q -O "$partial" "$INTERNAL_URL"; then
    mv -f -- "$partial" "$PACKAGE_PATH"
    log_info "内网下载成功"
    return 0
  fi

  log_warning "内网下载失败,尝试从官网下载"
  if wget -q -O "$partial" "$OFFICIAL_URL"; then
    mv -f -- "$partial" "$PACKAGE_PATH"
    log_info "官网下载成功"
    return 0
  fi

  # Do not leave a partial download behind.
  rm -f -- "$partial"
  log_error "驱动包下载失败,请手动放置到 /opt/"
}

# Install the driver
# Install prerequisites, unpack the driver package into /opt and run the
# bundled installer for the running kernel. Command output is appended to
# /tmp/mlnx_install.log; every failing step is reported through log_error
# instead of aborting silently under "set -e".
#
# Globals:   DRIVER_VERSION, PACKAGE_PATH, DRIVER_DIR (read)
# Arguments: none
# Exits:     through log_error when any step fails
install_driver() {
  local log_file="/tmp/mlnx_install.log"
  local kernel_version

  log_info "开始安装驱动: $DRIVER_VERSION"

  # Disabled "already installed" guard, kept for reference. While it stays
  # disabled, FORCE has no effect on the installation.
  # if command -v ibv_devinfo &> /dev/null && [[ $FORCE -eq 0 ]]; then
  #   log_warning "检测到驱动已安装,使用 --force 覆盖安装"
  #   exit 0
  # fi

  kernel_version=$(uname -r)
  log_info "当前内核版本: $kernel_version"

  log_info "安装依赖包"
  # apt-get is the script-stable front end; plain "apt" warns that its CLI
  # is not meant for scripts.
  apt-get update &>> "$log_file" \
    || log_error "apt-get update 失败,详见 $log_file"
  apt-get install -y net-tools bzip2 &>> "$log_file" \
    || log_error "依赖包安装失败,详见 $log_file"

  log_info "解压驱动包"
  tar -zxf "$PACKAGE_PATH" -C /opt/ \
    || log_error "驱动包解压失败: $PACKAGE_PATH"
  [[ -d "$DRIVER_DIR" ]] \
    || log_error "解压后未找到驱动目录: $DRIVER_DIR"

  log_info "执行驱动安装"
  # Run inside a subshell so the caller's working directory is untouched.
  (
    cd "$DRIVER_DIR" \
      && ./mlnxofedinstall --without-dkms --add-kernel-support \
        --kernel "$kernel_version" --with-fw-update --force
  ) &>> "$log_file" || log_error "驱动安装脚本执行失败,详见 $log_file"

  sleep 10
}

# Uninstall the driver (revised version)
# Uninstall the driver and undo the system changes made at install time.
#
# Steps: make sure the unpacked driver directory exists (re-downloading and
# unpacking the package if needed), run its uninstall.sh, delete the
# directory and the package, stop and disable openibd, strip every
# non-comment line from the two persistent udev rules files, remove the
# nvidia modprobe option file and rebuild the initramfs.
#
# Globals:   DRIVER_VERSION, DRIVER_DIR, PACKAGE_PATH (read)
# Arguments: none
# Exits:     through log_error when the driver directory cannot be obtained
uninstall_driver() {
  local log_file="/tmp/mlnx_install.log"
  local rules_dir="/etc/udev/rules.d"
  local rules_file

  log_info "开始卸载驱动: $DRIVER_VERSION"

  # uninstall.sh ships inside the package, so the unpacked tree is required.
  if [[ ! -d "$DRIVER_DIR" ]]; then
    log_warning "驱动目录不存在,尝试重新下载和解压"
    download_driver # same download logic as the install path
    log_info "解压驱动包"
    tar -zxf "$PACKAGE_PATH" -C /opt/ \
      || log_error "驱动包解压失败: $PACKAGE_PATH"
    if [[ ! -d "$DRIVER_DIR" ]]; then
      log_error "解压失败,无法找到驱动目录: $DRIVER_DIR"
    fi
    log_info "成功解压驱动包到: $DRIVER_DIR"
  else
    log_info "找到驱动目录: $DRIVER_DIR"
  fi

  log_info "执行卸载脚本"
  # Subshell: the directory is deleted right below, and the rest of the
  # function must not keep running from a removed working directory.
  ( cd "$DRIVER_DIR" && ./uninstall.sh -q -y ) &>> "$log_file" \
    || log_warning "卸载脚本执行失败,尝试手动清理"

  log_info "清理残留文件"
  # ${var:?} refuses to expand an empty path into "rm -rf".
  rm -rf -- "${DRIVER_DIR:?}" "${PACKAGE_PATH:?}"

  log_info "停止并禁用openibd服务"
  # Best effort: the unit may already be gone after uninstall.sh.
  systemctl stop openibd.service &>> "$log_file" || true
  systemctl disable openibd.service &>> "$log_file" || true

  log_info "恢复网卡命名规则"
  for rules_file in \
    "$rules_dir/70-persistent-ipoib.rules" \
    "$rules_dir/70-persistent-net.rules"; do
    # A missing rules file means there is nothing to reset; without this
    # guard sed would fail and "set -e" would abort the uninstall halfway.
    if [[ -f "$rules_file" ]]; then
      sed -i '/^\s*#/!d' "$rules_file"
    fi
  done
  rm -f /etc/modprobe.d/nvidia-gsp.conf
  update-initramfs -u &>> "$log_file"
}

# Configure NIC naming rules
# Write persistent udev naming rules for the devices reported by
# "ibdev2netdev -v", excluding report lines that contain "200G" or "25G".
#
# For each remaining device (identified by the first column of the report)
# one rule renames the RDMA device to mlx5_<N>, N counting up from 20, and
# one rule names the network interface ib<M>, M counting up from 0. The
# existing rules files are copied to "<file>.bak" and stripped of all
# non-comment lines first. Finally the nvidia modprobe option file is
# written, the initramfs is rebuilt and openibd is restarted.
#
# Globals:   none
# Arguments: none
# Exits:     through log_error when update-initramfs or the openibd restart
#            fails
configure_naming_rules() {
  local log_file="/tmp/mlnx_install.log"
  local rules_dir="/etc/udev/rules.d"
  local ipoib_rules="$rules_dir/70-persistent-ipoib.rules"
  local net_rules="$rules_dir/70-persistent-net.rules"
  local rules_file device
  local rdma_index=20
  local net_index=0
  local -a devices=()

  log_info "配置IB网卡命名规则"

  log_info "备份原有规则"
  # Best effort: a rules file may not exist yet on a fresh system.
  for rules_file in "$ipoib_rules" "$net_rules"; do
    cp "$rules_file" "${rules_file}.bak" &>> "$log_file" || true
  done

  log_info "清除原有规则"
  # Keep comment lines only, so repeated runs do not accumulate rules.
  for rules_file in "$ipoib_rules" "$net_rules"; do
    sed -i '/^\s*#/!d' "$rules_file" &>> "$log_file" || true
  done

  # Query the device list once so both rule sets cover the same devices in
  # the same order.
  mapfile -t devices < <(
    ibdev2netdev -v | grep -Ev '200G|25G' | awk 'NF { print $1 }'
  )
  if (( ${#devices[@]} == 0 )); then
    log_warning "未检测到需要配置的IB设备,未生成任何命名规则"
  fi

  log_info "生成IB设备命名规则"
  for device in "${devices[@]}"; do
    # %%k emits a literal %k, which udev substitutes when the rule runs.
    printf 'ACTION=="add", KERNELS=="%s", SUBSYSTEM=="infiniband",PROGRAM="rdma_rename %%k NAME_FIXED mlx5_%s"\n' \
      "$device" "$rdma_index" >> "$ipoib_rules"
    rdma_index=$((rdma_index + 1))
  done

  log_info "生成网络设备命名规则"
  for device in "${devices[@]}"; do
    printf 'SUBSYSTEM=="net", ACTION=="add", KERNELS=="%s", NAME="ib%s"\n' \
      "$device" "$net_index" >> "$net_rules"
    net_index=$((net_index + 1))
  done

  log_info "配置nvidia选项"
  echo "options nvidia NVreg_EnableGpuFirmware=0" > /etc/modprobe.d/nvidia-gsp.conf
  update-initramfs -u &>> "$log_file" \
    || log_error "update-initramfs 执行失败,详见 $log_file"

  log_info "重启openibd服务"
  systemctl restart openibd.service \
    || log_error "openibd 服务重启失败"
  sleep 15
}

# Check the driver installation result
# Verify the installation: ibv_devinfo must be available, and the devices
# listed by "ibdev2netdev -v" (minus lines containing "200G" or "25G") are
# checked against the naming scheme.
#
# The first column of the report is what the naming rules match on with
# KERNELS==, so it is not a device name and cannot be handed to udevadm or
# "ip link". The names are therefore read from the report itself: column 2
# (RDMA device) and the token following "==>" (network interface). A device
# counts as configured when its RDMA name contains "mlx5_" or its interface
# name contains "ib" followed by a digit.
# NOTE(review): an RDMA device that already has a stock mlx5_<n> name also
# satisfies the first test; tighten it if only renamed devices should pass.
#
# Globals:   none
# Arguments: none
# Exits:     through log_error when ibv_devinfo is not available
check_installation() {
  local valid_count=0
  local device rdma_name net_name

  log_info "检查驱动安装结果"
  if command -v ibv_devinfo &> /dev/null; then
    log_info "驱动安装成功"
  else
    log_error "驱动安装失败"
  fi

  log_info "检查网卡命名规则"
  while read -r device rdma_name net_name; do
    [[ -n "$device" ]] || continue
    if [[ "$rdma_name" == *mlx5_* || "$net_name" =~ ib[0-9] ]]; then
      valid_count=$((valid_count + 1))
    else
      log_warning "网卡 $device 命名规则未生效"
    fi
  done < <(
    ibdev2netdev -v \
      | grep -Ev '200G|25G' \
      | awk 'NF {
          net = ""
          for (k = 1; k < NF; k++) if ($k == "==>") net = $(k + 1)
          print $1, $2, net
        }'
  )

  if (( valid_count > 0 )); then
    log_info "网卡命名规则生效,成功配置 $valid_count 个网卡"
  else
    log_warning "所有网卡命名规则均未生效,建议重启系统手工配置!"
  fi
}

# Check the uninstallation result
# Report whether the uninstall left anything behind: the ibv_devinfo
# command should no longer resolve and DRIVER_DIR should be gone. Findings
# are only logged; this function never aborts the script.
#
# Globals:   DRIVER_DIR (read)
# Arguments: none
check_uninstallation() {
  log_info "检查卸载结果"

  if command -v ibv_devinfo &> /dev/null; then
    log_warning "驱动命令仍存在,可能需要手动清理"
  else
    log_info "驱动已成功卸载"
  fi

  if [[ -d "$DRIVER_DIR" ]]; then
    log_warning "驱动目录未完全删除: $DRIVER_DIR"
  else
    log_info "驱动目录已删除"
  fi
}

# Main function
# Entry point: build the default package info, parse the command line and
# run the selected workflow.
#   install:   download_driver, install_driver, configure_naming_rules,
#              check_installation
#   uninstall: uninstall_driver, check_uninstallation
#
# Globals:   ACTION, DRIVER_VERSION (read after parse_args has set them)
# Arguments: the script's command-line arguments
main() {
  # Defaults first; parse_args regenerates them if --version is given.
  generate_package_info
  parse_args "$@"

  log_info "开始执行 ${ACTION^^} 操作,版本: $DRIVER_VERSION"

  case "$ACTION" in
    install)
      download_driver
      install_driver
      configure_naming_rules
      check_installation
      ;;
    uninstall)
      uninstall_driver
      check_uninstallation
      ;;
    *)
      # Defensive: parse_args only ever sets the two actions above.
      log_error "未知操作: $ACTION"
      ;;
  esac

  log_info "操作完成!"
}

# Run the main function
# Forward every command-line argument to main unchanged.
main "$@"