更新 scripts/ib-drive.sh
This commit is contained in:
parent
3eee97ee4e
commit
402cf5312b
|
|
@ -7,16 +7,20 @@ IFS=$'\n\t'
|
|||
#================================
|
||||
# Per-run log file; the timestamp gives every invocation its own file.
LOG_FILE="/var/log/mlnx_driver_install_$(date +%Y%m%d%H%M%S).log"
DRIVER_NAME="MLNX_OFED"
APP_VERSION=""   # no default any more: must be specified explicitly
ARCH="x86_64"
DISTRO=""        # no default any more: must be specified explicitly
APP_DIR="/opt"
FORCE=0

# Distributions accepted by the script
SUPPORTED_DISTROS=("ubuntu22.04" "ubuntu24.04")

# Colour definitions (escape sequences, rendered by `echo -e`)
GREEN='\033[1;32m'    # green  - success
RED='\033[1;31m'      # red    - failure / error
BLUE='\033[1;34m'     # blue   - titles / information
YELLOW='\033[1;33m'   # yellow - warnings / progress
NC='\033[0m'          # reset colour
|
||||
|
||||
#================================
|
||||
|
|
@ -27,14 +31,15 @@ log() {
|
|||
local message="$2"
|
||||
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
|
||||
# 颜色输出 - 仅SUCCESS和ERROR使用特殊颜色
|
||||
# 颜色输出
|
||||
case "$level" in
|
||||
"SUCCESS") echo -e "${GREEN}[${level}]${NC} $message" ;;
|
||||
"ERROR") echo -e "${RED}[${level}]${NC} $message" ;;
|
||||
"WARN") echo -e "${YELLOW}[${level}]${NC} $message" ;;
|
||||
*) echo -e "${BLUE}[${level}]${NC} $message" ;;
|
||||
esac
|
||||
|
||||
# 记录到日志文件(不包含颜色)
|
||||
# 记录到日志文件
|
||||
echo "[$timestamp] [$level] $message" >> "$LOG_FILE"
|
||||
}
|
||||
|
||||
|
|
@ -54,7 +59,8 @@ error() {
|
|||
run_cmd() {
|
||||
local command="$1"
|
||||
local description="${2:-"执行命令"}"
|
||||
log "INFO" "$description: $command"
|
||||
log "INFO" "$description"
|
||||
#log "INFO" "$description: $command"
|
||||
|
||||
# 执行命令并捕获输出
|
||||
local output
|
||||
|
|
@ -74,7 +80,7 @@ test_network() {
|
|||
local url="$1"
|
||||
log "INFO" "测试网络连接: $url"
|
||||
|
||||
# 直接测试URL连通性,设置5秒超时
|
||||
# 直接测试URL连通性
|
||||
if ! curl -fsSLI --connect-timeout 5 "$url" &>> "$LOG_FILE"; then
|
||||
log "WARN" "网络源不可用: $url"
|
||||
return 1
|
||||
|
|
@ -84,25 +90,89 @@ test_network() {
|
|||
}
|
||||
|
||||
#================================
|
||||
# 下载文件
|
||||
# 下载文件 - 带进度条(使用curl)
|
||||
#================================
|
||||
#######################################
# Download a URL to a local file with curl, echoing curl's progress output.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - source URL
#            $2 - destination file path
# Outputs:   progress lines on stdout; log entries through log()
# Returns:   0 on success. A failed transfer, or a result smaller than
#            10 MiB, is reported through error() (expected to terminate the
#            script; the explicit `return 1` covers the case where it does not).
#######################################
download_file() {
  local url="$1"
  local dest="$2"
  local progress_file curl_pid last_line file_size
  local exit_status=0

  log "INFO" "开始下载: $url"

  # curl writes its progress meter to stderr; capture it in a temp file so the
  # latest state can be re-printed while the transfer runs in the background.
  if ! progress_file=$(mktemp); then
    error "创建临时文件失败"
    return 1
  fi

  echo -e "${YELLOW}[DOWNLOAD]${NC} 下载进度:"

  # -f makes HTTP errors (4xx/5xx) fail the transfer instead of silently
  # saving the server's error page as the package.
  curl -# -f -L -o "$dest" "$url" 2>"$progress_file" &
  curl_pid=$!

  # Poll until the background transfer exits, showing the newest progress line.
  while kill -0 "$curl_pid" 2>/dev/null; do
    if [[ -s "$progress_file" ]]; then
      last_line=$(tail -n 1 "$progress_file")
      echo -ne "\r${YELLOW}[DOWNLOAD]${NC} $last_line"
    fi
    sleep 2
  done

  # Collect curl's exit status. The `||` form keeps a non-zero status from
  # aborting the shell under `set -e` before the failure can be reported.
  wait "$curl_pid" || exit_status=$?

  # Print the final progress state once more, terminated by a newline.
  if [[ -s "$progress_file" ]]; then
    last_line=$(tail -n 1 "$progress_file")
    echo -e "\r${YELLOW}[DOWNLOAD]${NC} $last_line"
  fi

  rm -f -- "$progress_file"

  if (( exit_status != 0 )); then
    error "下载失败: $url (错误码: $exit_status)"
    return 1
  fi

  echo -e "\r${YELLOW}[DOWNLOAD]${NC} 下载完成! "
  log "INFO" "下载完成: $dest"

  # Sanity check: anything under 10 MiB is treated as a broken download.
  # A missing/unreadable file is reported as size 0 through the same path.
  file_size=$(stat -c%s "$dest" 2>/dev/null) || file_size=0
  if (( file_size < 10485760 )); then
    log "ERROR" "下载的文件大小异常: $file_size 字节"
    log "ERROR" "请检查网络连接或下载源的可用性"
    error "建议手动下载后放置到/opt目录"
    return 1
  fi
}
|
||||
|
||||
#================================
|
||||
# 生成包信息
|
||||
#================================
|
||||
generate_package_info() {
|
||||
# 检查是否设置了必需的参数
|
||||
if [[ -z "$APP_VERSION" ]]; then
|
||||
error "必须指定驱动版本,请使用 --version 参数"
|
||||
fi
|
||||
|
||||
if [[ -z "$DISTRO" ]]; then
|
||||
error "必须指定系统版本,请使用 --distro 参数"
|
||||
fi
|
||||
|
||||
# 使用改进的系统版本检查方法
|
||||
local found=false
|
||||
for supported in "${SUPPORTED_DISTROS[@]}"; do
|
||||
if [[ "$supported" == "$DISTRO" ]]; then
|
||||
found=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ "$found" == "false" ]]; then
|
||||
error "不支持的系统版本: $DISTRO。支持的系统: ${SUPPORTED_DISTROS[*]}"
|
||||
fi
|
||||
|
||||
DRIVER_PACKAGE="${DRIVER_NAME}_LINUX-${APP_VERSION}-${DISTRO}-${ARCH}.tgz"
|
||||
PACKAGE_PATH="${APP_DIR}/${DRIVER_PACKAGE}"
|
||||
DRIVER_DIR="${APP_DIR}/${DRIVER_NAME}_LINUX-${APP_VERSION}-${DISTRO}-${ARCH}"
|
||||
|
|
@ -110,18 +180,34 @@ generate_package_info() {
|
|||
}
|
||||
|
||||
#================================
|
||||
# 下载驱动包(优化版)
|
||||
# 生成下载URL
|
||||
#================================
|
||||
#######################################
# Build the ordered list of places to look for the driver package.
# Globals:   PACKAGE_PATH, DRIVER_PACKAGE, APP_VERSION (read)
#            DOWNLOAD_SOURCES (overwritten)
# Arguments: none
# Returns:   0
#######################################
generate_download_urls() {
  # Assigning the array literal directly replaces any previous contents and
  # avoids a helper array plus a copy loop (whose loop variable leaked into
  # the global scope). Order is the lookup priority.
  DOWNLOAD_SOURCES=(
    "$PACKAGE_PATH"                                                                 # local file first
    "http://10.101.0.51:5588/mlnx-ofed/${DRIVER_PACKAGE}"                           # intranet mirror 1
    "http://10.102.32.207:5588/mlnx-ofed/${DRIVER_PACKAGE}"                         # intranet mirror 2
    "https://content.mellanox.com/ofed/MLNX_OFED-${APP_VERSION}/${DRIVER_PACKAGE}"  # vendor site
  )
}
|
||||
|
||||
#================================
|
||||
# 下载驱动包
|
||||
#================================
|
||||
download_driver() {
|
||||
log "TITLE" "${BLUE}开始获取驱动安装包${NC}"
|
||||
|
||||
# 定义下载源列表 (按优先级排序)
|
||||
DOWNLOAD_SOURCES=(
|
||||
"${PACKAGE_PATH}" # 本地文件
|
||||
"http://10.101.0.51:5588/mlnx-ofed/${DRIVER_PACKAGE}" # 内部源1
|
||||
"http://10.102.32.207:5588/mlnx-ofed/${DRIVER_PACKAGE}" # 内部源2
|
||||
"https://www.mellanox.com/downloads/ofed/${DRIVER_PACKAGE}" # 公共源
|
||||
)
|
||||
# 生成下载URL - 按优先级排序
|
||||
generate_download_urls
|
||||
|
||||
# 查找可用的下载源
|
||||
DOWNLOAD_URL=""
|
||||
|
|
@ -140,13 +226,14 @@ download_driver() {
|
|||
fi
|
||||
else
|
||||
# 网络URL检查
|
||||
if ! test_network "$source"; then
|
||||
continue # 跳过不可用源
|
||||
fi
|
||||
|
||||
log "INFO" "检查网络源: $source"
|
||||
if test_network "$source"; then
|
||||
DOWNLOAD_URL="$source"
|
||||
log "INFO" "${BLUE}找到可用下载源: $DOWNLOAD_URL${NC}"
|
||||
break
|
||||
else
|
||||
log "WARN" "网络源不可用: $source"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
|
|
@ -154,60 +241,72 @@ download_driver() {
|
|||
if [[ -z "$DOWNLOAD_URL" ]]; then
|
||||
log "ERROR" "无法找到可用的下载源"
|
||||
log "ERROR" "请检查网络连接或手动下载安装包到/opt目录"
|
||||
error "下载地址: https://www.mellanox.com/downloads/ofed"
|
||||
error "官网下载地址: ${DOWNLOAD_SOURCES[-1]}"
|
||||
fi
|
||||
|
||||
# 下载文件
|
||||
TEMP_FILE="/tmp/${DRIVER_PACKAGE}"
|
||||
TEMP_FILE="/opt/${DRIVER_PACKAGE}"
|
||||
log "INFO" "准备获取驱动包..."
|
||||
|
||||
if [[ "$DOWNLOAD_URL" == /* ]]; then
|
||||
# 使用本地文件
|
||||
log "INFO" "使用本地文件: $DOWNLOAD_URL"
|
||||
run_cmd "cp $DOWNLOAD_URL $TEMP_FILE" "复制本地文件到临时目录"
|
||||
#run_cmd "cp $DOWNLOAD_URL $TEMP_FILE" "复制本地文件到临时目录"
|
||||
else
|
||||
# 从网络下载
|
||||
log "INFO" "从网络下载: $DOWNLOAD_URL"
|
||||
download_file "$DOWNLOAD_URL" "$TEMP_FILE"
|
||||
fi
|
||||
|
||||
# 验证文件完整性
|
||||
log "INFO" "验证下载文件的完整性..."
|
||||
file_size=$(stat -c%s "$TEMP_FILE")
|
||||
|
||||
if [[ $file_size -lt 10485760 ]]; then # 检查文件大小是否小于10MB
|
||||
log "ERROR" "下载的文件大小异常: $file_size 字节"
|
||||
log "ERROR" "请检查网络连接或下载源的可用性"
|
||||
error "建议手动下载后放置到/opt目录"
|
||||
fi
|
||||
|
||||
log "SUCCESS" "文件完整性验证通过: $file_size 字节"
|
||||
return 0
|
||||
}
|
||||
|
||||
#================================
|
||||
# 其他函数保持不变...
|
||||
#================================
|
||||
# 安装驱动
|
||||
#================================
|
||||
#######################################
# Install distro prerequisites, unpack the driver package and run the
# vendor installer for the running kernel.
# Globals:   DISTRO, APP_VERSION, APP_DIR, TEMP_FILE, DRIVER_DIR, BLUE, NC (read)
#            kernel_version (written; left global as in the original, since
#            other parts of the script not visible here may read it)
# Arguments: none
# Returns:   0 on success; an unsupported DISTRO is reported through error()
#######################################
install_driver() {
  local dep_banner dep_pkgs dep_desc install_opts install_desc
  local q_pkg q_app_dir q_driver_dir q_kernel

  log "TITLE" "${BLUE}开始安装驱动: $APP_VERSION${NC}"

  kernel_version=$(uname -r)
  log "INFO" "当前内核版本: $kernel_version"

  log "INFO" "安装依赖包"
  # Everything that differs between distributions is selected in one place.
  case "$DISTRO" in
    "ubuntu22.04")
      dep_banner="安装Ubuntu 22.04依赖包"
      dep_pkgs="net-tools bzip2"
      dep_desc="安装依赖包"
      install_opts="--without-dkms --add-kernel-support"
      install_desc="执行驱动安装"
      ;;
    "ubuntu24.04")
      dep_banner="安装Ubuntu 24.04依赖包"
      dep_pkgs="net-tools bzip2 dkms"
      dep_desc="安装依赖包(包含dkms)"
      install_opts="--add-kernel-support"
      install_desc="执行驱动安装(Ubuntu 24.04)"
      ;;
    *)
      error "不支持的系统版本: $DISTRO"
      return 1
      ;;
  esac

  # run_cmd receives the command as a single string (it must be evaluated by a
  # shell, given the `cd ... && ...` form), so shell-quote every interpolated
  # value. For ordinary paths %q yields the value unchanged.
  # NOTE(review): assumes run_cmd uses eval / bash -c - confirm against run_cmd.
  printf -v q_pkg '%q' "$TEMP_FILE"
  printf -v q_app_dir '%q' "$APP_DIR"
  printf -v q_driver_dir '%q' "$DRIVER_DIR"
  printf -v q_kernel '%q' "$kernel_version"

  log "INFO" "$dep_banner"
  run_cmd "apt update" "更新软件包索引"
  run_cmd "apt install -y ${dep_pkgs}" "$dep_desc"

  log "INFO" "解压驱动包"
  run_cmd "tar -zxf ${q_pkg} -C ${q_app_dir}" "解压驱动包"

  log "INFO" "执行驱动安装"
  run_cmd "cd ${q_driver_dir} && ./mlnxofedinstall ${install_opts} --kernel ${q_kernel} --with-fw-update --force" "$install_desc"

  # Fixed pause after the installer, kept from the original.
  sleep 10
}
|
||||
|
||||
#================================
|
||||
# 卸载驱动
|
||||
#================================
|
||||
uninstall_driver() {
|
||||
log "TITLE" "${BLUE}开始卸载驱动: $APP_VERSION${NC}"
|
||||
|
||||
|
|
@ -226,23 +325,23 @@ uninstall_driver() {
|
|||
fi
|
||||
|
||||
# 执行卸载
|
||||
run_cmd "cd $DRIVER_DIR && ./uninstall.sh -q -y" "执行卸载脚本" || log "WARN" "卸载脚本执行失败,尝试手动清理"
|
||||
run_cmd "cd $DRIVER_DIR && DEBIAN_FRONTEND=noninteractive && ./uninstall.sh -q --force 2>/dev/null" "执行卸载脚本" || log "WARN" "卸载脚本执行失败,尝试手动清理"
|
||||
|
||||
log "INFO" "清理残留文件"
|
||||
run_cmd "rm -rf $DRIVER_DIR $TEMP_FILE" "删除驱动目录和临时文件"
|
||||
|
||||
log "INFO" "停止并禁用openibd服务"
|
||||
run_cmd "systemctl stop openibd.service || true" "停止openibd服务"
|
||||
run_cmd "systemctl disable openibd.service || true" "禁用openibd服务"
|
||||
|
||||
log "INFO" "恢复网卡命名规则"
|
||||
run_cmd "sed -i '/^\s*#/!d' /etc/udev/rules.d/70-persistent-ipoib.rules &>> /tmp/mlnx_install.log||true" "清理IPOIB规则"
|
||||
run_cmd "sed -i '/^\s*#/!d' /etc/udev/rules.d/70-persistent-net.rules &>> /tmp/mlnx_install.log|true" "清理网络规则"
|
||||
run_cmd ">/etc/udev/rules.d/70-persistent-ipoib.rules &>> /tmp/mlnx_install.log||true" "清理IPOIB规则"
|
||||
run_cmd ">/etc/udev/rules.d/70-persistent-net.rules &>> /tmp/mlnx_install.log|true" "清理网络规则"
|
||||
run_cmd "rm -f /etc/modprobe.d/nvidia-gsp.conf" "删除nvidia配置"
|
||||
run_cmd "update-initramfs -u" "更新initramfs"
|
||||
}
|
||||
|
||||
#================================
|
||||
# 配置网卡命名规则
|
||||
#================================
|
||||
configure_naming_rules() {
|
||||
log "TITLE" "${BLUE}配置IB网卡命名规则${NC}"
|
||||
|
||||
|
|
@ -281,7 +380,9 @@ configure_naming_rules() {
|
|||
sleep 15
|
||||
}
|
||||
|
||||
#================================
|
||||
# 检查驱动安装结果
|
||||
#================================
|
||||
check_installation() {
|
||||
log "TITLE" "${BLUE}检查驱动安装结果${NC}"
|
||||
if command -v ibv_devinfo &> /dev/null; then
|
||||
|
|
@ -289,30 +390,11 @@ check_installation() {
|
|||
else
|
||||
error "驱动安装失败"
|
||||
fi
|
||||
|
||||
log "INFO" "检查网卡命名规则"
|
||||
valid_count=0
|
||||
for dev in $(ibdev2netdev -v | grep -v 200G | grep -v 25G | awk '{print $1}'); do
|
||||
if [ -n "$dev" ]; then
|
||||
mlx_name=$(udevadm info -q name -n "$dev" 2>/dev/null | grep "mlx5_" || true)
|
||||
net_name=$(ip link show "$dev" | grep "ib[0-9]" || true)
|
||||
|
||||
if [ -n "$mlx_name" ] || [ -n "$net_name" ]; then
|
||||
valid_count=$((valid_count+1))
|
||||
else
|
||||
log "WARN" "网卡 $dev 命名规则未生效"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $valid_count -gt 0 ]; then
|
||||
log "SUCCESS" "网卡命名规则生效,成功配置 $valid_count 个网卡"
|
||||
else
|
||||
log "WARN" "所有网卡命名规则均未生效,建议重启系统手工配置!"
|
||||
fi
|
||||
}
|
||||
|
||||
#================================
|
||||
# 检查卸载结果
|
||||
#================================
|
||||
check_uninstallation() {
|
||||
log "TITLE" "${BLUE}检查卸载结果${NC}"
|
||||
if ! command -v ibv_devinfo &> /dev/null; then
|
||||
|
|
@ -321,66 +403,110 @@ check_uninstallation() {
|
|||
log "WARN" "驱动命令仍存在,可能需要手动清理"
|
||||
fi
|
||||
|
||||
if [ ! -d "$DRIVER_DIR" ]; then
|
||||
log "SUCCESS" "驱动目录已删除"
|
||||
else
|
||||
log "WARN" "驱动目录未完全删除: $DRIVER_DIR"
|
||||
fi
|
||||
}
|
||||
|
||||
#================================
|
||||
# 显示帮助信息
|
||||
#================================
|
||||
#######################################
# Print the usage text.
# Globals:   SUPPORTED_DISTROS (read)
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   0
#######################################
show_help() {
  local distro_list
  # The script runs with IFS=$'\n\t', and "${array[*]}" joins elements with the
  # first character of IFS - i.e. a newline - which would break the --distro
  # line in two. Join with a space instead; `local IFS` is restored on return.
  local IFS=' '
  distro_list="${SUPPORTED_DISTROS[*]}"

  cat <<EOF
MLNX_OFED驱动管理脚本
用法: $0 [选项] [命令]

命令:
 --install 安装驱动
 --uninstall 卸载驱动

选项:
 --version <版本号> 指定驱动版本 (必需)
 --distro <系统> 指定系统版本 (支持: ${distro_list}, 必需)
 --force 强制操作
 --help 显示此帮助信息

示例:
 $0 --install --distro ubuntu24.04 --version 5.9-1.0.8.0
 $0 --uninstall --distro ubuntu22.04 --version 5.8-3.0.7.0
EOF
}
|
||||
|
||||
#================================
|
||||
# 主函数
|
||||
#================================
|
||||
#######################################
# Entry point: parse the command line, validate it, then run the requested
# operation.
# Globals:   COMMAND, APP_VERSION, DISTRO, FORCE (written); BLUE, NC (read)
# Arguments: "$@" - any order of:
#              --install | --uninstall      operation to perform
#              --version <ver>              driver version
#              --distro <name>              target distribution
#              --force                      sets FORCE=1
#              --help                       print usage and exit 0
# Returns:   0 on success; problems are reported through error() / exit 1
#######################################
main() {
  log "TITLE" "${BLUE}MLNX驱动管理脚本启动========================================================${NC}"

  # Root is required
  if [[ $EUID -ne 0 ]]; then
    error "此脚本需要root权限运行"
  fi

  if [[ $# -lt 1 ]]; then
    log "ERROR" "请指定操作: --install 或 --uninstall"
    exit 1
  fi

  # Pass 1: only record what was asked for. Nothing is executed here, so the
  # options may appear in any order relative to --install / --uninstall.
  COMMAND=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      "--install")
        COMMAND="install"
        ;;
      "--uninstall")
        COMMAND="uninstall"
        ;;
      "--version")
        # ${2:-} keeps a missing value from tripping `set -u`
        if [[ -n "${2:-}" ]]; then
          APP_VERSION="$2"
          log "INFO" "设置驱动版本: $APP_VERSION"
          shift 1   # consume the value; the option itself is shifted below
        else
          error "请指定版本号,如: --version 5.8-3.0.7.0"
        fi
        ;;
      "--distro")
        if [[ -n "${2:-}" ]]; then
          DISTRO="$2"
          log "INFO" "设置系统版本: $DISTRO"
          shift 1   # consume the value; the option itself is shifted below
        else
          error "请指定系统版本,如: --distro ubuntu22.04"
        fi
        ;;
      "--force")
        FORCE=1
        ;;
      "--help")
        show_help
        exit 0
        ;;
      *)
        error "未知参数: $1"
        ;;
    esac
    shift 1
  done

  # An operation is mandatory
  if [[ -z "$COMMAND" ]]; then
    error "请指定操作: --install 或 --uninstall"
  fi

  # Derive the package names/paths; generate_package_info is called only now,
  # after --version / --distro have been read from the command line.
  generate_package_info

  # Pass 2: run the requested operation exactly once
  case "$COMMAND" in
    "install")
      download_driver
      install_driver
      configure_naming_rules
      check_installation
      ;;
    "uninstall")
      uninstall_driver
      check_uninstallation
      ;;
    *)
      error "未知命令: $COMMAND"
      ;;
  esac

  log "TITLE" "========================================================================${NC}"
}
|
||||
|
||||
# 执行主函数
|
||||
main "$@"
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue