#!/bin/bash
#
# Install or uninstall the NVIDIA Linux driver from a self-extracting .run
# package and manage two boot-time systemd units (nvidia_peermem and
# nvidia-persistenced).
#
# Usage:
#   <script> --install   [--version X.Y.Z]
#   <script> --uninstall [--version X.Y.Z]
#
# The package is looked up in INSTALL_DIR first and otherwise downloaded from
# the internal mirror, then from the official NVIDIA site. Command output is
# appended to LOG_FILE; only step/error messages are shown on the console.
#
# NOTE(review): the reviewed copy of this file had its line breaks collapsed
# and the text between the first here-document and the tail of install_driver
# stripped. Every function marked "NOTE(review): reconstructed" below was
# rebuilt from the surviving fragments and call sites and MUST be compared
# against the original before this script is used.
set -euo pipefail

# ---------------------------------------------------------------------------
# Global settings
# ---------------------------------------------------------------------------
DEFAULT_VERSION="565.57.01"
INTERNAL_BASE_URL="http://10.101.0.51:5588/nvidia-linux"
# OFFICIAL_BASE_URL depends on the requested version, so it is assigned after
# argument parsing (see below).
PACKAGE_TEMPLATE="NVIDIA-Linux-x86_64-%s.run"
INSTALL_DIR="/opt"
LOG_FILE="/var/log/nvidia-driver_$(date +%Y%m%d%H%M%S).log"
SERVICE_FILE="/etc/systemd/system/nvidia_peermem.service"
PERSISTENCE_SERVICE="/etc/systemd/system/nvidia-persistenced.service"

# Console colours
GREEN='\033[1;32m'
RED='\033[1;31m'
NC='\033[0m' # reset colour

# ---------------------------------------------------------------------------
# Logging helpers
# ---------------------------------------------------------------------------

# Append a timestamped line to LOG_FILE.
# Arguments: $* - message text
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >> "$LOG_FILE"
}

# Announce a step: green on the console, plain text in the log.
# Arguments: $1 - message
step() {
  local msg="==> $1"
  printf '%b\n' "${GREEN}${msg}${NC}"
  log "$msg"
}

# Report a fatal error (red, on stderr), log it and terminate with status 1.
# Arguments: $1 - message
error() {
  local msg="错误: $1"
  printf '%b\n' "${RED}${msg}${NC}" >&2
  log "$msg"
  exit 1
}

# Run a command line with its output redirected to the log; abort on failure.
# Arguments: $1 - command line, passed to eval
# NOTE(review): eval executes $1 as shell source. Only pass text built from
# trusted or validated values (VERSION is validated during argument parsing).
run_cmd() {
  step "执行: $1"
  eval "$1" &>> "$LOG_FILE" || error "命令执行失败: $1"
}

# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------
ACTION=""
VERSION="$DEFAULT_VERSION"
while [[ $# -gt 0 ]]; do
  case "$1" in
    --install)
      ACTION="install"
      shift
      ;;
    --uninstall)
      ACTION="uninstall"
      shift
      ;;
    --version)
      # Without this check a trailing "--version" trips `set -u` on $2 and
      # dies with an unhelpful "unbound variable" message.
      [[ -n "${2:-}" ]] || error "--version 需要指定版本号"
      VERSION="$2"
      shift 2
      ;;
    *)
      error "未知参数 $1"
      ;;
  esac
done

# VERSION ends up in file names, URLs and an eval'ed command line, so restrict
# it to characters that cannot be interpreted by the shell.
[[ "$VERSION" =~ ^[0-9A-Za-z._-]+$ ]] || error "版本号格式无效: $VERSION"

OFFICIAL_BASE_URL="https://cn.download.nvidia.com/tesla/${VERSION}"

if [[ -z "$ACTION" ]]; then
  error "必须指定 --install 或 --uninstall"
fi

# ---------------------------------------------------------------------------
# systemd unit management
# ---------------------------------------------------------------------------

# Configure nvidia_peermem to be loaded at boot.
# NOTE(review): reconstructed. Only the step message and the
# `cat > "$SERVICE_FILE"` redirect survived in the reviewed copy; the unit
# contents and the systemctl calls are a minimal guess -- confirm.
install_peermem_service() {
  step "开始配置 nvidia_peermem 开机启动"

  # Create the unit file.
  cat > "$SERVICE_FILE" <<'EOF'
[Unit]
Description=Load nvidia_peermem kernel module

[Service]
Type=oneshot
ExecStart=/sbin/modprobe nvidia_peermem
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target
EOF

  run_cmd "systemctl daemon-reload"
  run_cmd "systemctl enable ${SERVICE_FILE##*/}"
}

# Configure the NVIDIA persistence daemon to start at boot.
# NOTE(review): reconstructed. Only the `"$PERSISTENCE_SERVICE"` redirect
# target survived in the reviewed copy; the step message, unit contents and
# systemctl calls are a minimal guess -- confirm.
install_persistence_service() {
  step "开始配置 nvidia-persistenced 开机启动"

  cat > "$PERSISTENCE_SERVICE" <<'EOF'
[Unit]
Description=NVIDIA Persistence Daemon

[Service]
Type=forking
ExecStart=/usr/bin/nvidia-persistenced
ExecStopPost=/bin/rm -rf /var/run/nvidia-persistenced
Restart=always

[Install]
WantedBy=multi-user.target
EOF

  run_cmd "systemctl daemon-reload"
  run_cmd "systemctl enable ${PERSISTENCE_SERVICE##*/}"
}

# Disable a unit and delete its unit file. Best effort: the unit may never
# have been installed, so systemctl failures are logged, not fatal.
# Arguments: $1 - absolute path of the unit file
_remove_unit() {
  local unit_file="$1"
  local unit="${unit_file##*/}"

  systemctl disable --now "$unit" &>> "$LOG_FILE" \
    || log "systemctl disable $unit 失败,继续"
  rm -f -- "$unit_file"
  systemctl daemon-reload &>> "$LOG_FILE" \
    || log "systemctl daemon-reload 失败,继续"
}

# Remove the nvidia_peermem boot unit.
# NOTE(review): reconstructed -- the original body was lost; confirm.
uninstall_peermem_service() {
  _remove_unit "$SERVICE_FILE"
}

# Remove the nvidia-persistenced boot unit.
# NOTE(review): reconstructed -- the original body was lost; confirm.
uninstall_persistence_service() {
  _remove_unit "$PERSISTENCE_SERVICE"
}

# ---------------------------------------------------------------------------
# Package download
# ---------------------------------------------------------------------------

# Download the driver package, trying the internal mirror first and the
# official site second. Exits via error() when every source fails.
# Arguments: $1 - package file name
#            $2 - destination path
# Globals:   INTERNAL_BASE_URL, OFFICIAL_BASE_URL (read)
download_package() {
  local package_name="$1"
  local package_path="$2"
  local -a download_urls=(
    "${INTERNAL_BASE_URL}/${package_name}"
    "${OFFICIAL_BASE_URL}/${package_name}"
  )
  # Download to a side file and move it into place only on success: wget -O
  # creates/truncates its target even when the transfer fails, and a leftover
  # partial file at $package_path would be mistaken for a valid package on
  # the next run.
  local partial_path="${package_path}.part"
  local url

  for url in "${download_urls[@]}"; do
    step "尝试从 $url 下载"
    if wget -q -O "$partial_path" "$url"; then
      mv -f -- "$partial_path" "$package_path"
      return 0
    fi
    rm -f -- "$partial_path"
    step "下载失败,尝试下一个 URL"
  done

  error "无法从任何来源下载 $package_name"
}

# ---------------------------------------------------------------------------
# Driver install / uninstall
# ---------------------------------------------------------------------------

# Install the driver, verify the version and set up the boot-time units.
# NOTE(review): only the tail of this function survived in the reviewed copy
# (`>> "$LOG_FILE" 2>&1 || { log "版本验证失败"; exit 1 }`, the two service
# calls and the final step). The step message, package lookup, installer
# flags (mirroring the `-q -s` used for --uninstall) and the nvidia-smi check
# are reconstructed -- confirm.
install_driver() {
  step "开始安装显卡驱动,版本:$VERSION"

  local package_name package_path
  # PACKAGE_TEMPLATE is a trusted constant used as a format on purpose.
  # shellcheck disable=SC2059
  package_name=$(printf "$PACKAGE_TEMPLATE" "$VERSION")
  package_path="${INSTALL_DIR}/${package_name}"

  if [[ ! -f "$package_path" ]]; then
    download_package "$package_name" "$package_path"
  fi

  cd "$INSTALL_DIR" || error "无法进入目录 $INSTALL_DIR"
  run_cmd "chmod +x $package_name"
  run_cmd "./$package_name -q -s"

  # The running driver must report the version that was just installed.
  { nvidia-smi | grep -F -- "$VERSION"; } >> "$LOG_FILE" 2>&1 || {
    log "版本验证失败"
    exit 1
  }

  install_peermem_service     # load nvidia_peermem at boot
  install_persistence_service # GPU persistence mode at boot
  step "安装完成"
}

# Uninstall the driver using the matching .run package (downloaded when not
# present locally) and remove the boot-time units.
# Returns: 0 on success, 1 when the package is unavailable or the uninstaller
#          fails (previously these paths returned 0, hiding the failure from
#          callers).
uninstall_driver() {
  step "开始卸载显卡驱动,版本:$VERSION"

  local package_name package_path
  # PACKAGE_TEMPLATE is a trusted constant used as a format on purpose.
  # shellcheck disable=SC2059
  package_name=$(printf "$PACKAGE_TEMPLATE" "$VERSION")
  package_path="${INSTALL_DIR}/${package_name}"

  # Locate the package that carries the uninstaller.
  if [[ -f "$package_path" ]]; then
    step "找到安装包,使用安装包卸载"
  else
    step "未找到本地安装包,尝试下载卸载包"
    download_package "$package_name" "$package_path"
    # Defensive: download_package exits on failure, so this should not
    # trigger; kept so a future non-fatal downloader is still handled.
    if [[ ! -f "$package_path" ]]; then
      step "无法找到或下载卸载包,请手动卸载驱动程序"
      return 1
    fi
  fi

  cd "$INSTALL_DIR" || error "无法进入目录 $INSTALL_DIR"
  run_cmd "chmod +x $package_name"
  # Uninstaller output goes to the log (like every other command) so that a
  # failure can be diagnosed afterwards.
  if ! ./"$package_name" --uninstall -q -s &>> "$LOG_FILE"; then
    step "卸载失败,请手动卸载驱动程序"
    return 1
  fi

  uninstall_peermem_service     # remove the peermem unit
  uninstall_persistence_service # remove the persistenced unit
  step "卸载完成"
}

# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------
case "$ACTION" in
  install)
    install_driver
    ;;
  uninstall)
    uninstall_driver
    ;;
esac