#!/bin/bash
#========================================================================
# Node Exporter install / uninstall script
# Purpose: automate installing and removing the Prometheus Node Exporter
#          on systemd-based Linux hosts.
# Usage:   node-exporter.sh --install | --uninstall | --help
# Version: 1.3.3
#
# NOTE(review): this file was recovered from a line-collapsed diff in which
# both here-doc regions were lost. Sections rebuilt from the surviving
# fragments are marked "NOTE(review)" below and should be checked against
# the real script before deployment.
#========================================================================

# Strict mode
set -euo pipefail
IFS=$'\n\t'

#================================
# Global configuration
#================================
readonly LOG_FILE="/var/log/node_exporter_install_$(date +%Y%m%d%H%M%S).log"
readonly APP_NAME="node_exporter"
readonly APP_VERSION="1.8.2"
readonly APP_USER="node_exporter"
readonly APP_DIR="/opt/${APP_NAME}"
readonly BINARY_DIR="${APP_DIR}/bin"
readonly SYSTEMD_SERVICE_FILE="/lib/systemd/system/${APP_NAME}.service"
readonly WEB_LISTEN_ADDRESS=":10086"

# Colour codes, stored as real escape bytes so printf '%s' can emit them.
readonly GREEN=$'\033[1;32m' # success
readonly RED=$'\033[1;31m'   # failure / error
readonly BLUE=$'\033[1;34m'  # titles / information
readonly NC=$'\033[0m'       # reset

# Scratch directory created by install_node_exporter; removed on exit.
WORK_DIR=""

# Download source picked by find_download_source (empty when none is usable).
DOWNLOAD_URL=""

#================================
# Logging
#================================
# Print a coloured "[LEVEL] message" line to stdout and append a timestamped,
# colour-free copy to LOG_FILE.
# Arguments: $1 - level (SUCCESS is green, ERROR is red, anything else blue)
#            $2 - message; may itself contain the colour codes defined above
log() {
    local level="$1"
    local message="$2"
    local timestamp colour code plain

    # Declared separately from the assignment so a failing date is not masked.
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    case "$level" in
        "SUCCESS") colour="$GREEN" ;;
        "ERROR") colour="$RED" ;;
        *) colour="$BLUE" ;;
    esac

    # The trailing reset stops colour bleeding into the terminal when the
    # caller opened a colour inside the message and never closed it.
    printf '%s[%s]%s %s%s\n' "$colour" "$level" "$NC" "$message" "$NC"

    # Callers embed colour codes in the message; strip them for the file.
    plain="$message"
    for code in "$GREEN" "$RED" "$BLUE" "$NC"; do
        plain=${plain//"$code"/}
    done

    # Intentionally best-effort: an unwritable log file (e.g. when run without
    # root) must not abort the script before the real error is reported.
    { printf '[%s] [%s] %s\n' "$timestamp" "$level" "$plain" >> "$LOG_FILE"; } 2> /dev/null || true
}

#================================
# Error handling
#================================
# Log a fatal error plus a pointer to the log file, then exit with status 1.
# Arguments: $1 - error message
error() {
    local message="$1"
    log "ERROR" "$message"
    log "ERROR" "详细日志请查看: $LOG_FILE"
    exit 1
}

#================================
# Command execution
#================================
# Run a command string, logging what is being run and whatever it printed.
# Arguments: $1 - command line, evaluated with eval inside a command
#                 substitution, so state changes such as cd do not persist.
#                 Only pass strings built from trusted values.
#            $2 - optional description for the log (default "执行命令")
# Returns:   0 on success, 1 when the command fails
run_cmd() {
    local cmd="$1"
    local description="${2:-"执行命令"}"
    local output

    log "INFO" "$description: $cmd"

    if ! output=$(eval "$cmd" 2>&1); then
        log "ERROR" "命令执行失败: $cmd"
        log "ERROR" "错误详情: $output"
        return 1
    fi

    # Keep the output of successful commands in the log file as well.
    if [[ -n "$output" ]]; then
        { printf '%s\n' "$output" >> "$LOG_FILE"; } 2> /dev/null || true
    fi

    return 0
}

#================================
# Network probe
#================================
# Check whether a URL answers a HEAD request.
# Arguments: $1 - URL to probe
# Returns:   0 when curl succeeds, 1 otherwise
test_network() {
    local url="$1"
    log "INFO" "测试网络连接: $url"

    # --max-time bounds the whole request; --connect-timeout alone lets a
    # stalled transfer hang indefinitely.
    if ! curl -fsSLI --connect-timeout 5 --max-time 15 "$url" &>> "$LOG_FILE"; then
        return 1
    fi

    return 0
}

#================================
# File download
#================================
# Download a URL to a local path; exits the script via error() on failure.
# Arguments: $1 - source URL
#            $2 - destination file path
download_file() {
    local url="$1"
    local dest="$2"
    log "INFO" "开始下载: $url"

    # -nv instead of "-q --show-progress": the output goes to the log file, so
    # a progress bar is useless there, and --show-progress is rejected by
    # wget releases older than 1.16.
    if ! wget -nv -O "$dest" "$url" &>> "$LOG_FILE"; then
        rm -f -- "$dest" # do not leave a truncated file behind
        error "下载失败: $url"
    fi

    log "INFO" "下载完成: $dest"
}

#================================
# Install helpers
#================================
# EXIT trap handler: remove the scratch directory if one was created.
cleanup_work_dir() {
    if [[ -n "${WORK_DIR:-}" && -d "$WORK_DIR" ]]; then
        rm -rf -- "$WORK_DIR"
    fi
}

# Abort unless wget, curl and tar are all on PATH.
check_required_tools() {
    local tool
    log "INFO" "检查必备工具..."
    for tool in wget curl tar; do
        if ! command -v "$tool" &>> "$LOG_FILE"; then
            log "ERROR" "未找到必备工具: $tool"
            log "ERROR" "请先安装: yum install -y $tool 或 apt-get install -y $tool"
            exit 1
        fi
    done
}

# Walk the candidate sources in priority order and store the first usable one
# in the global DOWNLOAD_URL (left empty when none works). The result goes
# through a global because log() writes to stdout.
find_download_source() {
    local tarball="${APP_NAME}-${APP_VERSION}.linux-amd64.tar.gz"
    local candidate
    local -a sources=(
        "/opt/${tarball}"                                                                                   # local file
        "http://10.101.0.51:5588/${APP_NAME}/${tarball}"                                                    # internal mirror 1
        "http://10.102.32.207:5588/${APP_NAME}/${tarball}"                                                  # internal mirror 2
        "https://github.com/prometheus/${APP_NAME}/releases/download/v${APP_VERSION}/${tarball}"            # upstream release
        "https://mirrors.tuna.tsinghua.edu.cn/github-release/prometheus/node_exporter/Node-${APP_VERSION}/${tarball}" # TUNA mirror
    )

    DOWNLOAD_URL=""
    log "INFO" "开始查找可用的下载源..."

    for candidate in "${sources[@]}"; do
        if [[ "$candidate" == /* ]]; then
            # Absolute path: usable only if the file exists.
            log "INFO" "检查本地文件: $candidate"
            if [[ -f "$candidate" ]]; then
                DOWNLOAD_URL="$candidate"
                log "INFO" "${BLUE}找到本地文件: $DOWNLOAD_URL${NC}"
                return 0
            fi
            log "WARN" "本地文件不存在: $candidate"
        elif test_network "$candidate"; then
            DOWNLOAD_URL="$candidate"
            log "INFO" "${BLUE}找到可用下载源: $DOWNLOAD_URL${NC}"
            return 0
        else
            log "WARN" "网络源不可用: $candidate"
        fi
    done

    return 0
}

# Copy (local path) or download (URL) the package to the given destination.
# Arguments: $1 - source path or URL, $2 - destination file
fetch_package() {
    local source="$1"
    local dest="$2"

    log "INFO" "准备获取安装包..."
    if [[ "$source" == /* ]]; then
        log "INFO" "使用本地文件: $source"
        run_cmd "cp -- \"$source\" \"$dest\"" "复制本地文件到临时目录"
    else
        log "INFO" "从网络下载: $source"
        download_file "$source" "$dest"
    fi
}

# Reject packages smaller than 1 MiB (truncated download or an error page).
# Arguments: $1 - package file
verify_package_size() {
    local package="$1"
    local file_size

    log "INFO" "验证下载文件的完整性..."
    file_size=$(stat -c%s "$package")

    if ((file_size < 1048576)); then
        log "ERROR" "下载的文件大小异常: $file_size 字节"
        log "ERROR" "请检查网络连接或下载源的可用性"
        exit 1
    fi
}

# Unpack the tarball inside WORK_DIR and copy the binary into BINARY_DIR.
# NOTE(review): reconstructed; the original extraction steps were lost. The
# archive layout <name>-<version>.linux-amd64/<name> is assumed, confirm.
# Arguments: $1 - package file
install_binary() {
    local package="$1"
    local extracted="${WORK_DIR}/${APP_NAME}-${APP_VERSION}.linux-amd64/${APP_NAME}"

    run_cmd "tar -xzf \"$package\" -C \"$WORK_DIR\"" "解压安装包"

    if [[ ! -f "$extracted" ]]; then
        error "安装包中未找到可执行文件: $extracted"
    fi

    run_cmd "install -m 0755 \"$extracted\" \"${BINARY_DIR}/${APP_NAME}\"" "安装二进制文件"
}

# Create the service account if it does not exist, then hand it APP_DIR.
ensure_service_user() {
    if ! id "$APP_USER" &>> "$LOG_FILE"; then
        run_cmd "useradd --system --no-create-home --shell /bin/false $APP_USER" "创建系统用户"
    fi

    run_cmd "chown -R $APP_USER:$APP_USER $APP_DIR" "设置文件权限"
}

# Write the systemd unit file.
# NOTE(review): the unit body was lost from the source; this is a conventional
# unit built from the script's own configuration values, confirm.
write_service_unit() {
    log "INFO" "配置systemd服务"
    cat > "$SYSTEMD_SERVICE_FILE" << EOF
[Unit]
Description=Prometheus Node Exporter
Documentation=https://github.com/prometheus/node_exporter
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=${APP_USER}
Group=${APP_USER}
ExecStart=${BINARY_DIR}/${APP_NAME} --web.listen-address=${WEB_LISTEN_ADDRESS}
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF
}

# Reload systemd, enable and (re)start the service, then wait for /metrics.
# NOTE(review): the start/verify commands were lost from the source; only the
# two failure messages survived, confirm the exact original sequence.
# Arguments: $1 - port the exporter listens on
start_and_verify_service() {
    local port="$1"
    local metrics_url="http://localhost:${port}/metrics"
    local attempt

    run_cmd "systemctl daemon-reload" "重新加载systemd配置"
    run_cmd "systemctl enable ${APP_NAME}.service" "设置开机自启"
    # restart rather than start so a re-install picks up the new binary.
    run_cmd "systemctl restart ${APP_NAME}.service" "启动服务"

    log "INFO" "验证指标端点: $metrics_url"
    # Poll for up to about ten seconds while the exporter binds its port.
    for ((attempt = 1; attempt <= 10; attempt++)); do
        if curl -fsS -o /dev/null --max-time 3 "$metrics_url" 2>> "$LOG_FILE"; then
            return 0
        fi
        sleep 1
    done

    log "ERROR" "指标端点不可访问"
    log "ERROR" "请检查服务是否正常运行或端口是否被占用"
    exit 1
}

# Print the final success banner.
# Arguments: $1 - port the exporter listens on
print_install_summary() {
    local port="$1"
    local server_ip

    # Prefer the second address reported by hostname -I, fall back to the
    # first. A failing hostname must not abort an otherwise finished install.
    server_ip=$(hostname -I 2> /dev/null | awk '{print ($2 != "" ? $2 : $1)}') || server_ip=""
    if [[ -z "$server_ip" ]]; then
        server_ip="127.0.0.1"
    fi

    log "TITLE" "${BLUE}========================================"
    log "INFO" "服务名称:Node Exporter"
    log "SUCCESS" "安装结果:成功"
    log "INFO" "访问地址:http://${server_ip}:${port}/metrics"
    log "INFO" "日志位置:$LOG_FILE"
    log "TITLE" "${BLUE}========================================"
}

#================================
# Install Node Exporter
#================================
# Full install flow: check tools, pick a source, fetch and unpack the package,
# create the service user, write the unit, start the service, verify it.
# Exits non-zero on the first failing step.
install_node_exporter() {
    local package
    # Derive the port from the listen address so the probe and the summary
    # cannot drift from the configured value.
    local port="${WEB_LISTEN_ADDRESS##*:}"

    log "TITLE" "${BLUE}开始安装 Node Exporter ${APP_VERSION}${NC}"

    check_required_tools
    run_cmd "mkdir -p $BINARY_DIR" "创建目录结构"

    find_download_source
    if [[ -z "$DOWNLOAD_URL" ]]; then
        log "ERROR" "无法找到可用的下载源"
        log "ERROR" "请检查网络连接或手动下载安装包到/opt目录"
        log "ERROR" "下载地址: https://github.com/prometheus/node_exporter/releases/download/v${APP_VERSION}/${APP_NAME}-${APP_VERSION}.linux-amd64.tar.gz"
        exit 1
    fi

    # A private mktemp directory instead of a fixed /tmp path: predictable
    # names in /tmp are unsafe for a script that runs as root.
    WORK_DIR=$(mktemp -d) || error "创建临时目录失败"
    trap cleanup_work_dir EXIT
    package="${WORK_DIR}/${APP_NAME}.tar.gz"

    fetch_package "$DOWNLOAD_URL" "$package"
    verify_package_size "$package"
    install_binary "$package"
    ensure_service_user
    write_service_unit
    start_and_verify_service "$port"
    print_install_summary "$port"
}

#================================
# Uninstall Node Exporter
#================================
# Stop and disable the service, remove the unit file and the install
# directory. The service account is deliberately left in place.
uninstall_node_exporter() {
    log "TITLE" "${BLUE}开始卸载 Node Exporter${NC}"

    # Stop the service if it is running
    if systemctl is-active --quiet "${APP_NAME}.service"; then
        run_cmd "systemctl stop ${APP_NAME}.service" "停止服务"
    fi

    # Disable the service if it is enabled
    if systemctl is-enabled --quiet "${APP_NAME}.service" 2> /dev/null; then
        run_cmd "systemctl disable ${APP_NAME}.service" "禁用服务"
    fi

    # Remove the unit file
    if [[ -f "$SYSTEMD_SERVICE_FILE" ]]; then
        run_cmd "rm -f -- \"$SYSTEMD_SERVICE_FILE\"" "删除服务文件"
    fi

    run_cmd "systemctl daemon-reload" "重新加载systemd配置"

    # Remove the install directory; ":?" aborts if the path is ever empty.
    if [[ -d "$APP_DIR" ]]; then
        run_cmd "rm -rf -- \"${APP_DIR:?}\"" "删除应用目录"
    fi

    log "SUCCESS" "${GREEN}Node Exporter 卸载完成!${NC}"
    log "INFO" "注意: 系统用户 '$APP_USER' 未被删除,如需删除请手动执行: userdel $APP_USER"
    log "INFO" "日志文件位置: $LOG_FILE"
}

#================================
# Help
#================================
# Print usage information to stdout.
show_help() {
    printf '%s\n' \
        "用法: $0 [选项]" \
        "选项:" \
        "  --install    安装 Node Exporter" \
        "  --uninstall  卸载 Node Exporter" \
        "  --help       显示此帮助信息"
}

#================================
# Entry point
#================================
# Validate the single command-line option, require root for the actions that
# modify the system, then dispatch.
# Arguments: exactly one of --install, --uninstall, --help
main() {
    if [[ $# -ne 1 ]]; then
        show_help
        exit 1
    fi

    # Validate the option before anything touches the log file so that --help
    # works for any user and a typo is reported as a typo.
    case "$1" in
        "--help")
            show_help
            exit 0
            ;;
        "--install" | "--uninstall") ;;
        *)
            error "未知参数: $1"
            ;;
    esac

    # Root check
    if [[ $EUID -ne 0 ]]; then
        error "此脚本需要root权限运行"
    fi

    log "TITLE" "${BLUE}Node Exporter 管理脚本启动${NC}"

    case "$1" in
        "--install")
            install_node_exporter
            ;;
        "--uninstall")
            uninstall_node_exporter
            ;;
    esac
}

# Run the entry point
main "$@"