ansible-devops/scripts/node-exporter.sh

178 lines
4.4 KiB
Bash
Raw Normal View History

2025-07-05 15:49:53 +08:00
#!/bin/bash
set -euo pipefail
# Global settings.
NODE_EXPORTER_VERSION="1.8.2"
# Release archive file name; both download URLs end with it.
NODE_EXPORTER_TARBALL="node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64.tar.gz"
PRIMARY_DOWNLOAD_URL="http://10.101.0.51:5588/node-exporter/${NODE_EXPORTER_TARBALL}"
BACKUP_DOWNLOAD_URL="https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/${NODE_EXPORTER_TARBALL}"
LOCAL_PACKAGE_PATH="/opt/node_exporter.tar.gz"
# Log file name carries the timestamp taken when this line runs.
LOG_FILE="/var/log/node_exporter_$(date +%Y%m%d%H%M%S).log"
# Color definitions. Single-quoted, so the backslash escapes are stored
# literally and must be expanded by whoever prints them.
RED='\033[1;31m'
GREEN='\033[1;32m'
NC='\033[0m' # reset color
# Logging helper - appends one timestamped line to $LOG_FILE.
# Arguments: the message words; they are joined into a single line.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >> "$LOG_FILE"
}
# Step notice - prints "==> <message>" to the console wrapped in
# $GREEN/$NC and records the same text (without colors) through log().
# Arguments: $1 - message text.
step() {
  local banner
  banner="==> $1"
  # echo -e expands the backslash escapes stored in the color variables.
  echo -e "${GREEN}${banner}${NC}"
  log "${banner}"
}
# Error notice - prints the message to the console wrapped in $RED/$NC,
# records it through log(), then terminates the script with status 1.
# Arguments: $1 - message text.
error() {
  local text
  text="错误: $1"
  # echo -e expands the backslash escapes stored in the color variables.
  echo -e "${RED}${text}${NC}"
  log "${text}"
  exit 1
}
# Run a command with its output hidden from the console.
# Arguments: $1 - a complete command line, given as ONE string.
# The string is eval'ed in the current shell, so state changes such as
# `cd` persist for the caller. stdout and stderr are appended to $LOG_FILE.
# On a non-zero status error() is invoked with the failing command.
run_cmd() {
  local cmd=$1
  step "执行: $cmd"
  if ! eval "$cmd" >> "$LOG_FILE" 2>&1; then
    error "命令执行失败: $cmd"
  fi
}
# Probe whether a URL is reachable.
# Arguments: $1 - URL to probe.
# Sends a header-only request with curl (redirects followed, 10 second
# connect timeout); curl's output is appended to $LOG_FILE.
# Returns: 0 when curl succeeds, 1 for any curl failure.
test_network_connectivity() {
  local target=$1
  step "测试网络连通性: $target"
  curl -fsSLI --connect-timeout 10 "$target" >> "$LOG_FILE" 2>&1 || return 1
  return 0
}
# Download a file.
# Arguments: $1 - source URL
#            $2 - destination path
# The transfer is written to "<dest>.part" and renamed onto the destination
# only after wget succeeds. wget -O creates/truncates its output file before
# transferring, so writing straight to the destination would leave an empty
# or partial file behind on failure.
# wget's output is appended to $LOG_FILE.
# Returns: 0 on success; 1 on failure, with nothing left at the destination
#          by this call.
download_file() {
  local url=$1
  local dest=$2
  local partial="${dest}.part"
  step "下载文件: $url -> $dest"
  if wget -qO "$partial" "$url" &>> "$LOG_FILE" \
    && mv -f -- "$partial" "$dest" &>> "$LOG_FILE"; then
    return 0
  fi
  # Drop whatever was written before the failure.
  rm -f -- "$partial"
  return 1
}
# Install node_exporter.
# Unpacks the release archive under /opt, moves the binary to
# /opt/node_exporter/bin, writes a systemd unit that listens on :10086,
# then enables, restarts and probes the service.
# Globals read: NODE_EXPORTER_VERSION, LOCAL_PACKAGE_PATH,
#               PRIMARY_DOWNLOAD_URL, BACKUP_DOWNLOAD_URL
# Side effect: the script's working directory is changed (run_cmd evals
#              the `cd` commands in the current shell).
# Failures are reported through error(), either directly or via run_cmd().
install_node_exporter() {
  local download_url

  step "开始安装 node_exporter ${NODE_EXPORTER_VERSION}"
  # Work from /opt: the archive is unpacked relative to the current directory.
  run_cmd "cd /opt"

  if [[ -f "$LOCAL_PACKAGE_PATH" ]]; then
    # An archive is already on disk; use it instead of downloading.
    step "发现本地安装包: $LOCAL_PACKAGE_PATH"
  else
    # Try the primary URL first, then the backup URL.
    if test_network_connectivity "$PRIMARY_DOWNLOAD_URL"; then
      download_url="$PRIMARY_DOWNLOAD_URL"
    elif test_network_connectivity "$BACKUP_DOWNLOAD_URL"; then
      download_url="$BACKUP_DOWNLOAD_URL"
    else
      error "无法连接到任何下载地址"
    fi
    # Report a failed download explicitly instead of letting `set -e` end the
    # script silently, and remove any leftover file so that a later run does
    # not mistake it for a usable local package.
    if ! download_file "$download_url" "$LOCAL_PACKAGE_PATH"; then
      rm -f -- "$LOCAL_PACKAGE_PATH"
      error "下载失败: $download_url"
    fi
  fi

  # Unpack the archive into the current directory (/opt).
  run_cmd "tar -zxvf ${LOCAL_PACKAGE_PATH}"
  # Remove a tree left by an earlier install; otherwise mv would nest the new
  # directory inside the old one and the following mkdir would fail.
  run_cmd "rm -rf /opt/node_exporter"
  run_cmd "mv node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64/ ./node_exporter"
  run_cmd "cd node_exporter/"
  # Keep the binary under bin/, the path used by ExecStart below.
  run_cmd "mkdir bin"
  run_cmd "mv node_exporter bin/"

  # Write the systemd unit.
  cat > /lib/systemd/system/node_exporter.service <<EOF
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/docs/guides/node-exporter/
After=network.target
[Service]
User=root
Group=root
ExecStart=/opt/node_exporter/bin/node_exporter --web.listen-address=:10086
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF

  # Make systemd pick up the new unit, then enable and (re)start the service.
  run_cmd "systemctl daemon-reload"
  run_cmd "systemctl enable node_exporter"
  run_cmd "systemctl restart node_exporter"
  # Check the service state.
  run_cmd "systemctl status node_exporter"
  # Verify the metrics endpoint answers on the loopback address.
  run_cmd "curl -I http://127.0.0.1:10086/metrics"
  step "node_exporter 安装成功"
}
# Uninstall node_exporter.
# Stops and disables the service, deletes its unit file, reloads systemd and
# removes /opt/node_exporter and /opt/node_exporter.tar.gz.
# The stop/disable steps run only when the unit file exists: run_cmd aborts
# the script on any failing command, so a failing systemctl call for a unit
# that was never installed (or only partly installed) would otherwise stop
# the cleanup below from ever running.
uninstall_node_exporter() {
  local unit_file="/lib/systemd/system/node_exporter.service"

  step "开始卸载 node_exporter"
  if [[ -f "$unit_file" ]]; then
    # Stop and disable the service.
    run_cmd "systemctl stop node_exporter"
    run_cmd "systemctl disable node_exporter"
  else
    step "未找到 node_exporter 服务文件,跳过停止和禁用"
  fi
  # Delete the systemd unit file.
  run_cmd "rm -f ${unit_file}"
  # Make systemd forget the removed unit.
  run_cmd "systemctl daemon-reload"
  # Delete the installation directory and the downloaded archive.
  run_cmd "rm -rf /opt/node_exporter"
  run_cmd "rm -f /opt/node_exporter.tar.gz"
  step "node_exporter 卸载完成"
}
# Argument parsing: exactly one argument is accepted, either --install or
# --uninstall. Anything else is reported through error().
(( $# == 1 )) || error "请使用 --install 或 --uninstall"
ACTION=$1
if [[ "$ACTION" == "--install" ]]; then
  install_node_exporter
elif [[ "$ACTION" == "--uninstall" ]]; then
  uninstall_node_exporter
else
  error "无效的参数,请使用 --install 或 --uninstall"
fi
# Reached only when the selected action finished without calling error().
step "操作完成,日志路径: $LOG_FILE"