1. 引言
众所周知,Prometheus已成为众多监控方案的首选。本文只是记录个人在公网环境及非k8s环境中安装、使用Prometheus的一些心得。如果你想系统性地学习Prometheus监控体系,我十分推荐以下几篇博客文章,这是个人在阅读众多博客文章后觉得质量最高的几篇,强力安利!!《Prometheus操作指南》《PromQL教程》《30天精通Prometheus》。如果你已有Prometheus监控体系的基础,想在k8s中使用Prometheus,建议直接学习《Prometheus Operator》。
2. 安装Prometheus
2.1 规划安装目录
# 存放Prometheus时序数据目录
mkdir -p /opt/prometheus/prometheus_service/data
# 存放Prometheus配置文件目录
mkdir -p /opt/prometheus/prometheus_service/conf
# 存放Prometheus二进制程序目录
mkdir -p /opt/prometheus/prometheus_service/bin
2.2 创建Prometheus系统账号
# 创建group组,-g指定gid,可不指定,看个人习惯
groupadd -g 1111 prometheus
# 创建prometheus用户, -g指定所属group组,-u指定uid,-s指定user使用的shell,-d指定user的home目录
useradd -g 1111 -u 1111 -s /usr/sbin/nologin -d /opt/prometheus/prometheus_service prometheus
2.3 下载Prometheus
下载地址:https://github.com/prometheus/prometheus/releases
cd /opt/prometheus/prometheus_service
# 版本和架构自行选择
wget https://github.com/prometheus/prometheus/releases/download/v3.9.1/prometheus-3.9.1.linux-amd64.tar.gz
# 解压tar包
tar -zxvf prometheus-3.9.1.linux-amd64.tar.gz --strip-components=1
# 规划至指定目录
mv ./prometheus ./promtool ./bin
mv ./prometheus.yml ./conf
# 删除无用文件
rm LICENSE NOTICE prometheus-3.9.1.linux-amd64.tar.gz
2.4 使用systemd管理,创建service文件
cat > /etc/systemd/system/prometheus_service.service <<'EOF'
[Unit]
Description=Prometheus Service
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Restart=on-failure
WorkingDirectory=/opt/prometheus/prometheus_service/
ExecStart=/opt/prometheus/prometheus_service/bin/prometheus \
# 指定启动配置文件路径
--config.file=/opt/prometheus/prometheus_service/conf/prometheus.yml \
# 只允许本地监听访问
--web.listen-address=127.0.0.1:9090 \
# 设置子路径前缀
--web.route-prefix=/prometheus \
# 生成页面、跳转、资源链接时使用的根地址
--web.external-url=http://127.0.0.1:9090/prometheus \
# 允许通过http接口远程控制prometheus的生命周期操作,例如修改prometheus.yml文件后,向http://127.0.0.1:9090/prometheus/-/reload发送POST或PUT请求即可热加载配置文件,无需重启prometheus服务
--web.enable-lifecycle
ExecReload=/bin/kill -HUP $MAINPID
LimitNOFILE=65535
User=prometheus
Group=prometheus
[Install]
WantedBy=multi-user.target
EOF
2.5 修改prometheus.yml配置
# 因为上面设置了--web.route-prefix=/prometheus,需修改收集metrics指标的路径;为了精简,官方默认注释已删除
global:
  scrape_interval: 15s
  evaluation_interval: 15s
scrape_configs:
  - job_name: "prometheus"
    metrics_path: '/prometheus/metrics'
    static_configs:
      - targets: ["localhost:9090"]
        labels:
          app: "prometheus"
2.6 修改prometheus_service目录所属用户和组
chown -R prometheus:prometheus /opt/prometheus/prometheus_service
2.7 启动验证Prometheus
systemctl daemon-reload
systemctl enable --now prometheus_service.service
systemctl status prometheus_service.service
curl http://127.0.0.1:9090/prometheus/metrics
3. Nginx反向代理Prometheus
因--web.listen-address=127.0.0.1:9090设置只监听本地,公网环境下无法访问,需要安装Nginx进行反向代理,同时,注意开启防火墙设置,个人一般只放行80、443和ssh端口。
此外,还需自备域名和证书,并提前做好域名的 DNS 解析。如果你没有域名,可以使用 acme.sh 申请 Let's Encrypt 的 IP 证书;如果你没有公网IP,也可使用自签证书,自签证书教程可参考《OpenSSL 自签证书链申请》
Nginx官方包安装教程:https://nginx.org/en/linux_packages.html
3.1 使用htpasswd生成auth_basic的密码文件
# -B指定使用bcrypt加密算法 -C指定bcrypt加密的计算强度 -c指定创建密码文件
htpasswd -B -C 12 -c /opt/nginx/prometheus_htpasswd prometheus
# 修改密码文件权限
chmod 640 /opt/nginx/prometheus_htpasswd
chown root:nginx /opt/nginx/prometheus_htpasswd
3.2 配置Nginx
3.2.1 删除默认的default.conf
rm /etc/nginx/conf.d/default.conf
3.2.2 配置反向代理
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;
events {
worker_connections 65536;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
server_tokens off;
log_format main '$remote_addr - $remote_user [$time_local] "$request_method $request_uri $server_protocol" '
'$status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"';
sendfile on;
tcp_nopush on;
gzip on;
keepalive_timeout 65;
# 压缩比由低到高从1到9,默认为1,值越高,cpu压力越大
gzip_comp_level 5;
# gzip压缩的最小文件,小于设置值的文件将不会压缩
gzip_min_length 1k;
map $proxy_add_x_forwarded_for $new_x_forwarded_for {
'127.0.0.1' $proxy_protocol_addr;
'' $proxy_protocol_addr;
default $proxy_add_x_forwarded_for;
}
# 禁止使用ip访问80端口
server {
listen 80 default_server;
return 403;
}
# 只允许域名是prometheus.lesslog.com的请求重定向到https
server {
listen 80 reuseport;
server_name prometheus.lesslog.com;
return 301 https://$host$request_uri;
}
# 对于使用未知域名和直接使用IP访问443的https请求全部拒绝
server {
listen 443 ssl default_server;
listen 443 quic default_server;
server_name _;
ssl_reject_handshake on; # 触发 TLS 但直接拒绝
}
server {
listen 443 ssl reuseport;
listen 443 quic reuseport; # 开启http3
server_name prometheus.lesslog.com;
http2 on; # 开启http2
access_log /var/log/nginx/access.log main; # 日志路径
ssl_certificate "/opt/nginx/certs/fullchain.cer"; # 证书位置
ssl_certificate_key "/opt/nginx/certs/lesslog.com.key"; # 私钥位置
ssl_protocols TLSv1.3; # 只允许更安全的TLS1.3
ssl_conf_command Ciphersuites TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:TLS_AES_128_GCM_SHA256; # 只允许使用这些TLS1.3加密套件
ssl_session_cache shared:SSL:1m;
ssl_session_timeout 10m;
client_header_timeout 5m;
client_max_body_size 1024m;
keepalive_timeout 5m;
error_page 404 /404.html;
error_page 500 502 503 504 /50x.html;
# 禁止访问一些敏感文件
location ~ ^/(\.user.ini|\.htaccess|\.git|\.env|\.svn|\.project|LICENSE|README.md) {
deny all;
}
location ~* \.log$ {
deny all;
}
# 将/prometheus子路径反向代理到本地Prometheus,并开启auth_basic认证
location /prometheus {
auth_basic "Prometheus Access"; # 开启 auth_basic 认证
auth_basic_user_file /opt/nginx/prometheus_htpasswd;
proxy_pass http://127.0.0.1:9090;
proxy_http_version 1.1;
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
proxy_buffer_size 128k;
proxy_buffers 4 256k;
proxy_busy_buffers_size 256k;
proxy_max_temp_file_size 0;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $new_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# 其余请求全部deny,只放行子路径是prometheus的请求
location / {
deny all;
}
}
include /etc/nginx/conf.d/*.conf;
}
3.3 启动nginx,使用域名公网访问
root@halocloudsg:/etc/nginx# nginx -t
nginx: the configuration file /etc/nginx/nginx.conf syntax is ok
nginx: configuration file /etc/nginx/nginx.conf test is successful
root@halocloudsg:/etc/nginx# systemctl enable --now nginx
Synchronizing state of nginx.service with SysV service script with /lib/systemd/systemd-sysv-install.
Executing: /lib/systemd/systemd-sysv-install enable nginx