配置高可用负载均衡器监控需要以下几个关键组件:
- 负载均衡器(如Nginx、HAProxy等)
- 高可用集群(如Keepalived、Pacemaker等)
- 监控系统(如Prometheus、Zabbix等)
- 告警与可视化系统(如Alertmanager、Grafana等)
http {
    # Pool of application servers that receives the proxied traffic.
    upstream backend {
        # Pick the server with the fewest active connections.
        least_conn;

        server backend1.example.com;
        server backend2.example.com;
        # Append further backend servers here.
    }

    server {
        listen 80;

        # Status page (stub_status). Access is limited to localhost and the
        # 192.168.1.0/24 network; every other client is denied.
        location /nginx_status {
            access_log off;
            stub_status on;

            allow 127.0.0.1;
            allow 192.168.1.0/24;
            deny all;
        }

        # All other requests are forwarded to the upstream pool, passing
        # the original Host header and the client address along.
        location / {
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_pass http://backend;
        }
    }
}
global
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    # Runtime API socket with admin level, restricted to owner/group (mode 660).
    stats socket /run/haproxy/admin.sock mode 660 level admin
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

defaults
    log global
    mode http
    option httplog
    option dontlognull
    # Timeouts given without a unit are in milliseconds.
    timeout connect 5000
    timeout client 50000
    timeout server 50000

# Public entry point. No "stats" directive belongs here: any stats
# statement in a frontend enables the statistics page on the public
# port without authentication. Statistics live in the dedicated
# "listen stats" section below.
frontend http_front
    bind *:80
    default_backend http_back

backend http_back
    balance roundrobin
    # "check" enables periodic health checks for each server.
    server server1 192.168.1.10:80 check
    server server2 192.168.1.11:80 check

# Dedicated monitoring listener on port 9000.
listen stats
    bind *:9000
    # Serve Prometheus metrics on /metrics using the built-in exporter
    # (requires HAProxy 2.0+ built with the Prometheus exporter).
    # This path is not covered by "stats auth"; restrict access to this
    # port at the network level.
    http-request use-service prometheus-exporter if { path /metrics }
    stats enable
    stats uri /stats
    stats realm Haproxy\ Statistics
    # Placeholder credentials: replace before deploying.
    stats auth admin:password
# keepalived.conf for the MASTER node.

# Health check tracked by the VRRP instance below.
vrrp_script chk_haproxy {
    # Signal 0 sends nothing; it only tests whether a haproxy process
    # exists. Exit status 0 means the check succeeded.
    script "killall -0 haproxy"
    interval 2    # run the check every 2 seconds
    # A positive weight is ADDED to the priority while the check
    # succeeds (101 + 2 = 103) and removed when it fails (back to 101).
    # Failover therefore only happens if a healthy peer ends up with an
    # effective priority above 101.
    weight 2
}

vrrp_instance VI_1 {
    state MASTER
    interface eth0
    virtual_router_id 51    # must be identical on all nodes of this instance
    priority 101            # base priority; higher than the backup node
    advert_int 1            # VRRP advertisement interval in seconds
    virtual_ipaddress {
        192.168.1.100/24 dev eth0
    }
    track_script {
        chk_haproxy
    }
}
# keepalived.conf for the BACKUP node.

# The script named in track_script must be defined in this node's own
# configuration. Without it the backup never receives the +2 bonus, its
# priority stays at 100, and it cannot outrank a master whose check has
# failed (priority 101), so the VIP would never move.
vrrp_script chk_haproxy {
    script "killall -0 haproxy"    # succeeds while a haproxy process exists
    interval 2                     # run the check every 2 seconds
    weight 2                       # +2 priority while the check succeeds
}

vrrp_instance VI_1 {
    state BACKUP
    interface eth0
    virtual_router_id 51    # must be identical on all nodes of this instance
    priority 100            # base priority; lower than the master node
    advert_int 1            # VRRP advertisement interval in seconds
    virtual_ipaddress {
        192.168.1.100/24 dev eth0
    }
    track_script {
        chk_haproxy
    }
}
# Download and unpack Prometheus 2.30.3.
PROM_VERSION=2.30.3
PROM_DIR="prometheus-${PROM_VERSION}.linux-amd64"
wget "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${PROM_DIR}.tar.gz"
tar xvfz "${PROM_DIR}.tar.gz"
# Enter the extracted directory by its exact name: the glob
# "prometheus-*" would also match the tarball and make cd fail with
# "too many arguments".
cd "${PROM_DIR}"
编辑 prometheus.yml:
# Load the alerting rules. If prometheus.yml already contains a
# rule_files section, merge this entry into it instead of duplicating
# the key.
rule_files:
  - /etc/prometheus/alert.rules.yml

# Where to send firing alerts.
# NOTE(review): assumes Alertmanager runs on the Prometheus host on its
# default port 9093 - adjust the target, and merge into any existing
# alerting section.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9093']

scrape_configs:
  # HAProxy metrics served on the statistics port.
  - job_name: 'haproxy'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['192.168.1.100:9000']

  # Nginx's stub_status page is plain text, not the Prometheus
  # exposition format, so it cannot be scraped directly. Run
  # nginx-prometheus-exporter against /nginx_status and scrape the
  # exporter instead (default port 9113, default path /metrics).
  - job_name: 'nginx'
    static_configs:
      - targets: ['192.168.1.100:9113']

  # Host-level metrics of the two Keepalived nodes via node_exporter.
  - job_name: 'keepalived'
    static_configs:
      - targets: ['192.168.1.101:9100', '192.168.1.102:9100']
# Download, unpack and start node_exporter 1.2.2.
NODE_EXPORTER_VERSION=1.2.2
NODE_EXPORTER_DIR="node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64"
wget "https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/${NODE_EXPORTER_DIR}.tar.gz"
tar xvfz "${NODE_EXPORTER_DIR}.tar.gz"
# Enter the extracted directory by its exact name: the glob
# "node_exporter-*" would also match the tarball and make cd fail with
# "too many arguments".
cd "${NODE_EXPORTER_DIR}"
# Runs in the background of the current shell only; use a service
# manager unit if it has to survive logout or reboot.
./node_exporter &
创建 /etc/prometheus/alert.rules.yml:
groups:
  - name: haproxy.rules
    rules:
      # Fires when the "haproxy" scrape target has been unreachable
      # for one minute.
      - alert: HaproxyDown
        for: 1m
        expr: up{job="haproxy"} == 0
        labels: {severity: critical}
        annotations:
          description: "Haproxy has been down for more than 1 minute"
          summary: "Haproxy down (instance {{ $labels.instance }})"

      # Fires when a backend server has been reported down for two
      # minutes.
      # NOTE(review): haproxy_server_up is the metric name used by the
      # standalone haproxy_exporter - confirm it matches the exporter
      # actually deployed.
      - alert: BackendDown
        for: 2m
        expr: haproxy_server_up == 0
        labels: {severity: warning}
        annotations:
          description: "Backend server {{ $labels.server }} has been down for more than 2 minutes"
          summary: "Backend server down (instance {{ $labels.instance }})"
# Alertmanager configuration: one route that sends every alert by mail.
route:
  receiver: 'team-email'
  # Alerts sharing these labels are batched into one notification.
  group_by:
    - 'alertname'
    - 'cluster'
    - 'service'
  group_wait: 30s        # delay before the first notification of a new group
  group_interval: 5m     # delay before notifying about changes to a group
  repeat_interval: 3h    # delay before re-sending a still-firing notification

receivers:
  - name: 'team-email'
    email_configs:
      - send_resolved: true    # also notify when the alert clears
        to: 'admin@example.com'
        from: 'alertmanager@example.com'
        smarthost: 'smtp.example.com:587'
        auth_username: 'alertmanager'
        # Placeholder credential: replace before deploying.
        auth_password: 'password'
# Install Grafana OSS 8.1.5 from the upstream .deb package and start it.
grafana_deb=grafana_8.1.5_amd64.deb

# Prerequisites of the package.
sudo apt-get install -y adduser libfontconfig1

wget "https://dl.grafana.com/oss/release/${grafana_deb}"
sudo dpkg -i "${grafana_deb}"
sudo systemctl start grafana-server
对于更全面的监控,可以配置ELK(Elasticsearch, Logstash, Kibana)来收集和分析负载均衡器日志。
创建定期检查脚本 /usr/local/bin/check_lb.sh:
#!/bin/bash
#
# check_lb.sh - periodic load-balancer health check.
#
# 1. If the virtual IP is assigned to this node, make sure HAProxy is
#    running and restart it when it is not.
# 2. Probe every server declared in the HAProxy configuration with a
#    TCP connect and report the ones that do not answer.
#
# Environment overrides:
#   VIP                   virtual IP to look for   (default 192.168.1.100)
#   HAPROXY_CFG           HAProxy config to parse  (default /etc/haproxy/haproxy.cfg)
#   DEFAULT_BACKEND_PORT  port used when a server line has none (default 80)

VIP="${VIP:-192.168.1.100}"
HAPROXY_CFG="${HAPROXY_CFG:-/etc/haproxy/haproxy.cfg}"
DEFAULT_BACKEND_PORT="${DEFAULT_BACKEND_PORT:-80}"

#######################################
# Tell whether the VIP is configured on a local interface.
# Globals:   VIP (read)
# Returns:   0 if the VIP is present, non-zero otherwise
#######################################
vip_is_local() {
  local addrs
  addrs=$(ip -o -4 addr show 2>/dev/null) || return 1
  # Fixed-string match on "inet <VIP>/" so that dots are literal and
  # longer addresses sharing the same prefix do not match.
  grep -qF -- "inet ${VIP}/" <<<"${addrs}"
}

#######################################
# List the servers declared in an HAProxy configuration file.
# "haproxy -c" only validates the file and prints no server list, so
# the "server <name> <address>" lines are read from the file itself.
# Arguments: $1 - config file (defaults to HAPROXY_CFG)
# Outputs:   one "<name> <address>" pair per line on stdout
# Returns:   non-zero if the file cannot be read
#######################################
list_backends() {
  local cfg=${1:-${HAPROXY_CFG}}
  [[ -r "${cfg}" ]] || return 1
  awk '$1 == "server" && NF >= 3 { print $2, $3 }' "${cfg}"
}

#######################################
# TCP-probe every configured backend and report the unreachable ones.
# Globals:   HAPROXY_CFG, DEFAULT_BACKEND_PORT (read)
# Outputs:   one line per unreachable backend on stdout
# Returns:   non-zero if the configuration cannot be read
#######################################
check_backends() {
  local name addr host port

  if [[ ! -r "${HAPROXY_CFG}" ]]; then
    echo "Cannot read ${HAPROXY_CFG}, skipping backend checks" >&2
    return 1
  fi

  while read -r name addr; do
    if [[ "${addr}" == *:* ]]; then
      host=${addr%:*}
      port=${addr##*:}
    else
      host=${addr}
      port=${DEFAULT_BACKEND_PORT}
    fi
    # -w 2 bounds the probe; stdin is detached so nc cannot consume
    # the server list feeding this loop.
    if ! nc -z -w 2 "${host}" "${port}" </dev/null; then
      echo "Backend ${name} (${host}:${port}) is down"
      # Logic to remove the server from the pool could be added here.
    fi
  done < <(list_backends "${HAPROXY_CFG}")
}

main() {
  if vip_is_local; then
    echo "VIP is on this node"
    if ! systemctl is-active --quiet haproxy; then
      echo "HAProxy is not running, attempting to restart"
      systemctl restart haproxy
    fi
  else
    echo "VIP is not on this node"
  fi

  check_backends
}

main "$@"
设置cron任务定期执行:
crontab -e
# Add the following line: it runs the check script every 5 minutes and
# appends both stdout and stderr to /var/log/lb_check.log.
*/5 * * * * /usr/local/bin/check_lb.sh >> /var/log/lb_check.log 2>&1
限制监控端口的访问:
# Port 9100: accept TCP connections from the 192.168.1.0/24 network ...
iptables --append INPUT --protocol tcp --source 192.168.1.0/24 --dport 9100 --jump ACCEPT
# ... and drop connections to that port from everywhere else.
iptables --append INPUT --protocol tcp --dport 9100 --jump DROP
使用HTTPS和认证保护监控界面
定期更新所有组件到最新安全版本
测试故障转移:
# On the master node, stop HAProxy
systemctl stop haproxy
# Then verify that the VIP has moved to the backup node
测试监控告警:
# Stop one backend server
systemctl stop nginx
# Then check whether Prometheus and Alertmanager raise an alert
测试负载均衡:
# Send 1000 requests, 100 at a time, to the virtual IP
ab -n 1000 -c 100 http://192.168.1.100/
# Then check the load distribution on the Grafana dashboard
通过以上配置,您将拥有一个高可用的负载均衡器系统,并具备全面的监控和告警能力。