A highly available (HA) Kubernetes cluster typically consists of the following key components:
- Multiple master nodes (usually 3 or 5; an odd number is used because an etcd cluster of n members tolerates (n-1)/2 member failures, so 3 members survive 1 failure and 5 survive 2)
- Multiple worker nodes
- A load balancer in front of the API servers
- A highly available etcd cluster
- Highly available control plane components
# Disable swap (the kubelet requires swap to be off)
sudo swapoff -a
sudo sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
# Set the hostname (use a different name on each node)
sudo hostnamectl set-hostname master1  # on master1
sudo hostnamectl set-hostname master2  # on master2
# ...
# Add hosts entries for all nodes
sudo tee -a /etc/hosts <<EOF
192.168.1.101 master1
192.168.1.102 master2
192.168.1.103 master3
192.168.1.201 worker1
192.168.1.202 worker2
EOF
# Load the br_netfilter kernel module (persist it across reboots and load it now)
sudo tee /etc/modules-load.d/k8s.conf <<EOF
br_netfilter
EOF
sudo modprobe br_netfilter
sudo tee /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
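Optionally, verify that the module is loaded and the settings took effect before continuing:
# Optional sanity check: module loaded and sysctl values applied
lsmod | grep br_netfilter
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward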
# Install containerd
sudo apt-get update && sudo apt-get install -y containerd # Ubuntu
sudo yum install -y containerd # CentOS/RHEL
# Configure containerd to use the systemd cgroup driver
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
sudo systemctl restart containerd
sudo systemctl enable containerd
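Before moving on, it is worth a quick check that containerd is active and reports a version:
# Optional: confirm containerd is running
sudo systemctl status containerd --no-pager
containerd --version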
sudo apt-get update && sudo apt-get install -y apt-transport-https curl # Ubuntu
sudo yum install -y yum-utils # CentOS/RHEL
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - # Ubuntu
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo # CentOS/RHEL
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-\$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
EOF
# Ubuntu
sudo apt-add-repository "deb https://apt.kubernetes.io/ kubernetes-xenial main"
sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
# CentOS/RHEL
sudo yum install -y kubelet kubeadm kubectl
sudo systemctl enable --now kubelet
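As a quick sanity check, all three tools should report the same version on every node:
# Optional: confirm the installed versions (should match on all nodes)
kubeadm version
kubelet --version
kubectl version --client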
# Initialize the cluster on the first master node
sudo kubeadm init --control-plane-endpoint "LOAD_BALANCER_DNS:LOAD_BALANCER_PORT" \
--upload-certs \
--pod-network-cidr=10.244.0.0/16
# Join the remaining master nodes to the control plane
sudo kubeadm join LOAD_BALANCER_DNS:LOAD_BALANCER_PORT --token <token> \
--discovery-token-ca-cert-hash sha256:<hash> \
--control-plane --certificate-key <key>
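The bootstrap token and certificate key printed by kubeadm init expire (the token after 24 hours, the certificate key after 2 hours by default). If they have expired, fresh values can be generated on an existing control plane node:
# Print a fresh join command (creates a new token)
sudo kubeadm token create --print-join-command
# Re-upload the control plane certificates and print a new certificate key
sudo kubeadm init phase upload-certs --upload-certs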
# Download etcd
ETCD_VER=v3.5.0
wget https://github.com/etcd-io/etcd/releases/download/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz
tar xvf etcd-${ETCD_VER}-linux-amd64.tar.gz
sudo mv etcd-${ETCD_VER}-linux-amd64/etcd* /usr/local/bin/
# Create directories and copy the certificates on each etcd node
# (ca.pem, kubernetes.pem and kubernetes-key.pem are assumed to have been generated beforehand)
sudo mkdir -p /etc/etcd /var/lib/etcd
sudo chmod 700 /var/lib/etcd
sudo cp ca.pem kubernetes-key.pem kubernetes.pem /etc/etcd/
# Example systemd unit (adjust --name and the IP addresses for each etcd node)
cat <<EOF | sudo tee /etc/systemd/system/etcd.service
[Unit]
Description=etcd
Documentation=https://github.com/coreos
[Service]
ExecStart=/usr/local/bin/etcd \\
--name etcd1 \\
--cert-file=/etc/etcd/kubernetes.pem \\
--key-file=/etc/etcd/kubernetes-key.pem \\
--peer-cert-file=/etc/etcd/kubernetes.pem \\
--peer-key-file=/etc/etcd/kubernetes-key.pem \\
--trusted-ca-file=/etc/etcd/ca.pem \\
--peer-trusted-ca-file=/etc/etcd/ca.pem \\
--peer-client-cert-auth \\
--client-cert-auth \\
--initial-advertise-peer-urls https://192.168.1.101:2380 \\
--listen-peer-urls https://192.168.1.101:2380 \\
--listen-client-urls https://192.168.1.101:2379,https://127.0.0.1:2379 \\
--advertise-client-urls https://192.168.1.101:2379 \\
--initial-cluster-token etcd-cluster-0 \\
--initial-cluster etcd1=https://192.168.1.101:2380,etcd2=https://192.168.1.102:2380,etcd3=https://192.168.1.103:2380 \\
--initial-cluster-state new \\
--data-dir=/var/lib/etcd
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable etcd
sudo systemctl start etcd
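Once etcd has been started on all three nodes, cluster membership and health can be checked with etcdctl, using the same certificates configured above:
# Verify etcd cluster membership and health (run on any etcd node)
ETCDCTL_API=3 etcdctl --endpoints=https://192.168.1.101:2379,https://192.168.1.102:2379,https://192.168.1.103:2379 \
  --cacert=/etc/etcd/ca.pem \
  --cert=/etc/etcd/kubernetes.pem \
  --key=/etc/etcd/kubernetes-key.pem \
  member list

ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/etcd/ca.pem \
  --cert=/etc/etcd/kubernetes.pem \
  --key=/etc/etcd/kubernetes-key.pem \
  endpoint health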
You can use a hardware load balancer or a software solution such as HAProxy or Nginx:
# Install HAProxy
sudo apt-get install -y haproxy # Ubuntu
sudo yum install -y haproxy # CentOS/RHEL
# Configure /etc/haproxy/haproxy.cfg
frontend kubernetes
    bind *:6443
    option tcplog
    mode tcp
    default_backend kubernetes-master-nodes

backend kubernetes-master-nodes
    mode tcp
    balance roundrobin
    option tcp-check
    server master1 192.168.1.101:6443 check fall 3 rise 2
    server master2 192.168.1.102:6443 check fall 3 rise 2
    server master3 192.168.1.103:6443 check fall 3 rise 2
# Restart HAProxy
sudo systemctl restart haproxy
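Once at least one API server is running behind it, you can confirm that traffic actually flows through HAProxy (on a default kubeadm cluster /version is readable anonymously; replace LOAD_BALANCER_DNS with your load balancer address):
# Optional: probe the API server through the load balancer
nc -zv LOAD_BALANCER_DNS 6443
curl -k https://LOAD_BALANCER_DNS:6443/version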
# Initialize the first control plane node with kubeadm
sudo kubeadm init \
--control-plane-endpoint "LOAD_BALANCER_DNS:6443" \
--upload-certs \
--pod-network-cidr=10.244.0.0/16
# Run the join command printed by kubeadm init on the remaining master nodes
# Example:
sudo kubeadm join LOAD_BALANCER_DNS:6443 \
--token <token> \
--discovery-token-ca-cert-hash sha256:<hash> \
--control-plane \
--certificate-key <key>
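Before running kubectl (including the network plugin installation below), copy the admin kubeconfig as the kubeadm init output suggests:
# Configure kubectl for the current user (from the kubeadm init output)
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config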
# Install the Calico network plugin (supports network policies)
kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml
# Or install Flannel instead
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
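Worker nodes join with the same kubeadm join command, but without the --control-plane and --certificate-key flags:
# On each worker node
sudo kubeadm join LOAD_BALANCER_DNS:6443 \
  --token <token> \
  --discovery-token-ca-cert-hash sha256:<hash>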
# Check node status
kubectl get nodes -o wide
# Check the status of all pods
kubectl get pods --all-namespaces -o wide
# Check control plane component status (note: componentstatuses is deprecated in recent Kubernetes releases)
kubectl get componentstatuses
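To confirm the cluster really tolerates a control plane failure, a simple but disruptive smoke test (test environments only) is to take one master offline and check that the API stays reachable through the load balancer:
# On one master node (e.g. master3), take it offline
sudo shutdown -h now

# From another machine whose kubeconfig points at the load balancer
kubectl get nodes    # should still respond; the stopped master eventually shows NotReady
kubectl get pods -n kube-system -o wide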
# Create an etcd snapshot
ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 \
--cacert=/etc/etcd/ca.pem \
--cert=/etc/etcd/kubernetes.pem \
--key=/etc/etcd/kubernetes-key.pem \
snapshot save /backup/etcd-snapshot-$(date +%Y-%m-%d).db
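A matching restore sketch is shown below (the snapshot path and restore data directory are illustrative); for a multi-member cluster the restore has to be run on every member with its own --name and peer URL, after which etcd is restarted against the restored data directory:
# Restore the snapshot into a fresh data directory (example for etcd1)
ETCDCTL_API=3 etcdctl snapshot restore /backup/etcd-snapshot-<date>.db \
  --name etcd1 \
  --initial-cluster etcd1=https://192.168.1.101:2380,etcd2=https://192.168.1.102:2380,etcd3=https://192.168.1.103:2380 \
  --initial-cluster-token etcd-cluster-0 \
  --initial-advertise-peer-urls https://192.168.1.101:2380 \
  --data-dir /var/lib/etcd-restored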
By following the steps above, you can build a highly available Kubernetes cluster on Linux that tolerates node failures and keeps services running without interruption.