安装Kubernetes集群:
# Quick deployment with kubeadm (Ubuntu example).
# Fix: the legacy Google-hosted repos (packages.cloud.google.com / apt.kubernetes.io)
# were shut down in March 2024 — use the community-owned pkgs.k8s.io repo instead,
# and replace the deprecated `apt-key add` with a keyring referenced via signed-by.
# Substitute v1.30 with the minor version you want to install.
sudo apt-get update && sudo apt-get install -y apt-transport-https ca-certificates curl gpg
sudo mkdir -p -m 755 /etc/apt/keyrings
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.30/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
# Pin the packages so an unattended upgrade cannot break the cluster
sudo apt-mark hold kubelet kubeadm kubectl
# Initialize the control-plane node (this CIDR matches Flannel's default network)
sudo kubeadm init --pod-network-cidr=10.244.0.0/16
部署数据处理平台:
# Deploy the Spark Operator via Helm.
# Fix: the chart moved from the archived GoogleCloudPlatform repo to the
# Kubeflow project; the old googlecloudplatform.github.io URL is no longer maintained.
helm repo add spark-operator https://kubeflow.github.io/spark-operator
helm repo update
helm install my-spark spark-operator/spark-operator --namespace spark-operator --create-namespace
# docker-compose definition for a small Spark standalone cluster.
# Fix: the original snippet had all indentation stripped and was not valid YAML;
# restored canonical 2-space block structure.
version: '3'
services:
  spark-master:
    # NOTE(review): pin a specific image tag instead of `latest` for reproducible deploys
    image: bitnami/spark:latest
    ports:
      - "8080:8080"  # master web UI
      - "7077:7077"  # master RPC endpoint workers connect to
    environment:
      - SPARK_MODE=master
  spark-worker:
    image: bitnami/spark:latest
    depends_on:
      - spark-master
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
    deploy:
      # NOTE(review): deploy.replicas is only honored by Swarm / `docker stack deploy`;
      # with plain `docker compose up` use `--scale spark-worker=3` instead — confirm target runtime
      replicas: 3
资源分配:
# Example: setting resource limits for a Spark job submitted to Kubernetes.
# Fix: cluster mode on k8s requires spark.kubernetes.container.image (there is no
# default image), and with RBAC enabled the driver pod needs a service account
# authorized to create executor pods.
spark-submit --master k8s://https://<k8s-apiserver>:6443 \
  --deploy-mode cluster \
  --name spark-pi \
  --class org.apache.spark.examples.SparkPi \
  --conf spark.kubernetes.container.image=<spark-image> \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
  --conf spark.executor.instances=5 \
  --conf spark.executor.memory=4G \
  --conf spark.executor.cores=2 \
  --conf spark.driver.memory=2G \
  local:///opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar
持久化存储:
# Create a PersistentVolume (pair it with a matching PVC before mounting in pods).
# Fix: the heredoc YAML had its indentation stripped; restored valid structure.
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: spark-data-pv
spec:
  capacity:
    storage: 100Gi
  accessModes:
    # NOTE(review): hostPath is node-local storage — ReadWriteMany across multiple
    # nodes will not actually share data; use a networked storage class for multi-node
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: manual
  hostPath:
    path: "/mnt/data"
EOF
部署监控系统:
# Deploy Prometheus and Grafana via Helm.
# Fix: the original installed `grafana/grafana` without ever adding the grafana
# Helm repository, so that install would fail; add it and refresh the index first.
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo add grafana https://grafana.github.io/helm-charts
helm repo update
helm install prometheus prometheus-community/prometheus
helm install grafana grafana/grafana
日志收集:
# Deploy the Fluentd log-collector DaemonSet (Elasticsearch output variant).
# NOTE(review): this manifest expects an Elasticsearch endpoint supplied via the
# DaemonSet's environment variables — review/edit before applying in your cluster.
# NOTE(review): referencing the `master` branch is not reproducible; consider
# pinning a release tag of the fluentd-kubernetes-daemonset repo.
kubectl apply -f https://raw.githubusercontent.com/fluent/fluentd-kubernetes-daemonset/master/fluentd-daemonset-elasticsearch.yaml
网络优化:
存储优化:
调度优化:
# Node affinity rule: only schedule onto nodes labeled accelerator=gpu.
# Fix: the original snippet had all indentation stripped and was not valid YAML;
# restored the nesting required by the PodSpec schema.
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: accelerator
              operator: In
              values:
                - gpu
最小权限原则:
# Create a dedicated service account for Spark.
sudo_free_note=""  # (no sudo needed; kubectl acts with the caller's kubeconfig)
kubectl create serviceaccount spark
# Fix: a ClusterRoleBinding grants `edit` across ALL namespaces, contradicting the
# least-privilege principle this section describes; a namespaced RoleBinding limits
# the grant to the namespace Spark actually runs in.
kubectl create rolebinding spark-role --clusterrole=edit --serviceaccount=default:spark --namespace=default
网络策略:
# NetworkPolicy: restrict spark pods to talking only to other spark pods.
# Fix: the original snippet had all indentation stripped and was not valid YAML;
# restored the nesting required by the NetworkPolicy schema.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: spark-network-policy
spec:
  podSelector:
    matchLabels:
      app: spark
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - podSelector:
            matchLabels:
              app: spark
  egress:
    # NOTE(review): this also blocks DNS (kube-dns) egress — add a port-53 rule if pods need name resolution
    - to:
        - podSelector:
            matchLabels:
              app: spark
官方文档:
性能调优指南:
安全最佳实践: