10.1 容器化部署
1. Docker 容器化
Dockerfile 编写
用户服务 Dockerfile
# Official OpenJDK runtime as the base image (slim keeps the surface small)
FROM openjdk:11-jre-slim

# Working directory for the application
WORKDIR /app

# curl is required by the HEALTHCHECK below; *-slim images do not ship it
RUN apt-get update && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged application user (security best practice)
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Copy the application JAR
COPY target/user-service-*.jar app.jar

# Create the log directory and hand /app over to the app user
RUN mkdir -p /app/logs && chown -R appuser:appuser /app

# Drop privileges
USER appuser

# Documented service port
EXPOSE 8081

# JVM options. NOTE: -XX:+PrintGCDetails/-XX:+PrintGCTimeStamps/-Xloggc were
# removed in JDK 9+; on this JDK 11 image the old flags make the JVM refuse to
# start ("Unrecognized VM option"). Unified logging (-Xlog) replaces them.
ENV JAVA_OPTS="-Xms512m -Xmx1024m -XX:+UseG1GC -Xlog:gc*:file=/app/logs/gc.log:time,uptime"

# Container-level health probe against the Spring Boot actuator endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
  CMD curl -f http://localhost:8081/actuator/health || exit 1

# `exec` replaces the shell so the JVM is PID 1 and receives SIGTERM on stop
ENTRYPOINT ["sh", "-c", "exec java $JAVA_OPTS -jar app.jar"]
订单服务 Dockerfile
FROM openjdk:11-jre-slim

WORKDIR /app

# curl: used by HEALTHCHECK. netcat-openbsd: entrypoint.sh waits on TCP ports
# with `nc -z`; without it the container exits immediately at startup.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl netcat-openbsd && \
    rm -rf /var/lib/apt/lists/*

# Unprivileged application user
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Application artifact and startup script
COPY target/order-service-*.jar app.jar
COPY docker/entrypoint.sh entrypoint.sh

# Make the script executable and give the app user ownership of /app
RUN chmod +x entrypoint.sh && \
    mkdir -p /app/logs && \
    chown -R appuser:appuser /app

USER appuser

EXPOSE 8082

# Runtime configuration
ENV SPRING_PROFILES_ACTIVE=docker
ENV JAVA_OPTS="-Xms512m -Xmx1024m -XX:+UseG1GC"

HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
  CMD curl -f http://localhost:8082/actuator/health || exit 1

# entrypoint.sh ends with `exec java ...` so the JVM becomes PID 1
ENTRYPOINT ["./entrypoint.sh"]
启动脚本 entrypoint.sh
#!/bin/bash
# Entrypoint: block until hard dependencies (database, Eureka) accept TCP
# connections, then start the JVM. Requires netcat (`nc`) in the image.
set -euo pipefail

# Fail fast with a clear error if required environment is missing.
: "${DB_HOST:?DB_HOST must be set}"
: "${DB_PORT:?DB_PORT must be set}"
: "${EUREKA_HOST:?EUREKA_HOST must be set}"
: "${EUREKA_PORT:?EUREKA_PORT must be set}"

# wait_for NAME HOST PORT — poll until the TCP port is reachable.
wait_for() {
    local name="$1" host="$2" port="$3"
    while ! nc -z "$host" "$port"; do
        echo "Waiting for $name at $host:$port..."
        sleep 2
    done
}

echo "Waiting for dependencies..."
wait_for "database" "$DB_HOST" "$DB_PORT"
wait_for "Eureka" "$EUREKA_HOST" "$EUREKA_PORT"

echo "Dependencies are ready. Starting application..."
# exec replaces the shell so java is PID 1 and receives SIGTERM directly.
# $JAVA_OPTS is intentionally unquoted: it carries multiple JVM flags.
exec java ${JAVA_OPTS:-} -jar app.jar
Docker Compose 配置
docker-compose.yml
version: '3.8'

services:
  # ---------- Infrastructure ----------
  mysql:
    image: mysql:8.0
    container_name: microservices-mysql
    environment:
      MYSQL_ROOT_PASSWORD: rootpassword
      MYSQL_DATABASE: microservices_db
      MYSQL_USER: microuser
      MYSQL_PASSWORD: micropass
    ports:
      - "3306:3306"
    volumes:
      - mysql_data:/var/lib/mysql
      - ./docker/mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
      interval: 30s
      timeout: 20s
      retries: 10

  redis:
    image: redis:6.2-alpine
    container_name: microservices-redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ---------- Service registry ----------
  eureka-server:
    build:
      context: ./eureka-server
      dockerfile: Dockerfile
    container_name: eureka-server
    ports:
      - "8761:8761"
    environment:
      - SPRING_PROFILES_ACTIVE=docker
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8761/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  # ---------- Edge ----------
  api-gateway:
    build:
      context: ./api-gateway
      dockerfile: Dockerfile
    container_name: api-gateway
    ports:
      - "8080:8080"
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
    depends_on:
      eureka-server:
        condition: service_healthy
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  # ---------- Business services ----------
  # FIX: no container_name here — a fixed container name is incompatible with
  # deploy.replicas > 1 (container names must be unique per container).
  user-service:
    build:
      context: ./user-service
      dockerfile: Dockerfile
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - SPRING_DATASOURCE_URL=jdbc:mysql://mysql:3306/microservices_db
      - SPRING_DATASOURCE_USERNAME=microuser
      - SPRING_DATASOURCE_PASSWORD=micropass
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
      - SPRING_REDIS_HOST=redis
    depends_on:
      mysql:
        condition: service_healthy
      eureka-server:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - microservices-network
    # NOTE: the deploy section is honored by Swarm / `docker compose --compatibility`
    deploy:
      replicas: 2
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8081/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  order-service:
    build:
      context: ./order-service
      dockerfile: Dockerfile
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - SPRING_DATASOURCE_URL=jdbc:mysql://mysql:3306/microservices_db
      - SPRING_DATASOURCE_USERNAME=microuser
      - SPRING_DATASOURCE_PASSWORD=micropass
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
      - SPRING_REDIS_HOST=redis
      # Consumed by entrypoint.sh dependency waits
      - DB_HOST=mysql
      - DB_PORT=3306
      - EUREKA_HOST=eureka-server
      - EUREKA_PORT=8761
    depends_on:
      mysql:
        condition: service_healthy
      eureka-server:
        condition: service_healthy
      redis:
        condition: service_healthy
      user-service:
        condition: service_healthy
    networks:
      - microservices-network
    deploy:
      replicas: 2
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8082/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  # ---------- Config server ----------
  config-server:
    build:
      context: ./config-server
      dockerfile: Dockerfile
    container_name: config-server
    ports:
      - "8888:8888"
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
    depends_on:
      eureka-server:
        condition: service_healthy
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

volumes:
  mysql_data:
  redis_data:

networks:
  microservices-network:
    driver: bridge
2. 多阶段构建优化
优化的 Dockerfile
多阶段构建 Dockerfile
# ---------- Build stage ----------
FROM maven:3.8.4-openjdk-11-slim AS builder
WORKDIR /app

# Copy the pom first so the dependency download layer is cached until pom.xml changes
COPY pom.xml .
RUN mvn dependency:go-offline -B

# Copy sources and build
COPY src ./src
RUN mvn clean package -DskipTests -B

# ---------- Runtime stage ----------
FROM openjdk:11-jre-slim AS runtime

# curl for HEALTHCHECK, netcat for entrypoint dependency waits; clean the apt
# lists in the same layer that created them so the image stays small
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl netcat-openbsd && \
    rm -rf /var/lib/apt/lists/*

# Unprivileged application user
RUN groupadd -r appuser && useradd -r -g appuser appuser

WORKDIR /app

# Only the built artifact crosses the stage boundary — no Maven, no sources
COPY --from=builder /app/target/*.jar app.jar

# App-writable directories
RUN mkdir -p /app/logs /app/config && \
    chown -R appuser:appuser /app

# FIX: --chown, because these COPYs run after the recursive chown above and
# would otherwise leave root-owned files in the app user's directory
COPY --chown=appuser:appuser docker/application-docker.yml /app/config/
COPY --chown=appuser:appuser docker/entrypoint.sh /app/
RUN chmod +x /app/entrypoint.sh

USER appuser

EXPOSE 8080

# JVM options for containers. FIX: -XX:+UseCGroupMemoryLimitForHeap was removed
# in JDK 11 and made the JVM fail to start; -XX:+UseContainerSupport is the
# default on JDK 11 and is kept only for documentation value.
ENV JAVA_OPTS="-Xms256m -Xmx512m \
    -XX:+UseG1GC \
    -XX:MaxGCPauseMillis=200 \
    -XX:+UseContainerSupport \
    -Djava.security.egd=file:/dev/./urandom"

HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
  CMD curl -f http://localhost:8080/actuator/health || exit 1

ENTRYPOINT ["/app/entrypoint.sh"]
10.2 Kubernetes 部署
1. Kubernetes 资源配置
命名空间配置
namespace.yaml
# Dedicated namespace isolating all microservice workloads
apiVersion: v1
kind: Namespace
metadata:
  name: microservices
  labels:
    name: microservices
    environment: production
ConfigMap 配置
configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: microservices-config
namespace: microservices
data:
application.yml: |
spring:
profiles:
active: k8s
eureka:
client:
service-url:
defaultZone: http://eureka-server:8761/eureka/
instance:
prefer-ip-address: true
hostname: ${HOSTNAME}
management:
endpoints:
web:
exposure:
include: health,info,metrics,prometheus
endpoint:
health:
show-details: when-authorized
logging:
level:
com.example: INFO
pattern:
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level [%X{traceId},%X{spanId}] %logger{36} - %msg%n"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: mysql-config
namespace: microservices
data:
my.cnf: |
[mysqld]
default-authentication-plugin=mysql_native_password
character-set-server=utf8mb4
collation-server=utf8mb4_unicode_ci
max_connections=200
innodb_buffer_pool_size=256M
innodb_log_file_size=64M
Secret 配置
secret.yaml
# NOTE(review): base64 is encoding, not encryption — committing these values to
# version control exposes the credentials. Prefer SealedSecrets, ExternalSecrets
# or SOPS for anything beyond a local demo.
apiVersion: v1
kind: Secret
metadata:
  name: microservices-secrets
  namespace: microservices
type: Opaque
data:
  mysql-root-password: cm9vdHBhc3N3b3Jk
  mysql-user-password: bWljcm9wYXNz
  jwt-secret: bXlWZXJ5U2VjcmV0S2V5VGhhdElzQXRMZWFzdDI1NkJpdHNMb25nRm9ySFM1MTJBbGdvcml0aG0=
  redis-password: ""
---
# Image-pull credentials for the private registry
apiVersion: v1
kind: Secret
metadata:
  name: registry-secret
  namespace: microservices
type: kubernetes.io/dockerconfigjson
data:
  .dockerconfigjson: eyJhdXRocyI6eyJyZWdpc3RyeS5leGFtcGxlLmNvbSI6eyJ1c2VybmFtZSI6InVzZXIiLCJwYXNzd29yZCI6InBhc3MiLCJhdXRoIjoiZFhObGNqcHdZWE56In19fQ==
持久化存储配置
persistent-volume.yaml
# FIX: PersistentVolume is a cluster-scoped resource — a namespace in its
# metadata is invalid and was removed. Only the PVCs are namespaced.
# NOTE(review): hostPath PVs pin data to a single node; fine for a single-node
# demo cluster, not for production — TODO confirm the target environment.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: mysql-pv
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: fast-ssd
  hostPath:
    path: /data/mysql
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: mysql-pvc
  namespace: microservices
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
  storageClassName: fast-ssd
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: redis-pv
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: fast-ssd
  hostPath:
    path: /data/redis
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: redis-pvc
  namespace: microservices
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
  storageClassName: fast-ssd
2. 服务部署配置
MySQL 部署
mysql-deployment.yaml
# Single-replica MySQL backed by the mysql-pvc claim; credentials come from
# the microservices-secrets Secret, tuning from the mysql-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mysql
  namespace: microservices
  labels:
    app: mysql
spec:
  replicas: 1
  selector:
    matchLabels:
      app: mysql
  template:
    metadata:
      labels:
        app: mysql
    spec:
      containers:
        - name: mysql
          image: mysql:8.0
          ports:
            - containerPort: 3306
          env:
            - name: MYSQL_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: microservices-secrets
                  key: mysql-root-password
            - name: MYSQL_DATABASE
              value: "microservices_db"
            - name: MYSQL_USER
              value: "microuser"
            - name: MYSQL_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: microservices-secrets
                  key: mysql-user-password
          volumeMounts:
            - name: mysql-storage
              mountPath: /var/lib/mysql
            - name: mysql-config
              mountPath: /etc/mysql/conf.d
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          livenessProbe:
            exec:
              command: ["mysqladmin", "ping", "-h", "localhost"]
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
          readinessProbe:
            exec:
              command: ["mysqladmin", "ping", "-h", "localhost"]
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
      volumes:
        - name: mysql-storage
          persistentVolumeClaim:
            claimName: mysql-pvc
        - name: mysql-config
          configMap:
            name: mysql-config
---
# Cluster-internal endpoint: mysql.microservices.svc:3306
apiVersion: v1
kind: Service
metadata:
  name: mysql
  namespace: microservices
  labels:
    app: mysql
spec:
  type: ClusterIP
  ports:
    - port: 3306
      targetPort: 3306
  selector:
    app: mysql
Redis 部署
redis-deployment.yaml
# Single-replica Redis with AOF persistence on the redis-pvc volume
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  namespace: microservices
  labels:
    app: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:6.2-alpine
          ports:
            - containerPort: 6379
          # Append-only persistence so data survives restarts
          command: ["redis-server", "--appendonly", "yes"]
          volumeMounts:
            - name: redis-storage
              mountPath: /data
          resources:
            requests:
              memory: "256Mi"
              cpu: "125m"
            limits:
              memory: "512Mi"
              cpu: "250m"
          livenessProbe:
            exec:
              command: ["redis-cli", "ping"]
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            exec:
              command: ["redis-cli", "ping"]
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: redis-storage
          persistentVolumeClaim:
            claimName: redis-pvc
---
# Cluster-internal endpoint: redis.microservices.svc:6379
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: microservices
  labels:
    app: redis
spec:
  type: ClusterIP
  ports:
    - port: 6379
      targetPort: 6379
  selector:
    app: redis
Eureka Server 部署
eureka-deployment.yaml
# Two-replica Eureka registry; each instance registers under its pod name
apiVersion: apps/v1
kind: Deployment
metadata:
  name: eureka-server
  namespace: microservices
  labels:
    app: eureka-server
spec:
  replicas: 2
  selector:
    matchLabels:
      app: eureka-server
  template:
    metadata:
      labels:
        app: eureka-server
    spec:
      imagePullSecrets:
        - name: registry-secret
      containers:
        - name: eureka-server
          image: microservices/eureka-server:latest
          ports:
            - containerPort: 8761
          env:
            - name: SPRING_PROFILES_ACTIVE
              value: "k8s"
            # Use the pod name as the Eureka instance hostname
            - name: EUREKA_INSTANCE_HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            - name: config-volume
              mountPath: /app/config
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /actuator/health
              port: 8761
            initialDelaySeconds: 60
            periodSeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /actuator/health
              port: 8761
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
      volumes:
        - name: config-volume
          configMap:
            name: microservices-config
---
# In-cluster endpoint used by all service clients
apiVersion: v1
kind: Service
metadata:
  name: eureka-server
  namespace: microservices
  labels:
    app: eureka-server
spec:
  type: ClusterIP
  ports:
    - port: 8761
      targetPort: 8761
  selector:
    app: eureka-server
---
# NodePort exposure for the Eureka dashboard (node IP : 30761)
apiVersion: v1
kind: Service
metadata:
  name: eureka-server-external
  namespace: microservices
  labels:
    app: eureka-server
spec:
  type: NodePort
  ports:
    - port: 8761
      targetPort: 8761
      nodePort: 30761
  selector:
    app: eureka-server
用户服务部署
user-service-deployment.yaml
# User service: 3 replicas, scraped by Prometheus via pod annotations
apiVersion: apps/v1
kind: Deployment
metadata:
  name: user-service
  namespace: microservices
  labels:
    app: user-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: user-service
  template:
    metadata:
      labels:
        app: user-service
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8081"
        prometheus.io/path: "/actuator/prometheus"
    spec:
      imagePullSecrets:
        - name: registry-secret
      # Gate startup on hard dependencies being reachable
      initContainers:
        - name: wait-for-mysql
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z mysql 3306; do echo waiting for mysql; sleep 2; done;']
        - name: wait-for-eureka
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z eureka-server 8761; do echo waiting for eureka; sleep 2; done;']
      containers:
        - name: user-service
          image: microservices/user-service:latest
          ports:
            - containerPort: 8081
          env:
            - name: SPRING_PROFILES_ACTIVE
              value: "k8s"
            - name: SPRING_DATASOURCE_URL
              value: "jdbc:mysql://mysql:3306/microservices_db"
            - name: SPRING_DATASOURCE_USERNAME
              value: "microuser"
            - name: SPRING_DATASOURCE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: microservices-secrets
                  key: mysql-user-password
            - name: SPRING_REDIS_HOST
              value: "redis"
            - name: EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE
              value: "http://eureka-server:8761/eureka/"
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: microservices-secrets
                  key: jwt-secret
            # Expanded by the shared application.yml (eureka.instance.hostname)
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            - name: config-volume
              mountPath: /app/config
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /actuator/health
              port: 8081
            initialDelaySeconds: 90
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /actuator/health
              port: 8081
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
      volumes:
        - name: config-volume
          configMap:
            name: microservices-config
---
# Cluster-internal endpoint consumed through the API gateway
apiVersion: v1
kind: Service
metadata:
  name: user-service
  namespace: microservices
  labels:
    app: user-service
spec:
  type: ClusterIP
  ports:
    - port: 8081
      targetPort: 8081
  selector:
    app: user-service
3. 水平扩缩容配置
HPA 配置
hpa.yaml
# Autoscaling for user-service: 2–10 replicas on CPU 70% / memory 80%,
# conservative scale-down (10%/min after a 5-minute window), aggressive
# scale-up (max of +50% or +2 pods per minute).
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: user-service-hpa
  namespace: microservices
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: user-service
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Max
---
# Autoscaling for order-service: 2–8 replicas, default scaling behavior
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: order-service-hpa
  namespace: microservices
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: order-service
  minReplicas: 2
  maxReplicas: 8
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
10.3 服务监控
1. Prometheus 监控配置
Prometheus 部署
prometheus-config.yaml
# Prometheus server configuration + alert rules, mounted at /etc/prometheus
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: microservices
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    # FIX: this ConfigMap mounts alert_rules.yml at /etc/prometheus/alert_rules.yml;
    # the previous glob "/etc/prometheus/rules/*.yml" never matched it, so the
    # alert rules below were silently never loaded.
    rule_files:
      - "/etc/prometheus/alert_rules.yml"

    alerting:
      alertmanagers:
        - static_configs:
            - targets:
                - alertmanager:9093

    scrape_configs:
      - job_name: 'prometheus'
        static_configs:
          - targets: ['localhost:9090']

      # Pods opt in via the prometheus.io/scrape=true annotation
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
            namespaces:
              names:
                - microservices
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name

      # Services opt in the same way via service annotations
      - job_name: 'kubernetes-services'
        kubernetes_sd_configs:
          - role: service
            namespaces:
              names:
                - microservices
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_service_name

  alert_rules.yml: |
    groups:
      - name: microservices.rules
        rules:
          - alert: ServiceDown
            expr: up == 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Service {{ $labels.instance }} is down"
              description: "{{ $labels.instance }} has been down for more than 1 minute."
          - alert: HighCPUUsage
            expr: (100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)) > 80
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "High CPU usage on {{ $labels.instance }}"
              description: "CPU usage is above 80% for more than 5 minutes."
          - alert: HighMemoryUsage
            expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "High memory usage on {{ $labels.instance }}"
              description: "Memory usage is above 85% for more than 5 minutes."
          - alert: HighErrorRate
            expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.1
            for: 2m
            labels:
              severity: critical
            annotations:
              summary: "High error rate on {{ $labels.instance }}"
              description: "Error rate is above 10% for more than 2 minutes."
---
# Prometheus server. NOTE(review): storage is emptyDir, so the 15d retention
# only survives as long as the pod — use a PVC for durable metrics.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: microservices
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      # Service account bound to the read-only ClusterRole for service discovery
      serviceAccountName: prometheus
      containers:
        - name: prometheus
          image: prom/prometheus:v2.40.0
          ports:
            - containerPort: 9090
          args:
            - '--config.file=/etc/prometheus/prometheus.yml'
            - '--storage.tsdb.path=/prometheus/'
            - '--web.console.libraries=/etc/prometheus/console_libraries'
            - '--web.console.templates=/etc/prometheus/consoles'
            - '--storage.tsdb.retention.time=15d'
            - '--web.enable-lifecycle'
          volumeMounts:
            - name: prometheus-config
              mountPath: /etc/prometheus
            - name: prometheus-storage
              mountPath: /prometheus
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
        - name: prometheus-storage
          emptyDir: {}
---
# In-cluster endpoint for the Prometheus UI/API (used by Grafana)
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: microservices
  labels:
    app: prometheus
spec:
  type: ClusterIP
  ports:
    - port: 9090
      targetPort: 9090
  selector:
    app: prometheus
---
# RBAC: read-only cluster access so Prometheus can discover scrape targets
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: microservices
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups:
      - extensions
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: microservices
2. Grafana 可视化
Grafana 部署
grafana-deployment.yaml
# Provisioned Prometheus datasource for Grafana
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-datasources
  namespace: microservices
data:
  prometheus.yaml: |
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        url: http://prometheus:9090
        access: proxy
        isDefault: true
---
# Grafana. NOTE(review): the admin password is plaintext in the pod spec —
# move it into a Secret via secretKeyRef before any shared environment.
# Dashboards live on emptyDir and are lost on pod restart.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: microservices
  labels:
    app: grafana
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
  template:
    metadata:
      labels:
        app: grafana
    spec:
      containers:
        - name: grafana
          image: grafana/grafana:9.3.0
          ports:
            - containerPort: 3000
          env:
            - name: GF_SECURITY_ADMIN_PASSWORD
              value: "admin123"
            - name: GF_USERS_ALLOW_SIGN_UP
              value: "false"
          volumeMounts:
            - name: grafana-storage
              mountPath: /var/lib/grafana
            - name: grafana-datasources
              mountPath: /etc/grafana/provisioning/datasources
          resources:
            requests:
              memory: "256Mi"
              cpu: "125m"
            limits:
              memory: "512Mi"
              cpu: "250m"
      volumes:
        - name: grafana-storage
          emptyDir: {}
        - name: grafana-datasources
          configMap:
            name: grafana-datasources
---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: microservices
  labels:
    app: grafana
spec:
  type: ClusterIP
  ports:
    - port: 3000
      targetPort: 3000
  selector:
    app: grafana
10.4 日志管理
1. ELK Stack 部署
Elasticsearch 部署
elasticsearch-deployment.yaml
# Three-node Elasticsearch cluster as a StatefulSet; each pod gets its own
# PVC via volumeClaimTemplates and a stable DNS name through the headless
# service below (elasticsearch-{0,1,2}.elasticsearch).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: elasticsearch
  namespace: microservices
  labels:
    app: elasticsearch
spec:
  serviceName: elasticsearch
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
  template:
    metadata:
      labels:
        app: elasticsearch
    spec:
      containers:
        - name: elasticsearch
          image: docker.elastic.co/elasticsearch/elasticsearch:8.5.0
          ports:
            - containerPort: 9200
            - containerPort: 9300
          env:
            - name: cluster.name
              value: "microservices-cluster"
            # Node name = pod name, matching initial_master_nodes below
            - name: node.name
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: discovery.seed_hosts
              value: "elasticsearch-0.elasticsearch,elasticsearch-1.elasticsearch,elasticsearch-2.elasticsearch"
            - name: cluster.initial_master_nodes
              value: "elasticsearch-0,elasticsearch-1,elasticsearch-2"
            - name: ES_JAVA_OPTS
              value: "-Xms1g -Xmx1g"
            # NOTE(review): security disabled — acceptable only inside a
            # trusted cluster network
            - name: xpack.security.enabled
              value: "false"
          volumeMounts:
            - name: elasticsearch-data
              mountPath: /usr/share/elasticsearch/data
          resources:
            requests:
              memory: "2Gi"
              cpu: "500m"
            limits:
              memory: "4Gi"
              cpu: "1000m"
  volumeClaimTemplates:
    - metadata:
        name: elasticsearch-data
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 20Gi
---
# Headless service (clusterIP: None) providing per-pod DNS for discovery
apiVersion: v1
kind: Service
metadata:
  name: elasticsearch
  namespace: microservices
  labels:
    app: elasticsearch
spec:
  clusterIP: None
  ports:
    - name: http
      port: 9200
      targetPort: 9200
    - name: transport
      port: 9300
      targetPort: 9300
  selector:
    app: elasticsearch
Logstash 部署
logstash-deployment.yaml
# Logstash settings + pipeline definition
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-config
  namespace: microservices
data:
  logstash.yml: |
    http.host: "0.0.0.0"
    path.config: /usr/share/logstash/pipeline
  logstash.conf: |
    input {
      beats {
        port => 5044
      }
    }
    filter {
      if [fields][service] {
        mutate {
          add_field => { "service_name" => "%{[fields][service]}" }
        }
      }
      # Parse the Spring Boot console log format
      if [service_name] =~ /.*-service/ {
        grok {
          match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} \[%{DATA:thread}\] %{LOGLEVEL:level} \[%{DATA:trace_info}\] %{DATA:logger} - %{GREEDYDATA:log_message}" }
        }
        date {
          match => [ "timestamp", "yyyy-MM-dd HH:mm:ss" ]
        }
        # Split the combined trace field into trace/span ids
        if [trace_info] {
          grok {
            match => { "trace_info" => "%{DATA:trace_id},%{DATA:span_id}" }
          }
        }
      }
      # Tag every event with its environment
      mutate {
        add_field => { "environment" => "production" }
        add_field => { "cluster" => "microservices" }
      }
    }
    output {
      elasticsearch {
        hosts => ["elasticsearch:9200"]
        index => "microservices-logs-%{+YYYY.MM.dd}"
      }
      stdout {
        codec => rubydebug
      }
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: logstash
  namespace: microservices
  labels:
    app: logstash
spec:
  replicas: 2
  selector:
    matchLabels:
      app: logstash
  template:
    metadata:
      labels:
        app: logstash
    spec:
      containers:
        - name: logstash
          image: docker.elastic.co/logstash/logstash:8.5.0
          ports:
            - containerPort: 5044
          env:
            - name: LS_JAVA_OPTS
              value: "-Xms1g -Xmx1g"
          volumeMounts:
            # FIX: mount logstash.yml via subPath instead of replacing the whole
            # /usr/share/logstash/config directory — the previous directory
            # mount shadowed the image's default jvm.options / log4j2.properties
            # / pipelines.yml, which Logstash needs at startup.
            - name: logstash-config
              mountPath: /usr/share/logstash/config/logstash.yml
              subPath: logstash.yml
            - name: logstash-pipeline
              mountPath: /usr/share/logstash/pipeline
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
      volumes:
        - name: logstash-config
          configMap:
            name: logstash-config
            items:
              - key: logstash.yml
                path: logstash.yml
        - name: logstash-pipeline
          configMap:
            name: logstash-config
            items:
              - key: logstash.conf
                path: logstash.conf
---
# Endpoint Filebeat ships to
apiVersion: v1
kind: Service
metadata:
  name: logstash
  namespace: microservices
  labels:
    app: logstash
spec:
  type: ClusterIP
  ports:
    - port: 5044
      targetPort: 5044
  selector:
    app: logstash
Kibana 部署
kibana-deployment.yaml
# Kibana UI wired to the in-cluster Elasticsearch service
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kibana
  namespace: microservices
  labels:
    app: kibana
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kibana
  template:
    metadata:
      labels:
        app: kibana
    spec:
      containers:
        - name: kibana
          image: docker.elastic.co/kibana/kibana:8.5.0
          ports:
            - containerPort: 5601
          env:
            - name: ELASTICSEARCH_HOSTS
              value: "http://elasticsearch:9200"
            - name: SERVER_NAME
              value: "kibana"
            - name: SERVER_HOST
              value: "0.0.0.0"
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          readinessProbe:
            httpGet:
              path: /api/status
              port: 5601
            initialDelaySeconds: 30
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /api/status
              port: 5601
            initialDelaySeconds: 60
            periodSeconds: 30
---
apiVersion: v1
kind: Service
metadata:
  name: kibana
  namespace: microservices
  labels:
    app: kibana
spec:
  type: ClusterIP
  ports:
    - port: 5601
      targetPort: 5601
  selector:
    app: kibana
2. Filebeat 日志收集
Filebeat 配置
filebeat-daemonset.yaml
# Filebeat node agent: tails container logs for this namespace's pods,
# enriches them with Kubernetes metadata and ships them to Logstash.
apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
  namespace: microservices
data:
  filebeat.yml: |
    filebeat.inputs:
      - type: container
        paths:
          - /var/log/containers/*microservices*.log
        processors:
          - add_kubernetes_metadata:
              host: ${NODE_NAME}
              matchers:
                - logs_path:
                    logs_path: "/var/log/containers/"
          - decode_json_fields:
              fields: ["message"]
              target: ""
              overwrite_keys: true

    output.logstash:
      hosts: ["logstash:5044"]

    logging.level: info
    logging.to_files: true
    logging.files:
      path: /var/log/filebeat
      name: filebeat
      keepfiles: 7
      permissions: 0644
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: filebeat
  namespace: microservices
  labels:
    app: filebeat
spec:
  selector:
    matchLabels:
      app: filebeat
  template:
    metadata:
      labels:
        app: filebeat
    spec:
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: filebeat
          image: docker.elastic.co/beats/filebeat:8.5.0
          args: ["-c", "/etc/filebeat.yml", "-e"]
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            # Root is required to read other containers' logs on the host
            runAsUser: 0
          resources:
            requests:
              memory: 100Mi
              cpu: 50m
            limits:
              memory: 200Mi
              cpu: 100m
          volumeMounts:
            - name: config
              mountPath: /etc/filebeat.yml
              readOnly: true
              subPath: filebeat.yml
            - name: data
              mountPath: /usr/share/filebeat/data
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: true
            - name: varlog
              mountPath: /var/log
              readOnly: true
      volumes:
        - name: config
          configMap:
            defaultMode: 0640
            name: filebeat-config
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
        - name: varlog
          hostPath:
            path: /var/log
        # Registry state survives pod restarts so logs are not re-shipped
        - name: data
          hostPath:
            path: /var/lib/filebeat-data
            type: DirectoryOrCreate
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat
  namespace: microservices
---
# Read-only access needed by add_kubernetes_metadata
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: filebeat
rules:
  - apiGroups: [""]
    resources:
      - namespaces
      - pods
      - nodes
    verbs:
      - get
      - watch
      - list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: filebeat
subjects:
  - kind: ServiceAccount
    name: filebeat
    namespace: microservices
roleRef:
  kind: ClusterRole
  name: filebeat
  apiGroup: rbac.authorization.k8s.io
10.5 CI/CD 流水线
1. Jenkins 流水线配置
Jenkinsfile
Jenkinsfile
pipeline {
agent any
environment {
DOCKER_REGISTRY = 'registry.example.com'
DOCKER_REPO = 'microservices'
KUBECONFIG = credentials('kubeconfig')
DOCKER_CREDENTIALS = credentials('docker-registry')
SONAR_TOKEN = credentials('sonar-token')
}
stages {
stage('Checkout') {
steps {
checkout scm
script {
env.GIT_COMMIT_SHORT = sh(
script: "git rev-parse --short HEAD",
returnStdout: true
).trim()
env.BUILD_VERSION = "${env.BUILD_NUMBER}-${env.GIT_COMMIT_SHORT}"
}
}
}
stage('Build') {
parallel {
stage('User Service') {
steps {
dir('user-service') {
sh 'mvn clean compile'
}
}
}
stage('Order Service') {
steps {
dir('order-service') {
sh 'mvn clean compile'
}
}
}
stage('API Gateway') {
steps {
dir('api-gateway') {
sh 'mvn clean compile'
}
}
}
}
}
stage('Test') {
parallel {
stage('Unit Tests') {
steps {
script {
def services = ['user-service', 'order-service', 'api-gateway']
services.each { service ->
dir(service) {
sh 'mvn test'
publishTestResults(
testResultsPattern: 'target/surefire-reports/*.xml',
allowEmptyResults: true
)
}
}
}
}
}
stage('Integration Tests') {
steps {
sh 'mvn verify -Pintegration-test'
publishTestResults(
testResultsPattern: '**/target/failsafe-reports/*.xml',
allowEmptyResults: true
)
}
}
}
}
stage('Code Quality') {
parallel {
stage('SonarQube Analysis') {
steps {
withSonarQubeEnv('SonarQube') {
sh '''
mvn sonar:sonar \
-Dsonar.projectKey=microservices \
-Dsonar.host.url=$SONAR_HOST_URL \
-Dsonar.login=$SONAR_TOKEN
'''
}
}
}
stage('Security Scan') {
steps {
sh 'mvn org.owasp:dependency-check-maven:check'
publishHTML([
allowMissing: false,
alwaysLinkToLastBuild: true,
keepAll: true,
reportDir: 'target',
reportFiles: 'dependency-check-report.html',
reportName: 'OWASP Dependency Check Report'
])
}
}
}
}
stage('Quality Gate') {
steps {
timeout(time: 5, unit: 'MINUTES') {
waitForQualityGate abortPipeline: true
}
}
}
stage('Package') {
parallel {
stage('User Service') {
steps {
dir('user-service') {
sh 'mvn package -DskipTests'
archiveArtifacts artifacts: 'target/*.jar', fingerprint: true
}
}
}
stage('Order Service') {
steps {
dir('order-service') {
sh 'mvn package -DskipTests'
archiveArtifacts artifacts: 'target/*.jar', fingerprint: true
}
}
}
stage('API Gateway') {
steps {
dir('api-gateway') {
sh 'mvn package -DskipTests'
archiveArtifacts artifacts: 'target/*.jar', fingerprint: true
}
}
}
}
}
// Image build stage: builds and pushes one image per service in parallel.
// Each image is tagged with the build version and also as the mutable
// `latest` tag (the latter is convenient but not reproducible).
stage('Docker Build') {
    parallel {
        stage('User Service Image') {
            steps {
                dir('user-service') {
                    script {
                        def image = docker.build("${DOCKER_REGISTRY}/${DOCKER_REPO}/user-service:${BUILD_VERSION}")
                        // Authenticates against the registry using the stored credentials id.
                        docker.withRegistry("https://${DOCKER_REGISTRY}", DOCKER_CREDENTIALS) {
                            image.push()
                            image.push('latest')
                        }
                    }
                }
            }
        }
        stage('Order Service Image') {
            steps {
                dir('order-service') {
                    script {
                        def image = docker.build("${DOCKER_REGISTRY}/${DOCKER_REPO}/order-service:${BUILD_VERSION}")
                        docker.withRegistry("https://${DOCKER_REGISTRY}", DOCKER_CREDENTIALS) {
                            image.push()
                            image.push('latest')
                        }
                    }
                }
            }
        }
        stage('API Gateway Image') {
            steps {
                dir('api-gateway') {
                    script {
                        def image = docker.build("${DOCKER_REGISTRY}/${DOCKER_REPO}/api-gateway:${BUILD_VERSION}")
                        docker.withRegistry("https://${DOCKER_REGISTRY}", DOCKER_CREDENTIALS) {
                            image.push()
                            image.push('latest')
                        }
                    }
                }
            }
        }
    }
}
// Image vulnerability scan stage: Trivy scans every pushed image.
// NOTE(review): `parallel` here has only a single branch and can be flattened.
stage('Security Scan Images') {
    parallel {
        stage('Trivy Scan') {
            steps {
                script {
                    def services = ['user-service', 'order-service', 'api-gateway']
                    // One scan per service image, written to <service>-scan.json.
                    services.each { service ->
                        sh """
trivy image --format json --output ${service}-scan.json \
${DOCKER_REGISTRY}/${DOCKER_REPO}/${service}:${BUILD_VERSION}
"""
                    }
                }
                // NOTE(review): publishHTML is given *.json files — Trivy's JSON
                // output is not HTML; confirm this renders as intended or switch
                // Trivy to a template/HTML output format.
                publishHTML([
                    allowMissing: false,
                    alwaysLinkToLastBuild: true,
                    keepAll: true,
                    reportDir: '.',
                    reportFiles: '*-scan.json',
                    reportName: 'Trivy Security Scan Report'
                ])
            }
        }
    }
}
// Staging rollout: runs only for the develop branch.
stage('Deploy to Staging') {
    when {
        branch 'develop'
    }
    steps {
        script {
            // --wait blocks until all chart resources report ready (10 min cap),
            // so a failed rollout fails the stage instead of silently proceeding.
            sh """
helm upgrade --install microservices-staging ./helm/microservices \
--namespace microservices-staging \
--create-namespace \
--set image.tag=${BUILD_VERSION} \
--set environment=staging \
--wait --timeout=600s
"""
        }
        // Smoke tests run on the agent; it needs Maven and network access to staging.
        sh 'mvn test -Dtest=SmokeTest -Dtest.environment=staging'
    }
}
// Production rollout using a blue/green strategy: install the idle color,
// verify it, then flip the api-gateway Service selector to switch traffic.
stage('Deploy to Production') {
    when {
        branch 'main'
    }
    steps {
        script {
            // Determine which color currently receives traffic.
            // NOTE(review): on the very first deploy the selector has no `color`
            // key, so currentColor is empty, newColor becomes 'blue', and the
            // final `helm uninstall microservices-` targets a non-existent
            // release (tolerated only because of `|| true`).
            def currentColor = sh(
                script: "kubectl get service api-gateway -n microservices -o jsonpath='{.spec.selector.color}'",
                returnStdout: true
            ).trim()
            def newColor = currentColor == 'blue' ? 'green' : 'blue'
            echo "Current color: ${currentColor}, deploying to: ${newColor}"
            // Install the new color alongside the live one.
            sh """
helm upgrade --install microservices-${newColor} ./helm/microservices \
--namespace microservices \
--set image.tag=${BUILD_VERSION} \
--set deployment.color=${newColor} \
--set environment=production \
--wait --timeout=600s
"""
            // Wait until the new color's pods pass readiness probes.
            sh """
kubectl wait --for=condition=ready pod \
-l app=user-service,color=${newColor} \
-n microservices --timeout=300s
kubectl wait --for=condition=ready pod \
-l app=order-service,color=${newColor} \
-n microservices --timeout=300s
"""
            // Verify the new color before any traffic is switched.
            sh 'mvn test -Dtest=ProductionTest -Dtest.environment=production'
            // Manual approval gate before the traffic switch.
            // NOTE(review): `input` inside `steps` holds an executor while
            // waiting; prefer wrapping it in a timeout or using an input
            // directive outside the node context.
            input message: 'Switch traffic to new version?', ok: 'Deploy'
            // Flip the Service selector — this is the actual traffic switch.
            sh """
kubectl patch service api-gateway -n microservices \
-p '{"spec":{"selector":{"color":"${newColor}"}}}'
"""
            // Remove the now-idle previous color.
            sh "helm uninstall microservices-${currentColor} -n microservices || true"
        }
    }
}
}
// Pipeline-level post actions: workspace cleanup plus Slack notifications.
post {
    always {
        // Free agent disk space regardless of the build outcome.
        cleanWs()
    }
    success {
        // Success notification
        slackSend(
            channel: '#deployments',
            color: 'good',
            message: "✅ Pipeline succeeded for ${env.JOB_NAME} - ${env.BUILD_NUMBER}"
        )
    }
    failure {
        // Failure notification
        slackSend(
            channel: '#deployments',
            color: 'danger',
            message: "❌ Pipeline failed for ${env.JOB_NAME} - ${env.BUILD_NUMBER}"
        )
    }
}
}
2. GitLab CI/CD 配置
.gitlab-ci.yml
.gitlab-ci.yml
# Pipeline stage order: compile -> tests -> static analysis -> packaging/images
# -> image scanning -> staged rollout.
stages:
  - build
  - test
  - quality
  - package
  - security
  - deploy-staging
  - deploy-production

variables:
  DOCKER_REGISTRY: registry.gitlab.com
  DOCKER_REPO: microservices
  # Keep the local Maven repository inside the project dir so it is cacheable.
  MAVEN_OPTS: "-Dmaven.repo.local=$CI_PROJECT_DIR/.m2/repository"
  MAVEN_CLI_OPTS: "--batch-mode --errors --fail-at-end --show-version"

cache:
  paths:
    - .m2/repository/
    # NOTE(review): caching target/ across pipelines can mask stale build
    # output; artifacts already pass target/ between jobs — confirm intended.
    - target/
# Build stage: compile only; classes are handed to later jobs as short-lived artifacts.
build:
  stage: build
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS clean compile
  artifacts:
    paths:
      - target/
    expire_in: 1 hour

# Unit tests: surefire XML feeds the GitLab test-report widget; the JaCoCo
# site is kept for coverage browsing.
unit-test:
  stage: test
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS test
  artifacts:
    reports:
      junit:
        - "**/target/surefire-reports/TEST-*.xml"
    paths:
      - target/site/jacoco/
    expire_in: 1 week
  # Regex that extracts the total coverage percentage from the job log.
  coverage: '/Total.*?([0-9]{1,3})%/'
# Integration tests run against throwaway MySQL/Redis service containers
# attached to the job.
integration-test:
  stage: test
  image: maven:3.8.4-openjdk-11
  services:
    - mysql:8.0
    - redis:6.2
  variables:
    # Consumed by the mysql service container on startup.
    MYSQL_ROOT_PASSWORD: rootpassword
    MYSQL_DATABASE: test_db
    SPRING_PROFILES_ACTIVE: test
  script:
    - mvn $MAVEN_CLI_OPTS verify -Pintegration-test
  artifacts:
    reports:
      junit:
        - "**/target/failsafe-reports/TEST-*.xml"
    expire_in: 1 week
# Static code analysis; only runs on long-lived branches.
sonarqube-check:
  stage: quality
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS sonar:sonar
      -Dsonar.projectKey=$CI_PROJECT_NAME
      -Dsonar.host.url=$SONAR_HOST_URL
      -Dsonar.login=$SONAR_TOKEN
  only:
    - main
    - develop

# OWASP dependency vulnerability scan.
security-scan:
  stage: quality
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS org.owasp:dependency-check-maven:check
  artifacts:
    reports:
      # NOTE(review): dependency-check writes an HTML report by default; the
      # JSON file referenced here requires the plugin's `format` setting to
      # include JSON — confirm the Maven plugin configuration.
      dependency_scanning: target/dependency-check-report.json
    paths:
      - target/dependency-check-report.html
    expire_in: 1 week
# Packaging: builds runnable jars for every module; tests already ran earlier.
package:
  stage: package
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS package -DskipTests
  artifacts:
    paths:
      - "**/target/*.jar"
    expire_in: 1 week
# Shared Docker build template: logs in, builds the service image twice
# (commit-SHA tag + mutable latest), and pushes both tags.
.docker-build: &docker-build
  stage: package
  image: docker:20.10.16
  services:
    - docker:20.10.16-dind
  # FIX: these jobs share the `package` stage with the `package` job, and jobs
  # in the same stage run in parallel — without an explicit dependency the
  # image build could start before the jars the Dockerfiles COPY exist.
  # `needs` both orders the jobs and downloads the jar artifacts.
  needs:
    - job: package
      artifacts: true
  before_script:
    # --password-stdin keeps the registry password out of the process list.
    - echo $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY
  script:
    - cd $SERVICE_NAME
    - docker build -t $CI_REGISTRY_IMAGE/$SERVICE_NAME:$CI_COMMIT_SHA .
    - docker build -t $CI_REGISTRY_IMAGE/$SERVICE_NAME:latest .
    - docker push $CI_REGISTRY_IMAGE/$SERVICE_NAME:$CI_COMMIT_SHA
    - docker push $CI_REGISTRY_IMAGE/$SERVICE_NAME:latest

# One concrete build job per service; SERVICE_NAME selects the module directory.
build-user-service:
  <<: *docker-build
  variables:
    SERVICE_NAME: user-service

build-order-service:
  <<: *docker-build
  variables:
    SERVICE_NAME: order-service

build-api-gateway:
  <<: *docker-build
  variables:
    SERVICE_NAME: api-gateway
# Container image vulnerability scan with Trivy, one pass per service image.
# NOTE(review): inside the aquasec/trivy image the GitLab template lives at
# /contrib/gitlab.tpl — the relative "@contrib/gitlab.tpl" only resolves if
# the working directory is /; confirm, or use "@/contrib/gitlab.tpl".
container-security-scan:
  stage: security
  image: aquasec/trivy:latest
  script:
    - trivy image --format template --template "@contrib/gitlab.tpl"
      --output gl-container-scanning-report.json
      $CI_REGISTRY_IMAGE/user-service:$CI_COMMIT_SHA
    - trivy image --format template --template "@contrib/gitlab.tpl"
      --output gl-container-scanning-order.json
      $CI_REGISTRY_IMAGE/order-service:$CI_COMMIT_SHA
    - trivy image --format template --template "@contrib/gitlab.tpl"
      --output gl-container-scanning-gateway.json
      $CI_REGISTRY_IMAGE/api-gateway:$CI_COMMIT_SHA
  artifacts:
    reports:
      container_scanning:
        - gl-container-scanning-*.json
    expire_in: 1 week
# Staging deployment: automatic on the develop branch.
deploy-staging:
  stage: deploy-staging
  image: bitnami/kubectl:latest
  environment:
    name: staging
    url: https://staging.microservices.example.com
  script:
    - kubectl config use-context $KUBE_CONTEXT_STAGING
    # NOTE(review): the bitnami/kubectl image ships only kubectl — neither
    # helm nor mvn is available in it, so the helm upgrade and the smoke-test
    # command below need a custom image (or separate jobs) to actually run.
    - helm upgrade --install microservices-staging ./helm/microservices
      --namespace microservices-staging
      --create-namespace
      --set image.tag=$CI_COMMIT_SHA
      --set environment=staging
      --wait --timeout=600s
    # Smoke tests against the freshly deployed environment.
    - mvn test -Dtest=SmokeTest -Dtest.environment=staging
  only:
    - develop

# Production deployment: manual gate, main branch only.
deploy-production:
  stage: deploy-production
  image: bitnami/kubectl:latest
  environment:
    name: production
    url: https://microservices.example.com
  script:
    - kubectl config use-context $KUBE_CONTEXT_PRODUCTION
    # Blue/green switch is delegated to a repo-local script.
    # NOTE(review): the script presumably needs helm and bash in the image —
    # confirm availability before relying on this job.
    - ./scripts/blue-green-deploy.sh $CI_COMMIT_SHA
  when: manual
  only:
    - main
3. Helm Chart 配置
Chart.yaml
helm/microservices/Chart.yaml
# Helm chart metadata for the umbrella microservices chart.
apiVersion: v2
name: microservices
description: A Helm chart for microservices application
type: application
# Chart version (bump on chart changes) vs. appVersion (application release).
version: 0.1.0
appVersion: "1.0.0"
# Optional infrastructure sub-charts, toggled via mysql.enabled / redis.enabled.
dependencies:
  - name: mysql
    version: 9.4.1
    repository: https://charts.bitnami.com/bitnami
    condition: mysql.enabled
  - name: redis
    version: 17.3.7
    repository: https://charts.bitnami.com/bitnami
    condition: redis.enabled
values.yaml
helm/microservices/values.yaml
# Global settings shared by all sub-charts.
global:
  imageRegistry: registry.example.com
  imagePullSecrets:
    - name: registry-secret

# Image settings.
image:
  repository: microservices
  # NOTE(review): mutable `latest` default — CI overrides this with the build tag.
  tag: latest
  pullPolicy: IfNotPresent

# Target environment name.
environment: production

# Deployment settings: blue/green color selector plus zero-downtime rollout.
deployment:
  color: blue
  replicaCount: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0

# Per-service resource requests/limits.
resources:
  userService:
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"
  orderService:
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"
  apiGateway:
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"

# Horizontal pod autoscaling bounds and utilization targets.
autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 10
  targetCPUUtilizationPercentage: 70
  targetMemoryUtilizationPercentage: 80

# Service ports, cluster-internal only; external access goes through the Ingress.
service:
  type: ClusterIP
  ports:
    userService: 8081
    orderService: 8082
    apiGateway: 8080

# Ingress: TLS-terminated entry point routed to the API gateway.
ingress:
  enabled: true
  className: nginx
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    cert-manager.io/cluster-issuer: letsencrypt-prod
  hosts:
    - host: microservices.example.com
      paths:
        - path: /
          pathType: Prefix
          service: api-gateway
  tls:
    - secretName: microservices-tls
      hosts:
        - microservices.example.com

# Non-sensitive application configuration delivered via ConfigMap.
configMap:
  data:
    spring.profiles.active: k8s
    logging.level.com.example: INFO
    management.endpoints.web.exposure.include: health,info,metrics,prometheus

# Secrets.
# NOTE(review): plaintext credentials in values.yaml end up in version control
# and in `helm get values` output — move these to an external secret store
# (sealed-secrets / external-secrets) or pass them at deploy time.
secrets:
  mysql:
    rootPassword: rootpassword
    userPassword: micropass
  jwt:
    secret: myVerySecretKeyThatIsAtLeast256BitsLongForHS512Algorithm

# Bitnami MySQL sub-chart values.
mysql:
  enabled: true
  auth:
    rootPassword: rootpassword
    database: microservices_db
    username: microuser
    password: micropass
  primary:
    persistence:
      enabled: true
      size: 10Gi
      storageClass: fast-ssd

# Bitnami Redis sub-chart values.
redis:
  enabled: true
  auth:
    # NOTE(review): Redis runs without authentication — acceptable only on an
    # isolated cluster network.
    enabled: false
  master:
    persistence:
      enabled: true
      size: 5Gi
      storageClass: fast-ssd

# Monitoring toggles.
monitoring:
  prometheus:
    enabled: true
    scrape: true
  grafana:
    enabled: true

# Logging defaults; the pattern includes trace/span ids for log correlation.
logging:
  level: INFO
  pattern: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level [%X{traceId},%X{spanId}] %logger{36} - %msg%n"
10.6 性能优化
1. JVM 调优
JVM 参数配置
application-production.yml
# Production tuning profile (server, pools, cache, async, monitoring).
# FIX: the original file declared the top-level `spring:` key four separate
# times in one YAML document; duplicate mapping keys are invalid YAML (lenient
# parsers silently keep only one), so all sections are merged under a single
# `spring:` block here.
spring:
  application:
    name: ${SERVICE_NAME:unknown}
  # Connection pool settings
  datasource:
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      idle-timeout: 300000
      max-lifetime: 1800000
      connection-timeout: 30000
      validation-timeout: 5000
      # Logs a warning if a connection is held longer than 60 s (leak hunting).
      leak-detection-threshold: 60000
  redis:
    lettuce:
      pool:
        max-active: 20
        max-idle: 8
        min-idle: 2
        max-wait: 2000ms
      shutdown-timeout: 100ms
  # Cache settings (entries expire after 10 minutes)
  cache:
    type: redis
    redis:
      time-to-live: 600000
      cache-null-values: false
  # Async/scheduled task executors
  task:
    execution:
      pool:
        core-size: 8
        max-size: 20
        queue-capacity: 100
        keep-alive: 60s
      thread-name-prefix: async-task-
    scheduling:
      pool:
        size: 5
      thread-name-prefix: scheduled-task-

# Embedded Tomcat tuning
server:
  tomcat:
    threads:
      max: 200
      min-spare: 10
    connection-timeout: 20000
    max-connections: 8192
    accept-count: 100
  compression:
    enabled: true
    mime-types: text/html,text/xml,text/plain,text/css,text/javascript,application/javascript,application/json
    min-response-size: 1024

# Actuator / metrics configuration
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus,env,configprops
  endpoint:
    health:
      show-details: when-authorized
      probes:
        enabled: true
  metrics:
    export:
      prometheus:
        enabled: true
    distribution:
      # NOTE(review): newer Spring Boot versions expect metric-name map keys
      # bracketed, e.g. "[http.server.requests]" — confirm for your Boot version.
      percentiles-histogram:
        http.server.requests: true
      percentiles:
        http.server.requests: 0.5, 0.9, 0.95, 0.99
  health:
    circuitbreakers:
      enabled: true
    ratelimiters:
      enabled: true
启动脚本优化
optimized-entrypoint.sh
#!/bin/bash
# Optimized container entrypoint for the Java 11 services: assembles the JVM
# flag set, waits for backing services to accept connections, then execs the
# application jar so it becomes PID 1 and receives container stop signals.

# --- Base JVM flags ---------------------------------------------------------
# HEAP_MIN/HEAP_MAX override the heap defaults. Note that explicit -Xms/-Xmx
# take precedence over Initial/MaxRAMPercentage, so the percentage flags only
# matter when the heap variables are deliberately dropped.
# FIX: -XX:+UseG1MixedGCCountTarget=8 used boolean `+` syntax on a numeric
# flag, which aborts JVM startup; the valid form is -XX:G1MixedGCCountTarget=8.
# FIX: removed -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap
# — that was the JDK 8 container workaround and is obsolete on JDK 10+, where
# -XX:+UseContainerSupport provides cgroup awareness.
JAVA_OPTS="${JAVA_OPTS} \
-server \
-Xms${HEAP_MIN:-512m} \
-Xmx${HEAP_MAX:-1024m} \
-XX:NewRatio=2 \
-XX:SurvivorRatio=8 \
-XX:+UseG1GC \
-XX:MaxGCPauseMillis=200 \
-XX:G1HeapRegionSize=16m \
-XX:G1MixedGCCountTarget=8 \
-XX:+UseStringDeduplication \
-XX:+OptimizeStringConcat \
-XX:+UseCompressedOops \
-XX:+UseCompressedClassPointers \
-XX:+UseContainerSupport \
-XX:InitialRAMPercentage=50.0 \
-XX:MaxRAMPercentage=80.0 \
-Djava.security.egd=file:/dev/./urandom \
-Djava.awt.headless=true \
-Dfile.encoding=UTF-8 \
-Duser.timezone=Asia/Shanghai"

# --- GC logging -------------------------------------------------------------
# FIX: the legacy -XX:+PrintGC* / -Xloggc rotation flags were removed by
# JEP 271 (unified JVM logging); on Java 11 several of them make the JVM fail
# to start with "Unrecognized VM option". -Xlog:gc* with file rotation is the
# equivalent replacement (5 files x 10 MB, timestamps + uptime decorations).
if [ "${ENABLE_GC_LOG:-true}" = "true" ]; then
    JAVA_OPTS="${JAVA_OPTS} \
-Xlog:gc*:file=/app/logs/gc.log:time,uptime,level,tags:filecount=5,filesize=10m"
fi

# --- Flight Recorder profiling ----------------------------------------------
# -XX:+FlightRecorder is no longer required on JDK 11; StartFlightRecording
# alone enables JFR.
if [ "${ENABLE_PROFILING:-false}" = "true" ]; then
    JAVA_OPTS="${JAVA_OPTS} \
-XX:StartFlightRecording=duration=60s,filename=/app/logs/flight-recording.jfr"
fi

# --- Remote debugging --------------------------------------------------------
if [ "${ENABLE_DEBUG:-false}" = "true" ]; then
    JAVA_OPTS="${JAVA_OPTS} \
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005"
fi

# --- Dependency waits --------------------------------------------------------
# Each wait only runs when the corresponding host variable is set, so this
# entrypoint works for services with differing dependency sets.
echo "Waiting for dependencies..."

# Database (host and port must both be provided)
if [ -n "${DB_HOST}" ] && [ -n "${DB_PORT}" ]; then
    while ! nc -z "${DB_HOST}" "${DB_PORT}"; do
        echo "Waiting for database at ${DB_HOST}:${DB_PORT}..."
        sleep 2
    done
fi

# Redis (port defaults to 6379).
# FIX: the original also tested [ -n "${REDIS_PORT:-6379}" ], which is always
# true because of the default — the check is redundant and has been removed.
if [ -n "${REDIS_HOST}" ]; then
    while ! nc -z "${REDIS_HOST}" "${REDIS_PORT:-6379}"; do
        echo "Waiting for Redis at ${REDIS_HOST}:${REDIS_PORT:-6379}..."
        sleep 2
    done
fi

# Eureka (port defaults to 8761)
if [ -n "${EUREKA_HOST}" ]; then
    while ! nc -z "${EUREKA_HOST}" "${EUREKA_PORT:-8761}"; do
        echo "Waiting for Eureka at ${EUREKA_HOST}:${EUREKA_PORT:-8761}..."
        sleep 2
    done
fi

echo "Dependencies are ready. Starting application..."
echo "Java options: ${JAVA_OPTS}"

# exec replaces the shell so java is PID 1 and receives SIGTERM on stop.
exec java ${JAVA_OPTS} -jar app.jar
2. 数据库优化
MySQL 配置优化
mysql-optimized.cnf
[mysqld]
# --- Basics ------------------------------------------------------------------
# NOTE(review): default-authentication-plugin keeps pre-8.0 clients working but
# is deprecated from MySQL 8.0.27 in favor of authentication_policy.
default-authentication-plugin=mysql_native_password
character-set-server=utf8mb4
collation-server=utf8mb4_unicode_ci
skip-character-set-client-handshake

# --- Connections ---------------------------------------------------------------
max_connections=500
max_connect_errors=1000
connect_timeout=10
wait_timeout=28800
interactive_timeout=28800

# --- Buffers -------------------------------------------------------------------
innodb_buffer_pool_size=1G
innodb_buffer_pool_instances=4
innodb_log_buffer_size=16M
key_buffer_size=256M
read_buffer_size=2M
read_rnd_buffer_size=4M
sort_buffer_size=4M
join_buffer_size=4M

# --- InnoDB --------------------------------------------------------------------
innodb_file_per_table=1
# 2 = flush redo log to the OS once per second: faster, may lose up to ~1 s of
# commits on a full host crash.
innodb_flush_log_at_trx_commit=2
innodb_log_file_size=256M
innodb_log_files_in_group=2
innodb_flush_method=O_DIRECT
innodb_io_capacity=200
innodb_io_capacity_max=400
innodb_read_io_threads=8
innodb_write_io_threads=8
innodb_thread_concurrency=16
innodb_lock_wait_timeout=50

# FIX: the query cache (query_cache_type / query_cache_size / query_cache_limit)
# was removed in MySQL 8.0 — with those options present, mysqld 8.0 refuses to
# start with "unknown variable". They have been removed from this config.

# --- Temporary tables ----------------------------------------------------------
tmp_table_size=128M
max_heap_table_size=128M

# --- Binary log ----------------------------------------------------------------
log-bin=mysql-bin
binlog_format=ROW
# FIX: expire_logs_days is deprecated in MySQL 8.0; binlog_expire_logs_seconds
# is the replacement (604800 s = 7 days, same retention as before).
binlog_expire_logs_seconds=604800
max_binlog_size=100M

# --- Slow query log ------------------------------------------------------------
slow_query_log=1
slow_query_log_file=/var/log/mysql/slow.log
long_query_time=2
log_queries_not_using_indexes=1

# --- Error log -----------------------------------------------------------------
log-error=/var/log/mysql/error.log

# --- Performance monitoring ----------------------------------------------------
performance_schema=ON
10.7 故障排查
1. 常见问题诊断
健康检查脚本
health-check.sh
#!/bin/bash
# End-to-end health check for the microservices namespace: verifies the
# namespace, infrastructure services, application deployments, service-to-
# service connectivity, resource usage, and recent warning events.

# Terminal color codes
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration
NAMESPACE="microservices"
SERVICES=("user-service" "order-service" "api-gateway" "eureka-server")
INFRA_SERVICES=("mysql" "redis")

echo "=== 微服务健康检查 ==="
echo "检查时间: $(date)"
echo "命名空间: ${NAMESPACE}"
echo

# 1) The namespace must exist; abort immediately if it does not.
echo "1. 检查命名空间..."
if kubectl get namespace ${NAMESPACE} &>/dev/null; then
    echo -e "${GREEN}✓${NC} 命名空间 ${NAMESPACE} 存在"
else
    echo -e "${RED}✗${NC} 命名空间 ${NAMESPACE} 不存在"
    exit 1
fi
echo

# 2) Infrastructure services (MySQL / Redis).
echo "2. 检查基础设施服务..."
for service in "${INFRA_SERVICES[@]}"; do
    echo "检查 ${service}..."
    # Phase of the first pod carrying the app=<service> label.
    pod_status=$(kubectl get pods -n ${NAMESPACE} -l app=${service} -o jsonpath='{.items[0].status.phase}' 2>/dev/null)
    if [ "${pod_status}" = "Running" ]; then
        echo -e " ${GREEN}✓${NC} Pod状态: Running"
    else
        echo -e " ${RED}✗${NC} Pod状态: ${pod_status:-NotFound}"
    fi
    # Check the Service object and the in-pod port.
    if kubectl get service ${service} -n ${NAMESPACE} &>/dev/null; then
        echo -e " ${GREEN}✓${NC} 服务存在"
        # Well-known port per infrastructure service.
        case ${service} in
            "mysql")
                port=3306
                ;;
            "redis")
                port=6379
                ;;
        esac
        # NOTE(review): assumes MySQL/Redis run as a Deployment named after the
        # service; Bitnami charts typically deploy StatefulSets — confirm.
        if kubectl exec -n ${NAMESPACE} deployment/${service} -- nc -z localhost ${port} &>/dev/null; then
            echo -e " ${GREEN}✓${NC} 端口 ${port} 可达"
        else
            echo -e " ${RED}✗${NC} 端口 ${port} 不可达"
        fi
    else
        echo -e " ${RED}✗${NC} 服务不存在"
    fi
    echo
done

# 3) Application microservices.
echo "3. 检查微服务..."
for service in "${SERVICES[@]}"; do
    echo "检查 ${service}..."
    if kubectl get deployment ${service} -n ${NAMESPACE} &>/dev/null; then
        echo -e " ${GREEN}✓${NC} Deployment存在"
        # Desired vs. ready replica counts.
        desired=$(kubectl get deployment ${service} -n ${NAMESPACE} -o jsonpath='{.spec.replicas}')
        ready=$(kubectl get deployment ${service} -n ${NAMESPACE} -o jsonpath='{.status.readyReplicas}')
        if [ "${ready}" = "${desired}" ]; then
            echo -e " ${GREEN}✓${NC} 副本状态: ${ready}/${desired}"
        else
            echo -e " ${YELLOW}!${NC} 副本状态: ${ready:-0}/${desired}"
        fi
        # Count of pods actually in the Running phase.
        pod_count=$(kubectl get pods -n ${NAMESPACE} -l app=${service} --field-selector=status.phase=Running --no-headers | wc -l)
        if [ ${pod_count} -gt 0 ]; then
            echo -e " ${GREEN}✓${NC} 运行中的Pod: ${pod_count}"
        else
            echo -e " ${RED}✗${NC} 没有运行中的Pod"
        fi
        # Spring Boot actuator health endpoint; port varies per service.
        case ${service} in
            "eureka-server")
                port=8761
                health_path="/actuator/health"
                ;;
            "api-gateway")
                port=8080
                health_path="/actuator/health"
                ;;
            "user-service")
                port=8081
                health_path="/actuator/health"
                ;;
            "order-service")
                port=8082
                health_path="/actuator/health"
                ;;
        esac
        # NOTE(review): requires curl to be present inside the service containers.
        if kubectl exec -n ${NAMESPACE} deployment/${service} -- curl -f http://localhost:${port}${health_path} &>/dev/null; then
            echo -e " ${GREEN}✓${NC} 健康检查通过"
        else
            echo -e " ${RED}✗${NC} 健康检查失败"
        fi
    else
        echo -e " ${RED}✗${NC} Deployment不存在"
    fi
    echo
done

# 4) Service-to-service network connectivity.
echo "4. 检查网络连通性..."
echo "检查服务间通信..."
# Gateway -> user service
if kubectl exec -n ${NAMESPACE} deployment/api-gateway -- curl -f http://user-service:8081/actuator/health &>/dev/null; then
    echo -e " ${GREEN}✓${NC} API网关 -> 用户服务"
else
    echo -e " ${RED}✗${NC} API网关 -> 用户服务"
fi
# Gateway -> order service
if kubectl exec -n ${NAMESPACE} deployment/api-gateway -- curl -f http://order-service:8082/actuator/health &>/dev/null; then
    echo -e " ${GREEN}✓${NC} API网关 -> 订单服务"
else
    echo -e " ${RED}✗${NC} API网关 -> 订单服务"
fi
# User service -> MySQL
if kubectl exec -n ${NAMESPACE} deployment/user-service -- nc -z mysql 3306 &>/dev/null; then
    echo -e " ${GREEN}✓${NC} 用户服务 -> MySQL"
else
    echo -e " ${RED}✗${NC} 用户服务 -> MySQL"
fi
# Order service -> Redis
if kubectl exec -n ${NAMESPACE} deployment/order-service -- nc -z redis 6379 &>/dev/null; then
    echo -e " ${GREEN}✓${NC} 订单服务 -> Redis"
else
    echo -e " ${RED}✗${NC} 订单服务 -> Redis"
fi
echo

# 5) Resource usage (requires metrics-server).
echo "5. 检查资源使用情况..."
echo "CPU和内存使用情况:"
kubectl top pods -n ${NAMESPACE} 2>/dev/null || echo " metrics-server未安装或不可用"
echo

# 6) The five most recent Warning events, newest last.
echo "6. 检查最近事件..."
echo "最近的Warning事件:"
kubectl get events -n ${NAMESPACE} --field-selector type=Warning --sort-by='.lastTimestamp' | tail -5
echo
echo "=== 健康检查完成 ==="
2. 日志分析工具
日志聚合脚本
log-analyzer.sh
#!/bin/bash
# Log aggregation/analysis helper for the microservices namespace.

# Configuration: target namespace, default log line count, and lookback window.
NAMESPACE="microservices"
LOG_LINES=100
TIME_RANGE="1h"
# Print the most recent log lines of one service's deployment.
#   $1 = service name
#   $2 = number of lines to show (defaults to LOG_LINES)
get_service_logs() {
    local svc="$1"
    local tail_count="${2:-$LOG_LINES}"
    echo "=== ${svc} 日志 ==="
    kubectl logs -n "${NAMESPACE}" "deployment/${svc}" --tail="${tail_count}" --since="${TIME_RANGE}"
    echo
}
# Summarize error-like log lines for one service within TIME_RANGE,
# most frequent message first.
#   $1 = service name
analyze_errors() {
    local svc="$1"
    echo "=== ${svc} 错误分析 ==="
    kubectl logs -n "${NAMESPACE}" "deployment/${svc}" --since="${TIME_RANGE}" \
        | grep -i "error\|exception\|failed\|timeout" \
        | sort \
        | uniq -c \
        | sort -nr
    echo
}
# Show the last few timing-related log lines for one service within TIME_RANGE.
#   $1 = service name
analyze_performance() {
    local svc="$1"
    echo "=== ${svc} 性能分析 ==="
    kubectl logs -n "${NAMESPACE}" "deployment/${svc}" --since="${TIME_RANGE}" \
        | grep -E "took [0-9]+ms|duration=[0-9]+ms|elapsed=[0-9]+ms" \
        | tail -10
    echo
}
# Entry point: dispatch on the first argument (defaults to "all").
echo "微服务日志分析工具"
echo "时间范围: ${TIME_RANGE}"
echo "命名空间: ${NAMESPACE}"
echo

case ${1:-"all"} in
    # Single service: full logs plus error and performance analysis.
    "user-service"|"order-service"|"api-gateway"|"eureka-server")
        get_service_logs $1
        analyze_errors $1
        analyze_performance $1
        ;;
    # Error analysis across every service.
    "errors")
        for service in user-service order-service api-gateway eureka-server; do
            analyze_errors $service
        done
        ;;
    # Performance analysis across every service.
    "performance")
        for service in user-service order-service api-gateway eureka-server; do
            analyze_performance $service
        done
        ;;
    # Everything, with a shorter 50-line log excerpt per service.
    "all")
        for service in user-service order-service api-gateway eureka-server; do
            get_service_logs $service 50
            analyze_errors $service
            analyze_performance $service
        done
        ;;
    # Anything else: print usage and fail.
    *)
        echo "用法: $0 [service-name|errors|performance|all]"
        echo "服务名称: user-service, order-service, api-gateway, eureka-server"
        exit 1
        ;;
esac
总结
本章详细介绍了微服务的部署与运维,涵盖了以下核心内容:
核心概念
- 容器化部署 - Docker容器化、多阶段构建优化
- Kubernetes部署 - K8s资源配置、服务部署、水平扩缩容
- 服务监控 - Prometheus监控、Grafana可视化
- 日志管理 - ELK Stack部署、日志收集与分析
- CI/CD流水线 - Jenkins/GitLab CI配置、自动化部署
- 性能优化 - JVM调优、数据库优化
- 故障排查 - 健康检查、日志分析
最佳实践
- 部署策略 - 蓝绿部署、滚动更新、金丝雀发布
- 监控体系 - 全链路监控、告警机制、性能指标
- 日志管理 - 结构化日志、集中收集、实时分析
- 自动化运维 - 自动化部署、自动扩缩容、自愈机制
- 安全防护 - 镜像安全扫描、网络隔离、访问控制
注意事项
- 资源管理 - 合理配置资源限制、避免资源竞争
- 配置管理 - 环境隔离、配置版本控制、敏感信息保护
- 依赖管理 - 服务依赖检查、启动顺序控制
- 故障处理 - 快速定位、自动恢复、降级策略
扩展方向
- 高级功能 - 服务网格(Istio)、多集群管理、边缘计算
- 运维工具 - GitOps、Operator模式、混沌工程
- 云原生 - Serverless、FaaS、云原生数据库
通过本章的学习,你已经掌握了微服务的完整部署与运维体系。下一章我们将学习微服务的高级主题和最佳实践。