10.1 容器化部署

1. Docker 容器化

Dockerfile 编写

用户服务 Dockerfile

# Base image: official OpenJDK 11 JRE (slim variant keeps the image small)
FROM openjdk:11-jre-slim

# Working directory for the application
WORKDIR /app

# curl is required by the HEALTHCHECK below — the slim image does not ship it,
# so without this the health check always fails and the container is marked unhealthy
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

# Create an unprivileged application user (security best practice)
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Copy the application JAR
COPY target/user-service-*.jar app.jar

# Create the log directory and hand ownership of /app to the app user
RUN mkdir -p /app/logs && chown -R appuser:appuser /app

# Drop root privileges
USER appuser

# Service port
EXPOSE 8081

# JVM options. NOTE: -XX:+PrintGCDetails / -XX:+PrintGCTimeStamps / -Xloggc
# were removed by JEP 271 (unified logging) and make the JDK 11 JVM refuse to
# start; -Xlog:gc* is the supported replacement.
ENV JAVA_OPTS="-Xms512m -Xmx1024m -XX:+UseG1GC -Xlog:gc*:file=/app/logs/gc.log:time,uptime"

# Container-level health check against the Spring Boot Actuator endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8081/actuator/health || exit 1

# Start the application; the sh -c wrapper is required so $JAVA_OPTS expands
ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -jar app.jar"]

订单服务 Dockerfile

# Base image: official OpenJDK 11 JRE (slim variant)
FROM openjdk:11-jre-slim

WORKDIR /app

# curl is needed by the HEALTHCHECK and netcat (nc) by entrypoint.sh, which
# waits for MySQL/Eureka before starting the app. The original image installed
# only curl, so the nc calls in entrypoint.sh failed at runtime.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl netcat-openbsd && \
    rm -rf /var/lib/apt/lists/*

# Create an unprivileged application user
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Copy the application JAR and the startup script
COPY target/order-service-*.jar app.jar
COPY docker/entrypoint.sh entrypoint.sh

# Make the script executable and hand /app over to the app user
RUN chmod +x entrypoint.sh && \
    mkdir -p /app/logs && \
    chown -R appuser:appuser /app

USER appuser

EXPOSE 8082

# Default runtime configuration (override per environment)
ENV SPRING_PROFILES_ACTIVE=docker
ENV JAVA_OPTS="-Xms512m -Xmx1024m -XX:+UseG1GC"

HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8082/actuator/health || exit 1

# entrypoint.sh waits for dependencies, then exec's the JVM
ENTRYPOINT ["./entrypoint.sh"]

启动脚本 entrypoint.sh

#!/bin/bash
# Wait for the database and the Eureka registry to accept TCP connections,
# then exec the Java application in place of this shell.
#
# Required environment variables (fail fast if unset rather than looping
# forever against an empty host, which the original script did silently):
#   DB_HOST, DB_PORT, EUREKA_HOST, EUREKA_PORT
set -eu

: "${DB_HOST:?DB_HOST is required}"
: "${DB_PORT:?DB_PORT is required}"
: "${EUREKA_HOST:?EUREKA_HOST is required}"
: "${EUREKA_PORT:?EUREKA_PORT is required}"

echo "Waiting for dependencies..."

# Wait for the database port to open
while ! nc -z "$DB_HOST" "$DB_PORT"; do
    echo "Waiting for database at $DB_HOST:$DB_PORT..."
    sleep 2
done

# Wait for the Eureka registry port to open
while ! nc -z "$EUREKA_HOST" "$EUREKA_PORT"; do
    echo "Waiting for Eureka at $EUREKA_HOST:$EUREKA_PORT..."
    sleep 2
done

echo "Dependencies are ready. Starting application..."

# exec replaces the shell so the JVM runs as PID 1 and receives signals
# directly. JAVA_OPTS is intentionally unquoted so it word-splits into
# separate JVM flags; the :- default keeps `set -u` from aborting when unset.
exec java ${JAVA_OPTS:-} -jar app.jar

Docker Compose 配置

docker-compose.yml

version: '3.8'

services:
  # Database service (singleton — a fixed container_name is fine here)
  mysql:
    image: mysql:8.0
    container_name: microservices-mysql
    environment:
      MYSQL_ROOT_PASSWORD: rootpassword
      MYSQL_DATABASE: microservices_db
      MYSQL_USER: microuser
      MYSQL_PASSWORD: micropass
    ports:
      - "3306:3306"
    volumes:
      - mysql_data:/var/lib/mysql
      - ./docker/mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
      # interval was missing; without it the default 30s delays dependent
      # services' startup considerably
      interval: 10s
      timeout: 20s
      retries: 10

  # Redis cache
  redis:
    image: redis:6.2-alpine
    container_name: microservices-redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Eureka service registry
  eureka-server:
    build:
      context: ./eureka-server
      dockerfile: Dockerfile
    container_name: eureka-server
    ports:
      - "8761:8761"
    environment:
      - SPRING_PROFILES_ACTIVE=docker
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8761/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  # API gateway
  api-gateway:
    build:
      context: ./api-gateway
      dockerfile: Dockerfile
    container_name: api-gateway
    ports:
      - "8080:8080"
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
    depends_on:
      eureka-server:
        condition: service_healthy
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  # User service.
  # NOTE: no container_name here — a fixed container name is incompatible
  # with deploy.replicas > 1 (Compose cannot create two containers with the
  # same name), so the original configuration failed to scale.
  user-service:
    build:
      context: ./user-service
      dockerfile: Dockerfile
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - SPRING_DATASOURCE_URL=jdbc:mysql://mysql:3306/microservices_db
      - SPRING_DATASOURCE_USERNAME=microuser
      - SPRING_DATASOURCE_PASSWORD=micropass
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
      - SPRING_REDIS_HOST=redis
    depends_on:
      mysql:
        condition: service_healthy
      eureka-server:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - microservices-network
    deploy:
      replicas: 2
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8081/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  # Order service (no container_name for the same replica-scaling reason)
  order-service:
    build:
      context: ./order-service
      dockerfile: Dockerfile
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - SPRING_DATASOURCE_URL=jdbc:mysql://mysql:3306/microservices_db
      - SPRING_DATASOURCE_USERNAME=microuser
      - SPRING_DATASOURCE_PASSWORD=micropass
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
      - SPRING_REDIS_HOST=redis
      # Consumed by entrypoint.sh to wait for dependencies
      - DB_HOST=mysql
      - DB_PORT=3306
      - EUREKA_HOST=eureka-server
      - EUREKA_PORT=8761
    depends_on:
      mysql:
        condition: service_healthy
      eureka-server:
        condition: service_healthy
      redis:
        condition: service_healthy
      user-service:
        condition: service_healthy
    networks:
      - microservices-network
    deploy:
      replicas: 2
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8082/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

  # Configuration server
  config-server:
    build:
      context: ./config-server
      dockerfile: Dockerfile
    container_name: config-server
    ports:
      - "8888:8888"
    environment:
      - SPRING_PROFILES_ACTIVE=docker
      - EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE=http://eureka-server:8761/eureka/
    depends_on:
      eureka-server:
        condition: service_healthy
    networks:
      - microservices-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/actuator/health"]
      interval: 30s
      timeout: 10s
      retries: 5

volumes:
  mysql_data:
  redis_data:

networks:
  microservices-network:
    driver: bridge

2. 多阶段构建优化

优化的 Dockerfile

多阶段构建 Dockerfile

# ---- Build stage ----
FROM maven:3.8.4-openjdk-11-slim AS builder

WORKDIR /app

# Copy the POM first and pre-fetch dependencies so this layer is cached
# independently of source changes
COPY pom.xml .
RUN mvn dependency:go-offline -B

# Copy sources and build the JAR (tests run in CI, not in the image build)
COPY src ./src
RUN mvn clean package -DskipTests -B

# ---- Runtime stage ----
FROM openjdk:11-jre-slim AS runtime

# curl for the HEALTHCHECK, netcat for entrypoint.sh dependency waits
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl netcat-openbsd && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

# Create an unprivileged application user
RUN groupadd -r appuser && useradd -r -g appuser appuser

WORKDIR /app

# Pull only the built JAR out of the build stage
COPY --from=builder /app/target/*.jar app.jar

# Copy config/entrypoint BEFORE the chown so they end up owned by appuser
# (the original copied them afterwards, leaving them root-owned)
COPY docker/application-docker.yml /app/config/
COPY docker/entrypoint.sh /app/

# Create runtime directories, mark the entrypoint executable, fix ownership
RUN mkdir -p /app/logs /app/config && \
    chmod +x /app/entrypoint.sh && \
    chown -R appuser:appuser /app

USER appuser

EXPOSE 8080

# Container-tuned JVM options.
# NOTE: -XX:+UseCGroupMemoryLimitForHeap was an experimental JDK 8 flag that
# was REMOVED in JDK 10 — passing it makes the JDK 11 JVM abort at startup
# with "Unrecognized VM option". Its replacement, UseContainerSupport, is
# enabled by default on JDK 10+ and is kept here only for explicitness.
ENV JAVA_OPTS="-Xms256m -Xmx512m \
    -XX:+UseG1GC \
    -XX:MaxGCPauseMillis=200 \
    -XX:+UseContainerSupport \
    -Djava.security.egd=file:/dev/./urandom"

HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8080/actuator/health || exit 1

ENTRYPOINT ["/app/entrypoint.sh"]

10.2 Kubernetes 部署

1. Kubernetes 资源配置

命名空间配置

namespace.yaml

# Dedicated namespace isolating all microservice workloads from the rest
# of the cluster; every other manifest in this chapter targets it.
apiVersion: v1
kind: Namespace
metadata:
  name: microservices
  labels:
    name: microservices
    environment: production

ConfigMap 配置

configmap.yaml

# Shared Spring Boot configuration mounted into the service pods at
# /app/config (see the Deployments' config-volume mounts).
apiVersion: v1
kind: ConfigMap
metadata:
  name: microservices-config
  namespace: microservices
data:
  # Common application.yml: Eureka registration, actuator exposure, logging
  # pattern carrying traceId/spanId for log correlation.
  application.yml: |
    spring:
      profiles:
        active: k8s
    
    eureka:
      client:
        service-url:
          defaultZone: http://eureka-server:8761/eureka/
      instance:
        prefer-ip-address: true
        hostname: ${HOSTNAME}
    
    management:
      endpoints:
        web:
          exposure:
            include: health,info,metrics,prometheus
      endpoint:
        health:
          show-details: when-authorized
    
    logging:
      level:
        com.example: INFO
      pattern:
        console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level [%X{traceId},%X{spanId}] %logger{36} - %msg%n"

---
# MySQL server tuning, mounted at /etc/mysql/conf.d by the mysql Deployment.
apiVersion: v1
kind: ConfigMap
metadata:
  name: mysql-config
  namespace: microservices
data:
  my.cnf: |
    [mysqld]
    default-authentication-plugin=mysql_native_password
    character-set-server=utf8mb4
    collation-server=utf8mb4_unicode_ci
    max_connections=200
    innodb_buffer_pool_size=256M
    innodb_log_file_size=64M

Secret 配置

secret.yaml

# Credentials consumed via secretKeyRef by the service Deployments.
# WARNING: base64 is encoding, not encryption — a manifest like this with
# real credentials must not be committed to version control; use a secret
# manager or sealed secrets in production.
apiVersion: v1
kind: Secret
metadata:
  name: microservices-secrets
  namespace: microservices
type: Opaque
data:
  # base64-encoded passwords (decoded value shown inline for this demo)
  mysql-root-password: cm9vdHBhc3N3b3Jk  # rootpassword
  mysql-user-password: bWljcm9wYXNz        # micropass
  jwt-secret: bXlWZXJ5U2VjcmV0S2V5VGhhdElzQXRMZWFzdDI1NkJpdHNMb25nRm9ySFM1MTJBbGdvcml0aG0=
  redis-password: ""

---
# Image-registry pull credential, referenced by the Deployments'
# imagePullSecrets entries.
apiVersion: v1
kind: Secret
metadata:
  name: registry-secret
  namespace: microservices
type: kubernetes.io/dockerconfigjson
data:
  .dockerconfigjson: eyJhdXRocyI6eyJyZWdpc3RyeS5leGFtcGxlLmNvbSI6eyJ1c2VybmFtZSI6InVzZXIiLCJwYXNzd29yZCI6InBhc3MiLCJhdXRoIjoiZFhObGNqcHdZWE56In19fQ==

持久化存储配置

persistent-volume.yaml

# PersistentVolumes are CLUSTER-scoped resources: the namespace field the
# original manifests carried is meaningless on a PV and is dropped/rejected
# by the API server, so it is omitted here. The PVCs remain namespaced.
# hostPath ties each volume to a single node — acceptable for a dev cluster,
# not for production.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: mysql-pv
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: fast-ssd
  hostPath:
    path: /data/mysql

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: mysql-pvc
  namespace: microservices
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
  storageClassName: fast-ssd

---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: redis-pv
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: fast-ssd
  hostPath:
    path: /data/redis

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: redis-pvc
  namespace: microservices
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
  storageClassName: fast-ssd

2. 服务部署配置

MySQL 部署

mysql-deployment.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: mysql
  namespace: microservices
  labels:
    app: mysql
spec:
  replicas: 1
  # Recreate: the ReadWriteOnce PVC can be attached to only one pod at a
  # time, so the default RollingUpdate strategy would leave the replacement
  # pod stuck waiting for the volume while the old pod still holds it.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: mysql
  template:
    metadata:
      labels:
        app: mysql
    spec:
      containers:
      - name: mysql
        image: mysql:8.0
        ports:
        - containerPort: 3306
        env:
        # Passwords are pulled from the shared Secret, not inlined
        - name: MYSQL_ROOT_PASSWORD
          valueFrom:
            secretKeyRef:
              name: microservices-secrets
              key: mysql-root-password
        - name: MYSQL_DATABASE
          value: "microservices_db"
        - name: MYSQL_USER
          value: "microuser"
        - name: MYSQL_PASSWORD
          valueFrom:
            secretKeyRef:
              name: microservices-secrets
              key: mysql-user-password
        volumeMounts:
        - name: mysql-storage
          mountPath: /var/lib/mysql
        # Server tuning from the mysql-config ConfigMap
        - name: mysql-config
          mountPath: /etc/mysql/conf.d
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "1Gi"
            cpu: "500m"
        livenessProbe:
          exec:
            command:
            - mysqladmin
            - ping
            - -h
            - localhost
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
        readinessProbe:
          exec:
            command:
            - mysqladmin
            - ping
            - -h
            - localhost
          initialDelaySeconds: 10
          periodSeconds: 5
          timeoutSeconds: 3
      volumes:
      - name: mysql-storage
        persistentVolumeClaim:
          claimName: mysql-pvc
      - name: mysql-config
        configMap:
          name: mysql-config

---
# Stable in-cluster DNS name "mysql" for the database
apiVersion: v1
kind: Service
metadata:
  name: mysql
  namespace: microservices
  labels:
    app: mysql
spec:
  ports:
  - port: 3306
    targetPort: 3306
  selector:
    app: mysql
  type: ClusterIP

Redis 部署

redis-deployment.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  namespace: microservices
  labels:
    app: redis
spec:
  replicas: 1
  # Recreate for the same reason as mysql: the ReadWriteOnce PVC cannot be
  # shared between the old and new pod during a rolling update.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
      - name: redis
        image: redis:6.2-alpine
        ports:
        - containerPort: 6379
        # AOF persistence so data survives pod restarts (backed by the PVC)
        command:
        - redis-server
        - --appendonly
        - "yes"
        volumeMounts:
        - name: redis-storage
          mountPath: /data
        resources:
          requests:
            memory: "256Mi"
            cpu: "125m"
          limits:
            memory: "512Mi"
            cpu: "250m"
        livenessProbe:
          exec:
            command:
            - redis-cli
            - ping
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          exec:
            command:
            - redis-cli
            - ping
          initialDelaySeconds: 5
          periodSeconds: 5
      volumes:
      - name: redis-storage
        persistentVolumeClaim:
          claimName: redis-pvc

---
# Stable in-cluster DNS name "redis" for the cache
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: microservices
  labels:
    app: redis
spec:
  ports:
  - port: 6379
    targetPort: 6379
  selector:
    app: redis
  type: ClusterIP

Eureka Server 部署

eureka-deployment.yaml

# Eureka registry: two replicas behind a single ClusterIP Service for HA.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: eureka-server
  namespace: microservices
  labels:
    app: eureka-server
spec:
  replicas: 2
  selector:
    matchLabels:
      app: eureka-server
  template:
    metadata:
      labels:
        app: eureka-server
    spec:
      containers:
      - name: eureka-server
        # NOTE(review): :latest tag makes rollbacks ambiguous — consider
        # pinning an immutable tag/digest in production.
        image: microservices/eureka-server:latest
        ports:
        - containerPort: 8761
        env:
        - name: SPRING_PROFILES_ACTIVE
          value: "k8s"
        # Each replica registers under its own pod name
        - name: EUREKA_INSTANCE_HOSTNAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        volumeMounts:
        - name: config-volume
          mountPath: /app/config
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "1Gi"
            cpu: "500m"
        livenessProbe:
          httpGet:
            path: /actuator/health
            port: 8761
          initialDelaySeconds: 60
          periodSeconds: 30
          timeoutSeconds: 10
        readinessProbe:
          httpGet:
            path: /actuator/health
            port: 8761
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
      volumes:
      - name: config-volume
        configMap:
          name: microservices-config
      imagePullSecrets:
      - name: registry-secret

---
# In-cluster endpoint used by all services (http://eureka-server:8761)
apiVersion: v1
kind: Service
metadata:
  name: eureka-server
  namespace: microservices
  labels:
    app: eureka-server
spec:
  ports:
  - port: 8761
    targetPort: 8761
  selector:
    app: eureka-server
  type: ClusterIP

---
# NodePort exposure of the Eureka dashboard on every node at :30761
apiVersion: v1
kind: Service
metadata:
  name: eureka-server-external
  namespace: microservices
  labels:
    app: eureka-server
spec:
  ports:
  - port: 8761
    targetPort: 8761
    nodePort: 30761
  selector:
    app: eureka-server
  type: NodePort

用户服务部署

user-service-deployment.yaml

# User service: 3 replicas, scraped by Prometheus via pod annotations,
# gated on MySQL and Eureka availability by init containers.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: user-service
  namespace: microservices
  labels:
    app: user-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: user-service
  template:
    metadata:
      labels:
        app: user-service
      # Matched by the prometheus-config relabeling rules (kubernetes-pods job)
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8081"
        prometheus.io/path: "/actuator/prometheus"
    spec:
      containers:
      - name: user-service
        image: microservices/user-service:latest
        ports:
        - containerPort: 8081
        env:
        - name: SPRING_PROFILES_ACTIVE
          value: "k8s"
        - name: SPRING_DATASOURCE_URL
          value: "jdbc:mysql://mysql:3306/microservices_db"
        - name: SPRING_DATASOURCE_USERNAME
          value: "microuser"
        # Credentials come from the shared Secret, not plain text
        - name: SPRING_DATASOURCE_PASSWORD
          valueFrom:
            secretKeyRef:
              name: microservices-secrets
              key: mysql-user-password
        - name: SPRING_REDIS_HOST
          value: "redis"
        - name: EUREKA_CLIENT_SERVICE_URL_DEFAULTZONE
          value: "http://eureka-server:8761/eureka/"
        - name: JWT_SECRET
          valueFrom:
            secretKeyRef:
              name: microservices-secrets
              key: jwt-secret
        # Consumed by the shared application.yml (eureka.instance.hostname)
        - name: HOSTNAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        volumeMounts:
        - name: config-volume
          mountPath: /app/config
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "1Gi"
            cpu: "500m"
        livenessProbe:
          httpGet:
            path: /actuator/health
            port: 8081
          # Generous initial delay for Spring Boot + Eureka registration
          initialDelaySeconds: 90
          periodSeconds: 30
          timeoutSeconds: 10
          failureThreshold: 3
        readinessProbe:
          httpGet:
            path: /actuator/health
            port: 8081
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
      volumes:
      - name: config-volume
        configMap:
          name: microservices-config
      imagePullSecrets:
      - name: registry-secret
      # Init containers run (in order) before the app container starts,
      # blocking until each dependency's TCP port is reachable.
      initContainers:
      - name: wait-for-mysql
        image: busybox:1.35
        command: ['sh', '-c', 'until nc -z mysql 3306; do echo waiting for mysql; sleep 2; done;']
      - name: wait-for-eureka
        image: busybox:1.35
        command: ['sh', '-c', 'until nc -z eureka-server 8761; do echo waiting for eureka; sleep 2; done;']

---
# Load-balancing ClusterIP in front of the 3 replicas
apiVersion: v1
kind: Service
metadata:
  name: user-service
  namespace: microservices
  labels:
    app: user-service
spec:
  ports:
  - port: 8081
    targetPort: 8081
  selector:
    app: user-service
  type: ClusterIP

3. 水平扩缩容配置

HPA 配置

hpa.yaml

# Autoscaling for user-service: scale between 2 and 10 pods on CPU/memory
# utilization, with asymmetric behavior — cautious scale-down (max 10%/min
# after a 5-minute stabilization window), aggressive scale-up (the larger
# of +50% or +2 pods per minute).
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: user-service-hpa
  namespace: microservices
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: user-service
  minReplicas: 2
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
      - type: Percent
        value: 50
        periodSeconds: 60
      - type: Pods
        value: 2
        periodSeconds: 60
      selectPolicy: Max

---
# Autoscaling for order-service (default scale up/down behavior)
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: order-service-hpa
  namespace: microservices
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: order-service
  minReplicas: 2
  maxReplicas: 8
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80

10.3 服务监控

1. Prometheus 监控配置

Prometheus 部署

prometheus-config.yaml

# Prometheus configuration + alert rules, mounted by the prometheus
# Deployment at /etc/prometheus. Because the whole ConfigMap is mounted
# there, alert_rules.yml lands at /etc/prometheus/alert_rules.yml — the
# original rule_files glob ("/etc/prometheus/rules/*.yml") pointed at a
# directory that never exists, so the alert rules were silently never
# loaded. Fixed to reference the actual mounted path.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: microservices
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    rule_files:
      - "/etc/prometheus/alert_rules.yml"

    alerting:
      alertmanagers:
        - static_configs:
            - targets:
              - alertmanager:9093

    scrape_configs:
      # Prometheus self-monitoring
      - job_name: 'prometheus'
        static_configs:
          - targets: ['localhost:9090']

      # Pods annotated prometheus.io/scrape=true in the microservices namespace
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
            namespaces:
              names:
                - microservices
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name

      # Services annotated prometheus.io/scrape=true
      - job_name: 'kubernetes-services'
        kubernetes_sd_configs:
          - role: service
            namespaces:
              names:
                - microservices
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_service_name

  alert_rules.yml: |
    groups:
      - name: microservices.rules
        rules:
          - alert: ServiceDown
            expr: up == 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Service {{ $labels.instance }} is down"
              description: "{{ $labels.instance }} has been down for more than 1 minute."

          - alert: HighCPUUsage
            expr: (100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)) > 80
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "High CPU usage on {{ $labels.instance }}"
              description: "CPU usage is above 80% for more than 5 minutes."

          - alert: HighMemoryUsage
            expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "High memory usage on {{ $labels.instance }}"
              description: "Memory usage is above 85% for more than 5 minutes."

          - alert: HighErrorRate
            expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.1
            for: 2m
            labels:
              severity: critical
            annotations:
              summary: "High error rate on {{ $labels.instance }}"
              description: "Error rate is above 10% for more than 2 minutes."

---
# Single-replica Prometheus server using the ConfigMap above.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: microservices
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      # ServiceAccount bound to the prometheus ClusterRole for K8s discovery
      serviceAccountName: prometheus
      containers:
      - name: prometheus
        image: prom/prometheus:v2.40.0
        ports:
        - containerPort: 9090
        args:
          - '--config.file=/etc/prometheus/prometheus.yml'
          - '--storage.tsdb.path=/prometheus/'
          - '--web.console.libraries=/etc/prometheus/console_libraries'
          - '--web.console.templates=/etc/prometheus/consoles'
          - '--storage.tsdb.retention.time=15d'
          # Allows config reload via HTTP POST /-/reload
          - '--web.enable-lifecycle'
        volumeMounts:
        - name: prometheus-config
          mountPath: /etc/prometheus
        - name: prometheus-storage
          mountPath: /prometheus
        resources:
          requests:
            memory: "1Gi"
            cpu: "500m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
      volumes:
      - name: prometheus-config
        configMap:
          name: prometheus-config
      # NOTE(review): emptyDir means all metric history is lost whenever the
      # pod is rescheduled — use a PersistentVolumeClaim for durable storage.
      - name: prometheus-storage
        emptyDir: {}

---
# In-cluster endpoint for the Prometheus UI/API
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: microservices
  labels:
    app: prometheus
spec:
  ports:
  - port: 9090
    targetPort: 9090
  selector:
    app: prometheus
  type: ClusterIP

---
# Identity the Prometheus pod runs as (see serviceAccountName above)
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: microservices

---
# Read-only access needed by kubernetes_sd_configs service discovery.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
# Ingress lives in networking.k8s.io — the deprecated "extensions" group the
# original used was removed entirely in Kubernetes 1.22.
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: microservices

2. Grafana 可视化

Grafana 部署

grafana-deployment.yaml

# Auto-provisioned Grafana datasource pointing at the Prometheus Service.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-datasources
  namespace: microservices
data:
  prometheus.yaml: |
    apiVersion: 1
    datasources:
    - name: Prometheus
      type: prometheus
      url: http://prometheus:9090
      access: proxy
      isDefault: true

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: microservices
  labels:
    app: grafana
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
  template:
    metadata:
      labels:
        app: grafana
    spec:
      containers:
      - name: grafana
        image: grafana/grafana:9.3.0
        ports:
        - containerPort: 3000
        env:
        # NOTE(review): admin password hardcoded in the manifest — move it
        # into a Secret and reference it via secretKeyRef.
        - name: GF_SECURITY_ADMIN_PASSWORD
          value: "admin123"
        - name: GF_USERS_ALLOW_SIGN_UP
          value: "false"
        volumeMounts:
        - name: grafana-storage
          mountPath: /var/lib/grafana
        - name: grafana-datasources
          mountPath: /etc/grafana/provisioning/datasources
        resources:
          requests:
            memory: "256Mi"
            cpu: "125m"
          limits:
            memory: "512Mi"
            cpu: "250m"
      volumes:
      # NOTE(review): emptyDir — dashboards/users are lost on pod restart;
      # use a PVC for durable Grafana state.
      - name: grafana-storage
        emptyDir: {}
      - name: grafana-datasources
        configMap:
          name: grafana-datasources

---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: microservices
  labels:
    app: grafana
spec:
  ports:
  - port: 3000
    targetPort: 3000
  selector:
    app: grafana
  type: ClusterIP

10.4 日志管理

1. ELK Stack 部署

Elasticsearch 部署

elasticsearch-deployment.yaml

# 3-node Elasticsearch cluster as a StatefulSet; stable pod DNS names
# (elasticsearch-0..2) are used below for seed-host discovery.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: elasticsearch
  namespace: microservices
  labels:
    app: elasticsearch
spec:
  serviceName: elasticsearch
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
  template:
    metadata:
      labels:
        app: elasticsearch
    spec:
      containers:
      - name: elasticsearch
        image: docker.elastic.co/elasticsearch/elasticsearch:8.5.0
        ports:
        - containerPort: 9200
        - containerPort: 9300
        env:
        - name: cluster.name
          value: "microservices-cluster"
        # Each node is named after its pod
        - name: node.name
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        # Discovery via the headless Service's per-pod DNS entries
        - name: discovery.seed_hosts
          value: "elasticsearch-0.elasticsearch,elasticsearch-1.elasticsearch,elasticsearch-2.elasticsearch"
        - name: cluster.initial_master_nodes
          value: "elasticsearch-0,elasticsearch-1,elasticsearch-2"
        - name: ES_JAVA_OPTS
          value: "-Xms1g -Xmx1g"
        # NOTE(review): security disabled for demo simplicity — do not run
        # this way in production.
        - name: xpack.security.enabled
          value: "false"
        volumeMounts:
        - name: elasticsearch-data
          mountPath: /usr/share/elasticsearch/data
        resources:
          requests:
            memory: "2Gi"
            cpu: "500m"
          limits:
            memory: "4Gi"
            cpu: "1000m"
  # One PVC per replica; no storageClassName set, so the cluster default
  # StorageClass is used — confirm one exists.
  volumeClaimTemplates:
  - metadata:
      name: elasticsearch-data
    spec:
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          storage: 20Gi

---
# Headless Service (clusterIP: None) providing the per-pod DNS names that
# discovery.seed_hosts relies on.
apiVersion: v1
kind: Service
metadata:
  name: elasticsearch
  namespace: microservices
  labels:
    app: elasticsearch
spec:
  ports:
  - port: 9200
    targetPort: 9200
    name: http
  - port: 9300
    targetPort: 9300
    name: transport
  selector:
    app: elasticsearch
  clusterIP: None

Logstash 部署

logstash-deployment.yaml

# Logstash settings (logstash.yml) and pipeline definition (logstash.conf).
# The pipeline ingests Beats input on 5044, parses the Spring Boot console
# log pattern defined in microservices-config (including traceId/spanId),
# and ships to Elasticsearch with a date-suffixed index.
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-config
  namespace: microservices
data:
  # Process-level settings; pipeline files are read from /usr/share/logstash/pipeline
  logstash.yml: |
    http.host: "0.0.0.0"
    path.config: /usr/share/logstash/pipeline
    
  # Pipeline: beats input -> grok/date filters -> elasticsearch output
  logstash.conf: |
    input {
      beats {
        port => 5044
      }
    }
    
    filter {
      if [fields][service] {
        mutate {
          add_field => { "service_name" => "%{[fields][service]}" }
        }
      }
      
      # 解析Spring Boot日志格式
      if [service_name] =~ /.*-service/ {
        grok {
          match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} \[%{DATA:thread}\] %{LOGLEVEL:level} \[%{DATA:trace_info}\] %{DATA:logger} - %{GREEDYDATA:log_message}" }
        }
        
        date {
          match => [ "timestamp", "yyyy-MM-dd HH:mm:ss" ]
        }
        
        # 解析trace信息
        if [trace_info] {
          grok {
            match => { "trace_info" => "%{DATA:trace_id},%{DATA:span_id}" }
          }
        }
      }
      
      # 添加环境标签
      mutate {
        add_field => { "environment" => "production" }
        add_field => { "cluster" => "microservices" }
      }
    }
    
    output {
      elasticsearch {
        hosts => ["elasticsearch:9200"]
        index => "microservices-logs-%{+YYYY.MM.dd}"
      }
      
      stdout {
        codec => rubydebug
      }
    }

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: logstash
  namespace: microservices
  labels:
    app: logstash
spec:
  replicas: 2
  selector:
    matchLabels:
      app: logstash
  template:
    metadata:
      labels:
        app: logstash
    spec:
      containers:
      - name: logstash
        image: docker.elastic.co/logstash/logstash:8.5.0
        ports:
        - containerPort: 5044
        env:
        - name: LS_JAVA_OPTS
          value: "-Xms1g -Xmx1g"
        volumeMounts:
        # Mount logstash.yml via subPath: the original mounted the volume
        # over the whole /usr/share/logstash/config directory, hiding the
        # image's default jvm.options, log4j2.properties and pipelines.yml.
        - name: logstash-config
          mountPath: /usr/share/logstash/config/logstash.yml
          subPath: logstash.yml
        # Replacing the whole pipeline directory is intentional
        - name: logstash-pipeline
          mountPath: /usr/share/logstash/pipeline
        resources:
          requests:
            memory: "1Gi"
            cpu: "500m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
      volumes:
      - name: logstash-config
        configMap:
          name: logstash-config
          items:
          - key: logstash.yml
            path: logstash.yml
      - name: logstash-pipeline
        configMap:
          name: logstash-config
          items:
          - key: logstash.conf
            path: logstash.conf

---
# Beats ingestion endpoint (Filebeat sidecars/daemonset ship here)
apiVersion: v1
kind: Service
metadata:
  name: logstash
  namespace: microservices
  labels:
    app: logstash
spec:
  ports:
  - port: 5044
    targetPort: 5044
  selector:
    app: logstash
  type: ClusterIP

Kibana 部署

kibana-deployment.yaml

# Kibana UI for the ELK stack — single replica (stateless; all persistence
# lives in Elasticsearch).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kibana
  namespace: microservices
  labels:
    app: kibana
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kibana
  template:
    metadata:
      labels:
        app: kibana
    spec:
      containers:
      - name: kibana
        image: docker.elastic.co/kibana/kibana:8.5.0
        ports:
        - containerPort: 5601
        env:
        # Points at the Elasticsearch service in the same namespace.
        # NOTE(review): ES 8.x enables security by default — if the ES
        # deployment leaves xpack.security on, Kibana also needs credentials
        # (e.g. a service-account token); confirm against the ES manifest.
        - name: ELASTICSEARCH_HOSTS
          value: "http://elasticsearch:9200"
        - name: SERVER_NAME
          value: "kibana"
        - name: SERVER_HOST
          value: "0.0.0.0"
        resources:
          requests:
            memory: "1Gi"
            cpu: "500m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
        # /api/status answers once Kibana is up and can reach Elasticsearch
        readinessProbe:
          httpGet:
            path: /api/status
            port: 5601
          initialDelaySeconds: 30
          periodSeconds: 10
        livenessProbe:
          httpGet:
            path: /api/status
            port: 5601
          initialDelaySeconds: 60
          periodSeconds: 30

---
# ClusterIP only — expose externally via Ingress or kubectl port-forward
apiVersion: v1
kind: Service
metadata:
  name: kibana
  namespace: microservices
  labels:
    app: kibana
spec:
  ports:
  - port: 5601
    targetPort: 5601
  selector:
    app: kibana
  type: ClusterIP

2. Filebeat 日志收集

Filebeat 配置

filebeat-daemonset.yaml

# Filebeat DaemonSet: one agent per node tails container logs and ships
# them to Logstash (which forwards to Elasticsearch).
apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
  namespace: microservices
data:
  filebeat.yml: |
    filebeat.inputs:
    # Tail only this project's container logs (filename glob match)
    - type: container
      paths:
        - /var/log/containers/*microservices*.log
      processors:
        # Enrich events with pod/namespace/label metadata from the API server
        - add_kubernetes_metadata:
            host: ${NODE_NAME}
            matchers:
            - logs_path:
                logs_path: "/var/log/containers/"
        # If the app emits JSON lines, lift the fields to the event root
        - decode_json_fields:
            fields: ["message"]
            target: ""
            overwrite_keys: true

    # Ship to Logstash (not directly to ES) so the grok/date pipeline runs
    output.logstash:
      hosts: ["logstash:5044"]

    logging.level: info
    logging.to_files: true
    logging.files:
      path: /var/log/filebeat
      name: filebeat
      keepfiles: 7
      permissions: 0644

---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: filebeat
  namespace: microservices
  labels:
    app: filebeat
spec:
  selector:
    matchLabels:
      app: filebeat
  template:
    metadata:
      labels:
        app: filebeat
    spec:
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      # Host networking with the matching DNS policy so in-cluster names
      # still resolve while on the host network
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
      - name: filebeat
        image: docker.elastic.co/beats/filebeat:8.5.0
        args: [
          "-c", "/etc/filebeat.yml",
          "-e",
        ]
        env:
        # Node name injected for add_kubernetes_metadata's host matcher
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        # Root is required to read other containers' log files on the host
        securityContext:
          runAsUser: 0
        resources:
          limits:
            memory: 200Mi
            cpu: 100m
          requests:
            memory: 100Mi
            cpu: 50m
        volumeMounts:
        - name: config
          mountPath: /etc/filebeat.yml
          readOnly: true
          subPath: filebeat.yml
        # Registry data persisted on the host so restarts don't re-ship logs
        - name: data
          mountPath: /usr/share/filebeat/data
        - name: varlibdockercontainers
          mountPath: /var/lib/docker/containers
          readOnly: true
        - name: varlog
          mountPath: /var/log
          readOnly: true
      volumes:
      - name: config
        configMap:
          defaultMode: 0640
          name: filebeat-config
      - name: varlibdockercontainers
        hostPath:
          path: /var/lib/docker/containers
      - name: varlog
        hostPath:
          path: /var/log
      - name: data
        hostPath:
          path: /var/lib/filebeat-data
          type: DirectoryOrCreate

---
# Identity for the DaemonSet pods
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat
  namespace: microservices

---
# Read-only cluster access required by add_kubernetes_metadata
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: filebeat
rules:
- apiGroups: [""]
  resources:
  - namespaces
  - pods
  - nodes
  verbs:
  - get
  - watch
  - list

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: filebeat
subjects:
- kind: ServiceAccount
  name: filebeat
  namespace: microservices
roleRef:
  kind: ClusterRole
  name: filebeat
  apiGroup: rbac.authorization.k8s.io

10.5 CI/CD 流水线

1. Jenkins 流水线配置

Jenkinsfile

Jenkinsfile

// CI/CD pipeline: build, test, quality-gate, package, image-build/scan,
// then deploy — staging from `develop`, blue/green production from `main`.
pipeline {
    agent any

    environment {
        DOCKER_REGISTRY = 'registry.example.com'
        DOCKER_REPO = 'microservices'
        KUBECONFIG = credentials('kubeconfig')
        // Exposes DOCKER_CREDENTIALS_USR / DOCKER_CREDENTIALS_PSW to shell
        // steps. Note: docker.withRegistry below takes the credentials *ID*
        // string ('docker-registry'), not this binding.
        DOCKER_CREDENTIALS = credentials('docker-registry')
        SONAR_TOKEN = credentials('sonar-token')
    }

    stages {
        stage('Checkout') {
            steps {
                checkout scm
                script {
                    // Short commit hash makes image tags traceable to commits
                    env.GIT_COMMIT_SHORT = sh(
                        script: "git rev-parse --short HEAD",
                        returnStdout: true
                    ).trim()
                    env.BUILD_VERSION = "${env.BUILD_NUMBER}-${env.GIT_COMMIT_SHORT}"
                }
            }
        }

        // Compile all three services in parallel
        stage('Build') {
            parallel {
                stage('User Service') {
                    steps {
                        dir('user-service') {
                            sh 'mvn clean compile'
                        }
                    }
                }
                stage('Order Service') {
                    steps {
                        dir('order-service') {
                            sh 'mvn clean compile'
                        }
                    }
                }
                stage('API Gateway') {
                    steps {
                        dir('api-gateway') {
                            sh 'mvn clean compile'
                        }
                    }
                }
            }
        }

        stage('Test') {
            parallel {
                stage('Unit Tests') {
                    steps {
                        script {
                            def services = ['user-service', 'order-service', 'api-gateway']
                            services.each { service ->
                                dir(service) {
                                    sh 'mvn test'
                                    // `junit` is the standard report-publishing
                                    // step; `publishTestResults` is not a
                                    // built-in Jenkins pipeline step.
                                    junit testResults: 'target/surefire-reports/*.xml',
                                          allowEmptyResults: true
                                }
                            }
                        }
                    }
                }

                stage('Integration Tests') {
                    steps {
                        sh 'mvn verify -Pintegration-test'
                        junit testResults: '**/target/failsafe-reports/*.xml',
                              allowEmptyResults: true
                    }
                }
            }
        }

        stage('Code Quality') {
            parallel {
                stage('SonarQube Analysis') {
                    steps {
                        withSonarQubeEnv('SonarQube') {
                            sh '''
                                mvn sonar:sonar \
                                    -Dsonar.projectKey=microservices \
                                    -Dsonar.host.url=$SONAR_HOST_URL \
                                    -Dsonar.login=$SONAR_TOKEN
                            '''
                        }
                    }
                }

                stage('Security Scan') {
                    steps {
                        sh 'mvn org.owasp:dependency-check-maven:check'
                        publishHTML([
                            allowMissing: false,
                            alwaysLinkToLastBuild: true,
                            keepAll: true,
                            reportDir: 'target',
                            reportFiles: 'dependency-check-report.html',
                            reportName: 'OWASP Dependency Check Report'
                        ])
                    }
                }
            }
        }

        // Abort the pipeline if the SonarQube quality gate fails
        stage('Quality Gate') {
            steps {
                timeout(time: 5, unit: 'MINUTES') {
                    waitForQualityGate abortPipeline: true
                }
            }
        }

        stage('Package') {
            parallel {
                stage('User Service') {
                    steps {
                        dir('user-service') {
                            sh 'mvn package -DskipTests'
                            archiveArtifacts artifacts: 'target/*.jar', fingerprint: true
                        }
                    }
                }
                stage('Order Service') {
                    steps {
                        dir('order-service') {
                            sh 'mvn package -DskipTests'
                            archiveArtifacts artifacts: 'target/*.jar', fingerprint: true
                        }
                    }
                }
                stage('API Gateway') {
                    steps {
                        dir('api-gateway') {
                            sh 'mvn package -DskipTests'
                            archiveArtifacts artifacts: 'target/*.jar', fingerprint: true
                        }
                    }
                }
            }
        }

        stage('Docker Build') {
            parallel {
                stage('User Service Image') {
                    steps {
                        dir('user-service') {
                            script {
                                def image = docker.build("${DOCKER_REGISTRY}/${DOCKER_REPO}/user-service:${BUILD_VERSION}")
                                // withRegistry expects the credentials ID, not
                                // the expanded credentials() binding
                                docker.withRegistry("https://${DOCKER_REGISTRY}", 'docker-registry') {
                                    image.push()
                                    image.push('latest')
                                }
                            }
                        }
                    }
                }
                stage('Order Service Image') {
                    steps {
                        dir('order-service') {
                            script {
                                def image = docker.build("${DOCKER_REGISTRY}/${DOCKER_REPO}/order-service:${BUILD_VERSION}")
                                docker.withRegistry("https://${DOCKER_REGISTRY}", 'docker-registry') {
                                    image.push()
                                    image.push('latest')
                                }
                            }
                        }
                    }
                }
                stage('API Gateway Image') {
                    steps {
                        dir('api-gateway') {
                            script {
                                def image = docker.build("${DOCKER_REGISTRY}/${DOCKER_REPO}/api-gateway:${BUILD_VERSION}")
                                docker.withRegistry("https://${DOCKER_REGISTRY}", 'docker-registry') {
                                    image.push()
                                    image.push('latest')
                                }
                            }
                        }
                    }
                }
            }
        }

        stage('Security Scan Images') {
            parallel {
                stage('Trivy Scan') {
                    steps {
                        script {
                            def services = ['user-service', 'order-service', 'api-gateway']
                            services.each { service ->
                                sh """
                                    trivy image --format json --output ${service}-scan.json \
                                        ${DOCKER_REGISTRY}/${DOCKER_REPO}/${service}:${BUILD_VERSION}
                                """
                            }
                        }
                        // Trivy writes JSON, not HTML — archive the reports
                        // rather than routing them through publishHTML
                        archiveArtifacts artifacts: '*-scan.json', fingerprint: true
                    }
                }
            }
        }

        stage('Deploy to Staging') {
            when {
                branch 'develop'
            }
            steps {
                script {
                    sh """
                        helm upgrade --install microservices-staging ./helm/microservices \
                            --namespace microservices-staging \
                            --create-namespace \
                            --set image.tag=${BUILD_VERSION} \
                            --set environment=staging \
                            --wait --timeout=600s
                    """
                }

                // Smoke tests against the freshly deployed staging environment
                sh 'mvn test -Dtest=SmokeTest -Dtest.environment=staging'
            }
        }

        stage('Deploy to Production') {
            when {
                branch 'main'
            }
            steps {
                script {
                    // Blue-green deployment: find the color currently serving
                    // traffic via the api-gateway Service selector...
                    def currentColor = sh(
                        script: "kubectl get service api-gateway -n microservices -o jsonpath='{.spec.selector.color}'",
                        returnStdout: true
                    ).trim()

                    def newColor = currentColor == 'blue' ? 'green' : 'blue'

                    echo "Current color: ${currentColor}, deploying to: ${newColor}"

                    // ...deploy the new color alongside the live one...
                    sh """
                        helm upgrade --install microservices-${newColor} ./helm/microservices \
                            --namespace microservices \
                            --set image.tag=${BUILD_VERSION} \
                            --set deployment.color=${newColor} \
                            --set environment=production \
                            --wait --timeout=600s
                    """

                    // ...wait for the new color's pods to become ready...
                    sh """
                        kubectl wait --for=condition=ready pod \
                            -l app=user-service,color=${newColor} \
                            -n microservices --timeout=300s
                        kubectl wait --for=condition=ready pod \
                            -l app=order-service,color=${newColor} \
                            -n microservices --timeout=300s
                    """

                    // ...verify with production-environment tests...
                    sh 'mvn test -Dtest=ProductionTest -Dtest.environment=production'

                    // ...flip traffic only after manual approval...
                    input message: 'Switch traffic to new version?', ok: 'Deploy'

                    sh """
                        kubectl patch service api-gateway -n microservices \
                            -p '{"spec":{"selector":{"color":"${newColor}"}}}'
                    """

                    // ...then retire the previous color (best-effort)
                    sh "helm uninstall microservices-${currentColor} -n microservices || true"
                }
            }
        }
    }

    post {
        always {
            // Clean the workspace regardless of outcome
            cleanWs()
        }

        success {
            slackSend(
                channel: '#deployments',
                color: 'good',
                message: "✅ Pipeline succeeded for ${env.JOB_NAME} - ${env.BUILD_NUMBER}"
            )
        }

        failure {
            slackSend(
                channel: '#deployments',
                color: 'danger',
                message: "❌ Pipeline failed for ${env.JOB_NAME} - ${env.BUILD_NUMBER}"
            )
        }
    }
}

2. GitLab CI/CD 配置

.gitlab-ci.yml

.gitlab-ci.yml

# GitLab CI/CD: build → test → quality → package/images → scan → deploy.
stages:
  - build
  - test
  - quality
  - package
  - security
  - deploy-staging
  - deploy-production

variables:
  DOCKER_REGISTRY: registry.gitlab.com
  DOCKER_REPO: microservices
  # Keep the local Maven repo inside the project dir so `cache:` can reuse it
  MAVEN_OPTS: "-Dmaven.repo.local=$CI_PROJECT_DIR/.m2/repository"
  MAVEN_CLI_OPTS: "--batch-mode --errors --fail-at-end --show-version"

cache:
  paths:
    - .m2/repository/
    - target/

# Compile only; packaged artifacts come from the package stage
build:
  stage: build
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS clean compile
  artifacts:
    paths:
      - target/
    expire_in: 1 hour

# Unit tests with JUnit reporting and JaCoCo coverage extraction
unit-test:
  stage: test
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS test
  artifacts:
    reports:
      junit:
        - "**/target/surefire-reports/TEST-*.xml"
    paths:
      - target/site/jacoco/
    expire_in: 1 week
  coverage: '/Total.*?([0-9]{1,3})%/'

# Failsafe integration tests against throwaway MySQL/Redis service containers
integration-test:
  stage: test
  image: maven:3.8.4-openjdk-11
  services:
    - mysql:8.0
    - redis:6.2
  variables:
    MYSQL_ROOT_PASSWORD: rootpassword
    MYSQL_DATABASE: test_db
    SPRING_PROFILES_ACTIVE: test
  script:
    - mvn $MAVEN_CLI_OPTS verify -Pintegration-test
  artifacts:
    reports:
      junit:
        - "**/target/failsafe-reports/TEST-*.xml"
    expire_in: 1 week

# Static analysis on long-lived branches only
sonarqube-check:
  stage: quality
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS sonar:sonar
      -Dsonar.projectKey=$CI_PROJECT_NAME
      -Dsonar.host.url=$SONAR_HOST_URL
      -Dsonar.login=$SONAR_TOKEN
  only:
    - main
    - develop

# OWASP dependency vulnerability scan
security-scan:
  stage: quality
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS org.owasp:dependency-check-maven:check
  artifacts:
    reports:
      dependency_scanning: target/dependency-check-report.json
    paths:
      - target/dependency-check-report.html
    expire_in: 1 week

package:
  stage: package
  image: maven:3.8.4-openjdk-11
  script:
    - mvn $MAVEN_CLI_OPTS package -DskipTests
  artifacts:
    paths:
      - "**/target/*.jar"
    expire_in: 1 week

# Shared image-build template; SERVICE_NAME is supplied per job below
.docker-build: &docker-build
  stage: package
  image: docker:20.10.16
  services:
    - docker:20.10.16-dind
  before_script:
    - echo $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY
  script:
    - cd $SERVICE_NAME
    # Build once, then tag — running `docker build` twice rebuilds the image
    - docker build -t $CI_REGISTRY_IMAGE/$SERVICE_NAME:$CI_COMMIT_SHA .
    - docker tag $CI_REGISTRY_IMAGE/$SERVICE_NAME:$CI_COMMIT_SHA $CI_REGISTRY_IMAGE/$SERVICE_NAME:latest
    - docker push $CI_REGISTRY_IMAGE/$SERVICE_NAME:$CI_COMMIT_SHA
    - docker push $CI_REGISTRY_IMAGE/$SERVICE_NAME:latest

build-user-service:
  <<: *docker-build
  variables:
    SERVICE_NAME: user-service

build-order-service:
  <<: *docker-build
  variables:
    SERVICE_NAME: order-service

build-api-gateway:
  <<: *docker-build
  variables:
    SERVICE_NAME: api-gateway

# Trivy scan of the freshly pushed images
container-security-scan:
  stage: security
  image: aquasec/trivy:latest
  script:
    - >
      trivy image --format template --template "@contrib/gitlab.tpl"
      --output gl-container-scanning-report.json
      $CI_REGISTRY_IMAGE/user-service:$CI_COMMIT_SHA
    - >
      trivy image --format template --template "@contrib/gitlab.tpl"
      --output gl-container-scanning-order.json
      $CI_REGISTRY_IMAGE/order-service:$CI_COMMIT_SHA
    - >
      trivy image --format template --template "@contrib/gitlab.tpl"
      --output gl-container-scanning-gateway.json
      $CI_REGISTRY_IMAGE/api-gateway:$CI_COMMIT_SHA
  artifacts:
    reports:
      container_scanning:
        - gl-container-scanning-*.json
    expire_in: 1 week

deploy-staging:
  stage: deploy-staging
  # NOTE(review): bitnami/kubectl ships neither helm nor mvn — this job needs
  # an image bundling kubectl+helm (e.g. alpine/k8s) and a separate
  # Maven-capable job/image for the smoke test. Confirm before relying on it.
  image: bitnami/kubectl:latest
  environment:
    name: staging
    url: https://staging.microservices.example.com
  script:
    - kubectl config use-context $KUBE_CONTEXT_STAGING
    - helm upgrade --install microservices-staging ./helm/microservices
        --namespace microservices-staging
        --create-namespace
        --set image.tag=$CI_COMMIT_SHA
        --set environment=staging
        --wait --timeout=600s
    # Smoke tests against the freshly deployed staging release
    - mvn test -Dtest=SmokeTest -Dtest.environment=staging
  only:
    - develop

deploy-production:
  stage: deploy-production
  image: bitnami/kubectl:latest
  environment:
    name: production
    url: https://microservices.example.com
  script:
    - kubectl config use-context $KUBE_CONTEXT_PRODUCTION
    # Blue-green switch is handled by the helper script
    - ./scripts/blue-green-deploy.sh $CI_COMMIT_SHA
  when: manual
  only:
    - main

3. Helm Chart 配置

Chart.yaml

helm/microservices/Chart.yaml

# Helm chart metadata for the microservices umbrella chart.
apiVersion: v2
name: microservices
description: A Helm chart for microservices application
type: application
# Chart version — bump on every chart change (SemVer)
version: 0.1.0
# Version of the application being deployed
appVersion: "1.0.0"

# Bitnami subcharts, toggled via mysql.enabled / redis.enabled in values.yaml
dependencies:
  - name: mysql
    version: 9.4.1
    repository: https://charts.bitnami.com/bitnami
    condition: mysql.enabled
  - name: redis
    version: 17.3.7
    repository: https://charts.bitnami.com/bitnami
    condition: redis.enabled

values.yaml

helm/microservices/values.yaml

# Default values for the microservices umbrella chart.
# WARNING: the `secrets:` section below holds credentials in plaintext —
# for real deployments source these from a secret store (sealed-secrets,
# external-secrets, vault) instead of committing them to VCS.

# Global settings shared with all subcharts
global:
  imageRegistry: registry.example.com
  imagePullSecrets:
    - name: registry-secret

# Image settings (tag is normally overridden per release via --set image.tag=...)
image:
  repository: microservices
  tag: latest
  pullPolicy: IfNotPresent

# Target environment name
environment: production

# Deployment settings — `color` drives the blue/green Service selector
deployment:
  color: blue
  replicaCount: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0

# Per-service resource requests/limits
resources:
  userService:
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"
  orderService:
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"
  apiGateway:
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"

# HPA settings
autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 10
  targetCPUUtilizationPercentage: 70
  targetMemoryUtilizationPercentage: 80

# Service ports per component
service:
  type: ClusterIP
  ports:
    userService: 8081
    orderService: 8082
    apiGateway: 8080

# Ingress — TLS via cert-manager, traffic routed to the API gateway
ingress:
  enabled: true
  className: nginx
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    cert-manager.io/cluster-issuer: letsencrypt-prod
  hosts:
    - host: microservices.example.com
      paths:
        - path: /
          pathType: Prefix
          service: api-gateway
  tls:
    - secretName: microservices-tls
      hosts:
        - microservices.example.com

# Shared application config (rendered into a ConfigMap)
configMap:
  data:
    spring.profiles.active: k8s
    logging.level.com.example: INFO
    management.endpoints.web.exposure.include: health,info,metrics,prometheus

# Secret material — see the WARNING at the top of this file
secrets:
  mysql:
    rootPassword: rootpassword
    userPassword: micropass
  jwt:
    secret: myVerySecretKeyThatIsAtLeast256BitsLongForHS512Algorithm

# Bitnami MySQL subchart values
mysql:
  enabled: true
  auth:
    rootPassword: rootpassword
    database: microservices_db
    username: microuser
    password: micropass
  primary:
    persistence:
      enabled: true
      size: 10Gi
      storageClass: fast-ssd

# Bitnami Redis subchart values
redis:
  enabled: true
  auth:
    enabled: false
  master:
    persistence:
      enabled: true
      size: 5Gi
      storageClass: fast-ssd

# Monitoring toggles
monitoring:
  prometheus:
    enabled: true
    scrape: true
  grafana:
    enabled: true

# Logging defaults (pattern includes traceId/spanId from MDC)
logging:
  level: INFO
  pattern: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level [%X{traceId},%X{spanId}] %logger{36} - %msg%n"

10.6 性能优化

1. JVM 调优

JVM 参数配置

application-production.yml

# Production tuning profile.
# NOTE: the original file declared `spring:` four separate times at the top
# level. Duplicate mapping keys are invalid YAML — Spring Boot's loader
# rejects them (and a generic parser keeps only the last occurrence,
# silently dropping the rest). All spring.* settings are merged under a
# single `spring:` mapping here.
spring:
  application:
    name: ${SERVICE_NAME:unknown}

  # HikariCP connection pool (all timeouts in milliseconds)
  datasource:
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      idle-timeout: 300000
      max-lifetime: 1800000
      connection-timeout: 30000
      validation-timeout: 5000
      leak-detection-threshold: 60000

  # Lettuce (Redis client) connection pool
  redis:
    lettuce:
      pool:
        max-active: 20
        max-idle: 8
        min-idle: 2
        max-wait: 2000ms
      shutdown-timeout: 100ms

  # Redis-backed cache; TTL in milliseconds
  cache:
    type: redis
    redis:
      time-to-live: 600000
      cache-null-values: false

  # Async task execution / scheduling pools
  task:
    execution:
      pool:
        core-size: 8
        max-size: 20
        queue-capacity: 100
        keep-alive: 60s
      thread-name-prefix: async-task-
    scheduling:
      pool:
        size: 5
      thread-name-prefix: scheduled-task-

# Embedded Tomcat tuning
server:
  tomcat:
    threads:
      max: 200
      min-spare: 10
    connection-timeout: 20000
    max-connections: 8192
    accept-count: 100
  compression:
    enabled: true
    mime-types: text/html,text/xml,text/plain,text/css,text/javascript,application/javascript,application/json
    min-response-size: 1024

# Actuator endpoints, Prometheus export, and latency percentiles
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus,env,configprops
  endpoint:
    health:
      show-details: when-authorized
      probes:
        enabled: true
  metrics:
    export:
      prometheus:
        enabled: true
    distribution:
      percentiles-histogram:
        http.server.requests: true
      percentiles:
        http.server.requests: 0.5, 0.9, 0.95, 0.99
  health:
    circuitbreakers:
      enabled: true
    ratelimiters:
      enabled: true

启动脚本优化

optimized-entrypoint.sh

#!/bin/bash
# Optimized container entrypoint for the JDK 11 based service images
# (the Dockerfiles build on openjdk:11-jre-slim).
#
# Changes vs. the previous script (all of which aborted or misbehaved on 11):
#   -XX:+UseG1MixedGCCountTarget=8    malformed — G1MixedGCCountTarget takes a
#                                     value; the +/- boolean form aborts the JVM
#   -XX:+UseCGroupMemoryLimitForHeap  JDK 8 experimental flag removed in 10+;
#                                     container awareness is on by default
#   -XX:+PrintGC* / GC-log rotation   JDK 8 flags removed in 9+ — replaced by
#                                     unified logging (-Xlog:gc*)
#   -XX:*RAMPercentage                silently ignored when an explicit -Xmx is
#                                     set, so they are dropped here

# Base JVM options. Explicit -Xms/-Xmx from HEAP_MIN/HEAP_MAX with
# 512m/1024m defaults.
JAVA_OPTS="${JAVA_OPTS} \
    -server \
    -Xms${HEAP_MIN:-512m} \
    -Xmx${HEAP_MAX:-1024m} \
    -XX:NewRatio=2 \
    -XX:SurvivorRatio=8 \
    -XX:+UseG1GC \
    -XX:MaxGCPauseMillis=200 \
    -XX:G1HeapRegionSize=16m \
    -XX:G1MixedGCCountTarget=8 \
    -XX:+UseStringDeduplication \
    -XX:+UseCompressedOops \
    -XX:+UseCompressedClassPointers \
    -Djava.security.egd=file:/dev/./urandom \
    -Djava.awt.headless=true \
    -Dfile.encoding=UTF-8 \
    -Duser.timezone=Asia/Shanghai"

# GC logging via JDK 9+ unified logging: rotated files, 5 x 10M
if [ "${ENABLE_GC_LOG:-true}" = "true" ]; then
    JAVA_OPTS="${JAVA_OPTS} \
        -Xlog:gc*,safepoint:file=/app/logs/gc.log:time,uptime,level,tags:filecount=5,filesize=10M"
fi

# Java Flight Recorder (bundled with OpenJDK 11; -XX:+FlightRecorder is no
# longer required to start a recording)
if [ "${ENABLE_PROFILING:-false}" = "true" ]; then
    JAVA_OPTS="${JAVA_OPTS} \
        -XX:StartFlightRecording=duration=60s,filename=/app/logs/flight-recording.jfr"
fi

# Remote debugging on port 5005 (suspend=n: do not block startup)
if [ "${ENABLE_DEBUG:-false}" = "true" ]; then
    JAVA_OPTS="${JAVA_OPTS} \
        -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005"
fi

# Block until dependencies accept TCP connections (requires netcat in the
# image). Each wait is skipped when its host variable is unset.
echo "Waiting for dependencies..."

if [ -n "${DB_HOST}" ] && [ -n "${DB_PORT}" ]; then
    while ! nc -z "${DB_HOST}" "${DB_PORT}"; do
        echo "Waiting for database at ${DB_HOST}:${DB_PORT}..."
        sleep 2
    done
fi

if [ -n "${REDIS_HOST}" ]; then
    while ! nc -z "${REDIS_HOST}" "${REDIS_PORT:-6379}"; do
        echo "Waiting for Redis at ${REDIS_HOST}:${REDIS_PORT:-6379}..."
        sleep 2
    done
fi

if [ -n "${EUREKA_HOST}" ]; then
    while ! nc -z "${EUREKA_HOST}" "${EUREKA_PORT:-8761}"; do
        echo "Waiting for Eureka at ${EUREKA_HOST}:${EUREKA_PORT:-8761}..."
        sleep 2
    done
fi

echo "Dependencies are ready. Starting application..."
echo "Java options: ${JAVA_OPTS}"

# Replace the shell with the JVM so signals (SIGTERM) reach the app directly
exec java ${JAVA_OPTS} -jar app.jar

2. 数据库优化

MySQL 配置优化

mysql-optimized.cnf

[mysqld]
# --- Basics ---
default-authentication-plugin=mysql_native_password
character-set-server=utf8mb4
collation-server=utf8mb4_unicode_ci
skip-character-set-client-handshake

# --- Connections ---
max_connections=500
max_connect_errors=1000
connect_timeout=10
wait_timeout=28800
interactive_timeout=28800

# --- Buffers ---
innodb_buffer_pool_size=1G
innodb_buffer_pool_instances=4
innodb_log_buffer_size=16M
key_buffer_size=256M
read_buffer_size=2M
read_rnd_buffer_size=4M
sort_buffer_size=4M
join_buffer_size=4M

# --- InnoDB ---
innodb_file_per_table=1
# 2 = flush the log once per second: better throughput, may lose ~1s of
# transactions on a crash
innodb_flush_log_at_trx_commit=2
innodb_log_file_size=256M
innodb_log_files_in_group=2
innodb_flush_method=O_DIRECT
innodb_io_capacity=200
innodb_io_capacity_max=400
innodb_read_io_threads=8
innodb_write_io_threads=8
innodb_thread_concurrency=16
innodb_lock_wait_timeout=50

# NOTE: the query cache (query_cache_type / query_cache_size /
# query_cache_limit) was removed in MySQL 8.0 — setting those variables makes
# mysqld abort at startup with "unknown variable", so they are intentionally
# absent here (this stack runs mysql:8.0).

# --- Temporary tables ---
tmp_table_size=128M
max_heap_table_size=128M

# --- Binary log ---
log-bin=mysql-bin
binlog_format=ROW
# expire_logs_days is deprecated in 8.0 — 604800s = 7 days
binlog_expire_logs_seconds=604800
max_binlog_size=100M

# --- Slow query log ---
slow_query_log=1
slow_query_log_file=/var/log/mysql/slow.log
long_query_time=2
log_queries_not_using_indexes=1

# --- Error log ---
log-error=/var/log/mysql/error.log

# --- Performance monitoring ---
performance_schema=ON

10.7 故障排查

1. 常见问题诊断

健康检查脚本

health-check.sh

#!/bin/bash
# health-check.sh — one-shot diagnostic sweep over the microservices
# namespace: verifies the namespace, infrastructure pods (MySQL/Redis),
# the Spring Boot services' deployments and actuator health, and
# service-to-service connectivity. Requires kubectl access to the cluster.

# ANSI color codes for pass/warn/fail markers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Targets to check
NAMESPACE="microservices"
SERVICES=("user-service" "order-service" "api-gateway" "eureka-server")
INFRA_SERVICES=("mysql" "redis")

echo "=== 微服务健康检查 ==="
echo "检查时间: $(date)"
echo "命名空间: ${NAMESPACE}"
echo

# 1. The namespace must exist before anything else is checkable
echo "1. 检查命名空间..."
if kubectl get namespace ${NAMESPACE} &>/dev/null; then
    echo -e "${GREEN}✓${NC} 命名空间 ${NAMESPACE} 存在"
else
    echo -e "${RED}✗${NC} 命名空间 ${NAMESPACE} 不存在"
    exit 1
fi
echo

# 2. Infrastructure (stateful) services
echo "2. 检查基础设施服务..."
for service in "${INFRA_SERVICES[@]}"; do
    echo "检查 ${service}..."
    
    # Phase of the first matching pod (assumes a single replica)
    pod_status=$(kubectl get pods -n ${NAMESPACE} -l app=${service} -o jsonpath='{.items[0].status.phase}' 2>/dev/null)
    if [ "${pod_status}" = "Running" ]; then
        echo -e "  ${GREEN}✓${NC} Pod状态: Running"
    else
        echo -e "  ${RED}✗${NC} Pod状态: ${pod_status:-NotFound}"
    fi
    
    # Is the Service object present?
    if kubectl get service ${service} -n ${NAMESPACE} &>/dev/null; then
        echo -e "  ${GREEN}✓${NC} 服务存在"
        
        # Well-known port per backing store
        case ${service} in
            "mysql")
                port=3306
                ;;
            "redis")
                port=6379
                ;;
        esac
        
        # TCP reachability from inside the pod (requires nc in the image)
        if kubectl exec -n ${NAMESPACE} deployment/${service} -- nc -z localhost ${port} &>/dev/null; then
            echo -e "  ${GREEN}✓${NC} 端口 ${port} 可达"
        else
            echo -e "  ${RED}✗${NC} 端口 ${port} 不可达"
        fi
    else
        echo -e "  ${RED}✗${NC} 服务不存在"
    fi
    echo
done

# 3. Application microservices
echo "3. 检查微服务..."
for service in "${SERVICES[@]}"; do
    echo "检查 ${service}..."
    
    # Is the Deployment present?
    if kubectl get deployment ${service} -n ${NAMESPACE} &>/dev/null; then
        echo -e "  ${GREEN}✓${NC} Deployment存在"
        
        # Desired vs. ready replica counts
        desired=$(kubectl get deployment ${service} -n ${NAMESPACE} -o jsonpath='{.spec.replicas}')
        ready=$(kubectl get deployment ${service} -n ${NAMESPACE} -o jsonpath='{.status.readyReplicas}')
        
        if [ "${ready}" = "${desired}" ]; then
            echo -e "  ${GREEN}✓${NC} 副本状态: ${ready}/${desired}"
        else
            echo -e "  ${YELLOW}!${NC} 副本状态: ${ready:-0}/${desired}"
        fi
        
        # Count of pods actually in the Running phase
        pod_count=$(kubectl get pods -n ${NAMESPACE} -l app=${service} --field-selector=status.phase=Running --no-headers | wc -l)
        if [ ${pod_count} -gt 0 ]; then
            echo -e "  ${GREEN}✓${NC} 运行中的Pod: ${pod_count}"
        else
            echo -e "  ${RED}✗${NC} 没有运行中的Pod"
        fi
        
        # Actuator health endpoint; port depends on the service
        case ${service} in
            "eureka-server")
                port=8761
                health_path="/actuator/health"
                ;;
            "api-gateway")
                port=8080
                health_path="/actuator/health"
                ;;
            "user-service")
                port=8081
                health_path="/actuator/health"
                ;;
            "order-service")
                port=8082
                health_path="/actuator/health"
                ;;
        esac
        
        # In-pod curl against the actuator endpoint (requires curl in image)
        if kubectl exec -n ${NAMESPACE} deployment/${service} -- curl -f http://localhost:${port}${health_path} &>/dev/null; then
            echo -e "  ${GREEN}✓${NC} 健康检查通过"
        else
            echo -e "  ${RED}✗${NC} 健康检查失败"
        fi
    else
        echo -e "  ${RED}✗${NC} Deployment不存在"
    fi
    echo
done

# 4. Cross-service network reachability
echo "4. 检查网络连通性..."
echo "检查服务间通信..."

# Gateway -> user-service via cluster DNS
if kubectl exec -n ${NAMESPACE} deployment/api-gateway -- curl -f http://user-service:8081/actuator/health &>/dev/null; then
    echo -e "  ${GREEN}✓${NC} API网关 -> 用户服务"
else
    echo -e "  ${RED}✗${NC} API网关 -> 用户服务"
fi

# 从API网关访问订单服务
if kubectl exec -n ${NAMESPACE} deployment/api-gateway -- curl -f http://order-service:8082/actuator/health &>/dev/null; then
    echo -e "  ${GREEN}✓${NC} API网关 -> 订单服务"
else
    echo -e "  ${RED}✗${NC} API网关 -> 订单服务"
fi

# 从用户服务访问数据库
if kubectl exec -n ${NAMESPACE} deployment/user-service -- nc -z mysql 3306 &>/dev/null; then
    echo -e "  ${GREEN}✓${NC} 用户服务 -> MySQL"
else
    echo -e "  ${RED}✗${NC} 用户服务 -> MySQL"
fi

# 从订单服务访问Redis
if kubectl exec -n ${NAMESPACE} deployment/order-service -- nc -z redis 6379 &>/dev/null; then
    echo -e "  ${GREEN}✓${NC} 订单服务 -> Redis"
else
    echo -e "  ${RED}✗${NC} 订单服务 -> Redis"
fi
echo

# 检查资源使用情况
echo "5. 检查资源使用情况..."
echo "CPU和内存使用情况:"
kubectl top pods -n ${NAMESPACE} 2>/dev/null || echo "  metrics-server未安装或不可用"
echo

# 检查事件
echo "6. 检查最近事件..."
echo "最近的Warning事件:"
kubectl get events -n ${NAMESPACE} --field-selector type=Warning --sort-by='.lastTimestamp' | tail -5
echo

echo "=== 健康检查完成 ==="

2. 日志分析工具

日志聚合脚本

log-analyzer.sh

#!/bin/bash
#
# log-analyzer.sh — 微服务日志聚合与分析工具。
# 用法: ./log-analyzer.sh [service-name|errors|performance|all]
#   service-name : 对单个服务输出日志 + 错误分析 + 性能分析
#   errors       : 对所有服务做错误分析
#   performance  : 对所有服务做性能分析
#   all(默认)  : 对所有服务输出日志(50 行)+ 错误分析 + 性能分析

# 配置
NAMESPACE="microservices"
LOG_LINES=100      # 单服务模式下默认展示的日志行数
TIME_RANGE="1h"    # 日志时间范围,传给 kubectl logs --since

# 受管微服务列表,供批量模式统一遍历(避免在多个分支中重复维护)
ALL_SERVICES=(user-service order-service api-gateway eureka-server)

# 函数:获取服务最近日志
# $1 = 服务名;$2 = 行数(可选,默认 LOG_LINES)
get_service_logs() {
    local service=$1
    local lines=${2:-$LOG_LINES}

    echo "=== ${service} 日志 ==="
    kubectl logs -n "${NAMESPACE}" "deployment/${service}" --tail="${lines}" --since="${TIME_RANGE}"
    echo
}

# 函数:统计错误日志(按出现次数降序)
# $1 = 服务名
analyze_errors() {
    local service=$1

    echo "=== ${service} 错误分析 ==="
    kubectl logs -n "${NAMESPACE}" "deployment/${service}" --since="${TIME_RANGE}" | \
        grep -i "error\|exception\|failed\|timeout" | \
        sort | uniq -c | sort -nr
    echo
}

# 函数:提取最近的耗时类日志行
# $1 = 服务名
analyze_performance() {
    local service=$1

    echo "=== ${service} 性能分析 ==="
    kubectl logs -n "${NAMESPACE}" "deployment/${service}" --since="${TIME_RANGE}" | \
        grep -E "took [0-9]+ms|duration=[0-9]+ms|elapsed=[0-9]+ms" | \
        tail -10
    echo
}

# 主程序
echo "微服务日志分析工具"
echo "时间范围: ${TIME_RANGE}"
echo "命名空间: ${NAMESPACE}"
echo

# 参数加引号,防止空值/含空白参数导致 case 解析出错
case "${1:-all}" in
    "user-service"|"order-service"|"api-gateway"|"eureka-server")
        get_service_logs "$1"
        analyze_errors "$1"
        analyze_performance "$1"
        ;;
    "errors")
        for service in "${ALL_SERVICES[@]}"; do
            analyze_errors "$service"
        done
        ;;
    "performance")
        for service in "${ALL_SERVICES[@]}"; do
            analyze_performance "$service"
        done
        ;;
    "all")
        for service in "${ALL_SERVICES[@]}"; do
            get_service_logs "$service" 50
            analyze_errors "$service"
            analyze_performance "$service"
        done
        ;;
    *)
        echo "用法: $0 [service-name|errors|performance|all]"
        echo "服务名称: user-service, order-service, api-gateway, eureka-server"
        exit 1
        ;;
esac

总结

本章详细介绍了微服务的部署与运维,涵盖了以下核心内容:

核心概念

  1. 容器化部署 - Docker容器化、多阶段构建优化
  2. Kubernetes部署 - K8s资源配置、服务部署、水平扩缩容
  3. 服务监控 - Prometheus监控、Grafana可视化
  4. 日志管理 - ELK Stack部署、日志收集与分析
  5. CI/CD流水线 - Jenkins/GitLab CI配置、自动化部署
  6. 性能优化 - JVM调优、数据库优化
  7. 故障排查 - 健康检查、日志分析

最佳实践

  1. 部署策略 - 蓝绿部署、滚动更新、金丝雀发布
  2. 监控体系 - 全链路监控、告警机制、性能指标
  3. 日志管理 - 结构化日志、集中收集、实时分析
  4. 自动化运维 - 自动化部署、自动扩缩容、自愈机制
  5. 安全防护 - 镜像安全扫描、网络隔离、访问控制

注意事项

  1. 资源管理 - 合理配置资源限制、避免资源竞争
  2. 配置管理 - 环境隔离、配置版本控制、敏感信息保护
  3. 依赖管理 - 服务依赖检查、启动顺序控制
  4. 故障处理 - 快速定位、自动恢复、降级策略

扩展方向

  1. 高级功能 - 服务网格(Istio)、多集群管理、边缘计算
  2. 运维工具 - GitOps、Operator模式、混沌工程
  3. 云原生 - Serverless、FaaS、云原生数据库

通过本章的学习,你已经掌握了微服务的完整部署与运维体系。下一章我们将学习微服务的高级主题和最佳实践。