14.1 Overview

As the de facto standard for container orchestration, Kubernetes keeps evolving to meet new technology trends and business needs. This chapter looks at where Kubernetes is heading, covering edge computing, AI/ML workloads, WebAssembly integration, multi-cloud and hybrid cloud, sustainability, and other emerging areas.

14.1.1 Technology Trends

graph TB
    A[Kubernetes Core] --> B[Edge Computing]
    A --> C[AI/ML Workloads]
    A --> D[WebAssembly]
    A --> E[Multi-Cloud & Hybrid Cloud]
    A --> F[Sustainability]
    A --> G[Security Enhancements]
    A --> H[Developer Experience]
    
    B --> B1[Edge Node Management]
    B --> B2[Offline Operation]
    B --> B3[Low-Latency Applications]
    
    C --> C1[GPU Scheduling]
    C --> C2[Model Serving]
    C --> C3[Distributed Training]
    
    D --> D1[Lightweight Runtimes]
    D --> D2[Multi-Language Support]
    D --> D3[Security Sandboxing]
    
    E --> E1[Federated Clusters]
    E --> E2[Cross-Cloud Migration]
    E --> E3[Unified Management]
    
    F --> F1[Green Computing]
    F --> F2[Resource Optimization]
    F --> F3[Carbon Neutrality]
    
    G --> G1[Zero Trust]
    G --> G2[Supply Chain Security]
    G --> G3[Runtime Protection]
    
    H --> H1[Simplified Deployment]
    H --> H2[Visualization Tools]
    H --> H3[Automated Operations]

14.2 Edge Computing and Kubernetes

14.2.1 Edge Computing Architecture

Edge computing pushes compute capacity out to the network edge, and Kubernetes plays an important role in managing those edge workloads.

# Edge node configuration
apiVersion: v1
kind: Node
metadata:
  name: edge-node-001
  labels:
    node-type: edge
    location: beijing-datacenter
    hardware: arm64
    network-zone: zone-a
spec:
  # Edge-specific taint: only workloads that tolerate it are scheduled here
  taints:
  - key: edge-node
    value: "true"
    effect: NoSchedule
# Note: a node's capacity (for example cpu: "4", memory: "8Gi", ephemeral-storage: "100Gi",
# nvidia.com/gpu: "1") is reported by the kubelet under status.capacity; it cannot be
# declared in spec and is listed here for reference only.
---
# Scheduling policy for an edge workload
apiVersion: v1
kind: Pod
metadata:
  name: edge-application
  namespace: edge-apps
spec:
  nodeSelector:
    node-type: edge
    location: beijing-datacenter
  tolerations:
  - key: edge-node
    operator: Equal
    value: "true"
    effect: NoSchedule
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: network-zone
            operator: In
            values: ["zone-a", "zone-b"]
  containers:
  - name: edge-app
    image: edge-app:v1.0
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 512Mi
    env:
    - name: EDGE_MODE
      value: "true"
    - name: OFFLINE_CAPABLE
      value: "true"

14.2.2 K3s: Lightweight Kubernetes

#!/bin/bash
# K3s edge Kubernetes deployment script

echo "=== K3s Edge Kubernetes Deployment ==="

# Install the K3s server
install_k3s_server() {
    echo "Installing K3s server..."
    
    # Download and install K3s (Traefik and the built-in service LB disabled)
    curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="--disable traefik --disable servicelb" sh -
    
    # Wait for K3s to come up
    echo "Waiting for K3s to start..."
    sleep 30
    
    # Check node status
    sudo k3s kubectl get nodes
    
    # Print the node token needed to join agents
    sudo cat /var/lib/rancher/k3s/server/node-token
}

# Install a K3s agent node
install_k3s_agent() {
    local server_ip=$1
    local node_token=$2
    
    echo "Installing K3s agent node..."
    
    # Join the agent to the server
    curl -sfL https://get.k3s.io | K3S_URL=https://${server_ip}:6443 K3S_TOKEN=${node_token} sh -
    
    echo "K3s agent node installed"
}

# Configure edge applications
configure_edge_apps() {
    echo "Configuring edge applications..."
    
    # Create the namespace and a sample edge workload
    cat <<EOF | sudo k3s kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: edge-apps
  labels:
    type: edge
---
# Edge application deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: edge-sensor-app
  namespace: edge-apps
spec:
  replicas: 1
  selector:
    matchLabels:
      app: edge-sensor
  template:
    metadata:
      labels:
        app: edge-sensor
    spec:
      containers:
      - name: sensor-collector
        image: nginx:alpine
        ports:
        - containerPort: 80
        env:
        - name: EDGE_LOCATION
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        resources:
          requests:
            cpu: 50m
            memory: 64Mi
          limits:
            cpu: 200m
            memory: 256Mi
        volumeMounts:
        - name: sensor-data
          mountPath: /data
      volumes:
      - name: sensor-data
        hostPath:
          path: /opt/sensor-data
          type: DirectoryOrCreate
      nodeSelector:
        kubernetes.io/arch: arm64
EOF
}

# Configure offline operation
configure_offline_mode() {
    echo "Configuring offline mode..."
    
    # Create the offline configuration
    cat <<EOF | sudo k3s kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: offline-config
  namespace: edge-apps
data:
  offline.conf: |
    # Offline mode settings
    offline_mode=true
    cache_duration=3600
    sync_interval=300
    local_storage=/data/cache
    
    # Network settings
    network_timeout=30
    retry_attempts=3
    fallback_mode=local
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: offline-sync
  namespace: edge-apps
spec:
  selector:
    matchLabels:
      app: offline-sync
  template:
    metadata:
      labels:
        app: offline-sync
    spec:
      containers:
      - name: sync-agent
        image: alpine:latest
        command: ["/bin/sh"]
        args: ["-c", "while true; do echo 'Syncing data...'; sleep 300; done"]
        volumeMounts:
        - name: config
          mountPath: /etc/config
        - name: data-cache
          mountPath: /data/cache
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
      volumes:
      - name: config
        configMap:
          name: offline-config
      - name: data-cache
        hostPath:
          path: /opt/edge-cache
          type: DirectoryOrCreate
      hostNetwork: true
EOF
}

# Set up edge node monitoring
setup_edge_monitoring() {
    echo "Setting up edge node monitoring..."
    
    # Deploy a lightweight node exporter
    cat <<EOF | sudo k3s kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: edge-monitor
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: edge-monitor
  template:
    metadata:
      labels:
        app: edge-monitor
    spec:
      containers:
      - name: node-exporter
        image: prom/node-exporter:v1.7.0
        ports:
        - containerPort: 9100
        args:
        - '--path.rootfs=/host'
        - '--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+)($|/)'
        - '--collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$'
        volumeMounts:
        - name: proc
          mountPath: /host/proc
          readOnly: true
        - name: sys
          mountPath: /host/sys
          readOnly: true
        - name: root
          mountPath: /host
          readOnly: true
        resources:
          requests:
            cpu: 10m
            memory: 32Mi
          limits:
            cpu: 50m
            memory: 128Mi
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: sys
        hostPath:
          path: /sys
      - name: root
        hostPath:
          path: /
      hostNetwork: true
      hostPID: true
EOF
}

# Main entry point
case "$1" in
    server)
        install_k3s_server
        ;;
    agent)
        if [ -z "$2" ] || [ -z "$3" ]; then
            echo "用法: $0 agent <server_ip> <node_token>"
            exit 1
        fi
        install_k3s_agent "$2" "$3"
        ;;
    apps)
        configure_edge_apps
        ;;
    offline)
        configure_offline_mode
        ;;
    monitoring)
        setup_edge_monitoring
        ;;
    all)
        install_k3s_server
        configure_edge_apps
        configure_offline_mode
        setup_edge_monitoring
        ;;
    *)
        echo "用法: $0 {server|agent|apps|offline|monitoring|all}"
        echo "  server     - 安装K3s服务器"
        echo "  agent      - 安装K3s代理节点"
        echo "  apps       - 配置边缘应用"
        echo "  offline    - 配置离线模式"
        echo "  monitoring - 设置监控"
        echo "  all        - 执行所有配置"
        exit 1
        ;;
esac
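
Once the server is up, it is usually more convenient to manage the edge cluster from a workstation than to keep running sudo k3s kubectl on the node. A small sketch, assuming SSH access to the server and that 192.168.1.10 (a placeholder) is its reachable address:

# K3s writes its kubeconfig to /etc/rancher/k3s/k3s.yaml on the server
scp root@192.168.1.10:/etc/rancher/k3s/k3s.yaml ~/.kube/k3s-edge.yaml
# The file points at 127.0.0.1, so rewrite it to the server's address
sed -i 's/127.0.0.1/192.168.1.10/' ~/.kube/k3s-edge.yaml
export KUBECONFIG=~/.kube/k3s-edge.yaml
kubectl get nodes -o wide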

14.6 Sustainability and Green Computing

14.6.1 Carbon Footprint Monitoring

# Carbon footprint monitoring configuration
apiVersion: v1
kind: ConfigMap
metadata:
  name: carbon-footprint-config
  namespace: sustainability
data:
  carbon-metrics.yaml: |
    # Carbon emission calculation settings
    carbon_intensity:
      # Grid carbon intensity per cloud provider and region (gCO2/kWh, illustrative values)
      aws:
        us-east-1: 415.755
        us-west-2: 350.993
        eu-west-1: 316.0
      gcp:
        us-central1: 479.0
        us-west1: 350.0
        europe-west1: 167.0
      azure:
        eastus: 415.755
        westus2: 350.993
        westeurope: 316.0

    # Power-consumption coefficients
    power_consumption:
      cpu_per_core: 3.5      # watts per core
      memory_per_gb: 0.375   # watts per GB
      storage_per_gb: 0.65   # watts per GB
      network_per_gbps: 5.0  # watts per Gbps
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: carbon-footprint-exporter
  namespace: sustainability
spec:
  replicas: 1
  selector:
    matchLabels:
      app: carbon-footprint-exporter
  template:
    metadata:
      labels:
        app: carbon-footprint-exporter
    spec:
      containers:
      - name: exporter
        image: carbon-footprint-exporter:latest
        ports:
        - containerPort: 8080
        env:
        - name: CLOUD_PROVIDER
          value: "aws"
        - name: REGION
          value: "us-west-2"
        - name: CARBON_INTENSITY
          valueFrom:
            configMapKeyRef:
              name: carbon-footprint-config
              key: carbon-metrics.yaml
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 500m
            memory: 512Mi
        volumeMounts:
        - name: config
          mountPath: /etc/config
      volumes:
      - name: config
        configMap:
          name: carbon-footprint-config
      serviceAccountName: carbon-footprint-exporter
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: carbon-footprint-exporter
  namespace: sustainability
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: carbon-footprint-exporter
rules:
- apiGroups: [""]
  resources: ["nodes", "pods"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["metrics.k8s.io"]
  resources: ["nodes", "pods"]
  verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: carbon-footprint-exporter
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: carbon-footprint-exporter
subjects:
- kind: ServiceAccount
  name: carbon-footprint-exporter
  namespace: sustainability
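
The coefficients in the ConfigMap are enough for a quick back-of-the-envelope estimate before any exporter is deployed. A sketch using purely illustrative numbers (8 CPU cores, 32 GB of memory, a grid intensity of 350 gCO2/kWh):

# Rough power and carbon estimate from the coefficients above
CPU_WATTS=$(echo "8 * 3.5" | bc)        # 28 W
MEM_WATTS=$(echo "32 * 0.375" | bc)     # 12 W
TOTAL_WATTS=$(echo "$CPU_WATTS + $MEM_WATTS" | bc)
# watts / 1000 = kW; kW * gCO2/kWh = gCO2 per hour
echo "$(echo "$TOTAL_WATTS / 1000 * 350" | bc -l) gCO2 per hour"   # about 14 gCO2/h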

14.6.2 Green Computing Optimization Script

#!/bin/bash
# Kubernetes green computing optimization script

echo "=== Kubernetes Green Computing Optimization ==="

# Install sustainability monitoring
install_sustainability_monitoring() {
    echo "Installing sustainability monitoring..."
    
    # Create the namespace
    kubectl create namespace sustainability --dry-run=client -o yaml | kubectl apply -f -
    
    # Deploy Kepler (Kubernetes Efficient Power Level Exporter); check the Kepler
    # project docs for the manifest path or Helm chart matching your release
    kubectl apply -f https://raw.githubusercontent.com/sustainable-computing-io/kepler/main/manifests/kubernetes/deployment.yaml
    
    # Deploy the carbon footprint calculator
    cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: carbon-calculator
  namespace: sustainability
spec:
  replicas: 1
  selector:
    matchLabels:
      app: carbon-calculator
  template:
    metadata:
      labels:
        app: carbon-calculator
    spec:
      containers:
      - name: calculator
        image: python:3.9-slim
        command: ["/bin/sh"]
        args: ["-c", "while true; do python /app/carbon_calculator.py; sleep 300; done"]
        env:
        - name: PROMETHEUS_URL
          value: "http://prometheus:9090"
        - name: CARBON_INTENSITY_API
          value: "https://api.carbonintensity.org.uk/intensity"
        volumeMounts:
        - name: calculator-script
          mountPath: /app
        resources:
          requests:
            cpu: 50m
            memory: 64Mi
          limits:
            cpu: 200m
            memory: 256Mi
      volumes:
      - name: calculator-script
        configMap:
          name: carbon-calculator-script
          defaultMode: 0755
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: carbon-calculator-script
  namespace: sustainability
data:
  carbon_calculator.py: |
    #!/usr/bin/env python3
    import requests
    import json
    import os
    import time
    from datetime import datetime
    
    def get_cluster_metrics():
        """获取集群资源使用指标"""
        prometheus_url = os.getenv('PROMETHEUS_URL', 'http://prometheus:9090')
        
        queries = {
            'cpu_usage': 'sum(rate(container_cpu_usage_seconds_total[5m]))',
            'memory_usage': 'sum(container_memory_working_set_bytes)',
            'network_rx': 'sum(rate(container_network_receive_bytes_total[5m]))',
            'network_tx': 'sum(rate(container_network_transmit_bytes_total[5m]))'
        }
        
        metrics = {}
        for name, query in queries.items():
            try:
                response = requests.get(f'{prometheus_url}/api/v1/query', 
                                      params={'query': query})
                data = response.json()
                if data['status'] == 'success' and data['data']['result']:
                    metrics[name] = float(data['data']['result'][0]['value'][1])
                else:
                    metrics[name] = 0
            except Exception as e:
                print(f"Error fetching {name}: {e}")
                metrics[name] = 0
        
        return metrics
    
    def get_carbon_intensity():
        """Fetch the current grid carbon intensity"""
        try:
            # Uses the UK carbon-intensity API as an example
            response = requests.get('https://api.carbonintensity.org.uk/intensity')
            data = response.json()
            return data['data'][0]['intensity']['actual']
        except Exception:
            # Fall back to a default value if the API is unavailable
            return 350  # gCO2/kWh
    
    def calculate_carbon_footprint(metrics, carbon_intensity):
        """Estimate the cluster's power draw and carbon footprint"""
        # Power estimate (watts)
        cpu_power = metrics['cpu_usage'] * 3.5  # 3.5 W per CPU core
        memory_power = (metrics['memory_usage'] / 1024**3) * 0.375  # 0.375 W per GB
        # network_rx/tx are bytes per second; convert to Gbit/s before applying 5 W per Gbps
        network_power = ((metrics['network_rx'] + metrics['network_tx']) * 8 / 1e9) * 5.0
        
        total_power = cpu_power + memory_power + network_power  # watts
        
        # Carbon estimate: kW * gCO2/kWh = g/h, then divide by 1000 for kg/h
        carbon_emission = (total_power / 1000) * (carbon_intensity / 1000)  # kgCO2/hour
        
        return {
            'timestamp': datetime.now().isoformat(),
            'power_consumption': {
                'cpu_watts': cpu_power,
                'memory_watts': memory_power,
                'network_watts': network_power,
                'total_watts': total_power
            },
            'carbon_intensity': carbon_intensity,
            'carbon_emission_kg_per_hour': carbon_emission,
            'metrics': metrics
        }
    
    def main():
        print("Carbon footprint calculator started")
        
        while True:
            try:
                # 获取指标
                metrics = get_cluster_metrics()
                carbon_intensity = get_carbon_intensity()
                
                # 计算碳足迹
                result = calculate_carbon_footprint(metrics, carbon_intensity)
                
                # 输出结果
                print(json.dumps(result, indent=2))
                
                # 这里可以将结果发送到监控系统
                # send_to_prometheus(result)
                
            except Exception as e:
                print(f"Error in carbon calculation: {e}")
            
            time.sleep(300)  # 5分钟间隔
    
    if __name__ == '__main__':
        main()
EOF
}

# Implement a green scheduling policy
implement_green_scheduling() {
    echo "Implementing green scheduling policy..."
    
    # Create the green scheduler configuration
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: green-scheduler-config
  namespace: kube-system
data:
  config.yaml: |
    apiVersion: kubescheduler.config.k8s.io/v1
    kind: KubeSchedulerConfiguration
    profiles:
    - schedulerName: green-scheduler
      plugins:
        score:
          enabled:
          - name: NodeResourcesFit
          - name: NodeAffinity
          - name: PodTopologySpread
          - name: GreenScoring  # hypothetical custom carbon-aware scoring plugin (not shipped with kube-scheduler)
      pluginConfig:
      - name: NodeResourcesFit
        args:
          scoringStrategy:
            type: LeastAllocated
            resources:
            - name: cpu
              weight: 1
            - name: memory
              weight: 1
            - name: carbon-efficiency
              weight: 2  # weight carbon efficiency more heavily (assumes such an extended resource is exposed)
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: green-scheduler
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: green-scheduler
  template:
    metadata:
      labels:
        app: green-scheduler
    spec:
      serviceAccountName: green-scheduler
      containers:
      - name: kube-scheduler
        image: registry.k8s.io/kube-scheduler:v1.28.0
        command:
        - kube-scheduler
        - --config=/etc/kubernetes/config.yaml
        - --v=2
        volumeMounts:
        - name: config
          mountPath: /etc/kubernetes
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 500m
            memory: 512Mi
      volumes:
      - name: config
        configMap:
          name: green-scheduler-config
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: green-scheduler
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: green-scheduler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:kube-scheduler
subjects:
- kind: ServiceAccount
  name: green-scheduler
  namespace: kube-system
EOF
    
    # Create an example Deployment that uses the green scheduler
    cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: green-app
  namespace: default
spec:
  replicas: 3
  selector:
    matchLabels:
      app: green-app
  template:
    metadata:
      labels:
        app: green-app
      annotations:
        scheduler.alpha.kubernetes.io/preferred-carbon-efficiency: "high"
    spec:
      schedulerName: green-scheduler
      containers:
      - name: app
        image: nginx:alpine
        resources:
          requests:
            cpu: 50m
            memory: 64Mi
          limits:
            cpu: 200m
            memory: 256Mi
        env:
        - name: GREEN_MODE
          value: "enabled"
      nodeSelector:
        carbon-efficiency: high
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            preference:
              matchExpressions:
              - key: energy-source
                operator: In
                values: ["renewable", "solar", "wind"]
          - weight: 50
            preference:
              matchExpressions:
              - key: carbon-intensity
                operator: In
                values: ["low", "very-low"]
EOF
}

# Optimize resource usage
optimize_resource_usage() {
    echo "Optimizing resource usage..."
    
    # Deploy the Vertical Pod Autoscaler (VPA)
    git clone https://github.com/kubernetes/autoscaler.git
    cd autoscaler/vertical-pod-autoscaler
    ./hack/vpa-up.sh
    cd ../..
    rm -rf autoscaler
    
    # Create a VPA for the green-app Deployment
    cat <<EOF | kubectl apply -f -
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: green-app-vpa
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: green-app
  updatePolicy:
    updateMode: "Auto"
  resourcePolicy:
    containerPolicies:
    - containerName: app
      maxAllowed:
        cpu: 500m
        memory: 512Mi
      minAllowed:
        cpu: 10m
        memory: 32Mi
      controlledResources: ["cpu", "memory"]
EOF
    
    # Create a resource-cleanup CronJob (quoted heredoc so the embedded script's
    # variables and command substitutions are not expanded at apply time)
    cat <<'EOF' | kubectl apply -f -
apiVersion: batch/v1
kind: CronJob
metadata:
  name: resource-cleanup
  namespace: sustainability
spec:
  schedule: "0 2 * * *"  # 每天凌晨2点执行
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: cleanup
            image: bitnami/kubectl:latest
            command:
            - /bin/sh
            - -c
            - |
              echo "Starting resource cleanup..."
              
              # 清理已完成的Jobs
              kubectl get jobs --all-namespaces --field-selector status.successful=1 -o json | \
              jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name)"' | \
              while read namespace name; do
                echo "Deleting completed job: $namespace/$name"
                kubectl delete job "$name" -n "$namespace"
              done
              
              # 清理失败的Pods
              kubectl get pods --all-namespaces --field-selector status.phase=Failed -o json | \
              jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name)"' | \
              while read namespace name; do
                echo "Deleting failed pod: $namespace/$name"
                kubectl delete pod "$name" -n "$namespace"
              done
              
              # 清理未使用的ConfigMaps (超过30天)
              kubectl get configmaps --all-namespaces -o json | \
              jq -r '.items[] | select(.metadata.creationTimestamp < (now - 30*24*3600 | strftime("%Y-%m-%dT%H:%M:%SZ"))) | "\(.metadata.namespace) \(.metadata.name)"' | \
              while read namespace name; do
                # 检查是否被使用
                used=$(kubectl get pods -n $namespace -o json | jq -r --arg cm "$name" '.items[] | select(.spec.volumes[]?.configMap.name == $cm) | .metadata.name')
                if [ -z "$used" ]; then
                  echo "Deleting unused configmap: $namespace/$name"
                  kubectl delete configmap "$name" -n "$namespace"
                fi
              done
              
              echo "Resource cleanup completed"
            resources:
              requests:
                cpu: 50m
                memory: 64Mi
              limits:
                cpu: 200m
                memory: 256Mi
          restartPolicy: OnFailure
          serviceAccountName: resource-cleanup
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: resource-cleanup
  namespace: sustainability
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: resource-cleanup
rules:
- apiGroups: [""]
  resources: ["pods", "configmaps", "secrets"]
  verbs: ["get", "list", "delete"]
- apiGroups: ["batch"]
  resources: ["jobs"]
  verbs: ["get", "list", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: resource-cleanup
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: resource-cleanup
subjects:
- kind: ServiceAccount
  name: resource-cleanup
  namespace: sustainability
EOF
}

# Generate a sustainability report
generate_sustainability_report() {
    echo "Generating sustainability report..."
    
    local report_file="sustainability-report-$(date +%Y%m%d).md"
    
    cat > "$report_file" <<EOF
# Kubernetes Cluster Sustainability Report

Generated: $(date)

## 1. Resource Usage Overview

### Node resource usage
\`\`\`
$(kubectl top nodes 2>/dev/null || echo "metrics-server is required")
\`\`\`

### Pod resource usage (top 10)
\`\`\`
$(kubectl top pods --all-namespaces --sort-by=cpu 2>/dev/null | head -11 || echo "metrics-server is required")
\`\`\`

## 2. Resource Efficiency Analysis

### Unused resources
- **Unbound PVs**: $(kubectl get pv | grep Available | wc -l)
- **Failed Pods**: $(kubectl get pods --all-namespaces --no-headers --field-selector status.phase=Failed 2>/dev/null | wc -l)
- **Completed Jobs**: $(kubectl get jobs --all-namespaces --no-headers --field-selector status.successful=1 2>/dev/null | wc -l)

### Resource configuration analysis
- **Pods without resource limits**: $(kubectl get pods --all-namespaces -o json | jq '[.items[] | select(.spec.containers[].resources.limits == null)] | length')
- **Pods without resource requests**: $(kubectl get pods --all-namespaces -o json | jq '[.items[] | select(.spec.containers[].resources.requests == null)] | length')

## 3. Carbon Footprint Estimate

Estimate based on current resource usage:

- **CPU usage**: $(kubectl top nodes --no-headers 2>/dev/null | awk '{sum+=$2} END {print sum}' || echo "N/A") millicores
- **Memory usage**: $(kubectl top nodes --no-headers 2>/dev/null | awk '{sum+=$4} END {print sum}' || echo "N/A") MiB
- **Estimated power draw**: derived from standard server power coefficients
- **Estimated carbon emissions**: derived from the regional grid carbon intensity

## 4. Optimization Recommendations

### Immediate actions
1. Set resource requests and limits on all Pods
2. Clean up unused resources
3. Enable autoscaling
4. Use leaner images

### Medium-term optimizations
1. Adopt a green scheduling policy
2. Optimize application architecture
3. Use renewable energy
4. Adopt carbon-aware scheduling

### Long-term goals
1. Reach carbon-neutral operations
2. Establish sustainability KPIs
3. Monitor and optimize continuously
4. Participate in green computing initiatives

## 5. Monitoring Metrics

Recommended sustainability metrics to track:
- Carbon footprint per workload
- Resource utilization
- Energy efficiency
- Share of renewable energy used

EOF
    
    echo "可持续性报告已生成: $report_file"
}

# Main entry point
case "$1" in
    monitoring)
        install_sustainability_monitoring
        ;;
    scheduling)
        implement_green_scheduling
        ;;
    optimize)
        optimize_resource_usage
        ;;
    report)
        generate_sustainability_report
        ;;
    all)
        install_sustainability_monitoring
        implement_green_scheduling
        optimize_resource_usage
        generate_sustainability_report
        ;;
    *)
        echo "用法: $0 {monitoring|scheduling|optimize|report|all}"
        echo "  monitoring  - 安装可持续性监控"
        echo "  scheduling  - 实施绿色调度"
        echo "  optimize    - 优化资源使用"
        echo "  report      - 生成可持续性报告"
        echo "  all         - 执行所有操作"
        exit 1
        ;;
esac
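
The green-app Deployment above only schedules well if nodes actually carry the carbon-efficiency and energy-source labels it selects on; those labels are not standard and have to be maintained by the cluster operator (for example from data-center or cloud-region metadata). A sketch with placeholder node names:

# Label nodes according to their power source and carbon efficiency
kubectl label node worker-green-01 carbon-efficiency=high energy-source=renewable
kubectl label node worker-default-01 carbon-efficiency=low energy-source=grid

# Verify what the green scheduler has to work with
kubectl get nodes -L carbon-efficiency,energy-source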

14.7 Security Enhancements and Zero Trust

14.7.1 Zero-Trust Architecture

# Zero-trust network policies
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: zero-trust-default-deny
  namespace: production
spec:
  podSelector: {}
  policyTypes:
  - Ingress
  - Egress
  # With no ingress/egress rules defined, all traffic is denied by default
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: zero-trust-web-tier
  namespace: production
spec:
  podSelector:
    matchLabels:
      tier: web
  policyTypes:
  - Ingress
  - Egress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          name: ingress-nginx
    ports:
    - protocol: TCP
      port: 80
    - protocol: TCP
      port: 443
  egress:
  - to:
    - podSelector:
        matchLabels:
          tier: api
    ports:
    - protocol: TCP
      port: 8080
  - to: []  # DNS
    ports:
    - protocol: TCP
      port: 53
    - protocol: UDP
      port: 53
---
# Service mesh (Istio) security policies
apiVersion: security.istio.io/v1beta1
kind: PeerAuthentication
metadata:
  name: default
  namespace: production
spec:
  mtls:
    mode: STRICT
---
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
  name: zero-trust-authz
  namespace: production
spec:
  selector:
    matchLabels:
      app: web-app
  rules:
  # A single rule: the source, the operation, and the condition must all match
  - from:
    - source:
        principals: ["cluster.local/ns/production/sa/web-service"]
    to:
    - operation:
        methods: ["GET", "POST"]
        paths: ["/api/*"]
    when:
    - key: source.ip
      values: ["10.0.0.0/8"]

14.7.2 Supply Chain Security

#!/bin/bash
# Kubernetes supply chain security script

echo "=== Kubernetes Supply Chain Security ==="

# Install image scanning tooling
install_image_scanning() {
    echo "Installing image scanning tools..."
    
    # Install Trivy
    curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
    
    # Deploy the Trivy Operator
    kubectl apply -f https://raw.githubusercontent.com/aquasecurity/trivy-operator/main/deploy/static/trivy-operator.yaml
    
    # Configure image scan settings
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: trivy-operator-config
  namespace: trivy-system
data:
  trivy.repository: "ghcr.io/aquasecurity/trivy"
  trivy.tag: "latest"
  trivy.severity: "CRITICAL,HIGH,MEDIUM"
  trivy.ignoreUnfixed: "false"
  trivy.timeout: "5m0s"
  trivy.skipUpdate: "false"
EOF
    
    # Note: VulnerabilityReports and ConfigAuditReports are generated automatically by the
    # Trivy Operator for workloads in the cluster (view them with "kubectl get vulnerabilityreports -A");
    # they are not created by hand.
}

# Implement image signature verification
implement_image_signing() {
    echo "Implementing image signature verification..."
    
    # Install Cosign
    curl -O -L "https://github.com/sigstore/cosign/releases/latest/download/cosign-linux-amd64"
    sudo mv cosign-linux-amd64 /usr/local/bin/cosign
    sudo chmod +x /usr/local/bin/cosign
    
    # Generate a signing key pair
    cosign generate-key-pair
    
    # Store the keys in a Secret
    kubectl create secret generic cosign-keys \
        --from-file=cosign.key=cosign.key \
        --from-file=cosign.pub=cosign.pub \
        -n kube-system
    
    # Deploy the image-verification admission webhook
    cat <<EOF | kubectl apply -f -
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
  name: image-signature-webhook
webhooks:
- name: verify-signature.example.com
  clientConfig:
    service:
      name: image-signature-webhook
      namespace: kube-system
      path: "/verify"
  rules:
  - operations: ["CREATE", "UPDATE"]
    apiGroups: [""]
    apiVersions: ["v1"]
    resources: ["pods"]
  - operations: ["CREATE", "UPDATE"]
    apiGroups: ["apps"]
    apiVersions: ["v1"]
    resources: ["deployments", "replicasets", "daemonsets", "statefulsets"]
  admissionReviewVersions: ["v1", "v1beta1"]
  sideEffects: None
  failurePolicy: Fail
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: image-signature-webhook
  namespace: kube-system
spec:
  replicas: 2
  selector:
    matchLabels:
      app: image-signature-webhook
  template:
    metadata:
      labels:
        app: image-signature-webhook
    spec:
      containers:
      - name: webhook
        image: image-signature-webhook:latest
        ports:
        - containerPort: 8443
        env:
        - name: TLS_CERT_FILE
          value: "/etc/certs/tls.crt"
        - name: TLS_PRIVATE_KEY_FILE
          value: "/etc/certs/tls.key"
        - name: COSIGN_PUBLIC_KEY
          value: "/etc/cosign/cosign.pub"
        volumeMounts:
        - name: certs
          mountPath: /etc/certs
          readOnly: true
        - name: cosign-keys
          mountPath: /etc/cosign
          readOnly: true
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 500m
            memory: 512Mi
      volumes:
      - name: certs
        secret:
          secretName: webhook-certs
      - name: cosign-keys
        secret:
          secretName: cosign-keys
EOF
}

# Configure SBOM generation
setup_sbom_generation() {
    echo "Configuring SBOM generation..."
    
    # Install Syft
    curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin
    
    # Create the SBOM-generation CronJob (quoted heredoc so "$image" and the embedded
    # command substitutions are not expanded at apply time)
    cat <<'EOF' | kubectl apply -f -
apiVersion: batch/v1
kind: CronJob
metadata:
  name: sbom-generator
  namespace: security
spec:
  schedule: "0 2 * * *"  # 每天凌晨2点
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: sbom-generator
            # assumes a custom image that bundles both kubectl and syft;
            # the stock anchore/syft image does not include kubectl
            image: anchore/syft:latest
            command:
            - /bin/sh
            - -c
            - |
              echo "Generating SBOMs for all images..."
              
              # 获取所有运行中的镜像
              kubectl get pods --all-namespaces -o jsonpath='{.items[*].spec.containers[*].image}' | \
              tr ' ' '\n' | sort -u | while read image; do
                echo "Generating SBOM for $image"
                
                # 生成SBOM
                syft "$image" -o spdx-json > "/sbom/$(echo $image | tr '/' '_' | tr ':' '_').spdx.json"
                
                # 上传到存储
                # aws s3 cp "/sbom/$(echo $image | tr '/' '_' | tr ':' '_').spdx.json" s3://sbom-bucket/
              done
              
              echo "SBOM generation completed"
            volumeMounts:
            - name: sbom-storage
              mountPath: /sbom
            - name: docker-socket
              mountPath: /var/run/docker.sock
            resources:
              requests:
                cpu: 200m
                memory: 256Mi
              limits:
                cpu: 1
                memory: 1Gi
          volumes:
          - name: sbom-storage
            persistentVolumeClaim:
              claimName: sbom-storage-pvc
          - name: docker-socket
            hostPath:
              path: /var/run/docker.sock
          restartPolicy: OnFailure
          serviceAccountName: sbom-generator
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: sbom-generator
  namespace: security
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: sbom-generator
rules:
- apiGroups: [""]
  resources: ["pods"]
  verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: sbom-generator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: sbom-generator
subjects:
- kind: ServiceAccount
  name: sbom-generator
  namespace: security
EOF
}

# Implement runtime security monitoring
implement_runtime_security() {
    echo "Implementing runtime security monitoring..."
    
    # Deploy Falco
    helm repo add falcosecurity https://falcosecurity.github.io/charts
    helm repo update
    helm install falco falcosecurity/falco \
        --namespace falco-system \
        --create-namespace \
        --set falco.grpc.enabled=true \
        --set falco.grpcOutput.enabled=true
    
    # Configure custom Falco rules
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: falco-custom-rules
  namespace: falco-system
data:
  custom_rules.yaml: |
    # Custom security rules
    
    # Detect creation of privileged containers (Kubernetes audit events)
    - rule: Privileged Container Started
      desc: Detect a pod created with a privileged container
      condition: >
        ka.verb=create and ka.target.resource=pods and
        ka.req.pod.containers.privileged intersects (true)
      output: >
        Privileged container started (user=%ka.user.name verb=%ka.verb 
        pod=%ka.target.name container=%ka.req.pod.containers.name 
        image=%ka.req.pod.containers.image)
      priority: WARNING
      source: k8s_audit
      tags: [container, privilege_escalation, k8s_audit]
    
    # Detect access to sensitive files
    - rule: Sensitive File Access
      desc: Detect access to sensitive files
      condition: >
        open_read and sensitive_files and not proc_name_exists and
        not user_known_read_sensitive_files_activities
      output: >
        Sensitive file opened for reading (user=%user.name command=%proc.cmdline 
        file=%fd.name parent=%proc.pname pcmdline=%proc.pcmdline gparent=%proc.aname[2])
      priority: WARNING
      tags: [filesystem, sensitive_files]
    
    # Detect anomalous network connections
    - rule: Unexpected Network Connection
      desc: Detect unexpected network connections
      condition: >
        inbound_outbound and fd.sockfamily=ip and
        not proc.name in (known_network_tools) and
        not fd.sport in (known_ports) and
        not fd.dport in (known_ports)
      output: >
        Unexpected network connection (user=%user.name command=%proc.cmdline 
        connection=%fd.name proto=%fd.l4proto)
      priority: NOTICE
      tags: [network, anomaly]
EOF
    
    # Expose Falco events to Prometheus via falco-exporter
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: falco-exporter-config
  namespace: falco-system
data:
  config.yaml: |
    listenAddress: 0.0.0.0
    listenPort: 9376
    falcoGrpcAddress: falco.falco-system.svc.cluster.local
    falcoGrpcPort: 5060
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: falco-exporter
  namespace: falco-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: falco-exporter
  template:
    metadata:
      labels:
        app: falco-exporter
    spec:
      containers:
      - name: falco-exporter
        image: falcosecurity/falco-exporter:latest
        ports:
        - containerPort: 9376
        volumeMounts:
        - name: config
          mountPath: /etc/falco-exporter
        resources:
          requests:
            cpu: 50m
            memory: 64Mi
          limits:
            cpu: 200m
            memory: 256Mi
      volumes:
      - name: config
        configMap:
          name: falco-exporter-config
EOF
}

# Generate a security report
generate_security_report() {
    echo "Generating security report..."
    
    local report_file="security-report-$(date +%Y%m%d).md"
    
    cat > "$report_file" <<EOF
# Kubernetes Cluster Security Report

Generated: $(date)

## 1. Image Scan Results

### Critical vulnerability summary
\`\`\`
$(kubectl get vulnerabilityreports --all-namespaces -o json 2>/dev/null | jq -r '.items[] | select(.report.summary.criticalCount > 0) | "\(.metadata.namespace)/\(.metadata.name): Critical=\(.report.summary.criticalCount), High=\(.report.summary.highCount)"' || echo "Trivy Operator is required")
\`\`\`

### Configuration audit results
\`\`\`
$(kubectl get configauditreports --all-namespaces -o json 2>/dev/null | jq -r '.items[] | "\(.metadata.namespace)/\(.metadata.name): Danger=\(.report.summary.dangerCount), Warning=\(.report.summary.warningCount)"' || echo "a configuration audit tool is required")
\`\`\`

## 2. Network Security Policies

### Network policy coverage
- **Total namespaces**: $(kubectl get namespaces --no-headers | wc -l)
- **Namespaces with a NetworkPolicy**: $(kubectl get networkpolicies --all-namespaces --no-headers | awk '{print $1}' | sort -u | wc -l)
- **Coverage**: $(echo "scale=2; $(kubectl get networkpolicies --all-namespaces --no-headers | awk '{print $1}' | sort -u | wc -l) * 100 / $(kubectl get namespaces --no-headers | wc -l)" | bc)%

### Default-deny policies
\`\`\`
$(kubectl get networkpolicies --all-namespaces -o json | jq -r '.items[] | select(.spec.podSelector == {}) | "\(.metadata.namespace): \(.metadata.name)"')
\`\`\`

## 3. RBAC Analysis

### cluster-admin bindings
\`\`\`
$(kubectl get clusterrolebindings -o json | jq -r '.items[] | select(.roleRef.name == "cluster-admin") | "\(.metadata.name): \(.subjects[]?.name)"')
\`\`\`

### ServiceAccount statistics
- **Total ServiceAccounts**: $(kubectl get serviceaccounts --all-namespaces --no-headers | wc -l)
- **ServiceAccounts with custom bindings**: $(kubectl get rolebindings,clusterrolebindings --all-namespaces -o json | jq '[.items[] | select(.subjects[]?.kind == "ServiceAccount")] | length')

## 4. Pod Security Standards

### Privileged Pods
\`\`\`
$(kubectl get pods --all-namespaces -o json | jq -r '.items[] | select(.spec.containers[].securityContext.privileged == true) | "\(.metadata.namespace)/\(.metadata.name)"' | sort -u)
\`\`\`

### hostNetwork Pods
\`\`\`
$(kubectl get pods --all-namespaces -o json | jq -r '.items[] | select(.spec.hostNetwork == true) | "\(.metadata.namespace)/\(.metadata.name)"')
\`\`\`

## 5. Runtime Security Events

### Falco alerts (last 24 hours)
\`\`\`
$(kubectl logs -n falco-system -l app.kubernetes.io/name=falco --since=24h 2>/dev/null | grep -c "Priority:" || echo "Falco is not deployed or produced no logs")
\`\`\`

## 6. Recommendations

### Fix immediately
1. Remediate all Critical and High vulnerabilities
2. Add NetworkPolicies to every namespace
3. Remove unnecessary privileged permissions
4. Enforce Pod Security Standards

### Hardening
1. Enable image signature verification
2. Adopt a zero-trust network architecture
3. Deploy runtime security monitoring
4. Run regular security audits

### Compliance
1. Manage SBOMs for all images
2. Establish a security baseline
3. Enable audit logging
4. Enforce access controls

EOF
    
    echo "安全报告已生成: $report_file"
}

# Main entry point
case "$1" in
    scanning)
        install_image_scanning
        ;;
    signing)
        implement_image_signing
        ;;
    sbom)
        setup_sbom_generation
        ;;
    runtime)
        implement_runtime_security
        ;;
    report)
        generate_security_report
        ;;
    all)
        install_image_scanning
        implement_image_signing
        setup_sbom_generation
        implement_runtime_security
        generate_security_report
        ;;
    *)
        echo "用法: $0 {scanning|signing|sbom|runtime|report|all}"
        echo "  scanning - 安装镜像扫描"
        echo "  signing  - 实施镜像签名"
        echo "  sbom     - 配置SBOM生成"
        echo "  runtime  - 运行时安全监控"
        echo "  report   - 生成安全报告"
        echo "  all      - 执行所有配置"
        exit 1
        ;;
esac
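
With the key pair produced by implement_image_signing in place, signing and verifying happen on the client or CI side before the admission webhook ever sees the image. A sketch, using a placeholder image reference:

# Sign an image with the private key and verify it with the public key
cosign sign --key cosign.key registry.example.com/myapp:1.0
cosign verify --key cosign.pub registry.example.com/myapp:1.0

# Generate an SBOM for the same image, matching what the CronJob above does in-cluster
syft registry.example.com/myapp:1.0 -o spdx-json > myapp-1.0.spdx.json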

14.8 Developer Experience and Platform Engineering

14.8.1 Internal Developer Platform (IDP)

# Backstage developer portal configuration
apiVersion: v1
kind: ConfigMap
metadata:
  name: backstage-config
  namespace: platform
data:
  app-config.yaml: |
    app:
      title: Internal Developer Platform
      baseUrl: http://localhost:3000
    
    organization:
      name: My Company
    
    backend:
      baseUrl: http://localhost:7007
      listen:
        port: 7007
      csp:
        connect-src: ["'self'", 'http:', 'https:']
      cors:
        origin: http://localhost:3000
        methods: [GET, HEAD, PATCH, POST, PUT, DELETE]
        credentials: true
      database:
        client: pg
        connection:
          host: ${POSTGRES_HOST}
          port: ${POSTGRES_PORT}
          user: ${POSTGRES_USER}
          password: ${POSTGRES_PASSWORD}
    
    integrations:
      github:
        - host: github.com
          token: ${GITHUB_TOKEN}
      gitlab:
        - host: gitlab.com
          token: ${GITLAB_TOKEN}
    
    techdocs:
      builder: 'local'
      generator:
        runIn: 'local'
      publisher:
        type: 'local'
    
    auth:
      providers:
        github:
          development:
            clientId: ${AUTH_GITHUB_CLIENT_ID}
            clientSecret: ${AUTH_GITHUB_CLIENT_SECRET}
    
    scaffolder:
      defaultAuthor:
        name: Platform Team
        email: platform@company.com
      defaultCommitMessage: 'Initial commit'
    
    catalog:
      import:
        entityFilename: catalog-info.yaml
        pullRequestBranchName: backstage-integration
      rules:
        - allow: [Component, System, API, Resource, Location]
      locations:
        - type: file
          target: ../../examples/entities.yaml
        - type: file
          target: ../../examples/template/template.yaml
          rules:
            - allow: [Template]
        - type: file
          target: ../../examples/org.yaml
          rules:
            - allow: [User, Group]
    
    kubernetes:
      serviceLocatorMethod:
        type: 'multiTenant'
      clusterLocatorMethods:
        - type: 'config'
          clusters:
            - url: https://kubernetes.default.svc
              name: local-cluster
              authProvider: 'serviceAccount'
              skipTLSVerify: true
              skipMetricsLookup: false
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: backstage
  namespace: platform
spec:
  replicas: 1
  selector:
    matchLabels:
      app: backstage
  template:
    metadata:
      labels:
        app: backstage
    spec:
      containers:
      - name: backstage
        image: backstage:latest
        ports:
        - containerPort: 7007
        - containerPort: 3000
        env:
        - name: POSTGRES_HOST
          value: postgres
        - name: POSTGRES_PORT
          value: "5432"
        - name: POSTGRES_USER
          value: backstage
        - name: POSTGRES_PASSWORD
          valueFrom:
            secretKeyRef:
              name: postgres-secrets
              key: password
        - name: GITHUB_TOKEN
          valueFrom:
            secretKeyRef:
              name: backstage-secrets
              key: github-token
        - name: AUTH_GITHUB_CLIENT_ID
          valueFrom:
            secretKeyRef:
              name: backstage-secrets
              key: github-client-id
        - name: AUTH_GITHUB_CLIENT_SECRET
          valueFrom:
            secretKeyRef:
              name: backstage-secrets
              key: github-client-secret
        volumeMounts:
        - name: config
          mountPath: /app/app-config.yaml
          subPath: app-config.yaml
        resources:
          requests:
            cpu: 500m
            memory: 512Mi
          limits:
            cpu: 1
            memory: 1Gi
      volumes:
      - name: config
        configMap:
          name: backstage-config
      serviceAccountName: backstage
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: backstage
  namespace: platform
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: backstage-reader
rules:
- apiGroups: [""]
  resources: ["pods", "services", "configmaps", "secrets"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["apps"]
  resources: ["deployments", "replicasets", "statefulsets", "daemonsets"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["networking.k8s.io"]
  resources: ["ingresses"]
  verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: backstage-reader
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: backstage-reader
subjects:
- kind: ServiceAccount
  name: backstage
  namespace: platform
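
The Backstage Deployment above references several Secrets that must exist before it will start. A minimal sketch for creating them with placeholder values:

# Create the namespace and the Secrets the Backstage Deployment expects
kubectl create namespace platform --dry-run=client -o yaml | kubectl apply -f -
kubectl create secret generic backstage-secrets -n platform \
    --from-literal=github-token=REPLACE_ME \
    --from-literal=github-client-id=REPLACE_ME \
    --from-literal=github-client-secret=REPLACE_ME
kubectl create secret generic postgres-secrets -n platform \
    --from-literal=password=REPLACE_ME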

14.8.2 Application Templates and Scaffolding

#!/bin/bash
# Developer experience optimization script

echo "=== Developer Experience Optimization ==="

# Deploy the application template system
deploy_app_templates() {
    echo "Deploying application template system..."
    
    # Create the template definitions (quoted heredoc so Backstage's ${{ ... }}
    # placeholders are not interpreted by the shell)
    cat <<'EOF' | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-templates
  namespace: platform
data:
  microservice-template.yaml: |
    apiVersion: scaffolder.backstage.io/v1beta3
    kind: Template
    metadata:
      name: microservice-template
      title: Microservice Template
      description: Create a new microservice with best practices
      tags:
        - recommended
        - microservice
        - go
    spec:
      owner: platform-team
      type: service
      parameters:
        - title: Service Information
          required:
            - name
            - description
          properties:
            name:
              title: Name
              type: string
              description: Unique name of the service
              pattern: '^[a-z0-9-]+$'
            description:
              title: Description
              type: string
              description: Help others understand what this service is for
            owner:
              title: Owner
              type: string
              description: Owner of the component
              ui:field: OwnerPicker
              ui:options:
                allowedKinds:
                  - Group
        - title: Choose a location
          required:
            - repoUrl
          properties:
            repoUrl:
              title: Repository Location
              type: string
              ui:field: RepoUrlPicker
              ui:options:
                allowedHosts:
                  - github.com
      steps:
        - id: fetch-base
          name: Fetch Base
          action: fetch:template
          input:
            url: ./content
            values:
              name: ${{ parameters.name }}
              description: ${{ parameters.description }}
              owner: ${{ parameters.owner }}
        
        - id: publish
          name: Publish
          action: publish:github
          input:
            allowedHosts: ['github.com']
            description: This is ${{ parameters.name }}
            repoUrl: ${{ parameters.repoUrl }}
        
        - id: register
          name: Register
          action: catalog:register
          input:
            repoContentsUrl: ${{ steps.publish.output.repoContentsUrl }}
            catalogInfoPath: '/catalog-info.yaml'
      
      output:
        links:
          - title: Repository
            url: ${{ steps.publish.output.remoteUrl }}
          - title: Open in catalog
            icon: catalog
            entityRef: ${{ steps.register.output.entityRef }}
  
  web-app-template.yaml: |
    apiVersion: scaffolder.backstage.io/v1beta3
    kind: Template
    metadata:
      name: web-app-template
      title: Web Application Template
      description: Create a new web application with React and TypeScript
      tags:
        - recommended
        - web
        - react
        - typescript
    spec:
      owner: frontend-team
      type: website
      parameters:
        - title: Application Information
          required:
            - name
            - description
          properties:
            name:
              title: Name
              type: string
              description: Unique name of the application
            description:
              title: Description
              type: string
              description: Help others understand what this application does
            framework:
              title: Framework
              type: string
              description: Choose the frontend framework
              default: react
              enum:
                - react
                - vue
                - angular
              enumNames:
                - React
                - Vue.js
                - Angular
      steps:
        - id: fetch-base
          name: Fetch Base
          action: fetch:template
          input:
            url: ./web-content
            values:
              name: ${{ parameters.name }}
              description: ${{ parameters.description }}
              framework: ${{ parameters.framework }}
EOF
    
    # Deploy the template content (values are escaped as \${{ ... }} so they survive shell expansion)
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: microservice-content
  namespace: platform
data:
  catalog-info.yaml: |
    apiVersion: backstage.io/v1alpha1
    kind: Component
    metadata:
      name: \${{ values.name }}
      description: \${{ values.description }}
      annotations:
        github.com/project-slug: \${{ values.destination.owner }}/\${{ values.destination.repo }}
    spec:
      type: service
      lifecycle: experimental
      owner: \${{ values.owner }}
  
  Dockerfile: |
    FROM golang:1.19-alpine AS builder
    WORKDIR /app
    COPY go.mod go.sum ./
    RUN go mod download
    COPY . .
    RUN CGO_ENABLED=0 GOOS=linux go build -o main .
    
    FROM alpine:latest
    RUN apk --no-cache add ca-certificates
    WORKDIR /root/
    COPY --from=builder /app/main .
    CMD ["./main"]
  
  main.go: |
    package main
    
    import (
        "fmt"
        "log"
        "net/http"
        "os"
    )
    
    func main() {
        port := os.Getenv("PORT")
        if port == "" {
            port = "8080"
        }
        
        http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
            fmt.Fprintf(w, "Hello from \${{ values.name }}!")
        })
        
        http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
            w.WriteHeader(http.StatusOK)
            fmt.Fprintf(w, "OK")
        })
        
        log.Printf("Server starting on port %s", port)
        log.Fatal(http.ListenAndServe(":"+port, nil))
    }
  
  go.mod: |
    module \${{ values.name }}
    
    go 1.19
  
  k8s-deployment.yaml: |
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: \${{ values.name }}
      labels:
        app: \${{ values.name }}
    spec:
      replicas: 3
      selector:
        matchLabels:
          app: \${{ values.name }}
      template:
        metadata:
          labels:
            app: \${{ values.name }}
        spec:
          containers:
          - name: \${{ values.name }}
            image: \${{ values.name }}:latest
            ports:
            - containerPort: 8080
            env:
            - name: PORT
              value: "8080"
            livenessProbe:
              httpGet:
                path: /health
                port: 8080
              initialDelaySeconds: 30
              periodSeconds: 10
            readinessProbe:
              httpGet:
                path: /health
                port: 8080
              initialDelaySeconds: 5
              periodSeconds: 5
            resources:
              requests:
                cpu: 100m
                memory: 128Mi
              limits:
                cpu: 500m
                memory: 512Mi
    ---
    apiVersion: v1
    kind: Service
    metadata:
      name: \${{ values.name }}
    spec:
      selector:
        app: \${{ values.name }}
      ports:
      - protocol: TCP
        port: 80
        targetPort: 8080
      type: ClusterIP
EOF
}

# Deploy development-environment management
deploy_dev_environments() {
    echo "Deploying development environment management..."
    
    # Deploy the development-environment controller
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: dev-environments
  labels:
    name: dev-environments
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dev-env-controller
  namespace: dev-environments
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dev-env-controller
  template:
    metadata:
      labels:
        app: dev-env-controller
    spec:
      containers:
      - name: controller
        image: dev-env-controller:latest
        ports:
        - containerPort: 8080
        env:
        - name: NAMESPACE_PREFIX
          value: "dev-"
        - name: DEFAULT_RESOURCES
          value: |
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1
              memory: 1Gi
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 500m
            memory: 512Mi
      serviceAccountName: dev-env-controller
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dev-env-controller
  namespace: dev-environments
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: dev-env-controller
rules:
- apiGroups: [""]
  resources: ["namespaces", "pods", "services", "configmaps", "secrets"]
  verbs: ["*"]
- apiGroups: ["apps"]
  resources: ["deployments", "replicasets"]
  verbs: ["*"]
- apiGroups: ["networking.k8s.io"]
  resources: ["ingresses"]
  verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: dev-env-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: dev-env-controller
subjects:
- kind: ServiceAccount
  name: dev-env-controller
  namespace: dev-environments
EOF
    
    # Create development-environment templates
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: dev-env-templates
  namespace: dev-environments
data:
  basic-dev-env.yaml: |
    apiVersion: v1
    kind: Namespace
    metadata:
      name: dev-{{.Username}}-{{.ProjectName}}
      labels:
        type: development
        owner: {{.Username}}
        project: {{.ProjectName}}
      annotations:
        dev-env/created-at: {{.CreatedAt}}
        dev-env/expires-at: {{.ExpiresAt}}
    ---
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: {{.ProjectName}}
      namespace: dev-{{.Username}}-{{.ProjectName}}
    spec:
      replicas: 1
      selector:
        matchLabels:
          app: {{.ProjectName}}
      template:
        metadata:
          labels:
            app: {{.ProjectName}}
        spec:
          containers:
          - name: dev-container
            image: {{.Image}}
            ports:
            - containerPort: {{.Port}}
            env:
            - name: ENV
              value: "development"
            - name: DEBUG
              value: "true"
            volumeMounts:
            - name: code
              mountPath: /workspace
            resources:
              requests:
                cpu: 100m
                memory: 256Mi
              limits:
                cpu: 1
                memory: 1Gi
          volumes:
          - name: code
            emptyDir: {}
    ---
    apiVersion: v1
    kind: Service
    metadata:
      name: {{.ProjectName}}
      namespace: dev-{{.Username}}-{{.ProjectName}}
    spec:
      selector:
        app: {{.ProjectName}}
      ports:
      - protocol: TCP
        port: 80
        targetPort: {{.Port}}
      type: ClusterIP
EOF
}

# Configure CI/CD integration
setup_cicd_integration() {
    echo "Configuring CI/CD integration..."
    
    # Deploy the Tekton Dashboard
    kubectl apply --filename https://storage.googleapis.com/tekton-releases/dashboard/latest/tekton-dashboard-release.yaml
    
    # Create a generic Pipeline template (quoted heredoc so Tekton's $(params.*)
    # placeholders are not expanded by the shell)
    cat <<'EOF' | kubectl apply -f -
apiVersion: tekton.dev/v1beta1
kind: Pipeline
metadata:
  name: build-and-deploy
  namespace: tekton-pipelines
spec:
  description: |
    This pipeline clones a git repo, builds a Docker image with Kaniko and
    deploys it to Kubernetes
  params:
  - name: repo-url
    type: string
  - name: image-reference
    type: string
  - name: deployment-name
    type: string
  - name: namespace
    type: string
    default: default
  workspaces:
  - name: shared-data
    description: |
      This workspace contains the cloned repo files, so they can be read by the
      next task.
  - name: docker-credentials
    description: |
      This workspace contains docker credentials for pushing images.
  tasks:
  - name: fetch-source
    taskRef:
      name: git-clone
    workspaces:
    - name: output
      workspace: shared-data
    params:
    - name: url
      value: $(params.repo-url)
  - name: build-image
    runAfter: ["fetch-source"]
    taskRef:
      name: kaniko
    workspaces:
    - name: source
      workspace: shared-data
    - name: dockerconfig
      workspace: docker-credentials
    params:
    - name: IMAGE
      value: $(params.image-reference)
  - name: deploy
    runAfter: ["build-image"]
    taskRef:
      name: kubernetes-actions
    params:
    - name: script
      value: |
        kubectl set image deployment/$(params.deployment-name) \
          $(params.deployment-name)=$(params.image-reference) \
          -n $(params.namespace)
        kubectl rollout status deployment/$(params.deployment-name) -n $(params.namespace)
---
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
  name: kubernetes-actions
  namespace: tekton-pipelines
spec:
  description: |
    This task performs kubernetes actions like apply, delete, etc.
  params:
  - name: script
    description: The kubectl script to run
    type: string
  steps:
  - name: kubectl
    image: bitnami/kubectl:latest
    script: |
      #!/bin/bash
      set -e
      $(params.script)
EOF
    
    # Create automated triggers (quoted heredoc so $(body.*) and $(tt.params.*)
    # placeholders are not expanded by the shell)
    cat <<'EOF' | kubectl apply -f -
apiVersion: triggers.tekton.dev/v1beta1
kind: EventListener
metadata:
  name: github-listener
  namespace: tekton-pipelines
spec:
  serviceAccountName: tekton-triggers-sa
  triggers:
  - name: github-push
    interceptors:
    - ref:
        name: "github"
      params:
      - name: "secretRef"
        value:
          secretName: github-secret
          secretKey: secretToken
      - name: "eventTypes"
        value: ["push"]
    bindings:
    - ref: github-push-binding
    template:
      ref: build-and-deploy-template
---
apiVersion: triggers.tekton.dev/v1beta1
kind: TriggerBinding
metadata:
  name: github-push-binding
  namespace: tekton-pipelines
spec:
  params:
  - name: repo-url
    value: $(body.repository.clone_url)
  - name: repo-name
    value: $(body.repository.name)
  - name: revision
    value: $(body.head_commit.id)
---
apiVersion: triggers.tekton.dev/v1beta1
kind: TriggerTemplate
metadata:
  name: build-and-deploy-template
  namespace: tekton-pipelines
spec:
  params:
  - name: repo-url
  - name: repo-name
  - name: revision
  resourcetemplates:
  - apiVersion: tekton.dev/v1beta1
    kind: PipelineRun
    metadata:
      generateName: build-and-deploy-run-
    spec:
      pipelineRef:
        name: build-and-deploy
      params:
      - name: repo-url
        value: $(tt.params.repo-url)
      - name: image-reference
        value: registry.example.com/$(tt.params.repo-name):$(tt.params.revision)
      - name: deployment-name
        value: $(tt.params.repo-name)
      workspaces:
      - name: shared-data
        volumeClaimTemplate:
          spec:
            accessModes:
            - ReadWriteOnce
            resources:
              requests:
                storage: 1Gi
      - name: docker-credentials
        secret:
          secretName: docker-credentials
EOF
}
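
# 【示例】手动触发EventListener联调(示意脚本,可按需并入下方的case分支)
# 假设:Tekton Triggers会为EventListener创建名为 el-<监听器名> 的Service(默认8080端口);
# github拦截器会用github-secret中的secretToken对请求体做HMAC签名校验,
# 不同Triggers版本校验的签名头可能是X-Hub-Signature或X-Hub-Signature-256。
test_event_listener() {
    echo "模拟GitHub push事件..."

    # 读取拦截器使用的secretToken
    local secret_token
    secret_token=$(kubectl get secret github-secret -n tekton-pipelines \
        -o jsonpath='{.data.secretToken}' | base64 -d)

    # 构造与TriggerBinding字段对应的最小payload
    local payload='{"repository":{"clone_url":"https://github.com/example/demo.git","name":"demo"},"head_commit":{"id":"abc1234"}}'

    # 计算HMAC-SHA1签名(GitHub的X-Hub-Signature格式)
    local signature
    signature=$(printf '%s' "$payload" | openssl dgst -sha1 -hmac "$secret_token" | awk '{print $NF}')

    # 将EventListener服务转发到本地并发送请求
    kubectl port-forward svc/el-github-listener 8080:8080 -n tekton-pipelines &
    local pf_pid=$!
    sleep 3

    curl -s -X POST http://localhost:8080 \
        -H "Content-Type: application/json" \
        -H "X-GitHub-Event: push" \
        -H "X-Hub-Signature: sha1=${signature}" \
        -d "$payload"

    kill $pf_pid
    echo "可用 kubectl get pipelineruns -n tekton-pipelines 确认是否创建了新的PipelineRun"
}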

# 部署开发者工具
deploy_developer_tools() {
    echo "部署开发者工具..."
    
    # 部署代码质量检查
    cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sonarqube
  namespace: platform
spec:
  replicas: 1
  selector:
    matchLabels:
      app: sonarqube
  template:
    metadata:
      labels:
        app: sonarqube
    spec:
      containers:
      - name: sonarqube
        image: sonarqube:community
        ports:
        - containerPort: 9000
        env:
        - name: SONAR_JDBC_URL
          value: jdbc:postgresql://postgres:5432/sonar
        - name: SONAR_JDBC_USERNAME
          value: sonar
        - name: SONAR_JDBC_PASSWORD
          valueFrom:
            secretKeyRef:
              name: sonar-secrets
              key: password
        resources:
          requests:
            cpu: 500m
            memory: 1Gi
          limits:
            cpu: 2
            memory: 4Gi
        volumeMounts:
        - name: sonarqube-data
          mountPath: /opt/sonarqube/data
        - name: sonarqube-logs
          mountPath: /opt/sonarqube/logs
        - name: sonarqube-extensions
          mountPath: /opt/sonarqube/extensions
      volumes:
      - name: sonarqube-data
        persistentVolumeClaim:
          claimName: sonarqube-data-pvc
      - name: sonarqube-logs
        persistentVolumeClaim:
          claimName: sonarqube-logs-pvc
      - name: sonarqube-extensions
        persistentVolumeClaim:
          claimName: sonarqube-extensions-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: sonarqube
  namespace: platform
spec:
  selector:
    app: sonarqube
  ports:
  - protocol: TCP
    port: 9000
    targetPort: 9000
  type: ClusterIP
EOF
    
    # 部署API文档生成
    cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: swagger-ui
  namespace: platform
spec:
  replicas: 1
  selector:
    matchLabels:
      app: swagger-ui
  template:
    metadata:
      labels:
        app: swagger-ui
    spec:
      containers:
      - name: swagger-ui
        image: swaggerapi/swagger-ui:latest
        ports:
        - containerPort: 8080
        env:
        - name: SWAGGER_JSON_URL
          value: "http://api-docs-aggregator/swagger.json"
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 500m
            memory: 512Mi
---
apiVersion: v1
kind: Service
metadata:
  name: swagger-ui
  namespace: platform
spec:
  selector:
    app: swagger-ui
  ports:
  - protocol: TCP
    port: 80
    targetPort: 8080
  type: ClusterIP
EOF
}

# 生成开发者体验报告
generate_dx_report() {
    echo "生成开发者体验报告..."
    
    local report_file="developer-experience-report-$(date +%Y%m%d).md"
    
    cat > "$report_file" <<EOF
# 开发者体验报告

生成时间: $(date)

## 1. 平台概览

### 开发者门户状态
- **Backstage状态**: $(kubectl get pods -n platform -l app=backstage -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "未部署")
- **可用模板数量**: $(kubectl get configmaps -n platform -l type=template --no-headers 2>/dev/null | wc -l)
- **注册组件数量**: 通过Backstage API获取

### 开发环境
- **活跃开发环境**: $(kubectl get namespaces -l type=development --no-headers 2>/dev/null | wc -l)
- **环境资源使用**: $(kubectl top pods -n dev-environments 2>/dev/null | tail -n +2 | awk '{sum+=$2} END {print sum "m CPU"}' || echo "N/A")

## 2. CI/CD 管道

### Pipeline 统计
- **总Pipeline数**: $(kubectl get pipelines -n tekton-pipelines --no-headers 2>/dev/null | wc -l)
- **最近24小时运行**: $(kubectl get pipelineruns -n tekton-pipelines --no-headers 2>/dev/null | wc -l)
- **成功率**: 通过Tekton Dashboard API计算

### 部署频率
\`\`\`
$(kubectl get pipelineruns -n tekton-pipelines -o json 2>/dev/null | jq -r '.items[] | select(.status.completionTime != null) | .metadata.creationTimestamp' | head -10 || echo "无数据")
\`\`\`

## 3. 开发工具

### 代码质量
- **SonarQube状态**: $(kubectl get pods -n platform -l app=sonarqube -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "未部署")
- **扫描项目数**: 通过SonarQube API获取
- **代码覆盖率**: 通过SonarQube API获取

### API文档
- **Swagger UI状态**: $(kubectl get pods -n platform -l app=swagger-ui -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "未部署")
- **文档化API数量**: 通过API聚合器获取

## 4. 开发者满意度指标

### 部署时间
- **平均部署时间**: 通过Pipeline运行时间计算
- **从代码到生产时间**: 端到端时间测量

### 错误率
- **失败的PipelineRun数**: $(kubectl get pipelineruns -n tekton-pipelines -o json 2>/dev/null | jq '[.items[] | select(.status.conditions[0].status == "False")] | length' || echo "0")
- **环境创建失败率**: 通过开发环境控制器日志分析

## 5. 改进建议

### 立即改进
1. 优化Pipeline性能
2. 增加更多应用模板
3. 改善文档和培训
4. 实施自动化测试

### 中期目标
1. 实施GitOps工作流
2. 增强监控和可观测性
3. 实施安全扫描集成
4. 优化资源使用

### 长期愿景
1. 实现完全自助服务
2. 建立开发者社区
3. 持续改进开发体验
4. 实施AI辅助开发

## 6. 关键指标

建议跟踪以下开发者体验指标:
- 部署频率
- 变更前置时间
- 平均恢复时间
- 变更失败率
- 开发者满意度评分

EOF
    
    echo "开发者体验报告已生成: $report_file"
}

# 主函数
case "$1" in
    templates)
        deploy_app_templates
        ;;
    environments)
        deploy_dev_environments
        ;;
    cicd)
        setup_cicd_integration
        ;;
    tools)
        deploy_developer_tools
        ;;
    report)
        generate_dx_report
        ;;
    all)
        deploy_app_templates
        deploy_dev_environments
        setup_cicd_integration
        deploy_developer_tools
        generate_dx_report
        ;;
    *)
        echo "用法: $0 {templates|environments|cicd|tools|report|all}"
        echo "  templates     - 部署应用模板"
        echo "  environments  - 部署开发环境管理"
        echo "  cicd          - 配置CI/CD集成"
        echo "  tools         - 部署开发者工具"
        echo "  report        - 生成开发者体验报告"
        echo "  all           - 执行所有配置"
        exit 1
        ;;
esac

14.9 总结与展望

14.9.1 技术趋势总结

# 未来技术趋势总结(ConfigMap)
apiVersion: v1
kind: ConfigMap
metadata:
  name: k8s-future-trends
  namespace: kube-system
data:
  trends-summary.yaml: |
    # Kubernetes未来发展趋势
    trends:
      edge_computing:
        description: "边缘计算和IoT设备管理"
        maturity: "成长期"
        adoption_timeline: "2024-2026"
        key_technologies:
          - K3s
          - KubeEdge
          - OpenYurt
          - Akri
        use_cases:
          - IoT设备管理
          - 边缘AI推理
          - 实时数据处理
          - 低延迟应用
      
      ai_ml_workloads:
        description: "AI/ML工作负载原生支持"
        maturity: "快速发展期"
        adoption_timeline: "2024-2025"
        key_technologies:
          - Kubeflow
          - KServe
          - Volcano
          - GPU Operator
        use_cases:
          - 模型训练
          - 模型推理
          - 数据管道
          - MLOps
      
      webassembly:
        description: "WebAssembly运行时集成"
        maturity: "早期阶段"
        adoption_timeline: "2025-2027"
        key_technologies:
          - Wasmtime
          - WasmEdge
          - Krustlet
          - WASI
        use_cases:
          - 轻量级函数
          - 多语言支持
          - 安全沙箱
          - 边缘计算
      
      multi_cloud:
        description: "多云和混合云管理"
        maturity: "成熟期"
        adoption_timeline: "2024-2025"
        key_technologies:
          - Cluster API
          - Admiral
          - Submariner
          - Liqo
        use_cases:
          - 多云部署
          - 灾难恢复
          - 成本优化
          - 避免厂商锁定
      
      sustainability:
        description: "可持续发展和绿色计算"
        maturity: "新兴期"
        adoption_timeline: "2024-2026"
        key_technologies:
          - Kepler
          - Carbon Aware Scheduler
          - Green Metrics
          - Sustainable Computing
        use_cases:
          - 碳足迹监控
          - 绿色调度
          - 能效优化
          - 可持续运营
      
      security_enhancement:
        description: "安全增强和零信任"
        maturity: "快速发展期"
        adoption_timeline: "2024-2025"
        key_technologies:
          - Sigstore
          - SPIFFE/SPIRE
          - Falco
          - OPA Gatekeeper
        use_cases:
          - 供应链安全
          - 零信任架构
          - 运行时安全
          - 合规管理
      
      developer_experience:
        description: "开发者体验和平台工程"
        maturity: "快速发展期"
        adoption_timeline: "2024-2025"
        key_technologies:
          - Backstage
          - Crossplane
          - Tekton
          - ArgoCD
        use_cases:
          - 内部开发者平台
          - 自助服务
          - GitOps
          - 应用模板
    
    adoption_recommendations:
      immediate_focus:
        - "实施GitOps工作流"
        - "加强安全基线"
        - "优化资源使用"
        - "建立监控体系"
      
      short_term:
        - "探索边缘计算场景"
        - "实施AI/ML工作负载"
        - "建设开发者平台"
        - "实施多云策略"
      
      long_term:
        - "采用WebAssembly技术"
        - "实现可持续运营"
        - "建立零信任架构"
        - "实施智能运维"
    
    success_metrics:
      technical:
        - "部署频率"
        - "变更前置时间"
        - "平均恢复时间"
        - "变更失败率"
      
      business:
        - "开发者生产力"
        - "运营成本"
        - "安全事件数量"
        - "合规达成率"
      
      sustainability:
        - "碳足迹"
        - "能源效率"
        - "资源利用率"
        - "可再生能源使用率"

14.9.2 实施路线图

#!/bin/bash
# Kubernetes未来技术实施路线图脚本

echo "=== Kubernetes未来技术实施路线图 ==="

# 生成实施路线图
generate_roadmap() {
    echo "生成实施路线图..."
    
    local roadmap_file="k8s-future-roadmap-$(date +%Y%m%d).md"
    
    cat > "$roadmap_file" <<EOF
# Kubernetes未来技术实施路线图

生成时间: $(date)

## 阶段一:基础设施现代化 (0-6个月)

### 目标
- 建立现代化的Kubernetes基础设施
- 实施基本的安全和监控
- 优化资源使用和成本

### 关键任务
1. **集群升级和标准化**
   - 升级到最新稳定版本
   - 统一集群配置
   - 实施基础安全策略

2. **监控和可观测性**
   - 部署Prometheus + Grafana
   - 实施日志聚合
   - 配置告警规则

3. **CI/CD现代化**
   - 实施GitOps工作流
   - 自动化部署管道
   - 集成安全扫描

4. **资源优化**
   - 实施资源配额
   - 配置自动扩缩容
   - 优化镜像和存储

### 成功指标
- 集群可用性 > 99.9%
- 部署时间 < 10分钟
- 资源利用率 > 70%
- 安全扫描覆盖率 100%

## 阶段二:平台工程和开发者体验 (6-12个月)

### 目标
- 建设内部开发者平台
- 提升开发者生产力
- 实施自助服务模式

### 关键任务
1. **开发者门户**
   - 部署Backstage平台
   - 创建应用模板
   - 集成开发工具链

2. **自助服务**
   - 实施环境自动化
   - 配置资源自助申请
   - 建立服务目录

3. **质量保证**
   - 集成代码质量检查
   - 自动化测试流水线
   - 实施性能测试

4. **文档和培训**
   - 建立知识库
   - 开发者培训计划
   - 最佳实践指南

### 成功指标
- 开发者满意度 > 4.5/5
- 环境创建时间 < 5分钟
- 自助服务使用率 > 80%
- 文档覆盖率 > 90%

## 阶段三:高级特性和新兴技术 (12-18个月)

### 目标
- 探索和实施新兴技术
- 扩展到边缘和多云
- 支持AI/ML工作负载

### 关键任务
1. **边缘计算**
   - 部署边缘集群
   - 实施边缘应用管理
   - 配置边缘监控

2. **AI/ML平台**
   - 部署Kubeflow
   - 配置GPU资源池
   - 实施模型管理

3. **多云管理**
   - 实施集群联邦
   - 配置跨云网络
   - 建立灾难恢复

4. **WebAssembly探索**
   - 试点WASM运行时
   - 开发WASM应用
   - 评估性能和安全性

### 成功指标
- 边缘节点数量 > 10
- ML模型部署时间 < 30分钟
- 多云部署成功率 > 95%
- WASM应用性能提升 > 20%

## 阶段四:可持续发展和智能运维 (18-24个月)

### 目标
- 实现可持续运营
- 建立智能运维体系
- 实施零信任安全

### 关键任务
1. **绿色计算**
   - 实施碳足迹监控
   - 配置绿色调度
   - 优化能源使用

2. **智能运维**
   - 实施AIOps
   - 自动化故障处理
   - 预测性维护

3. **零信任安全**
   - 实施微分段
   - 配置身份验证
   - 建立安全基线

4. **持续优化**
   - 性能调优自动化
   - 成本优化算法
   - 容量规划智能化

### 成功指标
- 碳足迹减少 > 30%
- 故障自动修复率 > 80%
- 安全事件减少 > 50%
- 运维效率提升 > 40%

## 技术选型建议

### 立即采用
- **GitOps**: ArgoCD, Flux
- **监控**: Prometheus, Grafana, Jaeger
- **安全**: OPA Gatekeeper, Falco
- **CI/CD**: Tekton, GitHub Actions

### 短期评估
- **平台工程**: Backstage, Crossplane
- **服务网格**: Istio, Linkerd
- **多云**: Cluster API, Admiral
- **AI/ML**: Kubeflow, KServe

### 长期探索
- **边缘计算**: K3s, KubeEdge
- **WebAssembly**: Krustlet, WasmEdge
- **可持续性**: Kepler, Carbon Aware Scheduler
- **智能运维**: AIOps平台, 预测分析

## 风险和缓解策略

### 技术风险
- **新技术不成熟**: 先试点,后推广
- **兼容性问题**: 充分测试,渐进升级
- **性能影响**: 基准测试,监控对比

### 组织风险
- **技能差距**: 培训计划,外部支持
- **变更阻力**: 沟通策略,激励机制
- **资源不足**: 分阶段实施,优先级管理

### 业务风险
- **服务中断**: 蓝绿部署,回滚策略
- **成本超支**: 预算控制,成本监控
- **合规问题**: 安全审计,合规检查

## 投资回报分析

### 成本节约
- 运维自动化: 节约人力成本30-50%
- 资源优化: 降低基础设施成本20-40%
- 故障减少: 减少业务损失60-80%

### 效率提升
- 部署速度: 提升5-10倍
- 开发效率: 提升30-50%
- 问题解决: 缩短70-90%

### 创新能力
- 新技术采用: 缩短6-12个月
- 产品上市: 加速30-50%
- 竞争优势: 建立技术护城河

EOF
    
    echo "实施路线图已生成: $roadmap_file"
}

# 创建技术评估矩阵
create_assessment_matrix() {
    echo "创建技术评估矩阵..."
    
    cat > "technology-assessment-matrix.csv" <<EOF
技术,成熟度,复杂度,投资成本,预期收益,风险等级,推荐优先级
GitOps,高,中,低,高,低,1
服务网格,高,高,中,高,中,2
边缘计算,中,高,高,中,中,3
AI/ML平台,中,高,高,高,中,2
WebAssembly,低,中,低,中,高,4
可持续计算,低,中,中,中,中,3
零信任安全,中,高,高,高,中,2
平台工程,中,中,中,高,低,1
多云管理,中,高,高,中,中,3
智能运维,低,高,高,高,高,4
EOF
    
    echo "技术评估矩阵已创建: technology-assessment-matrix.csv"
}
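
# 【示例】按推荐优先级查看评估矩阵(示意,依赖sort与column等常见命令行工具)
view_assessment_matrix() {
    # 第7列为推荐优先级,数值越小越应优先投入
    { head -1 technology-assessment-matrix.csv; \
      tail -n +2 technology-assessment-matrix.csv | sort -t, -k7 -n; } | column -s, -t
}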

# 生成决策支持报告
generate_decision_support() {
    echo "生成决策支持报告..."
    
    local decision_file="decision-support-$(date +%Y%m%d).md"
    
    cat > "$decision_file" <<EOF
# Kubernetes未来技术决策支持报告

## 执行摘要

Kubernetes生态系统正在快速发展,新兴技术为企业带来了前所未有的机遇和挑战。本报告基于当前技术趋势、市场需求和最佳实践,为组织制定Kubernetes未来技术策略提供决策支持。

## 关键发现

### 1. 技术成熟度分析
- **成熟技术**: GitOps、服务网格、监控可观测性
- **快速发展**: AI/ML平台、平台工程、零信任安全
- **新兴技术**: WebAssembly、可持续计算、智能运维

### 2. 业务价值评估
- **高价值**: 平台工程(开发效率)、AI/ML(创新能力)、安全增强(风险控制)
- **中等价值**: 边缘计算(新场景)、多云管理(灵活性)
- **长期价值**: 可持续计算(合规要求)、智能运维(成本优化)

### 3. 实施复杂度
- **低复杂度**: GitOps、基础监控、应用模板
- **中等复杂度**: 平台工程、WebAssembly、可持续计算
- **高复杂度**: 服务网格、AI/ML平台、零信任架构

## 推荐策略

### 短期策略 (6-12个月)
1. **优先实施GitOps和平台工程**
   - 投资回报高,风险低
   - 为后续技术奠定基础
   - 显著提升开发者体验

2. **加强安全基线**
   - 实施基础安全策略
   - 配置监控和告警
   - 建立合规框架

3. **优化现有工作负载**
   - 资源使用优化
   - 性能调优
   - 成本控制

### 中期策略 (12-18个月)
1. **探索AI/ML工作负载**
   - 评估业务需求
   - 试点项目实施
   - 建立ML运维能力

2. **实施服务网格**
   - 微服务治理
   - 流量管理
   - 安全策略

3. **扩展到边缘场景**
   - 识别边缘用例
   - 部署边缘基础设施
   - 建立边缘管理能力

### 长期策略 (18-24个月)
1. **建立可持续运营**
   - 碳足迹监控
   - 绿色计算实践
   - 可持续发展目标

2. **实施智能运维**
   - AIOps平台
   - 自动化运维
   - 预测性维护

3. **探索前沿技术**
   - WebAssembly应用
   - 量子计算准备
   - 下一代架构

## 投资建议

### 预算分配
- **基础设施现代化**: 40%
- **平台工程**: 25%
- **安全增强**: 20%
- **新兴技术**: 15%

### 人员配置
- **平台工程师**: 2-3人
- **DevOps工程师**: 3-4人
- **安全工程师**: 1-2人
- **AI/ML工程师**: 1-2人

### 培训投入
- **内部培训**: 每季度技术分享
- **外部培训**: 年度技术大会
- **认证考试**: 鼓励获得相关认证
- **实践项目**: 20%时间用于技术探索

## 风险管理

### 技术风险
- **缓解策略**: 试点项目、渐进实施、充分测试
- **应急计划**: 回滚策略、备用方案、专家支持

### 组织风险
- **变更管理**: 沟通计划、培训支持、激励机制
- **技能建设**: 招聘计划、培训投入、知识管理

### 业务风险
- **连续性保障**: 蓝绿部署、灾难恢复、监控告警
- **合规要求**: 安全审计、数据保护、行业标准

## 成功指标

### 技术指标
- 部署频率提升 > 5倍
- 故障恢复时间 < 1小时
- 系统可用性 > 99.9%
- 安全事件 < 1次/月

### 业务指标
- 开发效率提升 > 50%
- 运维成本降低 > 30%
- 新功能上线时间 < 2周
- 客户满意度 > 4.5/5

### 创新指标
- 新技术采用数量 > 3个/年
- 技术专利申请 > 2个/年
- 开源贡献 > 10个PR/年
- 技术影响力提升

## 结论

Kubernetes未来技术的采用需要平衡创新与稳定、收益与风险。建议采用渐进式策略,优先实施成熟度高、价值明确的技术,同时保持对新兴技术的关注和试点。成功的关键在于建立强大的平台工程能力、持续的学习文化和有效的风险管理机制。

EOF
    
    echo "决策支持报告已生成: $decision_file"
}

# 主函数
case "$1" in
    roadmap)
        generate_roadmap
        ;;
    matrix)
        create_assessment_matrix
        ;;
    decision)
        generate_decision_support
        ;;
    all)
        generate_roadmap
        create_assessment_matrix
        generate_decision_support
        ;;
    *)
        echo "用法: $0 {roadmap|matrix|decision|all}"
        echo "  roadmap   - 生成实施路线图"
        echo "  matrix    - 创建技术评估矩阵"
        echo "  decision  - 生成决策支持报告"
        echo "  all       - 生成所有文档"
        exit 1
        ;;
esac

本章详细介绍了Kubernetes的未来发展趋势和新兴技术,包括边缘计算、AI/ML工作负载、WebAssembly集成、多云混合云、可持续发展、安全增强、开发者体验等方面。通过实际的配置示例、脚本和最佳实践,帮助读者了解和准备这些前沿技术。

下一章预告: 第15章将总结整个Kubernetes学习之旅,回顾核心概念,展望技术发展,并提供持续学习的建议和资源。

14.2.3 边缘计算最佳实践

# 边缘计算资源配额
apiVersion: v1
kind: ResourceQuota
metadata:
  name: edge-quota
  namespace: edge-apps
spec:
  hard:
    requests.cpu: "2"
    requests.memory: 4Gi
    limits.cpu: "4"
    limits.memory: 8Gi
    persistentvolumeclaims: "5"
    pods: "10"
---
# 边缘网络策略
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: edge-network-policy
  namespace: edge-apps
spec:
  podSelector:
    matchLabels:
      tier: edge
  policyTypes:
  - Ingress
  - Egress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          name: edge-apps
    - podSelector:
        matchLabels:
          role: edge-gateway
    ports:
    - protocol: TCP
      port: 80
    - protocol: TCP
      port: 443
  egress:
  - to:
    - namespaceSelector:
        matchLabels:
          name: kube-system
  - to: []
    ports:
    - protocol: TCP
      port: 53
    - protocol: UDP
      port: 53
---
# 边缘Pod中断预算
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: edge-app-pdb
  namespace: edge-apps
spec:
  minAvailable: 1
  selector:
    matchLabels:
      app: edge-sensor
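
边缘节点经常面临断电重启和网络不稳定的情况,建议同时在kubelet上启用优雅停机,给边缘应用留出保存状态和上报数据的时间。下面是一个示意性的KubeletConfiguration片段(假设kubelet通过--config参数加载该文件,时长取值需按业务容忍度调整):

# 边缘节点kubelet优雅停机配置(示意)
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
# 节点关机时为所有Pod预留的总宽限时间
shutdownGracePeriod: 120s
# 其中为关键Pod(如边缘网关)保留的时间
shutdownGracePeriodCriticalPods: 30s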

14.3 AI/ML工作负载支持

14.3.1 GPU资源管理

# GPU节点配置
apiVersion: v1
kind: Node
metadata:
  name: gpu-node-001
  labels:
    accelerator: nvidia-tesla-v100
    gpu-count: "4"
    node-type: gpu
# capacity/allocatable位于status中,由kubelet和NVIDIA设备插件自动上报,此处仅作说明
status:
  capacity:
    nvidia.com/gpu: "4"
  allocatable:
    nvidia.com/gpu: "4"
---
# GPU工作负载
apiVersion: batch/v1
kind: Job
metadata:
  name: ml-training-job
  namespace: ml-workloads
spec:
  template:
    spec:
      containers:
      - name: pytorch-training
        image: pytorch/pytorch:1.12.0-cuda11.3-cudnn8-runtime
        command: ["python"]
        args: ["/app/train.py"]
        resources:
          requests:
            nvidia.com/gpu: 2
            cpu: "4"
            memory: "16Gi"
          limits:
            nvidia.com/gpu: 2
            cpu: "8"
            memory: "32Gi"
        env:
        - name: CUDA_VISIBLE_DEVICES
          value: "0,1"
        - name: NCCL_DEBUG
          value: "INFO"
        volumeMounts:
        - name: training-data
          mountPath: /data
        - name: model-output
          mountPath: /output
        - name: shm
          mountPath: /dev/shm
      volumes:
      - name: training-data
        persistentVolumeClaim:
          claimName: training-data-pvc
      - name: model-output
        persistentVolumeClaim:
          claimName: model-output-pvc
      - name: shm
        emptyDir:
          medium: Memory
          sizeLimit: 8Gi
      restartPolicy: Never
      nodeSelector:
        accelerator: nvidia-tesla-v100
      tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
  backoffLimit: 3
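
在提交大规模训练任务之前,可以先用一个一次性Pod确认GPU调度、驱动和设备插件是否就绪。下面是一个示意配置(镜像标签为假设值,请替换为集群内实际可用的CUDA镜像):

# GPU冒烟测试Pod(示意)
apiVersion: v1
kind: Pod
metadata:
  name: gpu-smoke-test
  namespace: ml-workloads
spec:
  restartPolicy: Never
  nodeSelector:
    accelerator: nvidia-tesla-v100
  tolerations:
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
  containers:
  - name: cuda-check
    image: nvidia/cuda:12.2.0-base-ubuntu22.04  # 假设的镜像标签
    command: ["nvidia-smi"]
    resources:
      limits:
        nvidia.com/gpu: 1

Pod运行结束后,kubectl logs gpu-smoke-test -n ml-workloads 应能输出GPU列表,否则需要先排查NVIDIA驱动与设备插件的安装情况。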

14.3.2 Kubeflow机器学习平台

#!/bin/bash
# Kubeflow部署脚本

echo "=== Kubeflow机器学习平台部署 ==="

# 安装Kubeflow
install_kubeflow() {
    echo "安装Kubeflow..."
    
    # Kubeflow 1.3+ 已不再提供kfctl安装方式,官方推荐基于kubeflow/manifests仓库用kustomize安装
    # 安装kustomize(所需版本以manifests仓库README为准)
    curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
    sudo mv kustomize /usr/local/bin/
    
    # 获取Kubeflow 1.7官方manifests
    git clone --depth 1 -b v1.7-branch https://github.com/kubeflow/manifests.git
    cd manifests
    
    # 部署Kubeflow(CRD等资源需要分批就绪,官方建议循环apply直至全部成功)
    while ! kustomize build example | kubectl apply -f -; do
        echo "部分资源尚未就绪,重试中..."
        sleep 10
    done
    
    echo "等待Kubeflow组件启动..."
    kubectl wait --for=condition=available --timeout=600s deployment --all -n kubeflow
    kubectl wait --for=condition=available --timeout=600s deployment --all -n istio-system
}

# 配置Jupyter Notebook
setup_jupyter() {
    echo "配置Jupyter Notebook..."
    
    cat <<EOF | kubectl apply -f -
apiVersion: kubeflow.org/v1
kind: Notebook
metadata:
  name: ml-notebook
  namespace: kubeflow-user-example-com
spec:
  template:
    spec:
      containers:
      - name: notebook
        image: jupyter/tensorflow-notebook:latest
        resources:
          requests:
            cpu: "1"
            memory: "2Gi"
            nvidia.com/gpu: "1"
          limits:
            cpu: "2"
            memory: "4Gi"
            nvidia.com/gpu: "1"
        env:
        - name: JUPYTER_ENABLE_LAB
          value: "yes"
        volumeMounts:
        - name: workspace
          mountPath: /home/jovyan/work
      volumes:
      - name: workspace
        persistentVolumeClaim:
          claimName: ml-workspace-pvc
EOF
}

# 创建训练Pipeline
create_training_pipeline() {
    echo "创建训练Pipeline..."
    
    cat <<EOF > training_pipeline.py
import kfp
from kfp import dsl
from kfp.components import create_component_from_func

# 数据预处理组件
@create_component_from_func
def preprocess_data(input_path: str, output_path: str) -> str:
    import pandas as pd
    import numpy as np
    
    # 模拟数据预处理
    print(f"Processing data from {input_path}")
    
    # 这里添加实际的数据预处理逻辑
    data = pd.DataFrame(np.random.randn(1000, 10))
    data.to_csv(f"{output_path}/processed_data.csv", index=False)
    
    return output_path

# 模型训练组件
@create_component_from_func
def train_model(data_path: str, model_path: str) -> str:
    import joblib
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    import pandas as pd
    import numpy as np
    
    print(f"Training model with data from {data_path}")
    
    # 加载数据
    data = pd.read_csv(f"{data_path}/processed_data.csv")
    X = data.iloc[:, :-1]
    y = np.random.randint(0, 2, len(data))  # 模拟标签
    
    # 训练模型
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    
    # 保存模型
    joblib.dump(model, f"{model_path}/model.pkl")
    
    return model_path

# 模型评估组件
@create_component_from_func
def evaluate_model(model_path: str, data_path: str) -> float:
    import joblib
    import pandas as pd
    import numpy as np
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split
    
    print(f"Evaluating model from {model_path}")
    
    # 加载模型和数据
    model = joblib.load(f"{model_path}/model.pkl")
    data = pd.read_csv(f"{data_path}/processed_data.csv")
    X = data.iloc[:, :-1]
    y = np.random.randint(0, 2, len(data))  # 模拟标签
    
    # 评估模型
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    
    print(f"Model accuracy: {accuracy}")
    return accuracy

# 定义Pipeline
@dsl.pipeline(
    name='ML Training Pipeline',
    description='A machine learning training pipeline'
)
def ml_training_pipeline(
    input_data_path: str = '/data/input',
    processed_data_path: str = '/data/processed',
    model_output_path: str = '/data/models'
):
    # 数据预处理步骤
    preprocess_task = preprocess_data(
        input_path=input_data_path,
        output_path=processed_data_path
    )
    
    # 模型训练步骤
    train_task = train_model(
        data_path=preprocess_task.output,
        model_path=model_output_path
    )
    
    # 模型评估步骤
    evaluate_task = evaluate_model(
        model_path=train_task.output,
        data_path=preprocess_task.output
    )
    
    # 设置GPU资源
    train_task.set_gpu_limit(1)
    train_task.add_node_selector_constraint('accelerator', 'nvidia-tesla-v100')

if __name__ == '__main__':
    # 编译Pipeline
    kfp.compiler.Compiler().compile(ml_training_pipeline, 'ml_training_pipeline.yaml')
    print("Pipeline compiled successfully!")
EOF
    
    # 运行Pipeline编译
    python training_pipeline.py
    
    echo "训练Pipeline已创建"
}

# 部署模型服务
deploy_model_serving() {
    echo "部署模型服务..."
    
    cat <<EOF | kubectl apply -f -
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: sklearn-iris
  namespace: kubeflow-user-example-com
spec:
  predictor:
    sklearn:
      storageUri: "gs://kfserving-examples/models/sklearn/iris"
      resources:
        requests:
          cpu: 100m
          memory: 256Mi
        limits:
          cpu: 500m
          memory: 512Mi
  transformer:
    containers:
    - name: transformer
      image: kfserving/image-transformer:latest
      env:
      - name: STORAGE_URI
        value: "gs://kfserving-examples/models/sklearn/iris"
EOF
}
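
# 【示例】验证推理服务(示意):通过istio入口网关以KServe v1协议调用sklearn-iris。
# 假设Kubeflow自带的istio-ingressgateway位于istio-system命名空间,
# InferenceService就绪后其对外域名记录在status.url中。
test_model_serving() {
    echo "测试推理服务..."

    local service_hostname
    service_hostname=$(kubectl get inferenceservice sklearn-iris \
        -n kubeflow-user-example-com -o jsonpath='{.status.url}' | cut -d'/' -f3)

    # 将istio入口网关转发到本地
    kubectl port-forward -n istio-system svc/istio-ingressgateway 8080:80 &
    local pf_pid=$!
    sleep 3

    # KServe v1推理协议:POST /v1/models/<模型名>:predict
    curl -s -H "Host: ${service_hostname}" \
        -H "Content-Type: application/json" \
        -d '{"instances": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]]}' \
        http://localhost:8080/v1/models/sklearn-iris:predict

    kill $pf_pid
}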

# 设置监控
setup_ml_monitoring() {
    echo "设置ML监控..."
    
    cat <<EOF | kubectl apply -f -
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kubeflow-monitoring
  namespace: kubeflow
spec:
  selector:
    matchLabels:
      app: kubeflow
  endpoints:
  - port: http
    interval: 30s
    path: /metrics
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: kubeflow-alerts
  namespace: kubeflow
spec:
  groups:
  - name: kubeflow.rules
    rules:
    - alert: KubeflowPipelineFailure
      expr: increase(kubeflow_pipeline_runs_failed_total[5m]) > 0
      for: 0m
      labels:
        severity: warning
      annotations:
        summary: "Kubeflow pipeline failure detected"
        description: "Pipeline {{ \$labels.pipeline_name }} has failed"
    
    - alert: ModelServingDown
      expr: up{job="model-serving"} == 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "Model serving is down"
        description: "Model serving endpoint has been down for more than 5 minutes"
EOF
}

# 主函数
case "$1" in
    install)
        install_kubeflow
        ;;
    jupyter)
        setup_jupyter
        ;;
    pipeline)
        create_training_pipeline
        ;;
    serving)
        deploy_model_serving
        ;;
    monitoring)
        setup_ml_monitoring
        ;;
    all)
        install_kubeflow
        setup_jupyter
        create_training_pipeline
        deploy_model_serving
        setup_ml_monitoring
        ;;
    *)
        echo "用法: $0 {install|jupyter|pipeline|serving|monitoring|all}"
        echo "  install    - 安装Kubeflow"
        echo "  jupyter    - 配置Jupyter Notebook"
        echo "  pipeline   - 创建训练Pipeline"
        echo "  serving    - 部署模型服务"
        echo "  monitoring - 设置监控"
        echo "  all        - 执行所有配置"
        exit 1
        ;;
esac

14.4 WebAssembly (WASM) 集成

14.4.1 WASM运行时支持

WebAssembly为Kubernetes提供了轻量级、安全的运行时选择。

# WASM运行时配置
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: wasmtime
handler: wasmtime
overhead:
  podFixed:
    cpu: 10m
    memory: 32Mi
scheduling:
  nodeSelector:
    wasm-capable: "true"
  tolerations:
  - key: wasm-runtime
    operator: Equal
    value: "true"
    effect: NoSchedule
---
# WASM应用部署
apiVersion: apps/v1
kind: Deployment
metadata:
  name: wasm-app
  namespace: wasm-apps
spec:
  replicas: 3
  selector:
    matchLabels:
      app: wasm-app
  template:
    metadata:
      labels:
        app: wasm-app
    spec:
      runtimeClassName: wasmtime
      containers:
      - name: wasm-container
        image: wasm-app:latest
        resources:
          requests:
            cpu: 5m
            memory: 16Mi
          limits:
            cpu: 50m
            memory: 128Mi
        env:
        - name: WASM_MODULE_PATH
          value: "/app/module.wasm"
        ports:
        - containerPort: 8080
          protocol: TCP
      nodeSelector:
        wasm-runtime: "true"
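
要让上述RuntimeClass和Deployment能够正常调度,需要先为安装了WASM shim的节点打上匹配的标签和污点(示意命令,节点名为占位符):

# 标记支持WASM的节点
kubectl label node <node-name> wasm-capable=true wasm-runtime=true
# 添加污点,避免普通容器工作负载被调度到WASM专用节点
kubectl taint node <node-name> wasm-runtime=true:NoSchedule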

14.4.2 WASM应用开发

#!/bin/bash
# WASM应用开发和部署脚本

echo "=== WebAssembly应用开发和部署 ==="

# 安装WASM运行时
install_wasm_runtime() {
    echo "安装WASM运行时..."
    
    # 安装wasmtime
    curl https://wasmtime.dev/install.sh -sSf | bash
    source ~/.bashrc
    
    # 安装containerd的wasmtime shim(runwasi项目,资产名与版本号以其发布页为准)
    wget https://github.com/containerd/runwasi/releases/download/containerd-wasm-shims%2Fv0.3.3/containerd-wasm-shims-v1-linux-x86_64.tar.gz
    tar -xzf containerd-wasm-shims-v1-linux-x86_64.tar.gz
    # runtime_type为io.containerd.wasmtime.v1时,containerd按约定查找名为containerd-shim-wasmtime-v1的二进制
    sudo mv containerd-shim-wasmtime-v1 /usr/local/bin/
    
    # 配置containerd
    sudo tee -a /etc/containerd/config.toml <<EOF

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.wasmtime]
  runtime_type = "io.containerd.wasmtime.v1"
EOF
    
    # 重启containerd
    sudo systemctl restart containerd
}

# 创建WASM应用示例
create_wasm_app() {
    echo "创建WASM应用示例..."
    
    # 创建Rust项目
    mkdir -p wasm-hello-world
    cd wasm-hello-world
    
    # 创建Cargo.toml
    cat > Cargo.toml <<EOF
[package]
name = "wasm-hello-world"
version = "0.1.0"
edition = "2021"

# 目标为wasm32-wasi的独立可执行模块,本示例仅依赖标准库
# (wasm-bindgen/web-sys面向浏览器环境,tokio完整特性无法在wasm32-wasi下编译,故不引入)
[dependencies]
EOF
    
    # 创建源代码
    mkdir -p src
    cat > src/lib.rs <<EOF
// 通过 #[no_mangle] 导出C ABI函数,便于wasmtime等宿主运行时按名称直接调用
#[no_mangle]
pub extern "C" fn add(a: i32, b: i32) -> i32 {
    a + b
}

#[no_mangle]
pub extern "C" fn fibonacci(n: i32) -> i32 {
    if n <= 1 {
        return n;
    }
    fibonacci(n - 1) + fibonacci(n - 2)
}
EOF
    
    # 创建HTTP服务器
    cat > src/main.rs <<EOF
// 简易HTTP响应服务。注意:wasm32-wasi下std::net能否真正监听端口,
// 取决于运行时是否提供socket能力(例如WasmEdge的WASI socket扩展、
// wasmtime的监听套接字预打开),在不支持的环境中bind会直接报错。
use std::io::prelude::*;
use std::net::{TcpListener, TcpStream};

fn main() {
    let listener = TcpListener::bind("0.0.0.0:8080")
        .expect("绑定8080端口失败: 运行时需支持WASI socket");
    println!("WASM HTTP Server listening on port 8080");

    // wasm32-wasi默认不支持多线程,逐个处理连接即可
    for stream in listener.incoming() {
        if let Ok(stream) = stream {
            handle_connection(stream);
        }
    }
}

fn handle_connection(mut stream: TcpStream) {
    let mut buffer = [0; 1024];
    let _ = stream.read(&mut buffer);

    let response = "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nHello from WASM!";
    let _ = stream.write_all(response.as_bytes());
    let _ = stream.flush();
}
EOF
    
    # 编译为WASM
    rustup target add wasm32-wasi
    cargo build --target wasm32-wasi --release
    
    echo "WASM应用编译完成"
    cd ..
}

# 构建WASM容器镜像
build_wasm_image() {
    echo "构建WASM容器镜像..."
    
    # 创建Dockerfile
    cat > Dockerfile.wasm <<EOF
FROM scratch
COPY target/wasm32-wasi/release/wasm-hello-world.wasm /app/module.wasm
EXPOSE 8080
ENTRYPOINT ["/app/module.wasm"]
EOF
    
    # 构建镜像
    docker build -f Dockerfile.wasm -t wasm-app:latest .
    
    echo "WASM镜像构建完成"
}
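
# 【示例】使用buildx显式声明Wasm平台构建镜像(示意,需Docker的Wasm技术预览支持;
# 平台标识与containerd镜像存储配置随Docker版本变化,请以官方文档为准)
build_wasm_image_buildx() {
    docker buildx build --platform wasi/wasm -f Dockerfile.wasm -t wasm-app:latest .
}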

# 部署WASM应用
deploy_wasm_app() {
    echo "部署WASM应用..."
    
    # 创建命名空间
    kubectl create namespace wasm-apps --dry-run=client -o yaml | kubectl apply -f -
    
    # 部署应用
    cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: wasm-hello-world
  namespace: wasm-apps
spec:
  replicas: 2
  selector:
    matchLabels:
      app: wasm-hello-world
  template:
    metadata:
      labels:
        app: wasm-hello-world
    spec:
      runtimeClassName: wasmtime
      containers:
      - name: wasm-app
        image: wasm-app:latest
        ports:
        - containerPort: 8080
        resources:
          requests:
            cpu: 1m
            memory: 8Mi
          limits:
            cpu: 10m
            memory: 32Mi
        livenessProbe:
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 5
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 2
          periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
  name: wasm-hello-world-service
  namespace: wasm-apps
spec:
  selector:
    app: wasm-hello-world
  ports:
  - protocol: TCP
    port: 80
    targetPort: 8080
  type: ClusterIP
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: wasm-hello-world-ingress
  namespace: wasm-apps
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
spec:
  rules:
  - host: wasm-app.local
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: wasm-hello-world-service
            port:
              number: 80
EOF
}

# 性能测试
performance_test() {
    echo "WASM应用性能测试..."
    
    # 等待应用启动
    kubectl wait --for=condition=available --timeout=300s deployment/wasm-hello-world -n wasm-apps
    
    # 端口转发
    kubectl port-forward service/wasm-hello-world-service 8080:80 -n wasm-apps &
    PF_PID=$!
    
    sleep 5
    
    # 性能测试
    echo "开始性能测试..."
    curl -s http://localhost:8080
    
    # 使用ab进行压力测试
    if command -v ab &> /dev/null; then
        echo "执行压力测试..."
        ab -n 1000 -c 10 http://localhost:8080/
    else
        echo "apache2-utils未安装,跳过压力测试"
    fi
    
    # 清理
    kill $PF_PID
}

# 主函数
case "$1" in
    runtime)
        install_wasm_runtime
        ;;
    create)
        create_wasm_app
        ;;
    build)
        build_wasm_image
        ;;
    deploy)
        deploy_wasm_app
        ;;
    test)
        performance_test
        ;;
    all)
        install_wasm_runtime
        create_wasm_app
        build_wasm_image
        deploy_wasm_app
        performance_test
        ;;
    *)
        echo "用法: $0 {runtime|create|build|deploy|test|all}"
        echo "  runtime - 安装WASM运行时"
        echo "  create  - 创建WASM应用"
        echo "  build   - 构建WASM镜像"
        echo "  deploy  - 部署WASM应用"
        echo "  test    - 性能测试"
        echo "  all     - 执行所有步骤"
        exit 1
        ;;
esac

14.5 多云和混合云

14.5.1 集群联邦

# 集群联邦配置
apiVersion: core.kubefed.io/v1beta1
kind: KubeFedCluster
metadata:
  name: cluster-aws
  namespace: kube-federation-system
spec:
  apiEndpoint: https://aws-cluster.example.com
  caBundle: LS0tLS1CRUdJTi...
  secretRef:
    name: cluster-aws-secret
---
apiVersion: core.kubefed.io/v1beta1
kind: KubeFedCluster
metadata:
  name: cluster-gcp
  namespace: kube-federation-system
spec:
  apiEndpoint: https://gcp-cluster.example.com
  caBundle: LS0tLS1CRUdJTi...
  secretRef:
    name: cluster-gcp-secret
---
# 联邦部署
apiVersion: types.kubefed.io/v1beta1
kind: FederatedDeployment
metadata:
  name: multi-cloud-app
  namespace: default
spec:
  template:
    metadata:
      labels:
        app: multi-cloud-app
    spec:
      replicas: 3
      selector:
        matchLabels:
          app: multi-cloud-app
      template:
        metadata:
          labels:
            app: multi-cloud-app
        spec:
          containers:
          - name: app
            image: nginx:latest
            ports:
            - containerPort: 80
            resources:
              requests:
                cpu: 100m
                memory: 128Mi
              limits:
                cpu: 500m
                memory: 512Mi
  placement:
    clusters:
    - name: cluster-aws
    - name: cluster-gcp
  overrides:
  - clusterName: cluster-aws
    clusterOverrides:
    - path: "/spec/replicas"
      value: 5
    - path: "/spec/template/spec/containers/0/env"
      op: "add"
      value:
      - name: CLOUD_PROVIDER
        value: "aws"
  - clusterName: cluster-gcp
    clusterOverrides:
    - path: "/spec/replicas"
      value: 2
    - path: "/spec/template/spec/containers/0/env"
      op: "add"
      value:
      - name: CLOUD_PROVIDER
        value: "gcp"

14.5.2 多云管理脚本

#!/bin/bash
# 多云Kubernetes管理脚本

echo "=== 多云Kubernetes管理 ==="

# 配置多集群访问
setup_multi_cluster_access() {
    echo "配置多集群访问..."
    
    # 合并kubeconfig
    export KUBECONFIG=~/.kube/config:~/.kube/aws-config:~/.kube/gcp-config:~/.kube/azure-config
    kubectl config view --flatten > ~/.kube/merged-config
    export KUBECONFIG=~/.kube/merged-config
    
    # 设置集群别名
    kubectl config rename-context aws-cluster aws
    kubectl config rename-context gcp-cluster gcp
    kubectl config rename-context azure-cluster azure
    
    # 显示可用集群
    echo "可用集群:"
    kubectl config get-contexts
}

# 部署跨云应用
deploy_cross_cloud_app() {
    echo "部署跨云应用..."
    
    # AWS集群部署
    kubectl config use-context aws
    cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cross-cloud-app-aws
  namespace: default
  labels:
    cloud: aws
    region: us-west-2
spec:
  replicas: 3
  selector:
    matchLabels:
      app: cross-cloud-app
      cloud: aws
  template:
    metadata:
      labels:
        app: cross-cloud-app
        cloud: aws
    spec:
      containers:
      - name: app
        image: nginx:latest
        env:
        - name: CLOUD_PROVIDER
          value: "AWS"
        - name: REGION
          value: "us-west-2"
        ports:
        - containerPort: 80
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 500m
            memory: 512Mi
      nodeSelector:
        kubernetes.io/arch: amd64
---
apiVersion: v1
kind: Service
metadata:
  name: cross-cloud-app-aws-service
  namespace: default
spec:
  selector:
    app: cross-cloud-app
    cloud: aws
  ports:
  - port: 80
    targetPort: 80
  type: LoadBalancer
EOF
    
    # GCP集群部署
    kubectl config use-context gcp
    cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cross-cloud-app-gcp
  namespace: default
  labels:
    cloud: gcp
    region: us-central1
spec:
  replicas: 2
  selector:
    matchLabels:
      app: cross-cloud-app
      cloud: gcp
  template:
    metadata:
      labels:
        app: cross-cloud-app
        cloud: gcp
    spec:
      containers:
      - name: app
        image: nginx:latest
        env:
        - name: CLOUD_PROVIDER
          value: "GCP"
        - name: REGION
          value: "us-central1"
        ports:
        - containerPort: 80
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 500m
            memory: 512Mi
      nodeSelector:
        kubernetes.io/arch: amd64
---
apiVersion: v1
kind: Service
metadata:
  name: cross-cloud-app-gcp-service
  namespace: default
spec:
  selector:
    app: cross-cloud-app
    cloud: gcp
  ports:
  - port: 80
    targetPort: 80
  type: LoadBalancer
EOF
}

# 配置跨云网络
setup_cross_cloud_networking() {
    echo "配置跨云网络..."
    
    # 安装Submariner
    curl -Ls https://get.submariner.io | bash
    export PATH=$PATH:~/.local/bin
    
    # 部署Submariner Broker
    kubectl config use-context aws
    subctl deploy-broker
    
    # 加入AWS集群
    subctl join broker-info.subm --kubeconfig ~/.kube/aws-config --clusterid aws-cluster
    
    # 加入GCP集群
    kubectl config use-context gcp
    subctl join broker-info.subm --kubeconfig ~/.kube/gcp-config --clusterid gcp-cluster
    
    # 验证连接
    subctl show connections
    subctl show endpoints
}
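
# 【示例】跨集群服务发现(示意):通过Submariner Lighthouse导出服务,
# 使其他集群可以用 <服务名>.<命名空间>.svc.clusterset.local 的多集群域名访问。
# 前提假设:join时已启用service-discovery组件,且上文的跨云应用已部署。
export_cross_cluster_service() {
    # 在GCP集群导出服务
    kubectl config use-context gcp
    subctl export service cross-cloud-app-gcp-service --namespace default

    # 在AWS集群验证多集群DNS解析
    kubectl config use-context aws
    kubectl run dns-test --rm -it --restart=Never --image=busybox:1.36 -- \
        nslookup cross-cloud-app-gcp-service.default.svc.clusterset.local
}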

# 监控多云环境
setup_multi_cloud_monitoring() {
    echo "设置多云监控..."
    
    # 在每个集群部署Prometheus
    for context in aws gcp azure; do
        if kubectl config get-contexts | grep -q $context; then
            echo "在$context集群部署监控..."
            kubectl config use-context $context
            
            # 部署Prometheus
            cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      containers:
      - name: prometheus
        image: prom/prometheus:latest
        ports:
        - containerPort: 9090
        args:
        - '--config.file=/etc/prometheus/prometheus.yml'
        - '--storage.tsdb.path=/prometheus/'
        - '--web.console.libraries=/etc/prometheus/console_libraries'
        - '--web.console.templates=/etc/prometheus/consoles'
        - '--web.enable-lifecycle'
        - '--web.external-url=http://prometheus-$context.example.com'
        volumeMounts:
        - name: prometheus-config
          mountPath: /etc/prometheus/
        - name: prometheus-storage
          mountPath: /prometheus/
        env:
        - name: CLUSTER_NAME
          value: "$context"
      volumes:
      - name: prometheus-config
        configMap:
          name: prometheus-config
      - name: prometheus-storage
        emptyDir: {}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      external_labels:
        cluster: '$context'
        region: '${REGION:-unknown}'
    
    scrape_configs:
    - job_name: 'kubernetes-apiservers'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    
    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
---
apiVersion: v1
kind: Service
metadata:
  name: prometheus-service
  namespace: monitoring
spec:
  selector:
    app: prometheus
  ports:
  - port: 9090
    targetPort: 9090
  type: LoadBalancer
EOF
        fi
    done
}
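
# 【示例】多云指标汇聚(示意):在一个汇聚用Prometheus上通过/federate接口
# 拉取各云集群Prometheus的数据,依靠上面external_labels中的cluster标签区分来源。
# prometheus-aws.example.com等地址为假设的LoadBalancer/Ingress入口,需按实际环境替换。
create_federation_scrape_config() {
    cat > federation-scrape-config.yaml <<'EOF'
scrape_configs:
- job_name: 'federate'
  scrape_interval: 30s
  honor_labels: true
  metrics_path: '/federate'
  params:
    'match[]':
    - '{job=~"kubernetes-.*"}'
  static_configs:
  - targets:
    - 'prometheus-aws.example.com:9090'
    - 'prometheus-gcp.example.com:9090'
    - 'prometheus-azure.example.com:9090'
EOF
    echo "联邦抓取配置已生成: federation-scrape-config.yaml"
}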

# 灾难恢复
setup_disaster_recovery() {
    echo "设置灾难恢复..."
    
    # 安装Velero
    wget https://github.com/vmware-tanzu/velero/releases/download/v1.11.0/velero-v1.11.0-linux-amd64.tar.gz
    tar -xzf velero-v1.11.0-linux-amd64.tar.gz
    sudo mv velero-v1.11.0-linux-amd64/velero /usr/local/bin/
    
    # 在每个集群配置Velero
    for context in aws gcp; do
        if kubectl config get-contexts | grep -q $context; then
            echo "在$context集群配置Velero..."
            kubectl config use-context $context
            
            # 创建备份存储位置
            if [ "$context" = "aws" ]; then
                velero install \
                    --provider aws \
                    --plugins velero/velero-plugin-for-aws:v1.7.0 \
                    --bucket velero-backups-aws \
                    --backup-location-config region=us-west-2 \
                    --snapshot-location-config region=us-west-2
            elif [ "$context" = "gcp" ]; then
                velero install \
                    --provider gcp \
                    --plugins velero/velero-plugin-for-gcp:v1.7.0 \
                    --bucket velero-backups-gcp \
                    --backup-location-config project=my-project
            fi
            
            # 创建定期备份
            velero schedule create daily-backup --schedule="0 2 * * *" --ttl 720h0m0s
        fi
    done
}
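
# 【示例】跨集群恢复演练(示意):Velero的恢复以备份存储为中心,
# 若目标集群的Velero能访问到同一个备份(例如配置了相同或额外的BackupStorageLocation),
# 即可在目标集群上直接从备份恢复。以下参数均为假设值。
restore_from_backup() {
    local target_context=$1     # 目标集群上下文,如 gcp
    local backup_name=$2        # 要恢复的备份名称,可先用 velero backup get 查询

    kubectl config use-context ${target_context}

    # 列出当前可见的备份
    velero backup get

    # 从指定备份创建恢复任务,--wait等待恢复完成
    velero restore create --from-backup ${backup_name} --wait

    # 查看恢复结果
    velero restore get
}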

# 成本优化
optimize_multi_cloud_costs() {
    echo "多云成本优化..."
    
    # 分析各云资源使用情况
    for context in aws gcp azure; do
        if kubectl config get-contexts | grep -q $context; then
            echo "分析$context集群资源使用..."
            kubectl config use-context $context
            
            echo "节点资源使用:"
            kubectl top nodes 2>/dev/null || echo "需要安装metrics-server"
            
            echo "Pod资源使用:"
            kubectl top pods --all-namespaces 2>/dev/null | head -20
            
            echo "未使用的资源:"
            kubectl get pv | grep Available
            kubectl get pvc --all-namespaces | grep Pending
        fi
    done
    
    # 生成成本报告
    cat > multi-cloud-cost-report.md <<EOF
# 多云成本分析报告

生成时间: $(date)

## 集群资源概览

| 集群 | 节点数 | CPU总量 | 内存总量 | 存储总量 |
|------|--------|---------|----------|----------|
EOF
    
    for context in aws gcp azure; do
        if kubectl config get-contexts | grep -q $context; then
            kubectl config use-context $context
            nodes=$(kubectl get nodes --no-headers | wc -l)
            echo "| $context | $nodes | - | - | - |" >> multi-cloud-cost-report.md
        fi
    done
    
    cat >> multi-cloud-cost-report.md <<EOF

## 优化建议

1. **资源右调**: 根据实际使用情况调整Pod资源请求和限制
2. **自动扩缩容**: 实施HPA和VPA减少资源浪费
3. **Spot实例**: 在合适的工作负载中使用Spot/Preemptible实例
4. **存储优化**: 清理未使用的PV和PVC
5. **跨云负载均衡**: 根据成本和性能优化工作负载分布
EOF
    
    echo "成本报告已生成: multi-cloud-cost-report.md"
}

# 主函数
case "$1" in
    setup)
        setup_multi_cluster_access
        ;;
    deploy)
        deploy_cross_cloud_app
        ;;
    network)
        setup_cross_cloud_networking
        ;;
    monitoring)
        setup_multi_cloud_monitoring
        ;;
    backup)
        setup_disaster_recovery
        ;;
    cost)
        optimize_multi_cloud_costs
        ;;
    all)
        setup_multi_cluster_access
        deploy_cross_cloud_app
        setup_cross_cloud_networking
        setup_multi_cloud_monitoring
        setup_disaster_recovery
        optimize_multi_cloud_costs
        ;;
    *)
        echo "用法: $0 {setup|deploy|network|monitoring|backup|cost|all}"
        echo "  setup      - 配置多集群访问"
        echo "  deploy     - 部署跨云应用"
        echo "  network    - 配置跨云网络"
        echo "  monitoring - 设置监控"
        echo "  backup     - 设置灾难恢复"
        echo "  cost       - 成本优化"
        echo "  all        - 执行所有配置"
        exit 1
        ;;
esac