7.1 生产环境部署

7.1.1 系统要求

硬件要求:

- CPU:2核心以上(推荐4核心)
- 内存:2GB以上(推荐4GB)
- 存储:20GB以上可用空间
- 网络:稳定的网络连接

软件要求:

- 操作系统:Linux(Ubuntu 20.04+、CentOS 8+、RHEL 8+)
- 内核版本:4.15+
- 文件系统:ext4、xfs或btrfs
- 防火墙:iptables或firewalld

网络要求:

- 端口80(HTTP)和443(HTTPS)开放
- DNS解析正确配置
- 如使用Let’s Encrypt,需要外网访问

7.1.2 安装方式选择

1. 包管理器安装(推荐)

# Ubuntu/Debian: install the repository prerequisites, register the Cloudsmith
# signing key and source list, then install the package.
sudo apt install -y debian-keyring debian-archive-keyring apt-transport-https curl
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | sudo gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | sudo tee /etc/apt/sources.list.d/caddy-stable.list
sudo apt update
sudo apt install caddy

# CentOS/RHEL/Fedora: the copr plugin must exist before `dnf copr enable`,
# and every step needs root.
sudo dnf install 'dnf-command(copr)'
sudo dnf copr enable @caddy/caddy
sudo dnf install caddy

# Alternative: fetch a static Caddy v2 binary from the official download API.
# (The former `curl https://getcaddy.com | bash` installer targeted Caddy v1
# and must not be used for v2.)
curl -fsSL -o caddy 'https://caddyserver.com/api/download?os=linux&arch=amd64'
sudo install -m 0755 caddy /usr/bin/caddy

2. Docker部署

# Dockerfile
# Builds a Caddy image with the site content and Caddyfile baked in.
FROM caddy:2-alpine

# Copy the configuration and the static site into the image.
COPY Caddyfile /etc/caddy/Caddyfile
COPY site/ /usr/share/caddy/

# Make sure the data directory exists.
RUN mkdir -p /data/caddy

# Ports served by Caddy.
EXPOSE 80 443

# Health check. The alpine-based image does not ship curl, so use the
# BusyBox wget that is always present; a non-2xx answer or a connection
# failure makes wget exit non-zero and marks the container unhealthy.
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD wget -q --spider http://localhost/health || exit 1

# Start Caddy in the foreground with the Caddyfile adapter.
CMD ["caddy", "run", "--config", "/etc/caddy/Caddyfile", "--adapter", "caddyfile"]
# docker-compose.yml
# Runs the locally built Caddy image with persistent data/config volumes.
version: '3.8'

services:
  caddy:
    build: .
    container_name: caddy-server
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
      # HTTP/3 runs over QUIC (UDP); without this mapping only h1/h2 work.
      - "443:443/udp"
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - ./site:/usr/share/caddy:ro
      - caddy_data:/data
      - caddy_config:/config
    # The admin API is unauthenticated and port 2019 is not published, so it
    # is left on its default loopback address instead of being bound to
    # 0.0.0.0 where every container on caddy-network could reconfigure Caddy.
    networks:
      - caddy-network
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

# External volumes are not created by Compose. Create them once with
# `docker volume create caddy_data` and `docker volume create caddy_config`
# before the first `docker compose up`.
volumes:
  caddy_data:
    external: true
  caddy_config:
    external: true

networks:
  caddy-network:
    driver: bridge

3. Kubernetes部署

# caddy-deployment.yaml
# Deployment running three caddy:2-alpine pods in the "web" namespace.
# The Caddyfile comes from a ConfigMap; /data and /config are backed by
# PersistentVolumeClaims.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: caddy
  namespace: web
  labels:
    app: caddy
spec:
  # NOTE(review): every replica mounts the same two claims below; confirm the
  # claims' access mode allows pods on different nodes to attach them.
  replicas: 3
  selector:
    matchLabels:
      app: caddy
  template:
    metadata:
      labels:
        app: caddy
    spec:
      containers:
      - name: caddy
        image: caddy:2-alpine
        ports:
        - containerPort: 80
        - containerPort: 443
        # Admin API port (see CADDY_ADMIN below).
        - containerPort: 2019
        volumeMounts:
        # ConfigMap with the Caddyfile, mounted read-only.
        - name: caddyfile
          mountPath: /etc/caddy
          readOnly: true
        - name: caddy-data
          mountPath: /data
        - name: caddy-config
          mountPath: /config
        env:
        # NOTE(review): binds the admin API to all pod interfaces; the API has
        # no authentication of its own — confirm this exposure is intended.
        - name: CADDY_ADMIN
          value: "0.0.0.0:2019"
        resources:
          requests:
            memory: "128Mi"
            cpu: "100m"
          limits:
            memory: "512Mi"
            cpu: "500m"
        # Both probes query /health on port 80.
        livenessProbe:
          httpGet:
            path: /health
            port: 80
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /health
            port: 80
          initialDelaySeconds: 5
          periodSeconds: 5
      volumes:
      # The volume is named "caddyfile" but is backed by the ConfigMap
      # "caddy-config"; the volume named "caddy-config" further down is a PVC.
      - name: caddyfile
        configMap:
          name: caddy-config
      - name: caddy-data
        persistentVolumeClaim:
          claimName: caddy-data-pvc
      - name: caddy-config
        persistentVolumeClaim:
          claimName: caddy-config-pvc

---
# Public entry point for the Caddy pods (selector app=caddy).
apiVersion: v1
kind: Service
metadata:
  name: caddy-service
  namespace: web
spec:
  selector:
    app: caddy
  ports:
  - name: http
    port: 80
    targetPort: 80
  - name: https
    port: 443
    targetPort: 443
  # The admin API (2019) is intentionally NOT published here: it is
  # unauthenticated, and a LoadBalancer Service would expose it to every
  # client that can reach the load balancer. Reach it with
  # `kubectl port-forward` or a separate ClusterIP Service if needed.
  type: LoadBalancer

---
# ConfigMap holding the Caddyfile. The embedded config answers /health on
# port 80 with "OK" and serves files for everything else.
# NOTE(review): the embedded global options bind the admin API to
# 0.0.0.0:2019 — confirm that exposure is intended.
apiVersion: v1
kind: ConfigMap
metadata:
  name: caddy-config
  namespace: web
data:
  # Literal block scalar: the text below is the Caddyfile, byte for byte.
  Caddyfile: |
    {
        admin 0.0.0.0:2019
        log {
            output stdout
            format json
        }
    }
    
    :80 {
        respond /health "OK" 200
        file_server
    }

---
# Storage for Caddy's /data directory.
# The claim is mounted by every replica of the Deployment, so it must be
# ReadWriteMany; a ReadWriteOnce volume can only be attached to one node
# and would leave replicas on other nodes stuck while attaching.
# NOTE: this requires a storage class that supports RWX (e.g. NFS/CephFS).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: caddy-data-pvc
  namespace: web
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi

---
# Storage for Caddy's /config directory; shared by all replicas as well,
# so the same ReadWriteMany requirement applies.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: caddy-config-pvc
  namespace: web
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi

7.1.3 系统服务配置

Systemd服务配置

# /etc/systemd/system/caddy.service
# systemd unit that runs Caddy as the unprivileged "caddy" user.
[Unit]
Description=Caddy
Documentation=https://caddyserver.com/docs/
# Start only once the network is fully up.
After=network.target network-online.target
Requires=network-online.target

[Service]
Type=notify
User=caddy
Group=caddy
# --environ makes Caddy print its environment at startup.
# NOTE(review): that output may include secrets if an environment file is
# ever attached to this unit — confirm before adding one.
ExecStart=/usr/bin/caddy run --environ --config /etc/caddy/Caddyfile
# `systemctl reload caddy` always reloads /etc/caddy/Caddyfile; --force
# applies the config even when it is unchanged.
ExecReload=/usr/bin/caddy reload --config /etc/caddy/Caddyfile --force
TimeoutStopSec=5s
LimitNOFILE=1048576
LimitNPROC=1048576
PrivateTmp=true
ProtectSystem=full
# Allows binding ports 80/443 without running as root.
AmbientCapabilities=CAP_NET_BIND_SERVICE

[Install]
WantedBy=multi-user.target
# Create the dedicated system account the service runs as.
sudo useradd --system --home /var/lib/caddy --create-home --shell /bin/false caddy

# Give the service account ownership of its config and state directories.
sudo chown -R caddy:caddy /etc/caddy
sudo chown -R caddy:caddy /var/lib/caddy

# Create the log directories used by the log configurations in this guide.
# The unprivileged caddy user cannot create anything under /var/log itself,
# so without this step file logging fails at startup.
sudo install -d -o caddy -g caddy -m 0755 /var/log/caddy /var/log/caddy/sites

# Register, enable and start the service.
sudo systemctl daemon-reload
sudo systemctl enable caddy
sudo systemctl start caddy

# Verify that it is running.
sudo systemctl status caddy

进程管理器配置(PM2)

// ecosystem.config.js
module.exports = {
  apps: [{
    name: 'caddy',
    script: '/usr/bin/caddy',
    args: 'run --config /etc/caddy/Caddyfile',
    instances: 1,
    autorestart: true,
    watch: false,
    max_memory_restart: '1G',
    env: {
      NODE_ENV: 'production'
    },
    error_file: '/var/log/caddy/error.log',
    out_file: '/var/log/caddy/out.log',
    log_file: '/var/log/caddy/combined.log',
    time: true
  }]
};

7.2 配置管理

7.2.1 配置文件组织

目录结构

/etc/caddy/
├── Caddyfile                 # 主配置文件
├── conf.d/                   # 配置片段目录
│   ├── global.conf          # 全局配置
│   ├── sites/               # 站点配置
│   │   ├── example.com.conf
│   │   ├── api.example.com.conf
│   │   └── admin.example.com.conf
│   ├── snippets/            # 配置片段
│   │   ├── ssl.conf
│   │   ├── security.conf
│   │   └── logging.conf
│   └── upstreams/           # 上游配置
│       ├── backend.conf
│       └── api.conf
├── certs/                   # 证书目录
├── logs/                    # 日志目录
└── data/                    # 数据目录

主配置文件

# /etc/caddy/Caddyfile
# Entry-point config: it contains no settings of its own and only pulls in
# the fragments under conf.d/.
{
    # Global options live in a separate file.
    import conf.d/global.conf
}

# One file per site; every *.conf in sites/ is loaded.
import conf.d/sites/*.conf

全局配置

# /etc/caddy/conf.d/global.conf
# Global options, imported inside the main Caddyfile's global block.

# Admin API, reachable from the local host only.
admin 127.0.0.1:2019

# Contact address used for ACME (Let's Encrypt) registration.
email admin@example.com

# Default SNI
default_sni example.com

# Storage location (filesystem-backed).
storage file_system {
    root /var/lib/caddy
}

# Logging: JSON at INFO level, written to a file that Caddy rolls itself
# (100mb per file, 10 files kept, for at most 720h).
log {
    output file /var/log/caddy/access.log {
        roll_size 100mb
        roll_keep 10
        roll_keep_for 720h
    }
    format json
    level INFO
}

# Server-wide settings: metrics enabled, plus read/write/idle timeouts.
servers {
    metrics
    timeouts {
        read_body 30s
        read_header 30s
        write 30s
        idle 120s
    }
}

站点配置示例

# /etc/caddy/conf.d/sites/example.com.conf
# Static site for example.com with a custom 404 page.
example.com {
    # Shared snippets (TLS, security headers, logging).
    import conf.d/snippets/ssl.conf
    import conf.d/snippets/security.conf
    import conf.d/snippets/logging.conf
    
    # Document root
    root * /var/www/example.com
    
    # Serve static files
    file_server
    
    # Response compression
    encode gzip
    
    # Error pages: a 404 error is rewritten to /404.html and served
    # by the file server.
    handle_errors {
        @404 {
            expression {http.error.status_code} == 404
        }
        rewrite @404 /404.html
        file_server
    }
}

配置片段

# /etc/caddy/conf.d/snippets/ssl.conf
# SSL/TLS settings: TLS 1.2 and 1.3 only, two AES-256-GCM suites and
# three curves.
tls {
    protocols tls1.2 tls1.3
    ciphers TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
    curves x25519 secp384r1 secp256r1
}

# HSTS: one year, including subdomains, with the preload flag.
header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
# /etc/caddy/conf.d/snippets/security.conf
# Security-related response headers.
header {
    # XSS protection
    X-XSS-Protection "1; mode=block"
    
    # Disable content-type sniffing
    X-Content-Type-Options "nosniff"
    
    # Clickjacking protection: never allow framing
    X-Frame-Options "DENY"
    
    # Referrer policy
    Referrer-Policy "strict-origin-when-cross-origin"
    
    # Permissions policy: geolocation, microphone and camera disabled
    Permissions-Policy "geolocation=(), microphone=(), camera=()"
    
    # Content security policy ('unsafe-inline' is allowed for scripts and styles)
    Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'"
    
    # Remove the Server header
    -Server
}

7.2.2 环境配置管理

环境变量配置

# /etc/caddy/env
# KEY=value settings; all values shown are placeholders.
# NOTE(review): this file holds credentials — presumably it should be
# readable only by root and the service user and kept out of version control.

# Database
DB_HOST=localhost
DB_PORT=5432
DB_NAME=myapp
DB_USER=caddy
DB_PASSWORD=secure_password

# Redis
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=redis_password

# API keys / signing secrets
API_KEY=your_api_key_here
JWT_SECRET=your_jwt_secret_here

# Mail (SMTP)
SMTP_HOST=smtp.example.com
SMTP_PORT=587
SMTP_USER=noreply@example.com
SMTP_PASSWORD=smtp_password

# Monitoring
MONITORING_ENDPOINT=https://monitoring.example.com
ALERT_WEBHOOK=https://alerts.example.com/webhook
# Using environment variables inside a Caddyfile via {env.NAME} placeholders.
example.com {
    # NOTE(review): the upstream is built from DB_HOST/DB_PORT, which the env
    # file sets to a database port (5432) — confirm an HTTP service is meant.
    reverse_proxy {env.DB_HOST}:{env.DB_PORT} {
        header_up X-API-Key {env.API_KEY}
    }
    
    # JWT authentication
    # NOTE(review): `jwt` does not appear to be a directive of stock Caddy;
    # presumably a plugin is required — verify the exact syntax against it.
    jwt {
        trusted_tokens {
            static_secret {env.JWT_SECRET}
        }
    }
    
    # Redis cache
    # NOTE(review): `cache` likewise looks plugin-provided — TODO confirm.
    cache {
        redis {
            host {env.REDIS_HOST}:{env.REDIS_PORT}
            password {env.REDIS_PASSWORD}
        }
    }
}

多环境配置

#!/bin/bash
# deploy.sh - validate and hot-reload the Caddy config of one environment.
#
# Usage: deploy.sh [development|staging|production]   (default: production)
# Exits 1 on an unknown environment, a missing env file or an invalid config.

set -euo pipefail

ENVIRONMENT="${1:-production}"

# Pick the config file and settings for the requested environment.
case "$ENVIRONMENT" in
    "development")
        export CADDY_CONFIG="/etc/caddy/dev.Caddyfile"
        export LOG_LEVEL="DEBUG"
        export ADMIN_LISTEN="0.0.0.0:2019"
        ;;
    "staging")
        export CADDY_CONFIG="/etc/caddy/staging.Caddyfile"
        export LOG_LEVEL="INFO"
        export ADMIN_LISTEN="127.0.0.1:2019"
        ;;
    "production")
        export CADDY_CONFIG="/etc/caddy/Caddyfile"
        export LOG_LEVEL="WARN"
        export ADMIN_LISTEN="127.0.0.1:2019"
        ;;
    *)
        echo "Unknown environment: $ENVIRONMENT" >&2
        exit 1
        ;;
esac

# Load the environment-specific variables. `set -a` exports every variable
# the file defines so the caddy child processes below can see them.
ENV_FILE="/etc/caddy/env.$ENVIRONMENT"
if [ ! -r "$ENV_FILE" ]; then
    echo "Environment file not found: $ENV_FILE" >&2
    exit 1
fi
set -a
# shellcheck disable=SC1090
source "$ENV_FILE"
set +a

# Validate, then reload exactly the file that was validated. A plain
# `systemctl reload caddy` would always load the unit's fixed
# /etc/caddy/Caddyfile, regardless of $CADDY_CONFIG.
if caddy validate --config "$CADDY_CONFIG"; then
    echo "Configuration valid, reloading Caddy..."
    caddy reload --config "$CADDY_CONFIG"
else
    echo "Configuration invalid, aborting deployment" >&2
    exit 1
fi

7.2.3 配置版本控制

Git配置管理

# Initialise the configuration repository.
cd /etc/caddy
git init

# Keep secrets (env, env.<environment>) and local backups out of the history.
printf '%s\n' 'env' 'env.*' 'backups/' > .gitignore

git add .
git commit -m "Initial Caddy configuration"

# Older Git versions name the first branch "master"; rename it so the push
# below always has a "main" branch to send.
git branch -M main

# Add the remote and publish.
git remote add origin https://github.com/company/caddy-config.git
git push -u origin main
#!/bin/bash
# config-deploy.sh - pull the latest config from Git, validate it and reload
# Caddy; restore the previous files if validation or the reload fails.

set -euo pipefail

CONFIG_REPO="https://github.com/company/caddy-config.git"
CONFIG_DIR="/etc/caddy"
BACKUP_DIR="/etc/caddy/backups"
KEEP_BACKUPS=10

# Copy the snapshot taken at the start of this run back over the live tree.
restore_snapshot() {
    cp -a "$SNAPSHOT"/. "$CONFIG_DIR"/
}

# Snapshot the current config. BACKUP_DIR lives inside CONFIG_DIR, so it is
# excluded from the copy; otherwise the directory would be copied into itself
# and every snapshot would contain all earlier ones.
echo "Creating backup..."
SNAPSHOT="$BACKUP_DIR/$(date +%Y%m%d_%H%M%S)"
mkdir -p "$SNAPSHOT"
tar -C "$CONFIG_DIR" --exclude="./${BACKUP_DIR#"$CONFIG_DIR"/}" -cf - . \
    | tar -C "$SNAPSHOT" -xf -

# Pull the latest configuration.
echo "Pulling latest configuration..."
git -C "$CONFIG_DIR" pull origin main

# Validate before touching the running server. The pull has already changed
# the files on disk, so put the old ones back when validation fails.
echo "Validating configuration..."
if ! caddy validate --config "$CONFIG_DIR/Caddyfile"; then
    echo "Configuration invalid, aborting deployment" >&2
    restore_snapshot
    exit 1
fi

echo "Configuration valid, reloading Caddy..."
if systemctl reload caddy && sleep 5 && systemctl is-active --quiet caddy; then
    echo "Deployment successful"
    # Prune old snapshots, keeping the newest $KEEP_BACKUPS. Names from `ls`
    # are relative, so they are joined with BACKUP_DIR before deletion.
    ls -1t "$BACKUP_DIR" | tail -n +"$((KEEP_BACKUPS + 1))" | while IFS= read -r old; do
        rm -rf -- "$BACKUP_DIR/$old"
    done
else
    echo "Service failed to start, rolling back..."
    restore_snapshot
    systemctl reload caddy
    exit 1
fi

7.3 监控和日志

7.3.1 日志配置

结构化日志配置

{
    # 全局日志配置
    log {
        output file /var/log/caddy/access.log {
            roll_size 100mb
            roll_keep 30
            roll_keep_for 720h
        }
        
        format json {
            time_format "2006-01-02T15:04:05.000Z07:00"
            message_key "message"
            level_key "level"
            time_key "timestamp"
            caller_key "caller"
        }
        
        level INFO
        
        # 包含字段
        include {
            http.request.method
            http.request.uri
            http.request.proto
            http.request.remote_ip
            http.request.remote_port
            http.request.headers.User-Agent
            http.request.headers.Referer
            http.response.status
            http.response.size
            http.response.duration
        }
        
        # 排除敏感字段
        exclude {
            http.request.headers.Authorization
            http.request.headers.Cookie
            http.request.headers.X-API-Key
        }
    }
    
    # 错误日志
    log error {
        output file /var/log/caddy/error.log {
            roll_size 50mb
            roll_keep 10
        }
        format console
        level ERROR
    }
    
    # 安全事件日志
    log security {
        output file /var/log/caddy/security.log {
            roll_size 50mb
            roll_keep 20
        }
        format json
        level WARN
    }
}

example.com {
    # 站点特定日志
    log {
        output file /var/log/caddy/sites/example.com.log {
            roll_size 50mb
            roll_keep 15
        }
        format json
    }
    
    # 安全事件记录
    @security_events {
        status 401 403 429
    }
    
    log @security_events security {
        output file /var/log/caddy/security.log
    }
    
    # API访问日志
    @api_requests {
        path /api/*
    }
    
    log @api_requests {
        output file /var/log/caddy/api-access.log {
            roll_size 100mb
            roll_keep 30
        }
        format json
        include {
            http.request.headers.X-API-Key
            http.request.headers.X-User-ID
        }
    }
    
    file_server
}

日志轮转配置

# /etc/logrotate.d/caddy
# Daily rotation of Caddy's log files.
# NOTE(review): the Caddy log blocks in this guide already roll files
# themselves (roll_size / roll_keep); confirm only one of the two mechanisms
# is active for any given file.

# Top-level logs: keep 30 compressed generations.
/var/log/caddy/*.log {
    daily
    missingok
    rotate 30
    compress
    delaycompress
    notifempty
    create 0644 caddy caddy
    # Run postrotate once for the whole set instead of once per rotated
    # file, so Caddy is reloaded a single time per run.
    sharedscripts
    postrotate
        systemctl reload caddy
    endscript
}

# Per-site logs: keep 15 compressed generations.
/var/log/caddy/sites/*.log {
    daily
    missingok
    rotate 15
    compress
    delaycompress
    notifempty
    create 0644 caddy caddy
    sharedscripts
    postrotate
        systemctl reload caddy
    endscript
}

7.3.2 监控配置

Prometheus监控

# 监控端点配置
monitoring.internal {
    # 基本认证保护
    basicauth {
        prometheus $2a$14$Zkx19XLiW6VYouLHR5NmfOFU0z2GTNqq9qB6FY9gZKOOdOoKw6Uw.
    }
    
    # Prometheus指标
    handle /metrics {
        metrics {
            # 启用详细指标
            disable_openmetrics
            
            # 自定义标签
            labels {
                instance {system.hostname}
                environment production
                version {system.version}
            }
        }
    }
    
    # 健康检查
    handle /health {
        respond `{
            "status": "healthy",
            "timestamp": "{time.now.unix}",
            "version": "{system.version}",
            "uptime": "{system.uptime}"
        }` 200 {
            header Content-Type application/json
        }
    }
    
    # 详细健康检查
    handle /health/detailed {
        health_check {
            checks {
                # 磁盘空间检查
                disk_usage {
                    path /var/lib/caddy
                    threshold 90
                }
                
                # 内存使用检查
                memory_usage {
                    threshold 80
                }
                
                # 上游服务检查
                upstream_health {
                    url http://backend:8080/health
                    timeout 5s
                }
            }
        }
    }
    
    # 拒绝其他请求
    respond "Not Found" 404
}

Prometheus配置

# prometheus.yml
# Scrapes the local Caddy instance and three named Caddy nodes, evaluates
# the rules in caddy_rules.yml and sends alerts to Alertmanager.
global:
  evaluation_interval: 15s
  scrape_interval: 15s

rule_files:
  - caddy_rules.yml

alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

scrape_configs:
  # Local instance, scraped every 30s.
  - job_name: caddy
    metrics_path: /metrics
    scrape_interval: 30s
    basic_auth:
      username: prometheus
      password: prometheus_password
    static_configs:
      - targets:
          - 'localhost:2019'

  # Remote nodes, scraped at the global interval.
  - job_name: caddy-nodes
    metrics_path: /metrics
    basic_auth:
      username: prometheus
      password: prometheus_password
    static_configs:
      - targets:
          - 'caddy-01.example.com:2019'
          - 'caddy-02.example.com:2019'
          - 'caddy-03.example.com:2019'

告警规则

# caddy_rules.yml
# Prometheus alerting rules for Caddy.
groups:
  - name: caddy
    rules:
      # High error rate: share of 5xx responses over the last 5 minutes.
      # The expression is a literal block scalar; the earlier multi-line
      # plain scalar closed with ")" at the key's own indentation, which is
      # not valid YAML. The status code is exposed as the `code` label of the
      # request-duration histogram, so its _count series is used here.
      - alert: CaddyHighErrorRate
        expr: |
          (
            sum(rate(caddy_http_request_duration_seconds_count{code=~"5.."}[5m]))
            /
            sum(rate(caddy_http_request_duration_seconds_count[5m]))
          ) > 0.05
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Caddy high error rate"
          description: "Caddy error rate is {{ $value | humanizePercentage }} for more than 2 minutes"

      # High latency: 95th percentile above one second.
      - alert: CaddyHighLatency
        expr: histogram_quantile(0.95, sum(rate(caddy_http_request_duration_seconds_bucket[5m])) by (le)) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Caddy high latency"
          description: "Caddy 95th percentile latency is {{ $value }}s for more than 5 minutes"

      # Instance down: covers both scrape jobs ("caddy" and "caddy-nodes").
      - alert: CaddyDown
        expr: up{job=~"caddy|caddy-nodes"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Caddy is down"
          description: "Caddy has been down for more than 1 minute"

      # Certificate about to expire (less than 7 days left).
      # NOTE(review): confirm that the running Caddy build actually exports
      # caddy_tls_certificate_expiry_timestamp and a `domain` label.
      - alert: CaddyCertificateExpiring
        expr: caddy_tls_certificate_expiry_timestamp - time() < 7 * 24 * 3600
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "Caddy certificate expiring soon"
          description: "Certificate for {{ $labels.domain }} will expire in {{ $value | humanizeDuration }}"

      # High memory usage: resident set above 1000 MB.
      - alert: CaddyHighMemoryUsage
        expr: process_resident_memory_bytes{job=~"caddy|caddy-nodes"} / 1024 / 1024 > 1000
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Caddy high memory usage"
          description: "Caddy memory usage is {{ $value }}MB for more than 10 minutes"

7.3.3 日志分析

ELK Stack集成

# docker-compose.yml for ELK
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
    container_name: elasticsearch
    environment:
      - discovery.type=single-node
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - xpack.security.enabled=false
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    ports:
      - "9200:9200"
    networks:
      - elk

  logstash:
    image: docker.elastic.co/logstash/logstash:8.8.0
    container_name: logstash
    volumes:
      - ./logstash/config/logstash.yml:/usr/share/logstash/config/logstash.yml:ro
      - ./logstash/pipeline:/usr/share/logstash/pipeline:ro
    ports:
      - "5044:5044"
      - "5000:5000/tcp"
      - "5000:5000/udp"
      - "9600:9600"
    environment:
      LS_JAVA_OPTS: "-Xmx256m -Xms256m"
    networks:
      - elk
    depends_on:
      - elasticsearch

  kibana:
    image: docker.elastic.co/kibana/kibana:8.8.0
    container_name: kibana
    ports:
      - "5601:5601"
    environment:
      ELASTICSEARCH_URL: http://elasticsearch:9200
      ELASTICSEARCH_HOSTS: '["http://elasticsearch:9200"]'
    networks:
      - elk
    depends_on:
      - elasticsearch

  filebeat:
    image: docker.elastic.co/beats/filebeat:8.8.0
    container_name: filebeat
    user: root
    volumes:
      - ./filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro
      - /var/log/caddy:/var/log/caddy:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - elk
    depends_on:
      - logstash

volumes:
  elasticsearch_data:
    driver: local

networks:
  elk:
    driver: bridge

Filebeat配置

# filebeat/filebeat.yml
# Ships Caddy's JSON log files to Logstash.
filebeat.inputs:
# Top-level Caddy logs.
- type: log
  enabled: true
  paths:
    - /var/log/caddy/*.log
  # Extra fields added to every event; fields_under_root places them at the
  # top level of the event.
  fields:
    service: caddy
    environment: production
  fields_under_root: true
  # Decode each line as JSON and lift the keys to the event root.
  json.keys_under_root: true
  json.add_error_key: true
  # Lines that do not start with "{" are appended to the preceding line.
  multiline.pattern: '^\{'
  multiline.negate: true
  multiline.match: after

# Per-site access logs, tagged with log_type=site_access.
- type: log
  enabled: true
  paths:
    - /var/log/caddy/sites/*.log
  fields:
    service: caddy
    log_type: site_access
    environment: production
  fields_under_root: true
  json.keys_under_root: true

# Enrich events with host, Docker and Kubernetes metadata.
processors:
- add_host_metadata:
    when.not.contains.tags: forwarded
- add_docker_metadata: ~
- add_kubernetes_metadata: ~

output.logstash:
  hosts: ["logstash:5044"]

# Filebeat's own logs.
logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  permissions: 0644

Logstash配置

# logstash/pipeline/caddy.conf
input {
  beats {
    port => 5044
  }
}

filter {
  if [service] == "caddy" {
    # 解析时间戳
    date {
      match => [ "timestamp", "ISO8601" ]
    }
    
    # 解析用户代理
    if [http][request][headers][User-Agent] {
      useragent {
        source => "[http][request][headers][User-Agent]"
        target => "user_agent"
      }
    }
    
    # 解析IP地址
    if [http][request][remote_ip] {
      geoip {
        source => "[http][request][remote_ip]"
        target => "geoip"
      }
    }
    
    # 添加响应时间分类
    if [http][response][duration] {
      ruby {
        code => "
          duration = event.get('[http][response][duration]')
          if duration
            duration_ms = duration * 1000
            if duration_ms < 100
              event.set('response_time_category', 'fast')
            elsif duration_ms < 500
              event.set('response_time_category', 'normal')
            elsif duration_ms < 1000
              event.set('response_time_category', 'slow')
            else
              event.set('response_time_category', 'very_slow')
            end
          end
        "
      }
    }
    
    # 添加状态码分类
    if [http][response][status] {
      ruby {
        code => "
          status = event.get('[http][response][status]')
          if status
            case status
            when 200..299
              event.set('status_category', 'success')
            when 300..399
              event.set('status_category', 'redirect')
            when 400..499
              event.set('status_category', 'client_error')
            when 500..599
              event.set('status_category', 'server_error')
            else
              event.set('status_category', 'unknown')
            end
          end
        "
      }
    }
  }
}

output {
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "caddy-logs-%{+YYYY.MM.dd}"
  }
  
  # 调试输出
  stdout {
    codec => rubydebug
  }
}

7.4 性能优化

7.4.1 系统级优化

内核参数优化

# /etc/sysctl.d/99-caddy.conf
# Kernel parameters for a Caddy host.

# Network tuning: accept/SYN backlogs, FIN timeout, keepalive probing,
# TIME_WAIT reuse and the local port range.
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 5000
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_keepalive_time = 1200
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_tw_reuse = 1
net.ipv4.ip_local_port_range = 10000 65535

# File descriptor limits (system-wide and per process)
fs.file-max = 2097152
fs.nr_open = 2097152

# Memory management
vm.swappiness = 10
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5

# Apply the changes (shell command — run it, do not store it in the file)
sudo sysctl -p /etc/sysctl.d/99-caddy.conf

文件描述符限制

# /etc/security/limits.d/caddy.conf
# Soft and hard limits for the caddy user: open files (nofile) and
# processes (nproc).
caddy soft nofile 1048576
caddy hard nofile 1048576
caddy soft nproc 1048576
caddy hard nproc 1048576

# Alternatively, set the same limits in the systemd service
# /etc/systemd/system/caddy.service
[Service]
LimitNOFILE=1048576
LimitNPROC=1048576

7.4.2 Caddy配置优化

连接和超时优化

{
    # 服务器优化
    servers {
        # 协议配置
        protocols h1 h2 h3
        
        # 超时配置
        timeouts {
            read_body 30s
            read_header 10s
            write 60s
            idle 300s
        }
        
        # 连接限制
        max_header_size 16KB
        
        # 启用指标
        metrics
    }
    
    # 全局缓存配置
    cache {
        # 内存缓存
        memory {
            max_size 512MB
        }
        
        # Redis缓存
        redis {
            host localhost:6379
            pool_size 20
            max_idle 10
            idle_timeout 300s
        }
    }
}

example.com {
    # 压缩优化
    encode {
        gzip 6
        zstd
        minimum_length 1024
        match {
            header Content-Type text/* application/json application/javascript application/xml+rss application/atom+xml image/svg+xml
        }
    }
    
    # 静态文件优化
    @static {
        path *.css *.js *.png *.jpg *.jpeg *.gif *.ico *.svg *.woff *.woff2 *.ttf *.eot
    }
    
    handle @static {
        # 缓存头部
        header Cache-Control "public, max-age=31536000, immutable"
        header Expires "Thu, 31 Dec 2037 23:55:55 GMT"
        
        # 预压缩文件
        file_server {
            precompressed gzip br
        }
    }
    
    # API优化
    @api {
        path /api/*
    }
    
    handle @api {
        # API缓存
        cache {
            key {method} {host} {path} {query} {header.Authorization}
            ttl 5m
            stale_ttl 1h
            
            # 缓存条件
            match {
                method GET
                status 200
            }
        }
        
        # 反向代理优化
        reverse_proxy backend:8080 {
            # 负载均衡
            lb_policy least_conn
            
            # 健康检查
            health_uri /health
            health_interval 30s
            health_timeout 5s
            
            # 连接池
            transport http {
                dial_timeout 10s
                response_header_timeout 30s
                expect_continue_timeout 1s
                keep_alive 30s
                max_idle_conns 100
                max_idle_conns_per_host 10
            }
            
            # 重试配置
            @retry {
                status 502 503 504
            }
            handle_response @retry {
                reverse_proxy backend-backup:8080
            }
        }
    }
    
    # 默认处理
    file_server
}

7.4.3 缓存策略优化

多层缓存配置

example.com {
    # L1缓存:内存缓存(最快)
    @frequently_accessed {
        path /api/popular/* /api/trending/*
    }
    
    cache @frequently_accessed {
        memory {
            max_size 100MB
            ttl 1m
        }
        key {method} {host} {path}
    }
    
    # L2缓存:Redis缓存(快)
    @api_cacheable {
        path /api/*
        method GET
        not path /api/user/* /api/private/*
    }
    
    cache @api_cacheable {
        redis {
            host redis-cluster:6379
            ttl 10m
            stale_ttl 1h
        }
        key {method} {host} {path} {query}
        
        # 缓存预热
        warmup {
            urls /api/config /api/menu
            interval 5m
        }
    }
    
    # L3缓存:CDN缓存(全球分发)
    @static_assets {
        path /static/* /assets/* /images/*
    }
    
    handle @static_assets {
        header {
            Cache-Control "public, max-age=31536000, immutable"
            CDN-Cache-Control "public, max-age=31536000"
        }
        
        file_server {
            precompressed gzip br
        }
    }
    
    # 缓存清除端点
    handle /admin/cache/purge {
        @authorized {
            header Authorization "Bearer {env.ADMIN_TOKEN}"
        }
        
        handle @authorized {
            cache_purge {
                patterns /api/* /static/*
            }
            respond "Cache purged" 200
        }
        
        respond "Unauthorized" 401
    }
    
    reverse_proxy backend:8080
}

7.4.4 负载均衡优化

高级负载均衡配置

example.com {
    # 上游服务器配置
    reverse_proxy {
        # 主要后端服务器
        to backend-01:8080 {
            weight 3
        }
        to backend-02:8080 {
            weight 3
        }
        to backend-03:8080 {
            weight 2
        }
        
        # 备用服务器
        to backup-01:8080 {
            weight 1
            backup
        }
        
        # 负载均衡策略
        lb_policy weighted_round_robin
        
        # 健康检查
        health_uri /health
        health_interval 10s
        health_timeout 3s
        health_status 200
        health_body "OK"
        
        # 故障检测
        fail_duration 30s
        max_fails 3
        unhealthy_request_count 5
        
        # 连接优化
        transport http {
            # 连接池
            max_idle_conns 200
            max_idle_conns_per_host 20
            idle_conn_timeout 90s
            
            # 超时设置
            dial_timeout 10s
            response_header_timeout 30s
            expect_continue_timeout 1s
            
            # Keep-Alive
            keep_alive 30s
            
            # TLS配置
            tls_insecure_skip_verify false
            tls_timeout 10s
        }
        
        # 请求修改
        header_up {
            X-Real-IP {remote_host}
            X-Forwarded-For {remote_host}
            X-Forwarded-Proto {scheme}
            X-Forwarded-Host {host}
        }
        
        # 响应修改
        header_down {
            -Server
            -X-Powered-By
            X-Cache-Status {upstream.cache_status}
        }
        
        # 重试配置
        @retry_conditions {
            status 502 503 504
        }
        
        handle_response @retry_conditions {
            # 重试到备用服务器
            reverse_proxy backup-02:8080 {
                transport http {
                    dial_timeout 5s
                    response_header_timeout 15s
                }
            }
        }
    }
}

7.5 安全加固

7.5.1 系统安全

防火墙配置

# UFW setup: deny inbound by default, allow outbound.
sudo ufw default deny incoming
sudo ufw default allow outgoing

# Allow SSH (added before enabling so the current session is not locked out).
sudo ufw allow ssh

# Allow HTTP and HTTPS.
sudo ufw allow 80/tcp
sudo ufw allow 443/tcp
# HTTP/3 (enabled via `protocols h1 h2 h3` in this guide) uses QUIC over UDP.
sudo ufw allow 443/udp

# Allow the admin API from the local host only.
sudo ufw allow from 127.0.0.1 to any port 2019

# Enable the firewall.
sudo ufw enable

# Show the resulting rules.
sudo ufw status verbose
#!/bin/bash
# firewall.sh - rebuild the iptables rule set for a Caddy host.
#
# Order matters: iptables stops at the first matching terminal rule, so the
# port-scan trap and the per-source rate limits are placed BEFORE the plain
# ACCEPT rules for the same ports.

set -euo pipefail

# Keep the policies open while rebuilding so a failure half-way through
# cannot lock out the current SSH session; DROP is applied at the very end.
iptables -P INPUT ACCEPT
iptables -P FORWARD ACCEPT
iptables -P OUTPUT ACCEPT

# Flush existing rules and user-defined chains.
iptables -F
iptables -X
iptables -t nat -F
iptables -t nat -X
iptables -t mangle -F
iptables -t mangle -X

# Allow loopback traffic.
iptables -A INPUT -i lo -j ACCEPT
iptables -A OUTPUT -o lo -j ACCEPT

# Allow packets of already established connections.
iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT

# Port-scan trap: a source that touched port 139 is dropped for 24 hours.
iptables -A INPUT -m recent --name portscan --rcheck --seconds 86400 -j DROP
iptables -A INPUT -m recent --name portscan --remove
iptables -A INPUT -p tcp -m tcp --dport 139 -m recent --name portscan --set -j LOG --log-prefix "portscan:"
iptables -A INPUT -p tcp -m tcp --dport 139 -m recent --name portscan --set -j DROP

# Allow SSH.
iptables -A INPUT -p tcp --dport 22 -j ACCEPT

# Allow HTTP and HTTPS, limiting NEW connections per source address.
# A source exceeding 25/minute (burst 100) is dropped; everything else is
# accepted. A global (non per-source) limit would throttle all visitors.
for port in 80 443; do
    iptables -A INPUT -p tcp --dport "$port" -m conntrack --ctstate NEW \
        -m hashlimit --hashlimit-name "web${port}" --hashlimit-mode srcip \
        --hashlimit-above 25/minute --hashlimit-burst 100 -j DROP
    iptables -A INPUT -p tcp --dport "$port" -j ACCEPT
done

# HTTP/3 (QUIC) uses UDP 443.
iptables -A INPUT -p udp --dport 443 -j ACCEPT

# Allow the admin API from the local host only.
iptables -A INPUT -s 127.0.0.1 -p tcp --dport 2019 -j ACCEPT

# Default policies: drop anything not explicitly allowed above.
iptables -P INPUT DROP
iptables -P FORWARD DROP
iptables -P OUTPUT ACCEPT

# Persist the rules.
mkdir -p /etc/iptables
iptables-save > /etc/iptables/rules.v4

Fail2ban配置

# /etc/fail2ban/jail.d/caddy.conf
# Two jails that read Caddy's access log.

# Ban for 1 hour after 5 matching 401/403 responses within 10 minutes.
[caddy-auth]
enabled = true
port = http,https
filter = caddy-auth
logpath = /var/log/caddy/access.log
maxretry = 5
bantime = 3600
findtime = 600
action = iptables-multiport[name=caddy-auth, port="http,https", protocol=tcp]

# Ban for 10 minutes after 100 matching log lines within 60 seconds.
[caddy-dos]
enabled = true
port = http,https
filter = caddy-dos
logpath = /var/log/caddy/access.log
maxretry = 100
bantime = 600
findtime = 60
action = iptables-multiport[name=caddy-dos, port="http,https", protocol=tcp]
# /etc/fail2ban/filter.d/caddy-auth.conf
# Matches log lines containing a remote_ip followed by status 401 or 403.
[Definition]
failregex = ^.*"remote_ip":"<HOST>".*"status":401.*$
            ^.*"remote_ip":"<HOST>".*"status":403.*$
ignoreregex =
# /etc/fail2ban/filter.d/caddy-dos.conf
# Matches EVERY log line that contains a remote_ip, so each request counts
# towards the caddy-dos threshold.
# NOTE(review): a single page load with many assets may count as many hits —
# confirm the 100-per-60s threshold suits the site.
[Definition]
failregex = ^.*"remote_ip":"<HOST>".*$
ignoreregex =

7.5.2 应用安全

安全头部配置

example.com {
    # Security-related response headers
    header {
        # HSTS: force HTTPS for one year, including subdomains
        Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"

        # Legacy browser XSS filter
        X-XSS-Protection "1; mode=block"

        # Disable MIME type sniffing
        X-Content-Type-Options "nosniff"

        # Clickjacking protection
        X-Frame-Options "DENY"

        # Referrer policy
        Referrer-Policy "strict-origin-when-cross-origin"

        # Permissions policy: disable the listed browser features
        Permissions-Policy "geolocation=(), microphone=(), camera=(), payment=(), usb=(), magnetometer=(), gyroscope=()"

        # Content security policy
        Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.example.com; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src 'self' https://fonts.gstatic.com; img-src 'self' data: https:; connect-src 'self' https://api.example.com; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"

        # Hide backend technology information
        -X-Powered-By

        # Custom server identifier. Setting the field overwrites any existing
        # value, so it is not combined with a `-Server` deletion: a delete and
        # a set of the same field in one block conflict with each other.
        Server "WebServer/1.0"
    }

    # Limit the accepted request body size
    request_body {
        max_size 10MB
    }

    # Rate limiting
    # NOTE(review): rate_limit is not a directive of the standard Caddy build;
    # confirm which plugin is installed and that these options match its syntax.
    rate_limit {
        zone general
        key {remote_host}
        rate 300r/m
        window 1m
        burst 50
    }

    # Block clients by source address. The ranges below are placeholders;
    # remote_ip takes the CIDR ranges directly as arguments (no nested block).
    @blocked_countries {
        remote_ip 192.168.1.0/24 10.0.0.0/8
    }

    handle @blocked_countries {
        respond "Access denied" 403
    }

    # User-Agent filtering. The header matcher takes one value per line;
    # several lines for the same field are OR'ed together.
    @bad_bots {
        header User-Agent *bot*
        header User-Agent *crawler*
        header User-Agent *spider*
        header User-Agent *scraper*
    }

    handle @bad_bots {
        respond "Forbidden" 403
    }

    file_server
}

7.5.3 证书安全

证书管理最佳实践

{
    # Where certificates and other TLS assets are stored
    storage file_system {
        root /var/lib/caddy
    }

    # ACME directory and the root certificate trusted when talking to it
    acme_ca https://acme-v02.api.letsencrypt.org/directory
    acme_ca_root /etc/ssl/certs/isrgrootx1.pem

    # Contact address used for the ACME account
    email security@example.com

    # Key type for issued certificates. Caddy names the 256-bit ECDSA key
    # "p256"; "ec256" is not an accepted value.
    key_type p256
}

example.com {
    # TLS settings
    tls {
        # Allowed protocol versions (minimum, maximum)
        protocols tls1.2 tls1.3

        # TLS 1.2 cipher suites. `ciphers` takes the suite names as arguments
        # on one line rather than as a nested block, and the ChaCha20 suites
        # are spelled with their _SHA256 suffix.
        ciphers TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256

        # Elliptic curves, in preference order
        curves x25519 secp384r1 secp256r1

        # ALPN protocols
        alpn h2 http/1.1

        # OCSP stapling needs no entry here: Caddy staples automatically and
        # the tls directive has no `ocsp_stapling` subdirective (it can only
        # be switched off through the global options).

        # Client certificates (optional)
        client_auth {
            mode request
            trusted_ca_cert_file /etc/caddy/client-ca.pem
        }
    }

    # Certificate transparency
    # NOTE(review): browsers have deprecated Expect-CT; confirm it is still wanted.
    header {
        Expect-CT "max-age=86400, enforce, report-uri=https://example.com/ct-report"
    }

    file_server
}

# Certificate monitoring endpoint, protected by HTTP basic authentication
# (user "admin" with a bcrypt password hash).
certs.example.com {
    basicauth {
        admin $2a$14$Zkx19XLiW6VYouLHR5NmfOFU0z2GTNqq9qB6FY9gZKOOdOoKw6Uw.
    }
    
    # NOTE(review): certificate_info does not appear to be a directive of the
    # standard Caddy build - confirm which plugin provides it before relying
    # on this block.
    handle /certificates {
        certificate_info {
            format json
            include_chain true
        }
    }
    
    # Plain-text response for requests not answered by the handle block above
    respond "Certificate Monitor" 200
}

7.6 备份和恢复

7.6.1 数据备份策略

备份脚本

#!/bin/bash
# backup.sh - Caddy backup script
#
# Copies the Caddy configuration (/etc/caddy), the data directory
# (/var/lib/caddy), the log files modified within the last 7 days and a few
# system files into a timestamped directory under BACKUP_DIR, packs that
# directory into a .tar.gz with a SHA-256 checksum file, removes backups older
# than RETENTION_DAYS and, when REMOTE_BACKUP=true, uploads the result to S3.
#
# Exit status: 0 on success, 1 when the backup could not be created.

BACKUP_DIR="/backup/caddy"
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_NAME="caddy_backup_$DATE"
RETENTION_DAYS=30
STAGING_DIR="$BACKUP_DIR/$BACKUP_NAME"

# The backup contains TLS private keys: keep everything created below private.
umask 077

# Print an error, remove the partial staging directory and stop.
fail() {
    echo "Error: $1" >&2
    rm -rf "$STAGING_DIR"
    exit 1
}

# Create the staging directory
mkdir -p "$STAGING_DIR" || fail "cannot create $STAGING_DIR"

echo "Starting Caddy backup at $(date)"

# Back up configuration files
# NOTE: /etc/caddy and /var/lib/caddy are both copied into the same "caddy"
# directory of the backup; restore.sh reads configuration and data from that
# single path, so the layout is kept as is.
echo "Backing up configuration files..."
cp -r /etc/caddy "$STAGING_DIR/" || fail "cannot copy /etc/caddy"

# Back up certificates and data
echo "Backing up certificates and data..."
cp -r /var/lib/caddy "$STAGING_DIR/" || fail "cannot copy /var/lib/caddy"

# Back up logs (last 7 days); best effort
echo "Backing up recent logs..."
mkdir -p "$STAGING_DIR/logs"
find /var/log/caddy -name "*.log" -mtime -7 -exec cp {} "$STAGING_DIR/logs/" \;

# Back up system configuration; each file is optional
echo "Backing up system configuration..."
mkdir -p "$STAGING_DIR/system"
cp /etc/systemd/system/caddy.service "$STAGING_DIR/system/" 2>/dev/null || true
cp /etc/security/limits.d/caddy.conf "$STAGING_DIR/system/" 2>/dev/null || true
cp /etc/sysctl.d/99-caddy.conf "$STAGING_DIR/system/" 2>/dev/null || true

# Write the backup information file
echo "Creating backup information..."
cat > "$STAGING_DIR/backup_info.txt" << EOF
Backup Date: $(date)
Hostname: $(hostname)
Caddy Version: $(caddy version)
System: $(uname -a)
Backup Size: $(du -sh "$STAGING_DIR" | cut -f1)
EOF

# Compress the backup. The cd matters twice: the archive then contains only
# the relative directory name, and the checksum file records the bare archive
# file name.
echo "Compressing backup..."
cd "$BACKUP_DIR" || fail "cannot enter $BACKUP_DIR"
tar -czf "$BACKUP_NAME.tar.gz" "$BACKUP_NAME" || fail "cannot create $BACKUP_NAME.tar.gz"
rm -rf "$BACKUP_NAME"

# Calculate the checksum
echo "Calculating checksum..."
sha256sum "$BACKUP_NAME.tar.gz" > "$BACKUP_NAME.tar.gz.sha256"

# Remove old backups: both the archives and their checksum files
echo "Cleaning up old backups..."
find "$BACKUP_DIR" -name "caddy_backup_*.tar.gz" -mtime +"$RETENTION_DAYS" -delete
find "$BACKUP_DIR" -name "caddy_backup_*.tar.gz.sha256" -mtime +"$RETENTION_DAYS" -delete

echo "Backup completed: $BACKUP_NAME.tar.gz"
echo "Backup size: $(du -sh "$BACKUP_DIR/$BACKUP_NAME.tar.gz" | cut -f1)"
echo "Backup location: $BACKUP_DIR/$BACKUP_NAME.tar.gz"

# Optional: upload to remote storage
if [ "$REMOTE_BACKUP" = "true" ]; then
    echo "Uploading to remote storage..."
    # AWS S3
    aws s3 cp "$BACKUP_DIR/$BACKUP_NAME.tar.gz" s3://backup-bucket/caddy/
    aws s3 cp "$BACKUP_DIR/$BACKUP_NAME.tar.gz.sha256" s3://backup-bucket/caddy/

    # Or use rsync
    # rsync -avz $BACKUP_DIR/$BACKUP_NAME.tar.gz backup-server:/backup/caddy/
fi

echo "Backup process completed at $(date)"

自动备份配置

# Add these entries to the crontab
# crontab -e

# Daily backup at 02:00
0 2 * * * /usr/local/bin/backup.sh >> /var/log/caddy/backup.log 2>&1

# Full backup every Sunday at 01:00
# NOTE(review): backup.sh as shown in this chapter never reads its arguments,
# so --full currently behaves like the daily run.
0 1 * * 0 /usr/local/bin/backup.sh --full >> /var/log/caddy/backup.log 2>&1

# Clean up old backups on the 1st of every month at 03:00
# NOTE(review): cleanup-backups.sh is not shown in this section - confirm it exists.
0 3 1 * * /usr/local/bin/cleanup-backups.sh >> /var/log/caddy/backup.log 2>&1

7.6.2 恢复策略

恢复脚本

#!/bin/bash
# restore.sh - Caddy restore script
#
# Usage: restore.sh <backup_file.tar.gz>
#
# Restores /etc/caddy, /var/lib/caddy and the saved system files from an
# archive created by backup.sh. The current configuration and data are first
# copied to /backup/caddy/pre-restore and are put back if the restored
# configuration does not validate.
#
# Exit status: 0 on success, 1 on any failure.

BACKUP_FILE="$1"
RESTORE_DIR="/tmp/caddy_restore"
PRE_RESTORE_DIR="/backup/caddy/pre-restore"

# Replace everything inside directory $2 (hidden entries included) with the
# contents of directory $1, preserving ownership and modes.
replace_dir_contents() {
    local src="$1" dest="$2"
    mkdir -p "$dest"
    find "$dest" -mindepth 1 -delete
    cp -a "$src"/. "$dest"/
}

# Put back the copies taken before the restore started. Only /etc/caddy and
# /var/lib/caddy are rolled back; restored system files are left in place.
rollback() {
    if [ -d "$PRE_RESTORE_DIR/caddy-config-$STAMP" ]; then
        replace_dir_contents "$PRE_RESTORE_DIR/caddy-config-$STAMP" /etc/caddy
    fi
    if [ -d "$PRE_RESTORE_DIR/caddy-data-$STAMP" ]; then
        replace_dir_contents "$PRE_RESTORE_DIR/caddy-data-$STAMP" /var/lib/caddy
    fi
}

if [ -z "$BACKUP_FILE" ]; then
    echo "Usage: $0 <backup_file.tar.gz>"
    echo "Available backups:"
    ls -la /backup/caddy/caddy_backup_*.tar.gz
    exit 1
fi

if [ ! -f "$BACKUP_FILE" ]; then
    echo "Backup file not found: $BACKUP_FILE"
    exit 1
fi

# Work with an absolute path so later directory changes cannot break it.
BACKUP_FILE="$(cd "$(dirname "$BACKUP_FILE")" && pwd)/$(basename "$BACKUP_FILE")"

echo "Starting Caddy restore from $BACKUP_FILE at $(date)"

# Verify the backup file
echo "Verifying backup integrity..."
if [ -f "$BACKUP_FILE.sha256" ]; then
    # backup.sh records the bare archive name in the checksum file, so the
    # check has to run from the directory that holds the archive.
    if ! (cd "$(dirname "$BACKUP_FILE")" && sha256sum -c "$(basename "$BACKUP_FILE").sha256"); then
        echo "Backup file integrity check failed!"
        exit 1
    fi
    echo "Backup integrity verified."
else
    echo "Warning: No checksum file found, skipping integrity check."
fi

# Stop the Caddy service
echo "Stopping Caddy service..."
systemctl stop caddy

# Keep a copy of the current state; one timestamp is shared by both copies so
# the rollback can find them again.
echo "Backing up current configuration..."
STAMP=$(date +%Y%m%d_%H%M%S)
mkdir -p "$PRE_RESTORE_DIR"
cp -a /etc/caddy "$PRE_RESTORE_DIR/caddy-config-$STAMP"
cp -a /var/lib/caddy "$PRE_RESTORE_DIR/caddy-data-$STAMP"

# Extract the backup into a clean scratch directory
echo "Extracting backup..."
rm -rf "$RESTORE_DIR"
mkdir -p "$RESTORE_DIR"
trap 'rm -rf "$RESTORE_DIR"' EXIT
if ! tar -xzf "$BACKUP_FILE" -C "$RESTORE_DIR"; then
    echo "Failed to extract backup: $BACKUP_FILE"
    systemctl start caddy
    exit 1
fi

BACKUP_NAME=$(basename "$BACKUP_FILE" .tar.gz)
# backup.sh stores configuration and data together under "caddy".
BACKUP_CONTENT="$RESTORE_DIR/$BACKUP_NAME/caddy"

# Restore configuration files
echo "Restoring configuration files..."
if [ -d "$BACKUP_CONTENT" ]; then
    rm -rf /etc/caddy/*
    cp -r "$BACKUP_CONTENT"/. /etc/caddy/
    chown -R caddy:caddy /etc/caddy
    # Directories need the execute bit to stay traversable, so files and
    # directories get separate modes instead of a blanket chmod -R 644.
    find /etc/caddy -type d -exec chmod 755 {} +
    find /etc/caddy -type f -exec chmod 644 {} +
fi

# Restore data files
echo "Restoring data files..."
if [ -d "$BACKUP_CONTENT" ]; then
    rm -rf /var/lib/caddy/*
    cp -r "$BACKUP_CONTENT"/. /var/lib/caddy/
    chown -R caddy:caddy /var/lib/caddy
fi

# Restore system configuration; each file is optional
echo "Restoring system configuration..."
if [ -d "$RESTORE_DIR/$BACKUP_NAME/system" ]; then
    cp "$RESTORE_DIR/$BACKUP_NAME/system/caddy.service" /etc/systemd/system/ 2>/dev/null || true
    cp "$RESTORE_DIR/$BACKUP_NAME/system/caddy.conf" /etc/security/limits.d/ 2>/dev/null || true
    cp "$RESTORE_DIR/$BACKUP_NAME/system/99-caddy.conf" /etc/sysctl.d/ 2>/dev/null || true
    systemctl daemon-reload
fi

# Validate the restored configuration
echo "Validating configuration..."
if caddy validate --config /etc/caddy/Caddyfile; then
    echo "Configuration validation successful."
else
    echo "Configuration validation failed! Rolling back..."
    rollback
    systemctl start caddy
    exit 1
fi

# Start the Caddy service
echo "Starting Caddy service..."
systemctl start caddy

# Check the service state
sleep 5
if systemctl is-active --quiet caddy; then
    echo "Caddy service started successfully."
else
    echo "Failed to start Caddy service!"
    systemctl status caddy
    exit 1
fi

# Clean up temporary files
echo "Cleaning up temporary files..."
rm -rf "$RESTORE_DIR"

echo "Restore completed successfully at $(date)"
echo "Please verify that all services are working correctly."

7.6.3 灾难恢复

灾难恢复计划

#!/bin/bash
# disaster-recovery.sh - disaster recovery script
#
# Usage: disaster-recovery.sh {prepare|restore} [backup_source]
#   prepare  create an emergency backup, replicate /backup/caddy to S3 and to
#            dr-server, and write recovery instructions
#   restore  fetch the newest backup from backup_source (local|s3|remote),
#            install Caddy if it is missing, and run restore.sh on it
#
# Exit status: 0 on success, 1 on a usage error or when a required step fails.

DR_MODE="$1"
BACKUP_SOURCE="$2"

# Print the newest caddy_backup_*.tar.gz in directory $1, or nothing if none
# exists. Archive names embed a YYYYmmdd_HHMMSS timestamp, so the last name in
# sorted order is the newest regardless of file modification times.
latest_backup() {
    ls -1 "$1"/caddy_backup_*.tar.gz 2>/dev/null | sort | tail -n 1
}

case "$DR_MODE" in
    "prepare")
        echo "Preparing for disaster recovery..."

        # Create a fresh backup; without it there is nothing new to replicate
        if ! /usr/local/bin/backup.sh --emergency; then
            echo "Emergency backup failed!" >&2
            exit 1
        fi

        # Upload to several locations; one failing must not stop the other
        aws s3 sync /backup/caddy/ s3://dr-backup-bucket/caddy/ \
            || echo "Warning: upload to S3 failed" >&2
        rsync -avz /backup/caddy/ dr-server:/backup/caddy/ \
            || echo "Warning: upload to dr-server failed" >&2

        # Write the recovery document
        cat > /backup/caddy/recovery-instructions.txt << EOF
Disaster Recovery Instructions
=============================

Date: $(date)
Server: $(hostname)
Caddy Version: $(caddy version)

Recovery Steps:
1. Install Caddy on new server
2. Run: disaster-recovery.sh restore <backup_source>
3. Verify DNS settings
4. Test all services
5. Update monitoring

Backup Locations:
- Local: /backup/caddy/
- S3: s3://dr-backup-bucket/caddy/
- Remote: dr-server:/backup/caddy/

Contacts:
- Primary Admin: admin@example.com
- Secondary Admin: backup-admin@example.com
- Emergency: +1-555-0123
EOF
        ;;

    "restore")
        echo "Starting disaster recovery restore..."

        if [ -z "$BACKUP_SOURCE" ]; then
            echo "Please specify backup source (local|s3|remote)"
            exit 1
        fi

        case "$BACKUP_SOURCE" in
            "s3")
                echo "Downloading from S3..."
                mkdir -p /tmp/dr-restore
                if ! aws s3 sync s3://dr-backup-bucket/caddy/ /tmp/dr-restore/; then
                    echo "Download from S3 failed!" >&2
                    exit 1
                fi
                LATEST_BACKUP=$(latest_backup /tmp/dr-restore)
                ;;
            "remote")
                echo "Downloading from remote server..."
                mkdir -p /tmp/dr-restore
                if ! rsync -avz dr-server:/backup/caddy/ /tmp/dr-restore/; then
                    echo "Download from remote server failed!" >&2
                    exit 1
                fi
                LATEST_BACKUP=$(latest_backup /tmp/dr-restore)
                ;;
            "local")
                LATEST_BACKUP=$(latest_backup /backup/caddy)
                ;;
            *)
                echo "Unknown backup source: $BACKUP_SOURCE"
                exit 1
                ;;
        esac

        if [ -z "$LATEST_BACKUP" ]; then
            echo "No backup found!"
            exit 1
        fi

        echo "Using backup: $LATEST_BACKUP"

        # Install Caddy if it is not installed yet (Debian/Ubuntu repository)
        if ! command -v caddy &> /dev/null; then
            echo "Installing Caddy..."
            curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | sudo gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
            curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | sudo tee /etc/apt/sources.list.d/caddy-stable.list
            apt update
            apt install -y caddy
        fi

        # Create the service user and directories; useradd is allowed to fail
        # when the user already exists
        useradd --system --home /var/lib/caddy --create-home --shell /bin/false caddy 2>/dev/null || true
        mkdir -p /etc/caddy /var/lib/caddy /var/log/caddy
        chown -R caddy:caddy /var/lib/caddy /var/log/caddy

        # Restore the backup; stop here instead of reporting success if it fails
        if ! /usr/local/bin/restore.sh "$LATEST_BACKUP"; then
            echo "Restore failed!" >&2
            exit 1
        fi

        # DNS has to be updated manually if the address changed
        echo "Please update DNS records to point to this server:"
        echo "Current IP: $(curl -s ifconfig.me)"

        # Verify the services
        echo "Verifying services..."
        sleep 10

        if curl -f http://localhost/health > /dev/null 2>&1; then
            echo "HTTP service is responding"
        else
            echo "Warning: HTTP service not responding"
        fi

        # -k: the certificate is not expected to match "localhost"
        if curl -fk https://localhost/health > /dev/null 2>&1; then
            echo "HTTPS service is responding"
        else
            echo "Warning: HTTPS service not responding"
        fi

        echo "Disaster recovery completed."
        echo "Please verify all services and update monitoring systems."
        ;;

    *)
        echo "Usage: $0 {prepare|restore} [backup_source]"
        echo "  prepare: Create emergency backup and prepare for DR"
        echo "  restore: Restore from backup (backup_source: local|s3|remote)"
        exit 1
        ;;
esac

7.7 故障排除

7.7.1 常见问题诊断

诊断脚本

#!/bin/bash
# diagnose.sh - Caddy troubleshooting script
#
# Prints a read-only diagnostic report to stdout: system and Caddy
# information, processes, listening ports, memory and disk usage,
# configuration validation, certificate dates, recent errors, network
# connectivity, firewall state and file permissions.

echo "Caddy Diagnostic Report"
echo "======================"
echo "Date: $(date)"
echo "Hostname: $(hostname)"
echo ""

# System information
echo "System Information:"
echo "------------------"
echo "OS: $(grep PRETTY_NAME /etc/os-release | cut -d'"' -f2)"
echo "Kernel: $(uname -r)"
echo "Uptime: $(uptime)"
echo "Load Average: $(cat /proc/loadavg)"
echo ""

# Caddy version and service state
echo "Caddy Information:"
echo "-----------------"
if command -v caddy &> /dev/null; then
    echo "Version: $(caddy version)"
else
    echo "Caddy not found in PATH"
fi

echo "Service Status: $(systemctl is-active caddy)"
echo "Service Enabled: $(systemctl is-enabled caddy)"
echo ""

# Process information; the [c] keeps grep from matching its own command line
echo "Process Information:"
echo "-------------------"
ps aux | grep '[c]addy'
echo ""

# Listening ports; prefer ss and fall back to netstat where ss is missing
echo "Port Listening:"
echo "--------------"
if command -v ss &> /dev/null; then
    ss -tlnp | grep -E ':(80|443|2019)'
else
    netstat -tlnp | grep -E ':(80|443|2019)'
fi
echo ""

# Memory usage
echo "Memory Usage:"
echo "------------"
free -h
echo ""

# Disk usage
echo "Disk Usage:"
echo "----------"
df -h | grep -E '(Filesystem|/dev/)'
echo ""

# Configuration validation
echo "Configuration Validation:"
echo "------------------------"
if [ -f "/etc/caddy/Caddyfile" ]; then
    caddy validate --config /etc/caddy/Caddyfile
else
    echo "Caddyfile not found at /etc/caddy/Caddyfile"
fi
echo ""

# Certificate status: subject and validity dates of every stored certificate
echo "Certificate Status:"
echo "------------------"
if [ -d "/var/lib/caddy/certificates" ]; then
    find /var/lib/caddy/certificates -name "*.crt" -exec openssl x509 -in {} -noout -subject -dates \;
else
    echo "Certificate directory not found"
fi
echo ""

# Recent errors: the error log if present, otherwise the systemd journal
echo "Recent Errors (last 50 lines):"
echo "------------------------------"
if [ -f "/var/log/caddy/error.log" ]; then
    tail -n 50 /var/log/caddy/error.log
else
    journalctl -u caddy --no-pager -n 50
fi
echo ""

# Network connectivity test
echo "Network Connectivity:"
echo "--------------------"
echo "DNS Resolution:"
nslookup google.com
echo ""
echo "External Connectivity:"
# Test curl's own exit status: piping straight into head would report head's
# status, and the failure message would never be printed.
if RESPONSE_HEADERS=$(curl -I --connect-timeout 5 http://google.com 2>/dev/null); then
    echo "$RESPONSE_HEADERS" | head -n 1
else
    echo "Failed to connect"
fi
echo ""

# Firewall status
echo "Firewall Status:"
echo "---------------"
if command -v ufw &> /dev/null; then
    ufw status
elif command -v firewall-cmd &> /dev/null; then
    firewall-cmd --list-all
else
    echo "No firewall management tool found"
fi
echo ""

# File permissions
echo "File Permissions:"
echo "----------------"
ls -la /etc/caddy/
ls -la /var/lib/caddy/
echo ""

echo "Diagnostic completed. Please review the output above."

7.7.2 性能问题排查

性能监控脚本

#!/bin/bash
# performance-monitor.sh - performance monitoring script
#
# Usage: performance-monitor.sh [duration_seconds]
#
# Samples the Caddy process once per second for the given duration (default
# 60) and writes CPU, memory, file descriptor, connection, load and disk I/O
# figures to a timestamped report in /tmp, then prints a short summary.
#
# Exit status: 1 on an invalid duration or when no Caddy process is running.

MONITOR_DURATION=${1:-60}  # default: monitor for 60 seconds
OUTPUT_FILE="/tmp/caddy-performance-$(date +%Y%m%d_%H%M%S).log"

# The duration drives seq and the summary averages: require a positive integer.
case "$MONITOR_DURATION" in
    ''|*[!0-9]*)
        echo "Usage: $0 [duration_seconds]" >&2
        exit 1
        ;;
esac
if [ "$MONITOR_DURATION" -le 0 ]; then
    echo "Usage: $0 [duration_seconds]" >&2
    exit 1
fi

# Look up the Caddy process ID before output is redirected so the error is
# visible on the terminal. -o selects the oldest match, which guarantees a
# single PID even when several processes match.
CADDY_PID=$(pgrep -o caddy)
if [ -z "$CADDY_PID" ]; then
    echo "Error: Caddy process not found" >&2
    exit 1
fi

echo "Starting Caddy performance monitoring for ${MONITOR_DURATION} seconds..."
echo "Output will be saved to: $OUTPUT_FILE"

{
    echo "Caddy Performance Monitor Report"
    echo "==============================="
    echo "Start Time: $(date)"
    echo "Duration: ${MONITOR_DURATION} seconds"
    echo ""

    echo "Caddy PID: $CADDY_PID"
    echo ""

    # Sampling loop
    for i in $(seq 1 "$MONITOR_DURATION"); do
        # Stop sampling if the process went away; the arithmetic below would
        # otherwise fail on empty values.
        if [ ! -d "/proc/$CADDY_PID" ]; then
            echo "Caddy process $CADDY_PID exited; stopping after $((i - 1)) samples"
            break
        fi

        echo "=== Sample $i ($(date)) ==="

        # CPU usage as reported by ps
        CPU_USAGE=$(ps -p "$CADDY_PID" -o %cpu --no-headers)
        echo "CPU Usage: ${CPU_USAGE}%"

        # Memory usage; ps reports RSS and VSZ in KiB
        MEMORY_INFO=$(ps -p "$CADDY_PID" -o rss,vsz --no-headers)
        RSS=$(echo $MEMORY_INFO | awk '{print $1}')
        VSZ=$(echo $MEMORY_INFO | awk '{print $2}')
        echo "Memory RSS: $((${RSS:-0}/1024)) MB"
        echo "Memory VSZ: $((${VSZ:-0}/1024)) MB"

        # Open file descriptors
        FD_COUNT=$(ls "/proc/$CADDY_PID/fd" 2>/dev/null | wc -l)
        echo "File Descriptors: $FD_COUNT"

        # Established connections involving port 80 or 443
        CONN_COUNT=$(netstat -an | grep -E ':(80|443)' | grep ESTABLISHED | wc -l)
        echo "Active Connections: $CONN_COUNT"

        # Load average (1, 5 and 15 minutes)
        LOAD_AVG=$(awk '{print $1, $2, $3}' /proc/loadavg)
        echo "Load Average: $LOAD_AVG"

        # Disk I/O counters of the process; anchored so that read_bytes does
        # not also match other counters
        if [ -f "/proc/$CADDY_PID/io" ]; then
            READ_BYTES=$(awk '/^read_bytes/ {print $2}' "/proc/$CADDY_PID/io")
            WRITE_BYTES=$(awk '/^write_bytes/ {print $2}' "/proc/$CADDY_PID/io")
            echo "Disk Read: $((${READ_BYTES:-0}/1024/1024)) MB"
            echo "Disk Write: $((${WRITE_BYTES:-0}/1024/1024)) MB"
        fi

        echo ""
        sleep 1
    done

    echo "Monitoring completed at $(date)"

} > "$OUTPUT_FILE"

echo "Performance monitoring completed. Report saved to: $OUTPUT_FILE"

# Generate the summary from the report
echo ""
echo "Performance Summary:"
echo "=================="
echo "Average CPU Usage: $(grep "CPU Usage:" "$OUTPUT_FILE" | awk '{sum+=$3; count++} END {if (count > 0) print sum/count "%"; else print "n/a"}')"
echo "Peak Memory RSS: $(grep "Memory RSS:" "$OUTPUT_FILE" | awk '{print $3}' | sort -n | tail -1) MB"
echo "Peak Connections: $(grep "Active Connections:" "$OUTPUT_FILE" | awk '{print $3}' | sort -n | tail -1)"
echo "Peak File Descriptors: $(grep "File Descriptors:" "$OUTPUT_FILE" | awk '{print $3}' | sort -n | tail -1)"

7.7.3 日志分析工具

日志分析脚本

#!/bin/bash
# log-analyzer.sh - log analysis script
#
# Usage: log-analyzer.sh [log_file] [time_range]
#   log_file    JSON access log, one object per line
#               (default /var/log/caddy/access.log)
#   time_range  <number><unit> with unit m, h or d, e.g. 1h, 24h, 7d
#               (default 1h; unrecognized values fall back to 1h)
#
# Prints request counts, status codes, errors, response times, slow requests,
# top pages, client IPs, user agents, traffic and an hourly distribution.
# Requires jq and GNU date/sort.
#
# NOTE(review): the field paths used below (.timestamp, .http.response.*,
# .http.request.*) assume a particular log schema - confirm they match the
# access log format actually configured.

LOG_FILE="${1:-/var/log/caddy/access.log}"
TIME_RANGE="${2:-1h}"  # e.g. 1h, 24h, 7d

if [ ! -f "$LOG_FILE" ]; then
    echo "Log file not found: $LOG_FILE"
    exit 1
fi

# Every section below depends on jq; fail early instead of printing an
# empty report.
if ! command -v jq > /dev/null 2>&1; then
    echo "jq is required but was not found in PATH" >&2
    exit 1
fi

# Work out the start of the time range before the header is printed, so the
# header always shows the range that is really analyzed.
if [[ "$TIME_RANGE" =~ ^([0-9]+)([mhd])$ ]]; then
    case "${BASH_REMATCH[2]}" in
        m) RANGE_UNIT="minutes" ;;
        h) RANGE_UNIT="hours" ;;
        d) RANGE_UNIT="days" ;;
    esac
    SINCE_TIME=$(date -d "${BASH_REMATCH[1]} $RANGE_UNIT ago" '+%Y-%m-%dT%H:%M:%S')
else
    echo "Unrecognized time range '$TIME_RANGE', using 1h" >&2
    TIME_RANGE="1h"
    SINCE_TIME=$(date -d '1 hour ago' '+%Y-%m-%dT%H:%M:%S')
fi

echo "Caddy Log Analysis Report"
echo "========================"
echo "Log File: $LOG_FILE"
echo "Time Range: $TIME_RANGE"
echo "Analysis Time: $(date)"
echo ""

# Keep only the log lines inside the time range. The temporary file gets an
# unpredictable name and is removed on every exit path.
TEMP_LOG=$(mktemp "${TMPDIR:-/tmp}/filtered_log_XXXXXX") || exit 1
trap 'rm -f "$TEMP_LOG"' EXIT

# Portable awk: two-argument match() with RSTART/RLENGTH works in mawk and
# busybox awk as well as gawk. Timestamps are compared as strings.
# NOTE(review): this assumes log timestamps use the same format and time zone
# as the local time produced by date above - confirm.
awk -v since="$SINCE_TIME" '
match($0, /"timestamp":"[^"]+"/) {
    # skip the 13-character key prefix and drop the closing quote
    ts = substr($0, RSTART + 13, RLENGTH - 14)
    if ((ts "") >= (since "")) {
        print $0
    }
}' "$LOG_FILE" > "$TEMP_LOG"

TOTAL_REQUESTS=$(wc -l < "$TEMP_LOG")
echo "Total Requests: $TOTAL_REQUESTS"
echo ""

if [ "$TOTAL_REQUESTS" -eq 0 ]; then
    echo "No requests found in the specified time range."
    exit 0
fi

# Status code distribution
echo "Status Code Distribution:"
echo "------------------------"
jq -r '.http.response.status' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr
echo ""

# Error requests
echo "Error Requests (4xx/5xx):"
echo "-------------------------"
ERROR_COUNT=$(jq -r 'select(.http.response.status >= 400) | .http.response.status' "$TEMP_LOG" 2>/dev/null | wc -l)
echo "Total Errors: $ERROR_COUNT"
if [ "$ERROR_COUNT" -gt 0 ]; then
    echo "Error Breakdown:"
    jq -r 'select(.http.response.status >= 400) | .http.response.status' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr
fi
echo ""

# Response time analysis. Entries without a numeric duration are skipped.
# Sorting is done by sort -g (understands exponent notation) so that awk only
# needs portable features; min, median and max are read from the sorted list.
echo "Response Time Analysis:"
echo "----------------------"
jq -r '.http.response.duration | numbers' "$TEMP_LOG" 2>/dev/null | sort -g | awk '
{
    sorted[NR] = $1
    sum += $1
}
END {
    if (NR > 0) {
        if (NR % 2 == 1) {
            median = sorted[(NR + 1) / 2]
        } else {
            median = (sorted[NR / 2] + sorted[NR / 2 + 1]) / 2
        }
        printf "Average: %.3f seconds\n", sum / NR
        printf "Median: %.3f seconds\n", median
        printf "Min: %.3f seconds\n", sorted[1]
        printf "Max: %.3f seconds\n", sorted[NR]
    }
}'
echo ""

# Slow request analysis
echo "Slow Requests (>1s):"
echo "-------------------"
SLOW_COUNT=$(jq -r 'select(.http.response.duration > 1) | [.http.request.uri, .http.response.duration] | @tsv' "$TEMP_LOG" 2>/dev/null | wc -l)
echo "Total Slow Requests: $SLOW_COUNT"
if [ "$SLOW_COUNT" -gt 0 ]; then
    echo "Top 10 Slowest Requests:"
    # Split on tabs only, so the duration stays in column 2 whatever the URI contains
    jq -r 'select(.http.response.duration > 1) | [.http.request.uri, .http.response.duration] | @tsv' "$TEMP_LOG" 2>/dev/null | sort -t$'\t' -k2,2nr | head -10
fi
echo ""

# Most requested pages
echo "Top 10 Requested Pages:"
echo "----------------------"
jq -r '.http.request.uri' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr | head -10
echo ""

# Client IP analysis
echo "Top 10 Client IPs:"
echo "------------------"
jq -r '.http.request.remote_ip' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr | head -10
echo ""

# User agent analysis
echo "Top 10 User Agents:"
echo "------------------"
jq -r '.http.request.headers["User-Agent"][0]' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr | head -10
echo ""

# Traffic analysis; sum + 0 yields 0 rather than an empty string
echo "Traffic Analysis:"
echo "-----------------"
TOTAL_BYTES=$(jq -r '.http.response.size' "$TEMP_LOG" 2>/dev/null | awk '{sum += $1} END {print sum + 0}')
echo "Total Bytes Served: $(echo "$TOTAL_BYTES" | awk '{printf "%.2f MB", $1/1024/1024}')"
echo "Average Response Size: $(echo "$TOTAL_BYTES $TOTAL_REQUESTS" | awk '{printf "%.2f KB", $1/$2/1024}')"
echo ""

# Hourly distribution; characters 12-13 of the timestamp are the hour
echo "Hourly Request Distribution:"
echo "---------------------------"
jq -r '.timestamp' "$TEMP_LOG" 2>/dev/null | cut -c12-13 | sort | uniq -c | awk '{printf "%02d:00 - %s requests\n", $2, $1}'
echo ""

# The temporary file is removed by the EXIT trap.

echo "Log analysis completed."

本章总结

本章我们全面学习了Caddy的部署和运维:

  1. 生产环境部署:掌握了系统要求、安装方式和服务配置
  2. 配置管理:学习了配置文件组织、环境管理和版本控制
  3. 监控和日志:了解了日志配置、监控设置和分析方法
  4. 性能优化:掌握了系统级和应用级的性能优化技巧
  5. 安全加固:学习了系统安全、应用安全和证书安全
  6. 备份和恢复:了解了备份策略、恢复流程和灾难恢复
  7. 故障排除:掌握了问题诊断、性能排查和日志分析

通过本章的学习,你应该能够:

  • 在生产环境中安全可靠地部署Caddy
  • 建立完善的监控和日志体系
  • 优化Caddy的性能和安全性
  • 制定有效的备份和恢复策略
  • 快速诊断和解决常见问题

练习题

基础练习

  1. 部署配置

    • 在Ubuntu服务器上部署Caddy
    • 配置systemd服务
    • 设置基本的安全策略
  2. 监控设置

    • 配置Prometheus监控
    • 设置日志轮转
    • 创建基本的告警规则
  3. 备份恢复

    • 编写备份脚本
    • 测试恢复流程
    • 配置自动备份

进阶练习

  1. 性能优化

    • 分析性能瓶颈
    • 优化系统参数
    • 实现多层缓存
  2. 安全加固

    • 配置防火墙规则
    • 实现入侵检测
    • 加强证书安全
  3. 高可用部署

    • 配置负载均衡
    • 实现故障转移
    • 设置健康检查

实战练习

  1. 运维自动化

    • 编写部署脚本
    • 实现配置管理
    • 建立CI/CD流程
  2. 故障处理

    • 模拟故障场景
    • 练习故障诊断
    • 优化恢复流程
  3. 容量规划

    • 分析资源使用
    • 预测容量需求
    • 制定扩容计划

下一章我们将学习Caddy的实战案例,这将帮助你将所学知识应用到实际项目中。