7.1 生产环境部署
7.1.1 系统要求
硬件要求: - CPU:2核心以上(推荐4核心) - 内存:2GB以上(推荐4GB) - 存储:20GB以上可用空间 - 网络:稳定的网络连接
软件要求: - 操作系统:Linux(Ubuntu 20.04+、CentOS 8+、RHEL 8+) - 内核版本:4.15+ - 文件系统:ext4、xfs或btrfs - 防火墙:iptables或firewalld
网络要求: - 端口80(HTTP)和443(HTTPS)开放 - DNS解析正确配置(域名需解析到本服务器的公网IP) - 如使用Let's Encrypt自动签发证书,服务器的80/443端口必须能从公网访问(用于ACME的HTTP-01/TLS-ALPN-01验证),且服务器自身需要能访问外网
7.1.2 安装方式选择
1. 包管理器安装(推荐)
# Ubuntu/Debian: register the official Cloudsmith apt repository, then install.
sudo apt install -y debian-keyring debian-archive-keyring apt-transport-https curl
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | sudo gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | sudo tee /etc/apt/sources.list.d/caddy-stable.list
sudo apt update
sudo apt install caddy

# CentOS/RHEL/Fedora: the copr sub-command lives in a dnf plugin, and all
# three steps need root.
sudo dnf install 'dnf-command(copr)'
sudo dnf copr enable @caddy/caddy
sudo dnf install caddy

# Or download a static Caddy 2 binary from the official download API.
# (getcaddy.com was the Caddy v1 installer and must not be used for Caddy 2.)
curl -fsSL "https://caddyserver.com/api/download?os=linux&arch=amd64" -o caddy
chmod +x caddy
sudo mv caddy /usr/bin/caddy
2. Docker部署
# Dockerfile
FROM caddy:2-alpine

# Bake the configuration and the static site into the image.
COPY Caddyfile /etc/caddy/Caddyfile
COPY site/ /usr/share/caddy/

# Make sure the data directory exists.
RUN mkdir -p /data/caddy

# Published ports.
EXPOSE 80 443

# Health check. The Alpine-based image does not ship curl, so use the BusyBox
# wget that is always present. Assumes the Caddyfile answers /health over
# plain HTTP on port 80.
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD wget -q --spider http://localhost/health || exit 1

# Start command.
CMD ["caddy", "run", "--config", "/etc/caddy/Caddyfile", "--adapter", "caddyfile"]
# docker-compose.yml
version: '3.8'

services:
  caddy:
    build: .
    container_name: caddy-server
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
      # HTTP/3 runs over QUIC, which needs UDP 443 as well.
      - "443:443/udp"
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - ./site:/usr/share/caddy:ro
      - caddy_data:/data
      - caddy_config:/config
    environment:
      # The admin API is unauthenticated: keep it on loopback inside the
      # container instead of 0.0.0.0, where every container on the network
      # could reconfigure the server. `docker exec` still reaches it.
      - CADDY_ADMIN=localhost:2019
    networks:
      - caddy-network
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

# External volumes must be created beforehand (`docker volume create ...`);
# in exchange, `docker compose down -v` cannot delete the certificates.
volumes:
  caddy_data:
    external: true
  caddy_config:
    external: true

networks:
  caddy-network:
    driver: bridge
3. Kubernetes部署
# caddy-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: caddy
  namespace: web
  labels:
    app: caddy
spec:
  # NOTE(review): the PVCs mounted below are declared ReadWriteOnce. With 3
  # replicas the pods can only all start if they are scheduled on one node or
  # the storage backend allows shared access - confirm before scaling out.
  replicas: 3
  selector:
    matchLabels:
      app: caddy
  template:
    metadata:
      labels:
        app: caddy
    spec:
      containers:
        - name: caddy
          image: caddy:2-alpine
          # Only the public listeners are declared. The admin API (2019) is
          # unauthenticated and therefore stays on loopback inside the pod.
          ports:
            - containerPort: 80
            - containerPort: 443
          volumeMounts:
            - name: caddyfile
              mountPath: /etc/caddy
              readOnly: true
            - name: caddy-data
              mountPath: /data
            - name: caddy-config
              mountPath: /config
          env:
            - name: CADDY_ADMIN
              value: "localhost:2019"
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          # Both probes hit the /health route defined in the ConfigMap below.
          livenessProbe:
            httpGet:
              path: /health
              port: 80
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: 80
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: caddyfile
          configMap:
            name: caddy-config
        - name: caddy-data
          persistentVolumeClaim:
            claimName: caddy-data-pvc
        - name: caddy-config
          persistentVolumeClaim:
            claimName: caddy-config-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: caddy-service
  namespace: web
spec:
  selector:
    app: caddy
  # Only HTTP and HTTPS are published. Exposing port 2019 through a
  # LoadBalancer would let anyone on the network rewrite the running
  # configuration, because the admin API has no authentication.
  ports:
    - name: http
      port: 80
      targetPort: 80
    - name: https
      port: 443
      targetPort: 443
  type: LoadBalancer
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: caddy-config
  namespace: web
data:
  Caddyfile: |
    {
      admin localhost:2019
      log {
        output stdout
        format json
      }
    }
    :80 {
      respond /health "OK" 200
      file_server
    }
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: caddy-data-pvc
namespace: web
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: caddy-config-pvc
namespace: web
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
7.1.3 系统服务配置
Systemd服务配置
# /etc/systemd/system/caddy.service
[Unit]
Description=Caddy
Documentation=https://caddyserver.com/docs/
After=network.target network-online.target
Requires=network-online.target
[Service]
Type=notify
User=caddy
Group=caddy
ExecStart=/usr/bin/caddy run --environ --config /etc/caddy/Caddyfile
ExecReload=/usr/bin/caddy reload --config /etc/caddy/Caddyfile --force
TimeoutStopSec=5s
LimitNOFILE=1048576
LimitNPROC=1048576
PrivateTmp=true
ProtectSystem=full
AmbientCapabilities=CAP_NET_BIND_SERVICE
[Install]
WantedBy=multi-user.target
# 创建caddy用户
sudo useradd --system --home /var/lib/caddy --create-home --shell /bin/false caddy
# 设置权限
sudo chown -R caddy:caddy /etc/caddy
sudo chown -R caddy:caddy /var/lib/caddy
# 启用和启动服务
sudo systemctl daemon-reload
sudo systemctl enable caddy
sudo systemctl start caddy
# 检查状态
sudo systemctl status caddy
进程管理器配置(PM2)
// ecosystem.config.js
module.exports = {
apps: [{
name: 'caddy',
script: '/usr/bin/caddy',
args: 'run --config /etc/caddy/Caddyfile',
instances: 1,
autorestart: true,
watch: false,
max_memory_restart: '1G',
env: {
NODE_ENV: 'production'
},
error_file: '/var/log/caddy/error.log',
out_file: '/var/log/caddy/out.log',
log_file: '/var/log/caddy/combined.log',
time: true
}]
};
7.2 配置管理
7.2.1 配置文件组织
目录结构
/etc/caddy/
├── Caddyfile # 主配置文件
├── conf.d/ # 配置片段目录
│ ├── global.conf # 全局配置
│ ├── sites/ # 站点配置
│ │ ├── example.com.conf
│ │ ├── api.example.com.conf
│ │ └── admin.example.com.conf
│ ├── snippets/ # 配置片段
│ │ ├── ssl.conf
│ │ ├── security.conf
│ │ └── logging.conf
│ └── upstreams/ # 上游配置
│ ├── backend.conf
│ └── api.conf
├── certs/ # 证书目录
├── logs/ # 日志目录
└── data/ # 数据目录
主配置文件
# /etc/caddy/Caddyfile
{
# 导入全局配置
import conf.d/global.conf
}
# 导入站点配置
import conf.d/sites/*.conf
全局配置
# /etc/caddy/conf.d/global.conf
# 管理API
admin 127.0.0.1:2019
# 邮箱配置(Let's Encrypt)
email admin@example.com
# 默认SNI
default_sni example.com
# 存储配置
storage file_system {
root /var/lib/caddy
}
# 日志配置
log {
output file /var/log/caddy/access.log {
roll_size 100mb
roll_keep 10
roll_keep_for 720h
}
format json
level INFO
}
# 服务器配置
servers {
metrics
timeouts {
read_body 30s
read_header 30s
write 30s
idle 120s
}
}
站点配置示例
# /etc/caddy/conf.d/sites/example.com.conf
example.com {
    # Shared snippets. Import paths are resolved relative to the file that
    # contains the import (conf.d/sites/), so step up one directory to reach
    # conf.d/snippets/.
    import ../snippets/ssl.conf
    import ../snippets/security.conf
    import ../snippets/logging.conf

    # Document root
    root * /var/www/example.com

    # Static file server
    file_server

    # Compression
    encode gzip

    # Error pages: serve /404.html for "not found" errors
    handle_errors {
        @404 {
            expression {http.error.status_code} == 404
        }
        rewrite @404 /404.html
        file_server
    }
}
配置片段
# /etc/caddy/conf.d/snippets/ssl.conf
# SSL/TLS配置
tls {
protocols tls1.2 tls1.3
ciphers TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
curves x25519 secp384r1 secp256r1
}
# HSTS
header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
# /etc/caddy/conf.d/snippets/security.conf
# 安全头部
header {
# XSS保护
X-XSS-Protection "1; mode=block"
# 内容类型嗅探保护
X-Content-Type-Options "nosniff"
# 点击劫持保护
X-Frame-Options "DENY"
# 引用者策略
Referrer-Policy "strict-origin-when-cross-origin"
# 权限策略
Permissions-Policy "geolocation=(), microphone=(), camera=()"
# 内容安全策略
Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'"
# 移除服务器信息
-Server
}
7.2.2 环境配置管理
环境变量配置
# /etc/caddy/env
# 数据库配置
DB_HOST=localhost
DB_PORT=5432
DB_NAME=myapp
DB_USER=caddy
DB_PASSWORD=secure_password
# Redis配置
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=redis_password
# API密钥
API_KEY=your_api_key_here
JWT_SECRET=your_jwt_secret_here
# 邮件配置
SMTP_HOST=smtp.example.com
SMTP_PORT=587
SMTP_USER=noreply@example.com
SMTP_PASSWORD=smtp_password
# 监控配置
MONITORING_ENDPOINT=https://monitoring.example.com
ALERT_WEBHOOK=https://alerts.example.com/webhook
# 在Caddyfile中使用环境变量
example.com {
    # Proxy to the application backend. The original pointed at
    # DB_HOST:DB_PORT, i.e. the PostgreSQL port, which does not speak HTTP.
    # {$VAR:default} is substituted when the Caddyfile is parsed; {env.VAR}
    # is evaluated per request.
    reverse_proxy {$BACKEND_HOST:localhost}:{$BACKEND_PORT:8080} {
        header_up X-API-Key {env.API_KEY}
    }

    # NOTE(review): `jwt` and `cache` are not built-in Caddy directives. They
    # need a build with third-party modules plus an `order` entry in the
    # global options - confirm which plugins are meant and their exact syntax.

    # JWT authentication
    jwt {
        trusted_tokens {
            static_secret {env.JWT_SECRET}
        }
    }

    # Redis-backed cache
    cache {
        redis {
            host {env.REDIS_HOST}:{env.REDIS_PORT}
            password {env.REDIS_PASSWORD}
        }
    }
}
多环境配置
# 环境配置脚本
#!/bin/bash
# deploy.sh - validate and load the Caddy configuration of one environment.
#
# Usage: deploy.sh [development|staging|production]   (default: production)
# Exits non-zero on an unknown environment, a missing env file, or an
# invalid configuration.
set -euo pipefail

ENVIRONMENT="${1:-production}"

case "$ENVIRONMENT" in
    "development")
        export CADDY_CONFIG="/etc/caddy/dev.Caddyfile"
        export LOG_LEVEL="DEBUG"
        export ADMIN_LISTEN="0.0.0.0:2019"
        ;;
    "staging")
        export CADDY_CONFIG="/etc/caddy/staging.Caddyfile"
        export LOG_LEVEL="INFO"
        export ADMIN_LISTEN="127.0.0.1:2019"
        ;;
    "production")
        export CADDY_CONFIG="/etc/caddy/Caddyfile"
        export LOG_LEVEL="WARN"
        export ADMIN_LISTEN="127.0.0.1:2019"
        ;;
    *)
        echo "Unknown environment: $ENVIRONMENT"
        exit 1
        ;;
esac

# Load the environment-specific variables. `set -a` exports everything the
# file defines; a plain `source` would leave them invisible to the caddy
# child processes started below.
ENV_FILE="/etc/caddy/env.$ENVIRONMENT"
if [ ! -r "$ENV_FILE" ]; then
    echo "Environment file not readable: $ENV_FILE"
    exit 1
fi
set -a
# shellcheck source=/dev/null
source "$ENV_FILE"
set +a

# Validate, then load exactly the file that was validated.
# `systemctl reload caddy` would run the unit's ExecReload, which always loads
# /etc/caddy/Caddyfile regardless of $CADDY_CONFIG. The adapter is named
# explicitly because dev.Caddyfile / staging.Caddyfile are not auto-detected
# as Caddyfiles from their file names.
if caddy validate --config "$CADDY_CONFIG" --adapter caddyfile; then
    echo "Configuration valid, reloading Caddy..."
    caddy reload --config "$CADDY_CONFIG" --adapter caddyfile
else
    echo "Configuration invalid, aborting deployment"
    exit 1
fi
7.2.3 配置版本控制
Git配置管理
# Initialise the configuration repository
cd /etc/caddy
git init
# Keep secrets (env files), backups and runtime data out of version control;
# a bare `git add .` would otherwise commit the passwords in /etc/caddy/env.
printf '%s\n' 'env' 'env.*' 'backups/' 'certs/' 'logs/' 'data/' >> .gitignore
git add .
git commit -m "Initial Caddy configuration"
# Add the remote repository
git remote add origin https://github.com/company/caddy-config.git
# The freshly initialised branch may be called "master"; rename it so the
# push below targets an existing local branch.
git branch -M main
git push -u origin main
# 配置部署脚本
#!/bin/bash
# config-deploy.sh - pull the latest configuration from git, validate it,
# reload Caddy and roll back automatically if anything fails.
set -uo pipefail

CONFIG_REPO="https://github.com/company/caddy-config.git"
CONFIG_DIR="/etc/caddy"
# Backups live OUTSIDE the configuration directory. With the backup directory
# nested inside it, `cp -r` would copy the directory into itself and every
# backup would contain all previous backups.
BACKUP_DIR="/var/backups/caddy-config"
KEEP_BACKUPS=10

# Restore the pre-deployment state: reset tracked files to the commit that was
# checked out before the pull (this also removes files the pull added), then
# copy the backup back to recover any untracked or locally modified files.
rollback() {
    git -C "$CONFIG_DIR" reset --hard "$PREV_COMMIT"
    cp -a "$BACKUP_PATH/." "$CONFIG_DIR/"
}

# Create the backup
echo "Creating backup..."
mkdir -p "$BACKUP_DIR"
BACKUP_PATH="$BACKUP_DIR/$(date +%Y%m%d_%H%M%S)"
cp -a "$CONFIG_DIR" "$BACKUP_PATH" || { echo "Backup failed, aborting deployment"; exit 1; }

# Pull the latest configuration
echo "Pulling latest configuration..."
cd "$CONFIG_DIR" || exit 1
PREV_COMMIT=$(git rev-parse HEAD) || exit 1
if ! git pull origin main; then
    echo "git pull failed, aborting deployment"
    rollback
    exit 1
fi

# Validate the configuration; an invalid pull must not stay on disk
echo "Validating configuration..."
if ! caddy validate --config "$CONFIG_DIR/Caddyfile"; then
    echo "Configuration invalid, aborting deployment"
    rollback
    exit 1
fi

echo "Configuration valid, reloading Caddy..."
# Check the reload result as well as the service state: a rejected reload
# leaves the old configuration running, so `is-active` alone would report
# success.
if systemctl reload caddy && sleep 5 && systemctl is-active --quiet caddy; then
    echo "Deployment successful"
    # Prune old backups, keeping the newest $KEEP_BACKUPS. Timestamped names
    # sort chronologically; full paths are passed to rm so nothing relative
    # to the current directory can be deleted by accident.
    find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -type d | sort -r \
        | tail -n +"$((KEEP_BACKUPS + 1))" | xargs -r rm -rf --
else
    echo "Service failed to start, rolling back..."
    rollback
    systemctl reload caddy
    exit 1
fi
7.3 监控和日志
7.3.1 日志配置
结构化日志配置
{
    # Default logger
    log {
        output file /var/log/caddy/access.log {
            roll_size 100mb
            roll_keep 30
            roll_keep_for 720h
        }
        # The `filter` encoder wraps the JSON encoder and removes sensitive
        # fields before they are written. (`include` / `exclude` select logger
        # namespaces, not fields, so they cannot be used for this. Request
        # method, URI, remote address, status, size and duration are part of
        # every access-log entry and need no explicit include list.)
        format filter {
            wrap json {
                time_format "2006-01-02T15:04:05.000Z07:00"
                message_key "message"
                level_key "level"
                time_key "timestamp"
                caller_key "caller"
            }
            fields {
                # Header names appear in canonical form in the log entry.
                request>headers>Authorization delete
                request>headers>Cookie delete
                request>headers>X-Api-Key delete
            }
        }
        level INFO
    }

    # Error log
    log error {
        output file /var/log/caddy/error.log {
            roll_size 50mb
            roll_keep 10
        }
        format console
        level ERROR
    }

    # Security event log
    log security {
        output file /var/log/caddy/security.log {
            roll_size 50mb
            roll_keep 20
        }
        format json
        level WARN
    }
}
example.com {
    # Site access log
    log {
        output file /var/log/caddy/sites/example.com.log {
            roll_size 50mb
            roll_keep 15
        }
        format json
    }

    # Dedicated access log for API traffic. The `log` directive itself takes
    # no matcher; requests are routed to a named logger with `log_name`.
    # `no_hostname` keeps this logger from also receiving every other request
    # of the site.
    # NOTE(review): `log_name` / `no_hostname` need a recent Caddy release -
    # confirm against the deployed version.
    log api {
        no_hostname
        output file /var/log/caddy/api-access.log {
            roll_size 100mb
            roll_keep 30
        }
        format json
    }

    @api_requests {
        path /api/*
    }
    log_name @api_requests api

    # Security events (401 / 403 / 429): the response status is not known
    # when request matchers run, so these cannot be split off here. Every
    # access-log entry carries a "status" field; filter on it downstream
    # (log shipper, fail2ban, ...).

    file_server
}
日志轮转配置
# /etc/logrotate.d/caddy
# Caddy keeps its log files open and already rotates them itself (roll_size /
# roll_keep). If logrotate is to own rotation, disable Caddy's rolling for
# those files (`roll_disabled`) and rotate with `copytruncate`: a reload does
# not reliably make Caddy reopen a renamed file, so the rename + `create` +
# reload scheme can leave it writing to the rotated file.
/var/log/caddy/*.log {
    daily
    missingok
    rotate 30
    compress
    delaycompress
    notifempty
    copytruncate
}

/var/log/caddy/sites/*.log {
    daily
    missingok
    rotate 15
    compress
    delaycompress
    notifempty
    copytruncate
}
7.3.2 监控配置
Prometheus监控
# 监控端点配置
monitoring.internal {
    # Basic-auth protection (bcrypt hash of the password)
    basicauth {
        prometheus $2a$14$Zkx19XLiW6VYouLHR5NmfOFU0z2GTNqq9qB6FY9gZKOOdOoKw6Uw.
    }

    # Prometheus metrics
    handle /metrics {
        # disable_openmetrics turns OFF OpenMetrics content negotiation; it
        # does not add detail. The metrics handler has no `labels` option -
        # attach instance / environment labels in the Prometheus scrape
        # config instead.
        metrics {
            disable_openmetrics
        }
    }

    # Health check
    handle /health {
        # `respond` has no header sub-directive; set the content type with
        # the `header` directive. Only placeholders that exist are used
        # ({system.version} and {system.uptime} do not).
        header Content-Type application/json
        respond `{
    "status": "healthy",
    "timestamp": "{time.now.unix}",
    "hostname": "{system.hostname}"
}` 200
    }

    # Detailed health check: Caddy has no built-in `health_check` directive,
    # so this forwards to the backend's own health endpoint. Disk and memory
    # thresholds belong in the monitoring system (e.g. node_exporter alerts).
    handle /health/detailed {
        rewrite * /health
        reverse_proxy backend:8080 {
            transport http {
                response_header_timeout 5s
            }
        }
    }

    # Reject everything else
    respond "Not Found" 404
}
Prometheus配置
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- "caddy_rules.yml"
scrape_configs:
- job_name: 'caddy'
static_configs:
- targets: ['localhost:2019']
metrics_path: /metrics
scrape_interval: 30s
basic_auth:
username: prometheus
password: prometheus_password
- job_name: 'caddy-nodes'
static_configs:
- targets:
- 'caddy-01.example.com:2019'
- 'caddy-02.example.com:2019'
- 'caddy-03.example.com:2019'
metrics_path: /metrics
basic_auth:
username: prometheus
password: prometheus_password
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
告警规则
# caddy_rules.yml
groups:
  - name: caddy
    rules:
      # High error rate. The status code is exposed as the `code` label of the
      # request-duration histogram; caddy_http_requests_total carries no
      # status label, so filtering it by status never matches anything.
      - alert: CaddyHighErrorRate
        expr: |
          (
            sum(rate(caddy_http_request_duration_seconds_count{code=~"5.."}[5m]))
            /
            sum(rate(caddy_http_request_duration_seconds_count[5m]))
          ) > 0.05
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Caddy high error rate"
          description: "Caddy error rate is {{ $value | humanizePercentage }} for more than 2 minutes"

      # High latency (95th percentile above one second)
      - alert: CaddyHighLatency
        expr: histogram_quantile(0.95, sum(rate(caddy_http_request_duration_seconds_bucket[5m])) by (le)) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Caddy high latency"
          description: "Caddy 95th percentile latency is {{ $value }}s for more than 5 minutes"

      # Service unavailable
      - alert: CaddyDown
        expr: up{job="caddy"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Caddy is down"
          description: "Caddy has been down for more than 1 minute"

      # Certificate about to expire. Caddy does not export a certificate
      # expiry metric; this uses the blackbox exporter's TLS probe metric.
      # NOTE(review): requires a blackbox_exporter job probing the HTTPS
      # endpoints - confirm it exists in the scrape configuration.
      - alert: CaddyCertificateExpiring
        expr: probe_ssl_earliest_cert_expiry - time() < 7 * 24 * 3600
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "Caddy certificate expiring soon"
          description: "Certificate for {{ $labels.instance }} will expire in {{ $value | humanizeDuration }}"

      # High memory usage (resident set above 1000 MB)
      - alert: CaddyHighMemoryUsage
        expr: process_resident_memory_bytes{job="caddy"} / 1024 / 1024 > 1000
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Caddy high memory usage"
          description: "Caddy memory usage is {{ $value }}MB for more than 10 minutes"
7.3.3 日志分析
ELK Stack集成
# docker-compose.yml for ELK
version: '3.8'
services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
container_name: elasticsearch
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
- xpack.security.enabled=false
volumes:
- elasticsearch_data:/usr/share/elasticsearch/data
ports:
- "9200:9200"
networks:
- elk
logstash:
image: docker.elastic.co/logstash/logstash:8.8.0
container_name: logstash
volumes:
- ./logstash/config/logstash.yml:/usr/share/logstash/config/logstash.yml:ro
- ./logstash/pipeline:/usr/share/logstash/pipeline:ro
ports:
- "5044:5044"
- "5000:5000/tcp"
- "5000:5000/udp"
- "9600:9600"
environment:
LS_JAVA_OPTS: "-Xmx256m -Xms256m"
networks:
- elk
depends_on:
- elasticsearch
kibana:
image: docker.elastic.co/kibana/kibana:8.8.0
container_name: kibana
ports:
- "5601:5601"
environment:
ELASTICSEARCH_URL: http://elasticsearch:9200
ELASTICSEARCH_HOSTS: '["http://elasticsearch:9200"]'
networks:
- elk
depends_on:
- elasticsearch
filebeat:
image: docker.elastic.co/beats/filebeat:8.8.0
container_name: filebeat
user: root
volumes:
- ./filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro
- /var/log/caddy:/var/log/caddy:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
networks:
- elk
depends_on:
- logstash
volumes:
elasticsearch_data:
driver: local
networks:
elk:
driver: bridge
Filebeat配置
# filebeat/filebeat.yml
filebeat.inputs:
- type: log
enabled: true
paths:
- /var/log/caddy/*.log
fields:
service: caddy
environment: production
fields_under_root: true
json.keys_under_root: true
json.add_error_key: true
multiline.pattern: '^\{'
multiline.negate: true
multiline.match: after
- type: log
enabled: true
paths:
- /var/log/caddy/sites/*.log
fields:
service: caddy
log_type: site_access
environment: production
fields_under_root: true
json.keys_under_root: true
processors:
- add_host_metadata:
when.not.contains.tags: forwarded
- add_docker_metadata: ~
- add_kubernetes_metadata: ~
output.logstash:
hosts: ["logstash:5044"]
logging.level: info
logging.to_files: true
logging.files:
path: /var/log/filebeat
name: filebeat
keepfiles: 7
permissions: 0644
Logstash配置
# logstash/pipeline/caddy.conf
input {
  beats {
    port => 5044
  }
}

# Field names follow Caddy's JSON access-log layout once Filebeat has lifted
# the JSON keys to the event root: request.*, status, duration (seconds) and
# ts. Caddy does not emit http.request.* / http.response.* keys, so
# conditions written against those never match.
filter {
  if [service] == "caddy" {
    # Timestamp: `ts` (epoch seconds, Caddy's default) or `timestamp`
    # (ISO 8601, when the logger is configured with time_key "timestamp").
    if [ts] {
      date {
        match => [ "ts", "UNIX" ]
      }
    } else if [timestamp] {
      date {
        match => [ "timestamp", "ISO8601" ]
      }
    }

    # User agent. Caddy logs header values as arrays; use the first entry.
    if [request][headers][User-Agent] {
      useragent {
        source => "[request][headers][User-Agent][0]"
        target => "user_agent"
      }
    }

    # Client IP geolocation
    if [request][remote_ip] {
      geoip {
        source => "[request][remote_ip]"
        target => "geoip"
      }
    }

    # Response-time category (duration is logged in seconds)
    if [duration] {
      ruby {
        code => "
          duration = event.get('[duration]')
          if duration
            duration_ms = duration.to_f * 1000
            if duration_ms < 100
              event.set('response_time_category', 'fast')
            elsif duration_ms < 500
              event.set('response_time_category', 'normal')
            elsif duration_ms < 1000
              event.set('response_time_category', 'slow')
            else
              event.set('response_time_category', 'very_slow')
            end
          end
        "
      }
    }

    # Status-code category
    if [status] {
      ruby {
        code => "
          status = event.get('[status]')
          if status
            case status.to_i
            when 200..299
              event.set('status_category', 'success')
            when 300..399
              event.set('status_category', 'redirect')
            when 400..499
              event.set('status_category', 'client_error')
            when 500..599
              event.set('status_category', 'server_error')
            else
              event.set('status_category', 'unknown')
            end
          end
        "
      }
    }
  }
}

output {
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "caddy-logs-%{+YYYY.MM.dd}"
  }
  # Debug output
  stdout {
    codec => rubydebug
  }
}
7.4 性能优化
7.4.1 系统级优化
内核参数优化
# /etc/sysctl.d/99-caddy.conf
# 网络优化
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 5000
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_keepalive_time = 1200
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_tw_reuse = 1
net.ipv4.ip_local_port_range = 10000 65535
# 文件描述符限制
fs.file-max = 2097152
fs.nr_open = 2097152
# 内存管理
vm.swappiness = 10
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
# 应用更改
sudo sysctl -p /etc/sysctl.d/99-caddy.conf
文件描述符限制
# /etc/security/limits.d/caddy.conf
caddy soft nofile 1048576
caddy hard nofile 1048576
caddy soft nproc 1048576
caddy hard nproc 1048576
# 或者在systemd服务中设置
# /etc/systemd/system/caddy.service
[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
7.4.2 Caddy配置优化
连接和超时优化
{
# 服务器优化
servers {
# 协议配置
protocols h1 h2 h3
# 超时配置
timeouts {
read_body 30s
read_header 10s
write 60s
idle 300s
}
# 连接限制
max_header_size 16KB
# 启用指标
metrics
}
# 全局缓存配置
cache {
# 内存缓存
memory {
max_size 512MB
}
# Redis缓存
redis {
host localhost:6379
pool_size 20
max_idle 10
idle_timeout 300s
}
}
}
example.com {
    # Compression
    encode {
        gzip 6
        zstd
        minimum_length 1024
        # The header matcher takes ONE value per line; several lines for the
        # same field are OR-ed. The trailing * also matches "; charset=..."
        # suffixes.
        match {
            header Content-Type text/*
            header Content-Type application/json*
            header Content-Type application/javascript*
            header Content-Type application/rss+xml*
            header Content-Type application/atom+xml*
            header Content-Type image/svg+xml*
        }
    }

    # Static assets
    @static {
        path *.css *.js *.png *.jpg *.jpeg *.gif *.ico *.svg *.woff *.woff2 *.ttf *.eot
    }
    handle @static {
        # Long-lived cache headers
        header Cache-Control "public, max-age=31536000, immutable"
        header Expires "Thu, 31 Dec 2037 23:55:55 GMT"
        # Serve pre-compressed sidecar files when present
        file_server {
            precompressed gzip br
        }
    }

    # API
    @api {
        path /api/*
    }
    handle @api {
        # API response cache
        # NOTE(review): `cache` is not a built-in Caddy directive; it needs a
        # third-party cache module - confirm the plugin and its syntax.
        cache {
            key {method} {host} {path} {query} {header.Authorization}
            ttl 5m
            stale_ttl 1h
            # Cache conditions
            match {
                method GET
                status 200
            }
        }

        # Reverse proxy
        reverse_proxy backend:8080 {
            # Load balancing
            lb_policy least_conn

            # Active health checks
            health_uri /health
            health_interval 30s
            health_timeout 5s

            # Connection pool: Caddy's option names are keepalive*, not
            # keep_alive / max_idle_conns*.
            transport http {
                dial_timeout 10s
                response_header_timeout 30s
                expect_continue_timeout 1s
                keepalive 30s
                keepalive_idle_conns 100
                keepalive_idle_conns_per_host 10
            }

            # Fall back to the backup backend on gateway errors
            @retry {
                status 502 503 504
            }
            handle_response @retry {
                reverse_proxy backend-backup:8080
            }
        }
    }

    # Default handler
    file_server
}
7.4.3 缓存策略优化
多层缓存配置
example.com {
# L1缓存:内存缓存(最快)
@frequently_accessed {
path /api/popular/* /api/trending/*
}
cache @frequently_accessed {
memory {
max_size 100MB
ttl 1m
}
key {method} {host} {path}
}
# L2缓存:Redis缓存(快)
@api_cacheable {
path /api/*
method GET
not path /api/user/* /api/private/*
}
cache @api_cacheable {
redis {
host redis-cluster:6379
ttl 10m
stale_ttl 1h
}
key {method} {host} {path} {query}
# 缓存预热
warmup {
urls /api/config /api/menu
interval 5m
}
}
# L3缓存:CDN缓存(全球分发)
@static_assets {
path /static/* /assets/* /images/*
}
handle @static_assets {
header {
Cache-Control "public, max-age=31536000, immutable"
CDN-Cache-Control "public, max-age=31536000"
}
file_server {
precompressed gzip br
}
}
# 缓存清除端点
handle /admin/cache/purge {
@authorized {
header Authorization "Bearer {env.ADMIN_TOKEN}"
}
handle @authorized {
cache_purge {
patterns /api/* /static/*
}
respond "Cache purged" 200
}
respond "Unauthorized" 401
}
reverse_proxy backend:8080
}
7.4.4 负载均衡优化
高级负载均衡配置
example.com {
    reverse_proxy {
        # Upstreams. `to` only takes addresses; weights are arguments of the
        # weighted_round_robin policy, in upstream order (3 / 3 / 2 / 1).
        # NOTE(review): Caddy has no per-upstream `backup` flag, so backup-01
        # takes a small share of regular traffic here. For strict failover
        # use `lb_policy first` with upstreams listed in priority order.
        to backend-01:8080 backend-02:8080 backend-03:8080 backup-01:8080
        lb_policy weighted_round_robin 3 3 2 1

        # Active health checks
        health_uri /health
        health_interval 10s
        health_timeout 3s
        health_status 200
        health_body "OK"

        # Passive health checks
        fail_duration 30s
        max_fails 3
        unhealthy_request_count 5

        # Transport tuning
        transport http {
            # Connection pool: idle connections are kept for 90s
            keepalive 90s
            keepalive_idle_conns 200
            keepalive_idle_conns_per_host 20
            # TCP keep-alive probe interval
            keepalive_interval 30s

            # Timeouts
            dial_timeout 10s
            response_header_timeout 30s
            expect_continue_timeout 1s

            # No tls_* options: the upstreams are plain HTTP on :8080, and
            # any tls_* option would switch the transport to HTTPS.
            # Certificate verification is on by default when TLS is used;
            # `tls_insecure_skip_verify` is a flag that turns it OFF and
            # takes no argument.
        }

        # Request headers: one header per header_up line. X-Forwarded-For,
        # X-Forwarded-Proto and X-Forwarded-Host are set by Caddy itself.
        header_up X-Real-IP {remote_host}

        # Response headers: hide backend implementation details
        header_down -Server
        header_down -X-Powered-By

        # Fall back to a backup backend on gateway errors
        @retry_conditions {
            status 502 503 504
        }
        handle_response @retry_conditions {
            reverse_proxy backup-02:8080 {
                transport http {
                    dial_timeout 5s
                    response_header_timeout 15s
                }
            }
        }
    }
}
7.5 安全加固
7.5.1 系统安全
防火墙配置
# UFW配置
sudo ufw default deny incoming
sudo ufw default allow outgoing
# 允许SSH
sudo ufw allow ssh
# 允许HTTP和HTTPS
sudo ufw allow 80/tcp
sudo ufw allow 443/tcp
# 允许管理API(仅本地)
sudo ufw allow from 127.0.0.1 to any port 2019
# 启用防火墙
sudo ufw enable
# 查看状态
sudo ufw status verbose
# iptables配置
#!/bin/bash
# firewall.sh - IPv4 packet filter for a Caddy host.
# Rule order matters: the first matching rule wins, so the port-scan trap and
# the rate limits must come BEFORE any unconditional ACCEPT for the same
# traffic.

# Flush existing rules
iptables -F
iptables -X
iptables -t nat -F
iptables -t nat -X
iptables -t mangle -F
iptables -t mangle -X

# Default policies
iptables -P INPUT DROP
iptables -P FORWARD DROP
iptables -P OUTPUT ACCEPT

# Allow loopback
iptables -A INPUT -i lo -j ACCEPT
iptables -A OUTPUT -o lo -j ACCEPT

# Allow established connections
iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT

# Port-scan trap: sources that touched port 139 are dropped for 24 hours
# (the --remove rule clears entries whose 24 hours have elapsed)
iptables -A INPUT -m recent --name portscan --rcheck --seconds 86400 -j DROP
iptables -A INPUT -m recent --name portscan --remove
iptables -A INPUT -p tcp -m tcp --dport 139 -m recent --name portscan --set -j LOG --log-prefix "portscan:"
iptables -A INPUT -p tcp -m tcp --dport 139 -m recent --name portscan --set -j DROP

# Allow SSH
iptables -A INPUT -p tcp --dport 22 -j ACCEPT

# Allow HTTP and HTTPS with basic flood protection: the limit applies to NEW
# connections only and is tracked per source address, so one client cannot
# use up the budget of everyone else. Connections over the limit fall through
# to the DROP policy.
iptables -A INPUT -p tcp --dport 80 -m conntrack --ctstate NEW \
    -m hashlimit --hashlimit-name http --hashlimit-mode srcip \
    --hashlimit-upto 25/minute --hashlimit-burst 100 -j ACCEPT
iptables -A INPUT -p tcp --dport 443 -m conntrack --ctstate NEW \
    -m hashlimit --hashlimit-name https --hashlimit-mode srcip \
    --hashlimit-upto 25/minute --hashlimit-burst 100 -j ACCEPT

# Allow the admin API (local only)
iptables -A INPUT -s 127.0.0.1 -p tcp --dport 2019 -j ACCEPT

# Persist the rules
mkdir -p /etc/iptables
iptables-save > /etc/iptables/rules.v4
Fail2ban配置
# /etc/fail2ban/jail.d/caddy.conf
[caddy-auth]
enabled = true
port = http,https
filter = caddy-auth
logpath = /var/log/caddy/access.log
maxretry = 5
bantime = 3600
findtime = 600
action = iptables-multiport[name=caddy-auth, port="http,https", protocol=tcp]
[caddy-dos]
enabled = true
port = http,https
filter = caddy-dos
logpath = /var/log/caddy/access.log
maxretry = 100
bantime = 600
findtime = 60
action = iptables-multiport[name=caddy-dos, port="http,https", protocol=tcp]
# /etc/fail2ban/filter.d/caddy-auth.conf
[Definition]
failregex = ^.*"remote_ip":"<HOST>".*"status":401.*$
^.*"remote_ip":"<HOST>".*"status":403.*$
ignoreregex =
# /etc/fail2ban/filter.d/caddy-dos.conf
[Definition]
failregex = ^.*"remote_ip":"<HOST>".*$
ignoreregex =
7.5.2 应用安全
安全头部配置
example.com {
    # Security headers
    header {
        # HSTS
        Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
        # XSS protection
        X-XSS-Protection "1; mode=block"
        # MIME-sniffing protection
        X-Content-Type-Options "nosniff"
        # Clickjacking protection
        X-Frame-Options "DENY"
        # Referrer policy
        Referrer-Policy "strict-origin-when-cross-origin"
        # Permissions policy
        Permissions-Policy "geolocation=(), microphone=(), camera=(), payment=(), usb=(), magnetometer=(), gyroscope=()"
        # Content security policy
        Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.example.com; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src 'self' https://fonts.gstatic.com; img-src 'self' data: https:; connect-src 'self' https://api.example.com; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
        # Hide backend implementation details
        -X-Powered-By
        # Replace the server banner with a neutral one. Deleting `Server` in
        # the same block would also strip this value (deletes are applied
        # after sets), so only the override is kept.
        Server "WebServer/1.0"
    }

    # Request size limit
    request_body {
        max_size 10MB
    }

    # Rate limiting
    # NOTE(review): `rate_limit` is not a built-in directive. The syntax below
    # follows the mholt/caddy-ratelimit module (events per window, no burst
    # option) and needs an `order rate_limit ...` entry in the global options
    # - confirm the plugin in use.
    rate_limit {
        zone general {
            key {remote_host}
            events 300
            window 1m
        }
    }

    # Address-range blocking. `remote_ip` matches CIDR ranges given as
    # arguments; blocking by country would need a GeoIP module.
    @blocked_ranges remote_ip 192.168.1.0/24 10.0.0.0/8
    handle @blocked_ranges {
        respond "Access denied" 403
    }

    # User-agent filtering: one value per header line; lines for the same
    # field are OR-ed.
    @bad_bots {
        header User-Agent *bot*
        header User-Agent *crawler*
        header User-Agent *spider*
        header User-Agent *scraper*
    }
    handle @bad_bots {
        respond "Forbidden" 403
    }

    file_server
}
7.5.3 证书安全
证书管理最佳实践
# Global options: certificate storage, ACME endpoint, contact e-mail and key
# type used for issued certificates.
{
    # Certificate storage
    storage file_system {
        root /var/lib/caddy
    }

    # ACME directory and the root certificate trusted when talking to it
    acme_ca https://acme-v02.api.letsencrypt.org/directory
    acme_ca_root /etc/ssl/certs/isrgrootx1.pem

    # Contact e-mail for the ACME account
    email security@example.com

    # Key type. Accepted values are ed25519, p256, p384, rsa2048 and rsa4096;
    # "ec256" is not one of them, so the ECDSA P-256 option is spelled p256.
    key_type p256
}
# Site block demonstrating explicit TLS parameters and optional client
# certificate authentication.
example.com {
    tls {
        # Protocol versions (minimum, maximum)
        protocols tls1.2 tls1.3

        # Cipher suites for TLS 1.2. `ciphers` takes the names as inline
        # arguments rather than a block, and the ChaCha20 suites carry the
        # _SHA256 suffix in Caddy's list of supported names.
        ciphers TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256

        # Elliptic curves
        curves x25519 secp384r1 secp256r1

        # ALPN protocols
        alpn h2 http/1.1

        # OCSP stapling needs no line here: it is on by default and is not a
        # `tls` subdirective (it can only be switched off via the global
        # `ocsp_stapling off` option).

        # Client certificates (optional)
        client_auth {
            mode request
            trusted_ca_cert_file /etc/caddy/client-ca.pem
        }
    }

    # Certificate transparency
    # NOTE(review): Expect-CT is deprecated and ignored by current browsers;
    # kept for parity with the original example.
    header {
        Expect-CT "max-age=86400, enforce, report-uri=https://example.com/ct-report"
    }

    file_server
}
# Certificate monitoring endpoint
# Site block for certs.example.com; access is guarded by HTTP basic auth.
certs.example.com {
# Single account "admin"; the value looks like a bcrypt hash ($2a$ prefix).
basicauth {
admin $2a$14$Zkx19XLiW6VYouLHR5NmfOFU0z2GTNqq9qB6FY9gZKOOdOoKw6Uw.
}
# NOTE(review): `certificate_info` is not a directive of stock Caddy as far
# as I can tell; presumably it comes from a third-party module. Confirm that
# the module exists and is compiled in, otherwise this config will not load.
handle /certificates {
certificate_info {
format json
include_chain true
}
}
# Static 200 response; presumably the fallback for all other paths.
respond "Certificate Monitor" 200
}
7.6 备份和恢复
7.6.1 数据备份策略
备份脚本
#!/bin/bash
# backup.sh - Caddy backup script
#
# Copies Caddy configuration, data, recent logs and related system files into
# a staging directory, packs it into a timestamped .tar.gz with a SHA-256
# checksum file, prunes archives older than RETENTION_DAYS, and uploads the
# result to S3 when the environment variable REMOTE_BACKUP is "true".

BACKUP_DIR="/backup/caddy"
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_NAME="caddy_backup_$DATE"
RETENTION_DAYS=30
STAGING_DIR="$BACKUP_DIR/$BACKUP_NAME"
ARCHIVE="$BACKUP_DIR/$BACKUP_NAME.tar.gz"

# Create the staging directory; nothing below can work without it.
if ! mkdir -p "$STAGING_DIR"; then
    echo "Failed to create backup directory: $STAGING_DIR" >&2
    exit 1
fi

echo "Starting Caddy backup at $(date)"

# Configuration files
echo "Backing up configuration files..."
cp -r /etc/caddy "$STAGING_DIR/"

# Certificates and data
# NOTE(review): /etc/caddy and /var/lib/caddy both land in
# "$STAGING_DIR/caddy", so configuration and data are merged into a single
# directory inside the archive. The layout is left unchanged here because the
# restore script reads the same path; separating them requires changing both
# scripts together.
echo "Backing up certificates and data..."
cp -r /var/lib/caddy "$STAGING_DIR/"

# Logs modified within the last 7 days
echo "Backing up recent logs..."
mkdir -p "$STAGING_DIR/logs"
find /var/log/caddy -name "*.log" -mtime -7 -exec cp {} "$STAGING_DIR/logs/" \;

# System configuration (best effort: each file may legitimately be absent)
echo "Backing up system configuration..."
mkdir -p "$STAGING_DIR/system"
cp /etc/systemd/system/caddy.service "$STAGING_DIR/system/" 2>/dev/null || true
cp /etc/security/limits.d/caddy.conf "$STAGING_DIR/system/" 2>/dev/null || true
cp /etc/sysctl.d/99-caddy.conf "$STAGING_DIR/system/" 2>/dev/null || true

# Backup information file
echo "Creating backup information..."
cat > "$STAGING_DIR/backup_info.txt" << EOF
Backup Date: $(date)
Hostname: $(hostname)
Caddy Version: $(caddy version)
System: $(uname -a)
Backup Size: $(du -sh "$STAGING_DIR" | cut -f1)
EOF

# Compress. The staging directory is removed only after tar succeeded, so a
# failed archive never costs us the collected files.
echo "Compressing backup..."
cd "$BACKUP_DIR" || { echo "Cannot enter $BACKUP_DIR" >&2; exit 1; }
if tar -czf "$BACKUP_NAME.tar.gz" "$BACKUP_NAME"; then
    rm -rf "$BACKUP_NAME"
else
    echo "Failed to create archive; staging directory kept at $STAGING_DIR" >&2
    exit 1
fi

# Checksum. Written with a relative file name, so it must be verified from
# inside $BACKUP_DIR (or wherever archive and checksum are stored together).
echo "Calculating checksum..."
sha256sum "$BACKUP_NAME.tar.gz" > "$BACKUP_NAME.tar.gz.sha256"

# Prune old backups: remove both the archives and their checksum files.
# Previously only the *.sha256 files were deleted, so archives accumulated
# forever.
echo "Cleaning up old backups..."
find "$BACKUP_DIR" -type f \
    \( -name "caddy_backup_*.tar.gz" -o -name "caddy_backup_*.tar.gz.sha256" \) \
    -mtime +"$RETENTION_DAYS" -delete

echo "Backup completed: $BACKUP_NAME.tar.gz"
echo "Backup size: $(du -sh "$ARCHIVE" | cut -f1)"
echo "Backup location: $ARCHIVE"

# Optional: upload to remote storage
if [ "${REMOTE_BACKUP:-}" = "true" ]; then
    echo "Uploading to remote storage..."
    # AWS S3
    aws s3 cp "$ARCHIVE" s3://backup-bucket/caddy/
    aws s3 cp "$ARCHIVE.sha256" s3://backup-bucket/caddy/
    # Or use rsync
    # rsync -avz "$ARCHIVE" backup-server:/backup/caddy/
fi

echo "Backup process completed at $(date)"
自动备份配置
# Add to crontab
# crontab -e
# Daily backup at 02:00; output is appended to the backup log
0 2 * * * /usr/local/bin/backup.sh >> /var/log/caddy/backup.log 2>&1
# Weekly "full" backup every Sunday at 01:00
# NOTE(review): the backup.sh shown in this chapter does not read any
# command-line arguments, so --full presumably has no effect - confirm
0 1 * * 0 /usr/local/bin/backup.sh --full >> /var/log/caddy/backup.log 2>&1
# Clean up old backups on the 1st of every month at 03:00
# NOTE(review): cleanup-backups.sh is not defined in this section - confirm it exists
0 3 1 * * /usr/local/bin/cleanup-backups.sh >> /var/log/caddy/backup.log 2>&1
7.6.2 恢复策略
恢复脚本
#!/bin/bash
# restore.sh - Caddy restore script
#
# Usage: restore.sh <backup_file.tar.gz>
#
# Verifies the archive checksum (when a .sha256 file sits next to it), stops
# Caddy, snapshots the current config/data, extracts the archive, restores
# files, validates the configuration and restarts the service. When
# validation fails, the pre-restore snapshot is copied back.

BACKUP_FILE="$1"
RESTORE_DIR="/tmp/caddy_restore"
PRE_RESTORE_DIR="/backup/caddy/pre-restore"
STAMP=$(date +%Y%m%d_%H%M%S)
PRE_CONFIG="$PRE_RESTORE_DIR/caddy-config-$STAMP"
PRE_DATA="$PRE_RESTORE_DIR/caddy-data-$STAMP"

# Copies the pre-restore snapshot back over /etc/caddy and /var/lib/caddy.
# System files (systemd unit, limits, sysctl) are not part of the snapshot
# and are therefore not rolled back.
rollback() {
    rm -rf /etc/caddy/* /var/lib/caddy/*
    cp -a "$PRE_CONFIG"/. /etc/caddy/
    cp -a "$PRE_DATA"/. /var/lib/caddy/
}

if [ -z "$BACKUP_FILE" ]; then
    echo "Usage: $0 <backup_file.tar.gz>"
    echo "Available backups:"
    ls -la /backup/caddy/caddy_backup_*.tar.gz 2>/dev/null
    exit 1
fi

if [ ! -f "$BACKUP_FILE" ]; then
    echo "Backup file not found: $BACKUP_FILE"
    exit 1
fi

# Resolve to an absolute path so later directory changes cannot break it.
BACKUP_FILE="$(cd "$(dirname "$BACKUP_FILE")" && pwd)/$(basename "$BACKUP_FILE")"
BACKUP_NAME=$(basename "$BACKUP_FILE" .tar.gz)

echo "Starting Caddy restore from $BACKUP_FILE at $(date)"

# Verify the archive. The checksum file may record a bare file name, so the
# check runs from the archive's own directory.
echo "Verifying backup integrity..."
if [ -f "$BACKUP_FILE.sha256" ]; then
    if ! (cd "$(dirname "$BACKUP_FILE")" && sha256sum -c "$(basename "$BACKUP_FILE").sha256"); then
        echo "Backup file integrity check failed!"
        exit 1
    fi
    echo "Backup integrity verified."
else
    echo "Warning: No checksum file found, skipping integrity check."
fi

# Stop the Caddy service
echo "Stopping Caddy service..."
systemctl stop caddy

# Snapshot the current state (cp -a keeps ownership and modes for rollback)
echo "Backing up current configuration..."
mkdir -p "$PRE_RESTORE_DIR"
cp -a /etc/caddy "$PRE_CONFIG"
cp -a /var/lib/caddy "$PRE_DATA"

# Extract into a clean scratch directory
echo "Extracting backup..."
rm -rf "$RESTORE_DIR"
mkdir -p "$RESTORE_DIR"
if ! tar -xzf "$BACKUP_FILE" -C "$RESTORE_DIR"; then
    echo "Failed to extract backup; nothing was changed."
    systemctl start caddy
    exit 1
fi

EXTRACTED="$RESTORE_DIR/$BACKUP_NAME"

# Restore configuration files
echo "Restoring configuration files..."
if [ -d "$EXTRACTED/caddy" ]; then
    rm -rf /etc/caddy/*
    cp -r "$EXTRACTED/caddy"/. /etc/caddy/
    chown -R caddy:caddy /etc/caddy
    # Directories need the execute bit to stay traversable; a blanket
    # `chmod -R 644` would lock Caddy out of every subdirectory.
    find /etc/caddy -type d -exec chmod 755 {} +
    find /etc/caddy -type f -exec chmod 644 {} +
fi

# Restore data files
# NOTE(review): this reads the same "caddy" directory as the configuration
# step above, so /var/lib/caddy receives whatever that directory holds.
# Confirm the archive layout keeps config and data apart.
echo "Restoring data files..."
if [ -d "$EXTRACTED/caddy" ]; then
    rm -rf /var/lib/caddy/*
    cp -r "$EXTRACTED/caddy"/. /var/lib/caddy/
    chown -R caddy:caddy /var/lib/caddy
fi

# Restore system configuration (best effort: files may be absent)
echo "Restoring system configuration..."
if [ -d "$EXTRACTED/system" ]; then
    cp "$EXTRACTED/system/caddy.service" /etc/systemd/system/ 2>/dev/null || true
    cp "$EXTRACTED/system/caddy.conf" /etc/security/limits.d/ 2>/dev/null || true
    cp "$EXTRACTED/system/99-caddy.conf" /etc/sysctl.d/ 2>/dev/null || true
    systemctl daemon-reload
fi

# Validate before starting; on failure put the previous state back
echo "Validating configuration..."
if caddy validate --config /etc/caddy/Caddyfile; then
    echo "Configuration validation successful."
else
    echo "Configuration validation failed! Rolling back..."
    rollback
    systemctl start caddy
    exit 1
fi

# Start the Caddy service
echo "Starting Caddy service..."
systemctl start caddy

# Check service status
sleep 5
if systemctl is-active --quiet caddy; then
    echo "Caddy service started successfully."
else
    echo "Failed to start Caddy service!"
    systemctl status caddy
    exit 1
fi

# Clean up temporary files
echo "Cleaning up temporary files..."
rm -rf "$RESTORE_DIR"

echo "Restore completed successfully at $(date)"
echo "Please verify that all services are working correctly."
7.6.3 灾难恢复
灾难恢复计划
#!/bin/bash
# disaster-recovery.sh - disaster recovery script
#
# Usage: disaster-recovery.sh {prepare|restore} [backup_source]
#   prepare  create a fresh backup, replicate /backup/caddy to S3 and a remote
#            host, and write recovery instructions
#   restore  fetch the newest archive from local|s3|remote, install Caddy if
#            missing, run restore.sh and probe the /health endpoints

DR_MODE="$1"
BACKUP_SOURCE="$2"

case "$DR_MODE" in
    "prepare")
        echo "Preparing for disaster recovery..."

        # Create a fresh backup; replicating is pointless if this failed.
        # NOTE(review): confirm backup.sh actually handles --emergency.
        if ! /usr/local/bin/backup.sh --emergency; then
            echo "Emergency backup failed; aborting preparation." >&2
            exit 1
        fi

        # Replicate to multiple locations
        aws s3 sync /backup/caddy/ s3://dr-backup-bucket/caddy/
        rsync -avz /backup/caddy/ dr-server:/backup/caddy/

        # Write the recovery document (heredoc body stays at column 0)
        cat > /backup/caddy/recovery-instructions.txt << EOF
Disaster Recovery Instructions
=============================
Date: $(date)
Server: $(hostname)
Caddy Version: $(caddy version)
Recovery Steps:
1. Install Caddy on new server
2. Run: disaster-recovery.sh restore <backup_source>
3. Verify DNS settings
4. Test all services
5. Update monitoring
Backup Locations:
- Local: /backup/caddy/
- S3: s3://dr-backup-bucket/caddy/
- Remote: dr-server:/backup/caddy/
Contacts:
- Primary Admin: admin@example.com
- Secondary Admin: backup-admin@example.com
- Emergency: +1-555-0123
EOF
        ;;

    "restore")
        echo "Starting disaster recovery restore..."

        if [ -z "$BACKUP_SOURCE" ]; then
            echo "Please specify backup source (local|s3|remote)"
            exit 1
        fi

        # Locate the newest archive. ls errors are silenced so that an empty
        # directory falls through to the "No backup found!" check below.
        case "$BACKUP_SOURCE" in
            "s3")
                echo "Downloading from S3..."
                aws s3 sync s3://dr-backup-bucket/caddy/ /tmp/dr-restore/
                LATEST_BACKUP=$(ls -t /tmp/dr-restore/caddy_backup_*.tar.gz 2>/dev/null | head -n 1)
                ;;
            "remote")
                echo "Downloading from remote server..."
                rsync -avz dr-server:/backup/caddy/ /tmp/dr-restore/
                LATEST_BACKUP=$(ls -t /tmp/dr-restore/caddy_backup_*.tar.gz 2>/dev/null | head -n 1)
                ;;
            "local")
                LATEST_BACKUP=$(ls -t /backup/caddy/caddy_backup_*.tar.gz 2>/dev/null | head -n 1)
                ;;
            *)
                echo "Unknown backup source: $BACKUP_SOURCE"
                exit 1
                ;;
        esac

        if [ -z "$LATEST_BACKUP" ]; then
            echo "No backup found!"
            exit 1
        fi

        echo "Using backup: $LATEST_BACKUP"

        # Install Caddy if it is not installed yet (Debian/Ubuntu only)
        if ! command -v caddy &> /dev/null; then
            echo "Installing Caddy..."
            curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | sudo gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
            curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | sudo tee /etc/apt/sources.list.d/caddy-stable.list
            apt update
            apt install -y caddy
        fi

        # Create user and directories (useradd fails harmlessly if the user exists)
        useradd --system --home /var/lib/caddy --create-home --shell /bin/false caddy 2>/dev/null || true
        mkdir -p /etc/caddy /var/lib/caddy /var/log/caddy
        chown -R caddy:caddy /var/lib/caddy /var/log/caddy

        # Restore the backup; stop here if it failed instead of reporting
        # a successful recovery.
        if ! /usr/local/bin/restore.sh "$LATEST_BACKUP"; then
            echo "Restore failed; disaster recovery aborted." >&2
            exit 1
        fi

        # Update DNS (if needed)
        echo "Please update DNS records to point to this server:"
        echo "Current IP: $(curl -s ifconfig.me)"

        # Verify services
        echo "Verifying services..."
        sleep 10

        if curl -f http://localhost/health > /dev/null 2>&1; then
            echo "HTTP service is responding"
        else
            echo "Warning: HTTP service not responding"
        fi

        # -k: the certificate will not match "localhost"
        if curl -fk https://localhost/health > /dev/null 2>&1; then
            echo "HTTPS service is responding"
        else
            echo "Warning: HTTPS service not responding"
        fi

        echo "Disaster recovery completed."
        echo "Please verify all services and update monitoring systems."
        ;;

    *)
        echo "Usage: $0 {prepare|restore} [backup_source]"
        echo " prepare: Create emergency backup and prepare for DR"
        echo " restore: Restore from backup (backup_source: local|s3|remote)"
        exit 1
        ;;
esac
7.7 故障排除
7.7.1 常见问题诊断
诊断脚本
#!/bin/bash
# diagnose.sh - Caddy troubleshooting script
#
# Prints a read-only diagnostic report: system and Caddy information,
# processes, listening ports, memory, disk, configuration validation,
# certificate dates, recent errors, connectivity, firewall and permissions.

echo "Caddy Diagnostic Report"
echo "======================"
echo "Date: $(date)"
echo "Hostname: $(hostname)"
echo ""

# System information
echo "System Information:"
echo "------------------"
echo "OS: $(grep PRETTY_NAME /etc/os-release | cut -d'"' -f2)"
echo "Kernel: $(uname -r)"
echo "Uptime: $(uptime)"
echo "Load Average: $(cat /proc/loadavg)"
echo ""

# Caddy version and service state
echo "Caddy Information:"
echo "-----------------"
if command -v caddy &> /dev/null; then
    echo "Version: $(caddy version)"
else
    echo "Caddy not found in PATH"
fi
echo "Service Status: $(systemctl is-active caddy)"
echo "Service Enabled: $(systemctl is-enabled caddy)"
echo ""

# Process information ([c]addy keeps grep from matching itself)
echo "Process Information:"
echo "-------------------"
ps aux | grep '[c]addy'
echo ""

# Listening ports; fall back to ss where net-tools is not installed
echo "Port Listening:"
echo "--------------"
if command -v netstat &> /dev/null; then
    netstat -tlnp | grep -E ':(80|443|2019)'
else
    ss -tlnp | grep -E ':(80|443|2019)'
fi
echo ""

# Memory usage
echo "Memory Usage:"
echo "------------"
free -h
echo ""

# Disk usage
echo "Disk Usage:"
echo "----------"
df -h | grep -E '(Filesystem|/dev/)'
echo ""

# Configuration validation
echo "Configuration Validation:"
echo "------------------------"
if [ -f "/etc/caddy/Caddyfile" ]; then
    caddy validate --config /etc/caddy/Caddyfile
else
    echo "Caddyfile not found at /etc/caddy/Caddyfile"
fi
echo ""

# Certificate status
echo "Certificate Status:"
echo "------------------"
if [ -d "/var/lib/caddy/certificates" ]; then
    find /var/lib/caddy/certificates -name "*.crt" -exec openssl x509 -in {} -noout -subject -dates \;
else
    echo "Certificate directory not found"
fi
echo ""

# Recent errors: dedicated error log if present, otherwise the journal
echo "Recent Errors (last 50 lines):"
echo "------------------------------"
if [ -f "/var/log/caddy/error.log" ]; then
    tail -n 50 /var/log/caddy/error.log
else
    journalctl -u caddy --no-pager -n 50
fi
echo ""

# Network connectivity
echo "Network Connectivity:"
echo "--------------------"
echo "DNS Resolution:"
nslookup google.com
echo ""
echo "External Connectivity:"
# Capture the status line first: in `curl | head || echo`, the exit status is
# that of head, so the failure message could never be printed.
STATUS_LINE=$(curl -sI --connect-timeout 5 http://google.com 2>/dev/null | head -n 1)
if [ -n "$STATUS_LINE" ]; then
    echo "$STATUS_LINE"
else
    echo "Failed to connect"
fi
echo ""

# Firewall status
echo "Firewall Status:"
echo "---------------"
if command -v ufw &> /dev/null; then
    ufw status
elif command -v firewall-cmd &> /dev/null; then
    firewall-cmd --list-all
else
    echo "No firewall management tool found"
fi
echo ""

# File permissions
echo "File Permissions:"
echo "----------------"
ls -la /etc/caddy/
ls -la /var/lib/caddy/
echo ""

echo "Diagnostic completed. Please review the output above."
7.7.2 性能问题排查
性能监控脚本
#!/bin/bash
# performance-monitor.sh - performance monitoring script
#
# Usage: performance-monitor.sh [duration_seconds]   (default: 60)
#
# Samples CPU, memory, file descriptors, connections, load and disk I/O of
# the Caddy process once per second, writes the samples to a report file in
# /tmp and prints a short summary.

MONITOR_DURATION=${1:-60} # monitor for 60 seconds by default
OUTPUT_FILE="/tmp/caddy-performance-$(date +%Y%m%d_%H%M%S).log"

# The duration drives `seq`, so it must be a positive integer.
case "$MONITOR_DURATION" in
    ''|*[!0-9]*|0)
        echo "Usage: $0 [duration_seconds]" >&2
        exit 1
        ;;
esac

# Resolve the Caddy PID before output is redirected, so that the error is
# visible on the terminal. -x matches the exact process name and -o picks
# the oldest match, which guarantees a single PID.
CADDY_PID=$(pgrep -o -x caddy)
if [ -z "$CADDY_PID" ]; then
    echo "Error: Caddy process not found"
    exit 1
fi

echo "Starting Caddy performance monitoring for ${MONITOR_DURATION} seconds..."
echo "Output will be saved to: $OUTPUT_FILE"

{
    echo "Caddy Performance Monitor Report"
    echo "==============================="
    echo "Start Time: $(date)"
    echo "Duration: ${MONITOR_DURATION} seconds"
    echo ""
    echo "Caddy PID: $CADDY_PID"
    echo ""

    # Sampling loop
    for i in $(seq 1 "$MONITOR_DURATION"); do
        # Stop cleanly if the process went away; the arithmetic below would
        # otherwise fail on empty values.
        if [ ! -d "/proc/$CADDY_PID" ]; then
            echo "Caddy process $CADDY_PID exited; stopping monitoring"
            break
        fi

        echo "=== Sample $i ($(date)) ==="

        # CPU usage
        CPU_USAGE=$(ps -p "$CADDY_PID" -o %cpu --no-headers | tr -d ' ')
        echo "CPU Usage: ${CPU_USAGE}%"

        # Memory usage (ps reports KiB)
        read -r RSS VSZ <<< "$(ps -p "$CADDY_PID" -o rss,vsz --no-headers)"
        echo "Memory RSS: $(( ${RSS:-0} / 1024 )) MB"
        echo "Memory VSZ: $(( ${VSZ:-0} / 1024 )) MB"

        # File descriptors
        FD_COUNT=$(ls "/proc/$CADDY_PID/fd" 2>/dev/null | wc -l)
        echo "File Descriptors: $FD_COUNT"

        # Network connections
        CONN_COUNT=$(netstat -an | grep -E ':(80|443)' | grep -c ESTABLISHED)
        echo "Active Connections: $CONN_COUNT"

        # Load average
        LOAD_AVG=$(awk '{print $1, $2, $3}' /proc/loadavg)
        echo "Load Average: $LOAD_AVG"

        # Disk I/O. Match the field name exactly: a plain grep for
        # "write_bytes" also hits "cancelled_write_bytes" and yields two
        # numbers, which breaks the arithmetic.
        if [ -r "/proc/$CADDY_PID/io" ]; then
            READ_BYTES=$(awk '$1 == "read_bytes:" {print $2}' "/proc/$CADDY_PID/io")
            WRITE_BYTES=$(awk '$1 == "write_bytes:" {print $2}' "/proc/$CADDY_PID/io")
            echo "Disk Read: $(( ${READ_BYTES:-0} / 1024 / 1024 )) MB"
            echo "Disk Write: $(( ${WRITE_BYTES:-0} / 1024 / 1024 )) MB"
        fi

        echo ""
        sleep 1
    done

    echo "Monitoring completed at $(date)"
} > "$OUTPUT_FILE"

echo "Performance monitoring completed. Report saved to: $OUTPUT_FILE"

# Summary (guards against a report without samples)
echo ""
echo "Performance Summary:"
echo "=================="
echo "Average CPU Usage: $(grep "CPU Usage:" "$OUTPUT_FILE" | awk '{sum+=$3; count++} END {print (count ? sum/count : 0) "%"}')"
echo "Peak Memory RSS: $(grep "Memory RSS:" "$OUTPUT_FILE" | awk '{print $3}' | sort -n | tail -1) MB"
echo "Peak Connections: $(grep "Active Connections:" "$OUTPUT_FILE" | awk '{print $3}' | sort -n | tail -1)"
echo "Peak File Descriptors: $(grep "File Descriptors:" "$OUTPUT_FILE" | awk '{print $3}' | sort -n | tail -1)"
7.7.3 日志分析工具
日志分析脚本
#!/bin/bash
# log-analyzer.sh - log analysis script
#
# Usage: log-analyzer.sh [log_file] [time_range]
#   log_file    JSON access log (default: /var/log/caddy/access.log)
#   time_range  <N>m, <N>h or <N>d, e.g. 30m, 1h, 24h, 7d (default: 1h)
#
# Requires jq and GNU date/sort.
#
# NOTE(review): the jq paths below (.http.response.status,
# .http.request.uri, ...) and the "timestamp" key are kept exactly as
# written. They look different from the field names the fail2ban filters in
# this chapter match ("remote_ip", "status"), so confirm them against a real
# log line before relying on the numbers.

LOG_FILE="${1:-/var/log/caddy/access.log}"
TIME_RANGE="${2:-1h}"

if [ ! -f "$LOG_FILE" ]; then
    echo "Log file not found: $LOG_FILE"
    exit 1
fi

echo "Caddy Log Analysis Report"
echo "========================"
echo "Log File: $LOG_FILE"
echo "Time Range: $TIME_RANGE"
echo "Analysis Time: $(date)"
echo ""

# Translate the time range into a cut-off timestamp. Any <N>m/<N>h/<N>d is
# accepted; anything else falls back to one hour, with a warning.
if [[ "$TIME_RANGE" =~ ^([0-9]+)([mhd])$ ]]; then
    case "${BASH_REMATCH[2]}" in
        m) RANGE_UNIT="minutes" ;;
        h) RANGE_UNIT="hours" ;;
        d) RANGE_UNIT="days" ;;
    esac
    SINCE_TIME=$(date -d "${BASH_REMATCH[1]} $RANGE_UNIT ago" '+%Y-%m-%dT%H:%M:%S')
else
    echo "Unrecognized time range '$TIME_RANGE', falling back to 1h" >&2
    SINCE_TIME=$(date -d '1 hour ago' '+%Y-%m-%dT%H:%M:%S')
fi

# Filter the log down to the requested range. The temp file gets an
# unpredictable name and is removed on every exit path.
TEMP_LOG=$(mktemp /tmp/filtered_log_XXXXXX) || exit 1
trap 'rm -f "$TEMP_LOG"' EXIT

# Portable awk (no gawk-only match() array): RSTART/RLENGTH locate the
# "timestamp":"..." pair; 13 is the length of the `"timestamp":"` prefix.
# Timestamps are compared as strings, which orders ISO-8601 values correctly.
awk -v since="$SINCE_TIME" '
{
    if (match($0, /"timestamp":"[^"]+"/)) {
        ts = substr($0, RSTART + 13, RLENGTH - 14)
        if (ts >= since) {
            print $0
        }
    }
}' "$LOG_FILE" > "$TEMP_LOG"

TOTAL_REQUESTS=$(wc -l < "$TEMP_LOG")
echo "Total Requests: $TOTAL_REQUESTS"
echo ""

if [ "$TOTAL_REQUESTS" -eq 0 ]; then
    echo "No requests found in the specified time range."
    exit 0
fi

# Status code distribution
echo "Status Code Distribution:"
echo "------------------------"
jq -r '.http.response.status' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr
echo ""

# Error requests
echo "Error Requests (4xx/5xx):"
echo "-------------------------"
ERROR_COUNT=$(jq -r 'select(.http.response.status >= 400) | .http.response.status' "$TEMP_LOG" 2>/dev/null | wc -l)
echo "Total Errors: $ERROR_COUNT"
if [ "$ERROR_COUNT" -gt 0 ]; then
    echo "Error Breakdown:"
    jq -r 'select(.http.response.status >= 400) | .http.response.status' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr
fi
echo ""

# Response time analysis. Values are sorted up front (sort -g understands
# the exponent notation jq emits for tiny numbers), so awk needs no
# gawk-only asort(): min is the first value, max the last.
echo "Response Time Analysis:"
echo "----------------------"
jq -r '.http.response.duration' "$TEMP_LOG" 2>/dev/null | sort -g | awk '
{
    sum += $1
    times[++count] = $1
}
END {
    if (count > 0) {
        # Median of the sorted samples
        if (count % 2 == 1) {
            median = times[(count + 1) / 2]
        } else {
            median = (times[count / 2] + times[count / 2 + 1]) / 2
        }
        printf "Average: %.3f seconds\n", sum / count
        printf "Median: %.3f seconds\n", median
        printf "Min: %.3f seconds\n", times[1]
        printf "Max: %.3f seconds\n", times[count]
    }
}'
echo ""

# Slow requests
echo "Slow Requests (>1s):"
echo "-------------------"
SLOW_COUNT=$(jq -r 'select(.http.response.duration > 1) | [.http.request.uri, .http.response.duration] | @tsv' "$TEMP_LOG" 2>/dev/null | wc -l)
echo "Total Slow Requests: $SLOW_COUNT"
if [ "$SLOW_COUNT" -gt 0 ]; then
    echo "Top 10 Slowest Requests:"
    # Tab is the field separator, so URIs containing spaces still sort on
    # the duration column.
    jq -r 'select(.http.response.duration > 1) | [.http.request.uri, .http.response.duration] | @tsv' "$TEMP_LOG" 2>/dev/null | sort -t$'\t' -k2,2 -gr | head -10
fi
echo ""

# Most requested pages
echo "Top 10 Requested Pages:"
echo "----------------------"
jq -r '.http.request.uri' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr | head -10
echo ""

# Client IP analysis
echo "Top 10 Client IPs:"
echo "------------------"
jq -r '.http.request.remote_ip' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr | head -10
echo ""

# User agent analysis
echo "Top 10 User Agents:"
echo "------------------"
jq -r '.http.request.headers["User-Agent"][0]' "$TEMP_LOG" 2>/dev/null | sort | uniq -c | sort -nr | head -10
echo ""

# Traffic analysis (TOTAL_BYTES defaults to 0 when jq produced nothing)
echo "Traffic Analysis:"
echo "-----------------"
TOTAL_BYTES=$(jq -r '.http.response.size' "$TEMP_LOG" 2>/dev/null | awk '{sum += $1} END {print sum}')
TOTAL_BYTES=${TOTAL_BYTES:-0}
echo "Total Bytes Served: $(echo "$TOTAL_BYTES" | awk '{printf "%.2f MB", $1/1024/1024}')"
echo "Average Response Size: $(echo "$TOTAL_BYTES $TOTAL_REQUESTS" | awk '{printf "%.2f KB", $1/$2/1024}')"
echo ""

# Hourly distribution (characters 12-13 of an ISO-8601 timestamp are the hour)
echo "Hourly Request Distribution:"
echo "---------------------------"
jq -r '.timestamp' "$TEMP_LOG" 2>/dev/null | cut -c12-13 | sort | uniq -c | awk '{printf "%02d:00 - %s requests\n", $2, $1}'
echo ""

echo "Log analysis completed."
本章总结
本章我们全面学习了Caddy的部署和运维:
- 生产环境部署:掌握了系统要求、安装方式和服务配置
- 配置管理:学习了配置文件组织、环境管理和版本控制
- 监控和日志:了解了日志配置、监控设置和分析方法
- 性能优化:掌握了系统级和应用级的性能优化技巧
- 安全加固:学习了系统安全、应用安全和证书安全
- 备份和恢复:了解了备份策略、恢复流程和灾难恢复
- 故障排除:掌握了问题诊断、性能排查和日志分析
通过本章的学习,你应该能够:
- 在生产环境中安全可靠地部署Caddy
- 建立完善的监控和日志体系
- 优化Caddy的性能和安全性
- 制定有效的备份和恢复策略
- 快速诊断和解决常见问题
练习题
基础练习
部署配置
- 在Ubuntu服务器上部署Caddy
- 配置systemd服务
- 设置基本的安全策略
监控设置
- 配置Prometheus监控
- 设置日志轮转
- 创建基本的告警规则
备份恢复
- 编写备份脚本
- 测试恢复流程
- 配置自动备份
进阶练习
性能优化
- 分析性能瓶颈
- 优化系统参数
- 实现多层缓存
安全加固
- 配置防火墙规则
- 实现入侵检测
- 加强证书安全
高可用部署
- 配置负载均衡
- 实现故障转移
- 设置健康检查
实战练习
运维自动化
- 编写部署脚本
- 实现配置管理
- 建立CI/CD流程
故障处理
- 模拟故障场景
- 练习故障诊断
- 优化恢复流程
容量规划
- 分析资源使用
- 预测容量需求
- 制定扩容计划
下一章我们将学习Caddy的实战案例,这将帮助你将所学知识应用到实际项目中。