1. Security Configuration

1.1 Application Security Configuration

// SecurityConfig.java
// Spring Security configuration

package com.example.demo.config;

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.config.http.SessionCreationPolicy;
import org.springframework.security.core.userdetails.User;
import org.springframework.security.core.userdetails.UserDetails;
import org.springframework.security.core.userdetails.UserDetailsService;
import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder;
import org.springframework.security.crypto.password.PasswordEncoder;
import org.springframework.security.provisioning.InMemoryUserDetailsManager;
import org.springframework.security.web.SecurityFilterChain;
import org.springframework.security.web.csrf.CookieCsrfTokenRepository;
import org.springframework.security.web.header.writers.ReferrerPolicyHeaderWriter;
import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.CorsConfigurationSource;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;

import java.util.Arrays;

@Configuration
@EnableWebSecurity
public class SecurityConfig {

    @Bean
    public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
        http
            // CSRF protection
            .csrf(csrf -> csrf
                .ignoringRequestMatchers("/api/public/**")
                .csrfTokenRepository(CookieCsrfTokenRepository.withHttpOnlyFalse())
            )
            
            // Session management (STATELESS: no HTTP session is created, so
            // session-concurrency settings such as maximumSessions have no effect)
            .sessionManagement(session -> session
                .sessionCreationPolicy(SessionCreationPolicy.STATELESS)
            )
            
            // Authorization rules
            .authorizeHttpRequests(authz -> authz
                .requestMatchers("/actuator/health", "/actuator/info").permitAll()
                .requestMatchers("/api/public/**").permitAll()
                .requestMatchers("/api/admin/**").hasRole("ADMIN")
                .requestMatchers("/api/**").hasAnyRole("USER", "ADMIN")
                .anyRequest().authenticated()
            )
            
            // HTTP Basic authentication
            .httpBasic(basic -> basic
                .realmName("Spring Native Demo")
            )
            
            // Security headers (consistent lambda DSL; the chained .and() style is deprecated)
            .headers(headers -> headers
                .frameOptions(frame -> frame.deny())
                .contentTypeOptions(contentType -> {})
                .httpStrictTransportSecurity(hsts -> hsts
                    .maxAgeInSeconds(31536000)
                    .includeSubDomains(true)
                    .preload(true)
                )
                .referrerPolicy(referrer -> referrer
                    .policy(ReferrerPolicyHeaderWriter.ReferrerPolicy.STRICT_ORIGIN_WHEN_CROSS_ORIGIN)
                )
                .cacheControl(cache -> {})
                .addHeaderWriter((request, response) -> {
                    // X-Content-Type-Options and X-Frame-Options are already
                    // covered above; add the remaining custom headers here
                    response.setHeader("X-XSS-Protection", "1; mode=block");
                    response.setHeader("Permissions-Policy", "geolocation=(), microphone=(), camera=()");
                })
            )
            
            // CORS configuration
            .cors(cors -> cors.configurationSource(corsConfigurationSource()));

        return http.build();
    }

    @Bean
    public CorsConfigurationSource corsConfigurationSource() {
        CorsConfiguration configuration = new CorsConfiguration();
        configuration.setAllowedOriginPatterns(Arrays.asList("https://*.example.com"));
        configuration.setAllowedMethods(Arrays.asList("GET", "POST", "PUT", "DELETE", "OPTIONS"));
        configuration.setAllowedHeaders(Arrays.asList("*"));
        configuration.setAllowCredentials(true);
        configuration.setMaxAge(3600L);
        
        UrlBasedCorsConfigurationSource source = new UrlBasedCorsConfigurationSource();
        source.registerCorsConfiguration("/api/**", configuration);
        return source;
    }

    @Bean
    public PasswordEncoder passwordEncoder() {
        return new BCryptPasswordEncoder(12);
    }

    @Bean
    public UserDetailsService userDetailsService() {
        // In-memory users with hard-coded passwords are for demonstration only;
        // production credentials belong in a database or secret store (see 1.3)
        UserDetails user = User.builder()
            .username("user")
            .password(passwordEncoder().encode("password"))
            .roles("USER")
            .build();

        UserDetails admin = User.builder()
            .username("admin")
            .password(passwordEncoder().encode("admin"))
            .roles("ADMIN")
            .build();

        return new InMemoryUserDetailsManager(user, admin);
    }
}
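
The hard-coded demo credentials above must never reach production. As a minimal sketch (assuming the ADMIN_PASSWORD environment variable from section 1.2 below is set), the same bean can draw its password from externalized configuration instead; the bean shape is illustrative:

// Hypothetical variant: admin credentials injected from the environment
// (requires import org.springframework.beans.factory.annotation.Value)
@Bean
public UserDetailsService envUserDetailsService(
        @Value("${ADMIN_PASSWORD}") String adminPassword,
        PasswordEncoder passwordEncoder) {
    UserDetails admin = User.builder()
        .username("admin")
        .password(passwordEncoder.encode(adminPassword))
        .roles("ADMIN")
        .build();
    return new InMemoryUserDetailsManager(admin);
}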

1.2 Environment Variable Management

# application-prod.yml
# Production environment configuration

spring:
  application:
    name: ${APP_NAME:spring-native-demo}
  
  profiles:
    active: prod
  
  datasource:
    url: ${DATABASE_URL:jdbc:postgresql://localhost:5432/demo}
    username: ${DATABASE_USERNAME:demo_user}
    password: ${DATABASE_PASSWORD}
    driver-class-name: org.postgresql.Driver
    hikari:
      maximum-pool-size: ${DB_POOL_SIZE:10}
      minimum-idle: ${DB_POOL_MIN_IDLE:2}
      connection-timeout: ${DB_CONNECTION_TIMEOUT:30000}
      idle-timeout: ${DB_IDLE_TIMEOUT:600000}
      max-lifetime: ${DB_MAX_LIFETIME:1800000}
      leak-detection-threshold: ${DB_LEAK_DETECTION:60000}
  
  jpa:
    hibernate:
      ddl-auto: validate
    show-sql: false
    properties:
      hibernate:
        dialect: org.hibernate.dialect.PostgreSQLDialect
        format_sql: false
        use_sql_comments: false
        jdbc:
          batch_size: 20
        order_inserts: true
        order_updates: true
        batch_versioned_data: true
  
  security:
    user:
      name: ${ADMIN_USERNAME:admin}
      password: ${ADMIN_PASSWORD}
      roles: ADMIN
  
  data:
    redis:
      host: ${REDIS_HOST:localhost}
      port: ${REDIS_PORT:6379}
      password: ${REDIS_PASSWORD:}
      timeout: ${REDIS_TIMEOUT:2000}
      lettuce:
        pool:
          max-active: ${REDIS_POOL_MAX_ACTIVE:8}
          max-idle: ${REDIS_POOL_MAX_IDLE:8}
          min-idle: ${REDIS_POOL_MIN_IDLE:0}
          max-wait: ${REDIS_POOL_MAX_WAIT:-1}

server:
  port: ${SERVER_PORT:8080}
  servlet:
    context-path: ${CONTEXT_PATH:/}
  compression:
    enabled: true
    mime-types: text/html,text/xml,text/plain,text/css,text/javascript,application/javascript,application/json
    min-response-size: 1024
  http2:
    enabled: true
  ssl:
    enabled: ${SSL_ENABLED:false}
    key-store: ${SSL_KEYSTORE_PATH:}
    key-store-password: ${SSL_KEYSTORE_PASSWORD:}
    key-store-type: ${SSL_KEYSTORE_TYPE:PKCS12}
    protocol: TLS
    enabled-protocols: TLSv1.2,TLSv1.3

management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
      base-path: /actuator
  endpoint:
    health:
      show-details: when-authorized
      show-components: when-authorized
      probes:
        enabled: true
    metrics:
      enabled: true
    prometheus:
      enabled: true
  prometheus:
    metrics:
      export:
        enabled: true
        step: 30s
  metrics:
    distribution:
      percentiles-histogram:
        http.server.requests: true
      percentiles:
        http.server.requests: 0.5, 0.9, 0.95, 0.99
  health:
    diskspace:
      enabled: true
      threshold: 1GB
    db:
      enabled: true
    redis:
      enabled: true

logging:
  level:
    com.example.demo: ${LOG_LEVEL:INFO}
    org.springframework.security: WARN
    org.springframework.web: WARN
    org.hibernate: WARN
    org.postgresql: WARN
  pattern:
    console: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level [%X{traceId:-},%X{spanId:-}] %logger{36} - %msg%n"
    file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level [%X{traceId:-},%X{spanId:-}] %logger{36} - %msg%n"
  file:
    name: ${LOG_FILE:/app/logs/application.log}
    max-size: ${LOG_MAX_SIZE:100MB}
    max-history: ${LOG_MAX_HISTORY:30}
    total-size-cap: ${LOG_TOTAL_SIZE:1GB}

# Custom application configuration
app:
  security:
    jwt:
      secret: ${JWT_SECRET}
      expiration: ${JWT_EXPIRATION:86400}
    rate-limit:
      enabled: ${RATE_LIMIT_ENABLED:true}
      requests-per-minute: ${RATE_LIMIT_RPM:100}
  
  cache:
    enabled: ${CACHE_ENABLED:true}
    ttl: ${CACHE_TTL:3600}
    max-size: ${CACHE_MAX_SIZE:1000}
  
  monitoring:
    enabled: ${MONITORING_ENABLED:true}
    metrics-interval: ${METRICS_INTERVAL:30}
  
  feature-flags:
    new-api: ${FEATURE_NEW_API:false}
    enhanced-logging: ${FEATURE_ENHANCED_LOGGING:true}
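
These custom app.* keys are not known to Spring Boot itself, so the application must bind them explicitly. Below is a minimal sketch of a type-safe binding; the class name and record shapes are illustrative and mirror the YAML above (enable it with @ConfigurationPropertiesScan or @EnableConfigurationProperties):

// AppProperties.java -- hypothetical binding for the custom app.* keys
package com.example.demo.config;

import org.springframework.boot.context.properties.ConfigurationProperties;

@ConfigurationProperties(prefix = "app")
public record AppProperties(Security security, Cache cache) {

    public record Security(Jwt jwt, RateLimit rateLimit) {}

    // expiration is in seconds, matching JWT_EXPIRATION above
    public record Jwt(String secret, long expiration) {}

    public record RateLimit(boolean enabled, int requestsPerMinute) {}

    public record Cache(boolean enabled, long ttl, int maxSize) {}
}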

1.3 Secrets Management

#!/bin/bash
# secrets-management.sh
# Secrets management script

set -e

# Configuration variables
SECRETS_DIR="/etc/secrets"
VAULT_ADDR="https://vault.example.com"
VAULT_TOKEN_FILE="/var/run/secrets/vault-token"
APP_NAME="spring-native-demo"
ENVIRONMENT="prod"

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check Vault connectivity
check_vault_connection() {
    log_info "Checking Vault connectivity..."
    
    if ! command -v vault &> /dev/null; then
        log_error "Vault CLI is not installed"
        exit 1
    fi
    
    if [ ! -f "$VAULT_TOKEN_FILE" ]; then
        log_error "Vault token file not found: $VAULT_TOKEN_FILE"
        exit 1
    fi
    
    export VAULT_ADDR="$VAULT_ADDR"
    export VAULT_TOKEN=$(cat "$VAULT_TOKEN_FILE")
    
    # 'vault auth' no longer authenticates in current Vault versions;
    # validate the token instead
    if ! vault token lookup &> /dev/null; then
        log_error "Vault authentication failed"
        exit 1
    fi
    
    log_success "Vault connection OK"
}

# Fetch secrets from Vault
get_secrets_from_vault() {
    log_info "Fetching secrets from Vault..."
    
    # 'vault kv' prepends 'data/' for KV v2 automatically, so do not include it here
    local secret_path="secret/$APP_NAME/$ENVIRONMENT"
    
    # Fetch the secret payload
    local secrets_json=$(vault kv get -format=json "$secret_path" | jq -r '.data.data')
    
    if [ "$secrets_json" = "null" ]; then
        log_error "No secret data found at: $secret_path"
        exit 1
    fi
    
    # Create the secrets directory
    mkdir -p "$SECRETS_DIR"
    chmod 700 "$SECRETS_DIR"
    
    # Write the combined env file
    echo "$secrets_json" | jq -r 'to_entries[] | "\(.key)=\(.value)"' > "$SECRETS_DIR/app.env"
    chmod 600 "$SECRETS_DIR/app.env"
    
    # Write sensitive secrets to individual files
    echo "$secrets_json" | jq -r '.database_password // ""' > "$SECRETS_DIR/database_password"
    echo "$secrets_json" | jq -r '.jwt_secret // ""' > "$SECRETS_DIR/jwt_secret"
    echo "$secrets_json" | jq -r '.admin_password // ""' > "$SECRETS_DIR/admin_password"
    echo "$secrets_json" | jq -r '.redis_password // ""' > "$SECRETS_DIR/redis_password"
    
    chmod 600 "$SECRETS_DIR"/*
    
    log_success "Secrets fetched"
}

# Generate random secrets
generate_random_secrets() {
    log_info "Generating random secrets..."
    
    # JWT secret (256-bit)
    JWT_SECRET=$(openssl rand -base64 32)
    
    # Database password
    DB_PASSWORD=$(openssl rand -base64 24 | tr -d "=+/" | cut -c1-20)
    
    # Admin password
    ADMIN_PASSWORD=$(openssl rand -base64 24 | tr -d "=+/" | cut -c1-16)
    
    # Redis password
    REDIS_PASSWORD=$(openssl rand -base64 24 | tr -d "=+/" | cut -c1-20)
    
    # API key
    API_KEY=$(openssl rand -hex 32)
    
    # Encryption key
    ENCRYPTION_KEY=$(openssl rand -base64 32)
    
    log_success "Random secrets generated"
}

# Store secrets in Vault
store_secrets_to_vault() {
    log_info "Storing secrets in Vault..."
    
    local secret_path="secret/$APP_NAME/$ENVIRONMENT"
    
    # Build the secret JSON
    local secrets_json=$(cat << EOF
{
  "database_password": "$DB_PASSWORD",
  "jwt_secret": "$JWT_SECRET",
  "admin_password": "$ADMIN_PASSWORD",
  "redis_password": "$REDIS_PASSWORD",
  "api_key": "$API_KEY",
  "encryption_key": "$ENCRYPTION_KEY"
}
EOF
    )
    
    # Write to Vault (reads the JSON payload from stdin)
    echo "$secrets_json" | vault kv put "$secret_path" -
    
    log_success "Secrets stored in Vault"
}

# Rotate secrets
rotate_secrets() {
    log_info "Rotating secrets..."
    
    # Back up the current secrets
    local backup_path="secret/$APP_NAME/$ENVIRONMENT/backup-$(date +%Y%m%d-%H%M%S)"
    local current_secrets=$(vault kv get -format=json "secret/$APP_NAME/$ENVIRONMENT" | jq -r '.data.data')
    
    echo "$current_secrets" | vault kv put "$backup_path" -
    
    # Generate new secrets
    generate_random_secrets
    
    # Keep the existing database password (avoids breaking live connections)
    DB_PASSWORD=$(echo "$current_secrets" | jq -r '.database_password')
    
    # Store the new secrets
    store_secrets_to_vault
    
    log_success "Secret rotation complete"
    log_warning "Restart the application to pick up the new secrets"
}

# Validate secrets
validate_secrets() {
    log_info "Validating secrets..."
    
    local secret_path="secret/$APP_NAME/$ENVIRONMENT"
    local secrets_json=$(vault kv get -format=json "$secret_path" | jq -r '.data.data')
    
    # Check required keys
    local required_keys=("database_password" "jwt_secret" "admin_password")
    
    for key in "${required_keys[@]}"; do
        local value=$(echo "$secrets_json" | jq -r ".$key // \"\"")
        if [ -z "$value" ] || [ "$value" = "null" ]; then
            log_error "Missing required secret: $key"
            exit 1
        fi
        
        # Check secret strength
        if [ ${#value} -lt 8 ]; then
            log_warning "Secret $key is too short: ${#value} characters"
        fi
    done
    
    log_success "Secret validation passed"
}

# Export environment variables
export_environment_variables() {
    log_info "Exporting environment variables..."
    
    if [ -f "$SECRETS_DIR/app.env" ]; then
        # Export into the current shell
        set -a
        source "$SECRETS_DIR/app.env"
        set +a
        
        # Generate a systemd environment file
        cat > "$SECRETS_DIR/systemd.env" << EOF
# Spring Native Demo Environment Variables
# Generated on $(date)

DATABASE_PASSWORD=$(cat "$SECRETS_DIR/database_password")
JWT_SECRET=$(cat "$SECRETS_DIR/jwt_secret")
ADMIN_PASSWORD=$(cat "$SECRETS_DIR/admin_password")
REDIS_PASSWORD=$(cat "$SECRETS_DIR/redis_password")
EOF
        
        chmod 600 "$SECRETS_DIR/systemd.env"
        
        log_success "Environment variables exported"
    else
        log_error "Secrets file not found: $SECRETS_DIR/app.env"
        exit 1
    fi
}

# Clean up local secrets
cleanup_secrets() {
    log_info "Cleaning up local secrets..."
    
    if [ -d "$SECRETS_DIR" ]; then
        rm -rf "$SECRETS_DIR"
        log_success "Local secrets removed"
    fi
}

# Main entry point
main() {
    case "$1" in
        "get")
            check_vault_connection
            get_secrets_from_vault
            export_environment_variables
            ;;
        "generate")
            generate_random_secrets
            check_vault_connection
            store_secrets_to_vault
            ;;
        "rotate")
            check_vault_connection
            rotate_secrets
            ;;
        "validate")
            check_vault_connection
            validate_secrets
            ;;
        "cleanup")
            cleanup_secrets
            ;;
        *)
            echo "Usage: $0 {get|generate|rotate|validate|cleanup}"
            echo "  get      - fetch secrets from Vault"
            echo "  generate - generate and store new secrets"
            echo "  rotate   - rotate existing secrets"
            echo "  validate - validate secret integrity"
            echo "  cleanup  - remove local secrets"
            exit 1
            ;;
    esac
}

# Error handling
trap 'log_error "Error during secrets management, exit code: $?"' ERR

# Run
main "$@"
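
On the application side, the secrets written by this script can be consumed through the generated environment file or by reading the individual secret files directly. A minimal sketch of the latter, assuming the /etc/secrets path configured above (the class and fallback behavior are illustrative):

// FileSecrets.java -- hypothetical helper for reading file-based secrets
package com.example.demo.config;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public final class FileSecrets {

    private static final Path JWT_SECRET_FILE = Path.of("/etc/secrets/jwt_secret");

    private FileSecrets() {
    }

    // Prefer the secret file; fall back to the JWT_SECRET environment variable
    public static String jwtSecret() throws IOException {
        if (Files.isReadable(JWT_SECRET_FILE)) {
            return Files.readString(JWT_SECRET_FILE).trim();
        }
        String fromEnv = System.getenv("JWT_SECRET");
        if (fromEnv == null || fromEnv.isBlank()) {
            throw new IllegalStateException("JWT secret not configured");
        }
        return fromEnv;
    }
}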

Restore Script

Create scripts/restore.sh:

#!/bin/bash

# Restore script
# Usage: ./restore.sh [backup_date] [component]

set -euo pipefail

# Configuration
APP_NAME="spring-native-app"
BACKUP_DIR="/backup"
RESTORE_DIR="/tmp/restore"
S3_BUCKET="${S3_BUCKET:-my-app-backups}"
ENCRYPTION_KEY="${ENCRYPTION_KEY:-}"

# DATABASE_URL and REDIS_URL are expected in the environment (set -u)

# The log functions append to $RESTORE_DIR/restore.log, so the directory
# must exist before the first log call
mkdir -p "$RESTORE_DIR"

# Logging helpers
log_info() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1" | tee -a "$RESTORE_DIR/restore.log"
}

log_success() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [SUCCESS] $1" | tee -a "$RESTORE_DIR/restore.log"
}

log_warning() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [WARNING] $1" | tee -a "$RESTORE_DIR/restore.log"
}

log_error() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [ERROR] $1" | tee -a "$RESTORE_DIR/restore.log" >&2
}

# Check the environment
check_environment() {
    log_info "Checking restore environment..."
    
    # Create the restore directory
    mkdir -p "$RESTORE_DIR"
    
    # Check required tools
    local required_tools=("psql" "redis-cli" "aws" "gpg" "jq")
    for tool in "${required_tools[@]}"; do
        if ! command -v "$tool" &> /dev/null; then
            log_error "Missing required tool: $tool"
            exit 1
        fi
    done
    
    # Check database connectivity
    if ! psql "$DATABASE_URL" -c "SELECT 1" &> /dev/null; then
        log_error "Cannot connect to the database"
        exit 1
    fi
    
    # Check Redis connectivity
    if ! redis-cli -u "$REDIS_URL" ping &> /dev/null; then
        log_error "Cannot connect to Redis"
        exit 1
    fi
    
    log_success "Environment check passed"
}

# Download backup files
download_backup() {
    local backup_date="$1"
    local component="$2"
    
    log_info "Downloading backup files: $backup_date - $component"
    
    local s3_prefix="$APP_NAME/${backup_date:0:4}/${backup_date:4:2}/${backup_date:6:2}"
    
    # List available backups
    local backup_files
    backup_files=$(aws s3 ls "s3://$S3_BUCKET/$s3_prefix/" --recursive | grep "$component" | awk '{print $4}')
    
    if [ -z "$backup_files" ]; then
        log_error "No $component backup found for $backup_date"
        exit 1
    fi
    
    # Download each backup file (checking $? after set -e would never run,
    # so test the command directly)
    echo "$backup_files" | while read -r s3_key; do
        local filename=$(basename "$s3_key")
        local local_file="$RESTORE_DIR/$filename"
        
        if aws s3 cp "s3://$S3_BUCKET/$s3_key" "$local_file"; then
            log_success "Downloaded: $filename"
        else
            log_error "Download failed: $filename"
            exit 1
        fi
    done
}

# Decrypt a backup file (prints the resulting filename on stdout)
decrypt_backup() {
    local encrypted_file="$1"
    
    if [[ "$encrypted_file" == *.gpg ]]; then
        # Log to stderr: stdout is captured by the caller via $(...)
        log_info "Decrypting backup file: $(basename "$encrypted_file")" >&2
        
        local decrypted_file="${encrypted_file%.gpg}"
        
        if [ -n "$ENCRYPTION_KEY" ]; then
            if gpg --quiet --batch --yes --passphrase "$ENCRYPTION_KEY" \
                --decrypt "$encrypted_file" > "$decrypted_file"; then
                log_success "Decrypted: $(basename "$decrypted_file")" >&2
                echo "$decrypted_file"
            else
                log_error "Decryption failed: $(basename "$encrypted_file")" >&2
                exit 1
            fi
        else
            log_error "Missing decryption key" >&2
            exit 1
        fi
    else
        echo "$encrypted_file"
    fi
}

# Restore the database
restore_database() {
    local backup_date="$1"
    
    log_info "Restoring database: $backup_date"
    
    # Download the database backup
    download_backup "$backup_date" "database"
    
    # Locate the database backup file
    local db_backup_file
    db_backup_file=$(find "$RESTORE_DIR" -name "database_${backup_date}*.sql*" | head -1)
    
    if [ -z "$db_backup_file" ]; then
        log_error "Database backup file not found"
        exit 1
    fi
    
    # Decrypt if necessary
    db_backup_file=$(decrypt_backup "$db_backup_file")
    
    # Take a safety backup before restoring
    local pre_restore_backup="$RESTORE_DIR/pre-restore-$(date +%Y%m%d-%H%M%S).sql"
    pg_dump "$DATABASE_URL" > "$pre_restore_backup"
    log_info "Pre-restore backup created: $(basename "$pre_restore_backup")"
    
    # Restore the database
    log_warning "Starting database restore; this will overwrite existing data"
    
    # Terminate existing connections
    psql "$DATABASE_URL" -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = current_database() AND pid <> pg_backend_pid();"
    
    # Restore the data
    if psql "$DATABASE_URL" < "$db_backup_file"; then
        log_success "Database restore complete"
    else
        log_error "Database restore failed"
        
        # Try to roll back to the pre-restore state
        log_info "Attempting rollback to the pre-restore state..."
        psql "$DATABASE_URL" < "$pre_restore_backup"
        
        exit 1
    fi
}

# Restore Redis
restore_redis() {
    local backup_date="$1"
    
    log_info "Restoring Redis: $backup_date"
    
    # Download the Redis backup
    download_backup "$backup_date" "redis"
    
    # Locate the Redis backup file
    local redis_backup_file
    redis_backup_file=$(find "$RESTORE_DIR" -name "redis_${backup_date}*.rdb*" | head -1)
    
    if [ -z "$redis_backup_file" ]; then
        log_error "Redis backup file not found"
        exit 1
    fi
    
    # Decrypt if necessary
    redis_backup_file=$(decrypt_backup "$redis_backup_file")
    
    # Take a Redis snapshot before restoring
    redis-cli -u "$REDIS_URL" BGSAVE
    log_info "Pre-restore Redis snapshot created"
    
    # Note: redis-cli --rdb only *dumps* a snapshot from the server; it cannot
    # load one back. An RDB file is restored by placing it in Redis's data
    # directory and restarting the server (data path and service name below
    # are illustrative and must match your installation).
    redis-cli -u "$REDIS_URL" SHUTDOWN NOSAVE || true
    cp "$redis_backup_file" /var/lib/redis/dump.rdb
    systemctl start redis
    
    if redis-cli -u "$REDIS_URL" ping &> /dev/null; then
        log_success "Redis restore complete"
    else
        log_error "Redis restore failed"
        exit 1
    fi
}

# Restore configuration files
restore_configuration() {
    local backup_date="$1"
    
    log_info "Restoring configuration files: $backup_date"
    
    # Download the configuration backup
    download_backup "$backup_date" "config"
    
    # Locate the configuration backup file
    local config_backup_file
    config_backup_file=$(find "$RESTORE_DIR" -name "config_${backup_date}*.tar.gz*" | head -1)
    
    if [ -z "$config_backup_file" ]; then
        log_error "Configuration backup file not found"
        exit 1
    fi
    
    # Decrypt if necessary
    config_backup_file=$(decrypt_backup "$config_backup_file")
    
    # Back up the current configuration
    local current_config_backup="$RESTORE_DIR/current-config-$(date +%Y%m%d-%H%M%S).tar.gz"
    tar -czf "$current_config_backup" -C /app config/
    log_info "Current configuration backed up: $(basename "$current_config_backup")"
    
    # Restore the configuration files
    if tar -xzf "$config_backup_file" -C /app; then
        log_success "Configuration restore complete"
    else
        log_error "Configuration restore failed"
        
        # Put the current configuration back
        tar -xzf "$current_config_backup" -C /app
        
        exit 1
    fi
}

# Verify the restore
verify_restore() {
    log_info "Verifying restore results..."
    
    # Check database connectivity (assumes the application schema has a users table)
    if psql "$DATABASE_URL" -c "SELECT COUNT(*) FROM users;" &> /dev/null; then
        local user_count
        user_count=$(psql "$DATABASE_URL" -t -c "SELECT COUNT(*) FROM users;" | xargs)
        log_success "Database verification passed, user count: $user_count"
    else
        log_error "Database verification failed"
        return 1
    fi
    
    # Check Redis connectivity
    if redis-cli -u "$REDIS_URL" ping &> /dev/null; then
        local redis_keys
        redis_keys=$(redis-cli -u "$REDIS_URL" DBSIZE)
        log_success "Redis verification passed, key count: $redis_keys"
    else
        log_error "Redis verification failed"
        return 1
    fi
    
    # Check application health
    if curl -f "http://localhost:8080/actuator/health" &> /dev/null; then
        log_success "Application health check passed"
    else
        log_warning "Application health check failed; the application may need a restart"
    fi
    
    log_success "Restore verification complete"
}

# Generate a restore report
generate_restore_report() {
    local backup_date="$1"
    local components="$2"
    
    log_info "Generating restore report..."
    
    local report_file="$RESTORE_DIR/restore-report-$(date +%Y%m%d-%H%M%S).json"
    
    # Collect restore information
    local user_count=0
    local redis_keys=0
    
    if psql "$DATABASE_URL" -c "SELECT 1" &> /dev/null; then
        user_count=$(psql "$DATABASE_URL" -t -c "SELECT COUNT(*) FROM users;" | xargs)
    fi
    
    if redis-cli -u "$REDIS_URL" ping &> /dev/null; then
        redis_keys=$(redis-cli -u "$REDIS_URL" DBSIZE)
    fi
    
    # Generate the JSON report
    cat > "$report_file" << EOF
{
    "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
    "application": "$APP_NAME",
    "restore_summary": {
        "backup_date": "$backup_date",
        "components_restored": "$components",
        "restore_directory": "$RESTORE_DIR",
        "completed_at": "$(date '+%Y-%m-%d %H:%M:%S')"
    },
    "post_restore_status": {
        "database_user_count": $user_count,
        "redis_key_count": $redis_keys,
        "application_health": "$(curl -s http://localhost:8080/actuator/health | jq -r '.status // "UNKNOWN"')"
    },
    "backup_files_used": [
$(find "$RESTORE_DIR" -name "*${backup_date}*" -type f | while read -r file; do
    echo "        \"$(basename "$file")\""
done | sed '$!s/$/,/')
    ]
}
EOF
    
    log_success "Restore report generated: $report_file"
}

# Main entry point
main() {
    local backup_date="${1:-$(date +%Y%m%d)}"
    local component="${2:-full}"
    
    log_info "Starting restore - backup date: $backup_date, component: $component"
    
    case "$component" in
        "database")
            check_environment
            restore_database "$backup_date"
            verify_restore
            generate_restore_report "$backup_date" "database"
            ;;
        "redis")
            check_environment
            restore_redis "$backup_date"
            verify_restore
            generate_restore_report "$backup_date" "redis"
            ;;
        "config")
            check_environment
            restore_configuration "$backup_date"
            verify_restore
            generate_restore_report "$backup_date" "configuration"
            ;;
        "full")
            check_environment
            restore_database "$backup_date"
            restore_redis "$backup_date"
            restore_configuration "$backup_date"
            verify_restore
            generate_restore_report "$backup_date" "full"
            ;;
        *)
            echo "Usage: $0 [backup_date] [component]"
            echo "  backup_date - backup date (YYYYMMDD, defaults to today)"
            echo "  component   - component to restore (database|redis|config|full, defaults to full)"
            echo ""
            echo "Examples:"
            echo "  $0 20240115 database  # restore the database backup from 2024-01-15"
            echo "  $0 20240115 full      # restore the full backup from 2024-01-15"
            exit 1
            ;;
    esac
    
    log_info "Restore complete - $(date '+%Y-%m-%d %H:%M:%S')"
}

# Error handling
trap 'log_error "Error during restore, exit code: $?"' ERR

# Run
main "$@"

Summary

This chapter covered best practices for running Spring Native applications in production, including:

Key Takeaways

  1. Security configuration

    • Apply defense in depth
    • Use strong password policies and session management
    • Configure appropriate CORS rules and security headers
    • Enable CSRF protection and HTTPS
  2. Environment management

    • Manage configuration through environment variables
    • Rotate secrets and store them securely
    • Separate development, test, and production environments
    • Use configuration management tooling
  3. Monitoring and logging

    • Monitor the application comprehensively
    • Configure structured logging
    • Set up alerting and notification channels
    • Track system resources and performance metrics
  4. Backup and recovery

    • Automate the backup strategy
    • Test the restore procedure regularly
    • Encrypt backup data
    • Keep multiple backup copies

Best Practices

  1. Deployment strategy

    • Use blue-green deployments or rolling updates
    • Implement health checks and readiness probes
    • Configure appropriate resource limits
    • Use a container orchestration platform
  2. Performance optimization

    • Monitor startup time and memory usage
    • Tune GraalVM compilation parameters
    • Use caching to reduce database load
    • Use connection pooling and manage resources carefully
  3. Fault handling

    • Apply the circuit breaker pattern
    • Configure retries and timeouts (see the sketch after this list)
    • Shut down gracefully
    • Keep a disaster recovery plan ready
  4. Operations automation

    • Use infrastructure as code
    • Build CI/CD pipelines
    • Automate testing and deployment
    • Automate monitoring and alerting
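
As a minimal, dependency-free sketch of the retry-with-timeout idea referenced above (the attempt count, backoff values, and the Callable task are illustrative; a production setup would more likely use a library such as Resilience4j):

// RetryingCaller.java -- hypothetical retry helper with exponential backoff
// and a per-attempt timeout
import java.time.Duration;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class RetryingCaller {

    public static <T> T callWithRetry(Callable<T> task, int maxAttempts,
                                      Duration timeout) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            long backoffMillis = 100;
            for (int attempt = 1; ; attempt++) {
                Future<T> future = executor.submit(task);
                try {
                    // Bound each attempt with a timeout
                    return future.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
                } catch (TimeoutException | ExecutionException e) {
                    future.cancel(true);
                    if (attempt >= maxAttempts) {
                        throw e;
                    }
                    // Exponential backoff before the next attempt
                    Thread.sleep(backoffMillis);
                    backoffMillis *= 2;
                }
            }
        } finally {
            executor.shutdownNow();
        }
    }
}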

Next Steps

  1. Advanced topics

    • Microservice architecture design
    • Distributed tracing and monitoring
    • Service mesh integration
    • Cloud-native security
  2. Tool integration

    • Kubernetes operations
    • Prometheus monitoring
    • ELK log analysis
    • Grafana dashboards
  3. Performance tuning

    • JVM parameter optimization
    • Native image tuning
    • Database performance optimization
    • Network and I/O optimization

Following these best practices helps keep Spring Native applications running stably, securely, and efficiently in production.


2. Monitoring and Logging

2.1 Application Monitoring Configuration

// MonitoringConfig.java
// Monitoring configuration

package com.example.demo.config;

import io.micrometer.core.aop.TimedAspect;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.config.MeterFilter;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.actuate.health.HealthIndicator;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.EnableAspectJAutoProxy;

import java.time.Duration;

@Configuration
@EnableAspectJAutoProxy
public class MonitoringConfig {

    @Bean
    public TimedAspect timedAspect(MeterRegistry registry) {
        return new TimedAspect(registry);
    }

    @Bean
    public MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
        return registry -> {
            registry.config()
                .commonTags(
                    "application", "spring-native-demo",
                    "environment", System.getProperty("spring.profiles.active", "unknown"),
                    "version", getClass().getPackage().getImplementationVersion() != null ? 
                        getClass().getPackage().getImplementationVersion() : "unknown"
                )
                .meterFilter(MeterFilter.deny(id -> {
                    String uri = id.getTag("uri");
                    return uri != null && (uri.startsWith("/actuator") || uri.startsWith("/static"));
                }))
                .meterFilter(MeterFilter.maximumExpectedValue("http.server.requests", Duration.ofSeconds(10)))
                .meterFilter(MeterFilter.minimumExpectedValue("http.server.requests", Duration.ofMillis(1)));
        };
    }

    @Bean
    public HealthIndicator customHealthIndicator() {
        return () -> {
            // Custom health check logic; health details are built via
            // Health.up()/Health.down() (Status has no withDetail method)
            try {
                // Check the status of critical components
                checkDatabaseConnection();
                checkRedisConnection();
                checkExternalServices();
                
                return Health.up()
                    .withDetail("database", "Connected")
                    .withDetail("redis", "Connected")
                    .withDetail("external-services", "Available")
                    .withDetail("startup-time", getStartupTime())
                    .withDetail("uptime", getUptime())
                    .build();
            } catch (Exception e) {
                return Health.down()
                    .withDetail("error", e.getMessage())
                    .withDetail("timestamp", System.currentTimeMillis())
                    .build();
            }
        };
    }

    private void checkDatabaseConnection() {
        // Database connectivity check
    }

    private void checkRedisConnection() {
        // Redis connectivity check
    }

    private void checkExternalServices() {
        // External service check
    }

    private String getStartupTime() {
        return System.getProperty("app.startup.time", "unknown");
    }

    private String getUptime() {
        long startup = Long.parseLong(System.getProperty("app.startup.timestamp", "0"));
        if (startup == 0) {
            return "unknown";
        }
        return Duration.ofMillis(System.currentTimeMillis() - startup).toString();
    }
}
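
The TimedAspect bean above is what makes Micrometer's @Timed annotation work on regular Spring beans. A short usage sketch; the service and metric names are illustrative:

// OrderService.java -- hypothetical service timed via the TimedAspect above
import io.micrometer.core.annotation.Timed;
import org.springframework.stereotype.Service;

@Service
public class OrderService {

    // Records latency under the metric name "orders.process"
    @Timed(value = "orders.process", percentiles = {0.5, 0.95, 0.99})
    public void processOrder(String orderId) {
        // business logic ...
    }
}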

2.2 Structured Logging Configuration

<!-- logback-spring.xml -->
<!-- Logback configuration -->

<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <!-- Property definitions; Spring properties must be bridged into Logback
         via springProperty before they can be referenced -->
    <springProperty scope="context" name="activeProfile" source="spring.profiles.active" defaultValue="unknown"/>
    <property name="LOG_FILE" value="${LOG_FILE:-/app/logs/application.log}"/>
    <property name="LOG_PATTERN" value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level [%X{traceId:-},%X{spanId:-}] %logger{36} - %msg%n"/>
    <property name="JSON_PATTERN" value='{"timestamp":"%d{yyyy-MM-dd HH:mm:ss.SSS}","level":"%level","thread":"%thread","logger":"%logger{36}","traceId":"%X{traceId:-}","spanId":"%X{spanId:-}","message":"%msg","exception":"%ex{full}"}%n'/>
    
    <!-- Console output -->
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>
    
    <!-- JSON console output (production) -->
    <appender name="JSON_CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
            <providers>
                <timestamp>
                    <timeZone>Asia/Shanghai</timeZone>
                </timestamp>
                <version/>
                <logLevel/>
                <message/>
                <mdc/>
                <arguments/>
                <stackTrace/>
                <pattern>
                    <pattern>
                        {
                            "application": "spring-native-demo",
                            "environment": "${activeProfile}",
                            "hostname": "${HOSTNAME:-unknown}",
                            "pid": "${PID:-unknown}"
                        }
                    </pattern>
                </pattern>
            </providers>
        </encoder>
    </appender>
    
    <!-- File output -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_FILE}</file>
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz</fileNamePattern>
            <maxFileSize>100MB</maxFileSize>
            <maxHistory>30</maxHistory>
            <totalSizeCap>1GB</totalSizeCap>
        </rollingPolicy>
    </appender>
    
    <!-- JSON file output -->
    <appender name="JSON_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_FILE}.json</file>
        <encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
            <providers>
                <timestamp>
                    <timeZone>Asia/Shanghai</timeZone>
                </timestamp>
                <version/>
                <logLevel/>
                <message/>
                <mdc/>
                <arguments/>
                <stackTrace/>
                <pattern>
                    <pattern>
                        {
                            "application": "spring-native-demo",
                            "environment": "${activeProfile}",
                            "hostname": "${HOSTNAME:-unknown}",
                            "pid": "${PID:-unknown}"
                        }
                    </pattern>
                </pattern>
            </providers>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_FILE}.json.%d{yyyy-MM-dd}.%i.gz</fileNamePattern>
            <maxFileSize>100MB</maxFileSize>
            <maxHistory>30</maxHistory>
            <totalSizeCap>1GB</totalSizeCap>
        </rollingPolicy>
    </appender>
    
    <!-- Separate error log output -->
    <appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>/app/logs/error.log</file>
        <filter class="ch.qos.logback.classic.filter.LevelFilter">
            <level>ERROR</level>
            <onMatch>ACCEPT</onMatch>
            <onMismatch>DENY</onMismatch>
        </filter>
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>/app/logs/error.%d{yyyy-MM-dd}.%i.gz</fileNamePattern>
            <maxFileSize>50MB</maxFileSize>
            <maxHistory>60</maxHistory>
            <totalSizeCap>500MB</totalSizeCap>
        </rollingPolicy>
    </appender>
    
    <!-- Asynchronous appenders -->
    <appender name="ASYNC_FILE" class="ch.qos.logback.classic.AsyncAppender">
        <appender-ref ref="FILE"/>
        <queueSize>1024</queueSize>
        <discardingThreshold>0</discardingThreshold>
        <includeCallerData>false</includeCallerData>
    </appender>
    
    <appender name="ASYNC_JSON_FILE" class="ch.qos.logback.classic.AsyncAppender">
        <appender-ref ref="JSON_FILE"/>
        <queueSize>1024</queueSize>
        <discardingThreshold>0</discardingThreshold>
        <includeCallerData>false</includeCallerData>
    </appender>
    
    <!-- Development profile -->
    <springProfile name="dev,test">
        <root level="INFO">
            <appender-ref ref="CONSOLE"/>
            <appender-ref ref="ASYNC_FILE"/>
        </root>
        
        <logger name="com.example.demo" level="DEBUG" additivity="false">
            <appender-ref ref="CONSOLE"/>
            <appender-ref ref="ASYNC_FILE"/>
        </logger>
    </springProfile>
    
    <!-- Production profile -->
    <springProfile name="prod">
        <root level="INFO">
            <appender-ref ref="JSON_CONSOLE"/>
            <appender-ref ref="ASYNC_JSON_FILE"/>
            <appender-ref ref="ERROR_FILE"/>
        </root>
        
        <logger name="com.example.demo" level="INFO" additivity="false">
            <appender-ref ref="JSON_CONSOLE"/>
            <appender-ref ref="ASYNC_JSON_FILE"/>
            <appender-ref ref="ERROR_FILE"/>
        </logger>
        
        <!-- Quiet down third-party library logging -->
        <logger name="org.springframework" level="WARN"/>
        <logger name="org.hibernate" level="WARN"/>
        <logger name="org.apache" level="WARN"/>
        <logger name="com.zaxxer.hikari" level="WARN"/>
    </springProfile>
    
    <!-- Log levels for specific packages -->
    <logger name="org.springframework.security" level="WARN"/>
    
    <!-- Verbose request and SQL logging (development only) -->
    <springProfile name="dev">
        <logger name="org.springframework.web.filter.CommonsRequestLoggingFilter" level="DEBUG"/>
        <logger name="org.springframework.jdbc.core" level="DEBUG"/>
        <logger name="org.hibernate.SQL" level="DEBUG"/>
        <logger name="org.hibernate.type.descriptor.sql.BasicBinder" level="TRACE"/>
    </springProfile>
</configuration>
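
The traceId and spanId fields in the patterns above are read from the SLF4J MDC. With Micrometer Tracing they are populated automatically; the sketch below shows populating traceId by hand in a servlet filter (the header name and UUID fallback are illustrative):

// TraceIdFilter.java -- hypothetical filter that seeds the MDC trace id
import jakarta.servlet.Filter;
import jakarta.servlet.FilterChain;
import jakarta.servlet.ServletException;
import jakarta.servlet.ServletRequest;
import jakarta.servlet.ServletResponse;
import jakarta.servlet.http.HttpServletRequest;
import org.slf4j.MDC;

import java.io.IOException;
import java.util.UUID;

public class TraceIdFilter implements Filter {

    @Override
    public void doFilter(ServletRequest req, ServletResponse res, FilterChain chain)
            throws IOException, ServletException {
        String traceId = ((HttpServletRequest) req).getHeader("X-Trace-Id");
        if (traceId == null || traceId.isBlank()) {
            traceId = UUID.randomUUID().toString();
        }
        MDC.put("traceId", traceId);
        try {
            chain.doFilter(req, res);
        } finally {
            MDC.remove("traceId"); // avoid leaking across pooled threads
        }
    }
}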

2.3 Monitoring Script

#!/bin/bash
# monitoring.sh
# Application monitoring script

set -e

# Configuration variables
APP_NAME="spring-native-demo"
APP_PORT="8080"
HEALTH_ENDPOINT="http://localhost:$APP_PORT/actuator/health"
METRICS_ENDPOINT="http://localhost:$APP_PORT/actuator/metrics"
PROMETHEUS_ENDPOINT="http://localhost:$APP_PORT/actuator/prometheus"
LOG_FILE="/app/logs/application.log"
ERROR_LOG_FILE="/app/logs/error.log"
MONITORING_LOG="/app/logs/monitoring.log"
ALERT_WEBHOOK="https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK"
EMAIL_RECIPIENTS="admin@example.com"

# Threshold configuration
CPU_THRESHOLD=80
MEMORY_THRESHOLD=85
DISK_THRESHOLD=90
RESPONSE_TIME_THRESHOLD=5000
ERROR_RATE_THRESHOLD=5

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() {
    echo -e "${BLUE}[INFO]${NC} $1" | tee -a "$MONITORING_LOG"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1" | tee -a "$MONITORING_LOG"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1" | tee -a "$MONITORING_LOG"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1" | tee -a "$MONITORING_LOG"
}

# Check application health
check_health() {
    log_info "Checking application health..."
    
    local health_response
    local http_code
    
    health_response=$(curl -s -w "%{http_code}" "$HEALTH_ENDPOINT" -o /tmp/health.json 2>/dev/null || echo "000")
    http_code=${health_response: -3}
    
    if [ "$http_code" = "200" ]; then
        local status=$(jq -r '.status' /tmp/health.json 2>/dev/null || echo "UNKNOWN")
        if [ "$status" = "UP" ]; then
            log_success "Application health: UP"
            return 0
        else
            log_error "Application health: $status"
            return 1
        fi
    else
        log_error "Health check failed, HTTP status: $http_code"
        return 1
    fi
}

# Check system resources
check_system_resources() {
    log_info "Checking system resources..."
    
    # CPU usage
    local cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | awk -F'%' '{print $1}')
    cpu_usage=${cpu_usage%.*}
    
    if [ "$cpu_usage" -gt "$CPU_THRESHOLD" ]; then
        log_warning "High CPU usage: ${cpu_usage}%"
        send_alert "CPU usage alert" "CPU usage: ${cpu_usage}%, above the ${CPU_THRESHOLD}% threshold"
    else
        log_info "CPU usage: ${cpu_usage}%"
    fi
    
    # Memory usage
    local memory_info=$(free | grep Mem)
    local total_memory=$(echo $memory_info | awk '{print $2}')
    local used_memory=$(echo $memory_info | awk '{print $3}')
    local memory_usage=$((used_memory * 100 / total_memory))
    
    if [ "$memory_usage" -gt "$MEMORY_THRESHOLD" ]; then
        log_warning "High memory usage: ${memory_usage}%"
        send_alert "Memory usage alert" "Memory usage: ${memory_usage}%, above the ${MEMORY_THRESHOLD}% threshold"
    else
        log_info "Memory usage: ${memory_usage}%"
    fi
    
    # Disk usage
    local disk_usage=$(df /app | tail -1 | awk '{print $5}' | sed 's/%//')
    
    if [ "$disk_usage" -gt "$DISK_THRESHOLD" ]; then
        log_warning "High disk usage: ${disk_usage}%"
        send_alert "Disk usage alert" "Disk usage: ${disk_usage}%, above the ${DISK_THRESHOLD}% threshold"
    else
        log_info "Disk usage: ${disk_usage}%"
    fi
}

# Check application performance metrics
check_performance_metrics() {
    log_info "Checking application performance metrics..."
    
    # JVM metrics (values are floats; round them for shell arithmetic)
    local jvm_memory_used=$(curl -s "$METRICS_ENDPOINT/jvm.memory.used" | jq -r '.measurements[0].value' 2>/dev/null || echo "0")
    local jvm_memory_max=$(curl -s "$METRICS_ENDPOINT/jvm.memory.max" | jq -r '.measurements[0].value' 2>/dev/null || echo "1")
    jvm_memory_used=$(printf '%.0f' "$jvm_memory_used")
    jvm_memory_max=$(printf '%.0f' "$jvm_memory_max")
    local jvm_memory_usage=$((jvm_memory_used * 100 / jvm_memory_max))
    
    log_info "JVM memory usage: ${jvm_memory_usage}%"
    
    # HTTP request metrics (5xx responses carry the outcome=SERVER_ERROR tag)
    local http_requests_total=$(curl -s "$METRICS_ENDPOINT/http.server.requests" | jq -r '.measurements[] | select(.statistic=="COUNT") | .value' 2>/dev/null || echo "0")
    local http_requests_error=$(curl -s "$METRICS_ENDPOINT/http.server.requests?tag=outcome:SERVER_ERROR" | jq -r '.measurements[] | select(.statistic=="COUNT") | .value' 2>/dev/null || echo "0")
    http_requests_total=$(printf '%.0f' "${http_requests_total:-0}")
    http_requests_error=$(printf '%.0f' "${http_requests_error:-0}")
    
    if [ "$http_requests_total" -gt 0 ]; then
        local error_rate=$((http_requests_error * 100 / http_requests_total))
        
        if [ "$error_rate" -gt "$ERROR_RATE_THRESHOLD" ]; then
            log_warning "High error rate: ${error_rate}%"
            send_alert "Error rate alert" "Error rate: ${error_rate}%, above the ${ERROR_RATE_THRESHOLD}% threshold"
        else
            log_info "Error rate: ${error_rate}%"
        fi
    fi
    
    # Mean response time: the metric exposes COUNT and TOTAL_TIME, not MEAN,
    # so compute the mean from those two measurements
    local total_time=$(curl -s "$METRICS_ENDPOINT/http.server.requests" | jq -r '.measurements[] | select(.statistic=="TOTAL_TIME") | .value' 2>/dev/null || echo "0")
    local response_time_ms=0
    if [ "$http_requests_total" -gt 0 ]; then
        response_time_ms=$(echo "$total_time * 1000 / $http_requests_total" | bc -l | cut -d. -f1)
    fi
    
    if [ "$response_time_ms" -gt "$RESPONSE_TIME_THRESHOLD" ]; then
        log_warning "Slow responses: ${response_time_ms}ms"
        send_alert "Response time alert" "Mean response time: ${response_time_ms}ms, above the ${RESPONSE_TIME_THRESHOLD}ms threshold"
    else
        log_info "Mean response time: ${response_time_ms}ms"
    fi
}

# Check for log errors
check_log_errors() {
    log_info "Checking for log errors..."
    
    if [ -f "$ERROR_LOG_FILE" ]; then
        # Crude heuristic: if the error log was modified in the last 5 minutes,
        # count its lines (a log shipper would track increments more precisely)
        local recent_errors=$(find "$ERROR_LOG_FILE" -mmin -5 -exec wc -l {} \; 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
        
        if [ "$recent_errors" -gt 10 ]; then
            log_warning "Error log lines in the last 5 minutes: $recent_errors"
            
            # Grab the latest errors
            local latest_errors=$(tail -n 5 "$ERROR_LOG_FILE")
            send_alert "Error log alert" "Errors in the last 5 minutes: $recent_errors\n\nLatest errors:\n$latest_errors"
        else
            log_info "Error log lines in the last 5 minutes: $recent_errors"
        fi
    fi
    
    # Check the application log for critical errors
    if [ -f "$LOG_FILE" ]; then
        local critical_errors=$(tail -n 1000 "$LOG_FILE" | grep -i "OutOfMemoryError\|StackOverflowError\|SQLException\|ConnectionException" | wc -l)
        
        if [ "$critical_errors" -gt 0 ]; then
            log_warning "Found $critical_errors critical errors"
            local error_details=$(tail -n 1000 "$LOG_FILE" | grep -i "OutOfMemoryError\|StackOverflowError\|SQLException\|ConnectionException" | tail -n 3)
            send_alert "Critical error alert" "Found $critical_errors critical errors\n\nDetails:\n$error_details"
        fi
    fi
}

# Send an alert
send_alert() {
    local title="$1"
    local message="$2"
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    
    log_warning "Sending alert: $title"
    
    # Send to Slack
    if [ -n "$ALERT_WEBHOOK" ]; then
        local slack_payload=$(cat << EOF
{
    "text": "🚨 $title",
    "attachments": [
        {
            "color": "danger",
            "fields": [
                {
                    "title": "Application",
                    "value": "$APP_NAME",
                    "short": true
                },
                {
                    "title": "Time",
                    "value": "$timestamp",
                    "short": true
                },
                {
                    "title": "Details",
                    "value": "$message",
                    "short": false
                }
            ]
        }
    ]
}
EOF
        )
        
        curl -X POST -H 'Content-type: application/json' \
            --data "$slack_payload" \
            "$ALERT_WEBHOOK" &> /dev/null || true
    fi
    
    # Send email
    if [ -n "$EMAIL_RECIPIENTS" ] && command -v mail &> /dev/null; then
        echo "$message" | mail -s "[$APP_NAME] $title" "$EMAIL_RECIPIENTS" || true
    fi
}

# Generate a monitoring report
generate_monitoring_report() {
    log_info "Generating monitoring report..."
    
    local report_file="/app/logs/monitoring-report-$(date +%Y%m%d-%H%M%S).json"
    
    # Gather system information
    local cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | awk -F'%' '{print $1}')
    local memory_info=$(free | grep Mem)
    local total_memory=$(echo $memory_info | awk '{print $2}')
    local used_memory=$(echo $memory_info | awk '{print $3}')
    local memory_usage=$((used_memory * 100 / total_memory))
    local disk_usage=$(df /app | tail -1 | awk '{print $5}' | sed 's/%//')
    
    # Gather application status
    local health_status="UNKNOWN"
    if check_health &> /dev/null; then
        health_status="UP"
    else
        health_status="DOWN"
    fi
    
    # Generate the JSON report
    cat > "$report_file" << EOF
{
    "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
    "application": "$APP_NAME",
    "health_status": "$health_status",
    "system_metrics": {
        "cpu_usage_percent": $cpu_usage,
        "memory_usage_percent": $memory_usage,
        "disk_usage_percent": $disk_usage
    },
    "application_metrics": {
        "port": $APP_PORT,
        "endpoints": {
            "health": "$HEALTH_ENDPOINT",
            "metrics": "$METRICS_ENDPOINT",
            "prometheus": "$PROMETHEUS_ENDPOINT"
        }
    },
    "log_files": {
        "application_log": "$LOG_FILE",
        "error_log": "$ERROR_LOG_FILE",
        "monitoring_log": "$MONITORING_LOG"
    }
}
EOF
    
    log_success "Monitoring report generated: $report_file"
}

# Main entry point
main() {
    # Create the log directory before the first log call
    mkdir -p "$(dirname "$MONITORING_LOG")"
    
    log_info "Starting monitoring checks - $(date '+%Y-%m-%d %H:%M:%S')"
    
    case "$1" in
        "health")
            check_health
            ;;
        "resources")
            check_system_resources
            ;;
        "performance")
            check_performance_metrics
            ;;
        "logs")
            check_log_errors
            ;;
        "report")
            generate_monitoring_report
            ;;
        "all"|"")
            check_health
            check_system_resources
            check_performance_metrics
            check_log_errors
            generate_monitoring_report
            ;;
        *)
            echo "Usage: $0 {health|resources|performance|logs|report|all}"
            echo "  health      - check application health"
            echo "  resources   - check system resources"
            echo "  performance - check application performance metrics"
            echo "  logs        - check for log errors"
            echo "  report      - generate a monitoring report"
            echo "  all         - run all checks (default)"
            exit 1
            ;;
    esac
    
    log_info "Monitoring checks complete - $(date '+%Y-%m-%d %H:%M:%S')"
}

# Error handling
trap 'log_error "Error during monitoring, exit code: $?"' ERR

# Run
main "$@"

3. Backup and Recovery

3.1 Data Backup Strategy

#!/bin/bash
# backup.sh
# Data backup script

set -e

# Configuration variables
APP_NAME="spring-native-demo"
BACKUP_DIR="/backup"
S3_BUCKET="your-backup-bucket"
DATABASE_URL="${DATABASE_URL:-jdbc:postgresql://localhost:5432/demo}"
DATABASE_USER="${DATABASE_USERNAME:-demo_user}"
DATABASE_PASSWORD="${DATABASE_PASSWORD}"
REDIS_HOST="${REDIS_HOST:-localhost}"
REDIS_PORT="${REDIS_PORT:-6379}"
REDIS_PASSWORD="${REDIS_PASSWORD}"
RETENTION_DAYS=30
ENCRYPTION_KEY="${BACKUP_ENCRYPTION_KEY}"

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check the environment
check_environment() {
    log_info "Checking backup environment..."

    # Check required tools
    local required_tools=("pg_dump" "redis-cli" "aws" "gpg" "tar" "gzip")

    for tool in "${required_tools[@]}"; do
        if ! command -v "$tool" &> /dev/null; then
            log_error "Missing required tool: $tool"
            exit 1
        fi
    done

    # Check the backup directory
    if [ ! -d "$BACKUP_DIR" ]; then
        mkdir -p "$BACKUP_DIR"
        log_info "Created backup directory: $BACKUP_DIR"
    fi

    # Check the encryption key
    if [ -z "$ENCRYPTION_KEY" ]; then
        log_warning "No encryption key set; backups will not be encrypted"
    fi

    log_success "Environment check passed"
}

# Back up the database
backup_database() {
    log_info "Backing up database..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local backup_file="$BACKUP_DIR/database_${timestamp}.sql"

    # Parse connection details from the database URL
    local db_host=$(echo "$DATABASE_URL" | sed -n 's|.*://[^@]*@\([^:]*\):.*|\1|p')
    local db_port=$(echo "$DATABASE_URL" | sed -n 's|.*://[^@]*@[^:]*:\([0-9]*\)/.*|\1|p')
    local db_name=$(echo "$DATABASE_URL" | sed -n 's|.*/\([^?]*\).*|\1|p')

    # Set the password for pg_dump
    export PGPASSWORD="$DATABASE_PASSWORD"

    # Custom-format backup
    pg_dump -h "$db_host" -p "$db_port" -U "$DATABASE_USER" -d "$db_name" \
        --verbose --clean --if-exists --create \
        --format=custom --compress=9 \
        --file="$backup_file.custom"

    # Plain SQL backup
    pg_dump -h "$db_host" -p "$db_port" -U "$DATABASE_USER" -d "$db_name" \
        --verbose --clean --if-exists --create \
        --format=plain \
        --file="$backup_file"

    # Compress the SQL file
    gzip "$backup_file"

    # Encrypt the backup files
    if [ -n "$ENCRYPTION_KEY" ]; then
        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$backup_file.custom.gpg" \
            "$backup_file.custom"

        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$backup_file.gz.gpg" \
            "$backup_file.gz"

        # Remove the unencrypted files
        rm "$backup_file.custom" "$backup_file.gz"

        log_success "Database backup complete (encrypted): $backup_file.custom.gpg, $backup_file.gz.gpg"
    else
        log_success "Database backup complete: $backup_file.custom, $backup_file.gz"
    fi

    unset PGPASSWORD
}

# Back up Redis
backup_redis() {
    log_info "Backing up Redis..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local backup_file="$BACKUP_DIR/redis_${timestamp}.rdb"

    # Dump an RDB snapshot from the server
    if [ -n "$REDIS_PASSWORD" ]; then
        redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" -a "$REDIS_PASSWORD" \
            --rdb "$backup_file"
    else
        redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" \
            --rdb "$backup_file"
    fi

    # Compress the backup file
    gzip "$backup_file"

    # Encrypt the backup file
    if [ -n "$ENCRYPTION_KEY" ]; then
        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$backup_file.gz.gpg" \
            "$backup_file.gz"

        rm "$backup_file.gz"

        log_success "Redis backup complete (encrypted): $backup_file.gz.gpg"
    else
        log_success "Redis backup complete: $backup_file.gz"
    fi
}

# Back up application configuration
backup_configuration() {
    log_info "Backing up application configuration..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local config_backup="$BACKUP_DIR/config_${timestamp}.tar.gz"

    # Create the configuration archive
    tar -czf "$config_backup" \
        -C / \
        --exclude='*.log' \
        --exclude='*.tmp' \
        --exclude='*.pid' \
        app/config \
        etc/systemd/system/${APP_NAME}.service \
        etc/nginx/sites-available/${APP_NAME} \
        2>/dev/null || true

    # Encrypt the configuration archive
    if [ -n "$ENCRYPTION_KEY" ]; then
        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$config_backup.gpg" \
            "$config_backup"

        rm "$config_backup"

        log_success "Configuration backup complete (encrypted): $config_backup.gpg"
    else
        log_success "Configuration backup complete: $config_backup"
    fi
}

# Back up log files
backup_logs() {
    log_info "Backing up log files..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local logs_backup="$BACKUP_DIR/logs_${timestamp}.tar.gz"

    # Create the log archive
    if [ -d "/app/logs" ]; then
        tar -czf "$logs_backup" \
            -C /app \
            --exclude='*.tmp' \
            --exclude='*.lock' \
            logs/

        # Encrypt the log archive
        if [ -n "$ENCRYPTION_KEY" ]; then
            gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
                --passphrase "$ENCRYPTION_KEY" \
                --output "$logs_backup.gpg" \
                "$logs_backup"

            rm "$logs_backup"

            log_success "Log backup complete (encrypted): $logs_backup.gpg"
        else
            log_success "Log backup complete: $logs_backup"
        fi
    else
        log_warning "Log directory does not exist; skipping log backup"
    fi
}

# Upload to cloud storage
upload_to_cloud() {
    log_info "Uploading backups to cloud storage..."

    local backup_files=("$BACKUP_DIR"/*$(date +%Y%m%d)*)

    for file in "${backup_files[@]}"; do
        if [ -f "$file" ]; then
            local filename=$(basename "$file")
            local s3_key="$APP_NAME/$(date +%Y/%m/%d)/$filename"

            if aws s3 cp "$file" "s3://$S3_BUCKET/$s3_key" \
                --storage-class STANDARD_IA \
                --server-side-encryption AES256; then
                log_success "Uploaded: $filename -> s3://$S3_BUCKET/$s3_key"
            else
                log_error "Upload failed: $filename"
            fi
        fi
    done
}

# Clean up old backups
cleanup_old_backups() {
    log_info "Cleaning up old backups..."

    # Remove old local backups
    find "$BACKUP_DIR" -type f -mtime +$RETENTION_DAYS -delete

    # Remove old cloud backups
    local cutoff_date=$(date -d "$RETENTION_DAYS days ago" +%Y-%m-%d)

    aws s3api list-objects-v2 \
        --bucket "$S3_BUCKET" \
        --prefix "$APP_NAME/" \
        --query "Contents[?LastModified<='$cutoff_date'].Key" \
        --output text | \
    while read -r key; do
        if [ -n "$key" ]; then
            aws s3 rm "s3://$S3_BUCKET/$key"
            log_info "Deleted old backup: $key"
        fi
    done

    log_success "Old backup cleanup complete"
}

# Verify backups
verify_backup() {
    log_info "Verifying backup integrity..."

    local backup_files=("$BACKUP_DIR"/*$(date +%Y%m%d)*)
    local verification_failed=false

    for file in "${backup_files[@]}"; do
        if [ -f "$file" ]; then
            local filename=$(basename "$file")

            # Check the file size
            local file_size=$(stat -c%s "$file")
            if [ "$file_size" -eq 0 ]; then
                log_error "Backup file is empty: $filename"
                verification_failed=true
                continue
            fi

            # Verify encrypted files
            if [[ "$filename" == *.gpg ]]; then
                if [ -n "$ENCRYPTION_KEY" ]; then
                    if gpg --quiet --batch --yes --passphrase "$ENCRYPTION_KEY" \
                        --decrypt "$file" > /dev/null 2>&1; then
                        log_success "Encrypted file verified: $filename"
                    else
                        log_error "Encrypted file verification failed: $filename"
                        verification_failed=true
                    fi
                fi
            fi

            # Verify compressed files
            if [[ "$filename" == *.gz ]] && [[ "$filename" != *.gpg ]]; then
                if gzip -t "$file" 2>/dev/null; then
                    log_success "Compressed file verified: $filename"
                else
                    log_error "Compressed file verification failed: $filename"
                    verification_failed=true
                fi
            fi
        fi
    done

    if [ "$verification_failed" = true ]; then
        log_error "Backup verification failed"
        exit 1
    else
        log_success "All backup files verified"
    fi
}

# Generate a backup report
generate_backup_report() {
    log_info "Generating backup report..."

    local report_file="$BACKUP_DIR/backup-report-$(date +%Y%m%d-%H%M%S).json"
    local backup_files=("$BACKUP_DIR"/*$(date +%Y%m%d)*)

    # Collect backup statistics
    local total_files=0
    local total_size=0
    local file_list="[]"

    for file in "${backup_files[@]}"; do
        if [ -f "$file" ]; then
            local filename=$(basename "$file")
            local file_size=$(stat -c%s "$file")
            local file_hash=$(sha256sum "$file" | cut -d' ' -f1)

            total_files=$((total_files + 1))
            total_size=$((total_size + file_size))

            file_list=$(echo "$file_list" | jq ". + [{\"name\": \"$filename\", \"size\": $file_size, \"hash\": \"$file_hash\"}]")
        fi
    done

    # Generate the JSON report
    cat > "$report_file" << EOF
{
    "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
    "application": "$APP_NAME",
    "backup_summary": {
        "total_files": $total_files,
        "total_size_bytes": $total_size,
        "total_size_human": "$(numfmt --to=iec $total_size)",
        "backup_directory": "$BACKUP_DIR",
        "s3_bucket": "$S3_BUCKET",
        "retention_days": $RETENTION_DAYS,
        "encryption_enabled": $([ -n "$ENCRYPTION_KEY" ] && echo "true" || echo "false")
    },
    "backup_files": $file_list,
    "backup_types": {
        "database": "PostgreSQL dump (custom + SQL)",
        "redis": "RDB snapshot",
        "configuration": "Application config files",
        "logs": "Application log files"
    }
}
EOF

    log_success "Backup report generated: $report_file"
}

# Main entry point
main() {
    log_info "Starting backup - $(date '+%Y-%m-%d %H:%M:%S')"

    case "$1" in
        "database")
            check_environment
            backup_database
            verify_backup
            ;;
        "redis")
            check_environment
            backup_redis
            verify_backup
            ;;
        "config")
            check_environment
            backup_configuration
            verify_backup
            ;;
        "logs")
            check_environment
            backup_logs
            verify_backup
            ;;
        "upload")
            upload_to_cloud
            ;;
        "cleanup")
            cleanup_old_backups
            ;;
        "verify")
            verify_backup
            ;;
        "report")
            generate_backup_report
            ;;
        "full"|"")
            check_environment
            backup_database
            backup_redis
            backup_configuration
            backup_logs
            verify_backup
            upload_to_cloud
            cleanup_old_backups
            generate_backup_report
            ;;
        *)
            echo "Usage: $0 {database|redis|config|logs|upload|cleanup|verify|report|full}"
            echo "  database - back up the database"
            echo "  redis    - back up Redis"
            echo "  config   - back up configuration files"
            echo "  logs     - back up log files"
            echo "  upload   - upload to cloud storage"
            echo "  cleanup  - remove old backups"
            echo "  verify   - verify backup integrity"
            echo "  report   - generate a backup report"
            echo "  full     - run a full backup (default)"
            exit 1
            ;;
    esac

    log_info "Backup complete - $(date '+%Y-%m-%d %H:%M:%S')"
}

# Error handling
trap 'log_error "Error during backup, exit code: $?"' ERR

# Run
main "$@"