## 1. 安全配置

### 1.1 应用安全配置

```java
// SecurityConfig.java
// Spring Security 配置
package com.example.demo.config;

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.config.http.SessionCreationPolicy;
import org.springframework.security.core.userdetails.User;
import org.springframework.security.core.userdetails.UserDetails;
import org.springframework.security.core.userdetails.UserDetailsService;
import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder;
import org.springframework.security.crypto.password.PasswordEncoder;
import org.springframework.security.provisioning.InMemoryUserDetailsManager;
import org.springframework.security.web.SecurityFilterChain;
import org.springframework.security.web.csrf.CookieCsrfTokenRepository;
import org.springframework.security.web.header.writers.ReferrerPolicyHeaderWriter;
import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.CorsConfigurationSource;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;

import java.util.Arrays;

@Configuration
@EnableWebSecurity
public class SecurityConfig {

    @Bean
    public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
        http
            // CSRF 保护:Token 存入 Cookie,便于前端读取
            .csrf(csrf -> csrf
                .ignoringRequestMatchers("/api/public/**")
                .csrfTokenRepository(CookieCsrfTokenRepository.withHttpOnlyFalse())
            )
            // 会话管理:无状态 API,不创建服务端会话
            // (STATELESS 下 maximumSessions 等并发控制不会生效,故不再配置)
            .sessionManagement(session -> session
                .sessionCreationPolicy(SessionCreationPolicy.STATELESS)
            )
            // 授权配置
            .authorizeHttpRequests(authz -> authz
                .requestMatchers("/actuator/health", "/actuator/info").permitAll()
                .requestMatchers("/api/public/**").permitAll()
                .requestMatchers("/api/admin/**").hasRole("ADMIN")
                .requestMatchers("/api/**").hasAnyRole("USER", "ADMIN")
                .anyRequest().authenticated()
            )
            // HTTP 基本认证
            .httpBasic(basic -> basic
                .realmName("Spring Native Demo")
            )
            // 安全头配置(统一使用 Lambda DSL)
            .headers(headers -> headers
                .frameOptions(frame -> frame.deny())
                .httpStrictTransportSecurity(hsts -> hsts
                    .maxAgeInSeconds(31536000)
                    .includeSubDomains(true)
                    .preload(true)
                )
                .referrerPolicy(referrer -> referrer
                    .policy(ReferrerPolicyHeaderWriter.ReferrerPolicy.STRICT_ORIGIN_WHEN_CROSS_ORIGIN)
                )
                .addHeaderWriter((request, response) -> {
                    // X-Content-Type-Options、X-Frame-Options 等默认安全头由上方配置写出,这里补充其余头
                    response.setHeader("X-XSS-Protection", "1; mode=block");
                    response.setHeader("Permissions-Policy", "geolocation=(), microphone=(), camera=()");
                })
            )
            // CORS 配置
            .cors(cors -> cors.configurationSource(corsConfigurationSource()));

        return http.build();
    }

    @Bean
    public CorsConfigurationSource corsConfigurationSource() {
        CorsConfiguration configuration = new CorsConfiguration();
        configuration.setAllowedOriginPatterns(Arrays.asList("https://*.example.com"));
        configuration.setAllowedMethods(Arrays.asList("GET", "POST", "PUT", "DELETE", "OPTIONS"));
        configuration.setAllowedHeaders(Arrays.asList("*"));
        configuration.setAllowCredentials(true);
        configuration.setMaxAge(3600L);

        UrlBasedCorsConfigurationSource source = new UrlBasedCorsConfigurationSource();
        source.registerCorsConfiguration("/api/**", configuration);
        return source;
    }

    @Bean
    public PasswordEncoder passwordEncoder() {
        return new BCryptPasswordEncoder(12);
    }

    @Bean
    public UserDetailsService userDetailsService() {
        // 仅用于演示:生产环境应从数据库或外部身份源加载用户,凭据来自安全存储
        UserDetails user = User.builder()
            .username("user")
            .password(passwordEncoder().encode("password"))
            .roles("USER")
            .build();

        UserDetails admin = User.builder()
            .username("admin")
            .password(passwordEncoder().encode("admin"))
            .roles("ADMIN")
            .build();

        return new InMemoryUserDetailsManager(user, admin);
    }
}
```
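
配置就绪后,可以用一个冒烟测试快速验证放行规则与安全头是否生效。下面是一个最小示意(假设:`/api/users` 端点、`spring-boot-starter-test` 与 `spring-security-test` 依赖均为示例,并非本文工程已有内容):

```java
// SecurityConfigTest.java(示意代码)
package com.example.demo.config;

import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.web.servlet.MockMvc;

import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.httpBasic;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

@SpringBootTest
@AutoConfigureMockMvc
class SecurityConfigTest {

    @Autowired
    private MockMvc mockMvc;

    @Test
    void healthEndpointIsPublic() throws Exception {
        // 健康检查端点无需认证
        mockMvc.perform(get("/actuator/health"))
            .andExpect(status().isOk());
    }

    @Test
    void apiRequiresAuthentication() throws Exception {
        // 未认证访问受保护端点应返回 401
        mockMvc.perform(get("/api/users"))
            .andExpect(status().isUnauthorized());

        // 认证后响应应带有默认安全头
        mockMvc.perform(get("/api/users").with(httpBasic("user", "password")))
            .andExpect(header().string("X-Content-Type-Options", "nosniff"));
    }
}
```
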
### 1.2 环境变量管理

```yaml
# application-prod.yml
# 生产环境配置
spring:
  application:
    name: ${APP_NAME:spring-native-demo}
  profiles:
    active: prod
  datasource:
    url: ${DATABASE_URL:jdbc:postgresql://localhost:5432/demo}
    username: ${DATABASE_USERNAME:demo_user}
    password: ${DATABASE_PASSWORD}
    driver-class-name: org.postgresql.Driver
    hikari:
      maximum-pool-size: ${DB_POOL_SIZE:10}
      minimum-idle: ${DB_POOL_MIN_IDLE:2}
      connection-timeout: ${DB_CONNECTION_TIMEOUT:30000}
      idle-timeout: ${DB_IDLE_TIMEOUT:600000}
      max-lifetime: ${DB_MAX_LIFETIME:1800000}
      leak-detection-threshold: ${DB_LEAK_DETECTION:60000}
  jpa:
    hibernate:
      ddl-auto: validate
    show-sql: false
    properties:
      hibernate:
        dialect: org.hibernate.dialect.PostgreSQLDialect
        format_sql: false
        use_sql_comments: false
        jdbc:
          batch_size: 20
          order_inserts: true
          order_updates: true
          batch_versioned_data: true
  security:
    user:
      name: ${ADMIN_USERNAME:admin}
      password: ${ADMIN_PASSWORD}
      roles: ADMIN
  redis:
    host: ${REDIS_HOST:localhost}
    port: ${REDIS_PORT:6379}
    password: ${REDIS_PASSWORD:}
    timeout: ${REDIS_TIMEOUT:2000}
    lettuce:
      pool:
        max-active: ${REDIS_POOL_MAX_ACTIVE:8}
        max-idle: ${REDIS_POOL_MAX_IDLE:8}
        min-idle: ${REDIS_POOL_MIN_IDLE:0}
        max-wait: ${REDIS_POOL_MAX_WAIT:-1}

server:
  port: ${SERVER_PORT:8080}
  servlet:
    context-path: ${CONTEXT_PATH:/}
  compression:
    enabled: true
    mime-types: text/html,text/xml,text/plain,text/css,text/javascript,application/javascript,application/json
    min-response-size: 1024
  http2:
    enabled: true
  ssl:
    enabled: ${SSL_ENABLED:false}
    key-store: ${SSL_KEYSTORE_PATH:}
    key-store-password: ${SSL_KEYSTORE_PASSWORD:}
    key-store-type: ${SSL_KEYSTORE_TYPE:PKCS12}
    protocol: TLS
    enabled-protocols: TLSv1.2,TLSv1.3

management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
      base-path: /actuator
  endpoint:
    health:
      show-details: when-authorized
      show-components: when-authorized
      probes:
        enabled: true
    metrics:
      enabled: true
    prometheus:
      enabled: true
  metrics:
    export:
      prometheus:
        enabled: true
        step: 30s
    distribution:
      percentiles-histogram:
        http.server.requests: true
      percentiles:
        http.server.requests: 0.5, 0.9, 0.95, 0.99
  health:
    diskspace:
      enabled: true
      threshold: 1GB
    db:
      enabled: true
    redis:
      enabled: true

logging:
  level:
    com.example.demo: ${LOG_LEVEL:INFO}
    org.springframework.security: WARN
    org.springframework.web: WARN
    org.hibernate: WARN
    org.postgresql: WARN
  pattern:
    console: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level [%X{traceId:-},%X{spanId:-}] %logger{36} - %msg%n"
    file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level [%X{traceId:-},%X{spanId:-}] %logger{36} - %msg%n"
  file:
    name: ${LOG_FILE:/app/logs/application.log}
    max-size: ${LOG_MAX_SIZE:100MB}
    max-history: ${LOG_MAX_HISTORY:30}
    total-size-cap: ${LOG_TOTAL_SIZE:1GB}

# 应用自定义配置
app:
  security:
    jwt:
      secret: ${JWT_SECRET}
      expiration: ${JWT_EXPIRATION:86400}
    rate-limit:
      enabled: ${RATE_LIMIT_ENABLED:true}
      requests-per-minute: ${RATE_LIMIT_RPM:100}
  cache:
    enabled: ${CACHE_ENABLED:true}
    ttl: ${CACHE_TTL:3600}
    max-size: ${CACHE_MAX_SIZE:1000}
  monitoring:
    enabled: ${MONITORING_ENABLED:true}
    metrics-interval: ${METRICS_INTERVAL:30}
  feature-flags:
    new-api: ${FEATURE_NEW_API:false}
    enhanced-logging: ${FEATURE_ENHANCED_LOGGING:true}
```
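
上面 `app.*` 下的自定义配置建议绑定为类型安全的配置类,避免在代码里散落 `@Value`。下面是一个最小示意(假设使用 Spring Boot 的构造器绑定,`JwtProperties` 为本文虚构的示例类):

```java
// JwtProperties.java(示意代码)
package com.example.demo.config;

import org.springframework.boot.context.properties.ConfigurationProperties;

// 绑定 application-prod.yml 中的 app.security.jwt.*;
// 需配合 @EnableConfigurationProperties(JwtProperties.class)
// 或启动类上的 @ConfigurationPropertiesScan 才会注册为 Bean
@ConfigurationProperties(prefix = "app.security.jwt")
public record JwtProperties(String secret, long expiration) {
}
```

这样一来,`JWT_SECRET` 缺失时应用会在启动阶段绑定失败并立即报错,而不是到运行期才暴露问题。
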
### 1.3 密钥管理

```bash
#!/bin/bash
# secrets-management.sh
# 密钥管理脚本

set -e
# 配置变量
SECRETS_DIR="/etc/secrets"
VAULT_ADDR="https://vault.example.com"
VAULT_TOKEN_FILE="/var/run/secrets/vault-token"
APP_NAME="spring-native-demo"
ENVIRONMENT="prod"
# 颜色输出
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# 检查 Vault 连接
check_vault_connection() {
log_info "检查 Vault 连接..."
if ! command -v vault &> /dev/null; then
log_error "Vault CLI 未安装"
exit 1
fi
if [ ! -f "$VAULT_TOKEN_FILE" ]; then
log_error "Vault token 文件不存在: $VAULT_TOKEN_FILE"
exit 1
fi
export VAULT_ADDR="$VAULT_ADDR"
export VAULT_TOKEN=$(cat "$VAULT_TOKEN_FILE")
if ! vault token lookup &> /dev/null; then
log_error "Vault 认证失败"
exit 1
fi
log_success "Vault 连接成功"
}
# 从 Vault 获取密钥
get_secrets_from_vault() {
log_info "从 Vault 获取密钥..."
# KV v2 引擎下,vault kv 命令会自动补全 data/ 前缀,此处使用逻辑路径
local secret_path="secret/$APP_NAME/$ENVIRONMENT"
# 获取密钥数据
local secrets_json=$(vault kv get -format=json "$secret_path" | jq -r '.data.data')
if [ "$secrets_json" = "null" ]; then
log_error "未找到密钥数据: $secret_path"
exit 1
fi
# 创建密钥目录
mkdir -p "$SECRETS_DIR"
chmod 700 "$SECRETS_DIR"
# 写入密钥文件
echo "$secrets_json" | jq -r 'to_entries[] | "\(.key)=\(.value)"' > "$SECRETS_DIR/app.env"
chmod 600 "$SECRETS_DIR/app.env"
# 单独写入敏感密钥
echo "$secrets_json" | jq -r '.database_password // ""' > "$SECRETS_DIR/database_password"
echo "$secrets_json" | jq -r '.jwt_secret // ""' > "$SECRETS_DIR/jwt_secret"
echo "$secrets_json" | jq -r '.admin_password // ""' > "$SECRETS_DIR/admin_password"
echo "$secrets_json" | jq -r '.redis_password // ""' > "$SECRETS_DIR/redis_password"
chmod 600 "$SECRETS_DIR"/*
log_success "密钥获取完成"
}
# 生成随机密钥
generate_random_secrets() {
log_info "生成随机密钥..."
# JWT 密钥 (256位)
JWT_SECRET=$(openssl rand -base64 32)
# 数据库密码
DB_PASSWORD=$(openssl rand -base64 24 | tr -d "=+/" | cut -c1-20)
# 管理员密码
ADMIN_PASSWORD=$(openssl rand -base64 24 | tr -d "=+/" | cut -c1-16)
# Redis 密码
REDIS_PASSWORD=$(openssl rand -base64 24 | tr -d "=+/" | cut -c1-20)
# API 密钥
API_KEY=$(openssl rand -hex 32)
# 加密密钥
ENCRYPTION_KEY=$(openssl rand -base64 32)
log_success "随机密钥生成完成"
}
# 存储密钥到 Vault
store_secrets_to_vault() {
log_info "存储密钥到 Vault..."
local secret_path="secret/$APP_NAME/$ENVIRONMENT"
# 创建密钥 JSON
local secrets_json=$(cat << EOF
{
"database_password": "$DB_PASSWORD",
"jwt_secret": "$JWT_SECRET",
"admin_password": "$ADMIN_PASSWORD",
"redis_password": "$REDIS_PASSWORD",
"api_key": "$API_KEY",
"encryption_key": "$ENCRYPTION_KEY"
}
EOF
)
# 存储到 Vault
echo "$secrets_json" | vault kv put "$secret_path" -
log_success "密钥已存储到 Vault"
}
# 轮换密钥
rotate_secrets() {
log_info "轮换密钥..."
# 备份当前密钥
local backup_path="secret/$APP_NAME/$ENVIRONMENT/backup-$(date +%Y%m%d-%H%M%S)"
local current_secrets=$(vault kv get -format=json "secret/$APP_NAME/$ENVIRONMENT" | jq -r '.data.data')
echo "$current_secrets" | vault kv put "$backup_path" -
# 生成新密钥
generate_random_secrets
# 保留数据库密码(避免中断服务)
DB_PASSWORD=$(echo "$current_secrets" | jq -r '.database_password')
# 存储新密钥
store_secrets_to_vault
log_success "密钥轮换完成"
log_warning "请重启应用以使用新密钥"
}
# 验证密钥
validate_secrets() {
log_info "验证密钥..."
local secret_path="secret/$APP_NAME/$ENVIRONMENT"
local secrets_json=$(vault kv get -format=json "$secret_path" | jq -r '.data.data')
# 检查必需的密钥
local required_keys=("database_password" "jwt_secret" "admin_password")
for key in "${required_keys[@]}"; do
local value=$(echo "$secrets_json" | jq -r ".$key // \"\"")
if [ -z "$value" ] || [ "$value" = "null" ]; then
log_error "缺少必需的密钥: $key"
exit 1
fi
# 检查密钥强度
if [ ${#value} -lt 8 ]; then
log_warning "密钥 $key 长度过短: ${#value} 字符"
fi
done
log_success "密钥验证通过"
}
# 导出环境变量
export_environment_variables() {
log_info "导出环境变量..."
if [ -f "$SECRETS_DIR/app.env" ]; then
# 导出到当前 shell
set -a
source "$SECRETS_DIR/app.env"
set +a
# 生成 systemd 环境文件
cat > "$SECRETS_DIR/systemd.env" << EOF
# Spring Native Demo Environment Variables
# Generated on $(date)
DATABASE_PASSWORD=$(cat "$SECRETS_DIR/database_password")
JWT_SECRET=$(cat "$SECRETS_DIR/jwt_secret")
ADMIN_PASSWORD=$(cat "$SECRETS_DIR/admin_password")
REDIS_PASSWORD=$(cat "$SECRETS_DIR/redis_password")
EOF
chmod 600 "$SECRETS_DIR/systemd.env"
log_success "环境变量导出完成"
else
log_error "密钥文件不存在: $SECRETS_DIR/app.env"
exit 1
fi
}
# 清理密钥
cleanup_secrets() {
log_info "清理本地密钥..."
if [ -d "$SECRETS_DIR" ]; then
rm -rf "$SECRETS_DIR"
log_success "本地密钥已清理"
fi
}
# 主函数
main() {
case "$1" in
"get")
check_vault_connection
get_secrets_from_vault
export_environment_variables
;;
"generate")
generate_random_secrets
check_vault_connection
store_secrets_to_vault
;;
"rotate")
check_vault_connection
rotate_secrets
;;
"validate")
check_vault_connection
validate_secrets
;;
"cleanup")
cleanup_secrets
;;
*)
echo "用法: $0 {get|generate|rotate|validate|cleanup}"
echo " get - 从 Vault 获取密钥"
echo " generate - 生成并存储新密钥"
echo " rotate - 轮换现有密钥"
echo " validate - 验证密钥完整性"
echo " cleanup - 清理本地密钥"
exit 1
;;
esac
}
# 错误处理
trap 'log_error "密钥管理过程中发生错误,退出码: $?"' ERR
# 执行主函数
main "$@"
### 恢复脚本

创建 `scripts/restore.sh`:

```bash
#!/bin/bash
# 恢复脚本
# 用法: ./restore.sh [backup_date] [component]
set -euo pipefail
# 配置
APP_NAME="spring-native-app"
BACKUP_DIR="/backup"
RESTORE_DIR="/tmp/restore"
S3_BUCKET="${S3_BUCKET:-my-app-backups}"
ENCRYPTION_KEY="${ENCRYPTION_KEY:-}"
# 先创建恢复目录,保证日志函数可以写入 restore.log
mkdir -p "$RESTORE_DIR"
# 日志函数
log_info() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1" | tee -a "$RESTORE_DIR/restore.log"
}
log_success() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] [SUCCESS] $1" | tee -a "$RESTORE_DIR/restore.log"
}
log_warning() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] [WARNING] $1" | tee -a "$RESTORE_DIR/restore.log"
}
log_error() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] [ERROR] $1" | tee -a "$RESTORE_DIR/restore.log" >&2
}
# 检查环境
check_environment() {
log_info "检查恢复环境..."
# 创建恢复目录
mkdir -p "$RESTORE_DIR"
# 检查必要工具
local required_tools=("psql" "redis-cli" "aws" "gpg" "jq")
for tool in "${required_tools[@]}"; do
if ! command -v "$tool" &> /dev/null; then
log_error "缺少必要工具: $tool"
exit 1
fi
done
# 检查数据库连接
if ! psql "$DATABASE_URL" -c "SELECT 1" &> /dev/null; then
log_error "无法连接到数据库"
exit 1
fi
# 检查 Redis 连接
if ! redis-cli -u "$REDIS_URL" ping &> /dev/null; then
log_error "无法连接到 Redis"
exit 1
fi
log_success "环境检查完成"
}
# 下载备份文件
download_backup() {
local backup_date="$1"
local component="$2"
log_info "下载备份文件: $backup_date - $component"
local s3_prefix="$APP_NAME/${backup_date:0:4}/${backup_date:4:2}/${backup_date:6:2}"
# 列出可用备份
local backup_files
backup_files=$(aws s3 ls "s3://$S3_BUCKET/$s3_prefix/" --recursive | grep "$component" | awk '{print $4}')
if [ -z "$backup_files" ]; then
log_error "未找到 $backup_date 的 $component 备份文件"
exit 1
fi
# 下载备份文件
echo "$backup_files" | while read -r s3_key; do
local filename=$(basename "$s3_key")
local local_file="$RESTORE_DIR/$filename"
if aws s3 cp "s3://$S3_BUCKET/$s3_key" "$local_file"; then
log_success "已下载: $filename"
else
log_error "下载失败: $filename"
exit 1
fi
done
}
# 解密备份文件
# 注意:本函数通过 stdout 返回文件路径供调用方捕获,因此日志重定向到 stderr
decrypt_backup() {
local encrypted_file="$1"
if [[ "$encrypted_file" == *.gpg ]]; then
log_info "解密备份文件: $(basename "$encrypted_file")" >&2
local decrypted_file="${encrypted_file%.gpg}"
if [ -n "$ENCRYPTION_KEY" ]; then
if gpg --quiet --batch --yes --passphrase "$ENCRYPTION_KEY" \
--decrypt "$encrypted_file" > "$decrypted_file"; then
log_success "解密完成: $(basename "$decrypted_file")" >&2
echo "$decrypted_file"
else
log_error "解密失败: $(basename "$encrypted_file")"
exit 1
fi
else
log_error "缺少解密密钥"
exit 1
fi
else
echo "$encrypted_file"
fi
}
# 恢复数据库
restore_database() {
local backup_date="$1"
log_info "恢复数据库: $backup_date"
# 下载数据库备份
download_backup "$backup_date" "database"
# 查找数据库备份文件
local db_backup_file
db_backup_file=$(find "$RESTORE_DIR" -name "database_${backup_date}*.sql*" | head -1)
if [ -z "$db_backup_file" ]; then
log_error "未找到数据库备份文件"
exit 1
fi
# 解密(如果需要)
db_backup_file=$(decrypt_backup "$db_backup_file")
# 创建数据库备份(恢复前)
local pre_restore_backup="$RESTORE_DIR/pre-restore-$(date +%Y%m%d-%H%M%S).sql"
pg_dump "$DATABASE_URL" > "$pre_restore_backup"
log_info "已创建恢复前备份: $(basename "$pre_restore_backup")"
# 恢复数据库
log_warning "开始恢复数据库,这将覆盖现有数据"
# 终止现有连接
psql "$DATABASE_URL" -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = current_database() AND pid <> pg_backend_pid();"
# 恢复数据
if psql "$DATABASE_URL" < "$db_backup_file"; then
log_success "数据库恢复完成"
else
log_error "数据库恢复失败"
# 尝试恢复到恢复前状态
log_info "尝试回滚到恢复前状态..."
psql "$DATABASE_URL" < "$pre_restore_backup"
exit 1
fi
}
# 恢复 Redis
restore_redis() {
local backup_date="$1"
log_info "恢复 Redis: $backup_date"
# 下载 Redis 备份
download_backup "$backup_date" "redis"
# 查找 Redis 备份文件
local redis_backup_file
redis_backup_file=$(find "$RESTORE_DIR" -name "redis_${backup_date}*.rdb*" | head -1)
if [ -z "$redis_backup_file" ]; then
log_error "未找到 Redis 备份文件"
exit 1
fi
# 解密(如果需要)
redis_backup_file=$(decrypt_backup "$redis_backup_file")
# 创建 Redis 备份(恢复前)
redis-cli -u "$REDIS_URL" BGSAVE
log_info "已创建 Redis 恢复前备份"
# 停止 Redis 写入
redis-cli -u "$REDIS_URL" CONFIG SET save ""
# 清空 Redis
redis-cli -u "$REDIS_URL" FLUSHALL
# 恢复 Redis 数据
# 注意:redis-cli --rdb 只能从服务器"导出"RDB,不能用于导入。
# 这里假设 Redis 与脚本同机部署且由 systemd 管理:
# 将备份复制到数据目录后重启实例,由 Redis 启动时加载(目录与服务名视部署而定)
local redis_data_dir
redis_data_dir=$(redis-cli -u "$REDIS_URL" CONFIG GET dir | tail -n 1)
cp "$redis_backup_file" "$redis_data_dir/dump.rdb"
if systemctl restart redis; then
log_success "Redis 恢复完成"
else
log_error "Redis 恢复失败"
exit 1
fi
# 重新启用保存
redis-cli -u "$REDIS_URL" CONFIG SET save "900 1 300 10 60 10000"
}
# 恢复配置文件
restore_configuration() {
local backup_date="$1"
log_info "恢复配置文件: $backup_date"
# 下载配置备份
download_backup "$backup_date" "config"
# 查找配置备份文件
local config_backup_file
config_backup_file=$(find "$RESTORE_DIR" -name "config_${backup_date}*.tar.gz*" | head -1)
if [ -z "$config_backup_file" ]; then
log_error "未找到配置备份文件"
exit 1
fi
# 解密(如果需要)
config_backup_file=$(decrypt_backup "$config_backup_file")
# 备份当前配置
local current_config_backup="$RESTORE_DIR/current-config-$(date +%Y%m%d-%H%M%S).tar.gz"
tar -czf "$current_config_backup" -C /app config/
log_info "已备份当前配置: $(basename "$current_config_backup")"
# 恢复配置文件
if tar -xzf "$config_backup_file" -C /app; then
log_success "配置文件恢复完成"
else
log_error "配置文件恢复失败"
# 恢复当前配置
tar -xzf "$current_config_backup" -C /app
exit 1
fi
}
# 验证恢复
verify_restore() {
log_info "验证恢复结果..."
# 检查数据库连接
if psql "$DATABASE_URL" -c "SELECT COUNT(*) FROM users;" &> /dev/null; then
local user_count
user_count=$(psql "$DATABASE_URL" -t -c "SELECT COUNT(*) FROM users;" | xargs)
log_success "数据库验证通过,用户数量: $user_count"
else
log_error "数据库验证失败"
return 1
fi
# 检查 Redis 连接
if redis-cli -u "$REDIS_URL" ping &> /dev/null; then
local redis_keys
redis_keys=$(redis-cli -u "$REDIS_URL" DBSIZE)
log_success "Redis 验证通过,键数量: $redis_keys"
else
log_error "Redis 验证失败"
return 1
fi
# 检查应用健康状态
if curl -f "http://localhost:8080/actuator/health" &> /dev/null; then
log_success "应用健康检查通过"
else
log_warning "应用健康检查失败,可能需要重启应用"
fi
log_success "恢复验证完成"
}
# 生成恢复报告
generate_restore_report() {
local backup_date="$1"
local components="$2"
log_info "生成恢复报告..."
local report_file="$RESTORE_DIR/restore-report-$(date +%Y%m%d-%H%M%S).json"
# 收集恢复信息
local user_count=0
local redis_keys=0
if psql "$DATABASE_URL" -c "SELECT 1" &> /dev/null; then
user_count=$(psql "$DATABASE_URL" -t -c "SELECT COUNT(*) FROM users;" | xargs)
fi
if redis-cli -u "$REDIS_URL" ping &> /dev/null; then
redis_keys=$(redis-cli -u "$REDIS_URL" DBSIZE)
fi
# 生成 JSON 报告
cat > "$report_file" << EOF
{
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"application": "$APP_NAME",
"restore_summary": {
"backup_date": "$backup_date",
"components_restored": "$components",
"restore_directory": "$RESTORE_DIR",
"restore_duration": "$(date '+%Y-%m-%d %H:%M:%S')"
},
"post_restore_status": {
"database_user_count": $user_count,
"redis_key_count": $redis_keys,
"application_health": "$(curl -s http://localhost:8080/actuator/health | jq -r '.status // "UNKNOWN"')"
},
"backup_files_used": [
$(find "$RESTORE_DIR" -name "*${backup_date}*" -type f | while read -r file; do
echo " \"$(basename "$file")\""
done | sed '$!s/$/,/')
]
}
EOF
log_success "恢复报告已生成: $report_file"
}
# 主函数
main() {
local backup_date="${1:-$(date +%Y%m%d)}"
local component="${2:-full}"
log_info "开始恢复流程 - 备份日期: $backup_date, 组件: $component"
case "$component" in
"database")
check_environment
restore_database "$backup_date"
verify_restore
generate_restore_report "$backup_date" "database"
;;
"redis")
check_environment
restore_redis "$backup_date"
verify_restore
generate_restore_report "$backup_date" "redis"
;;
"config")
check_environment
restore_configuration "$backup_date"
verify_restore
generate_restore_report "$backup_date" "configuration"
;;
"full")
check_environment
restore_database "$backup_date"
restore_redis "$backup_date"
restore_configuration "$backup_date"
verify_restore
generate_restore_report "$backup_date" "full"
;;
*)
echo "用法: $0 [backup_date] [component]"
echo " backup_date - 备份日期 (YYYYMMDD,默认今天)"
echo " component - 恢复组件 (database|redis|config|full,默认 full)"
echo ""
echo "示例:"
echo " $0 20240115 database # 恢复 2024-01-15 的数据库备份"
echo " $0 20240115 full # 恢复 2024-01-15 的完整备份"
exit 1
;;
esac
log_info "恢复流程完成 - $(date '+%Y-%m-%d %H:%M:%S')"
}
# 错误处理
trap 'log_error "恢复过程中发生错误,退出码: $?"' ERR
# 执行主函数
main "$@"
## 总结

本章介绍了 Spring Native 应用在生产环境中的最佳实践,包括:

### 核心要点

**安全配置**
- 实施多层安全防护
- 使用强密码策略和会话管理
- 配置适当的 CORS 和安全头
- 启用 CSRF 保护和 HTTPS
**环境管理**
- 使用环境变量管理配置
- 实施密钥轮换和安全存储
- 分离开发、测试和生产环境
- 使用配置管理工具
**监控与日志**
- 实施全面的应用监控
- 配置结构化日志记录
- 设置告警和通知机制
- 监控系统资源和性能指标
**备份与恢复**
- 实施自动化备份策略
- 定期测试恢复流程
- 使用加密保护备份数据
- 维护多个备份副本
### 最佳实践

**部署策略**
- 使用蓝绿部署或滚动更新
- 实施健康检查和就绪探针
- 配置适当的资源限制
- 使用容器编排平台
**性能优化**
- 监控启动时间和内存使用
- 优化 GraalVM 编译参数
- 使用缓存减少数据库访问
- 实施连接池和资源管理
**故障处理**
- 实施断路器模式
- 配置重试和超时机制
- 使用优雅关闭
- 准备灾难恢复计划
**运维自动化**
- 使用基础设施即代码
- 实施 CI/CD 流水线
- 自动化测试和部署
- 监控和告警自动化
### 下一步学习

**高级主题**
- 微服务架构设计
- 分布式追踪和监控
- 服务网格集成
- 云原生安全
**工具集成**
- Kubernetes 运维
- Prometheus 监控
- ELK 日志分析
- Grafana 可视化
**性能调优**
- JVM 参数优化
- 原生镜像调优
- 数据库性能优化
- 网络和 I/O 优化
通过遵循这些最佳实践,可以确保 Spring Native 应用在生产环境中稳定、安全、高效地运行。
## 2. 监控与日志
### 2.1 应用监控配置
```java
// MonitoringConfig.java
// 监控配置
package com.example.demo.config;

import io.micrometer.core.aop.TimedAspect;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.config.MeterFilter;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.actuate.health.HealthIndicator;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.EnableAspectJAutoProxy;

import java.time.Duration;

@Configuration
@EnableAspectJAutoProxy
public class MonitoringConfig {

    @Bean
    public TimedAspect timedAspect(MeterRegistry registry) {
        return new TimedAspect(registry);
    }

    @Bean
    public MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
        return registry -> {
            String version = getClass().getPackage().getImplementationVersion();
            registry.config()
                .commonTags(
                    "application", "spring-native-demo",
                    "environment", System.getProperty("spring.profiles.active", "unknown"),
                    "version", version != null ? version : "unknown"
                )
                // 过滤掉 actuator 与静态资源的请求指标
                .meterFilter(MeterFilter.deny(id -> {
                    String uri = id.getTag("uri");
                    return uri != null && (uri.startsWith("/actuator") || uri.startsWith("/static"));
                }))
                .meterFilter(MeterFilter.maximumExpectedValue("http.server.requests", Duration.ofSeconds(10)))
                .meterFilter(MeterFilter.minimumExpectedValue("http.server.requests", Duration.ofMillis(1)));
        };
    }

    @Bean
    public HealthIndicator customHealthIndicator() {
        return () -> {
            // 自定义健康检查逻辑:HealthIndicator 应返回 Health 对象
            try {
                // 检查关键组件状态
                checkDatabaseConnection();
                checkRedisConnection();
                checkExternalServices();

                return Health.up()
                    .withDetail("database", "Connected")
                    .withDetail("redis", "Connected")
                    .withDetail("external-services", "Available")
                    .withDetail("startup-time", getStartupTime())
                    .withDetail("uptime", getUptime())
                    .build();
            } catch (Exception e) {
                return Health.down()
                    .withDetail("error", e.getMessage())
                    .withDetail("timestamp", System.currentTimeMillis())
                    .build();
            }
        };
    }

    private void checkDatabaseConnection() {
        // 数据库连接检查
    }

    private void checkRedisConnection() {
        // Redis 连接检查
    }

    private void checkExternalServices() {
        // 外部服务检查
    }

    private String getStartupTime() {
        return System.getProperty("app.startup.time", "unknown");
    }

    private String getUptime() {
        long uptime = System.currentTimeMillis() - Long.parseLong(
            System.getProperty("app.startup.timestamp", "0")
        );
        return Duration.ofMillis(uptime).toString();
    }
}
```
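
注册了 `TimedAspect` 之后,任何 Spring Bean 的方法都可以用 `@Timed` 注解生成计时指标。下面是一个示意(`OrderService` 与指标名为本文虚构):

```java
// OrderService.java(示意代码:@Timed 依赖上文注册的 TimedAspect 生效)
package com.example.demo.service;

import io.micrometer.core.annotation.Timed;
import org.springframework.stereotype.Service;

@Service
public class OrderService {

    // 生成名为 demo.orders.process 的 Timer;
    // histogram = true 会记录直方图桶,供 Prometheus 侧计算 P95/P99
    @Timed(value = "demo.orders.process", histogram = true, extraTags = {"type", "standard"})
    public void processOrder(String orderId) {
        // 业务逻辑...
    }
}
```
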
### 2.2 结构化日志配置

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- logback-spring.xml -->
<!-- Logback 配置(XML 声明必须位于文件首行) -->
<configuration>
<!-- 属性定义 -->
<property name="LOG_FILE" value="${LOG_FILE:-/app/logs/application.log}"/>
<property name="LOG_PATTERN" value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level [%X{traceId:-},%X{spanId:-}] %logger{36} - %msg%n"/>
<property name="JSON_PATTERN" value='{"timestamp":"%d{yyyy-MM-dd HH:mm:ss.SSS}","level":"%level","thread":"%thread","logger":"%logger{36}","traceId":"%X{traceId:-}","spanId":"%X{spanId:-}","message":"%msg","exception":"%ex{full}"}%n'/>
<!-- 控制台输出 -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>${LOG_PATTERN}</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<!-- JSON 格式控制台输出(生产环境) -->
<appender name="JSON_CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
<providers>
<timestamp>
<timeZone>Asia/Shanghai</timeZone>
</timestamp>
<version/>
<logLevel/>
<message/>
<mdc/>
<arguments/>
<stackTrace/>
<pattern>
<pattern>
{
"application": "spring-native-demo",
"environment": "${spring.profiles.active:-unknown}",
"hostname": "${HOSTNAME:-unknown}",
"pid": "${PID:-unknown}"
}
</pattern>
</pattern>
</providers>
</encoder>
</appender>
<!-- 文件输出 -->
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_FILE}</file>
<encoder>
<pattern>${LOG_PATTERN}</pattern>
<charset>UTF-8</charset>
</encoder>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz</fileNamePattern>
<maxFileSize>100MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>1GB</totalSizeCap>
</rollingPolicy>
</appender>
<!-- JSON 格式文件输出 -->
<appender name="JSON_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_FILE}.json</file>
<encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
<providers>
<timestamp>
<timeZone>Asia/Shanghai</timeZone>
</timestamp>
<version/>
<logLevel/>
<message/>
<mdc/>
<arguments/>
<stackTrace/>
<pattern>
<pattern>
{
"application": "spring-native-demo",
"environment": "${spring.profiles.active:-unknown}",
"hostname": "${HOSTNAME:-unknown}",
"pid": "${PID:-unknown}"
}
</pattern>
</pattern>
</providers>
</encoder>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>${LOG_FILE}.json.%d{yyyy-MM-dd}.%i.gz</fileNamePattern>
<maxFileSize>100MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>1GB</totalSizeCap>
</rollingPolicy>
</appender>
<!-- 错误日志单独输出 -->
<appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>/app/logs/error.log</file>
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>ERROR</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
<encoder>
<pattern>${LOG_PATTERN}</pattern>
<charset>UTF-8</charset>
</encoder>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>/app/logs/error.%d{yyyy-MM-dd}.%i.gz</fileNamePattern>
<maxFileSize>50MB</maxFileSize>
<maxHistory>60</maxHistory>
<totalSizeCap>500MB</totalSizeCap>
</rollingPolicy>
</appender>
<!-- 异步输出 -->
<appender name="ASYNC_FILE" class="ch.qos.logback.classic.AsyncAppender">
<appender-ref ref="FILE"/>
<queueSize>1024</queueSize>
<discardingThreshold>0</discardingThreshold>
<includeCallerData>false</includeCallerData>
</appender>
<appender name="ASYNC_JSON_FILE" class="ch.qos.logback.classic.AsyncAppender">
<appender-ref ref="JSON_FILE"/>
<queueSize>1024</queueSize>
<discardingThreshold>0</discardingThreshold>
<includeCallerData>false</includeCallerData>
</appender>
<!-- 开发环境配置 -->
<springProfile name="dev,test">
<root level="INFO">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="ASYNC_FILE"/>
</root>
<logger name="com.example.demo" level="DEBUG" additivity="false">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="ASYNC_FILE"/>
</logger>
</springProfile>
<!-- 生产环境配置 -->
<springProfile name="prod">
<root level="INFO">
<appender-ref ref="JSON_CONSOLE"/>
<appender-ref ref="ASYNC_JSON_FILE"/>
<appender-ref ref="ERROR_FILE"/>
</root>
<logger name="com.example.demo" level="INFO" additivity="false">
<appender-ref ref="JSON_CONSOLE"/>
<appender-ref ref="ASYNC_JSON_FILE"/>
<appender-ref ref="ERROR_FILE"/>
</logger>
<!-- 减少第三方库日志 -->
<logger name="org.springframework" level="WARN"/>
<logger name="org.hibernate" level="WARN"/>
<logger name="org.apache" level="WARN"/>
<logger name="com.zaxxer.hikari" level="WARN"/>
</springProfile>
<!-- 特定包的日志级别 -->
<logger name="org.springframework.security" level="WARN"/>
<logger name="org.springframework.web.filter.CommonsRequestLoggingFilter" level="DEBUG"/>
<logger name="org.springframework.jdbc.core" level="DEBUG"/>
<!-- SQL 日志(开发环境) -->
<springProfile name="dev">
<logger name="org.hibernate.SQL" level="DEBUG"/>
<logger name="org.hibernate.type.descriptor.sql.BasicBinder" level="TRACE"/>
</springProfile>
</configuration>
```
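
上面的日志模式引用了 `%X{traceId}` 和 `%X{spanId}`。如果项目已接入 Micrometer Tracing(或 Sleuth),这两个 MDC 字段会自动填充;若没有,可以用一个简单的 Filter 自行写入 traceId,如下示意(`TraceIdFilter` 与 `X-Trace-Id` 头均为本文假设):

```java
// TraceIdFilter.java(示意代码,已接入追踪组件时无需手写)
package com.example.demo.config;

import jakarta.servlet.FilterChain;
import jakarta.servlet.ServletException;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
import org.slf4j.MDC;
import org.springframework.stereotype.Component;
import org.springframework.web.filter.OncePerRequestFilter;

import java.io.IOException;
import java.util.UUID;

@Component
public class TraceIdFilter extends OncePerRequestFilter {

    @Override
    protected void doFilterInternal(HttpServletRequest request,
                                    HttpServletResponse response,
                                    FilterChain chain) throws ServletException, IOException {
        // 优先复用上游传入的 X-Trace-Id,否则生成新的
        String traceId = request.getHeader("X-Trace-Id");
        if (traceId == null || traceId.isBlank()) {
            traceId = UUID.randomUUID().toString().replace("-", "");
        }
        MDC.put("traceId", traceId);
        try {
            chain.doFilter(request, response);
        } finally {
            // 线程会被容器复用,必须清理,避免日志串号
            MDC.remove("traceId");
        }
    }
}
```
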
### 2.3 监控脚本

```bash
#!/bin/bash
# monitoring.sh
# 应用监控脚本

set -e
# 配置变量
APP_NAME="spring-native-demo"
APP_PORT="8080"
HEALTH_ENDPOINT="http://localhost:$APP_PORT/actuator/health"
METRICS_ENDPOINT="http://localhost:$APP_PORT/actuator/metrics"
PROMETHEUS_ENDPOINT="http://localhost:$APP_PORT/actuator/prometheus"
LOG_FILE="/app/logs/application.log"
ERROR_LOG_FILE="/app/logs/error.log"
MONITORING_LOG="/app/logs/monitoring.log"
ALERT_WEBHOOK="https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK"
EMAIL_RECIPIENTS="admin@example.com"
# 阈值配置
CPU_THRESHOLD=80
MEMORY_THRESHOLD=85
DISK_THRESHOLD=90
RESPONSE_TIME_THRESHOLD=5000
ERROR_RATE_THRESHOLD=5
# 颜色输出
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() {
echo -e "${BLUE}[INFO]${NC} $1" | tee -a "$MONITORING_LOG"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1" | tee -a "$MONITORING_LOG"
}
log_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1" | tee -a "$MONITORING_LOG"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1" | tee -a "$MONITORING_LOG"
}
# 检查应用健康状态
check_health() {
log_info "检查应用健康状态..."
local health_response
local http_code
health_response=$(curl -s -w "%{http_code}" "$HEALTH_ENDPOINT" -o /tmp/health.json 2>/dev/null || echo "000")
http_code=${health_response: -3}
if [ "$http_code" = "200" ]; then
local status=$(jq -r '.status' /tmp/health.json 2>/dev/null || echo "UNKNOWN")
if [ "$status" = "UP" ]; then
log_success "应用健康状态: UP"
return 0
else
log_error "应用健康状态: $status"
return 1
fi
else
log_error "健康检查失败,HTTP 状态码: $http_code"
return 1
fi
}
# 检查系统资源
check_system_resources() {
log_info "检查系统资源..."
# CPU 使用率
local cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | awk -F'%' '{print $1}')
cpu_usage=${cpu_usage%.*}
if [ "$cpu_usage" -gt "$CPU_THRESHOLD" ]; then
log_warning "CPU 使用率过高: ${cpu_usage}%"
send_alert "CPU 使用率告警" "CPU 使用率: ${cpu_usage}%,超过阈值 ${CPU_THRESHOLD}%"
else
log_info "CPU 使用率: ${cpu_usage}%"
fi
# 内存使用率
local memory_info=$(free | grep Mem)
local total_memory=$(echo $memory_info | awk '{print $2}')
local used_memory=$(echo $memory_info | awk '{print $3}')
local memory_usage=$((used_memory * 100 / total_memory))
if [ "$memory_usage" -gt "$MEMORY_THRESHOLD" ]; then
log_warning "内存使用率过高: ${memory_usage}%"
send_alert "内存使用率告警" "内存使用率: ${memory_usage}%,超过阈值 ${MEMORY_THRESHOLD}%"
else
log_info "内存使用率: ${memory_usage}%"
fi
# 磁盘使用率
local disk_usage=$(df /app | tail -1 | awk '{print $5}' | sed 's/%//')
if [ "$disk_usage" -gt "$DISK_THRESHOLD" ]; then
log_warning "磁盘使用率过高: ${disk_usage}%"
send_alert "磁盘使用率告警" "磁盘使用率: ${disk_usage}%,超过阈值 ${DISK_THRESHOLD}%"
else
log_info "磁盘使用率: ${disk_usage}%"
fi
}
# 检查应用性能指标
check_performance_metrics() {
log_info "检查应用性能指标..."
# 获取 JVM 指标
# 指标值为浮点数,先用 jq floor 取整再做 bash 算术
local jvm_memory_used=$(curl -s "$METRICS_ENDPOINT/jvm.memory.used" | jq -r '.measurements[0].value | floor' 2>/dev/null || echo "0")
local jvm_memory_max=$(curl -s "$METRICS_ENDPOINT/jvm.memory.max" | jq -r '.measurements[0].value | floor' 2>/dev/null || echo "1")
local jvm_memory_usage=$((jvm_memory_used * 100 / jvm_memory_max))
log_info "JVM 内存使用率: ${jvm_memory_usage}%"
# 获取 HTTP 请求指标
# COUNT 为浮点数,floor 取整;5xx 错误通过 outcome:SERVER_ERROR 标签筛选
local http_requests_total=$(curl -s "$METRICS_ENDPOINT/http.server.requests" | jq -r '.measurements[] | select(.statistic=="COUNT") | .value | floor' 2>/dev/null || echo "0")
local http_requests_error=$(curl -s "$METRICS_ENDPOINT/http.server.requests?tag=outcome:SERVER_ERROR" | jq -r '.measurements[] | select(.statistic=="COUNT") | .value | floor' 2>/dev/null || echo "0")
http_requests_total=${http_requests_total:-0}
http_requests_error=${http_requests_error:-0}
if [ "$http_requests_total" -gt 0 ]; then
local error_rate=$((http_requests_error * 100 / http_requests_total))
if [ "$error_rate" -gt "$ERROR_RATE_THRESHOLD" ]; then
log_warning "错误率过高: ${error_rate}%"
send_alert "错误率告警" "错误率: ${error_rate}%,超过阈值 ${ERROR_RATE_THRESHOLD}%"
else
log_info "错误率: ${error_rate}%"
fi
fi
# 获取响应时间:actuator 不直接暴露 MEAN,用 TOTAL_TIME/COUNT 计算
local total_time=$(curl -s "$METRICS_ENDPOINT/http.server.requests" | jq -r '.measurements[] | select(.statistic=="TOTAL_TIME") | .value' 2>/dev/null || echo "0")
local response_time_ms=0
if [ "$http_requests_total" -gt 0 ]; then
response_time_ms=$(printf '%.0f' "$(echo "$total_time * 1000 / $http_requests_total" | bc -l)")
fi
if [ "$response_time_ms" -gt "$RESPONSE_TIME_THRESHOLD" ]; then
log_warning "响应时间过长: ${response_time_ms}ms"
send_alert "响应时间告警" "平均响应时间: ${response_time_ms}ms,超过阈值 ${RESPONSE_TIME_THRESHOLD}ms"
else
log_info "平均响应时间: ${response_time_ms}ms"
fi
}
# 检查日志错误
check_log_errors() {
log_info "检查日志错误..."
if [ -f "$ERROR_LOG_FILE" ]; then
# 若错误日志在最近 5 分钟内有更新,则统计其总行数(近似指标)
local recent_errors=$(find "$ERROR_LOG_FILE" -mmin -5 -exec wc -l {} \; 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
if [ "$recent_errors" -gt 10 ]; then
log_warning "最近5分钟错误日志数量: $recent_errors"
# 获取最新的错误信息
local latest_errors=$(tail -n 5 "$ERROR_LOG_FILE")
send_alert "错误日志告警" "最近5分钟错误数量: $recent_errors\n\n最新错误:\n$latest_errors"
else
log_info "最近5分钟错误日志数量: $recent_errors"
fi
fi
# 检查应用日志中的关键错误
if [ -f "$LOG_FILE" ]; then
local critical_errors=$(tail -n 1000 "$LOG_FILE" | grep -i "OutOfMemoryError\|StackOverflowError\|SQLException\|ConnectionException" | wc -l)
if [ "$critical_errors" -gt 0 ]; then
log_warning "发现 $critical_errors 个关键错误"
local error_details=$(tail -n 1000 "$LOG_FILE" | grep -i "OutOfMemoryError\|StackOverflowError\|SQLException\|ConnectionException" | tail -n 3)
send_alert "关键错误告警" "发现 $critical_errors 个关键错误\n\n错误详情:\n$error_details"
fi
fi
}
# 发送告警
send_alert() {
local title="$1"
local message="$2"
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
log_warning "发送告警: $title"
# 发送到 Slack
if [ -n "$ALERT_WEBHOOK" ]; then
local slack_payload=$(cat << EOF
{
"text": "🚨 $title",
"attachments": [
{
"color": "danger",
"fields": [
{
"title": "应用",
"value": "$APP_NAME",
"short": true
},
{
"title": "时间",
"value": "$timestamp",
"short": true
},
{
"title": "详情",
"value": "$message",
"short": false
}
]
}
]
}
EOF
)
curl -X POST -H 'Content-type: application/json' \
--data "$slack_payload" \
"$ALERT_WEBHOOK" &> /dev/null || true
fi
# 发送邮件
if [ -n "$EMAIL_RECIPIENTS" ] && command -v mail &> /dev/null; then
echo "$message" | mail -s "[$APP_NAME] $title" "$EMAIL_RECIPIENTS" || true
fi
}
# 生成监控报告
generate_monitoring_report() {
log_info "生成监控报告..."
local report_file="/app/logs/monitoring-report-$(date +%Y%m%d-%H%M%S).json"
# 获取系统信息
local cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | awk -F'%' '{print $1}')
local memory_info=$(free | grep Mem)
local total_memory=$(echo $memory_info | awk '{print $2}')
local used_memory=$(echo $memory_info | awk '{print $3}')
local memory_usage=$((used_memory * 100 / total_memory))
local disk_usage=$(df /app | tail -1 | awk '{print $5}' | sed 's/%//')
# 获取应用指标
local health_status="UNKNOWN"
if check_health &> /dev/null; then
health_status="UP"
else
health_status="DOWN"
fi
# 生成 JSON 报告
cat > "$report_file" << EOF
{
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"application": "$APP_NAME",
"health_status": "$health_status",
"system_metrics": {
"cpu_usage_percent": $cpu_usage,
"memory_usage_percent": $memory_usage,
"disk_usage_percent": $disk_usage
},
"application_metrics": {
"port": $APP_PORT,
"endpoints": {
"health": "$HEALTH_ENDPOINT",
"metrics": "$METRICS_ENDPOINT",
"prometheus": "$PROMETHEUS_ENDPOINT"
}
},
"log_files": {
"application_log": "$LOG_FILE",
"error_log": "$ERROR_LOG_FILE",
"monitoring_log": "$MONITORING_LOG"
}
}
EOF
log_success "监控报告已生成: $report_file"
}
# 主函数
main() {
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
log_info "开始监控检查 - $timestamp"
# 创建日志目录
mkdir -p "$(dirname "$MONITORING_LOG")"
case "$1" in
"health")
check_health
;;
"resources")
check_system_resources
;;
"performance")
check_performance_metrics
;;
"logs")
check_log_errors
;;
"report")
generate_monitoring_report
;;
"all"|"")
check_health
check_system_resources
check_performance_metrics
check_log_errors
generate_monitoring_report
;;
*)
echo "用法: $0 {health|resources|performance|logs|report|all}"
echo " health - 检查应用健康状态"
echo " resources - 检查系统资源"
echo " performance - 检查应用性能指标"
echo " logs - 检查日志错误"
echo " report - 生成监控报告"
echo " all - 执行所有检查(默认)"
exit 1
;;
esac
log_info "监控检查完成 - $(date '+%Y-%m-%d %H:%M:%S')"
}
# 错误处理
trap 'log_error "监控过程中发生错误,退出码: $?"' ERR
# 执行主函数
main "$@"
## 3. 备份与恢复

### 3.1 数据备份策略
```bash
#!/bin/bash
# backup.sh
# 数据备份脚本

set -e

# 配置变量
APP_NAME="spring-native-demo"
BACKUP_DIR="/backup"
S3_BUCKET="your-backup-bucket"
DATABASE_URL="${DATABASE_URL:-jdbc:postgresql://localhost:5432/demo}"
DATABASE_USER="${DATABASE_USERNAME:-demo_user}"
DATABASE_PASSWORD="${DATABASE_PASSWORD}"
REDIS_HOST="${REDIS_HOST:-localhost}"
REDIS_PORT="${REDIS_PORT:-6379}"
REDIS_PASSWORD="${REDIS_PASSWORD}"
RETENTION_DAYS=30
ENCRYPTION_KEY="${BACKUP_ENCRYPTION_KEY}"

# 颜色输出
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# 检查环境
check_environment() {
    log_info "检查备份环境..."

    # 检查必需的工具
    local required_tools=("pg_dump" "redis-cli" "aws" "gpg" "tar" "gzip")
    for tool in "${required_tools[@]}"; do
        if ! command -v "$tool" &> /dev/null; then
            log_error "缺少必需的工具: $tool"
            exit 1
        fi
    done

    # 检查备份目录
    if [ ! -d "$BACKUP_DIR" ]; then
        mkdir -p "$BACKUP_DIR"
        log_info "创建备份目录: $BACKUP_DIR"
    fi

    # 检查加密密钥
    if [ -z "$ENCRYPTION_KEY" ]; then
        log_warning "未设置加密密钥,备份将不加密"
    fi

    log_success "环境检查通过"
}

# 备份数据库
backup_database() {
    log_info "备份数据库..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local backup_file="$BACKUP_DIR/database_${timestamp}.sql"

    # 从数据库 URL 解析连接信息
    local db_host=$(echo "$DATABASE_URL" | sed -n 's|.*://[^@]*@\([^:]*\):.*|\1|p')
    local db_port=$(echo "$DATABASE_URL" | sed -n 's|.*://[^@]*@[^:]*:\([0-9]*\)/.*|\1|p')
    local db_name=$(echo "$DATABASE_URL" | sed -n 's|.*/\([^?]*\).*|\1|p')

    # 设置环境变量
    export PGPASSWORD="$DATABASE_PASSWORD"

    # 执行备份(custom 格式)
    pg_dump -h "$db_host" -p "$db_port" -U "$DATABASE_USER" -d "$db_name" \
        --verbose --clean --if-exists --create \
        --format=custom --compress=9 \
        --file="$backup_file.custom"

    # 生成 SQL 格式备份
    pg_dump -h "$db_host" -p "$db_port" -U "$DATABASE_USER" -d "$db_name" \
        --verbose --clean --if-exists --create \
        --format=plain \
        --file="$backup_file"

    # 压缩 SQL 文件
    gzip "$backup_file"

    # 加密备份文件
    if [ -n "$ENCRYPTION_KEY" ]; then
        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$backup_file.custom.gpg" \
            "$backup_file.custom"
        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$backup_file.gz.gpg" \
            "$backup_file.gz"

        # 删除未加密文件
        rm "$backup_file.custom" "$backup_file.gz"
        log_success "数据库备份完成(已加密): $backup_file.custom.gpg, $backup_file.gz.gpg"
    else
        log_success "数据库备份完成: $backup_file.custom, $backup_file.gz"
    fi

    unset PGPASSWORD
}

# 备份 Redis
backup_redis() {
    log_info "备份 Redis..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local backup_file="$BACKUP_DIR/redis_${timestamp}.rdb"

    # 执行 Redis 备份
    if [ -n "$REDIS_PASSWORD" ]; then
        redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" -a "$REDIS_PASSWORD" \
            --rdb "$backup_file"
    else
        redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" \
            --rdb "$backup_file"
    fi

    # 压缩备份文件
    gzip "$backup_file"

    # 加密备份文件
    if [ -n "$ENCRYPTION_KEY" ]; then
        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$backup_file.gz.gpg" \
            "$backup_file.gz"
        rm "$backup_file.gz"
        log_success "Redis 备份完成(已加密): $backup_file.gz.gpg"
    else
        log_success "Redis 备份完成: $backup_file.gz"
    fi
}

# 备份应用配置
backup_configuration() {
    log_info "备份应用配置..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local config_backup="$BACKUP_DIR/config_${timestamp}.tar.gz"

    # 创建配置备份
    tar -czf "$config_backup" \
        -C / \
        --exclude='*.log' \
        --exclude='*.tmp' \
        --exclude='*.pid' \
        app/config \
        etc/systemd/system/${APP_NAME}.service \
        etc/nginx/sites-available/${APP_NAME} \
        2>/dev/null || true

    # 加密配置备份
    if [ -n "$ENCRYPTION_KEY" ]; then
        gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
            --passphrase "$ENCRYPTION_KEY" \
            --output "$config_backup.gpg" \
            "$config_backup"
        rm "$config_backup"
        log_success "配置备份完成(已加密): $config_backup.gpg"
    else
        log_success "配置备份完成: $config_backup"
    fi
}

# 备份日志文件
backup_logs() {
    log_info "备份日志文件..."

    local timestamp=$(date +%Y%m%d_%H%M%S)
    local logs_backup="$BACKUP_DIR/logs_${timestamp}.tar.gz"

    # 创建日志备份
    if [ -d "/app/logs" ]; then
        tar -czf "$logs_backup" \
            -C /app \
            --exclude='*.tmp' \
            --exclude='*.lock' \
            logs/

        # 加密日志备份
        if [ -n "$ENCRYPTION_KEY" ]; then
            gpg --symmetric --cipher-algo AES256 --compress-algo 2 \
                --passphrase "$ENCRYPTION_KEY" \
                --output "$logs_backup.gpg" \
                "$logs_backup"
            rm "$logs_backup"
            log_success "日志备份完成(已加密): $logs_backup.gpg"
        else
            log_success "日志备份完成: $logs_backup"
        fi
    else
        log_warning "日志目录不存在,跳过日志备份"
    fi
}

# 上传到云存储
upload_to_cloud() {
    log_info "上传备份到云存储..."

    local backup_files=("$BACKUP_DIR"/*$(date +%Y%m%d)*)

    for file in "${backup_files[@]}"; do
        if [ -f "$file" ]; then
            local filename=$(basename "$file")
            local s3_key="$APP_NAME/$(date +%Y/%m/%d)/$filename"

            if aws s3 cp "$file" "s3://$S3_BUCKET/$s3_key" \
                --storage-class STANDARD_IA \
                --server-side-encryption AES256; then
                log_success "已上传: $filename -> s3://$S3_BUCKET/$s3_key"
            else
                log_error "上传失败: $filename"
            fi
        fi
    done
}

# 清理旧备份
cleanup_old_backups() {
    log_info "清理旧备份..."

    # 清理本地旧备份
    find "$BACKUP_DIR" -type f -mtime +$RETENTION_DAYS -delete

    # 清理云存储旧备份
    local cutoff_date=$(date -d "$RETENTION_DAYS days ago" +%Y-%m-%d)
    aws s3api list-objects-v2 \
        --bucket "$S3_BUCKET" \
        --prefix "$APP_NAME/" \
        --query "Contents[?LastModified<='$cutoff_date'].Key" \
        --output text | \
    while read -r key; do
        if [ -n "$key" ]; then
            aws s3 rm "s3://$S3_BUCKET/$key"
            log_info "已删除旧备份: $key"
        fi
    done

    log_success "旧备份清理完成"
}

# 验证备份
verify_backup() {
    log_info "验证备份完整性..."

    local backup_files=("$BACKUP_DIR"/*$(date +%Y%m%d)*)
    local verification_failed=false

    for file in "${backup_files[@]}"; do
        if [ -f "$file" ]; then
            local filename=$(basename "$file")

            # 检查文件大小
            local file_size=$(stat -c%s "$file")
            if [ "$file_size" -eq 0 ]; then
                log_error "备份文件为空: $filename"
                verification_failed=true
                continue
            fi

            # 验证加密文件
            if [[ "$filename" == *.gpg ]]; then
                if [ -n "$ENCRYPTION_KEY" ]; then
                    if gpg --quiet --batch --yes --passphrase "$ENCRYPTION_KEY" \
                        --decrypt "$file" > /dev/null 2>&1; then
                        log_success "加密文件验证通过: $filename"
                    else
                        log_error "加密文件验证失败: $filename"
                        verification_failed=true
                    fi
                fi
            fi

            # 验证压缩文件
            if [[ "$filename" == *.gz ]] && [[ "$filename" != *.gpg ]]; then
                if gzip -t "$file" 2>/dev/null; then
                    log_success "压缩文件验证通过: $filename"
                else
                    log_error "压缩文件验证失败: $filename"
                    verification_failed=true
                fi
            fi
        fi
    done

    if [ "$verification_failed" = true ]; then
        log_error "备份验证失败"
        exit 1
    else
        log_success "所有备份文件验证通过"
    fi
}

# 生成备份报告
generate_backup_report() {
    log_info "生成备份报告..."

    local report_file="$BACKUP_DIR/backup-report-$(date +%Y%m%d-%H%M%S).json"
    local backup_files=("$BACKUP_DIR"/*$(date +%Y%m%d)*)

    # 统计备份信息
    local total_files=0
    local total_size=0
    local file_list="[]"

    for file in "${backup_files[@]}"; do
        if [ -f "$file" ]; then
            local filename=$(basename "$file")
            local file_size=$(stat -c%s "$file")
            local file_hash=$(sha256sum "$file" | cut -d' ' -f1)

            total_files=$((total_files + 1))
            total_size=$((total_size + file_size))
            file_list=$(echo "$file_list" | jq ". + [{\"name\": \"$filename\", \"size\": $file_size, \"hash\": \"$file_hash\"}]")
        fi
    done

    # 生成 JSON 报告
    cat > "$report_file" << EOF
{
    "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
    "application": "$APP_NAME",
    "backup_summary": {
        "total_files": $total_files,
        "total_size_bytes": $total_size,
        "total_size_human": "$(numfmt --to=iec $total_size)",
        "backup_directory": "$BACKUP_DIR",
        "s3_bucket": "$S3_BUCKET",
        "retention_days": $RETENTION_DAYS,
        "encryption_enabled": $([ -n "$ENCRYPTION_KEY" ] && echo "true" || echo "false")
    },
    "backup_files": $file_list,
    "backup_types": {
        "database": "PostgreSQL dump (custom + SQL)",
        "redis": "RDB snapshot",
        "configuration": "Application config files",
        "logs": "Application log files"
    }
}
EOF

    log_success "备份报告已生成: $report_file"
}

# 主函数
main() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    log_info "开始备份流程 - $timestamp"

    case "$1" in
        "database")
            check_environment
            backup_database
            verify_backup
            ;;
        "redis")
            check_environment
            backup_redis
            verify_backup
            ;;
        "config")
            check_environment
            backup_configuration
            verify_backup
            ;;
        "logs")
            check_environment
            backup_logs
            verify_backup
            ;;
        "upload")
            upload_to_cloud
            ;;
        "cleanup")
            cleanup_old_backups
            ;;
        "verify")
            verify_backup
            ;;
        "report")
            generate_backup_report
            ;;
        "full"|"")
            check_environment
            backup_database
            backup_redis
            backup_configuration
            backup_logs
            verify_backup
            upload_to_cloud
            cleanup_old_backups
            generate_backup_report
            ;;
        *)
            echo "用法: $0 {database|redis|config|logs|upload|cleanup|verify|report|full}"
            echo "  database - 备份数据库"
            echo "  redis    - 备份 Redis"
            echo "  config   - 备份配置文件"
            echo "  logs     - 备份日志文件"
            echo "  upload   - 上传到云存储"
            echo "  cleanup  - 清理旧备份"
            echo "  verify   - 验证备份完整性"
            echo "  report   - 生成备份报告"
            echo "  full     - 执行完整备份(默认)"
            exit 1
            ;;
    esac

    log_info "备份流程完成 - $(date '+%Y-%m-%d %H:%M:%S')"
}

# 错误处理
trap 'log_error "备份过程中发生错误,退出码: $?"' ERR

# 执行主函数
main "$@"
```