7.1 应用监控概述

7.1.1 监控架构图

graph TB
    A[Quarkus 应用] --> B[Metrics 收集]
    A --> C[Health 检查]
    A --> D[分布式追踪]
    A --> E[日志收集]
    
    B --> F[Prometheus]
    C --> G[Kubernetes Probes]
    D --> H[Jaeger/Zipkin]
    E --> I[ELK Stack]
    
    F --> J[Grafana 仪表板]
    G --> K[集群管理]
    H --> L[追踪分析]
    I --> M[日志分析]
    
    subgraph "监控扩展"
        N[quarkus-micrometer]
        O[quarkus-smallrye-health]
        P[quarkus-smallrye-opentracing]
        Q[quarkus-logging-json]
    end
    
    subgraph "告警系统"
        R[AlertManager]
        S[PagerDuty]
        T[Slack 通知]
        U[邮件告警]
    end

7.1.2 监控依赖配置

<!-- Micrometer 指标 -->
<dependency>
    <groupId>io.quarkus</groupId>
    <artifactId>quarkus-micrometer-registry-prometheus</artifactId>
</dependency>

<!-- 健康检查 -->
<dependency>
    <groupId>io.quarkus</groupId>
    <artifactId>quarkus-smallrye-health</artifactId>
</dependency>

<!-- 分布式追踪 -->
<dependency>
    <groupId>io.quarkus</groupId>
    <artifactId>quarkus-smallrye-opentracing</artifactId>
</dependency>

<!-- JSON 日志 -->
<dependency>
    <groupId>io.quarkus</groupId>
    <artifactId>quarkus-logging-json</artifactId>
</dependency>

<!-- 应用信息 -->
<dependency>
    <groupId>io.quarkus</groupId>
    <artifactId>quarkus-info</artifactId>
</dependency>

<!-- OpenAPI 文档 -->
<dependency>
    <groupId>io.quarkus</groupId>
    <artifactId>quarkus-smallrye-openapi</artifactId>
</dependency>

7.2 指标收集与监控

7.2.1 Micrometer 配置

# Micrometer 配置
quarkus.micrometer.enabled=true
quarkus.micrometer.registry-enabled-default=true
quarkus.micrometer.binder-enabled-default=true

# Prometheus 配置
quarkus.micrometer.export.prometheus.enabled=true
quarkus.micrometer.export.prometheus.path=/q/metrics
quarkus.micrometer.export.prometheus.default-registry=true

# JVM 指标
quarkus.micrometer.binder.jvm=true
quarkus.micrometer.binder.system=true
quarkus.micrometer.binder.http-server=true
quarkus.micrometer.binder.http-client=true

# 数据库指标
quarkus.micrometer.binder.jdbc=true
quarkus.micrometer.binder.hibernate=true

# 自定义指标标签
quarkus.micrometer.export.prometheus.default-labels.application=my-quarkus-app
quarkus.micrometer.export.prometheus.default-labels.environment=${ENVIRONMENT:dev}
quarkus.micrometer.export.prometheus.default-labels.version=${quarkus.application.version}

7.2.2 自定义指标服务

package com.example.monitoring.service;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import io.micrometer.core.instrument.*;
import io.micrometer.core.instrument.Timer;
import io.smallrye.mutiny.Uni;
import java.time.Duration;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;
import java.util.Map;

@ApplicationScoped
public class MetricsService {
    
    @Inject
    MeterRegistry meterRegistry;
    
    // 计数器
    private final Counter requestCounter;
    private final Counter errorCounter;
    private final Counter businessEventCounter;
    
    // 计时器
    private final Timer requestTimer;
    private final Timer databaseTimer;
    private final Timer externalApiTimer;
    
    // 仪表
    private final Gauge activeUsersGauge;
    private final Gauge cacheHitRateGauge;
    
    // 分布摘要
    private final DistributionSummary requestSizeSummary;
    private final DistributionSummary responseSizeSummary;
    
    // 自定义指标存储
    private final AtomicInteger activeUsers = new AtomicInteger(0);
    private final Map<String, AtomicInteger> businessMetrics = new ConcurrentHashMap<>();
    private final Map<String, Timer.Sample> activeSamples = new ConcurrentHashMap<>();
    
    public MetricsService(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        
        // 初始化计数器
        this.requestCounter = Counter.builder("http_requests_total")
            .description("Total number of HTTP requests")
            .tag("method", "unknown")
            .tag("status", "unknown")
            .register(meterRegistry);
            
        this.errorCounter = Counter.builder("application_errors_total")
            .description("Total number of application errors")
            .tag("type", "unknown")
            .register(meterRegistry);
            
        this.businessEventCounter = Counter.builder("business_events_total")
            .description("Total number of business events")
            .tag("event_type", "unknown")
            .register(meterRegistry);
        
        // 初始化计时器
        this.requestTimer = Timer.builder("http_request_duration")
            .description("HTTP request duration")
            .tag("method", "unknown")
            .tag("endpoint", "unknown")
            .register(meterRegistry);
            
        this.databaseTimer = Timer.builder("database_query_duration")
            .description("Database query duration")
            .tag("operation", "unknown")
            .tag("table", "unknown")
            .register(meterRegistry);
            
        this.externalApiTimer = Timer.builder("external_api_duration")
            .description("External API call duration")
            .tag("service", "unknown")
            .tag("operation", "unknown")
            .register(meterRegistry);
        
        // 初始化仪表
        this.activeUsersGauge = Gauge.builder("active_users")
            .description("Number of active users")
            .register(meterRegistry, this, MetricsService::getActiveUsersCount);
            
        this.cacheHitRateGauge = Gauge.builder("cache_hit_rate")
            .description("Cache hit rate")
            .register(meterRegistry, this, MetricsService::getCacheHitRate);
        
        // 初始化分布摘要
        this.requestSizeSummary = DistributionSummary.builder("http_request_size_bytes")
            .description("HTTP request size in bytes")
            .baseUnit("bytes")
            .register(meterRegistry);
            
        this.responseSizeSummary = DistributionSummary.builder("http_response_size_bytes")
            .description("HTTP response size in bytes")
            .baseUnit("bytes")
            .register(meterRegistry);
    }
    
    // HTTP 请求指标
    public void recordHttpRequest(String method, String endpoint, int statusCode, Duration duration) {
        requestCounter.increment(
            Tags.of(
                Tag.of("method", method),
                Tag.of("endpoint", endpoint),
                Tag.of("status", String.valueOf(statusCode))
            )
        );
        
        requestTimer.record(duration,
            Tags.of(
                Tag.of("method", method),
                Tag.of("endpoint", endpoint)
            )
        );
    }
    
    // 错误指标
    public void recordError(String errorType, String errorMessage) {
        errorCounter.increment(
            Tags.of(
                Tag.of("type", errorType),
                Tag.of("message", errorMessage)
            )
        );
    }
    
    // 业务事件指标
    public void recordBusinessEvent(String eventType, String... additionalTags) {
        Tags.Builder tagsBuilder = Tags.of("event_type", eventType);
        
        // 添加额外标签
        for (int i = 0; i < additionalTags.length; i += 2) {
            if (i + 1 < additionalTags.length) {
                tagsBuilder = tagsBuilder.and(additionalTags[i], additionalTags[i + 1]);
            }
        }
        
        businessEventCounter.increment(tagsBuilder);
    }
    
    // 数据库查询指标
    public void recordDatabaseQuery(String operation, String table, Duration duration) {
        databaseTimer.record(duration,
            Tags.of(
                Tag.of("operation", operation),
                Tag.of("table", table)
            )
        );
    }
    
    // 外部 API 调用指标
    public void recordExternalApiCall(String service, String operation, Duration duration) {
        externalApiTimer.record(duration,
            Tags.of(
                Tag.of("service", service),
                Tag.of("operation", operation)
            )
        );
    }
    
    // 请求/响应大小指标
    public void recordRequestSize(long sizeBytes) {
        requestSizeSummary.record(sizeBytes);
    }
    
    public void recordResponseSize(long sizeBytes) {
        responseSizeSummary.record(sizeBytes);
    }
    
    // 活跃用户管理
    public void incrementActiveUsers() {
        activeUsers.incrementAndGet();
    }
    
    public void decrementActiveUsers() {
        activeUsers.decrementAndGet();
    }
    
    public void setActiveUsers(int count) {
        activeUsers.set(count);
    }
    
    private double getActiveUsersCount() {
        return activeUsers.get();
    }
    
    // 缓存命中率(示例)
    private double getCacheHitRate() {
        // 这里应该从实际的缓存服务获取命中率
        return 0.85; // 示例值
    }
    
    // 自定义业务指标
    public void incrementBusinessMetric(String metricName) {
        businessMetrics.computeIfAbsent(metricName, k -> {
            AtomicInteger counter = new AtomicInteger(0);
            Gauge.builder("business_metric_" + k)
                .description("Custom business metric: " + k)
                .register(meterRegistry, counter, AtomicInteger::get);
            return counter;
        }).incrementAndGet();
    }
    
    // 计时器采样
    public String startTimer(String name) {
        String sampleId = name + "_" + System.currentTimeMillis();
        Timer.Sample sample = Timer.start(meterRegistry);
        activeSamples.put(sampleId, sample);
        return sampleId;
    }
    
    public void stopTimer(String sampleId, String timerName, String... tags) {
        Timer.Sample sample = activeSamples.remove(sampleId);
        if (sample != null) {
            Timer.Builder timerBuilder = Timer.builder(timerName);
            
            // 添加标签
            for (int i = 0; i < tags.length; i += 2) {
                if (i + 1 < tags.length) {
                    timerBuilder = timerBuilder.tag(tags[i], tags[i + 1]);
                }
            }
            
            sample.stop(timerBuilder.register(meterRegistry));
        }
    }
    
    // 批量指标记录
    public void recordBatch(MetricsBatch batch) {
        batch.getCounters().forEach((name, value) -> {
            Counter.builder(name)
                .tags(batch.getTags())
                .register(meterRegistry)
                .increment(value);
        });
        
        batch.getTimers().forEach((name, duration) -> {
            Timer.builder(name)
                .tags(batch.getTags())
                .register(meterRegistry)
                .record(duration);
        });
        
        batch.getGauges().forEach((name, value) -> {
            Gauge.builder(name)
                .tags(batch.getTags())
                .register(meterRegistry, value, Number::doubleValue);
        });
    }
    
    // 获取指标摘要
    public MetricsSummary getMetricsSummary() {
        MetricsSummary summary = new MetricsSummary();
        
        // 收集各种指标的当前值
        summary.totalRequests = requestCounter.count();
        summary.totalErrors = errorCounter.count();
        summary.activeUsers = activeUsers.get();
        summary.averageRequestDuration = requestTimer.mean(java.util.concurrent.TimeUnit.MILLISECONDS);
        summary.maxRequestDuration = requestTimer.max(java.util.concurrent.TimeUnit.MILLISECONDS);
        
        return summary;
    }
}

// 指标批量记录类
public static class MetricsBatch {
    private final Map<String, Double> counters = new ConcurrentHashMap<>();
    private final Map<String, Duration> timers = new ConcurrentHashMap<>();
    private final Map<String, Number> gauges = new ConcurrentHashMap<>();
    private final Tags tags;
    
    public MetricsBatch(Tags tags) {
        this.tags = tags;
    }
    
    public MetricsBatch addCounter(String name, double value) {
        counters.put(name, value);
        return this;
    }
    
    public MetricsBatch addTimer(String name, Duration duration) {
        timers.put(name, duration);
        return this;
    }
    
    public MetricsBatch addGauge(String name, Number value) {
        gauges.put(name, value);
        return this;
    }
    
    // Getters
    public Map<String, Double> getCounters() { return counters; }
    public Map<String, Duration> getTimers() { return timers; }
    public Map<String, Number> getGauges() { return gauges; }
    public Tags getTags() { return tags; }
}

// 指标摘要类
public static class MetricsSummary {
    public double totalRequests;
    public double totalErrors;
    public int activeUsers;
    public double averageRequestDuration;
    public double maxRequestDuration;
    
    // 构造函数、getter 和 setter
}

7.2.3 HTTP 指标拦截器

package com.example.monitoring.interceptor;

import jakarta.annotation.Priority;
import jakarta.inject.Inject;
import jakarta.ws.rs.container.*;
import jakarta.ws.rs.core.Response;
import jakarta.ws.rs.ext.Provider;
import com.example.monitoring.service.MetricsService;
import java.time.Duration;
import java.time.Instant;

@Provider
@Priority(1000)
public class MetricsInterceptor implements ContainerRequestFilter, ContainerResponseFilter {
    
    private static final String START_TIME_PROPERTY = "metrics.start.time";
    private static final String REQUEST_SIZE_PROPERTY = "metrics.request.size";
    
    @Inject
    MetricsService metricsService;
    
    @Override
    public void filter(ContainerRequestContext requestContext) {
        // 记录请求开始时间
        requestContext.setProperty(START_TIME_PROPERTY, Instant.now());
        
        // 记录请求大小
        if (requestContext.hasEntity()) {
            // 这里需要实际测量请求体大小
            // 简化示例,实际实现需要更复杂的逻辑
            long requestSize = estimateRequestSize(requestContext);
            requestContext.setProperty(REQUEST_SIZE_PROPERTY, requestSize);
            metricsService.recordRequestSize(requestSize);
        }
    }
    
    @Override
    public void filter(ContainerRequestContext requestContext, ContainerResponseContext responseContext) {
        Instant startTime = (Instant) requestContext.getProperty(START_TIME_PROPERTY);
        if (startTime != null) {
            Duration duration = Duration.between(startTime, Instant.now());
            
            String method = requestContext.getMethod();
            String endpoint = getEndpoint(requestContext);
            int statusCode = responseContext.getStatus();
            
            // 记录 HTTP 请求指标
            metricsService.recordHttpRequest(method, endpoint, statusCode, duration);
            
            // 记录错误
            if (statusCode >= 400) {
                String errorType = statusCode >= 500 ? "server_error" : "client_error";
                metricsService.recordError(errorType, "HTTP " + statusCode);
            }
        }
        
        // 记录响应大小
        if (responseContext.hasEntity()) {
            long responseSize = estimateResponseSize(responseContext);
            metricsService.recordResponseSize(responseSize);
        }
    }
    
    private String getEndpoint(ContainerRequestContext requestContext) {
        String path = requestContext.getUriInfo().getPath();
        // 简化路径,移除路径参数
        return simplifyPath(path);
    }
    
    private String simplifyPath(String path) {
        // 将路径参数替换为占位符,例如 /users/123 -> /users/{id}
        return path.replaceAll("/\\d+", "/{id}")
                  .replaceAll("/[a-f0-9-]{36}", "/{uuid}");
    }
    
    private long estimateRequestSize(ContainerRequestContext requestContext) {
        // 简化实现,实际应该测量实际的请求体大小
        return 1024; // 示例值
    }
    
    private long estimateResponseSize(ContainerResponseContext responseContext) {
        // 简化实现,实际应该测量实际的响应体大小
        return 2048; // 示例值
    }
}

7.3 健康检查

7.3.1 健康检查配置

# 健康检查配置
quarkus.smallrye-health.root-path=/q/health
quarkus.smallrye-health.liveness-path=/q/health/live
quarkus.smallrye-health.readiness-path=/q/health/ready
quarkus.smallrye-health.startup-path=/q/health/started

# 健康检查响应缓存
quarkus.smallrye-health.check."database".enabled=true
quarkus.smallrye-health.check."redis".enabled=true

# 额外健康检查信息
quarkus.smallrye-health.additional.property."version"=${quarkus.application.version}
quarkus.smallrye-health.additional.property."build-time"=${quarkus.application.build-time}

7.3.2 自定义健康检查

package com.example.monitoring.health;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.eclipse.microprofile.health.*;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import io.smallrye.mutiny.Uni;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;

@ApplicationScoped
public class CustomHealthChecks {
    
    @Inject
    DatabaseHealthService databaseHealthService;
    
    @Inject
    RedisHealthService redisHealthService;
    
    @Inject
    ExternalServiceHealthService externalServiceHealthService;
    
    @ConfigProperty(name = "app.health.database.timeout", defaultValue = "5")
    int databaseTimeoutSeconds;
    
    @ConfigProperty(name = "app.health.redis.timeout", defaultValue = "3")
    int redisTimeoutSeconds;
    
    @ConfigProperty(name = "app.health.external.timeout", defaultValue = "10")
    int externalServiceTimeoutSeconds;
    
    // 存活检查 - 应用是否运行
    @Liveness
    @ApplicationScoped
    public static class LivenessCheck implements HealthCheck {
        
        private final LocalDateTime startTime = LocalDateTime.now();
        
        @Override
        public HealthCheckResponse call() {
            return HealthCheckResponse.named("application-liveness")
                .status(true)
                .withData("uptime", Duration.between(startTime, LocalDateTime.now()).toString())
                .withData("start-time", startTime.toString())
                .build();
        }
    }
    
    // 就绪检查 - 应用是否准备好接收请求
    @Readiness
    @ApplicationScoped
    public class ReadinessCheck implements HealthCheck {
        
        @Override
        public HealthCheckResponse call() {
            HealthCheckResponseBuilder responseBuilder = HealthCheckResponse.named("application-readiness");
            
            try {
                // 检查数据库连接
                boolean dbHealthy = checkDatabaseHealth();
                responseBuilder.withData("database", dbHealthy ? "UP" : "DOWN");
                
                // 检查缓存连接
                boolean cacheHealthy = checkCacheHealth();
                responseBuilder.withData("cache", cacheHealthy ? "UP" : "DOWN");
                
                // 检查关键外部服务
                boolean externalHealthy = checkCriticalExternalServices();
                responseBuilder.withData("external-services", externalHealthy ? "UP" : "DOWN");
                
                // 所有关键组件都健康才算就绪
                boolean overallHealthy = dbHealthy && cacheHealthy && externalHealthy;
                responseBuilder.status(overallHealthy);
                
                if (!overallHealthy) {
                    responseBuilder.withData("message", "One or more critical components are unhealthy");
                }
                
            } catch (Exception e) {
                responseBuilder.status(false)
                    .withData("error", e.getMessage());
            }
            
            return responseBuilder.build();
        }
        
        private boolean checkDatabaseHealth() {
            try {
                return databaseHealthService.isHealthy(Duration.ofSeconds(databaseTimeoutSeconds));
            } catch (Exception e) {
                return false;
            }
        }
        
        private boolean checkCacheHealth() {
            try {
                return redisHealthService.isHealthy(Duration.ofSeconds(redisTimeoutSeconds));
            } catch (Exception e) {
                return false;
            }
        }
        
        private boolean checkCriticalExternalServices() {
            try {
                return externalServiceHealthService.checkCriticalServices(Duration.ofSeconds(externalServiceTimeoutSeconds));
            } catch (Exception e) {
                return false;
            }
        }
    }
    
    // 启动检查 - 应用是否已完成启动
    @Startup
    @ApplicationScoped
    public static class StartupCheck implements HealthCheck {
        
        private volatile boolean startupCompleted = false;
        private String startupMessage = "Application is starting...";
        
        @Override
        public HealthCheckResponse call() {
            return HealthCheckResponse.named("application-startup")
                .status(startupCompleted)
                .withData("message", startupMessage)
                .withData("startup-completed", startupCompleted)
                .build();
        }
        
        public void markStartupCompleted() {
            this.startupCompleted = true;
            this.startupMessage = "Application startup completed successfully";
        }
        
        public void markStartupFailed(String reason) {
            this.startupCompleted = false;
            this.startupMessage = "Application startup failed: " + reason;
        }
    }
}

// 数据库健康检查服务
@ApplicationScoped
public class DatabaseHealthService {
    
    @Inject
    DataSource dataSource;
    
    public boolean isHealthy(Duration timeout) {
        try {
            CompletableFuture<Boolean> future = CompletableFuture.supplyAsync(() -> {
                try (var connection = dataSource.getConnection()) {
                    return connection.isValid(1);
                } catch (Exception e) {
                    return false;
                }
            });
            
            return future.get(timeout.toSeconds(), TimeUnit.SECONDS);
        } catch (Exception e) {
            return false;
        }
    }
    
    public DatabaseHealthInfo getDetailedHealth() {
        DatabaseHealthInfo info = new DatabaseHealthInfo();
        
        try (var connection = dataSource.getConnection()) {
            info.connected = true;
            info.url = connection.getMetaData().getURL();
            info.driverName = connection.getMetaData().getDriverName();
            info.driverVersion = connection.getMetaData().getDriverVersion();
            info.autoCommit = connection.getAutoCommit();
            info.readOnly = connection.isReadOnly();
            
            // 测试查询
            try (var stmt = connection.createStatement()) {
                var rs = stmt.executeQuery("SELECT 1");
                info.queryTest = rs.next();
            }
            
        } catch (Exception e) {
            info.connected = false;
            info.error = e.getMessage();
        }
        
        return info;
    }
    
    public static class DatabaseHealthInfo {
        public boolean connected;
        public String url;
        public String driverName;
        public String driverVersion;
        public boolean autoCommit;
        public boolean readOnly;
        public boolean queryTest;
        public String error;
    }
}

// Redis 健康检查服务
@ApplicationScoped
public class RedisHealthService {
    
    // 假设使用 Redis 客户端
    // @Inject
    // RedisClient redisClient;
    
    public boolean isHealthy(Duration timeout) {
        try {
            // 简化示例 - 实际应该使用真实的 Redis 客户端
            // return redisClient.ping().await().atMost(timeout);
            return true; // 示例返回值
        } catch (Exception e) {
            return false;
        }
    }
    
    public RedisHealthInfo getDetailedHealth() {
        RedisHealthInfo info = new RedisHealthInfo();
        
        try {
            // 获取 Redis 信息
            // info.connected = redisClient.ping().await().atMost(Duration.ofSeconds(5));
            // info.version = redisClient.info("server").await().atMost(Duration.ofSeconds(5));
            info.connected = true; // 示例
            info.version = "6.2.0"; // 示例
        } catch (Exception e) {
            info.connected = false;
            info.error = e.getMessage();
        }
        
        return info;
    }
    
    public static class RedisHealthInfo {
        public boolean connected;
        public String version;
        public String error;
    }
}

// 外部服务健康检查
@ApplicationScoped
public class ExternalServiceHealthService {
    
    // @Inject
    // @RestClient
    // ExternalServiceClient externalServiceClient;
    
    public boolean checkCriticalServices(Duration timeout) {
        try {
            // 检查关键外部服务
            // return externalServiceClient.healthCheck().await().atMost(timeout);
            return true; // 示例返回值
        } catch (Exception e) {
            return false;
        }
    }
    
    public ExternalServiceHealthInfo getDetailedHealth() {
        ExternalServiceHealthInfo info = new ExternalServiceHealthInfo();
        
        try {
            // 检查各个外部服务
            info.paymentService = checkPaymentService();
            info.notificationService = checkNotificationService();
            info.authService = checkAuthService();
        } catch (Exception e) {
            info.error = e.getMessage();
        }
        
        return info;
    }
    
    private boolean checkPaymentService() {
        // 实际检查支付服务
        return true;
    }
    
    private boolean checkNotificationService() {
        // 实际检查通知服务
        return true;
    }
    
    private boolean checkAuthService() {
        // 实际检查认证服务
        return true;
    }
    
    public static class ExternalServiceHealthInfo {
        public boolean paymentService;
        public boolean notificationService;
        public boolean authService;
        public String error;
    }
}

7.3.3 健康检查资源端点

package com.example.monitoring.resource;

import jakarta.inject.Inject;
import jakarta.ws.rs.*;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import org.eclipse.microprofile.openapi.annotations.Operation;
import org.eclipse.microprofile.openapi.annotations.tags.Tag;
import com.example.monitoring.health.*;
import com.example.monitoring.service.MetricsService;
import java.util.HashMap;
import java.util.Map;

@Path("/health")
@Produces(MediaType.APPLICATION_JSON)
@Tag(name = "Health", description = "Application health and status operations")
public class HealthResource {
    
    @Inject
    DatabaseHealthService databaseHealthService;
    
    @Inject
    RedisHealthService redisHealthService;
    
    @Inject
    ExternalServiceHealthService externalServiceHealthService;
    
    @Inject
    MetricsService metricsService;
    
    @GET
    @Path("/detailed")
    @Operation(summary = "Detailed health check", description = "Get detailed health information for all components")
    public Response getDetailedHealth() {
        Map<String, Object> healthInfo = new HashMap<>();
        
        try {
            // 数据库健康信息
            healthInfo.put("database", databaseHealthService.getDetailedHealth());
            
            // Redis 健康信息
            healthInfo.put("redis", redisHealthService.getDetailedHealth());
            
            // 外部服务健康信息
            healthInfo.put("external-services", externalServiceHealthService.getDetailedHealth());
            
            // 应用指标摘要
            healthInfo.put("metrics", metricsService.getMetricsSummary());
            
            // 系统信息
            healthInfo.put("system", getSystemInfo());
            
            return Response.ok(healthInfo).build();
            
        } catch (Exception e) {
            Map<String, String> error = Map.of(
                "error", "Failed to retrieve health information",
                "message", e.getMessage()
            );
            return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                .entity(error)
                .build();
        }
    }
    
    @GET
    @Path("/status")
    @Operation(summary = "Application status", description = "Get overall application status")
    public Response getApplicationStatus() {
        Map<String, Object> status = new HashMap<>();
        
        try {
            boolean dbHealthy = databaseHealthService.isHealthy(java.time.Duration.ofSeconds(5));
            boolean redisHealthy = redisHealthService.isHealthy(java.time.Duration.ofSeconds(3));
            boolean externalHealthy = externalServiceHealthService.checkCriticalServices(java.time.Duration.ofSeconds(10));
            
            status.put("database", dbHealthy ? "UP" : "DOWN");
            status.put("redis", redisHealthy ? "UP" : "DOWN");
            status.put("external-services", externalHealthy ? "UP" : "DOWN");
            
            boolean overallHealthy = dbHealthy && redisHealthy && externalHealthy;
            status.put("overall", overallHealthy ? "UP" : "DOWN");
            status.put("timestamp", java.time.Instant.now().toString());
            
            return Response.ok(status).build();
            
        } catch (Exception e) {
            Map<String, String> error = Map.of(
                "status", "ERROR",
                "message", e.getMessage(),
                "timestamp", java.time.Instant.now().toString()
            );
            return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                .entity(error)
                .build();
        }
    }
    
    private Map<String, Object> getSystemInfo() {
        Runtime runtime = Runtime.getRuntime();
        Map<String, Object> systemInfo = new HashMap<>();
        
        systemInfo.put("java-version", System.getProperty("java.version"));
        systemInfo.put("java-vendor", System.getProperty("java.vendor"));
        systemInfo.put("os-name", System.getProperty("os.name"));
        systemInfo.put("os-version", System.getProperty("os.version"));
        systemInfo.put("processors", runtime.availableProcessors());
        systemInfo.put("max-memory", runtime.maxMemory());
        systemInfo.put("total-memory", runtime.totalMemory());
        systemInfo.put("free-memory", runtime.freeMemory());
        systemInfo.put("used-memory", runtime.totalMemory() - runtime.freeMemory());
        
        return systemInfo;
    }
}

7.4 结构化日志

7.4.1 日志配置

# 日志级别配置
quarkus.log.level=INFO
quarkus.log.category."com.example".level=DEBUG
quarkus.log.category."org.hibernate".level=WARN
quarkus.log.category."io.quarkus".level=INFO

# 控制台日志配置
quarkus.log.console.enable=true
quarkus.log.console.format=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] (%t) %s%e%n
quarkus.log.console.level=INFO
quarkus.log.console.color=true

# 文件日志配置
quarkus.log.file.enable=true
quarkus.log.file.path=logs/application.log
quarkus.log.file.level=DEBUG
quarkus.log.file.format=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] (%t) %s%e%n
quarkus.log.file.rotation.max-file-size=10M
quarkus.log.file.rotation.max-backup-index=5
quarkus.log.file.rotation.file-suffix=.yyyy-MM-dd
quarkus.log.file.rotation.rotate-on-boot=true

# JSON 日志配置
quarkus.log.console.json=true
quarkus.log.console.json.pretty-print=false
quarkus.log.console.json.record-delimiter=\n
# 自定义字段
quarkus.log.console.json.additional-field."application".value=my-quarkus-app
quarkus.log.console.json.additional-field."version".value=${quarkus.application.version}
quarkus.log.console.json.additional-field."environment".value=${ENVIRONMENT:dev}

# 异步日志配置
quarkus.log.async=true
quarkus.log.async.queue-length=512
quarkus.log.async.overflow=block

# 审计日志配置
quarkus.log.category."audit".level=INFO
quarkus.log.category."audit".handlers=audit-file
quarkus.log.handler.audit-file.enable=true
quarkus.log.handler.audit-file.path=logs/audit.log
quarkus.log.handler.audit-file.format=%d{yyyy-MM-dd HH:mm:ss,SSS} %s%e%n

7.4.2 结构化日志服务

package com.example.monitoring.logging;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.time.Instant;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

@ApplicationScoped
public class StructuredLoggingService {
    
    private static final Logger logger = LoggerFactory.getLogger(StructuredLoggingService.class);
    private static final Logger auditLogger = LoggerFactory.getLogger("audit");
    private static final Logger securityLogger = LoggerFactory.getLogger("security");
    private static final Logger performanceLogger = LoggerFactory.getLogger("performance");
    
    @Inject
    ObjectMapper objectMapper;
    
    // 请求上下文常量
    public static final String REQUEST_ID = "requestId";
    public static final String USER_ID = "userId";
    public static final String SESSION_ID = "sessionId";
    public static final String TRACE_ID = "traceId";
    public static final String SPAN_ID = "spanId";
    
    // 业务日志
    public void logBusinessEvent(String event, String description, Map<String, Object> data) {
        try {
            Map<String, Object> logData = new HashMap<>();
            logData.put("event", event);
            logData.put("description", description);
            logData.put("timestamp", Instant.now().toString());
            logData.put("type", "business");
            
            if (data != null) {
                logData.putAll(data);
            }
            
            // 添加上下文信息
            addContextInfo(logData);
            
            logger.info("Business Event: {}", objectMapper.writeValueAsString(logData));
            
        } catch (Exception e) {
            logger.error("Failed to log business event", e);
        }
    }
    
    // 审计日志
    public void logAuditEvent(String action, String resource, String userId, AuditResult result, Map<String, Object> details) {
        try {
            Map<String, Object> auditData = new HashMap<>();
            auditData.put("action", action);
            auditData.put("resource", resource);
            auditData.put("userId", userId);
            auditData.put("result", result.name());
            auditData.put("timestamp", Instant.now().toString());
            auditData.put("type", "audit");
            
            if (details != null) {
                auditData.putAll(details);
            }
            
            // 添加上下文信息
            addContextInfo(auditData);
            
            auditLogger.info(objectMapper.writeValueAsString(auditData));
            
        } catch (Exception e) {
            logger.error("Failed to log audit event", e);
        }
    }
    
    // 安全日志
    public void logSecurityEvent(SecurityEventType eventType, String description, String userId, String ipAddress, Map<String, Object> details) {
        try {
            Map<String, Object> securityData = new HashMap<>();
            securityData.put("eventType", eventType.name());
            securityData.put("description", description);
            securityData.put("userId", userId);
            securityData.put("ipAddress", ipAddress);
            securityData.put("timestamp", Instant.now().toString());
            securityData.put("type", "security");
            
            if (details != null) {
                securityData.putAll(details);
            }
            
            // 添加上下文信息
            addContextInfo(securityData);
            
            securityLogger.warn(objectMapper.writeValueAsString(securityData));
            
        } catch (Exception e) {
            logger.error("Failed to log security event", e);
        }
    }
    
    // 性能日志
    public void logPerformanceEvent(String operation, long durationMs, Map<String, Object> metrics) {
        try {
            Map<String, Object> performanceData = new HashMap<>();
            performanceData.put("operation", operation);
            performanceData.put("durationMs", durationMs);
            performanceData.put("timestamp", Instant.now().toString());
            performanceData.put("type", "performance");
            
            if (metrics != null) {
                performanceData.putAll(metrics);
            }
            
            // 添加上下文信息
            addContextInfo(performanceData);
            
            performanceLogger.info(objectMapper.writeValueAsString(performanceData));
            
        } catch (Exception e) {
            logger.error("Failed to log performance event", e);
        }
    }
    
    // 错误日志
    public void logError(String operation, Throwable error, Map<String, Object> context) {
        try {
            Map<String, Object> errorData = new HashMap<>();
            errorData.put("operation", operation);
            errorData.put("errorType", error.getClass().getSimpleName());
            errorData.put("errorMessage", error.getMessage());
            errorData.put("timestamp", Instant.now().toString());
            errorData.put("type", "error");
            
            if (context != null) {
                errorData.putAll(context);
            }
            
            // 添加上下文信息
            addContextInfo(errorData);
            
            logger.error("Error Event: {}", objectMapper.writeValueAsString(errorData), error);
            
        } catch (Exception e) {
            logger.error("Failed to log error event", e);
        }
    }
    
    // HTTP 请求日志
    public void logHttpRequest(String method, String path, int statusCode, long durationMs, String userAgent, String clientIp) {
        try {
            Map<String, Object> requestData = new HashMap<>();
            requestData.put("method", method);
            requestData.put("path", path);
            requestData.put("statusCode", statusCode);
            requestData.put("durationMs", durationMs);
            requestData.put("userAgent", userAgent);
            requestData.put("clientIp", clientIp);
            requestData.put("timestamp", Instant.now().toString());
            requestData.put("type", "http-request");
            
            // 添加上下文信息
            addContextInfo(requestData);
            
            logger.info("HTTP Request: {}", objectMapper.writeValueAsString(requestData));
            
        } catch (Exception e) {
            logger.error("Failed to log HTTP request", e);
        }
    }
    
    // 数据库操作日志
    public void logDatabaseOperation(String operation, String table, long durationMs, int recordCount, boolean success) {
        try {
            Map<String, Object> dbData = new HashMap<>();
            dbData.put("operation", operation);
            dbData.put("table", table);
            dbData.put("durationMs", durationMs);
            dbData.put("recordCount", recordCount);
            dbData.put("success", success);
            dbData.put("timestamp", Instant.now().toString());
            dbData.put("type", "database");
            
            // 添加上下文信息
            addContextInfo(dbData);
            
            logger.debug("Database Operation: {}", objectMapper.writeValueAsString(dbData));
            
        } catch (Exception e) {
            logger.error("Failed to log database operation", e);
        }
    }
    
    // 添加上下文信息
    private void addContextInfo(Map<String, Object> logData) {
        // 从 MDC 获取上下文信息
        String requestId = MDC.get(REQUEST_ID);
        String userId = MDC.get(USER_ID);
        String sessionId = MDC.get(SESSION_ID);
        String traceId = MDC.get(TRACE_ID);
        String spanId = MDC.get(SPAN_ID);
        
        if (requestId != null) logData.put("requestId", requestId);
        if (userId != null) logData.put("userId", userId);
        if (sessionId != null) logData.put("sessionId", sessionId);
        if (traceId != null) logData.put("traceId", traceId);
        if (spanId != null) logData.put("spanId", spanId);
        
        // 添加应用信息
        logData.put("application", "my-quarkus-app");
        logData.put("environment", System.getProperty("environment", "dev"));
        logData.put("version", System.getProperty("quarkus.application.version", "unknown"));
    }
    
    // 设置请求上下文
    public void setRequestContext(String requestId, String userId, String sessionId) {
        MDC.put(REQUEST_ID, requestId != null ? requestId : UUID.randomUUID().toString());
        if (userId != null) MDC.put(USER_ID, userId);
        if (sessionId != null) MDC.put(SESSION_ID, sessionId);
    }
    
    // 设置追踪上下文
    public void setTraceContext(String traceId, String spanId) {
        if (traceId != null) MDC.put(TRACE_ID, traceId);
        if (spanId != null) MDC.put(SPAN_ID, spanId);
    }
    
    // 清理上下文
    public void clearContext() {
        MDC.clear();
    }
    
    // 创建日志构建器
    public LogEventBuilder createLogEvent(String eventType) {
        return new LogEventBuilder(this, eventType);
    }
    
    // 日志事件构建器
    public static class LogEventBuilder {
        private final StructuredLoggingService loggingService;
        private final Map<String, Object> data = new HashMap<>();
        private final String eventType;
        
        public LogEventBuilder(StructuredLoggingService loggingService, String eventType) {
            this.loggingService = loggingService;
            this.eventType = eventType;
            this.data.put("type", eventType);
            this.data.put("timestamp", Instant.now().toString());
        }
        
        public LogEventBuilder withField(String key, Object value) {
            data.put(key, value);
            return this;
        }
        
        public LogEventBuilder withFields(Map<String, Object> fields) {
            data.putAll(fields);
            return this;
        }
        
        public LogEventBuilder withUserId(String userId) {
            data.put("userId", userId);
            return this;
        }
        
        public LogEventBuilder withOperation(String operation) {
            data.put("operation", operation);
            return this;
        }
        
        public LogEventBuilder withDuration(long durationMs) {
            data.put("durationMs", durationMs);
            return this;
        }
        
        public LogEventBuilder withError(Throwable error) {
            data.put("errorType", error.getClass().getSimpleName());
            data.put("errorMessage", error.getMessage());
            return this;
        }
        
        public void log() {
            try {
                loggingService.addContextInfo(data);
                logger.info("Custom Event: {}", loggingService.objectMapper.writeValueAsString(data));
            } catch (Exception e) {
                logger.error("Failed to log custom event", e);
            }
        }
        
        public void logAsError() {
            try {
                loggingService.addContextInfo(data);
                logger.error("Custom Error Event: {}", loggingService.objectMapper.writeValueAsString(data));
            } catch (Exception e) {
                logger.error("Failed to log custom error event", e);
            }
        }
        
        public void logAsWarning() {
            try {
                loggingService.addContextInfo(data);
                logger.warn("Custom Warning Event: {}", loggingService.objectMapper.writeValueAsString(data));
            } catch (Exception e) {
                logger.error("Failed to log custom warning event", e);
            }
        }
    }
}

// 审计结果枚举
public enum AuditResult {
    SUCCESS,
    FAILURE,
    UNAUTHORIZED,
    FORBIDDEN,
    NOT_FOUND,
    VALIDATION_ERROR
}

// 安全事件类型枚举
public enum SecurityEventType {
    LOGIN_SUCCESS,
    LOGIN_FAILURE,
    LOGOUT,
    PASSWORD_CHANGE,
    ACCOUNT_LOCKED,
    UNAUTHORIZED_ACCESS,
    SUSPICIOUS_ACTIVITY,
    DATA_BREACH_ATTEMPT,
    PRIVILEGE_ESCALATION,
    SESSION_HIJACK_ATTEMPT
}

7.4.3 日志拦截器

package com.example.monitoring.interceptor;

import jakarta.annotation.Priority;
import jakarta.inject.Inject;
import jakarta.ws.rs.container.*;
import jakarta.ws.rs.core.Context;
import jakarta.ws.rs.core.HttpHeaders;
import jakarta.ws.rs.core.UriInfo;
import jakarta.ws.rs.ext.Provider;
import com.example.monitoring.logging.StructuredLoggingService;
import java.time.Instant;
import java.util.UUID;

@Provider
@Priority(1000)
public class LoggingInterceptor implements ContainerRequestFilter, ContainerResponseFilter {
    
    private static final String START_TIME_PROPERTY = "logging.start.time";
    private static final String REQUEST_ID_PROPERTY = "logging.request.id";
    
    @Inject
    StructuredLoggingService loggingService;
    
    @Context
    UriInfo uriInfo;
    
    @Context
    HttpHeaders httpHeaders;
    
    @Override
    public void filter(ContainerRequestContext requestContext) {
        Instant startTime = Instant.now();
        String requestId = UUID.randomUUID().toString();
        
        // 设置属性
        requestContext.setProperty(START_TIME_PROPERTY, startTime);
        requestContext.setProperty(REQUEST_ID_PROPERTY, requestId);
        
        // 设置日志上下文
        String userId = extractUserId(requestContext);
        String sessionId = extractSessionId(requestContext);
        loggingService.setRequestContext(requestId, userId, sessionId);
        
        // 记录请求开始
        loggingService.createLogEvent("http-request-start")
            .withField("method", requestContext.getMethod())
            .withField("path", requestContext.getUriInfo().getPath())
            .withField("userAgent", getUserAgent(requestContext))
            .withField("clientIp", getClientIp(requestContext))
            .withField("requestId", requestId)
            .log();
    }
    
    @Override
    public void filter(ContainerRequestContext requestContext, ContainerResponseContext responseContext) {
        try {
            Instant startTime = (Instant) requestContext.getProperty(START_TIME_PROPERTY);
            String requestId = (String) requestContext.getProperty(REQUEST_ID_PROPERTY);
            
            if (startTime != null) {
                long durationMs = java.time.Duration.between(startTime, Instant.now()).toMillis();
                
                // 记录 HTTP 请求完成
                loggingService.logHttpRequest(
                    requestContext.getMethod(),
                    requestContext.getUriInfo().getPath(),
                    responseContext.getStatus(),
                    durationMs,
                    getUserAgent(requestContext),
                    getClientIp(requestContext)
                );
                
                // 如果是错误响应,记录错误日志
                if (responseContext.getStatus() >= 400) {
                    loggingService.createLogEvent("http-error")
                        .withField("method", requestContext.getMethod())
                        .withField("path", requestContext.getUriInfo().getPath())
                        .withField("statusCode", responseContext.getStatus())
                        .withField("durationMs", durationMs)
                        .withField("requestId", requestId)
                        .logAsError();
                }
            }
        } finally {
            // 清理日志上下文
            loggingService.clearContext();
        }
    }
    
    private String extractUserId(ContainerRequestContext requestContext) {
        // 从 JWT 令牌或会话中提取用户 ID
        // 这里是简化实现
        String authHeader = requestContext.getHeaderString("Authorization");
        if (authHeader != null && authHeader.startsWith("Bearer ")) {
            // 解析 JWT 令牌获取用户 ID
            // 实际实现需要 JWT 解析逻辑
            return "user123"; // 示例值
        }
        return null;
    }
    
    private String extractSessionId(ContainerRequestContext requestContext) {
        // 从 Cookie 或 Header 中提取会话 ID
        return requestContext.getHeaderString("X-Session-ID");
    }
    
    private String getUserAgent(ContainerRequestContext requestContext) {
        return requestContext.getHeaderString("User-Agent");
    }
    
    private String getClientIp(ContainerRequestContext requestContext) {
        // 尝试从各种 Header 中获取真实 IP
        String xForwardedFor = requestContext.getHeaderString("X-Forwarded-For");
        if (xForwardedFor != null && !xForwardedFor.isEmpty()) {
            return xForwardedFor.split(",")[0].trim();
        }
        
        String xRealIp = requestContext.getHeaderString("X-Real-IP");
        if (xRealIp != null && !xRealIp.isEmpty()) {
            return xRealIp;
        }
        
        String xClientIp = requestContext.getHeaderString("X-Client-IP");
        if (xClientIp != null && !xClientIp.isEmpty()) {
            return xClientIp;
        }
        
        // 如果都没有,返回默认值
        return "unknown";
    }
}

7.5 分布式追踪

7.5.1 OpenTracing 配置

# OpenTracing 配置
quarkus.jaeger.service-name=my-quarkus-app
quarkus.jaeger.sampler-type=const
quarkus.jaeger.sampler-param=1
quarkus.jaeger.endpoint=http://localhost:14268/api/traces

# Jaeger Agent 配置
quarkus.jaeger.agent-host-port=localhost:6831
quarkus.jaeger.reporter-log-spans=true
quarkus.jaeger.reporter-max-queue-size=100

# 追踪标签
quarkus.jaeger.tags."environment"=${ENVIRONMENT:dev}
quarkus.jaeger.tags."version"=${quarkus.application.version}
quarkus.jaeger.tags."region"=${AWS_REGION:us-east-1}

# 采样配置
quarkus.jaeger.sampler-manager-host-port=localhost:5778

7.5.2 自定义追踪服务

package com.example.monitoring.tracing;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import io.opentracing.Tracer;
import io.opentracing.Span;
import io.opentracing.SpanContext;
import io.opentracing.tag.Tags;
import io.opentracing.util.GlobalTracer;
import java.util.Map;
import java.util.HashMap;
import java.util.function.Supplier;

@ApplicationScoped
public class TracingService {
    
    @Inject
    Tracer tracer;
    
    // 创建新的 Span
    public Span createSpan(String operationName) {
        return tracer.buildSpan(operationName).start();
    }
    
    // 创建子 Span
    public Span createChildSpan(String operationName, SpanContext parentContext) {
        return tracer.buildSpan(operationName)
            .asChildOf(parentContext)
            .start();
    }
    
    // 创建子 Span(从当前活跃 Span)
    public Span createChildSpan(String operationName) {
        return tracer.buildSpan(operationName)
            .asChildOf(tracer.activeSpan())
            .start();
    }
    
    // 追踪数据库操作
    public <T> T traceDatabase(String operation, String table, Supplier<T> supplier) {
        Span span = createChildSpan("db." + operation);
        try {
            // 设置数据库相关标签
            Tags.DB_TYPE.set(span, "postgresql");
            Tags.DB_STATEMENT.set(span, operation);
            span.setTag("db.table", table);
            span.setTag("component", "hibernate");
            
            return supplier.get();
        } catch (Exception e) {
            // 记录错误
            Tags.ERROR.set(span, true);
            span.setTag("error.message", e.getMessage());
            span.setTag("error.type", e.getClass().getSimpleName());
            throw e;
        } finally {
            span.finish();
        }
    }
    
    // 追踪外部 HTTP 调用
    public <T> T traceHttpCall(String method, String url, Supplier<T> supplier) {
        Span span = createChildSpan("http." + method.toLowerCase());
        try {
            // 设置 HTTP 相关标签
            Tags.HTTP_METHOD.set(span, method);
            Tags.HTTP_URL.set(span, url);
            Tags.SPAN_KIND.set(span, Tags.SPAN_KIND_CLIENT);
            span.setTag("component", "http-client");
            
            T result = supplier.get();
            
            // 如果结果包含状态码,设置它
            if (result instanceof HttpResponse) {
                HttpResponse response = (HttpResponse) result;
                Tags.HTTP_STATUS.set(span, response.getStatusCode());
                
                if (response.getStatusCode() >= 400) {
                    Tags.ERROR.set(span, true);
                }
            }
            
            return result;
        } catch (Exception e) {
            // 记录错误
            Tags.ERROR.set(span, true);
            span.setTag("error.message", e.getMessage());
            span.setTag("error.type", e.getClass().getSimpleName());
            throw e;
        } finally {
            span.finish();
        }
    }
    
    // 追踪业务操作
    public <T> T traceBusiness(String operation, Map<String, Object> tags, Supplier<T> supplier) {
        Span span = createChildSpan("business." + operation);
        try {
            // 设置业务相关标签
            span.setTag("operation.type", "business");
            span.setTag("operation.name", operation);
            
            if (tags != null) {
                tags.forEach((key, value) -> {
                    if (value != null) {
                        span.setTag(key, value.toString());
                    }
                });
            }
            
            return supplier.get();
        } catch (Exception e) {
            // 记录错误
            Tags.ERROR.set(span, true);
            span.setTag("error.message", e.getMessage());
            span.setTag("error.type", e.getClass().getSimpleName());
            throw e;
        } finally {
            span.finish();
        }
    }
    
    // 追踪缓存操作
    public <T> T traceCache(String operation, String key, Supplier<T> supplier) {
        Span span = createChildSpan("cache." + operation);
        try {
            // 设置缓存相关标签
            span.setTag("cache.operation", operation);
            span.setTag("cache.key", key);
            span.setTag("component", "redis");
            
            T result = supplier.get();
            
            // 设置缓存命中/未命中标签
            if ("get".equals(operation)) {
                span.setTag("cache.hit", result != null);
            }
            
            return result;
        } catch (Exception e) {
            // 记录错误
            Tags.ERROR.set(span, true);
            span.setTag("error.message", e.getMessage());
            span.setTag("error.type", e.getClass().getSimpleName());
            throw e;
        } finally {
            span.finish();
        }
    }
    
    // 追踪消息队列操作
    public <T> T traceMessage(String operation, String queue, String messageId, Supplier<T> supplier) {
        Span span = createChildSpan("message." + operation);
        try {
            // 设置消息队列相关标签
            span.setTag("message.operation", operation);
            span.setTag("message.queue", queue);
            span.setTag("message.id", messageId);
            span.setTag("component", "rabbitmq");
            
            if ("send".equals(operation) || "publish".equals(operation)) {
                Tags.SPAN_KIND.set(span, Tags.SPAN_KIND_PRODUCER);
            } else if ("receive".equals(operation) || "consume".equals(operation)) {
                Tags.SPAN_KIND.set(span, Tags.SPAN_KIND_CONSUMER);
            }
            
            return supplier.get();
        } catch (Exception e) {
            // 记录错误
            Tags.ERROR.set(span, true);
            span.setTag("error.message", e.getMessage());
            span.setTag("error.type", e.getClass().getSimpleName());
            throw e;
        } finally {
            span.finish();
        }
    }
    
    // 添加自定义标签到当前 Span
    public void addTag(String key, String value) {
        Span activeSpan = tracer.activeSpan();
        if (activeSpan != null) {
            activeSpan.setTag(key, value);
        }
    }
    
    // 添加自定义标签到当前 Span
    public void addTag(String key, Number value) {
        Span activeSpan = tracer.activeSpan();
        if (activeSpan != null) {
            activeSpan.setTag(key, value);
        }
    }
    
    // 添加自定义标签到当前 Span
    public void addTag(String key, Boolean value) {
        Span activeSpan = tracer.activeSpan();
        if (activeSpan != null) {
            activeSpan.setTag(key, value);
        }
    }
    
    // 记录事件到当前 Span
    public void logEvent(String event, Map<String, Object> fields) {
        Span activeSpan = tracer.activeSpan();
        if (activeSpan != null) {
            Map<String, Object> logFields = new HashMap<>();
            logFields.put("event", event);
            if (fields != null) {
                logFields.putAll(fields);
            }
            activeSpan.log(logFields);
        }
    }
    
    // 记录错误到当前 Span
    public void logError(Throwable error) {
        Span activeSpan = tracer.activeSpan();
        if (activeSpan != null) {
            Tags.ERROR.set(activeSpan, true);
            Map<String, Object> errorFields = new HashMap<>();
            errorFields.put("event", "error");
            errorFields.put("error.object", error);
            errorFields.put("error.kind", error.getClass().getSimpleName());
            errorFields.put("message", error.getMessage());
            activeSpan.log(errorFields);
        }
    }
    
    // 获取当前追踪 ID
    public String getCurrentTraceId() {
        Span activeSpan = tracer.activeSpan();
        if (activeSpan != null) {
            return activeSpan.context().toTraceId();
        }
        return null;
    }
    
    // 获取当前 Span ID
    public String getCurrentSpanId() {
        Span activeSpan = tracer.activeSpan();
        if (activeSpan != null) {
            return activeSpan.context().toSpanId();
        }
        return null;
    }
    
    // 创建追踪构建器
    public TraceBuilder createTrace(String operationName) {
        return new TraceBuilder(this, operationName);
    }
    
    // 追踪构建器
    public static class TraceBuilder {
        private final TracingService tracingService;
        private final String operationName;
        private final Map<String, Object> tags = new HashMap<>();
        private SpanContext parentContext;
        
        public TraceBuilder(TracingService tracingService, String operationName) {
            this.tracingService = tracingService;
            this.operationName = operationName;
        }
        
        public TraceBuilder withTag(String key, String value) {
            tags.put(key, value);
            return this;
        }
        
        public TraceBuilder withTag(String key, Number value) {
            tags.put(key, value);
            return this;
        }
        
        public TraceBuilder withTag(String key, Boolean value) {
            tags.put(key, value);
            return this;
        }
        
        public TraceBuilder withParent(SpanContext parentContext) {
            this.parentContext = parentContext;
            return this;
        }
        
        public <T> T execute(Supplier<T> supplier) {
            Span span = parentContext != null 
                ? tracingService.createChildSpan(operationName, parentContext)
                : tracingService.createSpan(operationName);
                
            try {
                // 设置标签
                tags.forEach((key, value) -> {
                    if (value instanceof String) {
                        span.setTag(key, (String) value);
                    } else if (value instanceof Number) {
                        span.setTag(key, (Number) value);
                    } else if (value instanceof Boolean) {
                        span.setTag(key, (Boolean) value);
                    } else {
                        span.setTag(key, value.toString());
                    }
                });
                
                return supplier.get();
            } catch (Exception e) {
                // 记录错误
                Tags.ERROR.set(span, true);
                span.setTag("error.message", e.getMessage());
                span.setTag("error.type", e.getClass().getSimpleName());
                throw e;
            } finally {
                span.finish();
            }
        }
        
        public void execute(Runnable runnable) {
            execute(() -> {
                runnable.run();
                return null;
            });
        }
    }
}

// HTTP 响应接口(示例)
interface HttpResponse {
    int getStatusCode();
}

7.6 告警与通知

7.6.1 告警配置

# 告警配置
app.alerting.enabled=true
app.alerting.email.enabled=true
app.alerting.slack.enabled=true
app.alerting.webhook.enabled=true

# 邮件告警配置
app.alerting.email.smtp.host=smtp.gmail.com
app.alerting.email.smtp.port=587
app.alerting.email.smtp.username=${SMTP_USERNAME}
app.alerting.email.smtp.password=${SMTP_PASSWORD}
app.alerting.email.from=alerts@mycompany.com
app.alerting.email.to=admin@mycompany.com,ops@mycompany.com

# Slack 告警配置
app.alerting.slack.webhook-url=${SLACK_WEBHOOK_URL}
app.alerting.slack.channel=#alerts
app.alerting.slack.username=QuarkusBot

# Webhook 告警配置
app.alerting.webhook.url=${ALERT_WEBHOOK_URL}
app.alerting.webhook.timeout=10s

# 告警阈值配置
app.alerting.thresholds.error-rate=0.05
app.alerting.thresholds.response-time=5000
app.alerting.thresholds.memory-usage=0.85
app.alerting.thresholds.cpu-usage=0.80
app.alerting.thresholds.disk-usage=0.90

# 告警抑制配置
app.alerting.suppression.duration=5m
app.alerting.suppression.max-alerts-per-hour=10

7.6.2 告警服务

package com.example.monitoring.alerting;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import io.smallrye.mutiny.Uni;
import java.time.Instant;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

@ApplicationScoped
public class AlertingService {
    
    @Inject
    EmailAlertSender emailAlertSender;
    
    @Inject
    SlackAlertSender slackAlertSender;
    
    @Inject
    WebhookAlertSender webhookAlertSender;
    
    @ConfigProperty(name = "app.alerting.enabled", defaultValue = "true")
    boolean alertingEnabled;
    
    @ConfigProperty(name = "app.alerting.suppression.duration", defaultValue = "5m")
    Duration suppressionDuration;
    
    @ConfigProperty(name = "app.alerting.suppression.max-alerts-per-hour", defaultValue = "10")
    int maxAlertsPerHour;
    
    // 告警抑制缓存
    private final Map<String, Instant> alertSuppressionCache = new ConcurrentHashMap<>();
    private final Map<String, AtomicInteger> alertCountCache = new ConcurrentHashMap<>();
    
    // 发送告警
    public Uni<Void> sendAlert(Alert alert) {
        if (!alertingEnabled) {
            return Uni.createFrom().voidItem();
        }
        
        // 检查告警抑制
        if (isAlertSuppressed(alert)) {
            return Uni.createFrom().voidItem();
        }
        
        // 记录告警发送
        recordAlertSent(alert);
        
        // 并行发送到所有配置的通道
        List<Uni<Void>> sendTasks = new ArrayList<>();
        
        if (alert.getSeverity().ordinal() >= AlertSeverity.WARNING.ordinal()) {
            sendTasks.add(emailAlertSender.sendAlert(alert));
        }
        
        if (alert.getSeverity().ordinal() >= AlertSeverity.CRITICAL.ordinal()) {
            sendTasks.add(slackAlertSender.sendAlert(alert));
        }
        
        sendTasks.add(webhookAlertSender.sendAlert(alert));
        
        return Uni.combine().all().unis(sendTasks)
            .discardItems();
    }
    
    // 创建并发送错误率告警
    public Uni<Void> sendErrorRateAlert(double errorRate, double threshold) {
        Alert alert = Alert.builder()
            .withTitle("High Error Rate Detected")
            .withDescription(String.format("Error rate %.2f%% exceeds threshold %.2f%%", errorRate * 100, threshold * 100))
            .withSeverity(errorRate > threshold * 2 ? AlertSeverity.CRITICAL : AlertSeverity.WARNING)
            .withSource("error-rate-monitor")
            .withTag("metric", "error_rate")
            .withTag("current_value", String.valueOf(errorRate))
            .withTag("threshold", String.valueOf(threshold))
            .build();
            
        return sendAlert(alert);
    }
    
    // 创建并发送响应时间告警
    public Uni<Void> sendResponseTimeAlert(long responseTimeMs, long thresholdMs) {
        Alert alert = Alert.builder()
            .withTitle("High Response Time Detected")
            .withDescription(String.format("Response time %dms exceeds threshold %dms", responseTimeMs, thresholdMs))
            .withSeverity(responseTimeMs > thresholdMs * 2 ? AlertSeverity.CRITICAL : AlertSeverity.WARNING)
            .withSource("response-time-monitor")
            .withTag("metric", "response_time")
            .withTag("current_value", String.valueOf(responseTimeMs))
            .withTag("threshold", String.valueOf(thresholdMs))
            .build();
            
        return sendAlert(alert);
    }
    
    // 创建并发送资源使用告警
    public Uni<Void> sendResourceUsageAlert(String resource, double usage, double threshold) {
        Alert alert = Alert.builder()
            .withTitle(String.format("High %s Usage Detected", resource))
            .withDescription(String.format("%s usage %.2f%% exceeds threshold %.2f%%", resource, usage * 100, threshold * 100))
            .withSeverity(usage > threshold * 1.2 ? AlertSeverity.CRITICAL : AlertSeverity.WARNING)
            .withSource("resource-monitor")
            .withTag("metric", resource.toLowerCase() + "_usage")
            .withTag("current_value", String.valueOf(usage))
            .withTag("threshold", String.valueOf(threshold))
            .build();
            
        return sendAlert(alert);
    }
    
    // 创建并发送健康检查告警
    public Uni<Void> sendHealthCheckAlert(String component, String status, String details) {
        AlertSeverity severity = "DOWN".equals(status) ? AlertSeverity.CRITICAL : AlertSeverity.WARNING;
        
        Alert alert = Alert.builder()
            .withTitle(String.format("%s Health Check Failed", component))
            .withDescription(String.format("%s is %s: %s", component, status, details))
            .withSeverity(severity)
            .withSource("health-check-monitor")
            .withTag("component", component)
            .withTag("status", status)
            .build();
            
        return sendAlert(alert);
    }
    
    // 创建并发送自定义告警
    public Uni<Void> sendCustomAlert(String title, String description, AlertSeverity severity, Map<String, String> tags) {
        Alert.Builder builder = Alert.builder()
            .withTitle(title)
            .withDescription(description)
            .withSeverity(severity)
            .withSource("custom");
            
        if (tags != null) {
            tags.forEach(builder::withTag);
        }
        
        return sendAlert(builder.build());
    }
    
    // 检查告警是否被抑制
    private boolean isAlertSuppressed(Alert alert) {
        String alertKey = generateAlertKey(alert);
        
        // 检查时间抑制
        Instant lastSent = alertSuppressionCache.get(alertKey);
        if (lastSent != null && Duration.between(lastSent, Instant.now()).compareTo(suppressionDuration) < 0) {
            return true;
        }
        
        // 检查频率抑制
        String hourlyKey = alertKey + "_" + (Instant.now().getEpochSecond() / 3600);
        AtomicInteger hourlyCount = alertCountCache.computeIfAbsent(hourlyKey, k -> new AtomicInteger(0));
        
        return hourlyCount.get() >= maxAlertsPerHour;
    }
    
    // 记录告警发送
    private void recordAlertSent(Alert alert) {
        String alertKey = generateAlertKey(alert);
        Instant now = Instant.now();
        
        // 更新抑制缓存
        alertSuppressionCache.put(alertKey, now);
        
        // 更新计数缓存
        String hourlyKey = alertKey + "_" + (now.getEpochSecond() / 3600);
        alertCountCache.computeIfAbsent(hourlyKey, k -> new AtomicInteger(0)).incrementAndGet();
        
        // 清理过期缓存
        cleanupExpiredCache();
    }
    
    // 生成告警键
    private String generateAlertKey(Alert alert) {
        return alert.getSource() + "_" + alert.getTitle().replaceAll("\\s+", "_").toLowerCase();
    }
    
    // 清理过期缓存
    private void cleanupExpiredCache() {
        Instant cutoff = Instant.now().minus(suppressionDuration.multipliedBy(2));
        
        alertSuppressionCache.entrySet().removeIf(entry -> entry.getValue().isBefore(cutoff));
        
        long currentHour = Instant.now().getEpochSecond() / 3600;
        alertCountCache.entrySet().removeIf(entry -> {
            String[] parts = entry.getKey().split("_");
            if (parts.length > 0) {
                try {
                    long hour = Long.parseLong(parts[parts.length - 1]);
                    return currentHour - hour > 2; // 保留最近2小时的数据
                } catch (NumberFormatException e) {
                    return true; // 无效格式,删除
                }
            }
            return true;
        });
    }
    
    // 获取告警统计
    public AlertStatistics getAlertStatistics() {
        AlertStatistics stats = new AlertStatistics();
        
        long currentHour = Instant.now().getEpochSecond() / 3600;
        
        // 计算当前小时的告警数量
        int currentHourAlerts = alertCountCache.entrySet().stream()
            .filter(entry -> {
                String[] parts = entry.getKey().split("_");
                if (parts.length > 0) {
                    try {
                        long hour = Long.parseLong(parts[parts.length - 1]);
                        return hour == currentHour;
                    } catch (NumberFormatException e) {
                        return false;
                    }
                }
                return false;
            })
            .mapToInt(entry -> entry.getValue().get())
            .sum();
            
        stats.currentHourAlerts = currentHourAlerts;
        stats.suppressedAlerts = alertSuppressionCache.size();
        stats.totalAlertTypes = alertCountCache.size();
        
        return stats;
    }
    
    // 告警统计类
    public static class AlertStatistics {
        public int currentHourAlerts;
        public int suppressedAlerts;
        public int totalAlertTypes;
    }
}

7.7 本章小结

7.7.1 核心概念回顾

本章深入探讨了 Quarkus 应用的监控、日志和健康检查体系:

  1. 应用监控:使用 Micrometer 和 Prometheus 实现全面的指标收集
  2. 健康检查:实现存活、就绪和启动检查,确保应用状态可观测
  3. 结构化日志:采用 JSON 格式和 MDC 上下文,提升日志可分析性
  4. 分布式追踪:使用 OpenTracing 和 Jaeger 实现请求链路追踪
  5. 告警通知:建立多渠道告警机制,及时响应系统异常

7.7.2 技术要点总结

  • 指标体系:计数器、计时器、仪表和分布摘要的合理使用
  • 健康检查策略:区分不同类型的健康检查,提供详细的状态信息
  • 日志最佳实践:结构化日志、上下文传递和日志级别管理
  • 追踪集成:与业务逻辑无缝集成的分布式追踪
  • 告警策略:智能抑制和多级别告警机制

7.7.3 最佳实践

  1. 监控设计:建立分层监控体系,从基础设施到业务指标
  2. 告警策略:避免告警疲劳,设置合理的阈值和抑制规则
  3. 日志管理:平衡日志详细程度和性能影响
  4. 追踪采样:在性能和可观测性之间找到平衡点
  5. 健康检查:设计快速响应的健康检查,避免级联故障

7.7.4 下一章预告

下一章将学习《测试策略与实践》,包括: - 单元测试和集成测试 - 测试容器的使用 - 性能测试和压力测试 - 测试数据管理 - 持续集成中的测试策略 “`