7.1 应用监控概述
7.1.1 监控架构图
graph TB
A[Quarkus 应用] --> B[Metrics 收集]
A --> C[Health 检查]
A --> D[分布式追踪]
A --> E[日志收集]
B --> F[Prometheus]
C --> G[Kubernetes Probes]
D --> H[Jaeger/Zipkin]
E --> I[ELK Stack]
F --> J[Grafana 仪表板]
G --> K[集群管理]
H --> L[追踪分析]
I --> M[日志分析]
subgraph "监控扩展"
N[quarkus-micrometer]
O[quarkus-smallrye-health]
P[quarkus-smallrye-opentracing]
Q[quarkus-logging-json]
end
subgraph "告警系统"
R[AlertManager]
S[PagerDuty]
T[Slack 通知]
U[邮件告警]
end
7.1.2 监控依赖配置
<!-- Micrometer 指标 -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-micrometer-registry-prometheus</artifactId>
</dependency>
<!-- 健康检查 -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-smallrye-health</artifactId>
</dependency>
<!-- 分布式追踪 -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-smallrye-opentracing</artifactId>
</dependency>
<!-- JSON 日志 -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-logging-json</artifactId>
</dependency>
<!-- 应用信息 -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-info</artifactId>
</dependency>
<!-- OpenAPI 文档 -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-smallrye-openapi</artifactId>
</dependency>
7.2 指标收集与监控
7.2.1 Micrometer 配置
# Micrometer 配置
quarkus.micrometer.enabled=true
quarkus.micrometer.registry-enabled-default=true
quarkus.micrometer.binder-enabled-default=true
# Prometheus 配置
quarkus.micrometer.export.prometheus.enabled=true
quarkus.micrometer.export.prometheus.path=/q/metrics
quarkus.micrometer.export.prometheus.default-registry=true
# JVM 指标
quarkus.micrometer.binder.jvm=true
quarkus.micrometer.binder.system=true
quarkus.micrometer.binder.http-server=true
quarkus.micrometer.binder.http-client=true
# 数据库指标
quarkus.micrometer.binder.jdbc=true
quarkus.micrometer.binder.hibernate=true
# 自定义指标标签
quarkus.micrometer.export.prometheus.default-labels.application=my-quarkus-app
quarkus.micrometer.export.prometheus.default-labels.environment=${ENVIRONMENT:dev}
quarkus.micrometer.export.prometheus.default-labels.version=${quarkus.application.version}
7.2.2 自定义指标服务
package com.example.monitoring.service;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import io.micrometer.core.instrument.*;
import io.micrometer.core.instrument.Timer;
import io.smallrye.mutiny.Uni;
import java.time.Duration;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;
import java.util.Map;
@ApplicationScoped
public class MetricsService {
@Inject
MeterRegistry meterRegistry;
// 计数器
private final Counter requestCounter;
private final Counter errorCounter;
private final Counter businessEventCounter;
// 计时器
private final Timer requestTimer;
private final Timer databaseTimer;
private final Timer externalApiTimer;
// 仪表
private final Gauge activeUsersGauge;
private final Gauge cacheHitRateGauge;
// 分布摘要
private final DistributionSummary requestSizeSummary;
private final DistributionSummary responseSizeSummary;
// 自定义指标存储
private final AtomicInteger activeUsers = new AtomicInteger(0);
private final Map<String, AtomicInteger> businessMetrics = new ConcurrentHashMap<>();
private final Map<String, Timer.Sample> activeSamples = new ConcurrentHashMap<>();
public MetricsService(MeterRegistry meterRegistry) {
this.meterRegistry = meterRegistry;
// 初始化计数器
this.requestCounter = Counter.builder("http_requests_total")
.description("Total number of HTTP requests")
.tag("method", "unknown")
.tag("status", "unknown")
.register(meterRegistry);
this.errorCounter = Counter.builder("application_errors_total")
.description("Total number of application errors")
.tag("type", "unknown")
.register(meterRegistry);
this.businessEventCounter = Counter.builder("business_events_total")
.description("Total number of business events")
.tag("event_type", "unknown")
.register(meterRegistry);
// 初始化计时器
this.requestTimer = Timer.builder("http_request_duration")
.description("HTTP request duration")
.tag("method", "unknown")
.tag("endpoint", "unknown")
.register(meterRegistry);
this.databaseTimer = Timer.builder("database_query_duration")
.description("Database query duration")
.tag("operation", "unknown")
.tag("table", "unknown")
.register(meterRegistry);
this.externalApiTimer = Timer.builder("external_api_duration")
.description("External API call duration")
.tag("service", "unknown")
.tag("operation", "unknown")
.register(meterRegistry);
// 初始化仪表
this.activeUsersGauge = Gauge.builder("active_users")
.description("Number of active users")
.register(meterRegistry, this, MetricsService::getActiveUsersCount);
this.cacheHitRateGauge = Gauge.builder("cache_hit_rate")
.description("Cache hit rate")
.register(meterRegistry, this, MetricsService::getCacheHitRate);
// 初始化分布摘要
this.requestSizeSummary = DistributionSummary.builder("http_request_size_bytes")
.description("HTTP request size in bytes")
.baseUnit("bytes")
.register(meterRegistry);
this.responseSizeSummary = DistributionSummary.builder("http_response_size_bytes")
.description("HTTP response size in bytes")
.baseUnit("bytes")
.register(meterRegistry);
}
// HTTP 请求指标
public void recordHttpRequest(String method, String endpoint, int statusCode, Duration duration) {
requestCounter.increment(
Tags.of(
Tag.of("method", method),
Tag.of("endpoint", endpoint),
Tag.of("status", String.valueOf(statusCode))
)
);
requestTimer.record(duration,
Tags.of(
Tag.of("method", method),
Tag.of("endpoint", endpoint)
)
);
}
// 错误指标
public void recordError(String errorType, String errorMessage) {
errorCounter.increment(
Tags.of(
Tag.of("type", errorType),
Tag.of("message", errorMessage)
)
);
}
// 业务事件指标
public void recordBusinessEvent(String eventType, String... additionalTags) {
Tags.Builder tagsBuilder = Tags.of("event_type", eventType);
// 添加额外标签
for (int i = 0; i < additionalTags.length; i += 2) {
if (i + 1 < additionalTags.length) {
tagsBuilder = tagsBuilder.and(additionalTags[i], additionalTags[i + 1]);
}
}
businessEventCounter.increment(tagsBuilder);
}
// 数据库查询指标
public void recordDatabaseQuery(String operation, String table, Duration duration) {
databaseTimer.record(duration,
Tags.of(
Tag.of("operation", operation),
Tag.of("table", table)
)
);
}
// 外部 API 调用指标
public void recordExternalApiCall(String service, String operation, Duration duration) {
externalApiTimer.record(duration,
Tags.of(
Tag.of("service", service),
Tag.of("operation", operation)
)
);
}
// 请求/响应大小指标
public void recordRequestSize(long sizeBytes) {
requestSizeSummary.record(sizeBytes);
}
public void recordResponseSize(long sizeBytes) {
responseSizeSummary.record(sizeBytes);
}
// 活跃用户管理
public void incrementActiveUsers() {
activeUsers.incrementAndGet();
}
public void decrementActiveUsers() {
activeUsers.decrementAndGet();
}
public void setActiveUsers(int count) {
activeUsers.set(count);
}
private double getActiveUsersCount() {
return activeUsers.get();
}
// 缓存命中率(示例)
private double getCacheHitRate() {
// 这里应该从实际的缓存服务获取命中率
return 0.85; // 示例值
}
// 自定义业务指标
public void incrementBusinessMetric(String metricName) {
businessMetrics.computeIfAbsent(metricName, k -> {
AtomicInteger counter = new AtomicInteger(0);
Gauge.builder("business_metric_" + k)
.description("Custom business metric: " + k)
.register(meterRegistry, counter, AtomicInteger::get);
return counter;
}).incrementAndGet();
}
// 计时器采样
public String startTimer(String name) {
String sampleId = name + "_" + System.currentTimeMillis();
Timer.Sample sample = Timer.start(meterRegistry);
activeSamples.put(sampleId, sample);
return sampleId;
}
public void stopTimer(String sampleId, String timerName, String... tags) {
Timer.Sample sample = activeSamples.remove(sampleId);
if (sample != null) {
Timer.Builder timerBuilder = Timer.builder(timerName);
// 添加标签
for (int i = 0; i < tags.length; i += 2) {
if (i + 1 < tags.length) {
timerBuilder = timerBuilder.tag(tags[i], tags[i + 1]);
}
}
sample.stop(timerBuilder.register(meterRegistry));
}
}
// 批量指标记录
public void recordBatch(MetricsBatch batch) {
batch.getCounters().forEach((name, value) -> {
Counter.builder(name)
.tags(batch.getTags())
.register(meterRegistry)
.increment(value);
});
batch.getTimers().forEach((name, duration) -> {
Timer.builder(name)
.tags(batch.getTags())
.register(meterRegistry)
.record(duration);
});
batch.getGauges().forEach((name, value) -> {
Gauge.builder(name)
.tags(batch.getTags())
.register(meterRegistry, value, Number::doubleValue);
});
}
// 获取指标摘要
public MetricsSummary getMetricsSummary() {
MetricsSummary summary = new MetricsSummary();
// 收集各种指标的当前值
summary.totalRequests = requestCounter.count();
summary.totalErrors = errorCounter.count();
summary.activeUsers = activeUsers.get();
summary.averageRequestDuration = requestTimer.mean(java.util.concurrent.TimeUnit.MILLISECONDS);
summary.maxRequestDuration = requestTimer.max(java.util.concurrent.TimeUnit.MILLISECONDS);
return summary;
}
}
// 指标批量记录类
public static class MetricsBatch {
private final Map<String, Double> counters = new ConcurrentHashMap<>();
private final Map<String, Duration> timers = new ConcurrentHashMap<>();
private final Map<String, Number> gauges = new ConcurrentHashMap<>();
private final Tags tags;
public MetricsBatch(Tags tags) {
this.tags = tags;
}
public MetricsBatch addCounter(String name, double value) {
counters.put(name, value);
return this;
}
public MetricsBatch addTimer(String name, Duration duration) {
timers.put(name, duration);
return this;
}
public MetricsBatch addGauge(String name, Number value) {
gauges.put(name, value);
return this;
}
// Getters
public Map<String, Double> getCounters() { return counters; }
public Map<String, Duration> getTimers() { return timers; }
public Map<String, Number> getGauges() { return gauges; }
public Tags getTags() { return tags; }
}
// 指标摘要类
public static class MetricsSummary {
public double totalRequests;
public double totalErrors;
public int activeUsers;
public double averageRequestDuration;
public double maxRequestDuration;
// 构造函数、getter 和 setter
}
7.2.3 HTTP 指标拦截器
package com.example.monitoring.interceptor;
import jakarta.annotation.Priority;
import jakarta.inject.Inject;
import jakarta.ws.rs.container.*;
import jakarta.ws.rs.core.Response;
import jakarta.ws.rs.ext.Provider;
import com.example.monitoring.service.MetricsService;
import java.time.Duration;
import java.time.Instant;
@Provider
@Priority(1000)
public class MetricsInterceptor implements ContainerRequestFilter, ContainerResponseFilter {
private static final String START_TIME_PROPERTY = "metrics.start.time";
private static final String REQUEST_SIZE_PROPERTY = "metrics.request.size";
@Inject
MetricsService metricsService;
@Override
public void filter(ContainerRequestContext requestContext) {
// 记录请求开始时间
requestContext.setProperty(START_TIME_PROPERTY, Instant.now());
// 记录请求大小
if (requestContext.hasEntity()) {
// 这里需要实际测量请求体大小
// 简化示例,实际实现需要更复杂的逻辑
long requestSize = estimateRequestSize(requestContext);
requestContext.setProperty(REQUEST_SIZE_PROPERTY, requestSize);
metricsService.recordRequestSize(requestSize);
}
}
@Override
public void filter(ContainerRequestContext requestContext, ContainerResponseContext responseContext) {
Instant startTime = (Instant) requestContext.getProperty(START_TIME_PROPERTY);
if (startTime != null) {
Duration duration = Duration.between(startTime, Instant.now());
String method = requestContext.getMethod();
String endpoint = getEndpoint(requestContext);
int statusCode = responseContext.getStatus();
// 记录 HTTP 请求指标
metricsService.recordHttpRequest(method, endpoint, statusCode, duration);
// 记录错误
if (statusCode >= 400) {
String errorType = statusCode >= 500 ? "server_error" : "client_error";
metricsService.recordError(errorType, "HTTP " + statusCode);
}
}
// 记录响应大小
if (responseContext.hasEntity()) {
long responseSize = estimateResponseSize(responseContext);
metricsService.recordResponseSize(responseSize);
}
}
private String getEndpoint(ContainerRequestContext requestContext) {
String path = requestContext.getUriInfo().getPath();
// 简化路径,移除路径参数
return simplifyPath(path);
}
private String simplifyPath(String path) {
// 将路径参数替换为占位符,例如 /users/123 -> /users/{id}
return path.replaceAll("/\\d+", "/{id}")
.replaceAll("/[a-f0-9-]{36}", "/{uuid}");
}
private long estimateRequestSize(ContainerRequestContext requestContext) {
// 简化实现,实际应该测量实际的请求体大小
return 1024; // 示例值
}
private long estimateResponseSize(ContainerResponseContext responseContext) {
// 简化实现,实际应该测量实际的响应体大小
return 2048; // 示例值
}
}
7.3 健康检查
7.3.1 健康检查配置
# 健康检查配置
quarkus.smallrye-health.root-path=/q/health
quarkus.smallrye-health.liveness-path=/q/health/live
quarkus.smallrye-health.readiness-path=/q/health/ready
quarkus.smallrye-health.startup-path=/q/health/started
# 健康检查响应缓存
quarkus.smallrye-health.check."database".enabled=true
quarkus.smallrye-health.check."redis".enabled=true
# 额外健康检查信息
quarkus.smallrye-health.additional.property."version"=${quarkus.application.version}
quarkus.smallrye-health.additional.property."build-time"=${quarkus.application.build-time}
7.3.2 自定义健康检查
package com.example.monitoring.health;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.eclipse.microprofile.health.*;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import io.smallrye.mutiny.Uni;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
@ApplicationScoped
public class CustomHealthChecks {
@Inject
DatabaseHealthService databaseHealthService;
@Inject
RedisHealthService redisHealthService;
@Inject
ExternalServiceHealthService externalServiceHealthService;
@ConfigProperty(name = "app.health.database.timeout", defaultValue = "5")
int databaseTimeoutSeconds;
@ConfigProperty(name = "app.health.redis.timeout", defaultValue = "3")
int redisTimeoutSeconds;
@ConfigProperty(name = "app.health.external.timeout", defaultValue = "10")
int externalServiceTimeoutSeconds;
// 存活检查 - 应用是否运行
@Liveness
@ApplicationScoped
public static class LivenessCheck implements HealthCheck {
private final LocalDateTime startTime = LocalDateTime.now();
@Override
public HealthCheckResponse call() {
return HealthCheckResponse.named("application-liveness")
.status(true)
.withData("uptime", Duration.between(startTime, LocalDateTime.now()).toString())
.withData("start-time", startTime.toString())
.build();
}
}
// 就绪检查 - 应用是否准备好接收请求
@Readiness
@ApplicationScoped
public class ReadinessCheck implements HealthCheck {
@Override
public HealthCheckResponse call() {
HealthCheckResponseBuilder responseBuilder = HealthCheckResponse.named("application-readiness");
try {
// 检查数据库连接
boolean dbHealthy = checkDatabaseHealth();
responseBuilder.withData("database", dbHealthy ? "UP" : "DOWN");
// 检查缓存连接
boolean cacheHealthy = checkCacheHealth();
responseBuilder.withData("cache", cacheHealthy ? "UP" : "DOWN");
// 检查关键外部服务
boolean externalHealthy = checkCriticalExternalServices();
responseBuilder.withData("external-services", externalHealthy ? "UP" : "DOWN");
// 所有关键组件都健康才算就绪
boolean overallHealthy = dbHealthy && cacheHealthy && externalHealthy;
responseBuilder.status(overallHealthy);
if (!overallHealthy) {
responseBuilder.withData("message", "One or more critical components are unhealthy");
}
} catch (Exception e) {
responseBuilder.status(false)
.withData("error", e.getMessage());
}
return responseBuilder.build();
}
private boolean checkDatabaseHealth() {
try {
return databaseHealthService.isHealthy(Duration.ofSeconds(databaseTimeoutSeconds));
} catch (Exception e) {
return false;
}
}
private boolean checkCacheHealth() {
try {
return redisHealthService.isHealthy(Duration.ofSeconds(redisTimeoutSeconds));
} catch (Exception e) {
return false;
}
}
private boolean checkCriticalExternalServices() {
try {
return externalServiceHealthService.checkCriticalServices(Duration.ofSeconds(externalServiceTimeoutSeconds));
} catch (Exception e) {
return false;
}
}
}
// 启动检查 - 应用是否已完成启动
@Startup
@ApplicationScoped
public static class StartupCheck implements HealthCheck {
private volatile boolean startupCompleted = false;
private String startupMessage = "Application is starting...";
@Override
public HealthCheckResponse call() {
return HealthCheckResponse.named("application-startup")
.status(startupCompleted)
.withData("message", startupMessage)
.withData("startup-completed", startupCompleted)
.build();
}
public void markStartupCompleted() {
this.startupCompleted = true;
this.startupMessage = "Application startup completed successfully";
}
public void markStartupFailed(String reason) {
this.startupCompleted = false;
this.startupMessage = "Application startup failed: " + reason;
}
}
}
// 数据库健康检查服务
@ApplicationScoped
public class DatabaseHealthService {
@Inject
DataSource dataSource;
public boolean isHealthy(Duration timeout) {
try {
CompletableFuture<Boolean> future = CompletableFuture.supplyAsync(() -> {
try (var connection = dataSource.getConnection()) {
return connection.isValid(1);
} catch (Exception e) {
return false;
}
});
return future.get(timeout.toSeconds(), TimeUnit.SECONDS);
} catch (Exception e) {
return false;
}
}
public DatabaseHealthInfo getDetailedHealth() {
DatabaseHealthInfo info = new DatabaseHealthInfo();
try (var connection = dataSource.getConnection()) {
info.connected = true;
info.url = connection.getMetaData().getURL();
info.driverName = connection.getMetaData().getDriverName();
info.driverVersion = connection.getMetaData().getDriverVersion();
info.autoCommit = connection.getAutoCommit();
info.readOnly = connection.isReadOnly();
// 测试查询
try (var stmt = connection.createStatement()) {
var rs = stmt.executeQuery("SELECT 1");
info.queryTest = rs.next();
}
} catch (Exception e) {
info.connected = false;
info.error = e.getMessage();
}
return info;
}
public static class DatabaseHealthInfo {
public boolean connected;
public String url;
public String driverName;
public String driverVersion;
public boolean autoCommit;
public boolean readOnly;
public boolean queryTest;
public String error;
}
}
// Redis 健康检查服务
@ApplicationScoped
public class RedisHealthService {
// 假设使用 Redis 客户端
// @Inject
// RedisClient redisClient;
public boolean isHealthy(Duration timeout) {
try {
// 简化示例 - 实际应该使用真实的 Redis 客户端
// return redisClient.ping().await().atMost(timeout);
return true; // 示例返回值
} catch (Exception e) {
return false;
}
}
public RedisHealthInfo getDetailedHealth() {
RedisHealthInfo info = new RedisHealthInfo();
try {
// 获取 Redis 信息
// info.connected = redisClient.ping().await().atMost(Duration.ofSeconds(5));
// info.version = redisClient.info("server").await().atMost(Duration.ofSeconds(5));
info.connected = true; // 示例
info.version = "6.2.0"; // 示例
} catch (Exception e) {
info.connected = false;
info.error = e.getMessage();
}
return info;
}
public static class RedisHealthInfo {
public boolean connected;
public String version;
public String error;
}
}
// 外部服务健康检查
@ApplicationScoped
public class ExternalServiceHealthService {
// @Inject
// @RestClient
// ExternalServiceClient externalServiceClient;
public boolean checkCriticalServices(Duration timeout) {
try {
// 检查关键外部服务
// return externalServiceClient.healthCheck().await().atMost(timeout);
return true; // 示例返回值
} catch (Exception e) {
return false;
}
}
public ExternalServiceHealthInfo getDetailedHealth() {
ExternalServiceHealthInfo info = new ExternalServiceHealthInfo();
try {
// 检查各个外部服务
info.paymentService = checkPaymentService();
info.notificationService = checkNotificationService();
info.authService = checkAuthService();
} catch (Exception e) {
info.error = e.getMessage();
}
return info;
}
private boolean checkPaymentService() {
// 实际检查支付服务
return true;
}
private boolean checkNotificationService() {
// 实际检查通知服务
return true;
}
private boolean checkAuthService() {
// 实际检查认证服务
return true;
}
public static class ExternalServiceHealthInfo {
public boolean paymentService;
public boolean notificationService;
public boolean authService;
public String error;
}
}
7.3.3 健康检查资源端点
package com.example.monitoring.resource;
import jakarta.inject.Inject;
import jakarta.ws.rs.*;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import org.eclipse.microprofile.openapi.annotations.Operation;
import org.eclipse.microprofile.openapi.annotations.tags.Tag;
import com.example.monitoring.health.*;
import com.example.monitoring.service.MetricsService;
import java.util.HashMap;
import java.util.Map;
@Path("/health")
@Produces(MediaType.APPLICATION_JSON)
@Tag(name = "Health", description = "Application health and status operations")
public class HealthResource {
@Inject
DatabaseHealthService databaseHealthService;
@Inject
RedisHealthService redisHealthService;
@Inject
ExternalServiceHealthService externalServiceHealthService;
@Inject
MetricsService metricsService;
@GET
@Path("/detailed")
@Operation(summary = "Detailed health check", description = "Get detailed health information for all components")
public Response getDetailedHealth() {
Map<String, Object> healthInfo = new HashMap<>();
try {
// 数据库健康信息
healthInfo.put("database", databaseHealthService.getDetailedHealth());
// Redis 健康信息
healthInfo.put("redis", redisHealthService.getDetailedHealth());
// 外部服务健康信息
healthInfo.put("external-services", externalServiceHealthService.getDetailedHealth());
// 应用指标摘要
healthInfo.put("metrics", metricsService.getMetricsSummary());
// 系统信息
healthInfo.put("system", getSystemInfo());
return Response.ok(healthInfo).build();
} catch (Exception e) {
Map<String, String> error = Map.of(
"error", "Failed to retrieve health information",
"message", e.getMessage()
);
return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
.entity(error)
.build();
}
}
@GET
@Path("/status")
@Operation(summary = "Application status", description = "Get overall application status")
public Response getApplicationStatus() {
Map<String, Object> status = new HashMap<>();
try {
boolean dbHealthy = databaseHealthService.isHealthy(java.time.Duration.ofSeconds(5));
boolean redisHealthy = redisHealthService.isHealthy(java.time.Duration.ofSeconds(3));
boolean externalHealthy = externalServiceHealthService.checkCriticalServices(java.time.Duration.ofSeconds(10));
status.put("database", dbHealthy ? "UP" : "DOWN");
status.put("redis", redisHealthy ? "UP" : "DOWN");
status.put("external-services", externalHealthy ? "UP" : "DOWN");
boolean overallHealthy = dbHealthy && redisHealthy && externalHealthy;
status.put("overall", overallHealthy ? "UP" : "DOWN");
status.put("timestamp", java.time.Instant.now().toString());
return Response.ok(status).build();
} catch (Exception e) {
Map<String, String> error = Map.of(
"status", "ERROR",
"message", e.getMessage(),
"timestamp", java.time.Instant.now().toString()
);
return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
.entity(error)
.build();
}
}
private Map<String, Object> getSystemInfo() {
Runtime runtime = Runtime.getRuntime();
Map<String, Object> systemInfo = new HashMap<>();
systemInfo.put("java-version", System.getProperty("java.version"));
systemInfo.put("java-vendor", System.getProperty("java.vendor"));
systemInfo.put("os-name", System.getProperty("os.name"));
systemInfo.put("os-version", System.getProperty("os.version"));
systemInfo.put("processors", runtime.availableProcessors());
systemInfo.put("max-memory", runtime.maxMemory());
systemInfo.put("total-memory", runtime.totalMemory());
systemInfo.put("free-memory", runtime.freeMemory());
systemInfo.put("used-memory", runtime.totalMemory() - runtime.freeMemory());
return systemInfo;
}
}
7.4 结构化日志
7.4.1 日志配置
# 日志级别配置
quarkus.log.level=INFO
quarkus.log.category."com.example".level=DEBUG
quarkus.log.category."org.hibernate".level=WARN
quarkus.log.category."io.quarkus".level=INFO
# 控制台日志配置
quarkus.log.console.enable=true
quarkus.log.console.format=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] (%t) %s%e%n
quarkus.log.console.level=INFO
quarkus.log.console.color=true
# 文件日志配置
quarkus.log.file.enable=true
quarkus.log.file.path=logs/application.log
quarkus.log.file.level=DEBUG
quarkus.log.file.format=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] (%t) %s%e%n
quarkus.log.file.rotation.max-file-size=10M
quarkus.log.file.rotation.max-backup-index=5
quarkus.log.file.rotation.file-suffix=.yyyy-MM-dd
quarkus.log.file.rotation.rotate-on-boot=true
# JSON 日志配置
quarkus.log.console.json=true
quarkus.log.console.json.pretty-print=false
quarkus.log.console.json.record-delimiter=\n
# 自定义字段
quarkus.log.console.json.additional-field."application".value=my-quarkus-app
quarkus.log.console.json.additional-field."version".value=${quarkus.application.version}
quarkus.log.console.json.additional-field."environment".value=${ENVIRONMENT:dev}
# 异步日志配置
quarkus.log.async=true
quarkus.log.async.queue-length=512
quarkus.log.async.overflow=block
# 审计日志配置
quarkus.log.category."audit".level=INFO
quarkus.log.category."audit".handlers=audit-file
quarkus.log.handler.audit-file.enable=true
quarkus.log.handler.audit-file.path=logs/audit.log
quarkus.log.handler.audit-file.format=%d{yyyy-MM-dd HH:mm:ss,SSS} %s%e%n
7.4.2 结构化日志服务
package com.example.monitoring.logging;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.time.Instant;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
@ApplicationScoped
public class StructuredLoggingService {
private static final Logger logger = LoggerFactory.getLogger(StructuredLoggingService.class);
private static final Logger auditLogger = LoggerFactory.getLogger("audit");
private static final Logger securityLogger = LoggerFactory.getLogger("security");
private static final Logger performanceLogger = LoggerFactory.getLogger("performance");
@Inject
ObjectMapper objectMapper;
// 请求上下文常量
public static final String REQUEST_ID = "requestId";
public static final String USER_ID = "userId";
public static final String SESSION_ID = "sessionId";
public static final String TRACE_ID = "traceId";
public static final String SPAN_ID = "spanId";
// 业务日志
public void logBusinessEvent(String event, String description, Map<String, Object> data) {
try {
Map<String, Object> logData = new HashMap<>();
logData.put("event", event);
logData.put("description", description);
logData.put("timestamp", Instant.now().toString());
logData.put("type", "business");
if (data != null) {
logData.putAll(data);
}
// 添加上下文信息
addContextInfo(logData);
logger.info("Business Event: {}", objectMapper.writeValueAsString(logData));
} catch (Exception e) {
logger.error("Failed to log business event", e);
}
}
// 审计日志
public void logAuditEvent(String action, String resource, String userId, AuditResult result, Map<String, Object> details) {
try {
Map<String, Object> auditData = new HashMap<>();
auditData.put("action", action);
auditData.put("resource", resource);
auditData.put("userId", userId);
auditData.put("result", result.name());
auditData.put("timestamp", Instant.now().toString());
auditData.put("type", "audit");
if (details != null) {
auditData.putAll(details);
}
// 添加上下文信息
addContextInfo(auditData);
auditLogger.info(objectMapper.writeValueAsString(auditData));
} catch (Exception e) {
logger.error("Failed to log audit event", e);
}
}
// 安全日志
public void logSecurityEvent(SecurityEventType eventType, String description, String userId, String ipAddress, Map<String, Object> details) {
try {
Map<String, Object> securityData = new HashMap<>();
securityData.put("eventType", eventType.name());
securityData.put("description", description);
securityData.put("userId", userId);
securityData.put("ipAddress", ipAddress);
securityData.put("timestamp", Instant.now().toString());
securityData.put("type", "security");
if (details != null) {
securityData.putAll(details);
}
// 添加上下文信息
addContextInfo(securityData);
securityLogger.warn(objectMapper.writeValueAsString(securityData));
} catch (Exception e) {
logger.error("Failed to log security event", e);
}
}
// 性能日志
public void logPerformanceEvent(String operation, long durationMs, Map<String, Object> metrics) {
try {
Map<String, Object> performanceData = new HashMap<>();
performanceData.put("operation", operation);
performanceData.put("durationMs", durationMs);
performanceData.put("timestamp", Instant.now().toString());
performanceData.put("type", "performance");
if (metrics != null) {
performanceData.putAll(metrics);
}
// 添加上下文信息
addContextInfo(performanceData);
performanceLogger.info(objectMapper.writeValueAsString(performanceData));
} catch (Exception e) {
logger.error("Failed to log performance event", e);
}
}
// 错误日志
public void logError(String operation, Throwable error, Map<String, Object> context) {
try {
Map<String, Object> errorData = new HashMap<>();
errorData.put("operation", operation);
errorData.put("errorType", error.getClass().getSimpleName());
errorData.put("errorMessage", error.getMessage());
errorData.put("timestamp", Instant.now().toString());
errorData.put("type", "error");
if (context != null) {
errorData.putAll(context);
}
// 添加上下文信息
addContextInfo(errorData);
logger.error("Error Event: {}", objectMapper.writeValueAsString(errorData), error);
} catch (Exception e) {
logger.error("Failed to log error event", e);
}
}
// HTTP 请求日志
public void logHttpRequest(String method, String path, int statusCode, long durationMs, String userAgent, String clientIp) {
try {
Map<String, Object> requestData = new HashMap<>();
requestData.put("method", method);
requestData.put("path", path);
requestData.put("statusCode", statusCode);
requestData.put("durationMs", durationMs);
requestData.put("userAgent", userAgent);
requestData.put("clientIp", clientIp);
requestData.put("timestamp", Instant.now().toString());
requestData.put("type", "http-request");
// 添加上下文信息
addContextInfo(requestData);
logger.info("HTTP Request: {}", objectMapper.writeValueAsString(requestData));
} catch (Exception e) {
logger.error("Failed to log HTTP request", e);
}
}
// 数据库操作日志
public void logDatabaseOperation(String operation, String table, long durationMs, int recordCount, boolean success) {
try {
Map<String, Object> dbData = new HashMap<>();
dbData.put("operation", operation);
dbData.put("table", table);
dbData.put("durationMs", durationMs);
dbData.put("recordCount", recordCount);
dbData.put("success", success);
dbData.put("timestamp", Instant.now().toString());
dbData.put("type", "database");
// 添加上下文信息
addContextInfo(dbData);
logger.debug("Database Operation: {}", objectMapper.writeValueAsString(dbData));
} catch (Exception e) {
logger.error("Failed to log database operation", e);
}
}
// 添加上下文信息
private void addContextInfo(Map<String, Object> logData) {
// 从 MDC 获取上下文信息
String requestId = MDC.get(REQUEST_ID);
String userId = MDC.get(USER_ID);
String sessionId = MDC.get(SESSION_ID);
String traceId = MDC.get(TRACE_ID);
String spanId = MDC.get(SPAN_ID);
if (requestId != null) logData.put("requestId", requestId);
if (userId != null) logData.put("userId", userId);
if (sessionId != null) logData.put("sessionId", sessionId);
if (traceId != null) logData.put("traceId", traceId);
if (spanId != null) logData.put("spanId", spanId);
// 添加应用信息
logData.put("application", "my-quarkus-app");
logData.put("environment", System.getProperty("environment", "dev"));
logData.put("version", System.getProperty("quarkus.application.version", "unknown"));
}
// 设置请求上下文
public void setRequestContext(String requestId, String userId, String sessionId) {
MDC.put(REQUEST_ID, requestId != null ? requestId : UUID.randomUUID().toString());
if (userId != null) MDC.put(USER_ID, userId);
if (sessionId != null) MDC.put(SESSION_ID, sessionId);
}
// 设置追踪上下文
public void setTraceContext(String traceId, String spanId) {
if (traceId != null) MDC.put(TRACE_ID, traceId);
if (spanId != null) MDC.put(SPAN_ID, spanId);
}
// 清理上下文
public void clearContext() {
MDC.clear();
}
// 创建日志构建器
public LogEventBuilder createLogEvent(String eventType) {
return new LogEventBuilder(this, eventType);
}
// 日志事件构建器
public static class LogEventBuilder {
private final StructuredLoggingService loggingService;
private final Map<String, Object> data = new HashMap<>();
private final String eventType;
public LogEventBuilder(StructuredLoggingService loggingService, String eventType) {
this.loggingService = loggingService;
this.eventType = eventType;
this.data.put("type", eventType);
this.data.put("timestamp", Instant.now().toString());
}
public LogEventBuilder withField(String key, Object value) {
data.put(key, value);
return this;
}
public LogEventBuilder withFields(Map<String, Object> fields) {
data.putAll(fields);
return this;
}
public LogEventBuilder withUserId(String userId) {
data.put("userId", userId);
return this;
}
public LogEventBuilder withOperation(String operation) {
data.put("operation", operation);
return this;
}
public LogEventBuilder withDuration(long durationMs) {
data.put("durationMs", durationMs);
return this;
}
public LogEventBuilder withError(Throwable error) {
data.put("errorType", error.getClass().getSimpleName());
data.put("errorMessage", error.getMessage());
return this;
}
public void log() {
try {
loggingService.addContextInfo(data);
logger.info("Custom Event: {}", loggingService.objectMapper.writeValueAsString(data));
} catch (Exception e) {
logger.error("Failed to log custom event", e);
}
}
public void logAsError() {
try {
loggingService.addContextInfo(data);
logger.error("Custom Error Event: {}", loggingService.objectMapper.writeValueAsString(data));
} catch (Exception e) {
logger.error("Failed to log custom error event", e);
}
}
public void logAsWarning() {
try {
loggingService.addContextInfo(data);
logger.warn("Custom Warning Event: {}", loggingService.objectMapper.writeValueAsString(data));
} catch (Exception e) {
logger.error("Failed to log custom warning event", e);
}
}
}
}
// 审计结果枚举
public enum AuditResult {
SUCCESS,
FAILURE,
UNAUTHORIZED,
FORBIDDEN,
NOT_FOUND,
VALIDATION_ERROR
}
// 安全事件类型枚举
public enum SecurityEventType {
LOGIN_SUCCESS,
LOGIN_FAILURE,
LOGOUT,
PASSWORD_CHANGE,
ACCOUNT_LOCKED,
UNAUTHORIZED_ACCESS,
SUSPICIOUS_ACTIVITY,
DATA_BREACH_ATTEMPT,
PRIVILEGE_ESCALATION,
SESSION_HIJACK_ATTEMPT
}
7.4.3 日志拦截器
package com.example.monitoring.interceptor;
import jakarta.annotation.Priority;
import jakarta.inject.Inject;
import jakarta.ws.rs.container.*;
import jakarta.ws.rs.core.Context;
import jakarta.ws.rs.core.HttpHeaders;
import jakarta.ws.rs.core.UriInfo;
import jakarta.ws.rs.ext.Provider;
import com.example.monitoring.logging.StructuredLoggingService;
import java.time.Instant;
import java.util.UUID;
@Provider
@Priority(1000)
public class LoggingInterceptor implements ContainerRequestFilter, ContainerResponseFilter {
private static final String START_TIME_PROPERTY = "logging.start.time";
private static final String REQUEST_ID_PROPERTY = "logging.request.id";
@Inject
StructuredLoggingService loggingService;
@Context
UriInfo uriInfo;
@Context
HttpHeaders httpHeaders;
@Override
public void filter(ContainerRequestContext requestContext) {
Instant startTime = Instant.now();
String requestId = UUID.randomUUID().toString();
// 设置属性
requestContext.setProperty(START_TIME_PROPERTY, startTime);
requestContext.setProperty(REQUEST_ID_PROPERTY, requestId);
// 设置日志上下文
String userId = extractUserId(requestContext);
String sessionId = extractSessionId(requestContext);
loggingService.setRequestContext(requestId, userId, sessionId);
// 记录请求开始
loggingService.createLogEvent("http-request-start")
.withField("method", requestContext.getMethod())
.withField("path", requestContext.getUriInfo().getPath())
.withField("userAgent", getUserAgent(requestContext))
.withField("clientIp", getClientIp(requestContext))
.withField("requestId", requestId)
.log();
}
@Override
public void filter(ContainerRequestContext requestContext, ContainerResponseContext responseContext) {
try {
Instant startTime = (Instant) requestContext.getProperty(START_TIME_PROPERTY);
String requestId = (String) requestContext.getProperty(REQUEST_ID_PROPERTY);
if (startTime != null) {
long durationMs = java.time.Duration.between(startTime, Instant.now()).toMillis();
// 记录 HTTP 请求完成
loggingService.logHttpRequest(
requestContext.getMethod(),
requestContext.getUriInfo().getPath(),
responseContext.getStatus(),
durationMs,
getUserAgent(requestContext),
getClientIp(requestContext)
);
// 如果是错误响应,记录错误日志
if (responseContext.getStatus() >= 400) {
loggingService.createLogEvent("http-error")
.withField("method", requestContext.getMethod())
.withField("path", requestContext.getUriInfo().getPath())
.withField("statusCode", responseContext.getStatus())
.withField("durationMs", durationMs)
.withField("requestId", requestId)
.logAsError();
}
}
} finally {
// 清理日志上下文
loggingService.clearContext();
}
}
private String extractUserId(ContainerRequestContext requestContext) {
// 从 JWT 令牌或会话中提取用户 ID
// 这里是简化实现
String authHeader = requestContext.getHeaderString("Authorization");
if (authHeader != null && authHeader.startsWith("Bearer ")) {
// 解析 JWT 令牌获取用户 ID
// 实际实现需要 JWT 解析逻辑
return "user123"; // 示例值
}
return null;
}
private String extractSessionId(ContainerRequestContext requestContext) {
// 从 Cookie 或 Header 中提取会话 ID
return requestContext.getHeaderString("X-Session-ID");
}
private String getUserAgent(ContainerRequestContext requestContext) {
return requestContext.getHeaderString("User-Agent");
}
private String getClientIp(ContainerRequestContext requestContext) {
// 尝试从各种 Header 中获取真实 IP
String xForwardedFor = requestContext.getHeaderString("X-Forwarded-For");
if (xForwardedFor != null && !xForwardedFor.isEmpty()) {
return xForwardedFor.split(",")[0].trim();
}
String xRealIp = requestContext.getHeaderString("X-Real-IP");
if (xRealIp != null && !xRealIp.isEmpty()) {
return xRealIp;
}
String xClientIp = requestContext.getHeaderString("X-Client-IP");
if (xClientIp != null && !xClientIp.isEmpty()) {
return xClientIp;
}
// 如果都没有,返回默认值
return "unknown";
}
}
7.5 分布式追踪
7.5.1 OpenTracing 配置
# OpenTracing 配置
quarkus.jaeger.service-name=my-quarkus-app
quarkus.jaeger.sampler-type=const
quarkus.jaeger.sampler-param=1
quarkus.jaeger.endpoint=http://localhost:14268/api/traces
# Jaeger Agent 配置
quarkus.jaeger.agent-host-port=localhost:6831
quarkus.jaeger.reporter-log-spans=true
quarkus.jaeger.reporter-max-queue-size=100
# 追踪标签
quarkus.jaeger.tags."environment"=${ENVIRONMENT:dev}
quarkus.jaeger.tags."version"=${quarkus.application.version}
quarkus.jaeger.tags."region"=${AWS_REGION:us-east-1}
# 采样配置
quarkus.jaeger.sampler-manager-host-port=localhost:5778
7.5.2 自定义追踪服务
package com.example.monitoring.tracing;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import io.opentracing.Tracer;
import io.opentracing.Span;
import io.opentracing.SpanContext;
import io.opentracing.tag.Tags;
import io.opentracing.util.GlobalTracer;
import java.util.Map;
import java.util.HashMap;
import java.util.function.Supplier;
@ApplicationScoped
public class TracingService {
@Inject
Tracer tracer;
// 创建新的 Span
public Span createSpan(String operationName) {
return tracer.buildSpan(operationName).start();
}
// 创建子 Span
public Span createChildSpan(String operationName, SpanContext parentContext) {
return tracer.buildSpan(operationName)
.asChildOf(parentContext)
.start();
}
// 创建子 Span(从当前活跃 Span)
public Span createChildSpan(String operationName) {
return tracer.buildSpan(operationName)
.asChildOf(tracer.activeSpan())
.start();
}
// 追踪数据库操作
public <T> T traceDatabase(String operation, String table, Supplier<T> supplier) {
Span span = createChildSpan("db." + operation);
try {
// 设置数据库相关标签
Tags.DB_TYPE.set(span, "postgresql");
Tags.DB_STATEMENT.set(span, operation);
span.setTag("db.table", table);
span.setTag("component", "hibernate");
return supplier.get();
} catch (Exception e) {
// 记录错误
Tags.ERROR.set(span, true);
span.setTag("error.message", e.getMessage());
span.setTag("error.type", e.getClass().getSimpleName());
throw e;
} finally {
span.finish();
}
}
// 追踪外部 HTTP 调用
public <T> T traceHttpCall(String method, String url, Supplier<T> supplier) {
Span span = createChildSpan("http." + method.toLowerCase());
try {
// 设置 HTTP 相关标签
Tags.HTTP_METHOD.set(span, method);
Tags.HTTP_URL.set(span, url);
Tags.SPAN_KIND.set(span, Tags.SPAN_KIND_CLIENT);
span.setTag("component", "http-client");
T result = supplier.get();
// 如果结果包含状态码,设置它
if (result instanceof HttpResponse) {
HttpResponse response = (HttpResponse) result;
Tags.HTTP_STATUS.set(span, response.getStatusCode());
if (response.getStatusCode() >= 400) {
Tags.ERROR.set(span, true);
}
}
return result;
} catch (Exception e) {
// 记录错误
Tags.ERROR.set(span, true);
span.setTag("error.message", e.getMessage());
span.setTag("error.type", e.getClass().getSimpleName());
throw e;
} finally {
span.finish();
}
}
// 追踪业务操作
public <T> T traceBusiness(String operation, Map<String, Object> tags, Supplier<T> supplier) {
Span span = createChildSpan("business." + operation);
try {
// 设置业务相关标签
span.setTag("operation.type", "business");
span.setTag("operation.name", operation);
if (tags != null) {
tags.forEach((key, value) -> {
if (value != null) {
span.setTag(key, value.toString());
}
});
}
return supplier.get();
} catch (Exception e) {
// 记录错误
Tags.ERROR.set(span, true);
span.setTag("error.message", e.getMessage());
span.setTag("error.type", e.getClass().getSimpleName());
throw e;
} finally {
span.finish();
}
}
// 追踪缓存操作
public <T> T traceCache(String operation, String key, Supplier<T> supplier) {
Span span = createChildSpan("cache." + operation);
try {
// 设置缓存相关标签
span.setTag("cache.operation", operation);
span.setTag("cache.key", key);
span.setTag("component", "redis");
T result = supplier.get();
// 设置缓存命中/未命中标签
if ("get".equals(operation)) {
span.setTag("cache.hit", result != null);
}
return result;
} catch (Exception e) {
// 记录错误
Tags.ERROR.set(span, true);
span.setTag("error.message", e.getMessage());
span.setTag("error.type", e.getClass().getSimpleName());
throw e;
} finally {
span.finish();
}
}
// 追踪消息队列操作
public <T> T traceMessage(String operation, String queue, String messageId, Supplier<T> supplier) {
Span span = createChildSpan("message." + operation);
try {
// 设置消息队列相关标签
span.setTag("message.operation", operation);
span.setTag("message.queue", queue);
span.setTag("message.id", messageId);
span.setTag("component", "rabbitmq");
if ("send".equals(operation) || "publish".equals(operation)) {
Tags.SPAN_KIND.set(span, Tags.SPAN_KIND_PRODUCER);
} else if ("receive".equals(operation) || "consume".equals(operation)) {
Tags.SPAN_KIND.set(span, Tags.SPAN_KIND_CONSUMER);
}
return supplier.get();
} catch (Exception e) {
// 记录错误
Tags.ERROR.set(span, true);
span.setTag("error.message", e.getMessage());
span.setTag("error.type", e.getClass().getSimpleName());
throw e;
} finally {
span.finish();
}
}
// 添加自定义标签到当前 Span
public void addTag(String key, String value) {
Span activeSpan = tracer.activeSpan();
if (activeSpan != null) {
activeSpan.setTag(key, value);
}
}
// 添加自定义标签到当前 Span
public void addTag(String key, Number value) {
Span activeSpan = tracer.activeSpan();
if (activeSpan != null) {
activeSpan.setTag(key, value);
}
}
// 添加自定义标签到当前 Span
public void addTag(String key, Boolean value) {
Span activeSpan = tracer.activeSpan();
if (activeSpan != null) {
activeSpan.setTag(key, value);
}
}
// 记录事件到当前 Span
public void logEvent(String event, Map<String, Object> fields) {
Span activeSpan = tracer.activeSpan();
if (activeSpan != null) {
Map<String, Object> logFields = new HashMap<>();
logFields.put("event", event);
if (fields != null) {
logFields.putAll(fields);
}
activeSpan.log(logFields);
}
}
// 记录错误到当前 Span
public void logError(Throwable error) {
Span activeSpan = tracer.activeSpan();
if (activeSpan != null) {
Tags.ERROR.set(activeSpan, true);
Map<String, Object> errorFields = new HashMap<>();
errorFields.put("event", "error");
errorFields.put("error.object", error);
errorFields.put("error.kind", error.getClass().getSimpleName());
errorFields.put("message", error.getMessage());
activeSpan.log(errorFields);
}
}
// 获取当前追踪 ID
public String getCurrentTraceId() {
Span activeSpan = tracer.activeSpan();
if (activeSpan != null) {
return activeSpan.context().toTraceId();
}
return null;
}
// 获取当前 Span ID
public String getCurrentSpanId() {
Span activeSpan = tracer.activeSpan();
if (activeSpan != null) {
return activeSpan.context().toSpanId();
}
return null;
}
// 创建追踪构建器
public TraceBuilder createTrace(String operationName) {
return new TraceBuilder(this, operationName);
}
// 追踪构建器
public static class TraceBuilder {
private final TracingService tracingService;
private final String operationName;
private final Map<String, Object> tags = new HashMap<>();
private SpanContext parentContext;
public TraceBuilder(TracingService tracingService, String operationName) {
this.tracingService = tracingService;
this.operationName = operationName;
}
public TraceBuilder withTag(String key, String value) {
tags.put(key, value);
return this;
}
public TraceBuilder withTag(String key, Number value) {
tags.put(key, value);
return this;
}
public TraceBuilder withTag(String key, Boolean value) {
tags.put(key, value);
return this;
}
public TraceBuilder withParent(SpanContext parentContext) {
this.parentContext = parentContext;
return this;
}
public <T> T execute(Supplier<T> supplier) {
Span span = parentContext != null
? tracingService.createChildSpan(operationName, parentContext)
: tracingService.createSpan(operationName);
try {
// 设置标签
tags.forEach((key, value) -> {
if (value instanceof String) {
span.setTag(key, (String) value);
} else if (value instanceof Number) {
span.setTag(key, (Number) value);
} else if (value instanceof Boolean) {
span.setTag(key, (Boolean) value);
} else {
span.setTag(key, value.toString());
}
});
return supplier.get();
} catch (Exception e) {
// 记录错误
Tags.ERROR.set(span, true);
span.setTag("error.message", e.getMessage());
span.setTag("error.type", e.getClass().getSimpleName());
throw e;
} finally {
span.finish();
}
}
public void execute(Runnable runnable) {
execute(() -> {
runnable.run();
return null;
});
}
}
}
// HTTP 响应接口(示例)
interface HttpResponse {
int getStatusCode();
}
7.6 告警与通知
7.6.1 告警配置
# 告警配置
app.alerting.enabled=true
app.alerting.email.enabled=true
app.alerting.slack.enabled=true
app.alerting.webhook.enabled=true
# 邮件告警配置
app.alerting.email.smtp.host=smtp.gmail.com
app.alerting.email.smtp.port=587
app.alerting.email.smtp.username=${SMTP_USERNAME}
app.alerting.email.smtp.password=${SMTP_PASSWORD}
app.alerting.email.from=alerts@mycompany.com
app.alerting.email.to=admin@mycompany.com,ops@mycompany.com
# Slack 告警配置
app.alerting.slack.webhook-url=${SLACK_WEBHOOK_URL}
app.alerting.slack.channel=#alerts
app.alerting.slack.username=QuarkusBot
# Webhook 告警配置
app.alerting.webhook.url=${ALERT_WEBHOOK_URL}
app.alerting.webhook.timeout=10s
# 告警阈值配置
app.alerting.thresholds.error-rate=0.05
app.alerting.thresholds.response-time=5000
app.alerting.thresholds.memory-usage=0.85
app.alerting.thresholds.cpu-usage=0.80
app.alerting.thresholds.disk-usage=0.90
# 告警抑制配置
app.alerting.suppression.duration=5m
app.alerting.suppression.max-alerts-per-hour=10
7.6.2 告警服务
package com.example.monitoring.alerting;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import io.smallrye.mutiny.Uni;
import java.time.Instant;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
@ApplicationScoped
public class AlertingService {
@Inject
EmailAlertSender emailAlertSender;
@Inject
SlackAlertSender slackAlertSender;
@Inject
WebhookAlertSender webhookAlertSender;
@ConfigProperty(name = "app.alerting.enabled", defaultValue = "true")
boolean alertingEnabled;
@ConfigProperty(name = "app.alerting.suppression.duration", defaultValue = "5m")
Duration suppressionDuration;
@ConfigProperty(name = "app.alerting.suppression.max-alerts-per-hour", defaultValue = "10")
int maxAlertsPerHour;
// 告警抑制缓存
private final Map<String, Instant> alertSuppressionCache = new ConcurrentHashMap<>();
private final Map<String, AtomicInteger> alertCountCache = new ConcurrentHashMap<>();
// 发送告警
public Uni<Void> sendAlert(Alert alert) {
if (!alertingEnabled) {
return Uni.createFrom().voidItem();
}
// 检查告警抑制
if (isAlertSuppressed(alert)) {
return Uni.createFrom().voidItem();
}
// 记录告警发送
recordAlertSent(alert);
// 并行发送到所有配置的通道
List<Uni<Void>> sendTasks = new ArrayList<>();
if (alert.getSeverity().ordinal() >= AlertSeverity.WARNING.ordinal()) {
sendTasks.add(emailAlertSender.sendAlert(alert));
}
if (alert.getSeverity().ordinal() >= AlertSeverity.CRITICAL.ordinal()) {
sendTasks.add(slackAlertSender.sendAlert(alert));
}
sendTasks.add(webhookAlertSender.sendAlert(alert));
return Uni.combine().all().unis(sendTasks)
.discardItems();
}
// 创建并发送错误率告警
public Uni<Void> sendErrorRateAlert(double errorRate, double threshold) {
Alert alert = Alert.builder()
.withTitle("High Error Rate Detected")
.withDescription(String.format("Error rate %.2f%% exceeds threshold %.2f%%", errorRate * 100, threshold * 100))
.withSeverity(errorRate > threshold * 2 ? AlertSeverity.CRITICAL : AlertSeverity.WARNING)
.withSource("error-rate-monitor")
.withTag("metric", "error_rate")
.withTag("current_value", String.valueOf(errorRate))
.withTag("threshold", String.valueOf(threshold))
.build();
return sendAlert(alert);
}
// 创建并发送响应时间告警
public Uni<Void> sendResponseTimeAlert(long responseTimeMs, long thresholdMs) {
Alert alert = Alert.builder()
.withTitle("High Response Time Detected")
.withDescription(String.format("Response time %dms exceeds threshold %dms", responseTimeMs, thresholdMs))
.withSeverity(responseTimeMs > thresholdMs * 2 ? AlertSeverity.CRITICAL : AlertSeverity.WARNING)
.withSource("response-time-monitor")
.withTag("metric", "response_time")
.withTag("current_value", String.valueOf(responseTimeMs))
.withTag("threshold", String.valueOf(thresholdMs))
.build();
return sendAlert(alert);
}
// 创建并发送资源使用告警
public Uni<Void> sendResourceUsageAlert(String resource, double usage, double threshold) {
Alert alert = Alert.builder()
.withTitle(String.format("High %s Usage Detected", resource))
.withDescription(String.format("%s usage %.2f%% exceeds threshold %.2f%%", resource, usage * 100, threshold * 100))
.withSeverity(usage > threshold * 1.2 ? AlertSeverity.CRITICAL : AlertSeverity.WARNING)
.withSource("resource-monitor")
.withTag("metric", resource.toLowerCase() + "_usage")
.withTag("current_value", String.valueOf(usage))
.withTag("threshold", String.valueOf(threshold))
.build();
return sendAlert(alert);
}
// 创建并发送健康检查告警
public Uni<Void> sendHealthCheckAlert(String component, String status, String details) {
AlertSeverity severity = "DOWN".equals(status) ? AlertSeverity.CRITICAL : AlertSeverity.WARNING;
Alert alert = Alert.builder()
.withTitle(String.format("%s Health Check Failed", component))
.withDescription(String.format("%s is %s: %s", component, status, details))
.withSeverity(severity)
.withSource("health-check-monitor")
.withTag("component", component)
.withTag("status", status)
.build();
return sendAlert(alert);
}
// 创建并发送自定义告警
public Uni<Void> sendCustomAlert(String title, String description, AlertSeverity severity, Map<String, String> tags) {
Alert.Builder builder = Alert.builder()
.withTitle(title)
.withDescription(description)
.withSeverity(severity)
.withSource("custom");
if (tags != null) {
tags.forEach(builder::withTag);
}
return sendAlert(builder.build());
}
// 检查告警是否被抑制
private boolean isAlertSuppressed(Alert alert) {
String alertKey = generateAlertKey(alert);
// 检查时间抑制
Instant lastSent = alertSuppressionCache.get(alertKey);
if (lastSent != null && Duration.between(lastSent, Instant.now()).compareTo(suppressionDuration) < 0) {
return true;
}
// 检查频率抑制
String hourlyKey = alertKey + "_" + (Instant.now().getEpochSecond() / 3600);
AtomicInteger hourlyCount = alertCountCache.computeIfAbsent(hourlyKey, k -> new AtomicInteger(0));
return hourlyCount.get() >= maxAlertsPerHour;
}
// 记录告警发送
private void recordAlertSent(Alert alert) {
String alertKey = generateAlertKey(alert);
Instant now = Instant.now();
// 更新抑制缓存
alertSuppressionCache.put(alertKey, now);
// 更新计数缓存
String hourlyKey = alertKey + "_" + (now.getEpochSecond() / 3600);
alertCountCache.computeIfAbsent(hourlyKey, k -> new AtomicInteger(0)).incrementAndGet();
// 清理过期缓存
cleanupExpiredCache();
}
// 生成告警键
private String generateAlertKey(Alert alert) {
return alert.getSource() + "_" + alert.getTitle().replaceAll("\\s+", "_").toLowerCase();
}
// 清理过期缓存
private void cleanupExpiredCache() {
Instant cutoff = Instant.now().minus(suppressionDuration.multipliedBy(2));
alertSuppressionCache.entrySet().removeIf(entry -> entry.getValue().isBefore(cutoff));
long currentHour = Instant.now().getEpochSecond() / 3600;
alertCountCache.entrySet().removeIf(entry -> {
String[] parts = entry.getKey().split("_");
if (parts.length > 0) {
try {
long hour = Long.parseLong(parts[parts.length - 1]);
return currentHour - hour > 2; // 保留最近2小时的数据
} catch (NumberFormatException e) {
return true; // 无效格式,删除
}
}
return true;
});
}
// 获取告警统计
public AlertStatistics getAlertStatistics() {
AlertStatistics stats = new AlertStatistics();
long currentHour = Instant.now().getEpochSecond() / 3600;
// 计算当前小时的告警数量
int currentHourAlerts = alertCountCache.entrySet().stream()
.filter(entry -> {
String[] parts = entry.getKey().split("_");
if (parts.length > 0) {
try {
long hour = Long.parseLong(parts[parts.length - 1]);
return hour == currentHour;
} catch (NumberFormatException e) {
return false;
}
}
return false;
})
.mapToInt(entry -> entry.getValue().get())
.sum();
stats.currentHourAlerts = currentHourAlerts;
stats.suppressedAlerts = alertSuppressionCache.size();
stats.totalAlertTypes = alertCountCache.size();
return stats;
}
// 告警统计类
public static class AlertStatistics {
public int currentHourAlerts;
public int suppressedAlerts;
public int totalAlertTypes;
}
}
7.7 本章小结
7.7.1 核心概念回顾
本章深入探讨了 Quarkus 应用的监控、日志和健康检查体系:
- 应用监控:使用 Micrometer 和 Prometheus 实现全面的指标收集
- 健康检查:实现存活、就绪和启动检查,确保应用状态可观测
- 结构化日志:采用 JSON 格式和 MDC 上下文,提升日志可分析性
- 分布式追踪:使用 OpenTracing 和 Jaeger 实现请求链路追踪
- 告警通知:建立多渠道告警机制,及时响应系统异常
7.7.2 技术要点总结
- 指标体系:计数器、计时器、仪表和分布摘要的合理使用
- 健康检查策略:区分不同类型的健康检查,提供详细的状态信息
- 日志最佳实践:结构化日志、上下文传递和日志级别管理
- 追踪集成:与业务逻辑无缝集成的分布式追踪
- 告警策略:智能抑制和多级别告警机制
7.7.3 最佳实践
- 监控设计:建立分层监控体系,从基础设施到业务指标
- 告警策略:避免告警疲劳,设置合理的阈值和抑制规则
- 日志管理:平衡日志详细程度和性能影响
- 追踪采样:在性能和可观测性之间找到平衡点
- 健康检查:设计快速响应的健康检查,避免级联故障
7.7.4 下一章预告
下一章将学习《测试策略与实践》,包括: - 单元测试和集成测试 - 测试容器的使用 - 性能测试和压力测试 - 测试数据管理 - 持续集成中的测试策略 “`