## 学习目标
通过本章学习,你将掌握:
应用程序部署基础
- 部署模式和策略
- 配置管理
- 环境隔离
- 版本控制
容器化技术
- Docker容器化
- 多阶段构建
- 容器编排
- 镜像优化
持续集成/持续部署(CI/CD)
- GitHub Actions
- Azure DevOps
- 自动化测试
- 部署流水线
云平台部署
- Azure App Service
- AWS部署
- 负载均衡
- 自动扩缩容
监控和日志
- 应用程序监控
- 性能指标
- 日志聚合
- 告警系统
## 20.1 应用程序部署基础
### 部署配置管理
// Environment configuration management
/// <summary>Aggregates the database, external API, logging, security and performance settings of one deployment environment.</summary>
public class DeploymentConfiguration
{
// Display name of the environment; LoadConfigurations in this file assigns "Development" and "Production".
public string Environment { get; set; }
public DatabaseConfig Database { get; set; }
public ApiConfig ExternalApis { get; set; }
public LoggingConfig Logging { get; set; }
public SecurityConfig Security { get; set; }
public PerformanceConfig Performance { get; set; }
}
/// <summary>Database connection settings for one environment.</summary>
public class DatabaseConfig
{
// ConfigurationManager.TestDatabaseConnectionAsync opens a SqlConnection with this value.
public string ConnectionString { get; set; }
public int MaxPoolSize { get; set; }
// NOTE(review): the unit is not shown in this file (presumably seconds) — confirm.
public int CommandTimeout { get; set; }
public bool EnableRetryOnFailure { get; set; }
// In this file only the production configuration assigns a value here.
public string BackupConnectionString { get; set; }
}
/// <summary>Connection settings for the external API used by one environment.</summary>
public class ApiConfig
{
    /// <summary>Root URL of the API; the health probe in this file appends "/health" to it.</summary>
    public string BaseUrl { get; set; }

    /// <summary>Credential sent to the API. Load it from configuration or a secret store, never from source.</summary>
    public string ApiKey { get; set; }

    /// <summary>Request timeout in seconds (the health probe converts it with TimeSpan.FromSeconds).</summary>
    public int TimeoutSeconds { get; set; }

    public int MaxRetries { get; set; }

    /// <summary>
    /// Extra headers to send with each request. Defaults to an empty dictionary so that
    /// consumers can enumerate it without a null check even when no headers were configured.
    /// </summary>
    public Dictionary<string, string> Headers { get; set; } = new Dictionary<string, string>();
}
/// <summary>Logging settings for one environment: level, output template and the list of sinks.</summary>
public class LoggingConfig
{
// LoadConfigurations in this file uses "Debug" (development) and "Information" (production).
public string Level { get; set; }
public string OutputTemplate { get; set; }
public List<LogSink> Sinks { get; set; }
public bool EnableStructuredLogging { get; set; }
}
/// <summary>Describes one logging destination.</summary>
public class LogSink
{
public string Type { get; set; } // Console, File, Database, ElasticSearch
// Sink-specific setting; this file passes a log-file path template for "File" and a URL for "ElasticSearch".
public string Configuration { get; set; }
public string MinimumLevel { get; set; }
}
/// <summary>Security settings for one environment (JWT, HTTPS, CORS origins, encryption).</summary>
public class SecurityConfig
{
// ConfigurationManager.ValidateConfiguration rejects a configuration whose JwtSecret is null or empty.
public string JwtSecret { get; set; }
public int JwtExpirationMinutes { get; set; }
public bool RequireHttps { get; set; }
public List<string> AllowedOrigins { get; set; }
public EncryptionConfig Encryption { get; set; }
}
/// <summary>Encryption parameters held as plain strings.</summary>
// NOTE(review): the string encoding of Key and IV (Base64, hex, ...) is not shown in this file — confirm.
// NOTE(review): a fixed IV stored in configuration is usually unsafe for symmetric ciphers — verify how it is used.
public class EncryptionConfig
{
public string Algorithm { get; set; }
public string Key { get; set; }
public string IV { get; set; }
}
/// <summary>Throughput, caching, compression and rate-limit settings for one environment.</summary>
public class PerformanceConfig
{
public int MaxConcurrentRequests { get; set; }
public int CacheExpirationMinutes { get; set; }
public bool EnableCompression { get; set; }
public bool EnableResponseCaching { get; set; }
public RateLimitConfig RateLimit { get; set; }
}
/// <summary>Rate-limiting parameters.</summary>
public class RateLimitConfig
{
public int RequestsPerMinute { get; set; }
public int BurstSize { get; set; }
// This file uses "SlidingWindow" for development and "TokenBucket" for production.
public string Strategy { get; set; } // FixedWindow, SlidingWindow, TokenBucket
}
// Configuration management service
/// <summary>
/// Builds the hard-coded "development" and "production" <see cref="DeploymentConfiguration"/>
/// objects at construction time and offers lookup, validation and health-check helpers for them.
/// </summary>
public class ConfigurationManager
{
    // One shared client for every health probe: creating an HttpClient per call can exhaust
    // sockets under load. Its own timeout is disabled because each probe applies a
    // per-request timeout taken from ApiConfig.TimeoutSeconds.
    private static readonly HttpClient HealthCheckClient = new HttpClient
    {
        Timeout = System.Threading.Timeout.InfiniteTimeSpan
    };

    private readonly IConfiguration _configuration;
    private readonly ILogger<ConfigurationManager> _logger;
    private readonly Dictionary<string, DeploymentConfiguration> _environmentConfigs;

    /// <summary>Stores the dependencies and eagerly builds all environment configurations.</summary>
    /// <param name="configuration">Source of connection strings and secrets.</param>
    /// <param name="logger">Logger used for warnings, errors and validation messages.</param>
    /// <exception cref="ArgumentNullException"><paramref name="configuration"/> is null.</exception>
    public ConfigurationManager(IConfiguration configuration, ILogger<ConfigurationManager> logger)
    {
        _configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
        _logger = logger;

        // Ordinal, case-insensitive keys replace a culture-sensitive ToLower() lookup, which
        // mis-maps names such as "PRODUCTION" under cultures with special casing rules
        // (e.g. Turkish dotless i) and then silently falls back to the default configuration.
        _environmentConfigs = new Dictionary<string, DeploymentConfiguration>(StringComparer.OrdinalIgnoreCase);
        LoadConfigurations();
    }

    /// <summary>Returns the configuration registered for <paramref name="environment"/> (case-insensitive).</summary>
    /// <returns>
    /// The matching configuration, or the development configuration (after logging a warning)
    /// when the name is null or unknown.
    /// </returns>
    public DeploymentConfiguration GetConfiguration(string environment)
    {
        if (environment != null && _environmentConfigs.TryGetValue(environment, out var config))
        {
            return config;
        }

        _logger.LogWarning("Configuration for environment {Environment} not found, using default", environment);
        return GetDefaultConfiguration();
    }

    /// <summary>Checks that the required settings of an environment are present.</summary>
    /// <exception cref="InvalidOperationException">
    /// The database connection string, JWT secret or external API base URL is missing.
    /// </exception>
    public void ValidateConfiguration(string environment)
    {
        var config = GetConfiguration(environment);
        var validationResults = new List<string>();

        // Database settings
        if (string.IsNullOrEmpty(config.Database?.ConnectionString))
        {
            validationResults.Add("Database connection string is required");
        }

        // Security settings
        if (string.IsNullOrEmpty(config.Security?.JwtSecret))
        {
            validationResults.Add("JWT secret is required");
        }

        // External API settings
        if (string.IsNullOrEmpty(config.ExternalApis?.BaseUrl))
        {
            validationResults.Add("External API base URL is required");
        }

        if (validationResults.Count > 0)
        {
            var errors = string.Join(", ", validationResults);
            throw new InvalidOperationException($"Configuration validation failed: {errors}");
        }

        _logger.LogInformation("Configuration validation passed for environment {Environment}", environment);
    }

    /// <summary>Tries to open a SQL connection with the environment's connection string.</summary>
    /// <returns>true when the connection opened; false (after logging the error) on any failure.</returns>
    public async Task<bool> TestDatabaseConnectionAsync(string environment)
    {
        try
        {
            var config = GetConfiguration(environment);
            using var connection = new SqlConnection(config.Database.ConnectionString);
            await connection.OpenAsync();
            _logger.LogInformation("Database connection test successful for environment {Environment}", environment);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Database connection test failed for environment {Environment}", environment);
            return false;
        }
    }

    /// <summary>Runs the database, external API, cache and file-system checks one after another.</summary>
    /// <returns>A map with the keys "Database", "ExternalAPI", "Cache" and "FileSystem".</returns>
    public async Task<Dictionary<string, bool>> RunHealthChecksAsync(string environment)
    {
        var config = GetConfiguration(environment);
        var results = new Dictionary<string, bool>();

        // Database health check
        results["Database"] = await TestDatabaseConnectionAsync(environment);
        // External API health check
        results["ExternalAPI"] = await TestExternalApiAsync(config.ExternalApis);
        // Cache health check
        results["Cache"] = await TestCacheAsync();
        // File-system health check
        results["FileSystem"] = TestFileSystemAccess();

        return results;
    }

    // Registers the built-in environment configurations.
    private void LoadConfigurations()
    {
        // The JWT secret comes from configuration (user secrets, environment variables or a
        // vault provider), never from source. Without it ValidateConfiguration can never pass.
        // NOTE(review): "Security:JwtSecret" is the key assumed here — align it with the real settings layout.
        var jwtSecret = _configuration["Security:JwtSecret"];

        // Development environment
        _environmentConfigs["development"] = new DeploymentConfiguration
        {
            Environment = "Development",
            Database = new DatabaseConfig
            {
                ConnectionString = _configuration.GetConnectionString("DefaultConnection"),
                MaxPoolSize = 10,
                CommandTimeout = 30,
                EnableRetryOnFailure = true
            },
            ExternalApis = new ApiConfig
            {
                BaseUrl = "https://api-dev.example.com",
                TimeoutSeconds = 30,
                MaxRetries = 3
            },
            Logging = new LoggingConfig
            {
                Level = "Debug",
                EnableStructuredLogging = true,
                Sinks = new List<LogSink>
                {
                    new LogSink { Type = "Console", MinimumLevel = "Debug" },
                    new LogSink { Type = "File", Configuration = "logs/app-{Date}.log", MinimumLevel = "Information" }
                }
            },
            Security = new SecurityConfig
            {
                JwtSecret = jwtSecret,
                JwtExpirationMinutes = 60,
                RequireHttps = false,
                AllowedOrigins = new List<string> { "http://localhost:3000", "http://localhost:5000" }
            },
            Performance = new PerformanceConfig
            {
                MaxConcurrentRequests = 100,
                CacheExpirationMinutes = 5,
                EnableCompression = true,
                EnableResponseCaching = false,
                RateLimit = new RateLimitConfig
                {
                    RequestsPerMinute = 1000,
                    BurstSize = 100,
                    Strategy = "SlidingWindow"
                }
            }
        };

        // Production environment
        _environmentConfigs["production"] = new DeploymentConfiguration
        {
            Environment = "Production",
            Database = new DatabaseConfig
            {
                ConnectionString = _configuration.GetConnectionString("ProductionConnection"),
                MaxPoolSize = 50,
                CommandTimeout = 60,
                EnableRetryOnFailure = true,
                BackupConnectionString = _configuration.GetConnectionString("BackupConnection")
            },
            ExternalApis = new ApiConfig
            {
                BaseUrl = "https://api.example.com",
                TimeoutSeconds = 60,
                MaxRetries = 5
            },
            Logging = new LoggingConfig
            {
                Level = "Information",
                EnableStructuredLogging = true,
                Sinks = new List<LogSink>
                {
                    new LogSink { Type = "Console", MinimumLevel = "Warning" },
                    new LogSink { Type = "File", Configuration = "logs/app-{Date}.log", MinimumLevel = "Information" },
                    new LogSink { Type = "ElasticSearch", Configuration = "http://elasticsearch:9200", MinimumLevel = "Information" }
                }
            },
            Security = new SecurityConfig
            {
                JwtSecret = jwtSecret,
                JwtExpirationMinutes = 15,
                RequireHttps = true,
                AllowedOrigins = new List<string> { "https://app.example.com" }
            },
            Performance = new PerformanceConfig
            {
                MaxConcurrentRequests = 1000,
                CacheExpirationMinutes = 60,
                EnableCompression = true,
                EnableResponseCaching = true,
                RateLimit = new RateLimitConfig
                {
                    RequestsPerMinute = 10000,
                    BurstSize = 1000,
                    Strategy = "TokenBucket"
                }
            }
        };
    }

    // Fallback used by GetConfiguration for unknown environment names.
    private DeploymentConfiguration GetDefaultConfiguration()
    {
        return _environmentConfigs["development"];
    }

    // Issues GET {BaseUrl}/health and reports whether the status code indicates success.
    // Any failure (bad config, timeout, network error) is logged and reported as unhealthy.
    private async Task<bool> TestExternalApiAsync(ApiConfig apiConfig)
    {
        try
        {
            // Per-request timeout; the shared client itself has no timeout.
            using var timeout = new System.Threading.CancellationTokenSource(
                TimeSpan.FromSeconds(apiConfig.TimeoutSeconds));

            // TrimEnd avoids a "//health" path when BaseUrl is configured with a trailing slash.
            var healthUrl = $"{apiConfig.BaseUrl.TrimEnd('/')}/health";
            using var response = await HealthCheckClient.GetAsync(healthUrl, timeout.Token);
            return response.IsSuccessStatusCode;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "External API health check failed for {BaseUrl}", apiConfig?.BaseUrl);
            return false;
        }
    }

    // Placeholder: always reports healthy after a short delay.
    // TODO: replace with a real probe of the cache service (Redis etc.).
    private async Task<bool> TestCacheAsync()
    {
        try
        {
            await Task.Delay(10); // simulated cache check
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Cache health check failed");
            return false;
        }
    }

    // Verifies that a temp file can be created, written and deleted.
    private bool TestFileSystemAccess()
    {
        string tempFile = null;
        try
        {
            tempFile = Path.GetTempFileName();
            File.WriteAllText(tempFile, "test");
            File.Delete(tempFile);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "File system health check failed");

            // Best-effort cleanup so a failed write does not leave the temp file behind.
            if (tempFile != null)
            {
                try
                {
                    File.Delete(tempFile);
                }
                catch (Exception)
                {
                    // Nothing more can be done; the check is already reported as failed.
                }
            }

            return false;
        }
    }
}
// Monitoring data models
/// <summary>Monitoring report covering a time range: performance, business, error, health, resource and trend sections.</summary>
// PerformanceMetrics, ErrorStatistics and HealthCheckResult are not declared in the visible part of this file.
public class MonitoringReport
{
public TimeSpan TimeRange { get; set; }
public DateTime GeneratedAt { get; set; }
public PerformanceMetrics PerformanceMetrics { get; set; }
public BusinessMetrics BusinessMetrics { get; set; }
public ErrorStatistics ErrorStatistics { get; set; }
public List<HealthCheckResult> HealthCheckResults { get; set; }
public ResourceUsage ResourceUsage { get; set; }
public TrendAnalysis TrendAnalysis { get; set; }
}
/// <summary>Business-level counters: users, registrations, orders, revenue and conversion rate.</summary>
public class BusinessMetrics
{
public int TotalUsers { get; set; }
public int ActiveUsers { get; set; }
public int NewRegistrations { get; set; }
public int TotalOrders { get; set; }
public decimal Revenue { get; set; }
// NOTE(review): scale (0-1 fraction vs. 0-100 percent) is not shown in this file — confirm.
public double ConversionRate { get; set; }
}
/// <summary>CPU, memory, disk and network usage figures for a report.</summary>
// NOTE(review): units (percent, bytes, ...) are not shown in this file — confirm with the producer.
public class ResourceUsage
{
public double AverageCpuUsage { get; set; }
public double PeakCpuUsage { get; set; }
public double AverageMemoryUsage { get; set; }
public double PeakMemoryUsage { get; set; }
public double DiskUsage { get; set; }
public long NetworkTraffic { get; set; }
}
/// <summary>Trend direction of response time, error rate, throughput and user growth.</summary>
public class TrendAnalysis
{
public TrendDirection ResponseTimeTrend { get; set; }
public TrendDirection ErrorRateTrend { get; set; }
public TrendDirection ThroughputTrend { get; set; }
public TrendDirection UserGrowthTrend { get; set; }
}
/// <summary>Direction of a metric trend. Increasing is the default value (0).</summary>
public enum TrendDirection
{
Increasing,
Decreasing,
Stable
}
/// <summary>An alert with its classification, description, source and the value/threshold pair attached to it.</summary>
public class Alert
{
public string Id { get; set; }
public AlertType Type { get; set; }
public AlertSeverity Severity { get; set; }
public string Title { get; set; }
public string Description { get; set; }
public DateTime Timestamp { get; set; }
public string Source { get; set; }
// Presumably the measured value and the limit it was compared against — verify against the alert producer.
public double Value { get; set; }
public double Threshold { get; set; }
}
/// <summary>Category of an <see cref="Alert"/>. Performance is the default value (0).</summary>
public enum AlertType
{
Performance,
Error,
Security,
Business
}
/// <summary>Severity of an <see cref="Alert"/>, declared in ascending order. Info is the default value (0).</summary>
public enum AlertSeverity
{
Info,
Warning,
Critical
}
/// <summary>Snapshot for a dashboard: real-time metrics, system status text, recent alerts and per-service statuses.</summary>
public class DashboardData
{
public DateTime Timestamp { get; set; }
public RealTimeMetrics RealTimeMetrics { get; set; }
public string SystemStatus { get; set; }
public List<Alert> RecentAlerts { get; set; }
public List<ServiceStatus> ServiceStatuses { get; set; }
}
/// <summary>Point-in-time metrics shown on the dashboard.</summary>
// NOTE(review): units (percent, milliseconds, ...) are not shown in this file — confirm.
public class RealTimeMetrics
{
public double CpuUsage { get; set; }
public double MemoryUsage { get; set; }
public int ActiveConnections { get; set; }
public double RequestsPerSecond { get; set; }
public double AverageResponseTime { get; set; }
public double ErrorRate { get; set; }
}
/// <summary>Status line for one service on the dashboard.</summary>
// NOTE(review): the allowed values of Status and Health are free-form strings here — confirm the vocabulary.
public class ServiceStatus
{
public string Name { get; set; }
public string Status { get; set; }
public string Health { get; set; }
public DateTime LastChecked { get; set; }
}
// Logging data models
/// <summary>A single log record with its level, message, originating service, structured properties and trace identifiers.</summary>
public class LogEntry
{
public DateTime Timestamp { get; set; }
public string Level { get; set; }
public string Message { get; set; }
public string ServiceName { get; set; }
// Exception details as text (the property type is string, not Exception).
public string Exception { get; set; }
public Dictionary<string, object> Properties { get; set; }
public string TraceId { get; set; }
public string SpanId { get; set; }
}
/// <summary>Filter and paging parameters for a log search. All filters are optional (nullable or null by default).</summary>
public class LogSearchRequest
{
public DateTime? StartTime { get; set; }
public DateTime? EndTime { get; set; }
public string LogLevel { get; set; }
public string ServiceName { get; set; }
public string Keyword { get; set; }
// Defaults to 0 — presumably a zero-based page index; verify against the search implementation.
public int PageNumber { get; set; } = 0;
// Defaults to 100 entries per page.
public int PageSize { get; set; } = 100;
}
/// <summary>One page of log search results plus the total match count and optional aggregations.</summary>
public class LogSearchResult
{
public long TotalCount { get; set; }
public List<LogEntry> Logs { get; set; }
public int PageNumber { get; set; }
public int PageSize { get; set; }
public Dictionary<string, object> Aggregations { get; set; }
}
/// <summary>Parameters for a log analysis run: time range plus service and level filters.</summary>
public class LogAnalysisRequest
{
public TimeSpan TimeRange { get; set; }
public List<string> ServiceNames { get; set; }
public List<string> LogLevels { get; set; }
}
/// <summary>Result of a log analysis run, split into error, performance, user-behavior, security and trend sections.</summary>
public class LogAnalysisResult
{
public TimeSpan TimeRange { get; set; }
public DateTime AnalyzedAt { get; set; }
public ErrorAnalysis ErrorAnalysis { get; set; }
public PerformanceAnalysis PerformanceAnalysis { get; set; }
public UserBehaviorAnalysis UserBehaviorAnalysis { get; set; }
public SecurityAnalysis SecurityAnalysis { get; set; }
// Note the type: LogTrendAnalysis, not the TrendAnalysis class used by MonitoringReport.
public LogTrendAnalysis TrendAnalysis { get; set; }
}
/// <summary>Error totals, error rate and the most frequent error types.</summary>
public class ErrorAnalysis
{
public int TotalErrors { get; set; }
public double ErrorRate { get; set; }
public List<ErrorSummary> TopErrors { get; set; }
}
/// <summary>Occurrence count and share of one error type.</summary>
public class ErrorSummary
{
public string ErrorType { get; set; }
public int Count { get; set; }
public double Percentage { get; set; }
}
/// <summary>Response-time statistics (average, P95, P99) and the slowest endpoints.</summary>
// NOTE(review): the time unit of the response-time values is not shown in this file — confirm.
public class PerformanceAnalysis
{
public double AverageResponseTime { get; set; }
public double P95ResponseTime { get; set; }
public double P99ResponseTime { get; set; }
public List<EndpointPerformance> SlowestEndpoints { get; set; }
}
/// <summary>Average response time and request count of one endpoint.</summary>
public class EndpointPerformance
{
public string Endpoint { get; set; }
public double AverageResponseTime { get; set; }
public int RequestCount { get; set; }
}
/// <summary>User totals and the most visited pages.</summary>
public class UserBehaviorAnalysis
{
public int TotalUsers { get; set; }
public int ActiveUsers { get; set; }
public List<PageVisit> TopPages { get; set; }
}
/// <summary>Visit and unique-visitor counts of one page.</summary>
public class PageVisit
{
public string Page { get; set; }
public int Visits { get; set; }
public int UniqueVisitors { get; set; }
}
/// <summary>Security-related counters and the most active attack sources.</summary>
public class SecurityAnalysis
{
public int FailedLoginAttempts { get; set; }
public int SuspiciousActivities { get; set; }
public int BlockedRequests { get; set; }
public List<AttackSource> TopAttackSources { get; set; }
}
/// <summary>One attack source: IP address, number of attacks and attack type.</summary>
public class AttackSource
{
public string IpAddress { get; set; }
public int AttackCount { get; set; }
public string AttackType { get; set; }
}
/// <summary>Trend direction of log volume, error rate and response time.</summary>
public class LogTrendAnalysis
{
public TrendDirection LogVolumeTrend { get; set; }
public TrendDirection ErrorRateTrend { get; set; }
public TrendDirection ResponseTimeTrend { get; set; }
}
/// <summary>Parameters for an anomaly-detection run.</summary>
public class AnomalyDetectionRequest
{
public TimeSpan TimeRange { get; set; }
public List<string> MetricNames { get; set; }
// Defaults to 0.8. NOTE(review): the valid range (presumably 0-1) is not shown in this file — confirm.
public double SensitivityLevel { get; set; } = 0.8;
}
/// <summary>Alert raised from log data. Type and Severity are plain strings here, unlike the enum-typed <see cref="Alert"/>.</summary>
public class LogAlert
{
public string Id { get; set; }
public string Type { get; set; }
public string Severity { get; set; }
public string Message { get; set; }
public DateTime DetectedAt { get; set; }
}
/// <summary>Dashboard view over log data for a time range.</summary>
// ErrorStatistics and PerformanceMetrics are not declared in the visible part of this file.
public class LogDashboard
{
public TimeSpan TimeRange { get; set; }
public DateTime GeneratedAt { get; set; }
public LogStatistics LogStatistics { get; set; }
public ErrorStatistics ErrorStatistics { get; set; }
public PerformanceMetrics PerformanceMetrics { get; set; }
public List<EndpointStatistics> TopEndpoints { get; set; }
public UserActivitySummary UserActivity { get; set; }
public List<LogAlert> RecentAlerts { get; set; }
}
/// <summary>Log volume totals, broken down by level and by service.</summary>
public class LogStatistics
{
public long TotalLogs { get; set; }
public Dictionary<string, int> LogsByLevel { get; set; }
public Dictionary<string, int> LogsByService { get; set; }
}
/// <summary>Request count, average response time and error count of one endpoint.</summary>
public class EndpointStatistics
{
public string Endpoint { get; set; }
public int RequestCount { get; set; }
public double AverageResponseTime { get; set; }
public int ErrorCount { get; set; }
}
/// <summary>User counts and the most frequent user actions (action name to count).</summary>
public class UserActivitySummary
{
public int TotalUsers { get; set; }
public int ActiveUsers { get; set; }
public int NewUsers { get; set; }
public Dictionary<string, int> TopUserActions { get; set; }
}
// Pipeline data models
/// <summary>Input of DeploymentPipeline.ExecutePipelineAsync: what to check out, build, package and where to deploy.</summary>
public class PipelineRequest
{
public string Repository { get; set; }
public string Branch { get; set; }
// Directory the pipeline checks the code out into and builds/tests from.
public string WorkingDirectory { get; set; }
public string BuildConfiguration { get; set; }
// The pipeline tags the container image as "{ImageName}:{Version}".
public string ImageName { get; set; }
public string Version { get; set; }
public string Registry { get; set; }
// When true the pipeline deploys to "staging" and runs integration tests there.
public bool DeployToStaging { get; set; }
// When true the pipeline performs a blue-green deployment to "production" and verifies it.
public bool DeployToProduction { get; set; }
}
/// <summary>Outcome of one pipeline run: identity, timing, status, error text and the executed steps.</summary>
public class PipelineResult
{
    public string PipelineId { get; set; }
    public DateTime StartTime { get; set; }
    public DateTime? EndTime { get; set; }
    public PipelineStatus Status { get; set; }
    public string ErrorMessage { get; set; }
    public PipelineRequest Request { get; set; }
    public List<PipelineStep> Steps { get; set; } = new List<PipelineStep>();

    /// <summary>Elapsed time from StartTime to EndTime, or TimeSpan.Zero while EndTime is not set.</summary>
    public TimeSpan Duration
    {
        get
        {
            if (!EndTime.HasValue)
            {
                return TimeSpan.Zero;
            }

            return EndTime.Value.Subtract(StartTime);
        }
    }
}
/// <summary>Record of one pipeline step: name, timing, status, output and error text.</summary>
public class PipelineStep
{
    public string Name { get; set; }
    public DateTime StartTime { get; set; }
    public DateTime? EndTime { get; set; }
    public StepStatus Status { get; set; }
    public string Output { get; set; }
    public string ErrorMessage { get; set; }

    /// <summary>Elapsed time from StartTime to EndTime, or TimeSpan.Zero while EndTime is not set.</summary>
    public TimeSpan Duration
    {
        get
        {
            if (!EndTime.HasValue)
            {
                return TimeSpan.Zero;
            }

            return EndTime.Value.Subtract(StartTime);
        }
    }
}
/// <summary>Overall state of a pipeline run. Running is the default value (0), so a new PipelineResult starts as Running.</summary>
public enum PipelineStatus
{
Running,
Success,
Failed,
Cancelled
}
/// <summary>State of a single pipeline step. Pending is the default value (0).</summary>
public enum StepStatus
{
Pending,
Running,
Success,
Failed,
Skipped
}
// Microservice monitoring data models
/// <summary>Health report over all discovered services, with per-status counts derived from ServiceHealths.</summary>
public class MicroserviceHealthReport
{
    public DateTime GeneratedAt { get; set; }
    public HealthStatus OverallHealth { get; set; }
    public List<ServiceHealth> ServiceHealths { get; set; } = new List<ServiceHealth>();

    /// <summary>Number of entries in ServiceHealths.</summary>
    public int TotalServices
    {
        get { return ServiceHealths.Count; }
    }

    public int HealthyServices
    {
        get { return CountWithStatus(HealthStatus.Healthy); }
    }

    public int WarningServices
    {
        get { return CountWithStatus(HealthStatus.Warning); }
    }

    public int CriticalServices
    {
        get { return CountWithStatus(HealthStatus.Critical); }
    }

    // Counts the services whose Status equals the given value.
    private int CountWithStatus(HealthStatus wanted)
    {
        return ServiceHealths.Count(entry => entry.Status == wanted);
    }
}
/// <summary>Health snapshot of one service: status, instance counts, resource usage and request/error rates.</summary>
// NOTE(review): Status and IsHealthy are stored independently; nothing in this file keeps them in sync — confirm the producer does.
public class ServiceHealth
{
public string ServiceName { get; set; }
public HealthStatus Status { get; set; }
public bool IsHealthy { get; set; }
public TimeSpan ResponseTime { get; set; }
public int InstanceCount { get; set; }
public int HealthyInstanceCount { get; set; }
public double CpuUsage { get; set; }
public double MemoryUsage { get; set; }
public double ErrorRate { get; set; }
public double RequestRate { get; set; }
public DateTime CheckedAt { get; set; }
public string ErrorMessage { get; set; }
}
/// <summary>Health level of a service or of the whole system. Healthy is the default value (0).</summary>
public enum HealthStatus
{
Healthy,
Warning,
Critical
}
/// <summary>Metrics collected for one service over a time range; filled by MicroserviceMonitor.CollectServiceMetricsAsync.</summary>
// PerformanceMetrics is not declared in the visible part of this file.
public class ServiceMetricsReport
{
public string ServiceName { get; set; }
public TimeSpan TimeRange { get; set; }
public DateTime CollectedAt { get; set; }
public PerformanceMetrics PerformanceMetrics { get; set; }
public BusinessMetrics BusinessMetrics { get; set; }
public ErrorMetrics ErrorMetrics { get; set; }
public ResourceMetrics ResourceMetrics { get; set; }
public DependencyAnalysis DependencyAnalysis { get; set; }
}
/// <summary>Error totals, error rate, a per-type breakdown and a time series of error trends.</summary>
public class ErrorMetrics
{
public int TotalErrors { get; set; }
public double ErrorRate { get; set; }
public Dictionary<string, int> ErrorsByType { get; set; }
public List<ErrorTrend> ErrorTrends { get; set; }
}
/// <summary>Error count and error rate at one point in time.</summary>
public class ErrorTrend
{
public DateTime Timestamp { get; set; }
public int ErrorCount { get; set; }
public double ErrorRate { get; set; }
}
/// <summary>CPU and memory usage plus network and disk byte counters of one service.</summary>
// NOTE(review): the unit of the CPU/memory usage values is not shown in this file — confirm.
public class ResourceMetrics
{
public double AverageCpuUsage { get; set; }
public double PeakCpuUsage { get; set; }
public double AverageMemoryUsage { get; set; }
public double PeakMemoryUsage { get; set; }
public long NetworkBytesIn { get; set; }
public long NetworkBytesOut { get; set; }
public long DiskReadBytes { get; set; }
public long DiskWriteBytes { get; set; }
}
/// <summary>Aggregated view of the downstream dependencies one service calls.</summary>
public class DependencyAnalysis
{
    public string ServiceName { get; set; }
    public List<DependencyMetrics> Dependencies { get; set; } = new List<DependencyMetrics>();

    /// <summary>Number of dependencies; 0 when the list is null.</summary>
    public int TotalDependencies => Dependencies?.Count ?? 0;

    /// <summary>
    /// Mean response time in milliseconds across all calls to all dependencies, or 0 when no call
    /// was recorded. Weighted by call count: averaging the per-dependency averages would let a
    /// dependency with a single call count as much as one with thousands.
    /// </summary>
    public double AverageResponseTime
    {
        get
        {
            long totalCalls = TotalCalls();
            if (totalCalls <= 0)
            {
                return 0;
            }

            return Dependencies.Sum(d => d.TotalDuration.TotalMilliseconds) / totalCalls;
        }
    }

    /// <summary>
    /// Failed calls as a percentage (0-100) of all calls to all dependencies, or 0 when no call
    /// was recorded. Weighted by call count for the same reason as <see cref="AverageResponseTime"/>.
    /// </summary>
    public double OverallErrorRate
    {
        get
        {
            long totalCalls = TotalCalls();
            if (totalCalls <= 0)
            {
                return 0;
            }

            return (double)Dependencies.Sum(d => (long)d.ErrorCount) / totalCalls * 100;
        }
    }

    // Sums CallCount over all dependencies; long avoids int overflow, null list counts as empty.
    private long TotalCalls()
    {
        return Dependencies == null ? 0 : Dependencies.Sum(d => (long)d.CallCount);
    }
}
/// <summary>Call statistics for one downstream dependency, with derived average latency and error rate.</summary>
public class DependencyMetrics
{
    public string ServiceName { get; set; }
    public int CallCount { get; set; }
    public int ErrorCount { get; set; }
    public TimeSpan TotalDuration { get; set; }

    /// <summary>TotalDuration in milliseconds divided by CallCount; 0 when CallCount is not positive.</summary>
    public double AverageResponseTime
    {
        get
        {
            if (CallCount <= 0)
            {
                return 0;
            }

            return TotalDuration.TotalMilliseconds / CallCount;
        }
    }

    /// <summary>ErrorCount as a percentage (0-100) of CallCount; 0 when CallCount is not positive.</summary>
    public double ErrorRate
    {
        get
        {
            if (CallCount <= 0)
            {
                return 0;
            }

            return (double)ErrorCount / CallCount * 100;
        }
    }
}
/// <summary>Analysis of one distributed trace; filled by MicroserviceMonitor.AnalyzeDistributedTracesAsync.</summary>
public class DistributedTraceAnalysis
{
public string TraceId { get; set; }
public DateTime AnalyzedAt { get; set; }
// Copied from Trace.Duration by the monitor.
public TimeSpan TotalDuration { get; set; }
// Number of spans in the analyzed trace.
public int SpanCount { get; set; }
public List<ServiceCall> ServiceCallChain { get; set; } = new List<ServiceCall>();
public List<PerformanceBottleneck> PerformanceBottlenecks { get; set; } = new List<PerformanceBottleneck>();
// One entry per span whose Status is SpanStatus.Error.
public List<TraceError> Errors { get; set; } = new List<TraceError>();
public Dictionary<string, TimeSpan> ServiceLatencies { get; set; } = new Dictionary<string, TimeSpan>();
}
/// <summary>One call from a service to another within a trace, with its operation, duration and status.</summary>
public class ServiceCall
{
public string FromService { get; set; }
public string ToService { get; set; }
public string OperationName { get; set; }
public TimeSpan Duration { get; set; }
public SpanStatus Status { get; set; }
}
/// <summary>An operation flagged as a performance bottleneck in a trace.</summary>
// NOTE(review): Severity is a free-form string here — confirm the vocabulary used by the analyzer.
public class PerformanceBottleneck
{
public string ServiceName { get; set; }
public string OperationName { get; set; }
public TimeSpan Duration { get; set; }
public string Severity { get; set; }
}
/// <summary>An error found in a trace. The monitor fills it from a failed span and uses the span's StartTime as Timestamp.</summary>
public class TraceError
{
public string ServiceName { get; set; }
public string OperationName { get; set; }
public string ErrorMessage { get; set; }
public DateTime Timestamp { get; set; }
}
/// <summary>Completion status of a span. Ok is the default value (0).</summary>
public enum SpanStatus
{
Ok,
Error,
Timeout
}
/// <summary>Result of correlating a service's logs with its metrics over a time range.</summary>
public class LogCorrelationResult
{
public string ServiceName { get; set; }
public TimeSpan TimeRange { get; set; }
public DateTime AnalyzedAt { get; set; }
public List<ErrorCorrelation> ErrorCorrelations { get; set; } = new List<ErrorCorrelation>();
public List<PerformanceCorrelation> PerformanceCorrelations { get; set; } = new List<PerformanceCorrelation>();
public List<AnomalyPattern> AnomalyPatterns { get; set; } = new List<AnomalyPattern>();
public List<string> Insights { get; set; } = new List<string>();
}
/// <summary>Pairs an error-log count with a metric value at one point in time.</summary>
// NOTE(review): the range of CorrelationStrength is not shown in this file — confirm.
public class ErrorCorrelation
{
public DateTime Timestamp { get; set; }
public string ErrorType { get; set; }
public int LogCount { get; set; }
public double MetricValue { get; set; }
public double CorrelationStrength { get; set; }
}
/// <summary>Pairs a log-derived value with a metric value for one named metric at one point in time.</summary>
// NOTE(review): the range of CorrelationStrength is not shown in this file — confirm.
public class PerformanceCorrelation
{
public DateTime Timestamp { get; set; }
public string MetricName { get; set; }
public double LogValue { get; set; }
public double MetricValue { get; set; }
public double CorrelationStrength { get; set; }
}
/// <summary>A detected anomaly with its type, description, numeric severity and free-form attributes.</summary>
// NOTE(review): Severity is a double here (its scale is not shown in this file) — confirm.
public class AnomalyPattern
{
public DateTime DetectedAt { get; set; }
public string PatternType { get; set; }
public string Description { get; set; }
public double Severity { get; set; }
public Dictionary<string, object> Attributes { get; set; }
}
// Service discovery and tracing interfaces and models
/// <summary>A discovered service with its instances and string metadata. Both collections start empty.</summary>
public class ServiceInfo
{
public string Name { get; set; }
public List<ServiceInstance> Instances { get; set; } = new List<ServiceInstance>();
public Dictionary<string, string> Metadata { get; set; } = new Dictionary<string, string>();
}
/// <summary>One running instance of a service: address, health flag and time of the last health check.</summary>
public class ServiceInstance
{
public string Id { get; set; }
public string Host { get; set; }
public int Port { get; set; }
public bool IsHealthy { get; set; }
public DateTime LastHealthCheck { get; set; }
}
/// <summary>A distributed trace: identifier, total duration and its spans (the list starts empty).</summary>
public class Trace
{
public string TraceId { get; set; }
public TimeSpan Duration { get; set; }
public List<Span> Spans { get; set; } = new List<Span>();
}
/// <summary>One span of a trace: identity, owning and remote service, operation, timing, status and tags.</summary>
public class Span
{
public string SpanId { get; set; }
// Presumably null or empty for the root span — verify against the tracing backend.
public string ParentSpanId { get; set; }
public string ServiceName { get; set; }
public string RemoteServiceName { get; set; }
public string OperationName { get; set; }
public DateTime StartTime { get; set; }
public TimeSpan Duration { get; set; }
public SpanStatus Status { get; set; }
// The monitor copies this into TraceError.ErrorMessage for spans with Status == SpanStatus.Error.
public string ErrorMessage { get; set; }
public Dictionary<string, object> Tags { get; set; } = new Dictionary<string, object>();
}
/// <summary>Metric sample for one service at one point in time, plus named custom metrics (the dictionary starts empty).</summary>
// NOTE(review): units of the usage, rate and response-time values are not shown in this file — confirm.
public class ServiceMetrics
{
public string ServiceName { get; set; }
public DateTime Timestamp { get; set; }
public double CpuUsage { get; set; }
public double MemoryUsage { get; set; }
public double ErrorRate { get; set; }
public double RequestRate { get; set; }
public double AverageResponseTime { get; set; }
public Dictionary<string, double> CustomMetrics { get; set; } = new Dictionary<string, double>();
}
### 练习2:微服务监控系统
这个练习演示了如何构建一个完整的微服务监控系统,包括:
- 服务健康监控:实时检查所有微服务的健康状态
- 指标收集:收集性能、业务、错误和资源使用指标
- 分布式追踪分析:分析服务调用链和性能瓶颈
- 日志关联分析:将日志数据与指标数据关联分析
- 异常检测:识别异常模式和潜在问题
- 告警生成:基于监控数据生成智能告警
## 20.7 章节总结
在本章中,我们深入学习了部署和DevOps的核心概念和实践技术:
核心概念
应用程序部署基础
- 配置管理和环境隔离
- 部署脚本自动化
- 健康检查和回滚机制
容器化技术
- Docker容器化最佳实践
- 多服务编排和管理
- 容器监控和日志收集
持续集成/持续部署(CI/CD)
- GitHub Actions和Azure DevOps流水线
- 自动化测试和质量检查
- 多环境部署策略
云平台部署
- Azure App Service和Container Instances
- 自动扩缩容和负载均衡
- 云资源监控和成本优化
监控和日志
- 应用程序性能监控(APM)
- 分布式追踪和日志聚合
- 告警和仪表盘可视化
高级技术
自动化部署流水线
- 端到端部署自动化
- 蓝绿部署和金丝雀发布
- 自动回滚和故障恢复
微服务监控
- 服务发现和健康检查
- 分布式追踪分析
- 服务依赖关系映射
日志和指标关联
- 多维度数据关联分析
- 异常模式识别
- 智能告警和根因分析
实际应用
企业级部署
- 多环境管理(开发、测试、生产)
- 配置管理和密钥管理
- 合规性和安全性要求
云原生架构
- 容器编排和服务网格
- 微服务治理和监控
- 弹性伸缩和故障恢复
DevOps文化
- 开发和运维协作
- 持续改进和反馈循环
- 自动化和标准化流程
重要技能
部署策略设计
- 选择合适的部署模式
- 风险评估和回滚计划
- 性能和可用性优化
监控体系建设
- 指标体系设计
- 告警策略制定
- 可观测性实践
自动化工具使用
- CI/CD工具配置
- 基础设施即代码(IaC)
- 容器编排平台
通过本章的学习,你已经掌握了现代应用程序部署和DevOps的核心技术。这些技能对于构建可靠、可扩展的生产系统至关重要。
下一章我们将学习性能优化和调试,探讨如何识别和解决应用程序的性能问题。
## 20.6 实践练习
### 练习1:自动化部署流水线
```csharp
// Automated deployment pipeline
/// <summary>
/// Runs a deployment end to end: checkout, build, test, quality check, containerize and push,
/// then the optional staging stage (deploy + integration tests) and the optional production
/// stage (blue-green deploy + health verification with automatic rollback).
/// </summary>
public class DeploymentPipeline
{
    // Time given to a fresh production deployment to settle before it is health-checked.
    private static readonly TimeSpan ProductionStabilizationDelay = TimeSpan.FromSeconds(30);

    private readonly ILogger<DeploymentPipeline> _logger;
    private readonly IGitService _gitService;
    private readonly IBuildService _buildService;
    private readonly ITestService _testService;
    private readonly IContainerService _containerService;
    private readonly IDeploymentService _deploymentService;
    private readonly INotificationService _notificationService;

    /// <summary>Stores the injected services; no work is started here.</summary>
    public DeploymentPipeline(
        ILogger<DeploymentPipeline> logger,
        IGitService gitService,
        IBuildService buildService,
        ITestService testService,
        IContainerService containerService,
        IDeploymentService deploymentService,
        INotificationService notificationService)
    {
        _logger = logger;
        _gitService = gitService;
        _buildService = buildService;
        _testService = testService;
        _containerService = containerService;
        _deploymentService = deploymentService;
        _notificationService = notificationService;
    }

    /// <summary>Executes all pipeline stages in order and stops at the first failing step.</summary>
    /// <param name="request">What to build and where to deploy it.</param>
    /// <returns>
    /// A result whose Status is Success or Failed. Failures are reported through the result
    /// (ErrorMessage plus the failed entry in Steps), not thrown. Notification errors are
    /// logged and never change the outcome.
    /// </returns>
    public async Task<PipelineResult> ExecutePipelineAsync(PipelineRequest request)
    {
        var result = new PipelineResult
        {
            PipelineId = Guid.NewGuid().ToString(),
            StartTime = DateTime.UtcNow,
            Request = request
        };

        try
        {
            _logger.LogInformation("Starting deployment pipeline {PipelineId}", result.PipelineId);

            // 1. Check out the code
            await ExecuteStepAsync(result.Steps, "Checkout", async () =>
            {
                await _gitService.CheckoutAsync(request.Repository, request.Branch, request.WorkingDirectory);
                return "Code checked out successfully";
            });

            // 2. Build
            await ExecuteStepAsync(result.Steps, "Build", async () =>
            {
                var buildResult = await _buildService.BuildAsync(request.WorkingDirectory, request.BuildConfiguration);
                if (!buildResult.Success)
                {
                    throw new InvalidOperationException($"Build failed: {buildResult.ErrorMessage}");
                }
                return $"Build completed in {buildResult.Duration}";
            });

            // 3. Run unit tests
            await ExecuteStepAsync(result.Steps, "Test", async () =>
            {
                var testResult = await _testService.RunTestsAsync(request.WorkingDirectory);
                if (!testResult.Success)
                {
                    throw new InvalidOperationException($"Tests failed: {testResult.FailedTests} failed out of {testResult.TotalTests}");
                }
                return $"All {testResult.TotalTests} tests passed";
            });

            // 4. Code quality checks
            await ExecuteStepAsync(result.Steps, "Quality Check", async () =>
            {
                var qualityResult = await _buildService.RunQualityChecksAsync(request.WorkingDirectory);
                if (!qualityResult.Success)
                {
                    throw new InvalidOperationException($"Quality checks failed: {qualityResult.Issues} issues found");
                }
                return "Code quality checks passed";
            });

            // Same tag for the build and push steps, so compute it once.
            var imageTag = $"{request.ImageName}:{request.Version}";

            // 5. Build the container image
            await ExecuteStepAsync(result.Steps, "Containerize", async () =>
            {
                await _containerService.BuildImageAsync(request.WorkingDirectory, imageTag);
                return $"Container image built: {imageTag}";
            });

            // 6. Push the image
            await ExecuteStepAsync(result.Steps, "Push Image", async () =>
            {
                await _containerService.PushImageAsync(imageTag, request.Registry);
                return $"Image pushed to registry: {imageTag}";
            });

            // 7. Deploy to staging
            if (request.DeployToStaging)
            {
                await ExecuteStepAsync(result.Steps, "Deploy to Staging", async () =>
                {
                    await _deploymentService.DeployAsync("staging", request.ImageName, request.Version);
                    return "Deployed to staging environment";
                });

                // 8. Integration tests against staging
                await ExecuteStepAsync(result.Steps, "Integration Tests", async () =>
                {
                    var integrationTestResult = await _testService.RunIntegrationTestsAsync("staging");
                    if (!integrationTestResult.Success)
                    {
                        throw new InvalidOperationException($"Integration tests failed: {integrationTestResult.ErrorMessage}");
                    }
                    return "Integration tests passed";
                });
            }

            // 9. Deploy to production
            if (request.DeployToProduction)
            {
                await ExecuteStepAsync(result.Steps, "Deploy to Production", async () =>
                {
                    // Blue-green deployment
                    await _deploymentService.BlueGreenDeployAsync("production", request.ImageName, request.Version);
                    return "Blue-green deployment to production completed";
                });

                // 10. Verify production
                await ExecuteStepAsync(result.Steps, "Production Verification", async () =>
                {
                    await Task.Delay(ProductionStabilizationDelay);
                    var healthCheck = await _deploymentService.HealthCheckAsync("production");
                    if (!healthCheck.IsHealthy)
                    {
                        // Automatic rollback
                        await _deploymentService.RollbackAsync("production");
                        throw new InvalidOperationException("Production health check failed, rolled back");
                    }
                    return "Production deployment verified";
                });
            }

            result.Status = PipelineStatus.Success;
            result.EndTime = DateTime.UtcNow;

            // A broken notification channel must not turn a successful deployment into a failure.
            await NotifySafelyAsync(
                async () => await _notificationService.SendSuccessNotificationAsync(result),
                result.PipelineId);

            _logger.LogInformation("Pipeline {PipelineId} completed successfully", result.PipelineId);
            return result;
        }
        catch (Exception ex)
        {
            result.Status = PipelineStatus.Failed;
            result.EndTime = DateTime.UtcNow;
            result.ErrorMessage = ex.Message;
            _logger.LogError(ex, "Pipeline {PipelineId} failed", result.PipelineId);

            // If the notification itself throws, the caller must still receive the failed result.
            await NotifySafelyAsync(
                async () => await _notificationService.SendFailureNotificationAsync(result, ex),
                result.PipelineId);

            return result;
        }
    }

    // Runs one step and records it in `steps`. The step is added BEFORE it executes so that a
    // failing step (status Failed, with its error message) stays in the result; the exception
    // is then rethrown to abort the pipeline.
    private async Task<PipelineStep> ExecuteStepAsync(
        ICollection<PipelineStep> steps, string stepName, Func<Task<string>> stepAction)
    {
        var step = new PipelineStep
        {
            Name = stepName,
            StartTime = DateTime.UtcNow,
            Status = StepStatus.Running
        };
        steps.Add(step);

        try
        {
            _logger.LogInformation("Executing step: {StepName}", stepName);
            step.Output = await stepAction();
            step.Status = StepStatus.Success;
            step.EndTime = DateTime.UtcNow;
            _logger.LogInformation("Step {StepName} completed successfully", stepName);
        }
        catch (Exception ex)
        {
            step.Status = StepStatus.Failed;
            step.EndTime = DateTime.UtcNow;
            step.ErrorMessage = ex.Message;
            _logger.LogError(ex, "Step {StepName} failed", stepName);
            throw;
        }

        return step;
    }

    // Sends a notification on a best-effort basis: failures are logged and swallowed on purpose.
    private async Task NotifySafelyAsync(Func<Task> send, string pipelineId)
    {
        try
        {
            await send();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to send notification for pipeline {PipelineId}", pipelineId);
        }
    }
}
// 微服务监控系统
public class MicroserviceMonitor
{
private readonly ILogger<MicroserviceMonitor> _logger;
private readonly IServiceDiscovery _serviceDiscovery;
private readonly IMetricsCollector _metricsCollector;
private readonly ILogAggregator _logAggregator;
private readonly ITracingService _tracingService;
private readonly IAlertService _alertService;
/// <summary>Creates the monitor and stores the injected services; no other work is done here.</summary>
public MicroserviceMonitor(
    ILogger<MicroserviceMonitor> logger,
    IServiceDiscovery serviceDiscovery,
    IMetricsCollector metricsCollector,
    ILogAggregator logAggregator,
    ITracingService tracingService,
    IAlertService alertService)
{
    // Field-by-field assignment written as a single tuple assignment.
    (_logger, _serviceDiscovery, _metricsCollector, _logAggregator, _tracingService, _alertService) =
        (logger, serviceDiscovery, metricsCollector, logAggregator, tracingService, alertService);
}
/// <summary>
/// Builds a health report: checks every discovered service one after another, derives the
/// overall health from the individual results and then triggers alert generation.
/// </summary>
/// <returns>The populated report.</returns>
/// <remarks>Any exception is logged and rethrown unchanged.</remarks>
public async Task<MicroserviceHealthReport> GenerateHealthReportAsync()
{
    try
    {
        var healthReport = new MicroserviceHealthReport();
        healthReport.GeneratedAt = DateTime.UtcNow;

        // Discover all services and check them sequentially, in discovery order.
        var discovered = await _serviceDiscovery.GetAllServicesAsync();
        foreach (var candidate in discovered)
        {
            var checkedHealth = await CheckServiceHealthAsync(candidate);
            healthReport.ServiceHealths.Add(checkedHealth);
        }

        // Overall status first, then alerts based on the finished report.
        healthReport.OverallHealth = CalculateOverallHealth(healthReport.ServiceHealths);
        await GenerateHealthAlertsAsync(healthReport);

        return healthReport;
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to generate microservice health report");
        throw;
    }
}
/// <summary>
/// Collects performance, business, error and resource metrics plus a dependency analysis for
/// one service over the window that ends now and spans <paramref name="timeRange"/>.
/// </summary>
/// <param name="serviceName">Service to collect metrics for.</param>
/// <param name="timeRange">Length of the look-back window.</param>
/// <returns>The populated report; CollectedAt is the end of the window.</returns>
/// <remarks>The five queries run sequentially. Any exception is logged and rethrown unchanged.</remarks>
public async Task<ServiceMetricsReport> CollectServiceMetricsAsync(string serviceName, TimeSpan timeRange)
{
    try
    {
        var windowEnd = DateTime.UtcNow;
        var windowStart = windowEnd.Subtract(timeRange);

        var metricsReport = new ServiceMetricsReport();
        metricsReport.ServiceName = serviceName;
        metricsReport.TimeRange = timeRange;
        metricsReport.CollectedAt = windowEnd;

        // Performance metrics
        var performance = await _metricsCollector.GetServicePerformanceMetricsAsync(serviceName, windowStart, windowEnd);
        metricsReport.PerformanceMetrics = performance;

        // Business metrics
        var business = await _metricsCollector.GetServiceBusinessMetricsAsync(serviceName, windowStart, windowEnd);
        metricsReport.BusinessMetrics = business;

        // Error metrics
        var errors = await _metricsCollector.GetServiceErrorMetricsAsync(serviceName, windowStart, windowEnd);
        metricsReport.ErrorMetrics = errors;

        // Resource usage metrics
        var resources = await _metricsCollector.GetServiceResourceMetricsAsync(serviceName, windowStart, windowEnd);
        metricsReport.ResourceMetrics = resources;

        // Service dependency analysis
        var dependencies = await AnalyzeServiceDependenciesAsync(serviceName, windowStart, windowEnd);
        metricsReport.DependencyAnalysis = dependencies;

        return metricsReport;
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to collect service metrics for {ServiceName}", serviceName);
        throw;
    }
}
/// <summary>
/// Loads one trace and summarises it: total duration, span count, service call chain,
/// performance bottlenecks, failed spans and average latency per service.
/// </summary>
/// <param name="traceId">Identifier of the trace to analyse.</param>
/// <returns>The analysis of the trace.</returns>
/// <remarks>Any exception is logged with the trace id and rethrown unchanged.</remarks>
public async Task<DistributedTraceAnalysis> AnalyzeDistributedTracesAsync(string traceId)
{
    try
    {
        // Timestamp is taken before the trace is fetched.
        var analyzedAt = DateTime.UtcNow;
        var trace = await _tracingService.GetTraceAsync(traceId);

        var totalDuration = trace.Duration;
        var spanCount = trace.Spans.Count;
        var callChain = BuildServiceCallChain(trace.Spans);
        var bottlenecks = IdentifyPerformanceBottlenecks(trace.Spans);

        // Every span that ended in the Error status becomes one TraceError.
        var failedSpans =
            from span in trace.Spans
            where span.Status == SpanStatus.Error
            select new TraceError
            {
                ServiceName = span.ServiceName,
                OperationName = span.OperationName,
                ErrorMessage = span.ErrorMessage,
                Timestamp = span.StartTime
            };
        var errors = failedSpans.ToList();

        var latencies = CalculateServiceLatencies(trace.Spans);

        return new DistributedTraceAnalysis
        {
            TraceId = traceId,
            AnalyzedAt = analyzedAt,
            TotalDuration = totalDuration,
            SpanCount = spanCount,
            ServiceCallChain = callChain,
            PerformanceBottlenecks = bottlenecks,
            Errors = errors,
            ServiceLatencies = latencies
        };
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to analyze distributed trace {TraceId}", traceId);
        throw;
    }
}
/// <summary>
/// Fetches the logs and metrics of a service for a time window and correlates them:
/// error correlations, performance correlations, anomaly patterns and textual insights.
/// </summary>
/// <param name="serviceName">Service to analyse.</param>
/// <param name="startTime">Start of the window.</param>
/// <param name="endTime">End of the window.</param>
/// <returns>The correlation result.</returns>
/// <remarks>Any exception is logged with the service name and rethrown unchanged.</remarks>
public async Task<LogCorrelationResult> CorrelateLogsWithMetricsAsync(
    string serviceName, DateTime startTime, DateTime endTime)
{
    try
    {
        // Timestamp is taken before any data is fetched.
        var analyzedAt = DateTime.UtcNow;

        var logEntries = await _logAggregator.GetLogsAsync(serviceName, startTime, endTime);
        var serviceMetrics = await _metricsCollector.GetServiceMetricsAsync(serviceName, startTime, endTime);

        var correlation = new LogCorrelationResult
        {
            ServiceName = serviceName,
            TimeRange = endTime - startTime,
            AnalyzedAt = analyzedAt,
            ErrorCorrelations = CorrelateErrorLogsWithMetrics(logEntries, serviceMetrics),
            PerformanceCorrelations = CorrelatePerformanceLogsWithMetrics(logEntries, serviceMetrics),
            AnomalyPatterns = IdentifyAnomalyPatterns(logEntries, serviceMetrics)
        };

        // Insights are derived from the three collections populated above.
        correlation.Insights = GenerateInsights(correlation);
        return correlation;
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to correlate logs with metrics for {ServiceName}", serviceName);
        throw;
    }
}
/// <summary>
/// Probes one service and classifies it as Healthy, Warning or Critical.
/// </summary>
/// <param name="service">Service to probe.</param>
/// <returns>
/// The health snapshot. If anything throws during probing the snapshot is still returned,
/// marked Critical with the exception message; fields assigned before the failure keep their values.
/// </returns>
private async Task<ServiceHealth> CheckServiceHealthAsync(ServiceInfo service)
{
    var snapshot = new ServiceHealth
    {
        ServiceName = service.Name,
        CheckedAt = DateTime.UtcNow
    };

    try
    {
        var probe = await _serviceDiscovery.HealthCheckAsync(service);
        snapshot.IsHealthy = probe.IsHealthy;
        snapshot.ResponseTime = probe.ResponseTime;

        snapshot.InstanceCount = service.Instances.Count;
        snapshot.HealthyInstanceCount = service.Instances.Count(instance => instance.IsHealthy);

        var latest = await _metricsCollector.GetRecentMetricsAsync(service.Name);
        snapshot.CpuUsage = latest.CpuUsage;
        snapshot.MemoryUsage = latest.MemoryUsage;
        snapshot.ErrorRate = latest.ErrorRate;
        snapshot.RequestRate = latest.RequestRate;

        // Critical: failed health check or no healthy instance.
        // Warning: error rate above 5, CPU above 80 or memory above 85.
        var isDown = !snapshot.IsHealthy || snapshot.HealthyInstanceCount == 0;
        var isDegraded = snapshot.ErrorRate > 5 || snapshot.CpuUsage > 80 || snapshot.MemoryUsage > 85;

        snapshot.Status = isDown
            ? HealthStatus.Critical
            : isDegraded ? HealthStatus.Warning : HealthStatus.Healthy;
    }
    catch (Exception failure)
    {
        snapshot.Status = HealthStatus.Critical;
        snapshot.ErrorMessage = failure.Message;
        _logger.LogError(failure, "Health check failed for service {ServiceName}", service.Name);
    }

    return snapshot;
}
/// <summary>
/// Returns the worst status found among the services: Critical beats Warning,
/// Warning beats Healthy. An empty list yields Healthy.
/// </summary>
/// <param name="serviceHealths">Per-service health snapshots.</param>
private HealthStatus CalculateOverallHealth(List<ServiceHealth> serviceHealths) =>
    serviceHealths.Any(entry => entry.Status == HealthStatus.Critical)
        ? HealthStatus.Critical
        : serviceHealths.Any(entry => entry.Status == HealthStatus.Warning)
            ? HealthStatus.Warning
            : HealthStatus.Healthy;
/// <summary>
/// Sends one alert per service whose status is Critical or Warning; other services are skipped.
/// Alerts are sent sequentially in report order.
/// </summary>
/// <param name="report">Report whose service entries are inspected.</param>
private async Task GenerateHealthAlertsAsync(MicroserviceHealthReport report)
{
    foreach (var entry in report.ServiceHealths)
    {
        var isCritical = entry.Status == HealthStatus.Critical;
        if (!isCritical && entry.Status != HealthStatus.Warning)
        {
            continue;
        }

        var alert = isCritical
            ? new Alert
            {
                Type = AlertType.Error,
                Severity = AlertSeverity.Critical,
                Title = $"Service {entry.ServiceName} is Critical",
                // Falls back to a generic text when no error message was captured.
                Description = entry.ErrorMessage ?? "Service health check failed"
            }
            : new Alert
            {
                Type = AlertType.Performance,
                Severity = AlertSeverity.Warning,
                Title = $"Service {entry.ServiceName} has Performance Issues",
                Description = $"CPU: {entry.CpuUsage}%, Memory: {entry.MemoryUsage}%, Error Rate: {entry.ErrorRate}%"
            };

        await _alertService.SendAlertAsync(alert);
    }
}
/// <summary>
/// Aggregates, per remote service, the outbound calls made by <paramref name="serviceName"/>
/// within the time window: call count, total duration and error count.
/// </summary>
/// <param name="serviceName">Calling service whose dependencies are analysed.</param>
/// <param name="startTime">Start of the window.</param>
/// <param name="endTime">End of the window.</param>
/// <returns>One <see cref="DependencyMetrics"/> entry per distinct remote service.</returns>
private async Task<DependencyAnalysis> AnalyzeServiceDependenciesAsync(
    string serviceName, DateTime startTime, DateTime endTime)
{
    var traces = await _tracingService.GetServiceTracesAsync(serviceName, startTime, endTime);
    var dependencies = new Dictionary<string, DependencyMetrics>();

    foreach (var trace in traces)
    {
        foreach (var span in trace.Spans)
        {
            // Only spans emitted by this service that name a remote callee count as dependencies.
            if (span.ServiceName != serviceName || span.RemoteServiceName == null)
            {
                continue;
            }

            // Single lookup instead of ContainsKey followed by the indexer.
            if (!dependencies.TryGetValue(span.RemoteServiceName, out var dep))
            {
                dep = new DependencyMetrics
                {
                    ServiceName = span.RemoteServiceName
                };
                dependencies.Add(span.RemoteServiceName, dep);
            }

            dep.CallCount++;
            dep.TotalDuration += span.Duration;
            if (span.Status == SpanStatus.Error)
            {
                dep.ErrorCount++;
            }
        }
    }

    return new DependencyAnalysis
    {
        ServiceName = serviceName,
        Dependencies = dependencies.Values.ToList()
    };
}
/// <summary>
/// Produces one <see cref="ServiceCall"/> per span that has a parent span id.
/// The caller is the service of the first span whose id equals that parent id;
/// it is null when no such span exists in the list.
/// </summary>
/// <param name="spans">All spans of one trace.</param>
private List<ServiceCall> BuildServiceCallChain(List<Span> spans)
{
    var calls =
        from child in spans
        where child.ParentSpanId != null
        let parent = spans.FirstOrDefault(candidate => candidate.SpanId == child.ParentSpanId)
        select new ServiceCall
        {
            FromService = parent?.ServiceName,
            ToService = child.ServiceName,
            OperationName = child.OperationName,
            Duration = child.Duration,
            Status = child.Status
        };

    return calls.ToList();
}
/// <summary>
/// Flags spans whose duration exceeds twice the average span duration of the trace.
/// A flagged span is "High" when it exceeds four times the average, otherwise "Medium".
/// </summary>
/// <param name="spans">All spans of one trace; may be null or empty.</param>
/// <returns>The flagged spans; an empty list when there are no spans.</returns>
private List<PerformanceBottleneck> IdentifyPerformanceBottlenecks(List<Span> spans)
{
    // Enumerable.Average throws InvalidOperationException on an empty sequence,
    // so a trace without spans is answered directly.
    if (spans == null || spans.Count == 0)
    {
        return new List<PerformanceBottleneck>();
    }

    var avgDuration = spans.Average(s => s.Duration.TotalMilliseconds);
    var threshold = avgDuration * 2; // spans above twice the average count as bottlenecks

    return spans
        .Where(s => s.Duration.TotalMilliseconds > threshold)
        .Select(s => new PerformanceBottleneck
        {
            ServiceName = s.ServiceName,
            OperationName = s.OperationName,
            Duration = s.Duration,
            Severity = s.Duration.TotalMilliseconds > threshold * 2 ? "High" : "Medium"
        })
        .ToList();
}
/// <summary>
/// Computes the average span duration per service name.
/// </summary>
/// <param name="spans">All spans of one trace.</param>
/// <returns>Map from service name to the mean duration of its spans.</returns>
private Dictionary<string, TimeSpan> CalculateServiceLatencies(List<Span> spans)
{
    var millisecondsByService =
        from span in spans
        group span.Duration.TotalMilliseconds by span.ServiceName;

    return millisecondsByService.ToDictionary(
        bucket => bucket.Key,
        bucket => TimeSpan.FromMilliseconds(bucket.Average()));
}
/// <summary>
/// Placeholder for correlating error logs with metrics; currently ignores both
/// arguments and always returns a new empty list.
/// </summary>
private List<ErrorCorrelation> CorrelateErrorLogsWithMetrics(List<LogEntry> logs, ServiceMetrics metrics) =>
    new();
/// <summary>
/// Placeholder for correlating performance logs with metrics; currently ignores both
/// arguments and always returns a new empty list.
/// </summary>
private List<PerformanceCorrelation> CorrelatePerformanceLogsWithMetrics(List<LogEntry> logs, ServiceMetrics metrics) =>
    new();
/// <summary>
/// Placeholder for anomaly detection; currently ignores both arguments and
/// always returns a new empty list.
/// </summary>
private List<AnomalyPattern> IdentifyAnomalyPatterns(List<LogEntry> logs, ServiceMetrics metrics) =>
    new();
/// <summary>
/// Produces one summary sentence for each non-empty collection of the result
/// (error correlations, performance correlations, anomaly patterns), in that order.
/// </summary>
/// <param name="result">Correlation result whose collections are inspected.</param>
/// <returns>Zero to three sentences.</returns>
private List<string> GenerateInsights(LogCorrelationResult result)
{
    IEnumerable<string> Describe()
    {
        if (result.ErrorCorrelations.Any())
        {
            yield return $"发现 {result.ErrorCorrelations.Count} 个错误关联模式";
        }

        if (result.PerformanceCorrelations.Any())
        {
            yield return $"发现 {result.PerformanceCorrelations.Count} 个性能关联模式";
        }

        if (result.AnomalyPatterns.Any())
        {
            yield return $"检测到 {result.AnomalyPatterns.Count} 个异常模式";
        }
    }

    return Describe().ToList();
}
}
部署脚本和自动化
// Deployment manager
/// <summary>
/// Runs a deployment as an ordered sequence of steps (pre-deployment checks, optional
/// database migration, application deployment, post-deployment validation, health checks)
/// and records the outcome of every step on a <see cref="DeploymentResult"/>.
/// </summary>
public class DeploymentManager
{
    // 1024^3: converts the byte count reported by DriveInfo into whole gigabytes.
    private const long BytesPerGigabyte = 1024L * 1024 * 1024;

    private readonly ILogger<DeploymentManager> _logger;
    private readonly ConfigurationManager _configManager;
    private readonly IServiceProvider _serviceProvider;

    /// <summary>Creates the manager and stores the injected collaborators.</summary>
    /// <param name="logger">Logger for progress and failures.</param>
    /// <param name="configManager">Validates configuration, tests the database connection and runs health checks.</param>
    /// <param name="serviceProvider">Used to create a scope for resolving the database context.</param>
    public DeploymentManager(
        ILogger<DeploymentManager> logger,
        ConfigurationManager configManager,
        IServiceProvider serviceProvider)
    {
        _logger = logger;
        _configManager = configManager;
        _serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Executes the deployment described by <paramref name="request"/>.
    /// </summary>
    /// <param name="request">Target environment, version and options.</param>
    /// <returns>
    /// The result with status Success or Failed. Step failures are not rethrown; they are
    /// reported through <see cref="DeploymentResult.Status"/> and <see cref="DeploymentResult.ErrorMessage"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<DeploymentResult> DeployAsync(DeploymentRequest request)
    {
        if (request == null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        var result = new DeploymentResult
        {
            DeploymentId = Guid.NewGuid().ToString(),
            Environment = request.Environment,
            Version = request.Version,
            StartTime = DateTime.UtcNow
        };

        try
        {
            _logger.LogInformation("Starting deployment {DeploymentId} to {Environment}",
                result.DeploymentId, request.Environment);

            // 1. Pre-deployment checks
            await RunPreDeploymentChecksAsync(request, result);

            // 2. Database migration (optional)
            if (request.RunDatabaseMigrations)
            {
                await RunDatabaseMigrationsAsync(request, result);
            }

            // 3. Application deployment
            await DeployApplicationAsync(request, result);

            // 4. Post-deployment validation
            await RunPostDeploymentValidationAsync(request, result);

            // 5. Health checks
            await RunHealthChecksAsync(request, result);

            result.Status = DeploymentStatus.Success;
            result.EndTime = DateTime.UtcNow;
            _logger.LogInformation("Deployment {DeploymentId} completed successfully", result.DeploymentId);
        }
        catch (Exception ex)
        {
            result.Status = DeploymentStatus.Failed;
            result.ErrorMessage = ex.Message;
            result.EndTime = DateTime.UtcNow;
            _logger.LogError(ex, "Deployment {DeploymentId} failed", result.DeploymentId);

            // Roll back only when the request asks for it.
            if (request.AutoRollbackOnFailure)
            {
                await RollbackAsync(request, result);
            }
        }

        return result;
    }

    /// <summary>
    /// Runs one deployment step and records it on <paramref name="result"/>.
    /// The start time is taken before the work begins so the recorded duration is real.
    /// On failure a Failed step carrying the exception message is recorded and the exception is rethrown.
    /// </summary>
    /// <param name="result">Result that receives the step entry.</param>
    /// <param name="stepName">Name stored on the step.</param>
    /// <param name="action">The work; its return value (may be null) becomes the step details.</param>
    private static async Task RunStepAsync(DeploymentResult result, string stepName, Func<Task<string>> action)
    {
        var startedAt = DateTime.UtcNow;
        try
        {
            var details = await action();
            result.Steps.Add(new DeploymentStep
            {
                Name = stepName,
                Status = StepStatus.Success,
                StartTime = startedAt,
                EndTime = DateTime.UtcNow,
                Details = details
            });
        }
        catch (Exception ex)
        {
            result.Steps.Add(new DeploymentStep
            {
                Name = stepName,
                Status = StepStatus.Failed,
                StartTime = startedAt,
                EndTime = DateTime.UtcNow,
                ErrorMessage = ex.Message
            });
            throw;
        }
    }

    /// <summary>
    /// Validates configuration, tests the database connection and checks free disk space.
    /// Each check is recorded as its own step; the first failing check aborts the deployment.
    /// </summary>
    private async Task RunPreDeploymentChecksAsync(DeploymentRequest request, DeploymentResult result)
    {
        _logger.LogInformation("Running pre-deployment checks");

        // Validate configuration
        await RunStepAsync(result, "Configuration Validation", () =>
        {
            _configManager.ValidateConfiguration(request.Environment);
            return Task.FromResult<string>(null);
        });

        // Check the database connection
        await RunStepAsync(result, "Database Connection Test", async () =>
        {
            var dbConnected = await _configManager.TestDatabaseConnectionAsync(request.Environment);
            if (!dbConnected)
            {
                throw new InvalidOperationException("Database connection test failed");
            }

            return null;
        });

        // Check disk space
        await RunStepAsync(result, "Disk Space Check", () =>
        {
            var diskSpace = GetAvailableDiskSpace();
            if (diskSpace < request.MinimumDiskSpaceGB)
            {
                throw new InvalidOperationException($"Insufficient disk space. Available: {diskSpace}GB, Required: {request.MinimumDiskSpaceGB}GB");
            }

            return Task.FromResult($"Available: {diskSpace}GB");
        });
    }

    /// <summary>
    /// Applies pending database migrations; in the "production" environment
    /// (case-insensitive) a backup is taken first.
    /// </summary>
    private async Task RunDatabaseMigrationsAsync(DeploymentRequest request, DeploymentResult result)
    {
        _logger.LogInformation("Running database migrations");

        await RunStepAsync(result, "Database Migration", async () =>
        {
            // Integrate Entity Framework migrations or another migration tool here.
            using var scope = _serviceProvider.CreateScope();
            var dbContext = scope.ServiceProvider.GetRequiredService<ApplicationDbContext>();

            // Back up the database (production only)
            if (request.Environment.Equals("production", StringComparison.OrdinalIgnoreCase))
            {
                await BackupDatabaseAsync(dbContext);
            }

            // Run the migrations
            await dbContext.Database.MigrateAsync();
            return null;
        });
    }

    /// <summary>Deploys the application. Currently a simulation that only waits five seconds.</summary>
    private async Task DeployApplicationAsync(DeploymentRequest request, DeploymentResult result)
    {
        _logger.LogInformation("Deploying application");

        await RunStepAsync(result, "Application Deployment", async () =>
        {
            // The real deployment logic belongs here,
            // e.g. copying files, restarting services, updating configuration.
            // Simulated deployment
            await Task.Delay(5000);
            return $"Deployed version {request.Version}";
        });
    }

    /// <summary>Verifies application startup and critical functions after deployment.</summary>
    private async Task RunPostDeploymentValidationAsync(DeploymentRequest request, DeploymentResult result)
    {
        _logger.LogInformation("Running post-deployment validation");

        await RunStepAsync(result, "Post-deployment Validation", async () =>
        {
            // Verify the application started correctly
            await ValidateApplicationStartupAsync();
            // Verify critical functions
            await ValidateCriticalFunctionsAsync();
            return null;
        });
    }

    /// <summary>
    /// Runs the environment's health checks and records a single step listing every check.
    /// </summary>
    /// <exception cref="InvalidOperationException">At least one health check reported failure.</exception>
    private async Task RunHealthChecksAsync(DeploymentRequest request, DeploymentResult result)
    {
        _logger.LogInformation("Running health checks");

        // Taken before the checks run so the step duration covers them.
        var startedAt = DateTime.UtcNow;
        var healthResults = await _configManager.RunHealthChecksAsync(request.Environment);
        var allHealthy = healthResults.All(h => h.Value);

        // Recorded by hand (not via RunStepAsync) so a failed step still carries the per-check details.
        result.Steps.Add(new DeploymentStep
        {
            Name = "Health Checks",
            Status = allHealthy ? StepStatus.Success : StepStatus.Failed,
            StartTime = startedAt,
            EndTime = DateTime.UtcNow,
            Details = string.Join(", ", healthResults.Select(h => $"{h.Key}: {(h.Value ? "OK" : "FAIL")}"))
        });

        if (!allHealthy)
        {
            throw new InvalidOperationException("Health checks failed");
        }
    }

    /// <summary>
    /// Rolls the deployment back. Currently a placeholder that only marks the result as rolled back.
    /// Failures are logged and stored in <see cref="DeploymentResult.RollbackErrorMessage"/>, never rethrown.
    /// </summary>
    private async Task RollbackAsync(DeploymentRequest request, DeploymentResult result)
    {
        _logger.LogWarning("Starting rollback for deployment {DeploymentId}", result.DeploymentId);
        try
        {
            // Implement the rollback logic here,
            // e.g. restore the previous version, roll back the database.
            // Placeholder await keeps the method genuinely asynchronous until that logic exists.
            await Task.CompletedTask;

            result.RollbackPerformed = true;
            result.RollbackTime = DateTime.UtcNow;
            _logger.LogInformation("Rollback completed for deployment {DeploymentId}", result.DeploymentId);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Rollback failed for deployment {DeploymentId}", result.DeploymentId);
            result.RollbackErrorMessage = ex.Message;
        }
    }

    /// <summary>
    /// Returns the free space, in whole gigabytes (rounded down), of the drive that
    /// contains the current working directory.
    /// </summary>
    /// <exception cref="InvalidOperationException">The drive root could not be determined.</exception>
    private long GetAvailableDiskSpace()
    {
        var root = Path.GetPathRoot(Environment.CurrentDirectory);
        if (string.IsNullOrEmpty(root))
        {
            throw new InvalidOperationException("Unable to determine the drive of the current directory");
        }

        var drive = new DriveInfo(root);
        return drive.AvailableFreeSpace / BytesPerGigabyte; // Convert to GB
    }

    /// <summary>Backs up the database. Currently a simulation that only waits one second.</summary>
    private async Task BackupDatabaseAsync(ApplicationDbContext dbContext)
    {
        // Implement the database backup here
        await Task.Delay(1000); // simulated backup
    }

    /// <summary>Verifies application startup. Currently a simulation that only waits one second.</summary>
    private async Task ValidateApplicationStartupAsync()
    {
        await Task.Delay(1000); // simulated validation
    }

    /// <summary>Verifies critical functions. Currently a simulation that only waits one second.</summary>
    private async Task ValidateCriticalFunctionsAsync()
    {
        await Task.Delay(1000); // simulated validation
    }
}
// Deployment-related data models
/// <summary>Input for one deployment run, consumed by DeploymentManager.DeployAsync.</summary>
public class DeploymentRequest
{
/// <summary>Target environment name; compared case-insensitively with "production" to decide whether to back up before migrating.</summary>
public string Environment { get; set; }
/// <summary>Version label being deployed; copied onto the deployment result.</summary>
public string Version { get; set; }
/// <summary>When true, the database migration step runs.</summary>
public bool RunDatabaseMigrations { get; set; }
/// <summary>When true, a rollback is attempted after a failed deployment.</summary>
public bool AutoRollbackOnFailure { get; set; }
/// <summary>Minimum free disk space, in whole GB, required by the pre-deployment disk check. Defaults to 5.</summary>
public int MinimumDiskSpaceGB { get; set; } = 5;
/// <summary>Additional key/value parameters; starts empty. Not read by the DeploymentManager code in this section.</summary>
public Dictionary<string, string> Parameters { get; set; } = new();
}
/// <summary>Outcome of one deployment run, produced by DeploymentManager.DeployAsync.</summary>
public class DeploymentResult
{
/// <summary>Identifier generated for this run (a new GUID rendered as a string).</summary>
public string DeploymentId { get; set; }
/// <summary>Environment name copied from the request.</summary>
public string Environment { get; set; }
/// <summary>Version label copied from the request.</summary>
public string Version { get; set; }
/// <summary>Overall status; set to Success or Failed when the run ends.</summary>
public DeploymentStatus Status { get; set; }
/// <summary>UTC time at which the run started.</summary>
public DateTime StartTime { get; set; }
/// <summary>UTC time at which the run ended; null until then.</summary>
public DateTime? EndTime { get; set; }
/// <summary>Message of the exception that failed the run, if any.</summary>
public string ErrorMessage { get; set; }
/// <summary>Steps recorded during the run, in execution order; starts empty.</summary>
public List<DeploymentStep> Steps { get; set; } = new();
/// <summary>True once the rollback routine has completed.</summary>
public bool RollbackPerformed { get; set; }
/// <summary>UTC time at which the rollback completed; null if none was performed.</summary>
public DateTime? RollbackTime { get; set; }
/// <summary>Message of the exception thrown by a failed rollback, if any.</summary>
public string RollbackErrorMessage { get; set; }
/// <summary>Elapsed time between StartTime and EndTime; null while EndTime is null.</summary>
public TimeSpan? Duration => EndTime?.Subtract(StartTime);
}
/// <summary>Record of a single step executed during a deployment.</summary>
public class DeploymentStep
{
/// <summary>Human-readable step name, e.g. "Database Migration".</summary>
public string Name { get; set; }
/// <summary>Outcome of the step.</summary>
public StepStatus Status { get; set; }
/// <summary>UTC start time recorded for the step.</summary>
public DateTime StartTime { get; set; }
/// <summary>UTC end time recorded for the step.</summary>
public DateTime EndTime { get; set; }
/// <summary>Optional free-text details, e.g. available disk space or per-check health results.</summary>
public string Details { get; set; }
/// <summary>Exception message when the step failed; otherwise unset.</summary>
public string ErrorMessage { get; set; }
/// <summary>Elapsed time between StartTime and EndTime.</summary>
public TimeSpan Duration => EndTime.Subtract(StartTime);
}
/// <summary>Overall state of a deployment run.</summary>
public enum DeploymentStatus
{
// First member, hence the default value of a freshly created DeploymentResult.
InProgress,
// Assigned by DeploymentManager.DeployAsync after every step completed.
Success,
// Assigned by DeploymentManager.DeployAsync when any step throws.
Failed,
// Not assigned by the DeploymentManager code in this section.
RolledBack
}
/// <summary>State of a single deployment step.</summary>
public enum StepStatus
{
// First member, hence the default value of a freshly created DeploymentStep.
// Not assigned explicitly by the DeploymentManager code in this section.
InProgress,
// The step finished without throwing.
Success,
// The step threw, or a health check reported failure.
Failed,
// Not assigned by the DeploymentManager code in this section.
Skipped
}
20.2 容器化技术
Docker容器化
# Multi-stage Dockerfile example
# Stage 1: build
FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
WORKDIR /src
# Copy only the project files first so the restore layer stays cached until a .csproj changes
COPY ["MyApp.Api/MyApp.Api.csproj", "MyApp.Api/"]
COPY ["MyApp.Core/MyApp.Core.csproj", "MyApp.Core/"]
COPY ["MyApp.Infrastructure/MyApp.Infrastructure.csproj", "MyApp.Infrastructure/"]
RUN dotnet restore "MyApp.Api/MyApp.Api.csproj"
# Copy the rest of the source code
COPY . .
# Build the application
WORKDIR "/src/MyApp.Api"
RUN dotnet build "MyApp.Api.csproj" -c Release -o /app/build
# Stage 2: publish
FROM build AS publish
RUN dotnet publish "MyApp.Api.csproj" -c Release -o /app/publish /p:UseAppHost=false
# Stage 3: runtime
FROM mcr.microsoft.com/dotnet/aspnet:8.0 AS final
WORKDIR /app
# .NET 8 images listen on 8080 by default; pin HTTP to port 80 so the app matches
# the EXPOSE and HEALTHCHECK below (an explicit ASPNETCORE_URLS still overrides this)
ENV ASPNETCORE_HTTP_PORTS=80
# The runtime image does not ship curl, which the HEALTHCHECK needs; install it while still root
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*
# Create a non-root user
RUN adduser --disabled-password --gecos '' appuser && chown -R appuser /app
USER appuser
# Copy the published application
COPY --from=publish /app/publish .
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:80/health || exit 1
# Exposed ports
EXPOSE 80
EXPOSE 443
# Entry point
ENTRYPOINT ["dotnet", "MyApp.Api.dll"]
# docker-compose.yml
# Credentials are read from the environment (or an .env file next to this file);
# the defaults after ":-" are the tutorial placeholders and must be overridden for real use.
version: '3.8'
services:
  # Web API service
  api:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "5000:80"
      - "5001:443"
    environment:
      - ASPNETCORE_ENVIRONMENT=Production
      - ASPNETCORE_URLS=http://+:80;https://+:443
      - ASPNETCORE_Kestrel__Certificates__Default__Password=${CERT_PASSWORD:-password}
      - ASPNETCORE_Kestrel__Certificates__Default__Path=/https/aspnetapp.pfx
      - ConnectionStrings__DefaultConnection=Server=db;Database=MyAppDb;User=sa;Password=${SA_PASSWORD:-YourPassword123!};TrustServerCertificate=true
      - Redis__ConnectionString=redis:6379
    volumes:
      - ~/.aspnet/https:/https:ro
      - ./logs:/app/logs
    depends_on:
      - db
      - redis
    networks:
      - app-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M
  # SQL Server database
  db:
    image: mcr.microsoft.com/mssql/server:2022-latest
    environment:
      - ACCEPT_EULA=Y
      # Must be the same value as the password in the api connection string above
      - SA_PASSWORD=${SA_PASSWORD:-YourPassword123!}
      - MSSQL_PID=Express
    ports:
      - "1433:1433"
    volumes:
      - sqlserver_data:/var/opt/mssql
      # NOTE(review): confirm that this image actually runs scripts placed in /docker-entrypoint-initdb.d
      - ./database/init:/docker-entrypoint-initdb.d
    networks:
      - app-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 1G
  # Redis cache
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
      - ./redis/redis.conf:/usr/local/etc/redis/redis.conf
    command: redis-server /usr/local/etc/redis/redis.conf
    networks:
      - app-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 256M
        reservations:
          memory: 128M
  # Nginx reverse proxy
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf
      - ./nginx/ssl:/etc/nginx/ssl
      - ./logs/nginx:/var/log/nginx
    depends_on:
      - api
    networks:
      - app-network
    restart: unless-stopped
  # Elasticsearch (log storage)
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
    environment:
      - discovery.type=single-node
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - xpack.security.enabled=false
    ports:
      - "9200:9200"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    networks:
      - app-network
    restart: unless-stopped
  # Kibana (log visualisation)
  kibana:
    image: docker.elastic.co/kibana/kibana:8.8.0
    environment:
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    ports:
      - "5601:5601"
    depends_on:
      - elasticsearch
    networks:
      - app-network
    restart: unless-stopped
  # Prometheus (monitoring)
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'
    networks:
      - app-network
    restart: unless-stopped
  # Grafana (monitoring dashboards)
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
    depends_on:
      - prometheus
    networks:
      - app-network
    restart: unless-stopped
volumes:
  sqlserver_data:
  redis_data:
  elasticsearch_data:
  prometheus_data:
  grafana_data:
networks:
  app-network:
    driver: bridge
容器管理和编排
// Docker management service
/// <summary>
/// Thin wrapper around a Docker client for inspecting, starting, stopping and restarting
/// containers and for reading their stats, logs and health.
/// Containers are addressed by exact name (case-insensitive, leading '/' ignored) or by ID prefix.
/// </summary>
public class DockerManager : IDisposable
{
    private readonly ILogger<DockerManager> _logger;
    private readonly DockerClient _dockerClient;

    /// <summary>Creates the manager with a Docker client built from the default configuration.</summary>
    /// <param name="logger">Logger for operations and failures.</param>
    public DockerManager(ILogger<DockerManager> logger)
    {
        _logger = logger;
        _dockerClient = new DockerClientConfiguration().CreateClient();
    }

    /// <summary>
    /// Returns details of one container. An exact name or ID-prefix match is preferred;
    /// if there is none, the first container whose name contains <paramref name="containerName"/> is used.
    /// </summary>
    /// <param name="containerName">Container name, ID prefix or name fragment.</param>
    /// <returns>The container details, or null when nothing matches.</returns>
    /// <exception cref="ArgumentException"><paramref name="containerName"/> is null or blank.</exception>
    public async Task<ContainerInfo> GetContainerInfoAsync(string containerName)
    {
        try
        {
            EnsureContainerName(containerName);

            var containers = await _dockerClient.Containers.ListContainersAsync(
                new ContainersListParameters { All = true });

            var container = containers.FirstOrDefault(c => MatchesNameOrId(c, containerName))
                ?? containers.FirstOrDefault(c => c.Names.Any(name => name.Contains(containerName)));
            if (container == null)
            {
                return null;
            }

            return await BuildContainerInfoAsync(container);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get container info for {ContainerName}", containerName);
            throw;
        }
    }

    /// <summary>Returns details of every container, running or not.</summary>
    public async Task<List<ContainerInfo>> GetAllContainersAsync()
    {
        try
        {
            var containers = await _dockerClient.Containers.ListContainersAsync(
                new ContainersListParameters { All = true });

            // Build the details straight from the listing. Looking each container up again
            // by ID through a name match never finds it and re-lists all containers every time.
            var containerInfos = new List<ContainerInfo>();
            foreach (var container in containers)
            {
                containerInfos.Add(await BuildContainerInfoAsync(container));
            }

            return containerInfos;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get all containers");
            throw;
        }
    }

    /// <summary>Starts a container.</summary>
    /// <param name="containerName">Container name or ID prefix.</param>
    /// <returns>The ID of the started container.</returns>
    /// <exception cref="InvalidOperationException">The container was not found or did not start.</exception>
    public async Task<string> StartContainerAsync(string containerName)
    {
        try
        {
            var container = await FindContainerByNameAsync(containerName);
            if (container == null)
            {
                throw new InvalidOperationException($"Container {containerName} not found");
            }

            var started = await _dockerClient.Containers.StartContainerAsync(
                container.ID, new ContainerStartParameters());
            if (!started)
            {
                throw new InvalidOperationException($"Failed to start container {containerName}");
            }

            _logger.LogInformation("Container {ContainerName} started successfully", containerName);
            return container.ID;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to start container {ContainerName}", containerName);
            throw;
        }
    }

    /// <summary>Stops a container, waiting up to <paramref name="timeoutSeconds"/> before it is killed.</summary>
    /// <param name="containerName">Container name or ID prefix.</param>
    /// <param name="timeoutSeconds">Grace period in seconds; must not be negative.</param>
    /// <exception cref="InvalidOperationException">The container was not found.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="timeoutSeconds"/> is negative.</exception>
    public async Task StopContainerAsync(string containerName, int timeoutSeconds = 30)
    {
        try
        {
            var waitSeconds = ToWaitSeconds(timeoutSeconds);
            var container = await FindContainerByNameAsync(containerName);
            if (container == null)
            {
                throw new InvalidOperationException($"Container {containerName} not found");
            }

            var stopped = await _dockerClient.Containers.StopContainerAsync(
                container.ID, new ContainerStopParameters
                {
                    WaitBeforeKillSeconds = waitSeconds
                });
            if (stopped)
            {
                _logger.LogInformation("Container {ContainerName} stopped successfully", containerName);
            }
            else
            {
                _logger.LogWarning("Container {ContainerName} was already stopped", containerName);
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to stop container {ContainerName}", containerName);
            throw;
        }
    }

    /// <summary>Restarts a container, waiting up to <paramref name="timeoutSeconds"/> before it is killed.</summary>
    /// <param name="containerName">Container name or ID prefix.</param>
    /// <param name="timeoutSeconds">Grace period in seconds; must not be negative.</param>
    /// <exception cref="InvalidOperationException">The container was not found.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="timeoutSeconds"/> is negative.</exception>
    public async Task RestartContainerAsync(string containerName, int timeoutSeconds = 30)
    {
        try
        {
            var waitSeconds = ToWaitSeconds(timeoutSeconds);
            var container = await FindContainerByNameAsync(containerName);
            if (container == null)
            {
                throw new InvalidOperationException($"Container {containerName} not found");
            }

            await _dockerClient.Containers.RestartContainerAsync(
                container.ID, new ContainerRestartParameters
                {
                    WaitBeforeKillSeconds = waitSeconds
                });
            _logger.LogInformation("Container {ContainerName} restarted successfully", containerName);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to restart container {ContainerName}", containerName);
            throw;
        }
    }

    /// <summary>Reads a one-off stats sample for a container.</summary>
    /// <param name="containerName">Container name or ID prefix.</param>
    /// <exception cref="InvalidOperationException">The container was not found.</exception>
    public async Task<ContainerStats> GetContainerStatsAsync(string containerName)
    {
        try
        {
            var container = await FindContainerByNameAsync(containerName);
            if (container == null)
            {
                throw new InvalidOperationException($"Container {containerName} not found");
            }

            // NOTE(review): the shape of this stats call differs between Docker.DotNet versions
            // (stream result vs. IProgress callback) — confirm against the referenced package version.
            var statsStream = await _dockerClient.Containers.GetContainerStatsAsync(
                container.ID, new ContainerStatsParameters { Stream = false });
            var stats = await statsStream.ReadOutputToEndAsync(CancellationToken.None);

            return new ContainerStats
            {
                ContainerName = containerName,
                CpuUsage = CalculateCpuUsage(stats),
                MemoryUsage = CalculateMemoryUsage(stats),
                NetworkIO = CalculateNetworkIO(stats),
                BlockIO = CalculateBlockIO(stats),
                Timestamp = DateTime.UtcNow
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get stats for container {ContainerName}", containerName);
            throw;
        }
    }

    /// <summary>Reads the last <paramref name="tailLines"/> lines of a container's stdout and stderr.</summary>
    /// <param name="containerName">Container name or ID prefix.</param>
    /// <param name="tailLines">Number of trailing log lines to request.</param>
    /// <exception cref="InvalidOperationException">The container was not found.</exception>
    public async Task<string> GetContainerLogsAsync(string containerName, int tailLines = 100)
    {
        try
        {
            var container = await FindContainerByNameAsync(containerName);
            if (container == null)
            {
                throw new InvalidOperationException($"Container {containerName} not found");
            }

            // NOTE(review): the overload and return shape of the log-reading calls depend on the
            // Docker.DotNet version — confirm against the referenced package version.
            var logsStream = await _dockerClient.Containers.GetContainerLogsAsync(
                container.ID, new ContainerLogsParameters
                {
                    ShowStdout = true,
                    ShowStderr = true,
                    Tail = tailLines.ToString()
                });
            return await logsStream.ReadOutputToEndAsync(CancellationToken.None);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get logs for container {ContainerName}", containerName);
            throw;
        }
    }

    /// <summary>
    /// Reports whether a container is running and, when it defines a health check, healthy.
    /// </summary>
    /// <param name="containerName">Container name or ID prefix.</param>
    /// <returns>False when the container is missing, not running, unhealthy, or the check itself fails.</returns>
    public async Task<bool> IsContainerHealthyAsync(string containerName)
    {
        try
        {
            var container = await FindContainerByNameAsync(containerName);
            if (container == null)
            {
                return false;
            }

            var inspection = await _dockerClient.Containers.InspectContainerAsync(container.ID);

            // Container state
            if (inspection.State.Status != "running")
            {
                return false;
            }

            // Health status, when a health check is configured
            if (inspection.State.Health != null)
            {
                return inspection.State.Health.Status == "healthy";
            }

            // Without a health check, a running container counts as healthy
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to check health for container {ContainerName}", containerName);
            return false;
        }
    }

    /// <summary>Combines a listing entry with the container's inspection data.</summary>
    private async Task<ContainerInfo> BuildContainerInfoAsync(ContainerListResponse container)
    {
        var inspection = await _dockerClient.Containers.InspectContainerAsync(container.ID);
        return new ContainerInfo
        {
            Id = container.ID,
            Name = container.Names.FirstOrDefault()?.TrimStart('/'),
            Image = container.Image,
            Status = container.Status,
            State = container.State,
            Created = container.Created,
            Ports = container.Ports?.Select(p => new PortInfo
            {
                PrivatePort = p.PrivatePort,
                PublicPort = p.PublicPort,
                Type = p.Type
            }).ToList() ?? new List<PortInfo>(),
            Environment = inspection.Config.Env?.ToList() ?? new List<string>(),
            Mounts = inspection.Mounts?.Select(m => new MountInfo
            {
                Source = m.Source,
                Destination = m.Destination,
                Type = m.Type,
                ReadOnly = m.RW == false
            }).ToList() ?? new List<MountInfo>()
        };
    }

    /// <summary>Finds the first container matching the exact name or ID prefix; null when none matches.</summary>
    /// <exception cref="ArgumentException"><paramref name="containerName"/> is null or blank.</exception>
    private async Task<ContainerListResponse> FindContainerByNameAsync(string containerName)
    {
        EnsureContainerName(containerName);

        var containers = await _dockerClient.Containers.ListContainersAsync(
            new ContainersListParameters { All = true });
        return containers.FirstOrDefault(c => MatchesNameOrId(c, containerName));
    }

    /// <summary>True when a container name equals <paramref name="nameOrId"/> (ignoring case and the leading '/') or the ID starts with it.</summary>
    private static bool MatchesNameOrId(ContainerListResponse container, string nameOrId) =>
        container.Names.Any(name => name.TrimStart('/').Equals(nameOrId, StringComparison.OrdinalIgnoreCase)) ||
        container.ID.StartsWith(nameOrId, StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Rejects null or blank names. An empty string is a prefix and substring of every
    /// value, so it would otherwise select an arbitrary container.
    /// </summary>
    private static void EnsureContainerName(string containerName)
    {
        if (string.IsNullOrWhiteSpace(containerName))
        {
            throw new ArgumentException("Container name must not be null or blank", nameof(containerName));
        }
    }

    /// <summary>Converts a timeout to the unsigned value the Docker API expects, rejecting negatives instead of letting them wrap.</summary>
    private static uint ToWaitSeconds(int timeoutSeconds)
    {
        if (timeoutSeconds < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(timeoutSeconds), timeoutSeconds, "Timeout must not be negative");
        }

        return (uint)timeoutSeconds;
    }

    /// <summary>CPU usage calculation; not implemented yet, always returns 0.0.</summary>
    private double CalculateCpuUsage(ContainerStatsResponse stats)
    {
        return 0.0; // simplified implementation
    }

    /// <summary>Returns the memory usage reported in the sample, or 0 when the sample carries no memory stats.</summary>
    private long CalculateMemoryUsage(ContainerStatsResponse stats)
    {
        // The explicit conversion keeps this compiling whether the client exposes the value as signed or unsigned.
        return (long)(stats?.MemoryStats?.Usage ?? 0);
    }

    /// <summary>Network I/O calculation; not implemented yet, returns an empty instance.</summary>
    private NetworkIOStats CalculateNetworkIO(ContainerStatsResponse stats)
    {
        return new NetworkIOStats();
    }

    /// <summary>Block I/O calculation; not implemented yet, returns an empty instance.</summary>
    private BlockIOStats CalculateBlockIO(ContainerStatsResponse stats)
    {
        return new BlockIOStats();
    }

    /// <summary>Disposes the underlying Docker client.</summary>
    public void Dispose()
    {
        _dockerClient?.Dispose();
        GC.SuppressFinalize(this);
    }
}
// Container-related data models
/// <summary>Details of one Docker container, combining listing and inspection data.</summary>
public class ContainerInfo
{
    /// <summary>Container ID.</summary>
    public string Id { get; set; }
    /// <summary>Container name.</summary>
    public string Name { get; set; }
    /// <summary>Image the container was created from.</summary>
    public string Image { get; set; }
    /// <summary>Status text as reported by the container listing.</summary>
    public string Status { get; set; }
    /// <summary>State text as reported by the container listing.</summary>
    public string State { get; set; }
    /// <summary>Creation time of the container.</summary>
    public DateTime Created { get; set; }
    // Collections start empty rather than null so consumers can enumerate them
    // without a null check, matching the other models in this file.
    /// <summary>Port mappings; never null by default.</summary>
    public List<PortInfo> Ports { get; set; } = new();
    /// <summary>Environment entries from the container configuration; never null by default.</summary>
    public List<string> Environment { get; set; } = new();
    /// <summary>Mounts attached to the container; never null by default.</summary>
    public List<MountInfo> Mounts { get; set; } = new();
}
/// <summary>One port entry of a container, copied from the Docker container listing.</summary>
public class PortInfo
{
/// <summary>Private port of the entry (presumably the container-side port — confirm against the Docker API).</summary>
public ushort PrivatePort { get; set; }
/// <summary>Public port of the entry (presumably the host-side port — confirm); null when not set.</summary>
public ushort? PublicPort { get; set; }
/// <summary>Type string of the entry as reported by Docker.</summary>
public string Type { get; set; }
}
/// <summary>One mount of a container, copied from the Docker inspection data.</summary>
public class MountInfo
{
/// <summary>Source of the mount as reported by Docker.</summary>
public string Source { get; set; }
/// <summary>Destination of the mount as reported by Docker.</summary>
public string Destination { get; set; }
/// <summary>Mount type string as reported by Docker.</summary>
public string Type { get; set; }
/// <summary>True when Docker reports the mount's RW flag as false.</summary>
public bool ReadOnly { get; set; }
}
/// <summary>One stats sample for a container, produced by DockerManager.GetContainerStatsAsync.</summary>
public class ContainerStats
{
/// <summary>The name the caller used to look the container up.</summary>
public string ContainerName { get; set; }
/// <summary>CPU usage (unit not established in this file — confirm before displaying).</summary>
public double CpuUsage { get; set; }
/// <summary>Memory usage taken from the sample's memory stats (presumably bytes — confirm).</summary>
public long MemoryUsage { get; set; }
/// <summary>Network I/O counters.</summary>
public NetworkIOStats NetworkIO { get; set; }
/// <summary>Block I/O counters.</summary>
public BlockIOStats BlockIO { get; set; }
/// <summary>UTC time at which the sample object was created.</summary>
public DateTime Timestamp { get; set; }
}
/// <summary>Network I/O counters of a container. Names suggest receive/transmit totals — confirm units with the producer.</summary>
public class NetworkIOStats
{
/// <summary>Received bytes.</summary>
public long RxBytes { get; set; }
/// <summary>Transmitted bytes.</summary>
public long TxBytes { get; set; }
/// <summary>Received packets.</summary>
public long RxPackets { get; set; }
/// <summary>Transmitted packets.</summary>
public long TxPackets { get; set; }
}
/// <summary>Block (disk) I/O counters of a container. Names suggest read/write totals — confirm units with the producer.</summary>
public class BlockIOStats
{
/// <summary>Bytes read.</summary>
public long ReadBytes { get; set; }
/// <summary>Bytes written.</summary>
public long WriteBytes { get; set; }
/// <summary>Read operations.</summary>
public long ReadOps { get; set; }
/// <summary>Write operations.</summary>
public long WriteOps { get; set; }
}
20.3 持续集成/持续部署(CI/CD)
GitHub Actions工作流
# .github/workflows/ci-cd.yml
# Builds and tests on every push/PR, builds Docker images for main and releases,
# deploys develop to the dev app, and deploys releases to production through a
# staging-slot swap followed by a load test.
name: CI/CD Pipeline

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]
  release:
    types: [ published ]

env:
  DOTNET_VERSION: '8.0.x'
  AZURE_WEBAPP_NAME: 'myapp-prod'
  AZURE_WEBAPP_PACKAGE_PATH: './publish'

jobs:
  # Build and test job
  build-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Setup .NET
        uses: actions/setup-dotnet@v3
        with:
          dotnet-version: ${{ env.DOTNET_VERSION }}
      - name: Cache NuGet packages
        uses: actions/cache@v3
        with:
          path: ~/.nuget/packages
          key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.csproj') }}
          restore-keys: |
            ${{ runner.os }}-nuget-
      - name: Restore dependencies
        run: dotnet restore
      - name: Build application
        run: dotnet build --configuration Release --no-restore
      - name: Run unit tests
        # --logger trx produces the *.trx files consumed by the test-report step below.
        run: |
          dotnet test --configuration Release --no-build --verbosity normal \
            --logger trx \
            --collect:"XPlat Code Coverage" --results-directory ./coverage
      - name: Generate test report
        uses: dorny/test-reporter@v1
        if: success() || failure()
        with:
          name: .NET Tests
          path: '**/*.trx'
          reporter: dotnet-trx
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          directory: ./coverage
      - name: Run security scan
        # `dotnet list package` exits 0 even when it reports vulnerabilities,
        # so inspect its output and fail the step explicitly.
        run: |
          dotnet list package --vulnerable --include-transitive 2>&1 | tee vulnerable.log
          dotnet list package --deprecated
          if grep -q "has the following vulnerable packages" vulnerable.log; then
            echo "::error::Vulnerable NuGet packages detected"
            exit 1
          fi
      - name: Publish application
        run: |
          dotnet publish src/MyApp.Api/MyApp.Api.csproj \
            --configuration Release \
            --output ${{ env.AZURE_WEBAPP_PACKAGE_PATH }} \
            --no-build
      - name: Upload build artifacts
        # v3 of the artifact actions has been retired on github.com; v4 is required.
        uses: actions/upload-artifact@v4
        with:
          name: webapp-package
          path: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}

  # Code quality checks
  code-quality:
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      pull-requests: read
      security-events: write   # required for CodeQL to upload its results
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup .NET
        uses: actions/setup-dotnet@v3
        with:
          dotnet-version: ${{ env.DOTNET_VERSION }}
      - name: SonarCloud Scan
        # NOTE(review): pinned to a mutable branch; consider pinning a release tag or commit SHA.
        uses: SonarSource/sonarcloud-github-action@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
      # CodeQL must be initialised (with the language list) before the code is
      # built; `analyze` alone has no database to analyse.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: csharp
      - name: Build for CodeQL
        uses: github/codeql-action/autobuild@v3
      - name: Run CodeQL Analysis
        uses: github/codeql-action/analyze@v3

  # Docker build and push
  docker-build:
    runs-on: ubuntu-latest
    needs: [build-and-test, code-quality]
    if: github.ref == 'refs/heads/main' || github.event_name == 'release'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Login to Azure Container Registry
        uses: docker/login-action@v2
        with:
          registry: myregistry.azurecr.io
          username: ${{ secrets.ACR_USERNAME }}
          password: ${{ secrets.ACR_PASSWORD }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: |
            myapp/api
            myregistry.azurecr.io/myapp/api
          # The branch-prefixed sha tag is only enabled for branch refs: on a
          # release (tag ref) {{branch}} is empty and the tag would start with "-".
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-,enable=${{ github.ref_type == 'branch' }}
      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # Deploy to the development environment
  deploy-dev:
    runs-on: ubuntu-latest
    needs: [build-and-test]
    if: github.ref == 'refs/heads/develop'
    environment: development
    steps:
      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: webapp-package
          path: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
      - name: Deploy to Azure Web App (Dev)
        uses: azure/webapps-deploy@v2
        with:
          app-name: 'myapp-dev'
          slot-name: 'production'
          publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_DEV }}
          package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
      - name: Run smoke tests
        run: |
          curl -f https://myapp-dev.azurewebsites.net/health || exit 1

  # Deploy to the production environment
  deploy-prod:
    runs-on: ubuntu-latest
    needs: [build-and-test, code-quality, docker-build]
    if: github.event_name == 'release'
    environment: production
    steps:
      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: webapp-package
          path: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
      - name: Deploy to Azure Web App (Staging)
        uses: azure/webapps-deploy@v2
        with:
          app-name: ${{ env.AZURE_WEBAPP_NAME }}
          slot-name: 'staging'
          publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_STAGING }}
          package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
      - name: Run integration tests on staging
        run: |
          # Run integration tests
          curl -f https://myapp-prod-staging.azurewebsites.net/health || exit 1
          # Run further end-to-end tests here
      - name: Azure login
        # The publish profile only authenticates the deploy action; the `az`
        # commands in the swap step need an authenticated CLI session.
        uses: azure/login@v2
        with:
          creds: ${{ secrets.AZURE_CREDENTIALS }}
      - name: Swap staging to production
        uses: azure/CLI@v1
        with:
          azcliversion: 2.30.0
          inlineScript: |
            az webapp deployment slot swap \
              --resource-group myapp-rg \
              --name ${{ env.AZURE_WEBAPP_NAME }} \
              --slot staging \
              --target-slot production
      - name: Verify production deployment
        run: |
          sleep 30 # wait for the deployment to settle
          curl -f https://myapp-prod.azurewebsites.net/health || exit 1
      - name: Notify deployment success
        uses: 8398a7/action-slack@v3
        with:
          status: success
          text: 'Production deployment successful! 🚀'
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # Performance tests
  performance-test:
    runs-on: ubuntu-latest
    needs: [deploy-prod]
    if: github.event_name == 'release'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Run load tests
        run: |
          # Uses k6; any other load-testing tool would work here
          docker run --rm -v $PWD/tests:/tests \
            grafana/k6 run /tests/load-test.js
      - name: Upload performance test results
        uses: actions/upload-artifact@v4
        with:
          name: performance-results
          path: ./test-results/
Azure DevOps流水线
# azure-pipelines.yml
# Builds and tests the solution, then deploys develop to the dev app and main to
# production through a staging-slot swap.
trigger:
  branches:
    include:
      - main
      - develop
  paths:
    exclude:
      - README.md
      - docs/*

pool:
  vmImage: 'ubuntu-latest'

variables:
  buildConfiguration: 'Release'
  dotnetSdkVersion: '8.0.x'
  azureSubscription: 'MyAzureSubscription'
  webAppName: 'myapp-prod'
  resourceGroupName: 'myapp-rg'

stages:
  - stage: Build
    displayName: 'Build and Test'
    jobs:
      - job: BuildJob
        displayName: 'Build Application'
        steps:
          - task: UseDotNet@2
            displayName: 'Use .NET SDK'
            inputs:
              packageType: 'sdk'
              version: $(dotnetSdkVersion)
          - task: DotNetCoreCLI@2
            displayName: 'Restore packages'
            inputs:
              command: 'restore'
              projects: '**/*.csproj'
          - task: DotNetCoreCLI@2
            displayName: 'Build application'
            inputs:
              command: 'build'
              projects: '**/*.csproj'
              arguments: '--configuration $(buildConfiguration) --no-restore'
          - task: DotNetCoreCLI@2
            displayName: 'Run unit tests'
            inputs:
              command: 'test'
              projects: '**/*Tests.csproj'
              # The XPlat collector emits coverage.cobertura.xml, which is what the
              # Cobertura publish step below looks for.
              arguments: '--configuration $(buildConfiguration) --no-build --collect:"XPlat Code Coverage"'
          - task: PublishTestResults@2
            displayName: 'Publish test results'
            inputs:
              testResultsFormat: 'VSTest'
              testResultsFiles: '**/*.trx'
          - task: PublishCodeCoverageResults@1
            displayName: 'Publish code coverage'
            inputs:
              codeCoverageTool: 'Cobertura'
              summaryFileLocation: '$(Agent.TempDirectory)/**/coverage.cobertura.xml'
          - task: DotNetCoreCLI@2
            displayName: 'Publish application'
            inputs:
              command: 'publish'
              # publishWebProjects defaults to true, in which case `projects` is ignored.
              publishWebProjects: false
              projects: 'src/MyApp.Api/MyApp.Api.csproj'
              arguments: '--configuration $(buildConfiguration) --output $(Build.ArtifactStagingDirectory)'
              # The deploy stages pick up the package via '**/*.zip'.
              zipAfterPublish: true
          - task: PublishBuildArtifacts@1
            displayName: 'Publish artifacts'
            inputs:
              pathToPublish: '$(Build.ArtifactStagingDirectory)'
              artifactName: 'webapp'

  - stage: Deploy_Dev
    displayName: 'Deploy to Development'
    dependsOn: Build
    condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/develop'))
    jobs:
      - deployment: DeployDev
        displayName: 'Deploy to Dev Environment'
        environment: 'development'
        strategy:
          runOnce:
            deploy:
              steps:
                - task: AzureWebApp@1
                  displayName: 'Deploy to Azure Web App'
                  inputs:
                    azureSubscription: $(azureSubscription)
                    appType: 'webApp'
                    appName: 'myapp-dev'
                    package: '$(Pipeline.Workspace)/webapp/**/*.zip'

  - stage: Deploy_Prod
    displayName: 'Deploy to Production'
    dependsOn: Build
    condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/main'))
    jobs:
      - deployment: DeployProd
        displayName: 'Deploy to Production Environment'
        environment: 'production'
        strategy:
          runOnce:
            deploy:
              steps:
                - task: AzureWebApp@1
                  displayName: 'Deploy to Staging Slot'
                  inputs:
                    azureSubscription: $(azureSubscription)
                    appType: 'webApp'
                    appName: $(webAppName)
                    package: '$(Pipeline.Workspace)/webapp/**/*.zip'
                    deployToSlotOrASE: true
                    # Required by the task whenever deployToSlotOrASE is true.
                    resourceGroupName: $(resourceGroupName)
                    slotName: 'staging'
                - task: AzureCLI@2
                  displayName: 'Run smoke tests on staging'
                  inputs:
                    azureSubscription: $(azureSubscription)
                    scriptType: 'bash'
                    scriptLocation: 'inlineScript'
                    inlineScript: |
                      # Wait for the app to start
                      sleep 30
                      # Run the health check
                      curl -f https://$(webAppName)-staging.azurewebsites.net/health
                - task: AzureAppServiceManage@0
                  displayName: 'Swap staging to production'
                  inputs:
                    azureSubscription: $(azureSubscription)
                    action: 'Swap Slots'
                    webAppName: $(webAppName)
                    resourceGroupName: $(resourceGroupName)
                    sourceSlot: 'staging'
                    targetSlot: 'production'
CI/CD管理服务
// CI/CD pipeline management

/// <summary>
/// Triggers and inspects GitHub Actions workflow runs and exposes placeholder
/// deployment-status and rollback operations.
/// </summary>
public class PipelineManager
{
    private readonly ILogger<PipelineManager> _logger;
    private readonly GitHubClient _githubClient;
    private readonly HttpClient _azureDevOpsClient;

    public PipelineManager(
        ILogger<PipelineManager> logger,
        GitHubClient githubClient,
        HttpClient azureDevOpsClient)
    {
        _logger = logger;
        _githubClient = githubClient;
        _azureDevOpsClient = azureDevOpsClient;
    }

    /// <summary>
    /// Dispatches a workflow on <paramref name="branch"/> and returns the run found
    /// for it after a short wait.
    /// </summary>
    /// <returns>
    /// The matching run; its fields are null (status Unknown) when the workflow has no runs yet.
    /// </returns>
    public async Task<PipelineRun> TriggerGitHubWorkflowAsync(string owner, string repo, string workflowId, string branch = "main")
    {
        try
        {
            var workflowDispatch = new CreateWorkflowDispatch(branch);
            await _githubClient.Actions.Workflows.CreateDispatch(owner, repo, workflowId, workflowDispatch);
            _logger.LogInformation("GitHub workflow {WorkflowId} triggered for {Owner}/{Repo}", workflowId, owner, repo);

            // Wait for the workflow to start
            await Task.Delay(5000);

            // Fetch the workflow's runs (assumed to be listed newest first — TODO confirm).
            var runs = await _githubClient.Actions.Workflows.Runs.List(owner, repo, workflowId);

            // Prefer a run on the requested branch so a concurrent run on another
            // branch is not mistaken for this dispatch; fall back to the first run.
            var latestRun = runs.WorkflowRuns.FirstOrDefault(r => string.Equals(r.HeadBranch, branch, StringComparison.Ordinal))
                ?? runs.WorkflowRuns.FirstOrDefault();

            return new PipelineRun
            {
                Id = latestRun?.Id.ToString(),
                Status = MapGitHubStatus(latestRun?.Status),
                // UtcDateTime keeps Kind=Utc; DateTime would drop the offset (Kind=Unspecified).
                StartTime = latestRun?.CreatedAt?.UtcDateTime,
                Branch = branch,
                CommitSha = latestRun?.HeadSha,
                Url = latestRun?.HtmlUrl
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to trigger GitHub workflow {WorkflowId}", workflowId);
            throw;
        }
    }

    /// <summary>Loads one workflow run together with its jobs.</summary>
    public async Task<PipelineRun> GetGitHubWorkflowRunAsync(string owner, string repo, long runId)
    {
        try
        {
            var run = await _githubClient.Actions.Workflows.Runs.Get(owner, repo, runId);
            var jobs = await _githubClient.Actions.Workflows.Runs.ListJobs(owner, repo, runId);
            return new PipelineRun
            {
                Id = run.Id.ToString(),
                Status = MapGitHubStatus(run.Status),
                StartTime = run.CreatedAt?.UtcDateTime,
                EndTime = run.UpdatedAt?.UtcDateTime,
                Branch = run.HeadBranch,
                CommitSha = run.HeadSha,
                Url = run.HtmlUrl,
                Jobs = jobs.Jobs.Select(j => new PipelineJob
                {
                    Id = j.Id.ToString(),
                    Name = j.Name,
                    Status = MapGitHubStatus(j.Status),
                    StartTime = j.StartedAt?.UtcDateTime,
                    EndTime = j.CompletedAt?.UtcDateTime,
                    Url = j.HtmlUrl
                }).ToList()
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get GitHub workflow run {RunId}", runId);
            throw;
        }
    }

    /// <summary>Lists workflow runs of a repository, requesting <paramref name="count"/> per page.</summary>
    public async Task<List<PipelineRun>> GetRecentPipelineRunsAsync(string owner, string repo, int count = 10)
    {
        try
        {
            var runs = await _githubClient.Actions.Workflows.Runs.List(owner, repo, new WorkflowRunsRequest
            {
                PerPage = count
            });
            return runs.WorkflowRuns.Select(run => new PipelineRun
            {
                Id = run.Id.ToString(),
                Status = MapGitHubStatus(run.Status),
                StartTime = run.CreatedAt?.UtcDateTime,
                EndTime = run.UpdatedAt?.UtcDateTime,
                Branch = run.HeadBranch,
                CommitSha = run.HeadSha,
                Url = run.HtmlUrl
            }).ToList();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get recent pipeline runs");
            throw;
        }
    }

    /// <summary>
    /// Returns the deployment status of an environment. Placeholder: the values are
    /// hard-coded; a real implementation would query the Azure API, Kubernetes API, etc.
    /// </summary>
    public Task<DeploymentStatus> GetDeploymentStatusAsync(string environment)
    {
        try
        {
            // Nothing is awaited here, so the result is wrapped in a completed task
            // instead of marking the method async.
            var status = new DeploymentStatus
            {
                Environment = environment,
                Status = "Running",
                Version = "1.0.0",
                DeployedAt = DateTime.UtcNow.AddHours(-2),
                HealthStatus = "Healthy",
                Instances = new List<InstanceStatus>
                {
                    new InstanceStatus
                    {
                        Name = "instance-1",
                        Status = "Running",
                        Health = "Healthy",
                        StartTime = DateTime.UtcNow.AddHours(-2)
                    }
                }
            };
            return Task.FromResult(status);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get deployment status for {Environment}", environment);
            throw;
        }
    }

    /// <summary>
    /// Rolls an environment back to <paramref name="targetVersion"/>. Placeholder: the
    /// rollback itself is simulated with a delay.
    /// </summary>
    /// <returns>True on success; false when the rollback throws (the error is logged).</returns>
    public async Task<bool> RollbackDeploymentAsync(string environment, string targetVersion)
    {
        try
        {
            _logger.LogInformation("Starting rollback for {Environment} to version {Version}", environment, targetVersion);
            // Rollback logic goes here. It may involve:
            // 1. Updating the container image tag
            // 2. Redeploying to Kubernetes
            // 3. Updating the Azure App Service
            // 4. Rolling back database migrations (if needed)
            await Task.Delay(10000); // simulate the rollback
            _logger.LogInformation("Rollback completed for {Environment}", environment);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to rollback deployment for {Environment}", environment);
            return false;
        }
    }

    /// <summary>Maps a GitHub status string to <see cref="PipelineStatus"/>; unrecognised or null maps to Unknown.</summary>
    private PipelineStatus MapGitHubStatus(string status)
    {
        // Invariant lower-casing: culture-sensitive ToLower() breaks "IN_PROGRESS"
        // under cultures such as tr-TR, where 'I' lower-cases to a dotless 'ı'.
        return status?.ToLowerInvariant() switch
        {
            "queued" => PipelineStatus.Queued,
            "in_progress" => PipelineStatus.Running,
            "completed" => PipelineStatus.Completed,
            "cancelled" => PipelineStatus.Cancelled,
            "failure" => PipelineStatus.Failed,
            "success" => PipelineStatus.Success,
            _ => PipelineStatus.Unknown
        };
    }
}
// Pipeline-related data models

/// <summary>A pipeline run: status, timing, source revision and its jobs.</summary>
public class PipelineRun
{
    public string Id { get; set; }
    public PipelineStatus Status { get; set; }
    public DateTime? StartTime { get; set; }
    public DateTime? EndTime { get; set; }
    public string Branch { get; set; }
    public string CommitSha { get; set; }
    public string Url { get; set; }
    public List<PipelineJob> Jobs { get; set; } = new();

    /// <summary>
    /// Elapsed time between <see cref="StartTime"/> and <see cref="EndTime"/>; null when
    /// either is missing (a missing start no longer yields a span measured from DateTime.MinValue).
    /// </summary>
    public TimeSpan? Duration => EndTime - StartTime;
}
/// <summary>A single job within a pipeline run.</summary>
public class PipelineJob
{
    public string Id { get; set; }
    public string Name { get; set; }
    public PipelineStatus Status { get; set; }
    public DateTime? StartTime { get; set; }
    public DateTime? EndTime { get; set; }
    public string Url { get; set; }

    /// <summary>
    /// Elapsed time between <see cref="StartTime"/> and <see cref="EndTime"/>; null when
    /// either is missing (a missing start no longer yields a span measured from DateTime.MinValue).
    /// </summary>
    public TimeSpan? Duration => EndTime - StartTime;
}
/// <summary>Deployment state of one environment, including its instances.</summary>
public class DeploymentStatus
{
    /// <summary>Environment name.</summary>
    public string Environment { get; set; }

    /// <summary>Deployment status label.</summary>
    public string Status { get; set; }

    /// <summary>Deployed version label.</summary>
    public string Version { get; set; }

    /// <summary>When the deployment happened.</summary>
    public DateTime DeployedAt { get; set; }

    /// <summary>Health status label.</summary>
    public string HealthStatus { get; set; }

    /// <summary>Instances of the deployment; starts out empty.</summary>
    public List<InstanceStatus> Instances { get; set; } = new List<InstanceStatus>();
}
/// <summary>State of a single deployed instance.</summary>
public class InstanceStatus
{
    /// <summary>Instance name.</summary>
    public string Name { get; set; }

    /// <summary>Instance status label.</summary>
    public string Status { get; set; }

    /// <summary>Instance health label.</summary>
    public string Health { get; set; }

    /// <summary>When the instance started.</summary>
    public DateTime StartTime { get; set; }
}
/// <summary>Lifecycle states of a pipeline run or job. Unknown is the default value.</summary>
public enum PipelineStatus
{
    Unknown = 0,
    Queued = 1,
    Running = 2,
    Completed = 3,
    Success = 4,
    Failed = 5,
    Cancelled = 6
}
20.4 云平台部署
Azure部署配置
// Azure deployment management

/// <summary>
/// Deploys applications to Azure App Service and Azure Container Instances and
/// reads resources and metrics for a subscription.
/// </summary>
public class AzureDeploymentManager
{
    private readonly ILogger<AzureDeploymentManager> _logger;
    private readonly TokenCredential _credential;
    private readonly string _subscriptionId;

    public AzureDeploymentManager(
        ILogger<AzureDeploymentManager> logger,
        TokenCredential credential,
        string subscriptionId)
    {
        _logger = logger;
        _credential = credential;
        _subscriptionId = subscriptionId;
    }

    /// <summary>
    /// Runs the App Service deployment steps in order: plan, web app, app settings,
    /// connection strings, code, then optional custom domain and autoscaling.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="deployment"/> is null.</exception>
    public async Task<AzureDeploymentResult> DeployToAppServiceAsync(AzureAppServiceDeployment deployment)
    {
        if (deployment is null)
        {
            throw new ArgumentNullException(nameof(deployment));
        }

        try
        {
            var result = new AzureDeploymentResult
            {
                DeploymentId = Guid.NewGuid().ToString(),
                StartTime = DateTime.UtcNow
            };
            _logger.LogInformation("Starting Azure App Service deployment {DeploymentId}", result.DeploymentId);

            // 1. Create or update the App Service plan
            await CreateOrUpdateAppServicePlanAsync(deployment);
            // 2. Create or update the web app
            await CreateOrUpdateWebAppAsync(deployment);
            // 3. Configure app settings
            await ConfigureAppSettingsAsync(deployment);
            // 4. Configure connection strings
            await ConfigureConnectionStringsAsync(deployment);
            // 5. Deploy the application code
            await DeployApplicationCodeAsync(deployment);
            // 6. Configure custom domain and SSL
            if (!string.IsNullOrEmpty(deployment.CustomDomain))
            {
                await ConfigureCustomDomainAsync(deployment);
            }
            // 7. Configure autoscaling
            if (deployment.AutoScaling != null)
            {
                await ConfigureAutoScalingAsync(deployment);
            }

            result.Status = "Success";
            result.EndTime = DateTime.UtcNow;
            result.WebAppUrl = $"https://{deployment.WebAppName}.azurewebsites.net";
            _logger.LogInformation("Azure App Service deployment {DeploymentId} completed successfully", result.DeploymentId);
            return result;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Azure App Service deployment failed");
            throw;
        }
    }

    /// <summary>Creates or updates a single-container, public-IP Linux container group.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="deployment"/> is null.</exception>
    public async Task<AzureDeploymentResult> DeployToContainerInstancesAsync(AzureContainerDeployment deployment)
    {
        if (deployment is null)
        {
            throw new ArgumentNullException(nameof(deployment));
        }

        try
        {
            var result = new AzureDeploymentResult
            {
                DeploymentId = Guid.NewGuid().ToString(),
                StartTime = DateTime.UtcNow
            };
            _logger.LogInformation("Starting Azure Container Instances deployment {DeploymentId}", result.DeploymentId);
            var containerClient = new ContainerInstanceManagementClient(_subscriptionId, _credential);

            // Create the container group
            var containerGroup = new ContainerGroup
            {
                Location = deployment.Location,
                OsType = OperatingSystemTypes.Linux,
                RestartPolicy = ContainerGroupRestartPolicy.Always,
                Containers =
                {
                    new Container
                    {
                        Name = deployment.ContainerName,
                        Image = deployment.ImageName,
                        Resources = new ResourceRequirements
                        {
                            Requests = new ResourceRequests
                            {
                                Cpu = deployment.CpuCores,
                                MemoryInGB = deployment.MemoryGB
                            }
                        },
                        Ports = deployment.Ports.Select(p => new ContainerPort { Port = p }).ToList(),
                        EnvironmentVariables = deployment.EnvironmentVariables
                            .Select(kv => new EnvironmentVariable { Name = kv.Key, Value = kv.Value })
                            .ToList()
                    }
                },
                IpAddress = new IpAddress
                {
                    Type = ContainerGroupIpAddressType.Public,
                    Ports = deployment.Ports.Select(p => new Port { PortProperty = p, Protocol = ContainerGroupNetworkProtocol.TCP }).ToList()
                }
            };
            await containerClient.ContainerGroups.CreateOrUpdateAsync(
                deployment.ResourceGroupName,
                deployment.ContainerGroupName,
                containerGroup);

            result.Status = "Success";
            result.EndTime = DateTime.UtcNow;
            _logger.LogInformation("Azure Container Instances deployment {DeploymentId} completed successfully", result.DeploymentId);
            return result;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Azure Container Instances deployment failed");
            throw;
        }
    }

    /// <summary>Lists the resources of a resource group; null tags become an empty dictionary.</summary>
    public async Task<List<AzureResource>> GetResourcesAsync(string resourceGroupName)
    {
        try
        {
            var resourceClient = new ResourceManagementClient(_subscriptionId, _credential);
            var resources = new List<AzureResource>();
            await foreach (var resource in resourceClient.Resources.ListByResourceGroupAsync(resourceGroupName))
            {
                resources.Add(new AzureResource
                {
                    Id = resource.Id,
                    Name = resource.Name,
                    Type = resource.Type,
                    Location = resource.Location,
                    Tags = resource.Tags?.ToDictionary(t => t.Key, t => t.Value) ?? new Dictionary<string, string>()
                });
            }
            return resources;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get Azure resources for resource group {ResourceGroup}", resourceGroupName);
            throw;
        }
    }

    /// <summary>
    /// Reads CPU, memory, request and response-time metrics for a resource over the
    /// window ending now and spanning <paramref name="timeRange"/>.
    /// </summary>
    public async Task<AzureResourceMetrics> GetResourceMetricsAsync(string resourceId, TimeSpan timeRange)
    {
        try
        {
            var monitorClient = new MonitorManagementClient(_subscriptionId, _credential);
            var endTime = DateTime.UtcNow;
            var startTime = endTime.Subtract(timeRange);

            // Format with the invariant culture: in a custom format string ':' is the
            // culture's time separator, so the current culture could corrupt the timestamp.
            const string timestampFormat = "yyyy-MM-ddTHH:mm:ssZ";
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var timespan = $"{startTime.ToString(timestampFormat, invariant)}/{endTime.ToString(timestampFormat, invariant)}";

            var metricsResponse = await monitorClient.Metrics.ListAsync(
                resourceId,
                timespan: timespan,
                metricnames: "CpuPercentage,MemoryPercentage,HttpRequests,ResponseTime");
            return new AzureResourceMetrics
            {
                ResourceId = resourceId,
                TimeRange = timeRange,
                Metrics = metricsResponse.Value.Select(m => new MetricData
                {
                    Name = m.Name.Value,
                    Unit = m.Unit?.ToString(),
                    Values = m.Timeseries.SelectMany(ts => ts.Data.Select(d => new MetricValue
                    {
                        Timestamp = d.TimeStamp,
                        // Use the first aggregation that is present, else 0.
                        Value = d.Average ?? d.Total ?? d.Maximum ?? 0
                    })).ToList()
                }).ToList()
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get metrics for resource {ResourceId}", resourceId);
            throw;
        }
    }

    private async Task CreateOrUpdateAppServicePlanAsync(AzureAppServiceDeployment deployment)
    {
        var webSiteClient = new WebSiteManagementClient(_subscriptionId, _credential);
        var appServicePlan = new AppServicePlan
        {
            Location = deployment.Location,
            Sku = new SkuDescription
            {
                Name = deployment.SkuName,
                Tier = deployment.SkuTier,
                Capacity = deployment.InstanceCount
            },
            Kind = "app"
        };
        await webSiteClient.AppServicePlans.CreateOrUpdateAsync(
            deployment.ResourceGroupName,
            deployment.AppServicePlanName,
            appServicePlan);
    }

    private async Task CreateOrUpdateWebAppAsync(AzureAppServiceDeployment deployment)
    {
        var webSiteClient = new WebSiteManagementClient(_subscriptionId, _credential);
        var webApp = new Site
        {
            Location = deployment.Location,
            ServerFarmId = BuildAppServicePlanResourceId(deployment),
            SiteConfig = new SiteConfig
            {
                NetFrameworkVersion = "v8.0",
                AlwaysOn = true,
                Http20Enabled = true,
                MinTlsVersion = SupportedTlsVersions.OneFullStopTwo,
                FtpsState = FtpsState.Disabled
            }
        };
        await webSiteClient.WebApps.CreateOrUpdateAsync(
            deployment.ResourceGroupName,
            deployment.WebAppName,
            webApp);
    }

    private async Task ConfigureAppSettingsAsync(AzureAppServiceDeployment deployment)
    {
        var webSiteClient = new WebSiteManagementClient(_subscriptionId, _credential);
        var appSettings = new StringDictionary
        {
            Properties = deployment.AppSettings
        };
        await webSiteClient.WebApps.UpdateApplicationSettingsAsync(
            deployment.ResourceGroupName,
            deployment.WebAppName,
            appSettings);
    }

    private async Task ConfigureConnectionStringsAsync(AzureAppServiceDeployment deployment)
    {
        var webSiteClient = new WebSiteManagementClient(_subscriptionId, _credential);
        var connectionStrings = new ConnectionStringDictionary
        {
            // Every connection string is registered with the SQLAzure type.
            Properties = deployment.ConnectionStrings.ToDictionary(
                kv => kv.Key,
                kv => new ConnStringValueTypePair
                {
                    Value = kv.Value,
                    Type = ConnectionStringType.SQLAzure
                })
        };
        await webSiteClient.WebApps.UpdateConnectionStringsAsync(
            deployment.ResourceGroupName,
            deployment.WebAppName,
            connectionStrings);
    }

    private async Task DeployApplicationCodeAsync(AzureAppServiceDeployment deployment)
    {
        // Placeholder: the real code deployment belongs here
        // (ZIP deploy, Git deploy or container deploy).
        await Task.Delay(5000); // simulate the deployment
    }

    private async Task ConfigureCustomDomainAsync(AzureAppServiceDeployment deployment)
    {
        // Placeholder: configure the custom domain and SSL certificate
        await Task.Delay(1000); // simulate the configuration
    }

    /// <summary>
    /// Creates the autoscale setting for the App Service plan with a scale-out rule
    /// (CPU above CpuThresholdUp) and a scale-in rule (CPU below CpuThresholdDown).
    /// </summary>
    private async Task ConfigureAutoScalingAsync(AzureAppServiceDeployment deployment)
    {
        var monitorClient = new MonitorManagementClient(_subscriptionId, _credential);
        var planResourceId = BuildAppServicePlanResourceId(deployment);
        var autoScaleSettings = new AutoscaleSettingResource
        {
            Location = deployment.Location,
            AutoscaleSetting = new AutoscaleSetting
            {
                Enabled = true,
                Profiles = new List<AutoscaleProfile>
                {
                    new AutoscaleProfile
                    {
                        Name = "Default",
                        Capacity = new ScaleCapacity
                        {
                            Minimum = deployment.AutoScaling.MinInstances.ToString(),
                            Maximum = deployment.AutoScaling.MaxInstances.ToString(),
                            Default = deployment.AutoScaling.DefaultInstances.ToString()
                        },
                        Rules = new List<ScaleRule>
                        {
                            CreateCpuScaleRule(
                                planResourceId,
                                ComparisonOperationType.GreaterThan,
                                deployment.AutoScaling.CpuThresholdUp,
                                ScaleDirection.Increase),
                            // Without a scale-in rule the plan would only ever grow.
                            CreateCpuScaleRule(
                                planResourceId,
                                ComparisonOperationType.LessThan,
                                deployment.AutoScaling.CpuThresholdDown,
                                ScaleDirection.Decrease)
                        }
                    }
                }
            }
        };
        await monitorClient.AutoscaleSettings.CreateOrUpdateAsync(
            deployment.ResourceGroupName,
            $"{deployment.WebAppName}-autoscale",
            autoScaleSettings);
    }

    // Builds a CPU-percentage rule that changes the instance count by one in the given direction.
    private static ScaleRule CreateCpuScaleRule(
        string metricResourceUri,
        ComparisonOperationType comparison,
        double threshold,
        ScaleDirection direction)
    {
        return new ScaleRule
        {
            MetricTrigger = new MetricTrigger
            {
                MetricName = "CpuPercentage",
                MetricResourceUri = metricResourceUri,
                TimeGrain = TimeSpan.FromMinutes(1),
                Statistic = MetricStatisticType.Average,
                TimeWindow = TimeSpan.FromMinutes(5),
                TimeAggregation = TimeAggregationType.Average,
                OperatorProperty = comparison,
                Threshold = threshold
            },
            ScaleAction = new ScaleAction
            {
                Direction = direction,
                Type = ScaleType.ChangeCount,
                Value = "1",
                Cooldown = TimeSpan.FromMinutes(5)
            }
        };
    }

    // Builds the ARM resource id of the deployment's App Service plan.
    private string BuildAppServicePlanResourceId(AzureAppServiceDeployment deployment)
    {
        return $"/subscriptions/{_subscriptionId}/resourceGroups/{deployment.ResourceGroupName}/providers/Microsoft.Web/serverfarms/{deployment.AppServicePlanName}";
    }
}
// Azure deployment data models

/// <summary>Input describing an Azure App Service deployment.</summary>
public class AzureAppServiceDeployment
{
    public string ResourceGroupName { get; set; }
    public string Location { get; set; }
    public string AppServicePlanName { get; set; }
    public string WebAppName { get; set; }

    /// <summary>Plan SKU name; defaults to "S1".</summary>
    public string SkuName { get; set; } = "S1";

    /// <summary>Plan SKU tier; defaults to "Standard".</summary>
    public string SkuTier { get; set; } = "Standard";

    /// <summary>Plan capacity; defaults to 1.</summary>
    public int InstanceCount { get; set; } = 1;

    /// <summary>Application settings; starts out empty.</summary>
    public Dictionary<string, string> AppSettings { get; set; } = new Dictionary<string, string>();

    /// <summary>Connection strings keyed by name; starts out empty.</summary>
    public Dictionary<string, string> ConnectionStrings { get; set; } = new Dictionary<string, string>();

    /// <summary>Optional custom domain.</summary>
    public string CustomDomain { get; set; }

    /// <summary>Optional autoscaling configuration.</summary>
    public AutoScalingConfig AutoScaling { get; set; }
}
/// <summary>Input describing an Azure Container Instances deployment.</summary>
public class AzureContainerDeployment
{
    public string ResourceGroupName { get; set; }
    public string Location { get; set; }
    public string ContainerGroupName { get; set; }
    public string ContainerName { get; set; }
    public string ImageName { get; set; }

    /// <summary>Requested CPU cores; defaults to 1.0.</summary>
    public double CpuCores { get; set; } = 1.0;

    /// <summary>Requested memory in GB; defaults to 1.5.</summary>
    public double MemoryGB { get; set; } = 1.5;

    /// <summary>Ports for the container; starts out empty.</summary>
    public List<int> Ports { get; set; } = new List<int>();

    /// <summary>Environment variables for the container; starts out empty.</summary>
    public Dictionary<string, string> EnvironmentVariables { get; set; } = new Dictionary<string, string>();
}
/// <summary>Autoscaling bounds and CPU thresholds.</summary>
public class AutoScalingConfig
{
    /// <summary>Minimum instance count; defaults to 1.</summary>
    public int MinInstances { get; set; } = 1;

    /// <summary>Maximum instance count; defaults to 10.</summary>
    public int MaxInstances { get; set; } = 10;

    /// <summary>Default instance count; defaults to 2.</summary>
    public int DefaultInstances { get; set; } = 2;

    /// <summary>Upper CPU threshold; defaults to 70.</summary>
    public double CpuThresholdUp { get; set; } = 70.0;

    /// <summary>Lower CPU threshold; defaults to 30.</summary>
    public double CpuThresholdDown { get; set; } = 30.0;
}
/// <summary>Outcome of an Azure deployment.</summary>
public class AzureDeploymentResult
{
    public string DeploymentId { get; set; }
    public string Status { get; set; }
    public DateTime StartTime { get; set; }
    public DateTime? EndTime { get; set; }
    public string WebAppUrl { get; set; }
    public string ErrorMessage { get; set; }

    /// <summary>Elapsed time from <see cref="StartTime"/> to <see cref="EndTime"/>; null while EndTime is unset.</summary>
    public TimeSpan? Duration
    {
        get
        {
            if (!EndTime.HasValue)
            {
                return null;
            }

            return EndTime.Value - StartTime;
        }
    }
}
/// <summary>Summary of an Azure resource: id, name, type, location and tags.</summary>
public class AzureResource
{
    public string Id { get; set; }
    public string Name { get; set; }
    public string Type { get; set; }
    public string Location { get; set; }

    /// <summary>Resource tags; starts out empty rather than null so it can be enumerated safely.</summary>
    public Dictionary<string, string> Tags { get; set; } = new();
}
/// <summary>Metrics collected for one resource over a time range.</summary>
public class AzureResourceMetrics
{
    public string ResourceId { get; set; }
    public TimeSpan TimeRange { get; set; }

    /// <summary>Collected metrics; starts out empty rather than null so it can be enumerated safely.</summary>
    public List<MetricData> Metrics { get; set; } = new();
}
/// <summary>A named metric with its unit and data points.</summary>
public class MetricData
{
    public string Name { get; set; }
    public string Unit { get; set; }

    /// <summary>Data points; starts out empty rather than null so it can be enumerated safely.</summary>
    public List<MetricValue> Values { get; set; } = new();
}
/// <summary>A single metric data point.</summary>
public class MetricValue
{
    /// <summary>When the value was recorded.</summary>
    public DateTime Timestamp { get; set; }

    /// <summary>The recorded value.</summary>
    public double Value { get; set; }
}
20.5 监控和日志
应用程序监控
// 应用程序监控服务
public class ApplicationMonitoringService
{
private readonly ILogger<ApplicationMonitoringService> _logger;
private readonly IMetricsCollector _metricsCollector;
private readonly IHealthCheckService _healthCheckService;
private readonly IAlertService _alertService;
/// <summary>Creates the service and stores its collaborators.</summary>
/// <param name="logger">Logger used for diagnostics.</param>
/// <param name="metricsCollector">Source of metric values.</param>
/// <param name="healthCheckService">Source of health-check data.</param>
/// <param name="alertService">Service used to send and read alerts.</param>
public ApplicationMonitoringService(
    ILogger<ApplicationMonitoringService> logger,
    IMetricsCollector metricsCollector,
    IHealthCheckService healthCheckService,
    IAlertService alertService)
    => (_logger, _metricsCollector, _healthCheckService, _alertService)
        = (logger, metricsCollector, healthCheckService, alertService);
/// <summary>
/// Builds a monitoring report for the window ending now and spanning
/// <paramref name="timeRange"/>, then sends any alerts derived from it.
/// </summary>
/// <param name="timeRange">Length of the reporting window.</param>
/// <returns>The populated report.</returns>
public async Task<MonitoringReport> GenerateMonitoringReportAsync(TimeSpan timeRange)
{
    try
    {
        var windowEnd = DateTime.UtcNow;
        var windowStart = windowEnd.Subtract(timeRange);

        var result = new MonitoringReport();
        result.TimeRange = timeRange;
        result.GeneratedAt = windowEnd;

        // Sections are gathered one after another: performance, business, errors,
        // health-check history, resource usage.
        result.PerformanceMetrics = await CollectPerformanceMetricsAsync(windowStart, windowEnd);
        result.BusinessMetrics = await CollectBusinessMetricsAsync(windowStart, windowEnd);
        result.ErrorStatistics = await CollectErrorStatisticsAsync(windowStart, windowEnd);
        result.HealthCheckResults = await _healthCheckService.GetHealthCheckHistoryAsync(windowStart, windowEnd);
        result.ResourceUsage = await CollectResourceUsageAsync(windowStart, windowEnd);

        // Trend analysis reads the sections collected above.
        result.TrendAnalysis = AnalyzeTrends(result);

        // Send alerts based on the finished report.
        await GenerateAlertsAsync(result);

        _logger.LogInformation("Monitoring report generated for time range {TimeRange}", timeRange);
        return result;
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to generate monitoring report");
        throw;
    }
}
/// <summary>
/// Compares the current CPU, memory, error-rate and response-time readings against
/// fixed thresholds, sends an alert for each breach and returns the alerts raised.
/// </summary>
/// <returns>The alerts raised by this check; empty when nothing exceeded its threshold.</returns>
public async Task<List<Alert>> CheckAlertsAsync()
{
    const int cpuLimit = 80;
    const int memoryLimit = 85;
    const int errorRateLimit = 5;
    const int responseTimeLimit = 2000;

    try
    {
        var raised = new List<Alert>();

        // CPU usage
        var cpu = await _metricsCollector.GetCurrentCpuUsageAsync();
        if (cpu > cpuLimit)
        {
            var cpuAlert = new Alert
            {
                Id = Guid.NewGuid().ToString(),
                Type = AlertType.Performance,
                Severity = AlertSeverity.Warning,
                Title = "High CPU Usage",
                Description = $"CPU usage is {cpu:F1}%, which exceeds the threshold of 80%",
                Timestamp = DateTime.UtcNow,
                Source = "CPU Monitor",
                Value = cpu,
                Threshold = cpuLimit
            };
            raised.Add(cpuAlert);
        }

        // Memory usage
        var memory = await _metricsCollector.GetCurrentMemoryUsageAsync();
        if (memory > memoryLimit)
        {
            var memoryAlert = new Alert
            {
                Id = Guid.NewGuid().ToString(),
                Type = AlertType.Performance,
                Severity = AlertSeverity.Critical,
                Title = "High Memory Usage",
                Description = $"Memory usage is {memory:F1}%, which exceeds the threshold of 85%",
                Timestamp = DateTime.UtcNow,
                Source = "Memory Monitor",
                Value = memory,
                Threshold = memoryLimit
            };
            raised.Add(memoryAlert);
        }

        // Error rate
        var errors = await _metricsCollector.GetCurrentErrorRateAsync();
        if (errors > errorRateLimit)
        {
            var errorAlert = new Alert
            {
                Id = Guid.NewGuid().ToString(),
                Type = AlertType.Error,
                Severity = AlertSeverity.Warning,
                Title = "High Error Rate",
                Description = $"Error rate is {errors:F2}%, which exceeds the threshold of 5%",
                Timestamp = DateTime.UtcNow,
                Source = "Error Monitor",
                Value = errors,
                Threshold = errorRateLimit
            };
            raised.Add(errorAlert);
        }

        // Response time
        var latency = await _metricsCollector.GetAverageResponseTimeAsync();
        if (latency > responseTimeLimit)
        {
            var latencyAlert = new Alert
            {
                Id = Guid.NewGuid().ToString(),
                Type = AlertType.Performance,
                Severity = AlertSeverity.Warning,
                Title = "High Response Time",
                Description = $"Average response time is {latency}ms, which exceeds the threshold of 2000ms",
                Timestamp = DateTime.UtcNow,
                Source = "Response Time Monitor",
                Value = latency,
                Threshold = responseTimeLimit
            };
            raised.Add(latencyAlert);
        }

        // Send the alerts one at a time, in the order they were raised.
        foreach (var pending in raised)
        {
            await _alertService.SendAlertAsync(pending);
        }

        return raised;
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to check alerts");
        throw;
    }
}
/// <summary>
/// Assembles the dashboard snapshot: real-time metrics, overall health, the ten
/// most recent alerts and the service statuses.
/// </summary>
/// <returns>The populated dashboard data, timestamped at the start of collection.</returns>
public async Task<DashboardData> GetDashboardDataAsync()
{
    try
    {
        var snapshot = new DashboardData();
        snapshot.Timestamp = DateTime.UtcNow;

        // Real-time metrics, read one after another
        var cpu = await _metricsCollector.GetCurrentCpuUsageAsync();
        var memory = await _metricsCollector.GetCurrentMemoryUsageAsync();
        var connections = await _metricsCollector.GetActiveConnectionsAsync();
        var requestRate = await _metricsCollector.GetRequestsPerSecondAsync();
        var latency = await _metricsCollector.GetAverageResponseTimeAsync();
        var errors = await _metricsCollector.GetCurrentErrorRateAsync();
        snapshot.RealTimeMetrics = new RealTimeMetrics
        {
            CpuUsage = cpu,
            MemoryUsage = memory,
            ActiveConnections = connections,
            RequestsPerSecond = requestRate,
            AverageResponseTime = latency,
            ErrorRate = errors
        };

        // System status
        snapshot.SystemStatus = await _healthCheckService.GetOverallHealthAsync();

        // Recent alerts
        snapshot.RecentAlerts = await _alertService.GetRecentAlertsAsync(10);

        // Service statuses
        snapshot.ServiceStatuses = await GetServiceStatusesAsync();

        return snapshot;
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to get dashboard data");
        throw;
    }
}
/// <summary>Reads the performance metrics for the given window from the metrics collector.</summary>
/// <param name="startTime">Start of the window.</param>
/// <param name="endTime">End of the window.</param>
private async Task<PerformanceMetrics> CollectPerformanceMetricsAsync(DateTime startTime, DateTime endTime)
{
    // Each value is awaited in turn before the result object is assembled.
    var averageLatency = await _metricsCollector.GetAverageResponseTimeAsync(startTime, endTime);
    var maxLatency = await _metricsCollector.GetMaxResponseTimeAsync(startTime, endTime);
    var minLatency = await _metricsCollector.GetMinResponseTimeAsync(startTime, endTime);
    var requestCount = await _metricsCollector.GetTotalRequestsAsync(startTime, endTime);
    var requestRate = await _metricsCollector.GetAverageRequestsPerSecondAsync(startTime, endTime);
    var throughput = await _metricsCollector.GetThroughputAsync(startTime, endTime);
    var cpu = await _metricsCollector.GetAverageCpuUsageAsync(startTime, endTime);
    var memory = await _metricsCollector.GetAverageMemoryUsageAsync(startTime, endTime);

    return new PerformanceMetrics
    {
        AverageResponseTime = averageLatency,
        MaxResponseTime = maxLatency,
        MinResponseTime = minLatency,
        TotalRequests = requestCount,
        RequestsPerSecond = requestRate,
        Throughput = throughput,
        CpuUsage = cpu,
        MemoryUsage = memory
    };
}
/// <summary>Reads the business metrics for the given window from the metrics collector.</summary>
/// <param name="startTime">Start of the window.</param>
/// <param name="endTime">End of the window.</param>
private async Task<BusinessMetrics> CollectBusinessMetricsAsync(DateTime startTime, DateTime endTime)
{
    // Each value is awaited in turn before the result object is assembled.
    var users = await _metricsCollector.GetTotalUsersAsync(startTime, endTime);
    var activeUsers = await _metricsCollector.GetActiveUsersAsync(startTime, endTime);
    var registrations = await _metricsCollector.GetNewRegistrationsAsync(startTime, endTime);
    var orders = await _metricsCollector.GetTotalOrdersAsync(startTime, endTime);
    var revenue = await _metricsCollector.GetRevenueAsync(startTime, endTime);
    var conversion = await _metricsCollector.GetConversionRateAsync(startTime, endTime);

    return new BusinessMetrics
    {
        TotalUsers = users,
        ActiveUsers = activeUsers,
        NewRegistrations = registrations,
        TotalOrders = orders,
        Revenue = revenue,
        ConversionRate = conversion
    };
}
/// <summary>Reads the error statistics for the given window from the metrics collector.</summary>
/// <param name="startTime">Start of the window.</param>
/// <param name="endTime">End of the window.</param>
private async Task<ErrorStatistics> CollectErrorStatisticsAsync(DateTime startTime, DateTime endTime)
{
    // Each value is awaited in turn before the result object is assembled.
    var errorCount = await _metricsCollector.GetTotalErrorsAsync(startTime, endTime);
    var errorRate = await _metricsCollector.GetErrorRateAsync(startTime, endTime);
    var byType = await _metricsCollector.GetErrorsByTypeAsync(startTime, endTime);
    var byEndpoint = await _metricsCollector.GetErrorsByEndpointAsync(startTime, endTime);
    var critical = await _metricsCollector.GetCriticalErrorsAsync(startTime, endTime);

    return new ErrorStatistics
    {
        TotalErrors = errorCount,
        ErrorRate = errorRate,
        ErrorsByType = byType,
        ErrorsByEndpoint = byEndpoint,
        CriticalErrors = critical
    };
}
/// <summary>
/// Queries the metrics collector for the resource usage figures of the given time window.
/// </summary>
/// <param name="startTime">Start of the window passed to the time-ranged collector calls.</param>
/// <param name="endTime">End of the window passed to the time-ranged collector calls.</param>
/// <returns>A <see cref="ResourceUsage"/> filled with the collected values.</returns>
private async Task<ResourceUsage> CollectResourceUsageAsync(DateTime startTime, DateTime endTime)
{
    // Values are fetched sequentially, in the order listed here.
    var averageCpu = await _metricsCollector.GetAverageCpuUsageAsync(startTime, endTime);
    var peakCpu = await _metricsCollector.GetPeakCpuUsageAsync(startTime, endTime);
    var averageMemory = await _metricsCollector.GetAverageMemoryUsageAsync(startTime, endTime);
    var peakMemory = await _metricsCollector.GetPeakMemoryUsageAsync(startTime, endTime);

    // Disk usage is the only value requested without the time window.
    var diskUsage = await _metricsCollector.GetDiskUsageAsync();
    var networkTraffic = await _metricsCollector.GetNetworkTrafficAsync(startTime, endTime);

    return new ResourceUsage
    {
        AverageCpuUsage = averageCpu,
        PeakCpuUsage = peakCpu,
        AverageMemoryUsage = averageMemory,
        PeakMemoryUsage = peakMemory,
        DiskUsage = diskUsage,
        NetworkTraffic = networkTraffic
    };
}
/// <summary>
/// Derives a trend direction for four headline values of the report:
/// average response time, error rate, throughput and active users.
/// </summary>
/// <param name="report">Report whose performance, error and business sections are read.</param>
/// <returns>A <see cref="TrendAnalysis"/> holding one direction per value.</returns>
private TrendAnalysis AnalyzeTrends(MonitoringReport report)
{
    // Every trend is computed by CalculateTrend from the metric name and its current value.
    var responseTimeTrend = CalculateTrend("ResponseTime", report.PerformanceMetrics.AverageResponseTime);
    var errorRateTrend = CalculateTrend("ErrorRate", report.ErrorStatistics.ErrorRate);
    var throughputTrend = CalculateTrend("Throughput", report.PerformanceMetrics.Throughput);
    var userGrowthTrend = CalculateTrend("UserGrowth", report.BusinessMetrics.ActiveUsers);

    return new TrendAnalysis
    {
        ResponseTimeTrend = responseTimeTrend,
        ErrorRateTrend = errorRateTrend,
        ThroughputTrend = throughputTrend,
        UserGrowthTrend = userGrowthTrend
    };
}
/// <summary>
/// Placeholder trend calculation: always reports <see cref="TrendDirection.Stable"/>.
/// </summary>
/// <remarks>
/// A real implementation should compare the current value with historical values
/// of the same metric. Both arguments are currently ignored.
/// </remarks>
/// <param name="metricName">Name of the metric being evaluated (currently unused).</param>
/// <param name="currentValue">Current value of the metric (currently unused).</param>
/// <returns><see cref="TrendDirection.Stable"/> in every case.</returns>
private TrendDirection CalculateTrend(string metricName, double currentValue)
    => TrendDirection.Stable;
/// <summary>
/// Sends an alert for each threshold that the given report exceeds.
/// </summary>
/// <remarks>
/// Two independent checks are made: an average response time above 2000 raises a
/// performance warning, and an error rate above 5 raises a critical error alert.
/// When both thresholds are exceeded, both alerts are sent, performance first.
/// </remarks>
/// <param name="report">Report whose performance metrics and error statistics are inspected.</param>
private async Task GenerateAlertsAsync(MonitoringReport report)
{
    const int MaxAverageResponseTime = 2000; // rendered with an "ms" suffix in the alert text
    const int MaxErrorRate = 5;              // rendered with a "%" suffix in the alert text

    if (report.PerformanceMetrics.AverageResponseTime > MaxAverageResponseTime)
    {
        var slowResponseAlert = new Alert
        {
            Type = AlertType.Performance,
            Severity = AlertSeverity.Warning,
            Title = "High Average Response Time",
            Description = $"Average response time is {report.PerformanceMetrics.AverageResponseTime}ms"
        };
        await _alertService.SendAlertAsync(slowResponseAlert);
    }

    if (report.ErrorStatistics.ErrorRate > MaxErrorRate)
    {
        var errorRateAlert = new Alert
        {
            Type = AlertType.Error,
            Severity = AlertSeverity.Critical,
            Title = "High Error Rate",
            Description = $"Error rate is {report.ErrorStatistics.ErrorRate}%"
        };
        await _alertService.SendAlertAsync(errorRateAlert);
    }
}
/// <summary>
/// Returns the status entries for the "Web API", "Database" and "Cache" services.
/// </summary>
/// <remarks>
/// The entries are hard-coded as running and healthy; no service is actually probed here.
/// </remarks>
/// <returns>An already-completed task holding the three status entries.</returns>
private Task<List<ServiceStatus>> GetServiceStatusesAsync()
{
    // Take a single timestamp so every entry of the snapshot agrees on when it was checked.
    var checkedAt = DateTime.UtcNow;

    var statuses = new List<ServiceStatus>
    {
        new ServiceStatus
        {
            Name = "Web API",
            Status = "Running",
            Health = "Healthy",
            LastChecked = checkedAt
        },
        new ServiceStatus
        {
            Name = "Database",
            Status = "Running",
            Health = "Healthy",
            LastChecked = checkedAt
        },
        new ServiceStatus
        {
            Name = "Cache",
            Status = "Running",
            Health = "Healthy",
            LastChecked = checkedAt
        }
    };

    // Nothing is awaited, so hand back a completed task rather than declaring the
    // method async without an await (compiler warning CS1998).
    return Task.FromResult(statuses);
}
}
/// <summary>
/// Log management service: searches, analyzes and summarizes application logs.
/// </summary>
/// <remarks>
/// Only <c>SearchLogsAsync</c> uses the Elasticsearch client. The analysis,
/// anomaly-detection and dashboard helpers are placeholders that return fixed
/// sample data. The injected log aggregator is stored but not used by any
/// method of this class.
/// </remarks>
public class LogManagementService
{
    private readonly ILogger<LogManagementService> _logger;
    private readonly IElasticsearchClient _elasticsearchClient;
    private readonly ILogAggregator _logAggregator;

    /// <summary>
    /// Creates the service with its logger, search client and log aggregator.
    /// </summary>
    /// <param name="logger">Logger used to record failures before they are rethrown.</param>
    /// <param name="elasticsearchClient">Client used to run log searches.</param>
    /// <param name="logAggregator">Log aggregator dependency (currently unused).</param>
    public LogManagementService(
        ILogger<LogManagementService> logger,
        IElasticsearchClient elasticsearchClient,
        ILogAggregator logAggregator)
    {
        _logger = logger;
        _elasticsearchClient = elasticsearchClient;
        _logAggregator = logAggregator;
    }

    /// <summary>
    /// Searches the <c>logs-*</c> indices with the filters of the request and returns
    /// one page of results, newest first.
    /// </summary>
    /// <param name="request">Search filters and paging information.</param>
    /// <returns>The matching page of log entries together with the total hit count and aggregations.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<LogSearchResult> SearchLogsAsync(LogSearchRequest request)
    {
        if (request == null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        try
        {
            var searchRequest = new SearchRequest<LogEntry>("logs-*")
            {
                Query = BuildQuery(request),
                Sort = new List<ISort>
                {
                    new FieldSort { Field = "timestamp", Order = SortOrder.Descending }
                },
                Size = request.PageSize,
                // PageNumber acts as a zero-based page index: page 0 starts at offset 0.
                From = request.PageNumber * request.PageSize
            };

            var response = await _elasticsearchClient.SearchAsync<LogEntry>(searchRequest);

            return new LogSearchResult
            {
                TotalCount = response.Total,
                Logs = response.Documents.ToList(),
                PageNumber = request.PageNumber,
                PageSize = request.PageSize,
                Aggregations = ExtractAggregations(response.Aggregations)
            };
        }
        catch (Exception ex)
        {
            // Log for diagnostics, then let the caller see the original exception.
            _logger.LogError(ex, "Failed to search logs");
            throw;
        }
    }

    /// <summary>
    /// Runs the error, performance, user-behavior, security and trend analyses
    /// one after another and combines them into a single result.
    /// </summary>
    /// <param name="request">Analysis request; its time range is copied into the result.</param>
    /// <returns>The combined analysis result, stamped with the current UTC time.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<LogAnalysisResult> AnalyzeLogsAsync(LogAnalysisRequest request)
    {
        if (request == null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        try
        {
            var result = new LogAnalysisResult
            {
                TimeRange = request.TimeRange,
                AnalyzedAt = DateTime.UtcNow
            };

            // Error analysis
            result.ErrorAnalysis = await AnalyzeErrorsAsync(request);
            // Performance analysis
            result.PerformanceAnalysis = await AnalyzePerformanceAsync(request);
            // User behavior analysis
            result.UserBehaviorAnalysis = await AnalyzeUserBehaviorAsync(request);
            // Security analysis
            result.SecurityAnalysis = await AnalyzeSecurityAsync(request);
            // Trend analysis
            result.TrendAnalysis = await AnalyzeTrendsAsync(request);

            return result;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to analyze logs");
            throw;
        }
    }

    /// <summary>
    /// Runs every anomaly detector and collects the alerts they produce.
    /// </summary>
    /// <param name="request">Detection request handed to each detector.</param>
    /// <returns>All alerts raised by the detectors; empty when nothing was detected.</returns>
    public async Task<List<LogAlert>> DetectAnomaliesAsync(AnomalyDetectionRequest request)
    {
        try
        {
            var alerts = new List<LogAlert>();

            // Error-rate anomaly (single alert or null)
            var errorRateAnomaly = await DetectErrorRateAnomalyAsync(request);
            if (errorRateAnomaly != null)
            {
                alerts.Add(errorRateAnomaly);
            }

            // Response-time anomaly (single alert or null)
            var responseTimeAnomaly = await DetectResponseTimeAnomalyAsync(request);
            if (responseTimeAnomaly != null)
            {
                alerts.Add(responseTimeAnomaly);
            }

            // Security threats
            var securityThreats = await DetectSecurityThreatsAsync(request);
            alerts.AddRange(securityThreats);

            // Unusual access patterns
            var accessPatternAnomalies = await DetectAccessPatternAnomaliesAsync(request);
            alerts.AddRange(accessPatternAnomalies);

            return alerts;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to detect anomalies");
            throw;
        }
    }

    /// <summary>
    /// Builds the log dashboard for the period that ends now and spans <paramref name="timeRange"/>.
    /// </summary>
    /// <param name="timeRange">Length of the period to cover, counted back from the current UTC time.</param>
    /// <returns>The populated dashboard, including the 10 most recent alerts at most.</returns>
    public async Task<LogDashboard> GetLogDashboardAsync(TimeSpan timeRange)
    {
        try
        {
            var endTime = DateTime.UtcNow;
            var startTime = endTime.Subtract(timeRange);

            var dashboard = new LogDashboard
            {
                TimeRange = timeRange,
                GeneratedAt = endTime
            };

            // Log statistics
            dashboard.LogStatistics = await GetLogStatisticsAsync(startTime, endTime);
            // Error statistics
            dashboard.ErrorStatistics = await GetErrorStatisticsFromLogsAsync(startTime, endTime);
            // Performance metrics
            dashboard.PerformanceMetrics = await GetPerformanceMetricsFromLogsAsync(startTime, endTime);
            // Most requested endpoints
            dashboard.TopEndpoints = await GetTopEndpointsAsync(startTime, endTime);
            // User activity
            dashboard.UserActivity = await GetUserActivityAsync(startTime, endTime);
            // Most recent alerts
            dashboard.RecentAlerts = await GetRecentLogAlertsAsync(10);

            return dashboard;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to get log dashboard");
            throw;
        }
    }

    /// <summary>
    /// Translates the search request into a bool query whose <c>Must</c> clause holds
    /// one sub-query per supplied criterion, so every supplied criterion has to match.
    /// </summary>
    /// <param name="request">Search request whose optional filters are inspected.</param>
    /// <returns>The combined query; its <c>Must</c> list is empty when no criterion is set.</returns>
    private QueryContainer BuildQuery(LogSearchRequest request)
    {
        var queries = new List<QueryContainer>();

        // Time range: each bound is applied on its own, so a request carrying only
        // a start time or only an end time is still filtered on that bound.
        if (request.StartTime.HasValue || request.EndTime.HasValue)
        {
            var timeRange = new DateRangeQuery
            {
                Field = "timestamp"
            };
            if (request.StartTime.HasValue)
            {
                timeRange.GreaterThanOrEqualTo = request.StartTime.Value;
            }
            if (request.EndTime.HasValue)
            {
                timeRange.LessThanOrEqualTo = request.EndTime.Value;
            }
            queries.Add(timeRange);
        }

        // Log level
        if (!string.IsNullOrEmpty(request.LogLevel))
        {
            queries.Add(new TermQuery
            {
                Field = "level",
                Value = request.LogLevel
            });
        }

        // Service name
        if (!string.IsNullOrEmpty(request.ServiceName))
        {
            queries.Add(new TermQuery
            {
                Field = "serviceName",
                Value = request.ServiceName
            });
        }

        // Keyword search across message, exception and all properties.* fields
        if (!string.IsNullOrEmpty(request.Keyword))
        {
            queries.Add(new MultiMatchQuery
            {
                Query = request.Keyword,
                Fields = new[] { "message", "exception", "properties.*" }
            });
        }

        return new BoolQuery
        {
            Must = queries
        };
    }

    /// <summary>
    /// Flattens the aggregations of a search response into a plain dictionary.
    /// </summary>
    /// <remarks>
    /// Bucket aggregates become a list of key/count pairs, value aggregates contribute
    /// their value, and any other aggregate kind is skipped.
    /// </remarks>
    /// <param name="aggregations">Aggregations keyed by name; may be null.</param>
    /// <returns>The flattened aggregations; empty when <paramref name="aggregations"/> is null or empty.</returns>
    private Dictionary<string, object> ExtractAggregations(IReadOnlyDictionary<string, IAggregate> aggregations)
    {
        var result = new Dictionary<string, object>();
        if (aggregations == null)
        {
            // A response without aggregations yields an empty result instead of a NullReferenceException.
            return result;
        }

        foreach (var agg in aggregations)
        {
            if (agg.Value is BucketAggregate bucketAgg)
            {
                result[agg.Key] = bucketAgg.Items.Select(item => new
                {
                    Key = item.Key,
                    Count = item.DocCount
                }).ToList();
            }
            else if (agg.Value is ValueAggregate valueAgg)
            {
                result[agg.Key] = valueAgg.Value;
            }
        }

        return result;
    }

    /// <summary>
    /// Placeholder error analysis: returns fixed sample figures and ignores the request.
    /// </summary>
    private Task<ErrorAnalysis> AnalyzeErrorsAsync(LogAnalysisRequest request)
    {
        // TODO: implement the real error analysis.
        // Nothing is awaited, so a completed task is returned (avoids warning CS1998).
        return Task.FromResult(new ErrorAnalysis
        {
            TotalErrors = 150,
            ErrorRate = 2.5,
            TopErrors = new List<ErrorSummary>
            {
                new ErrorSummary
                {
                    ErrorType = "NullReferenceException",
                    Count = 45,
                    Percentage = 30
                },
                new ErrorSummary
                {
                    ErrorType = "TimeoutException",
                    Count = 30,
                    Percentage = 20
                }
            }
        });
    }

    /// <summary>
    /// Placeholder performance analysis: returns fixed sample figures and ignores the request.
    /// </summary>
    private Task<PerformanceAnalysis> AnalyzePerformanceAsync(LogAnalysisRequest request)
    {
        // TODO: implement the real performance analysis.
        return Task.FromResult(new PerformanceAnalysis
        {
            AverageResponseTime = 250,
            P95ResponseTime = 500,
            P99ResponseTime = 1000,
            SlowestEndpoints = new List<EndpointPerformance>
            {
                new EndpointPerformance
                {
                    Endpoint = "/api/orders",
                    AverageResponseTime = 800,
                    RequestCount = 1200
                }
            }
        });
    }

    /// <summary>
    /// Placeholder user-behavior analysis: returns fixed sample figures and ignores the request.
    /// </summary>
    private Task<UserBehaviorAnalysis> AnalyzeUserBehaviorAsync(LogAnalysisRequest request)
    {
        // TODO: implement the real user-behavior analysis.
        return Task.FromResult(new UserBehaviorAnalysis
        {
            TotalUsers = 5000,
            ActiveUsers = 1200,
            TopPages = new List<PageVisit>
            {
                new PageVisit
                {
                    Page = "/dashboard",
                    Visits = 3000,
                    UniqueVisitors = 800
                }
            }
        });
    }

    /// <summary>
    /// Placeholder security analysis: returns fixed sample figures and ignores the request.
    /// </summary>
    private Task<SecurityAnalysis> AnalyzeSecurityAsync(LogAnalysisRequest request)
    {
        // TODO: implement the real security analysis.
        return Task.FromResult(new SecurityAnalysis
        {
            FailedLoginAttempts = 25,
            SuspiciousActivities = 5,
            BlockedRequests = 100,
            TopAttackSources = new List<AttackSource>
            {
                new AttackSource
                {
                    IpAddress = "192.168.1.100",
                    AttackCount = 15,
                    AttackType = "Brute Force"
                }
            }
        });
    }

    /// <summary>
    /// Placeholder trend analysis: returns fixed trend directions and ignores the request.
    /// </summary>
    private Task<LogTrendAnalysis> AnalyzeTrendsAsync(LogAnalysisRequest request)
    {
        // TODO: implement the real trend analysis.
        return Task.FromResult(new LogTrendAnalysis
        {
            LogVolumeTrend = TrendDirection.Increasing,
            ErrorRateTrend = TrendDirection.Stable,
            ResponseTimeTrend = TrendDirection.Decreasing
        });
    }

    /// <summary>
    /// Raises an alert when the current error rate is more than three times the normal rate.
    /// </summary>
    /// <remarks>
    /// Both rates are hard-coded sample values for now, and the request is ignored.
    /// </remarks>
    /// <returns>The alert, or null when the rate is not anomalous.</returns>
    private Task<LogAlert> DetectErrorRateAnomalyAsync(AnomalyDetectionRequest request)
    {
        // The current rate counts as anomalous once it exceeds this multiple of the normal rate.
        const double AnomalyMultiplier = 3;

        var currentErrorRate = 8.5; // sample value; should be computed from the logs
        var normalErrorRate = 2.0;  // sample value; should be the historical average

        if (currentErrorRate > normalErrorRate * AnomalyMultiplier)
        {
            return Task.FromResult(new LogAlert
            {
                Id = Guid.NewGuid().ToString(),
                Type = "ErrorRateAnomaly",
                Severity = "High",
                Message = $"Error rate ({currentErrorRate}%) is significantly higher than normal ({normalErrorRate}%)",
                DetectedAt = DateTime.UtcNow
            });
        }

        return Task.FromResult<LogAlert>(null);
    }

    /// <summary>
    /// Placeholder response-time detector: never reports an anomaly (always yields null).
    /// </summary>
    private Task<LogAlert> DetectResponseTimeAnomalyAsync(AnomalyDetectionRequest request)
    {
        // Simplified implementation: no detection is performed yet.
        return Task.FromResult<LogAlert>(null);
    }

    /// <summary>
    /// Placeholder security-threat detector: always yields an empty list.
    /// </summary>
    private Task<List<LogAlert>> DetectSecurityThreatsAsync(AnomalyDetectionRequest request)
    {
        // TODO: implement security-threat detection.
        return Task.FromResult(new List<LogAlert>());
    }

    /// <summary>
    /// Placeholder access-pattern detector: always yields an empty list.
    /// </summary>
    private Task<List<LogAlert>> DetectAccessPatternAnomaliesAsync(AnomalyDetectionRequest request)
    {
        // TODO: implement access-pattern anomaly detection.
        return Task.FromResult(new List<LogAlert>());
    }

    /// <summary>
    /// Placeholder log statistics: returns fixed sample counts and ignores the time window.
    /// </summary>
    private Task<LogStatistics> GetLogStatisticsAsync(DateTime startTime, DateTime endTime)
    {
        return Task.FromResult(new LogStatistics
        {
            TotalLogs = 1000000,
            LogsByLevel = new Dictionary<string, int>
            {
                { "Info", 800000 },
                { "Warning", 150000 },
                { "Error", 45000 },
                { "Critical", 5000 }
            },
            LogsByService = new Dictionary<string, int>
            {
                { "WebAPI", 600000 },
                { "OrderService", 250000 },
                { "UserService", 150000 }
            }
        });
    }

    /// <summary>
    /// Placeholder error statistics: returns fixed sample counts and ignores the time window.
    /// </summary>
    private Task<ErrorStatistics> GetErrorStatisticsFromLogsAsync(DateTime startTime, DateTime endTime)
    {
        return Task.FromResult(new ErrorStatistics
        {
            TotalErrors = 50000,
            ErrorRate = 5.0,
            ErrorsByType = new Dictionary<string, int>
            {
                { "NullReferenceException", 15000 },
                { "TimeoutException", 10000 },
                { "ValidationException", 8000 }
            },
            ErrorsByEndpoint = new Dictionary<string, int>
            {
                { "/api/orders", 12000 },
                { "/api/users", 8000 },
                { "/api/products", 6000 }
            },
            CriticalErrors = 500
        });
    }

    /// <summary>
    /// Placeholder performance metrics: returns fixed sample figures and ignores the time window.
    /// </summary>
    private Task<PerformanceMetrics> GetPerformanceMetricsFromLogsAsync(DateTime startTime, DateTime endTime)
    {
        return Task.FromResult(new PerformanceMetrics
        {
            AverageResponseTime = 250,
            MaxResponseTime = 5000,
            MinResponseTime = 10,
            TotalRequests = 2000000,
            RequestsPerSecond = 500,
            Throughput = 1000
        });
    }

    /// <summary>
    /// Placeholder endpoint ranking: returns two fixed sample endpoints and ignores the time window.
    /// </summary>
    private Task<List<EndpointStatistics>> GetTopEndpointsAsync(DateTime startTime, DateTime endTime)
    {
        return Task.FromResult(new List<EndpointStatistics>
        {
            new EndpointStatistics
            {
                Endpoint = "/api/orders",
                RequestCount = 500000,
                AverageResponseTime = 300,
                ErrorCount = 12000
            },
            new EndpointStatistics
            {
                Endpoint = "/api/users",
                RequestCount = 300000,
                AverageResponseTime = 200,
                ErrorCount = 8000
            }
        });
    }

    /// <summary>
    /// Placeholder user activity summary: returns fixed sample figures and ignores the time window.
    /// </summary>
    private Task<UserActivitySummary> GetUserActivityAsync(DateTime startTime, DateTime endTime)
    {
        return Task.FromResult(new UserActivitySummary
        {
            TotalUsers = 10000,
            ActiveUsers = 2500,
            NewUsers = 150,
            TopUserActions = new Dictionary<string, int>
            {
                { "Login", 15000 },
                { "ViewProduct", 50000 },
                { "PlaceOrder", 8000 }
            }
        });
    }

    /// <summary>
    /// Placeholder for the recent-alert lookup: yields a single sample alert,
    /// truncated to at most <paramref name="count"/> entries.
    /// </summary>
    /// <param name="count">Maximum number of alerts to return; zero or less yields an empty list.</param>
    private Task<List<LogAlert>> GetRecentLogAlertsAsync(int count)
    {
        var alerts = new List<LogAlert>
        {
            new LogAlert
            {
                Id = Guid.NewGuid().ToString(),
                Type = "HighErrorRate",
                Severity = "High",
                Message = "Error rate exceeded threshold",
                DetectedAt = DateTime.UtcNow.AddMinutes(-10)
            }
        };

        // Honor the requested limit instead of ignoring the parameter.
        return Task.FromResult(alerts.Take(count).ToList());
    }
}