1. 概述

本章将通过实际案例分析和最佳实践指南,帮助您在生产环境中更好地应用GoAccess。我们将涵盖性能优化、安全配置、监控策略、故障排除等关键领域的最佳实践。

2. 性能优化最佳实践

2.1 硬件配置建议

2.1.1 服务器规格推荐

# hardware-recommendations.yml - 硬件配置推荐
production_environments:
  small_scale:  # 日志量 < 1GB/天
    cpu: "4 cores"
    memory: "8GB"
    storage: "100GB SSD"
    network: "1Gbps"
    concurrent_users: "< 100"
    
  medium_scale:  # 日志量 1-10GB/天
    cpu: "8 cores"
    memory: "16GB"
    storage: "500GB SSD"
    network: "10Gbps"
    concurrent_users: "100-1000"
    
  large_scale:  # 日志量 10-100GB/天
    cpu: "16 cores"
    memory: "32GB"
    storage: "2TB NVMe SSD"
    network: "10Gbps"
    concurrent_users: "1000-10000"
    
  enterprise_scale:  # 日志量 > 100GB/天
    cpu: "32+ cores"
    memory: "64GB+"
    storage: "10TB+ NVMe SSD RAID"
    network: "25Gbps+"
    concurrent_users: "> 10000"

storage_optimization:
  log_storage:
    type: "High IOPS SSD"
    raid_level: "RAID 10"
    compression: "enabled"
    retention_policy: "automated"
    
  database_storage:
    type: "NVMe SSD"
    raid_level: "RAID 1"
    backup_frequency: "daily"
    replication: "enabled"
    
  cache_storage:
    type: "RAM Disk"
    size: "25% of total memory"
    persistence: "optional"

2.1.2 性能调优脚本

#!/bin/bash
# performance-tuning.sh - system performance tuning script

set -euo pipefail

# ANSI color escape sequences used by the log_* helpers below
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color (resets the terminal color)

# 日志函数
# Print an informational message to stdout with a blue [INFO] tag.
# Arguments: $1 - message text (backslash escapes are interpreted)
log_info() {
    local message=$1
    printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}

# Print a success message to stdout with a green [SUCCESS] tag.
# Arguments: $1 - message text (backslash escapes are interpreted)
log_success() {
    local message=$1
    printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}"
}

# Print a warning message to stdout with a yellow [WARNING] tag.
# Arguments: $1 - message text (backslash escapes are interpreted)
log_warning() {
    local message=$1
    printf '%b\n' "${YELLOW}[WARNING]${NC} ${message}"
}

# Print an error message to stdout with a red [ERROR] tag.
# Arguments: $1 - message text (backslash escapes are interpreted)
log_error() {
    local message=$1
    printf '%b\n' "${RED}[ERROR]${NC} ${message}"
}

# 检查是否为root用户
# Abort the script unless it is running with an effective UID of 0.
# Outputs: an error line via log_error when not root
# Returns: 0 when root; otherwise exits the shell with status 1
check_root() {
    (( EUID == 0 )) && return 0

    log_error "此脚本需要root权限运行"
    exit 1
}

# 系统信息收集
# Print one report section: a "=== title ===" header followed by the output
# of the given command. A missing or failing command is noted in the report
# instead of aborting the caller under `set -e`.
# Arguments: $1 - section title; $2.. - command and its arguments
_write_info_section() {
    local title=$1
    shift

    echo "=== ${title} ==="
    if ! command -v "$1" >/dev/null 2>&1; then
        echo "(命令不可用: $1)"
    elif ! "$@"; then
        echo "(命令执行失败: $*)"
    fi
}

# Collect basic system information into /tmp/system_info.txt.
# The report is built in a mktemp-created file (mode 0600) and then renamed
# into place, so a symlink planted at the predictable /tmp path is replaced
# rather than followed while running as root.
# Outputs: progress via log_info / log_success; report file on disk
collect_system_info() {
    log_info "收集系统信息..."

    local info_file="/tmp/system_info.txt"
    local staging_file

    staging_file=$(mktemp "${info_file}.XXXXXX")

    {
        _write_info_section "系统信息" uname -a
        echo ""
        _write_info_section "CPU信息" lscpu
        echo ""
        _write_info_section "内存信息" free -h
        echo ""
        _write_info_section "磁盘信息" df -h
        echo ""
        _write_info_section "网络信息" ip addr show
    } > "$staging_file"

    # -T: never treat the destination as a directory to move into.
    mv -f -T -- "$staging_file" "$info_file"

    log_success "系统信息已保存到 /tmp/system_info.txt"
}

# 内核参数优化
# Install the GoAccess kernel tuning parameters and load them.
# The settings are written to a dedicated drop-in under /etc/sysctl.d
# (overwritten on each run) instead of being appended to /etc/sysctl.conf,
# so re-running the script does not accumulate duplicate entries.
# Keys the running kernel rejects (for example bbr when the module is not
# available) produce a warning rather than aborting the script.
# Outputs: progress via log_info / log_warning / log_success
optimize_kernel_parameters() {
    log_info "优化内核参数..."

    local sysctl_dir="/etc/sysctl.d"
    local sysctl_dropin="${sysctl_dir}/99-goaccess.conf"

    mkdir -p "$sysctl_dir"

    # Keep a copy of a previous version of the drop-in, if any.
    if [[ -f "$sysctl_dropin" ]]; then
        cp -p -- "$sysctl_dropin" "${sysctl_dropin}.backup.$(date +%Y%m%d_%H%M%S)"
    fi

    cat > "$sysctl_dropin" << 'EOF'
# GoAccess性能优化配置
# 网络优化
net.core.rmem_max = 134217728
net.core.wmem_max = 134217728
net.core.netdev_max_backlog = 5000
net.core.somaxconn = 65535
net.ipv4.tcp_rmem = 4096 65536 134217728
net.ipv4.tcp_wmem = 4096 65536 134217728
net.ipv4.tcp_congestion_control = bbr
net.ipv4.tcp_slow_start_after_idle = 0
net.ipv4.tcp_tw_reuse = 1
net.ipv4.ip_local_port_range = 1024 65535

# 文件系统优化
fs.file-max = 2097152
fs.nr_open = 2097152

# 虚拟内存优化
vm.swappiness = 10
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
vm.vfs_cache_pressure = 50

# 进程优化
kernel.pid_max = 4194304
EOF

    # sysctl keeps applying the remaining keys after a failure and reports it
    # through its exit status; treat that as a warning, not a fatal error.
    if sysctl -p "$sysctl_dropin"; then
        log_success "内核参数优化完成"
    else
        log_warning "部分内核参数未能应用,请检查 ${sysctl_dropin}"
    fi
}

# 文件描述符限制优化
# Raise the open-file and process limits for all users and for root.
# The limits go into a drop-in under /etc/security/limits.d (overwritten on
# each run) instead of being appended to /etc/security/limits.conf, and the
# pam_limits line is only added when it is not already present, so the
# function can be re-run without piling up duplicates.
# Outputs: progress via log_info / log_warning / log_success
optimize_file_limits() {
    log_info "优化文件描述符限制..."

    local limits_dir="/etc/security/limits.d"
    local limits_dropin="${limits_dir}/99-goaccess.conf"
    local pam_session="/etc/pam.d/common-session"
    local pam_line="session required pam_limits.so"

    mkdir -p "$limits_dir"

    # Keep a copy of a previous version of the drop-in, if any.
    if [[ -f "$limits_dropin" ]]; then
        cp -p -- "$limits_dropin" "${limits_dropin}.backup.$(date +%Y%m%d_%H%M%S)"
    fi

    # The "*" wildcard does not cover root, hence the explicit root entries.
    cat > "$limits_dropin" << 'EOF'
# GoAccess文件描述符优化
* soft nofile 1048576
* hard nofile 1048576
* soft nproc 1048576
* hard nproc 1048576
root soft nofile 1048576
root hard nofile 1048576
root soft nproc 1048576
root hard nproc 1048576
EOF

    # Default limits for services started by systemd.
    mkdir -p /etc/systemd/system.conf.d
    cat > /etc/systemd/system.conf.d/limits.conf << 'EOF'
[Manager]
DefaultLimitNOFILE=1048576
DefaultLimitNPROC=1048576
EOF

    # common-session only exists on Debian-style PAM layouts; never create it
    # from scratch, and never add the pam_limits line twice.
    if [[ -f "$pam_session" ]]; then
        if ! grep -Eq '^[[:space:]]*session[[:space:]]+required[[:space:]]+pam_limits\.so' "$pam_session"; then
            echo "$pam_line" >> "$pam_session"
        fi
    else
        log_warning "未找到 ${pam_session},跳过PAM配置"
    fi

    log_success "文件描述符限制优化完成"
}

# 磁盘I/O优化
# Write one value to /sys/block/<device>/queue/<attr>.
# A missing attribute or a rejected value is reported as a warning so that a
# single unsupported knob does not abort the whole script under `set -e`.
# Arguments: $1 - device name, $2 - queue attribute, $3 - value
_set_block_queue_attr() {
    local device=$1 attr=$2 value=$3
    local target="/sys/block/${device}/queue/${attr}"

    if [[ -w "$target" ]] && { echo "$value" > "$target"; } 2>/dev/null; then
        return 0
    fi
    log_warning "无法将 ${target} 设置为 ${value},已跳过"
}

# Tune the I/O scheduler, queue depth and read-ahead of every whole disk.
# Non-rotational devices (ROTA=0) get scheduler "none", nr_requests 32 and
# read-ahead 0; rotational devices (ROTA=1) get "mq-deadline", 128 and 4096.
# Only lsblk entries of TYPE "disk" are touched (loop, rom, ... are skipped).
# Outputs: progress via log_info / log_warning / log_success
optimize_disk_io() {
    log_info "优化磁盘I/O性能..."

    local device rota devtype

    while read -r device rota devtype; do
        [[ "$devtype" == "disk" && -b "/dev/${device}" ]] || continue

        case "$rota" in
            0)
                log_info "为SSD设备 $device 设置调度器为 none"
                _set_block_queue_attr "$device" scheduler none
                _set_block_queue_attr "$device" nr_requests 32
                _set_block_queue_attr "$device" read_ahead_kb 0
                ;;
            1)
                log_info "为HDD设备 $device 设置调度器为 mq-deadline"
                _set_block_queue_attr "$device" scheduler mq-deadline
                _set_block_queue_attr "$device" nr_requests 128
                _set_block_queue_attr "$device" read_ahead_kb 4096
                ;;
        esac
    done < <(lsblk -dn -o NAME,ROTA,TYPE)

    log_success "磁盘I/O优化完成"
}

# CPU性能优化
# Switch the CPU frequency governor to "performance" and enable irqbalance.
# Each step is best-effort: on hosts without a cpufreq driver (many VMs and
# containers) or without systemd the failure is logged as a warning instead
# of terminating the script under `set -e`.
# Outputs: progress via log_info / log_warning / log_success
optimize_cpu_performance() {
    log_info "优化CPU性能..."

    local governor_file
    local write_failed=0

    # Governor via cpupower, when the tool is installed.
    if command -v cpupower >/dev/null 2>&1; then
        if cpupower frequency-set -g performance; then
            log_success "CPU调度器设置为性能模式"
        else
            log_warning "cpupower 设置性能模式失败,跳过"
        fi
    else
        log_warning "cpupower工具未安装,跳过CPU调度器设置"
    fi

    # Governor via sysfs, for every core that exposes cpufreq.
    if [[ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]]; then
        for governor_file in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
            { echo performance > "$governor_file"; } 2>/dev/null || write_failed=1
        done
        if (( write_failed )); then
            log_warning "部分CPU核心无法设置为性能模式"
        else
            log_success "所有CPU核心设置为性能模式"
        fi
    fi

    # Spread hardware interrupts across cores with irqbalance.
    if command -v irqbalance >/dev/null 2>&1; then
        if systemctl enable irqbalance && systemctl start irqbalance; then
            log_success "启用IRQ负载均衡"
        else
            log_warning "无法启用irqbalance服务,跳过"
        fi
    fi
}

# 内存优化
# Disable transparent huge pages now and on every boot via /etc/rc.local.
# An existing /etc/rc.local is preserved: it is backed up and the THP
# commands are added to it (before a trailing "exit 0") instead of the file
# being overwritten. Missing THP sysfs files only produce a warning.
# Outputs: progress via log_info / log_warning / log_success
optimize_memory() {
    log_info "优化内存使用..."

    local thp_dir="/sys/kernel/mm/transparent_hugepage"
    local rc_local="/etc/rc.local"
    local knob staging_file
    local thp_snippet

    thp_snippet=$'# 禁用透明大页\necho never > /sys/kernel/mm/transparent_hugepage/enabled\necho never > /sys/kernel/mm/transparent_hugepage/defrag\nexit 0'

    # Apply to the running kernel.
    for knob in enabled defrag; do
        if [[ -w "${thp_dir}/${knob}" ]]; then
            { echo never > "${thp_dir}/${knob}"; } 2>/dev/null \
                || log_warning "无法写入 ${thp_dir}/${knob}"
        else
            log_warning "透明大页接口不可用: ${thp_dir}/${knob}"
        fi
    done

    # Persist across reboots.
    if [[ -f "$rc_local" ]] && grep -q 'transparent_hugepage' "$rc_local"; then
        log_info "${rc_local} 已包含透明大页配置,跳过"
    elif [[ -s "$rc_local" ]]; then
        cp -p -- "$rc_local" "${rc_local}.backup.$(date +%Y%m%d_%H%M%S)"
        staging_file=$(mktemp "${rc_local}.XXXXXX")

        # Copy the existing script minus trailing blank lines and a final
        # "exit 0", so the appended commands are actually reached.
        awk '
            { lines[NR] = $0 }
            END {
                last = NR
                while (last > 0 && lines[last] ~ /^[[:space:]]*$/) last--
                if (last > 0 && lines[last] ~ /^[[:space:]]*exit 0[[:space:]]*$/) last--
                for (i = 1; i <= last; i++) print lines[i]
            }
        ' "$rc_local" > "$staging_file"
        printf '\n%s\n' "$thp_snippet" >> "$staging_file"

        # Rewrite in place to keep the original owner and mode.
        cat -- "$staging_file" > "$rc_local"
        rm -f -- "$staging_file"
    else
        printf '#!/bin/bash\n%s\n' "$thp_snippet" > "$rc_local"
    fi
    chmod +x "$rc_local"

    log_success "内存优化完成"
}

# 创建性能监控脚本
# Install /usr/local/bin/goaccess-monitor.sh and a systemd unit that runs it.
# The generated script appends one metrics line per minute to
# /var/log/goaccess-monitor.log and adds ALERT lines when CPU, memory or
# disk usage exceed the thresholds defined at its top.
# The unit is enabled but not started here.
# Outputs: progress via log_info / log_success
create_monitoring_script() {
    log_info "创建性能监控脚本..."

    # Quoted delimiter: the script below is written out literally.
    cat > /usr/local/bin/goaccess-monitor.sh << 'EOF'
#!/bin/bash
# goaccess-monitor.sh - GoAccess性能监控脚本

MONITOR_LOG="/var/log/goaccess-monitor.log"
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEM=85
ALERT_THRESHOLD_DISK=90

# 获取系统指标
get_cpu_usage() {
    top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1
}

get_memory_usage() {
    free | grep Mem | awk '{printf "%.1f", $3/$2 * 100.0}'
}

get_disk_usage() {
    df / | tail -1 | awk '{print $5}' | cut -d'%' -f1
}

get_goaccess_processes() {
    # pgrep -c already prints 0 (and exits 1) when nothing matches, so no
    # fallback echo is needed; -x keeps this monitor script out of the count.
    pgrep -cx goaccess || true
}

get_load_average() {
    uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | cut -d',' -f1
}

# Returns 0 when $1 is numerically greater than $2 (floats allowed).
# An empty reading is treated as 0 instead of causing a syntax error.
exceeds_threshold() {
    awk -v value="${1:-0}" -v limit="$2" 'BEGIN { exit !(value + 0 > limit + 0) }'
}

# 记录监控数据
log_metrics() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    local cpu_usage=$(get_cpu_usage)
    local mem_usage=$(get_memory_usage)
    local disk_usage=$(get_disk_usage)
    local goaccess_procs=$(get_goaccess_processes)
    local load_avg=$(get_load_average)
    
    echo "$timestamp,CPU:${cpu_usage}%,MEM:${mem_usage}%,DISK:${disk_usage}%,PROCS:${goaccess_procs},LOAD:${load_avg}" >> "$MONITOR_LOG"
    
    # 检查告警阈值
    if exceeds_threshold "$cpu_usage" "$ALERT_THRESHOLD_CPU"; then
        echo "$timestamp ALERT: High CPU usage: ${cpu_usage}%" >> "$MONITOR_LOG"
    fi
    
    if exceeds_threshold "$mem_usage" "$ALERT_THRESHOLD_MEM"; then
        echo "$timestamp ALERT: High memory usage: ${mem_usage}%" >> "$MONITOR_LOG"
    fi
    
    if exceeds_threshold "$disk_usage" "$ALERT_THRESHOLD_DISK"; then
        echo "$timestamp ALERT: High disk usage: ${disk_usage}%" >> "$MONITOR_LOG"
    fi
}

# 主函数
main() {
    while true; do
        log_metrics
        sleep 60
    done
}

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi
EOF

    chmod +x /usr/local/bin/goaccess-monitor.sh

    # systemd unit that keeps the monitor running.
    cat > /etc/systemd/system/goaccess-monitor.service << 'EOF'
[Unit]
Description=GoAccess Performance Monitor
After=network.target

[Service]
Type=simple
User=root
ExecStart=/usr/local/bin/goaccess-monitor.sh
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF

    systemctl daemon-reload
    systemctl enable goaccess-monitor.service

    log_success "性能监控脚本创建完成"
}

# 生成优化报告
# Write a timestamped optimization report under /tmp and print it.
# The report lists basic system facts (kernel, core count, memory size,
# root filesystem size), the optimization steps, and follow-up suggestions.
# Outputs: progress via log_info / log_success, then the report on stdout
generate_optimization_report() {
    log_info "生成优化报告..."

    local stamp
    stamp=$(date +%Y%m%d_%H%M%S)
    local report_path="/tmp/goaccess_optimization_report_${stamp}.txt"

    cat << EOF > "$report_path"
GoAccess性能优化报告
生成时间: $(date)

=== 系统配置 ===
操作系统: $(uname -a)
CPU核心数: $(nproc)
内存大小: $(free -h | awk '/Mem/ {print $2}')
磁盘空间: $(df -h / | awk 'END {print $2}')

=== 优化项目 ===
✓ 内核参数优化
✓ 文件描述符限制优化
✓ 磁盘I/O优化
✓ CPU性能优化
✓ 内存优化
✓ 性能监控脚本部署

=== 建议配置 ===
1. 定期清理日志文件
2. 监控系统资源使用情况
3. 根据负载调整GoAccess配置
4. 实施日志轮转策略
5. 配置自动化备份

=== 下一步操作 ===
1. 重启系统以应用所有优化
2. 启动性能监控服务: systemctl start goaccess-monitor
3. 检查监控日志: tail -f /var/log/goaccess-monitor.log
4. 根据监控数据进一步调优
EOF

    log_success "优化报告已生成: $report_path"
    cat "$report_path"
}

# 主函数
# Entry point: run every optimization step in order, then print the
# closing messages. Steps are plain function calls, so under `set -e`
# the first failing step stops the run.
main() {
    log_info "开始GoAccess性能优化..."

    local step
    local -a steps=(
        check_root
        collect_system_info
        optimize_kernel_parameters
        optimize_file_limits
        optimize_disk_io
        optimize_cpu_performance
        optimize_memory
        create_monitoring_script
        generate_optimization_report
    )

    for step in "${steps[@]}"; do
        "$step"
    done

    log_success "GoAccess性能优化完成!"
    log_warning "建议重启系统以应用所有优化配置"
}

# Run main only when this file is executed directly, not when it is sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi

2.2 GoAccess配置优化

2.2.1 高性能配置模板

# goaccess-optimized.conf - 高性能GoAccess配置

# 时间和日期格式
time-format %H:%M:%S
date-format %d/%b/%Y
log-format COMBINED

# 性能优化设置
real-time-html true
ws-url wss://your-domain.com:7890
port 7890

# 数据持久化
persist true
restore true
db-path /data/goaccess/goaccess.db

# 内存优化
process-and-exit false
keep-db-files true
load-from-disk true

# 地理位置数据库
geoip-database /usr/share/GeoIP/GeoLite2-City.mmdb

# 排除内部IP
exclude-ip 127.0.0.1
exclude-ip 10.0.0.0/8
exclude-ip 172.16.0.0/12
exclude-ip 192.168.0.0/16
exclude-ip ::1

# 爬虫过滤
ignore-crawlers true
crawlers-only false

# 面板配置(只启用必要的面板)
enable-panel VISITORS
enable-panel REQUESTS
enable-panel REQUESTS_STATIC
enable-panel NOT_FOUND
enable-panel HOSTS
enable-panel OS
enable-panel BROWSERS
enable-panel VISIT_TIMES
enable-panel VIRTUAL_HOSTS
enable-panel REFERRERS
enable-panel REFERRING_SITES
enable-panel KEYPHRASES
enable-panel STATUS_CODES
enable-panel GEO_LOCATION

# 禁用不必要的面板以提高性能
# disable-panel REMOTE_USER
# disable-panel CACHE_STATUS

# 输出优化
html-custom-css /etc/goaccess/custom.css
html-custom-js /etc/goaccess/custom.js
html-prefs '{"theme":"dark","perPage":50,"layout":"horizontal"}'

# 日志处理优化
no-query-string false
no-term-resolver true
no-ip-validation false
http-protocol true
http-method true

# 静态文件扩展名(匹配这些扩展名的请求计入"静态请求"面板)
static-file .css
static-file .js
static-file .jpg
static-file .jpeg
static-file .png
static-file .gif
static-file .ico
static-file .svg
static-file .woff
static-file .woff2
static-file .ttf
static-file .eot
static-file .pdf
static-file .zip
static-file .tar.gz

# 4xx错误页面
4xx-to-unique-count true

# 是否对被二次编码(double-encoded)的值进行解码
double-decode false

# 用户代理解析
real-os true

# 颜色方案
color-scheme 2

# 用于校验日志/日期/时间格式的测试行数(默认 10;设为 0 则跳过格式测试)
num-tests 10

# 忽略查询字符串中的特定参数
ignore-qstr utm_source
ignore-qstr utm_medium
ignore-qstr utm_campaign
ignore-qstr utm_content
ignore-qstr utm_term
ignore-qstr fbclid
ignore-qstr gclid
ignore-qstr _ga
ignore-qstr _gid

# 忽略特定的引用来源
ignore-referer *.google.com
ignore-referer *.bing.com
ignore-referer *.yahoo.com
ignore-referer *.baidu.com

# 忽略特定状态码
ignore-status 400
ignore-status 408
ignore-status 499

# 自定义日志格式(如果需要)
# log-format %h %^[%d:%t %^] "%r" %s %b "%R" "%u"

2.2.2 配置验证脚本

#!/usr/bin/env python3
# config-validator.py - GoAccess配置验证脚本

import os
import re
import sys
import json
import logging
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from enum import Enum

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class ValidationLevel(Enum):
    """Severity level attached to a validation result."""
    ERROR = "error"      # counted by validate() to decide pass/fail
    WARNING = "warning"
    INFO = "info"
    SUCCESS = "success"

@dataclass
class ValidationResult:
    """A single finding produced by the configuration validator."""
    # Severity of the finding.
    level: ValidationLevel
    # Human-readable description of the finding.
    message: str
    # Optional remediation hint shown next to the message.
    suggestion: Optional[str] = None
    # Optional line number in the configuration file.
    line_number: Optional[int] = None

class GoAccessConfigValidator:
    """Validate a GoAccess configuration file and collect the findings.

    Typical use: construct with a path, call ``validate()``, then call
    ``generate_report()`` to obtain a JSON-serialisable summary.

    Attributes:
        config_file: Path of the configuration file being validated.
        config_lines: Raw lines of the file, filled by ``load_config()``.
        config_dict: Option name -> last value seen (``True`` for options
            given without a value).
        config_values: Option name -> every value seen, in file order. Needed
            for options that may be repeated, such as ``exclude-ip``.
        results: Accumulated ``ValidationResult`` objects.
    """

    def __init__(self, config_file: str):
        self.config_file = Path(config_file)
        self.config_lines = []
        self.config_dict = {}
        self.config_values = {}
        self.results = []

        # Option names the validator recognises.
        self.known_options = {
            'time-format', 'date-format', 'log-format', 'real-time-html',
            'ws-url', 'port', 'persist', 'restore', 'db-path',
            'geoip-database', 'exclude-ip', 'ignore-crawlers',
            'crawlers-only', 'enable-panel', 'disable-panel',
            'html-custom-css', 'html-custom-js', 'html-prefs',
            'no-query-string', 'no-term-resolver', 'no-ip-validation',
            'http-protocol', 'http-method', 'static-file',
            '4xx-to-unique-count', 'double-decode', 'real-os',
            'color-scheme', 'num-tests', 'ignore-qstr',
            'ignore-referer', 'ignore-status', 'process-and-exit',
            'keep-db-files', 'load-from-disk'
        }

        # Options that must be present.
        self.required_options = {
            'time-format', 'date-format', 'log-format'
        }

        # Recommended values for performance-related options.
        self.performance_recommendations = {
            'persist': 'true',
            'restore': 'true',
            'keep-db-files': 'true',
            'load-from-disk': 'true',
            'no-term-resolver': 'true',
            'ignore-crawlers': 'true'
        }

    def load_config(self) -> bool:
        """Read and parse the configuration file.

        Blank lines and lines starting with ``#`` are ignored. Each remaining
        line is split on the first run of whitespace (spaces or tabs) into an
        option name and its value.

        Returns:
            True on success. False when the file is missing or unreadable, in
            which case an ERROR result is recorded.
        """
        try:
            if not self.config_file.exists():
                self.results.append(ValidationResult(
                    ValidationLevel.ERROR,
                    f"配置文件不存在: {self.config_file}"
                ))
                return False

            with open(self.config_file, 'r', encoding='utf-8') as f:
                self.config_lines = f.readlines()

            for raw_line in self.config_lines:
                line = raw_line.strip()
                if not line or line.startswith('#'):
                    continue

                # split(None, 1) tolerates tabs and repeated spaces between
                # the option name and its value.
                parts = line.split(None, 1)
                key = parts[0]
                if len(parts) == 2:
                    value = parts[1]
                    self.config_dict[key] = value
                    # Keep every occurrence; repeated options would otherwise
                    # overwrite each other in config_dict.
                    self.config_values.setdefault(key, []).append(value)
                else:
                    self.config_dict[key] = True

            logger.info(f"成功加载配置文件: {self.config_file}")
            return True

        except Exception as e:
            self.results.append(ValidationResult(
                ValidationLevel.ERROR,
                f"加载配置文件失败: {e}"
            ))
            return False

    def validate_required_options(self):
        """Record an ERROR for each missing required option, else SUCCESS."""
        logger.info("验证必需的配置选项...")

        for option in self.required_options:
            if option not in self.config_dict:
                self.results.append(ValidationResult(
                    ValidationLevel.ERROR,
                    f"缺少必需的配置选项: {option}",
                    f"请在配置文件中添加 {option} 选项"
                ))
            else:
                self.results.append(ValidationResult(
                    ValidationLevel.SUCCESS,
                    f"找到必需的配置选项: {option}"
                ))

    def validate_unknown_options(self):
        """Record a WARNING for each option not listed in known_options."""
        logger.info("检查未知的配置选项...")

        for option in self.config_dict.keys():
            if option not in self.known_options:
                self.results.append(ValidationResult(
                    ValidationLevel.WARNING,
                    f"未知的配置选项: {option}",
                    "请检查选项名称是否正确"
                ))

    def validate_log_format(self):
        """Classify ``log-format`` as predefined, custom, or unrecognised."""
        logger.info("验证日志格式...")

        if 'log-format' in self.config_dict:
            log_format = self.config_dict['log-format']

            # Predefined format names accepted by GoAccess.
            valid_formats = [
                'COMBINED', 'VCOMBINED', 'COMMON', 'VCOMMON', 'W3C', 'SQUID',
                'CLOUDFRONT', 'CLOUDSTORAGE', 'AWSELB', 'AWSS3', 'AWSALB',
                'CADDY', 'TRAEFIKCLF'
            ]

            if log_format in valid_formats:
                self.results.append(ValidationResult(
                    ValidationLevel.SUCCESS,
                    f"使用标准日志格式: {log_format}"
                ))
            elif log_format.startswith('%'):
                # A custom format string made of % specifiers.
                self.results.append(ValidationResult(
                    ValidationLevel.INFO,
                    f"使用自定义日志格式: {log_format}",
                    "请确保自定义格式与实际日志文件匹配"
                ))
            else:
                self.results.append(ValidationResult(
                    ValidationLevel.WARNING,
                    f"未识别的日志格式: {log_format}",
                    "请检查日志格式是否正确"
                ))

    def validate_time_date_format(self):
        """Check that ``time-format`` and ``date-format`` start with ``%``."""
        logger.info("验证时间和日期格式...")

        # time-format
        if 'time-format' in self.config_dict:
            time_format = self.config_dict['time-format']
            if not time_format.startswith('%'):
                self.results.append(ValidationResult(
                    ValidationLevel.ERROR,
                    f"时间格式错误: {time_format}",
                    "时间格式应该以%开头,例如: %H:%M:%S"
                ))
            else:
                self.results.append(ValidationResult(
                    ValidationLevel.SUCCESS,
                    f"时间格式正确: {time_format}"
                ))

        # date-format
        if 'date-format' in self.config_dict:
            date_format = self.config_dict['date-format']
            if not date_format.startswith('%'):
                self.results.append(ValidationResult(
                    ValidationLevel.ERROR,
                    f"日期格式错误: {date_format}",
                    "日期格式应该以%开头,例如: %d/%b/%Y"
                ))
            else:
                self.results.append(ValidationResult(
                    ValidationLevel.SUCCESS,
                    f"日期格式正确: {date_format}"
                ))

    def validate_file_paths(self):
        """Check that the paths referenced by path-valued options exist.

        For ``db-path`` only the parent directory is checked; for the other
        options the path itself must exist. Missing paths yield a WARNING.
        """
        logger.info("验证文件路径...")

        path_options = {
            'db-path': '数据库路径',
            'geoip-database': 'GeoIP数据库路径',
            'html-custom-css': '自定义CSS文件路径',
            'html-custom-js': '自定义JS文件路径'
        }

        for option, description in path_options.items():
            if option in self.config_dict:
                file_path = Path(self.config_dict[option])

                if option == 'db-path':
                    # Only the parent directory has to exist already.
                    parent_dir = file_path.parent
                    if not parent_dir.exists():
                        self.results.append(ValidationResult(
                            ValidationLevel.WARNING,
                            f"{description}的父目录不存在: {parent_dir}",
                            f"请创建目录: mkdir -p {parent_dir}"
                        ))
                    else:
                        self.results.append(ValidationResult(
                            ValidationLevel.SUCCESS,
                            f"{description}的父目录存在: {parent_dir}"
                        ))
                else:
                    # Every other path must point at an existing file.
                    if not file_path.exists():
                        self.results.append(ValidationResult(
                            ValidationLevel.WARNING,
                            f"{description}不存在: {file_path}",
                            "请确保文件存在或移除此配置选项"
                        ))
                    else:
                        self.results.append(ValidationResult(
                            ValidationLevel.SUCCESS,
                            f"{description}存在: {file_path}"
                        ))

    def validate_network_settings(self):
        """Validate the ``port`` number and the ``ws-url`` scheme."""
        logger.info("验证网络设置...")

        # port must be an integer in 1..65535
        if 'port' in self.config_dict:
            try:
                port = int(self.config_dict['port'])
                if 1 <= port <= 65535:
                    self.results.append(ValidationResult(
                        ValidationLevel.SUCCESS,
                        f"端口号有效: {port}"
                    ))

                    # Ports below 1024 are privileged.
                    if port < 1024:
                        self.results.append(ValidationResult(
                            ValidationLevel.WARNING,
                            f"使用特权端口: {port}",
                            "需要root权限运行GoAccess"
                        ))
                else:
                    self.results.append(ValidationResult(
                        ValidationLevel.ERROR,
                        f"端口号无效: {port}",
                        "端口号应该在1-65535范围内"
                    ))
            except ValueError:
                self.results.append(ValidationResult(
                    ValidationLevel.ERROR,
                    f"端口号格式错误: {self.config_dict['port']}",
                    "端口号应该是数字"
                ))

        # ws-url must use the ws:// or wss:// scheme
        if 'ws-url' in self.config_dict:
            ws_url = self.config_dict['ws-url']
            if ws_url.startswith(('ws://', 'wss://')):
                self.results.append(ValidationResult(
                    ValidationLevel.SUCCESS,
                    f"WebSocket URL格式正确: {ws_url}"
                ))

                if ws_url.startswith('ws://'):
                    self.results.append(ValidationResult(
                        ValidationLevel.WARNING,
                        "使用非加密的WebSocket连接",
                        "建议在生产环境中使用wss://"
                    ))
            else:
                self.results.append(ValidationResult(
                    ValidationLevel.ERROR,
                    f"WebSocket URL格式错误: {ws_url}",
                    "URL应该以ws://或wss://开头"
                ))

    def validate_performance_settings(self):
        """Compare configured values against performance_recommendations.

        A matching value yields SUCCESS, a different value WARNING, and an
        absent option INFO.
        """
        logger.info("验证性能设置...")

        for option, recommended_value in self.performance_recommendations.items():
            if option in self.config_dict:
                current_value = str(self.config_dict[option]).lower()
                if current_value == recommended_value:
                    self.results.append(ValidationResult(
                        ValidationLevel.SUCCESS,
                        f"性能优化选项配置正确: {option} = {current_value}"
                    ))
                else:
                    self.results.append(ValidationResult(
                        ValidationLevel.WARNING,
                        f"性能优化建议: {option} = {recommended_value} (当前: {current_value})",
                        f"建议设置 {option} {recommended_value} 以提高性能"
                    ))
            else:
                self.results.append(ValidationResult(
                    ValidationLevel.INFO,
                    f"建议添加性能优化选项: {option} {recommended_value}",
                    "此选项可以提高GoAccess性能"
                ))

    def validate_ip_exclusions(self):
        """Suggest excluding the usual internal ranges that are not excluded.

        Every ``exclude-ip`` occurrence in the file is considered. When the
        per-occurrence map is empty (for example ``config_dict`` was filled
        directly by a caller), the single value in ``config_dict`` is used.
        """
        logger.info("验证IP排除设置...")

        exclude_ips = list(getattr(self, 'config_values', {}).get('exclude-ip', []))
        if not exclude_ips and 'exclude-ip' in self.config_dict:
            value = self.config_dict['exclude-ip']
            if isinstance(value, list):
                exclude_ips.extend(value)
            else:
                exclude_ips.append(value)

        # Loopback and private ranges that are usually worth excluding.
        recommended_exclusions = [
            '127.0.0.1',
            '10.0.0.0/8',
            '172.16.0.0/12',
            '192.168.0.0/16',
            '::1'
        ]

        for ip in recommended_exclusions:
            if ip not in exclude_ips:
                self.results.append(ValidationResult(
                    ValidationLevel.INFO,
                    f"建议排除内部IP: {ip}",
                    f"添加配置: exclude-ip {ip}"
                ))

        if exclude_ips:
            self.results.append(ValidationResult(
                ValidationLevel.SUCCESS,
                f"已配置IP排除: {len(exclude_ips)} 个IP/网段"
            ))

    def generate_report(self) -> Dict:
        """Build a JSON-serialisable report from the accumulated results.

        Returns:
            A dict with the config file path, the validation time, per-level
            counts under ``summary`` and the individual ``results``.
        """
        # Imported locally so the method also works when this module is
        # imported rather than executed as a script.
        from datetime import datetime

        def count(level) -> int:
            return sum(1 for r in self.results if r.level == level)

        report = {
            'config_file': str(self.config_file),
            'validation_time': str(datetime.now()),
            'summary': {
                'total_checks': len(self.results),
                'errors': count(ValidationLevel.ERROR),
                'warnings': count(ValidationLevel.WARNING),
                'info': count(ValidationLevel.INFO),
                'success': count(ValidationLevel.SUCCESS)
            },
            'results': []
        }

        for result in self.results:
            report['results'].append({
                'level': result.level.value,
                'message': result.message,
                'suggestion': result.suggestion,
                'line_number': result.line_number
            })

        return report

    def validate(self) -> bool:
        """Load the file and run every check.

        Returns:
            True when the file loaded and no ERROR result was recorded.
        """
        logger.info(f"开始验证GoAccess配置文件: {self.config_file}")

        if not self.load_config():
            return False

        self.validate_required_options()
        self.validate_unknown_options()
        self.validate_log_format()
        self.validate_time_date_format()
        self.validate_file_paths()
        self.validate_network_settings()
        self.validate_performance_settings()
        self.validate_ip_exclusions()

        # Only ERROR-level results make the validation fail.
        errors = [r for r in self.results if r.level == ValidationLevel.ERROR]

        if errors:
            logger.error(f"验证失败,发现 {len(errors)} 个错误")
            return False
        else:
            logger.info("配置验证通过")
            return True

def main():
    """Command-line entry point: validate a config file and print a report.

    Expects exactly one argument, the config file path. Prints a summary and
    the per-result details, writes the full report as JSON to a timestamped
    file in the current directory, and exits with status 0 when the config is
    valid and 1 otherwise (including wrong usage).
    """
    # Imported locally so main() also works when called from another module;
    # the only visible module-level import sits inside the __main__ guard.
    from datetime import datetime

    if len(sys.argv) != 2:
        print("用法: python config-validator.py <config-file>")
        sys.exit(1)

    validator = GoAccessConfigValidator(sys.argv[1])
    is_valid = validator.validate()
    report = validator.generate_report()
    summary = report['summary']

    print("\n=== GoAccess配置验证报告 ===")
    print(f"配置文件: {report['config_file']}")
    print(f"验证时间: {report['validation_time']}")
    print("\n=== 验证摘要 ===")
    print(f"总检查项: {summary['total_checks']}")
    print(f"错误: {summary['errors']}")
    print(f"警告: {summary['warnings']}")
    print(f"信息: {summary['info']}")
    print(f"成功: {summary['success']}")

    # ANSI colour per level, built once instead of on every loop iteration.
    level_colors = {
        'error': '\033[91m',
        'warning': '\033[93m',
        'info': '\033[94m',
        'success': '\033[92m'
    }
    reset_color = '\033[0m'

    print("\n=== 详细结果 ===")
    for result in report['results']:
        color = level_colors.get(result['level'], '')
        print(f"{color}[{result['level'].upper()}]{reset_color} {result['message']}")
        if result['suggestion']:
            print(f"  建议: {result['suggestion']}")
        print()

    report_file = f"goaccess_config_validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    try:
        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2, ensure_ascii=False)
    except OSError as exc:
        # A failed report write (e.g. read-only working directory) must not
        # hide the validation outcome, which the exit status still reports.
        print(f"无法保存报告 {report_file}: {exc}", file=sys.stderr)
    else:
        print(f"详细报告已保存到: {report_file}")

    sys.exit(0 if is_valid else 1)

if __name__ == '__main__':
    # Bind ``datetime`` at module level before running: generate_report() and
    # main() both call datetime.now().
    from datetime import datetime
    main()

3. 安全配置最佳实践

3.1 访问控制和认证

3.1.1 Nginx反向代理安全配置

# nginx-goaccess-security.conf - 安全的Nginx配置

# 限制请求速率
limit_req_zone $binary_remote_addr zone=goaccess_login:10m rate=5r/m;
limit_req_zone $binary_remote_addr zone=goaccess_api:10m rate=30r/m;

# 上游服务器配置
upstream goaccess_backend {
    least_conn;
    server 127.0.0.1:7890 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:7891 max_fails=3 fail_timeout=30s backup;
    keepalive 32;
}

server {
    listen 443 ssl http2;
    listen [::]:443 ssl http2;
    server_name analytics.yourdomain.com;
    
    # SSL配置
    ssl_certificate /etc/ssl/certs/analytics.yourdomain.com.crt;
    ssl_certificate_key /etc/ssl/private/analytics.yourdomain.com.key;
    ssl_session_timeout 1d;
    ssl_session_cache shared:SSL:50m;
    ssl_session_tickets off;
    
    # 现代SSL配置
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384;
    ssl_prefer_server_ciphers off;
    
    # HSTS
    add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload" always;
    
    # 安全头
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
    add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self' wss:; font-src 'self';" always;
    
    # 隐藏服务器信息
    server_tokens off;
    more_clear_headers Server;
    
    # 基本认证
    auth_basic "GoAccess Analytics";
    auth_basic_user_file /etc/nginx/.htpasswd;
    
    # IP白名单(可选)
    # allow 192.168.1.0/24;
    # allow 10.0.0.0/8;
    # deny all;
    
    # 日志配置
    access_log /var/log/nginx/goaccess_access.log combined;
    error_log /var/log/nginx/goaccess_error.log warn;
    
    # 主页面
    location / {
        limit_req zone=goaccess_api burst=10 nodelay;
        
        proxy_pass http://goaccess_backend;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        
        # 超时设置
        proxy_connect_timeout 60s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;
        
        # 缓存设置
        proxy_cache_bypass $http_upgrade;
        proxy_no_cache $http_upgrade;
    }
    
    # WebSocket连接
    location /ws {
        limit_req zone=goaccess_api burst=20 nodelay;
        
        proxy_pass http://goaccess_backend;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        
        # WebSocket特定设置
        proxy_buffering off;
        proxy_cache off;
        proxy_read_timeout 86400s;
        proxy_send_timeout 86400s;
    }
    
    # API端点保护
    location /api/ {
        limit_req zone=goaccess_api burst=5 nodelay;
        
        # 额外的API认证(可选)
        # auth_request /auth;
        
        proxy_pass http://goaccess_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
    
    # 静态资源
    location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";

        # add_header directives are inherited from the server level only when
        # a location declares none of its own. Because this location sets
        # Cache-Control, the server-level security headers must be repeated
        # here or static responses are sent without them.
        add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload" always;
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header X-XSS-Protection "1; mode=block" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;
        add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self' wss:; font-src 'self';" always;
    }
    
    # 禁止访问敏感文件
    location ~ /\. {
        deny all;
        access_log off;
        log_not_found off;
    }
    
    location ~ \.(conf|log|db)$ {
        deny all;
        access_log off;
        log_not_found off;
    }
    
    # 健康检查端点
    location /health {
        # Monitoring probes normally cannot supply Basic-auth credentials, so
        # the server-wide auth_basic is switched off for this endpoint only.
        auth_basic off;
        access_log off;
        # Set the MIME type with default_type: add_header would emit a second
        # Content-Type next to the one nginx generates, and an add_header here
        # would also stop the server-level security headers being inherited.
        default_type text/plain;
        return 200 "healthy\n";
    }
}

# HTTP重定向到HTTPS
server {
    listen 80;
    listen [::]:80;
    server_name analytics.yourdomain.com;
    
    # 安全重定向
    return 301 https://$server_name$request_uri;
}

3.1.2 用户认证管理脚本

#!/bin/bash
# auth-manager.sh - GoAccess用户认证管理脚本

# Abort on command failure, on use of an unset variable, and when any stage of
# a pipeline fails.
set -euo pipefail

# Configuration
# Password file managed by this script.
HTPASSWD_FILE="/etc/nginx/.htpasswd"
# Directory that receives timestamped copies of the password file.
BACKUP_DIR="/etc/nginx/backups"
# Log file that log_message appends to.
LOG_FILE="/var/log/goaccess-auth.log"

# ANSI colour escape sequences used by the log_* helpers (expanded by echo -e).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# Resets the terminal colour.
NC='\033[0m'

# 日志函数
# Append a timestamped message to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Returns:   always 0
log_message() {
    # Best effort: an unwritable log file (for example when `list` is run
    # without root) must not abort the whole script under `set -e`, so a
    # failed write is deliberately ignored.
    { printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >> "$LOG_FILE"; } 2>/dev/null || true
}

# Print an [INFO] line to stdout and record the message in the log file.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
log_info() {
    local msg=$1
    printf '%b\n' "${BLUE}[INFO]${NC} ${msg}"
    log_message "INFO: ${msg}"
}

# Print a [SUCCESS] line to stdout and record the message in the log file.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
log_success() {
    local msg=$1
    printf '%b\n' "${GREEN}[SUCCESS]${NC} ${msg}"
    log_message "SUCCESS: ${msg}"
}

# Print a [WARNING] line to stdout and record the message in the log file.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
log_warning() {
    local msg=$1
    printf '%b\n' "${YELLOW}[WARNING]${NC} ${msg}"
    log_message "WARNING: ${msg}"
}

# Print an [ERROR] line to stdout and record the message in the log file.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
log_error() {
    local msg=$1
    printf '%b\n' "${RED}[ERROR]${NC} ${msg}"
    log_message "ERROR: ${msg}"
}

# 检查依赖
# Verify that the external tools this script relies on can be found.
# Exits the script with status 1 at the first missing tool.
check_dependencies() {
    local tool

    for tool in htpasswd openssl; do
        command -v "$tool" >/dev/null 2>&1 && continue
        log_error "缺少依赖: $tool"
        exit 1
    done
}

# 创建备份
# Copy the current password file to a timestamped file in the backup directory.
# Does nothing (and succeeds) when the password file does not exist yet.
# Globals:   HTPASSWD_FILE, BACKUP_DIR (read)
# Returns:   0 on success; non-zero if the directory or copy cannot be created
create_backup() {
    local backup_file

    if [[ ! -f "$HTPASSWD_FILE" ]]; then
        return 0
    fi

    # Backups contain password hashes: keep the directory private to its owner.
    mkdir -p -- "$BACKUP_DIR"
    chmod 700 -- "$BACKUP_DIR"

    # Assigned separately from `local` so a failing `date` is not masked.
    backup_file="$BACKUP_DIR/htpasswd_$(date +%Y%m%d_%H%M%S).backup"

    # -p keeps the mode and ownership of the source, so the copy is never
    # more widely readable than the original.
    cp -p -- "$HTPASSWD_FILE" "$backup_file"
    log_info "已创建备份: $backup_file"
}

# 生成强密码
# Print a random alphanumeric password of exactly the requested length.
# Arguments: $1 - length in characters (positive integer, default 16)
# Outputs:   the password on stdout; an error message on stderr
# Returns:   0 on success, 1 on an invalid length or if openssl fails
generate_password() {
    local length=${1:-16}
    local password=""
    local chunk

    if [[ ! "$length" =~ ^[1-9][0-9]*$ ]]; then
        printf '密码长度必须是正整数: %s\n' "$length" >&2
        return 1
    fi

    # A single `openssl rand -base64 32` yields only about 40 usable
    # characters, so longer requests used to be silently truncated. Keep
    # drawing random data until enough characters are available. The newline
    # is stripped as well because openssl wraps long base64 output.
    while (( ${#password} < length )); do
        chunk=$(openssl rand -base64 48 | tr -d '=+/\n') || return 1
        [[ -n "$chunk" ]] || return 1
        password+=$chunk
    done

    printf '%s\n' "${password:0:length}"
}

# 验证密码强度
# Check a password against the minimum length and warn about low complexity.
# Arguments: $1 - candidate password
# Returns:   1 when the password is shorter than 8 characters, 0 otherwise
#            (insufficient complexity only produces a warning)
validate_password() {
    local password="$1"
    local min_length=8
    local complexity=0
    local class_regex

    if (( ${#password} < min_length )); then
        log_error "密码长度至少需要 $min_length 个字符"
        return 1
    fi

    # Count how many character classes occur: upper case, lower case,
    # digits, and anything else.
    for class_regex in '[A-Z]' '[a-z]' '[0-9]' '[^A-Za-z0-9]'; do
        if [[ "$password" =~ $class_regex ]]; then
            complexity=$((complexity + 1))
        fi
    done

    if (( complexity < 3 )); then
        log_warning "密码复杂性不足,建议包含大写字母、小写字母、数字和特殊字符"
    fi

    return 0
}

# 添加用户
# Add a new user to the password file, creating the file when necessary.
# Globals:   HTPASSWD_FILE (read)
# Arguments: $1 - user name
#            $2 - password
# Returns:   0 on success; 1 if the name is empty, the user already exists,
#            or the password is rejected by validate_password
add_user() {
    local username="$1"
    local password="$2"
    local existing
    local mode_flags="-iB"

    if [[ -z "$username" ]]; then
        log_error "用户名不能为空"
        return 1
    fi

    # Compare user names literally. A grep on "^$username:" would treat the
    # name as a regular expression, so "a.c" would collide with "abc".
    if [[ -f "$HTPASSWD_FILE" ]]; then
        while IFS=: read -r existing _ || [[ -n "$existing" ]]; do
            if [[ "$existing" == "$username" ]]; then
                log_error "用户 $username 已存在"
                return 1
            fi
        done < "$HTPASSWD_FILE"
    fi

    if ! validate_password "$password"; then
        return 1
    fi

    create_backup

    mkdir -p "$(dirname "$HTPASSWD_FILE")"

    # -c creates the file on first use.
    if [[ ! -f "$HTPASSWD_FILE" ]]; then
        mode_flags="-ciB"
    fi

    # -i makes htpasswd read the password from stdin; without it htpasswd
    # prompts interactively and ignores the here-string.
    htpasswd "$mode_flags" "$HTPASSWD_FILE" "$username" <<< "$password"

    # The group depends on the distribution (nginx or www-data); a failing
    # chown is tolerated on purpose.
    chmod 640 "$HTPASSWD_FILE"
    chown root:nginx "$HTPASSWD_FILE" 2>/dev/null || chown root:www-data "$HTPASSWD_FILE" 2>/dev/null || true

    log_success "用户 $username 添加成功"
}

# 删除用户
# Remove a user from the password file.
# Globals:   HTPASSWD_FILE (read)
# Arguments: $1 - user name
# Returns:   0 on success; 1 if the name is empty, the password file is
#            missing, or the user does not exist
delete_user() {
    local username="$1"
    local existing
    local found=0

    if [[ -z "$username" ]]; then
        log_error "用户名不能为空"
        return 1
    fi

    if [[ ! -f "$HTPASSWD_FILE" ]]; then
        log_error "认证文件不存在: $HTPASSWD_FILE"
        return 1
    fi

    # Literal comparison: a grep on "^$username:" would treat the name as a
    # regular expression and accept "a.c" for an existing "abc".
    while IFS=: read -r existing _ || [[ -n "$existing" ]]; do
        if [[ "$existing" == "$username" ]]; then
            found=1
            break
        fi
    done < "$HTPASSWD_FILE"

    if (( ! found )); then
        log_error "用户 $username 不存在"
        return 1
    fi

    create_backup

    htpasswd -D "$HTPASSWD_FILE" "$username"

    log_success "用户 $username 删除成功"
}

# 更新密码
# Replace the password of an existing user.
# Globals:   HTPASSWD_FILE (read)
# Arguments: $1 - user name
#            $2 - new password
# Returns:   0 on success; 1 if the name is empty, the password file is
#            missing, the user does not exist, or the password is rejected
update_password() {
    local username="$1"
    local password="$2"
    local existing
    local found=0

    if [[ -z "$username" ]]; then
        log_error "用户名不能为空"
        return 1
    fi

    if [[ ! -f "$HTPASSWD_FILE" ]]; then
        log_error "认证文件不存在: $HTPASSWD_FILE"
        return 1
    fi

    # Literal comparison: a grep on "^$username:" would treat the name as a
    # regular expression and accept "a.c" for an existing "abc".
    while IFS=: read -r existing _ || [[ -n "$existing" ]]; do
        if [[ "$existing" == "$username" ]]; then
            found=1
            break
        fi
    done < "$HTPASSWD_FILE"

    if (( ! found )); then
        log_error "用户 $username 不存在"
        return 1
    fi

    if ! validate_password "$password"; then
        return 1
    fi

    create_backup

    # -i makes htpasswd read the password from stdin; without it htpasswd
    # prompts interactively and ignores the here-string.
    htpasswd -iB "$HTPASSWD_FILE" "$username" <<< "$password"

    log_success "用户 $username 密码更新成功"
}

# 列出用户
# Print the user name (text before the first colon) of every line in the
# password file. A missing file only produces a warning.
# Globals: HTPASSWD_FILE (read)
list_users() {
    local entry

    if [[ ! -f "$HTPASSWD_FILE" ]]; then
        log_warning "认证文件不存在: $HTPASSWD_FILE"
        return 0
    fi

    log_info "当前用户列表:"
    while IFS= read -r entry || [[ -n "$entry" ]]; do
        printf '  - %s\n' "${entry%%:*}"
    done < "$HTPASSWD_FILE"
}

# 验证用户认证
# Check a user name / password pair against the password file.
# Globals:   HTPASSWD_FILE (read)
# Arguments: $1 - user name
#            $2 - password to verify
# Returns:   0 when the password matches; 1 if the file is missing, the
#            user does not exist, or the password is wrong
verify_auth() {
    local username="$1"
    local password="$2"
    local existing
    local found=0

    if [[ ! -f "$HTPASSWD_FILE" ]]; then
        log_error "认证文件不存在: $HTPASSWD_FILE"
        return 1
    fi

    # Literal comparison: a grep on "^$username:" would treat the name as a
    # regular expression and accept "a.c" for an existing "abc".
    while IFS=: read -r existing _ || [[ -n "$existing" ]]; do
        if [[ "$existing" == "$username" ]]; then
            found=1
            break
        fi
    done < "$HTPASSWD_FILE"

    if (( ! found )); then
        log_error "用户 $username 不存在"
        return 1
    fi

    # -v verifies instead of updating; -i makes htpasswd read the password
    # from stdin rather than prompting on the terminal.
    if htpasswd -viB "$HTPASSWD_FILE" "$username" <<< "$password" >/dev/null 2>&1; then
        log_success "用户 $username 认证成功"
        return 0
    else
        log_error "用户 $username 认证失败"
        return 1
    fi
}

# 显示帮助
show_help() {
    # Print the usage text to stdout. The heredoc delimiter is unquoted, so
    # $0, $HTPASSWD_FILE and $LOG_FILE are expanded when the text is printed.
    cat << EOF
GoAccess用户认证管理脚本

用法: $0 <命令> [参数]

命令:
  add <username> [password]     添加用户(如果不提供密码,将自动生成)
  delete <username>             删除用户
  update <username> [password]  更新用户密码
  list                          列出所有用户
  verify <username> <password>  验证用户认证
  generate-password [length]    生成强密码
  backup                        手动创建备份
  restore <backup-file>         从备份恢复
  help                          显示此帮助信息

示例:
  $0 add admin                  # 添加用户admin,自动生成密码
  $0 add user1 mypassword123    # 添加用户user1,指定密码
  $0 delete user1               # 删除用户user1
  $0 update admin newpass456    # 更新admin密码
  $0 list                       # 列出所有用户
  $0 verify admin password      # 验证admin用户认证
  $0 generate-password 20       # 生成20位强密码

配置文件: $HTPASSWD_FILE
日志文件: $LOG_FILE
EOF
}

# 手动备份
# Create a backup of the password file on request.
# Globals:   HTPASSWD_FILE (read)
# Returns:   0 when a backup was made, 1 when there is no file to back up
manual_backup() {
    # create_backup silently does nothing when the password file is missing,
    # so check first instead of reporting a backup that never happened.
    if [[ ! -f "$HTPASSWD_FILE" ]]; then
        log_error "认证文件不存在: $HTPASSWD_FILE"
        return 1
    fi

    create_backup
    log_success "手动备份完成"
}

# 从备份恢复
# Replace the password file with the contents of a backup file. The current
# password file is backed up first.
# Globals:   HTPASSWD_FILE (read)
# Arguments: $1 - path of the backup file to restore
# Returns:   1 when no path is given or the file does not exist
restore_backup() {
    local source_file="$1"

    [[ -n "$source_file" ]] || { log_error "请指定备份文件"; return 1; }
    [[ -f "$source_file" ]] || { log_error "备份文件不存在: $source_file"; return 1; }

    create_backup
    cp "$source_file" "$HTPASSWD_FILE"

    # Restrict access; the group depends on the distribution, and a failing
    # chown is tolerated.
    chmod 640 "$HTPASSWD_FILE"
    if ! chown root:nginx "$HTPASSWD_FILE" 2>/dev/null; then
        chown root:www-data "$HTPASSWD_FILE" 2>/dev/null || true
    fi

    log_success "从备份恢复成功: $source_file"
}

# 主函数
# Command dispatcher.
# Arguments: $1 - command (add, delete, update, list, verify,
#                 generate-password, backup, restore, help)
#            $2.. - command-specific arguments (see show_help)
# Exits with status 1 on a missing or unknown command or missing arguments.
# NOTE: passwords given on the command line are visible to other local users
# in the process list; omit the password to have one generated instead.
main() {
    local username password length generated

    check_dependencies

    case "${1:-}" in
        "add")
            if [[ -z "${2:-}" ]]; then
                log_error "请提供用户名"
                show_help
                exit 1
            fi

            username="$2"
            password="${3:-}"

            if [[ -z "$password" ]]; then
                password=$(generate_password)
                # Terminal only: log_info would also append the clear-text
                # password to $LOG_FILE.
                printf '%b\n' "${BLUE}[INFO]${NC} 自动生成密码: $password"
            fi

            add_user "$username" "$password"
            ;;
        "delete")
            if [[ -z "${2:-}" ]]; then
                log_error "请提供用户名"
                show_help
                exit 1
            fi
            delete_user "$2"
            ;;
        "update")
            if [[ -z "${2:-}" ]]; then
                log_error "请提供用户名"
                show_help
                exit 1
            fi

            username="$2"
            password="${3:-}"

            if [[ -z "$password" ]]; then
                password=$(generate_password)
                # Terminal only, for the same reason as in "add".
                printf '%b\n' "${BLUE}[INFO]${NC} 自动生成新密码: $password"
            fi

            update_password "$username" "$password"
            ;;
        "list")
            list_users
            ;;
        "verify")
            if [[ -z "${2:-}" ]] || [[ -z "${3:-}" ]]; then
                log_error "请提供用户名和密码"
                show_help
                exit 1
            fi
            verify_auth "$2" "$3"
            ;;
        "generate-password")
            length="${2:-16}"
            # Captured separately so a failing generate_password is not
            # masked by echo.
            generated=$(generate_password "$length")
            echo "生成的密码: $generated"
            ;;
        "backup")
            manual_backup
            ;;
        "restore")
            if [[ -z "${2:-}" ]]; then
                log_error "请提供备份文件路径"
                show_help
                exit 1
            fi
            restore_backup "$2"
            ;;
        "help"|"--help"|"-h")
            show_help
            ;;
        "")
            log_error "请提供命令"
            show_help
            exit 1
            ;;
        *)
            log_error "未知命令: $1"
            show_help
            exit 1
            ;;
    esac
}

# Run main only when the file is executed directly; when it is sourced,
# BASH_SOURCE[0] differs from $0 and only the functions are defined.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi

3.2 数据加密和隐私保护

3.2.1 日志数据脱敏脚本

#!/usr/bin/env python3
# log-anonymizer.py - 日志数据脱敏脚本

import re
import sys
import hashlib
import ipaddress
import argparse
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

@dataclass
class AnonymizationConfig:
    """Settings that control which log fields are anonymised and how."""
    # Truncate IP addresses to their network prefix.
    anonymize_ips: bool = True
    # Replace user agents by a browser/OS label plus a hash.
    anonymize_user_agents: bool = True
    # Reduce referrers to scheme and host.
    anonymize_referrers: bool = True
    # Hash the values of sensitive query parameters in request URLs.
    anonymize_query_params: bool = True
    # Salt appended to every value before hashing.
    # NOTE(review): this default is a fixed, published string; hashes made
    # with it can be recomputed by anyone. Pass a private salt in production.
    hash_salt: str = "goaccess_salt_2024"
    ip_mask_ipv4: int = 24  # prefix length (bits) kept for IPv4 addresses
    ip_mask_ipv6: int = 64  # prefix length (bits) kept for IPv6 addresses
    # Leave addresses from internal networks unchanged.
    preserve_internal_ips: bool = True

class LogAnonymizer:
    """Anonymise client-identifying data in web server access-log lines.

    IP addresses are truncated to their network prefix, user agents are
    reduced to a browser/OS label plus a salted hash, referrers are cut down
    to scheme and host, and the values of sensitive query parameters are
    replaced by salted hashes. All behaviour is driven by the configuration
    object passed to the constructor.
    """

    # Combined Log Format:
    #   IP - - [timestamp] "method url protocol" status size "referrer" "user-agent"
    # The final group keeps whatever follows the user agent (custom extra
    # fields), so it can be written back instead of being dropped. Referrer
    # and user agent may be empty.
    _COMBINED_PATTERN = re.compile(
        r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\S+) "([^"]*)" "([^"]*)"(.*)$',
        re.DOTALL,
    )

    # Checked in order, most specific first: Edge and Opera user agents also
    # contain "Chrome/" and "Safari/", and Chrome ones contain "Safari/".
    _BROWSER_PATTERNS = (
        ('Edge', re.compile(r'\bEdg(?:e|A|iOS)?/[\d.]+', re.IGNORECASE)),
        ('Opera', re.compile(r'\b(?:OPR|Opera)/[\d.]+', re.IGNORECASE)),
        ('Chrome', re.compile(r'Chrome/[\d.]+', re.IGNORECASE)),
        ('Firefox', re.compile(r'Firefox/[\d.]+', re.IGNORECASE)),
        ('Safari', re.compile(r'Safari/[\d.]+', re.IGNORECASE)),
    )

    # Checked in order: iOS user agents contain "like Mac OS X" and Android
    # ones contain "Linux", so the mobile systems must be tested first.
    _OS_PATTERNS = (
        ('iOS', re.compile(r'\b(?:iPhone|iPad|iPod)\b', re.IGNORECASE)),
        ('Android', re.compile(r'Android', re.IGNORECASE)),
        ('Windows', re.compile(r'Windows NT [\d.]+', re.IGNORECASE)),
        ('macOS', re.compile(r'Mac OS X [\d_]+', re.IGNORECASE)),
        ('Linux', re.compile(r'Linux', re.IGNORECASE)),
    )

    def __init__(self, config: "AnonymizationConfig"):
        """Store the configuration and prepare patterns, caches and lookups.

        Args:
            config: anonymisation settings (see AnonymizationConfig).
        """
        self.config = config
        # Memoised results, keyed by the original value.
        self.ip_cache: Dict[str, str] = {}
        self.ua_cache: Dict[str, str] = {}

        # Dotted-quad IPv4 addresses with every octet in 0-255.
        self.ip_pattern = re.compile(
            r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
        )
        # IPv6 *candidates*: at least two colon-terminated groups, optionally
        # followed by a hex group or an embedded dotted quad. This covers
        # full, compressed ("2001:db8::1") and IPv4-mapped forms, but also
        # matches things like "13:55:36"; every candidate is therefore
        # validated with the ipaddress module before it is rewritten.
        self.ipv6_pattern = re.compile(
            r'(?<![0-9A-Za-z_:.])'
            r'(?:[0-9A-Fa-f]{0,4}:){2,7}'
            r'(?:(?:\d{1,3}\.){3}\d{1,3}|[0-9A-Fa-f]{1,4})?'
            r'(?![0-9A-Za-z_:.])'
        )

        # A query parameter is sensitive when its lower-cased name contains
        # one of these words.
        self.sensitive_params = {
            'email', 'password', 'token', 'key', 'secret', 'auth',
            'session', 'user', 'username', 'userid', 'id', 'phone',
            'mobile', 'tel', 'ssn', 'credit', 'card', 'account'
        }

        # Private, loopback and link-local ranges treated as internal.
        self.internal_networks = [
            ipaddress.IPv4Network('10.0.0.0/8'),
            ipaddress.IPv4Network('172.16.0.0/12'),
            ipaddress.IPv4Network('192.168.0.0/16'),
            ipaddress.IPv4Network('127.0.0.0/8'),
            ipaddress.IPv6Network('::1/128'),
            ipaddress.IPv6Network('fc00::/7'),
            ipaddress.IPv6Network('fe80::/10')
        ]

    def _hash_string(self, text: str) -> str:
        """Return the first 16 hex digits of SHA-256(text + configured salt)."""
        return hashlib.sha256((text + self.config.hash_salt).encode()).hexdigest()[:16]

    def _is_internal_ip(self, ip_str: str) -> bool:
        """Return True if ip_str is a valid address inside an internal network."""
        try:
            ip = ipaddress.ip_address(ip_str)
        except ValueError:
            return False
        return any(ip in network for network in self.internal_networks)

    def _anonymize_ipv6_match(self, match) -> str:
        """Rewrite a regex candidate only if it really is an IPv6 address."""
        candidate = match.group()
        try:
            ipaddress.IPv6Address(candidate)
        except ValueError:
            # Timestamps, MAC addresses and similar: leave untouched.
            return candidate
        return self.anonymize_ip(candidate)

    def anonymize_ip(self, ip_str: str) -> str:
        """Truncate an IP address to the configured network prefix.

        Args:
            ip_str: the address (or other first-field token) to anonymise.

        Returns:
            ip_str unchanged when IP anonymisation is disabled or the address
            is internal and internal addresses are preserved; the network
            address for valid IPs; ``invalid_ip_<hash>`` for anything that
            cannot be parsed or masked.
        """
        if not self.config.anonymize_ips:
            return ip_str

        if ip_str in self.ip_cache:
            return self.ip_cache[ip_str]

        if self.config.preserve_internal_ips and self._is_internal_ip(ip_str):
            self.ip_cache[ip_str] = ip_str
            return ip_str

        try:
            ip = ipaddress.ip_address(ip_str)
            if isinstance(ip, ipaddress.IPv4Address):
                prefix = self.config.ip_mask_ipv4
            else:
                prefix = self.config.ip_mask_ipv6
            # strict=False lets the host bits be non-zero; they are cleared.
            network = ipaddress.ip_network(f"{ip}/{prefix}", strict=False)
            anonymized = str(network.network_address)
        except ValueError:
            # Not an IP address (or an invalid prefix length): use a hash.
            anonymized = f"invalid_ip_{self._hash_string(ip_str)}"

        self.ip_cache[ip_str] = anonymized
        return anonymized

    def anonymize_user_agent(self, ua_str: str) -> str:
        """Reduce a User-Agent string to ``<browser>/<os>/hash_<digest>``.

        Returns ua_str unchanged when user-agent anonymisation is disabled or
        the string is empty. Browser and OS are 'Unknown' when no pattern
        matches.
        """
        if not self.config.anonymize_user_agents or not ua_str:
            return ua_str

        if ua_str in self.ua_cache:
            return self.ua_cache[ua_str]

        browser = next(
            (name for name, pattern in self._BROWSER_PATTERNS if pattern.search(ua_str)),
            'Unknown',
        )
        platform_name = next(
            (name for name, pattern in self._OS_PATTERNS if pattern.search(ua_str)),
            'Unknown',
        )

        anonymized = f"{browser}/{platform_name}/hash_{self._hash_string(ua_str)}"
        self.ua_cache[ua_str] = anonymized
        return anonymized

    def anonymize_referrer(self, referrer: str) -> str:
        """Strip path and query from a referrer, keeping scheme and host.

        Returns the referrer unchanged when referrer anonymisation is
        disabled or the value is empty or '-'. Values without a host, or
        that cannot be parsed, become ``anonymized_referrer_<hash>``.
        """
        if not self.config.anonymize_referrers or not referrer or referrer == '-':
            return referrer

        from urllib.parse import urlparse

        try:
            parsed = urlparse(referrer)
        except ValueError:
            # urlparse rejects some malformed hosts (e.g. bad IPv6 brackets).
            return f"anonymized_referrer_{self._hash_string(referrer)}"

        if parsed.netloc:
            return f"{parsed.scheme}://{parsed.netloc}/"
        return f"anonymized_referrer_{self._hash_string(referrer)}"

    def anonymize_query_params(self, url: str) -> str:
        """Replace the values of sensitive query parameters by hashes.

        A parameter is sensitive when its lower-cased name contains any entry
        of ``self.sensitive_params``. Other parameters are kept. The URL is
        returned unchanged when the feature is disabled, when there is no
        query string, or when the URL cannot be parsed.
        """
        if not self.config.anonymize_query_params:
            return url

        from urllib.parse import urlparse, parse_qs, urlencode, urlunparse

        try:
            parsed = urlparse(url)
            if not parsed.query:
                return url

            params = parse_qs(parsed.query, keep_blank_values=True)
            anonymized_params = {}

            for key, values in params.items():
                key_lower = key.lower()
                if any(sensitive in key_lower for sensitive in self.sensitive_params):
                    # All values of the parameter are hashed together.
                    anonymized_params[key] = [f"hash_{self._hash_string(str(values))}"]
                else:
                    anonymized_params[key] = values

            new_query = urlencode(anonymized_params, doseq=True)
            return urlunparse((
                parsed.scheme, parsed.netloc, parsed.path,
                parsed.params, new_query, parsed.fragment
            ))
        except ValueError:
            # Best effort: an unparsable URL is passed through unchanged.
            return url

    def process_log_line(self, line: str) -> str:
        """Anonymise a single log line.

        Blank lines and lines starting with '#' are returned unchanged. IP
        addresses are rewritten anywhere in the line. Lines in Combined Log
        Format additionally get their client field, request URL, referrer and
        user agent anonymised. Any fields after the user agent and the
        original line ending are preserved.
        """
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            return line

        if self.config.anonymize_ips:
            line = self.ip_pattern.sub(lambda m: self.anonymize_ip(m.group()), line)
            line = self.ipv6_pattern.sub(self._anonymize_ipv6_match, line)

        # Match without the line ending so it can be re-attached verbatim.
        body = line.rstrip('\r\n')
        line_ending = line[len(body):]

        match = self._COMBINED_PATTERN.match(body)
        if not match:
            return line

        (ip, ident, authuser, timestamp, request,
         status, size, referrer, user_agent, trailer) = match.groups()

        ip = self.anonymize_ip(ip)
        referrer = self.anonymize_referrer(referrer)
        user_agent = self.anonymize_user_agent(user_agent)

        # The request is normally "METHOD URL PROTOCOL", but malformed or
        # HTTP/0.9 requests may lack the protocol; the URL is the second
        # token whenever there is one.
        parts = request.split(' ', 2)
        if len(parts) >= 2:
            parts[1] = self.anonymize_query_params(parts[1])
            request = ' '.join(parts)

        return (
            f'{ip} {ident} {authuser} [{timestamp}] "{request}" {status} {size} '
            f'"{referrer}" "{user_agent}"{trailer}{line_ending}'
        )

    def process_file(self, input_file: Path, output_file: Path) -> Tuple[int, int]:
        """Anonymise input_file line by line into output_file.

        Args:
            input_file: log file to read (undecodable bytes are ignored).
            output_file: destination file; overwritten if it exists.

        Returns:
            ``(total_lines, changed_lines)`` where changed_lines counts the
            lines that differ from their original.

        Raises:
            ValueError: if input and output refer to the same file.
            Exception: any I/O error is logged and re-raised.
        """
        processed_lines = 0
        total_lines = 0

        try:
            # Opening the output for writing truncates it, which would
            # destroy the input if both paths name the same file.
            if Path(input_file).resolve() == Path(output_file).resolve():
                raise ValueError("输入文件和输出文件不能相同")

            with open(input_file, 'r', encoding='utf-8', errors='ignore') as infile, \
                 open(output_file, 'w', encoding='utf-8') as outfile:

                for line in infile:
                    total_lines += 1
                    processed_line = self.process_log_line(line)
                    outfile.write(processed_line)

                    if processed_line != line:
                        processed_lines += 1

                    if total_lines % 10000 == 0:
                        logger.info(f"已处理 {total_lines} 行")

            logger.info(f"文件处理完成: {input_file} -> {output_file}")
            logger.info(f"总行数: {total_lines}, 脱敏行数: {processed_lines}")

            return total_lines, processed_lines

        except Exception as e:
            logger.error(f"处理文件时出错: {e}")
            raise

def main():
    """Command-line entry point of the log anonymisation tool.

    Parses the arguments, builds the anonymisation configuration, processes
    the input file into the output file and logs a summary. Exits with
    status 1 when the input file is missing or processing fails, and with
    status 2 (argparse convention) on invalid arguments.
    """
    parser = argparse.ArgumentParser(description='GoAccess日志数据脱敏工具')
    parser.add_argument('input', help='输入日志文件路径')
    parser.add_argument('output', help='输出日志文件路径')
    parser.add_argument('--no-ip', action='store_true', help='不脱敏IP地址')
    parser.add_argument('--no-ua', action='store_true', help='不脱敏User-Agent')
    parser.add_argument('--no-referrer', action='store_true', help='不脱敏引用来源')
    parser.add_argument('--no-query', action='store_true', help='不脱敏查询参数')
    parser.add_argument('--salt', default='goaccess_salt_2024', help='哈希盐值')
    parser.add_argument('--ipv4-mask', type=int, default=24, help='IPv4掩码位数')
    parser.add_argument('--ipv6-mask', type=int, default=64, help='IPv6掩码位数')
    parser.add_argument('--no-preserve-internal', action='store_true', help='不保留内部IP')

    args = parser.parse_args()

    # An out-of-range prefix length cannot be applied to any address; every
    # IP would then silently be replaced by an "invalid_ip_..." hash, so it
    # is rejected up front.
    if not 0 <= args.ipv4_mask <= 32:
        parser.error('--ipv4-mask 必须在 0 到 32 之间')
    if not 0 <= args.ipv6_mask <= 128:
        parser.error('--ipv6-mask 必须在 0 到 128 之间')

    config = AnonymizationConfig(
        anonymize_ips=not args.no_ip,
        anonymize_user_agents=not args.no_ua,
        anonymize_referrers=not args.no_referrer,
        anonymize_query_params=not args.no_query,
        hash_salt=args.salt,
        ip_mask_ipv4=args.ipv4_mask,
        ip_mask_ipv6=args.ipv6_mask,
        preserve_internal_ips=not args.no_preserve_internal
    )

    input_file = Path(args.input)
    if not input_file.exists():
        logger.error(f"输入文件不存在: {input_file}")
        sys.exit(1)

    # Make sure the destination directory exists.
    output_file = Path(args.output)
    output_file.parent.mkdir(parents=True, exist_ok=True)

    anonymizer = LogAnonymizer(config)

    try:
        total_lines, processed_lines = anonymizer.process_file(input_file, output_file)

        # An empty input file is a valid run, not a division-by-zero failure.
        rate = processed_lines / total_lines * 100 if total_lines else 0.0

        logger.info("=== 脱敏完成 ===")
        logger.info(f"输入文件: {input_file}")
        logger.info(f"输出文件: {output_file}")
        logger.info(f"总行数: {total_lines}")
        logger.info(f"脱敏行数: {processed_lines}")
        logger.info(f"脱敏率: {rate:.2f}%")

        logger.info(f"IP缓存大小: {len(anonymizer.ip_cache)}")
        logger.info(f"User-Agent缓存大小: {len(anonymizer.ua_cache)}")

    except Exception as e:
        logger.error(f"脱敏过程中出错: {e}")
        sys.exit(1)

# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()