11.1 常见问题诊断

11.1.1 连接问题诊断

1. 连接无法建立

#!/bin/bash
# connection_diagnosis.sh

# Diagnose why a connection to a host/port cannot be established.
#
# Runs six read-only checks and prints a report to stdout:
#   1. ICMP reachability (ping); lists local ICMP iptables rules on failure
#   2. Port reachability via nc, falling back to telnet
#   3. INPUT/OUTPUT iptables rules that mention the port
#   4. Default chain policies
#   5. Connection-tracking table usage and entries for the target
#   6. Generic troubleshooting suggestions
#
# Arguments:
#   $1 - target host (name or address)
#   $2 - target port
#   $3 - protocol label; only shown in the report header (default: tcp)
# Outputs:
#   Human-readable report on stdout.
diagnose_connection() {
    local target_host="$1"
    local target_port="$2"
    local protocol="${3:-tcp}"
    local icmp_rules chain chain_rules policies
    local conn_count conn_max specific_conn

    echo "=== Connection Diagnosis for $target_host:$target_port ($protocol) ==="

    # 1. Basic reachability
    echo "1. Testing basic connectivity..."
    if ping -c 3 "$target_host" > /dev/null 2>&1; then
        echo "   ✓ Host is reachable via ICMP"
    else
        echo "   ✗ Host is NOT reachable via ICMP"
        echo "   Checking if ICMP is blocked..."

        # Show local rules that mention ICMP, if any
        icmp_rules=$(iptables -L -n | grep icmp)
        if [ -n "$icmp_rules" ]; then
            echo "   ICMP rules found:"
            echo "$icmp_rules" | sed 's/^/     /'
        fi
    fi

    # 2. Port reachability
    echo "2. Testing port connectivity..."
    if command -v nc > /dev/null; then
        if nc -z -w 5 "$target_host" "$target_port" 2>/dev/null; then
            echo "   ✓ Port $target_port is open"
        else
            echo "   ✗ Port $target_port is closed or filtered"
        fi
    elif command -v telnet > /dev/null; then
        # Explicit if/else: "a && b || c" would also run c when b fails.
        if timeout 5 telnet "$target_host" "$target_port" 2>/dev/null | grep -q "Connected"; then
            echo "   ✓ Port $target_port is open"
        else
            echo "   ✗ Port $target_port is closed or filtered"
        fi
    else
        echo "   ? Neither nc nor telnet is available; port test skipped"
    fi

    # 3. iptables rules that mention the port
    echo "3. Checking iptables rules..."

    for chain in INPUT OUTPUT; do
        echo "   $chain chain rules for port $target_port:"
        # The port may be followed by a space or by end-of-line (iptables
        # does not pad the last match with trailing whitespace), so accept
        # both. Requiring one of them keeps port 80 from matching 8080.
        chain_rules=$(iptables -L "$chain" -n --line-numbers | grep -E ":${target_port}( |\$)")
        if [ -n "$chain_rules" ]; then
            echo "$chain_rules" | sed 's/^/     /'
        else
            echo "     No specific rules found for port $target_port"
        fi
    done

    # 4. Default policies
    echo "4. Checking default policies..."
    policies=$(iptables -L -n | grep "Chain.*policy")
    echo "$policies" | sed 's/^/   /'

    # 5. Connection tracking
    echo "5. Checking connection tracking..."
    if [ -f /proc/net/nf_conntrack ]; then
        conn_count=$(wc -l < /proc/net/nf_conntrack)
        conn_max=$(cat /proc/sys/net/netfilter/nf_conntrack_max)
        echo "   Current connections: $conn_count / $conn_max"

        # Conntrack entries are written as "dst=<addr> ... dport=<port> ...",
        # so match those key=value tokens literally. This only finds entries
        # when target_host is given as an address, not as a host name.
        specific_conn=$(grep -F -- "dst=$target_host " /proc/net/nf_conntrack 2>/dev/null |
            grep -F -- "dport=$target_port ")
        if [ -n "$specific_conn" ]; then
            echo "   Found connection tracking entry:"
            echo "$specific_conn" | sed 's/^/     /'
        else
            echo "   No connection tracking entry found"
        fi
    fi

    # 6. Suggestions
    echo "6. Troubleshooting suggestions:"
    echo "   - Check if service is running on target port"
    echo "   - Verify iptables rules allow the connection"
    echo "   - Check if connection tracking table is full"
    echo "   - Review application logs for errors"
    echo "   - Test from different source addresses"
}

# Diagnose the state of a single network interface.
#
# Prints link state, IPv4 addresses, routes, iptables rules and traffic
# counters for the interface.
#
# Arguments:
#   $1 - interface name (e.g. eth0)
# Outputs:
#   Human-readable report on stdout.
# Returns:
#   1 if "ip link show" fails for the interface, 0 otherwise.
diagnose_interface() {
    local interface="$1"
    local link_info ip_info route_info interface_rules
    local stats_dir rx_bytes tx_bytes rx_packets tx_packets

    echo "=== Interface Diagnosis for $interface ==="

    # 1. Link state
    echo "1. Interface status:"
    if ! link_info=$(ip link show "$interface" 2>/dev/null); then
        echo "   ✗ Interface $interface does not exist"
        return 1
    fi
    echo "$link_info" | sed 's/^/   /'

    if echo "$link_info" | grep -q "state UP"; then
        echo "   ✓ Interface is UP"
    else
        echo "   ✗ Interface is DOWN"
    fi

    # 2. IPv4 address configuration
    echo "2. IP address configuration:"
    ip_info=$(ip addr show "$interface" | grep "inet ")
    if [ -n "$ip_info" ]; then
        echo "$ip_info" | sed 's/^/   /'
    else
        echo "   No IP address configured"
    fi

    # 3. Routes using this interface. Compare the token after "dev" exactly
    #    so that e.g. eth1 does not also match eth10.
    echo "3. Routing information:"
    route_info=$(ip route | awk -v dev="$interface" '
        { for (i = 1; i < NF; i++) if ($i == "dev" && $(i + 1) == dev) { print; next } }')
    if [ -n "$route_info" ]; then
        echo "$route_info" | sed 's/^/   /'
    else
        echo "   No routes found for this interface"
    fi

    # 4. Rules bound to this interface. The in/out interface columns are only
    #    printed in verbose mode, hence -v. A negated match is shown as
    #    "!<name>", so accept that form as well.
    echo "4. Interface-specific iptables rules:"
    interface_rules=$(iptables -L -n -v | awk -v dev="$interface" '
        { for (i = 1; i <= NF; i++) if ($i == dev || $i == "!" dev) { print; next } }')
    if [ -n "$interface_rules" ]; then
        echo "$interface_rules" | sed 's/^/   /'
    else
        echo "   No interface-specific rules found"
    fi

    # 5. Traffic counters from sysfs
    echo "5. Traffic statistics:"
    stats_dir="/sys/class/net/$interface/statistics"
    if [ -f "$stats_dir/rx_bytes" ]; then
        rx_bytes=$(cat "$stats_dir/rx_bytes")
        tx_bytes=$(cat "$stats_dir/tx_bytes")
        rx_packets=$(cat "$stats_dir/rx_packets")
        tx_packets=$(cat "$stats_dir/tx_packets")

        echo "   RX: $rx_packets packets, $rx_bytes bytes"
        echo "   TX: $tx_packets packets, $tx_bytes bytes"
    fi
}

# Command-line entry point: the first argument selects the diagnosis to run.
if [ "$#" -lt 1 ]; then
    printf '%s\n' \
        "Usage: $0 <command> [options]" \
        "Commands:" \
        "  connection <host> <port> [protocol]  - Diagnose connection issues" \
        "  interface <interface>                - Diagnose interface issues"
    exit 1
fi

if [ "$1" = "connection" ]; then
    # Needs at least <host> and <port> after the command name.
    if [ "$#" -lt 3 ]; then
        printf '%s\n' "Usage: $0 connection <host> <port> [protocol]"
        exit 1
    fi
    diagnose_connection "$2" "$3" "$4"
elif [ "$1" = "interface" ]; then
    # Needs the interface name after the command name.
    if [ "$#" -lt 2 ]; then
        printf '%s\n' "Usage: $0 interface <interface>"
        exit 1
    fi
    diagnose_interface "$2"
else
    printf '%s\n' "Unknown command: $1"
    exit 1
fi

11.4 自动化故障排除

11.4.1 自动化诊断脚本

1. 综合故障诊断脚本

#!/bin/bash
# iptables_auto_diagnosis.sh

# Run a full automated iptables/network diagnosis and write a report.
#
# Checks, in order: interface state and default route, iptables default
# policies and rule count, connection-tracking usage, CPU/memory load,
# recent iptables/kernel log entries, then prints a summary with
# recommendations. Every report line goes to stdout and is also written to
# a timestamped log file under /var/log.
#
# Arguments: none
# Outputs:   report on stdout and in
#            /var/log/iptables_diagnosis_<YYYYmmdd_HHMMSS>.log
# Returns:   number of issues found, capped at 255 so a large count cannot
#            wrap around to 0 (0 means no issues)
auto_diagnosis() {
    local log_file
    log_file="/var/log/iptables_diagnosis_$(date +%Y%m%d_%H%M%S).log"

    local issues_found=0
    local line interface status default_route
    local chain policy allow_rules total_rules
    local current_conn max_conn usage_percent=0 count state
    local load_avg cpu_cores load_per_core memory_usage
    local recent_blocks network_errors

    echo "=== Automated iptables Diagnosis Report ===" | tee "$log_file"
    echo "Timestamp: $(date)" | tee -a "$log_file"
    echo "Hostname: $(hostname)" | tee -a "$log_file"
    echo "Kernel: $(uname -r)" | tee -a "$log_file"
    echo | tee -a "$log_file"

    # 1. Basic connectivity
    echo "1. Basic Connectivity Check:" | tee -a "$log_file"

    # Interface state. The loop is fed through process substitution rather
    # than a pipe so that it runs in the current shell; in a pipeline the
    # issues_found increments would be lost with the subshell.
    echo "   Network Interfaces:" | tee -a "$log_file"
    while read -r line; do
        interface=$(echo "$line" | awk -F': ' '{print $2}' | awk '{print $1}')
        status=$(echo "$line" | grep -o "state [A-Z]*" | awk '{print $2}')
        echo "     $interface: $status" | tee -a "$log_file"

        if [ "$status" != "UP" ] && [ "$interface" != "lo" ]; then
            echo "     ⚠️  Interface $interface is down" | tee -a "$log_file"
            issues_found=$((issues_found + 1))
        fi
    done < <(ip link show | grep -E "^[0-9]+:")

    # Default route
    echo "   Default Route:" | tee -a "$log_file"
    default_route=$(ip route | grep default)
    if [ -n "$default_route" ]; then
        echo "     ✓ $default_route" | tee -a "$log_file"
    else
        echo "     ❌ No default route found" | tee -a "$log_file"
        issues_found=$((issues_found + 1))
    fi

    # 2. iptables rules
    echo -e "\n2. iptables Rules Check:" | tee -a "$log_file"

    # Default policies; a restrictive policy with no ACCEPT rule at all is
    # reported as an issue.
    echo "   Default Policies:" | tee -a "$log_file"
    for chain in INPUT OUTPUT FORWARD; do
        policy=$(iptables -L "$chain" -n | head -1 | awk '{print $4}' | tr -d '()')
        echo "     $chain: $policy" | tee -a "$log_file"

        if [ "$policy" = "DROP" ] || [ "$policy" = "REJECT" ]; then
            allow_rules=$(iptables -L "$chain" -n | grep -c ACCEPT)
            if [ "${allow_rules:-0}" -eq 0 ]; then
                echo "     ⚠️  $chain chain has restrictive policy but no ACCEPT rules" | tee -a "$log_file"
                issues_found=$((issues_found + 1))
            fi
        fi
    done

    # Rule count
    total_rules=$(iptables-save | grep -c "^-A")
    total_rules=${total_rules:-0}
    echo "   Total Rules: $total_rules" | tee -a "$log_file"

    if [ "$total_rules" -gt 1000 ]; then
        echo "     ⚠️  Very high rule count may impact performance" | tee -a "$log_file"
        issues_found=$((issues_found + 1))
    fi

    # 3. Connection tracking
    echo -e "\n3. Connection Tracking Check:" | tee -a "$log_file"

    if [ -f /proc/net/nf_conntrack ]; then
        current_conn=$(wc -l < /proc/net/nf_conntrack)
        current_conn=${current_conn:-0}
        max_conn=$(cat /proc/sys/net/netfilter/nf_conntrack_max)
        max_conn=${max_conn:-0}
        # Guard against division by zero when the limit cannot be read.
        if [ "$max_conn" -gt 0 ]; then
            usage_percent=$((current_conn * 100 / max_conn))
        fi

        echo "   Connection Table Usage: $usage_percent% ($current_conn/$max_conn)" | tee -a "$log_file"

        if [ "$usage_percent" -gt 90 ]; then
            echo "     ❌ Connection table near capacity" | tee -a "$log_file"
            issues_found=$((issues_found + 1))
        elif [ "$usage_percent" -gt 80 ]; then
            echo "     ⚠️  Connection table usage is high" | tee -a "$log_file"
            issues_found=$((issues_found + 1))
        else
            echo "     ✓ Connection table usage is normal" | tee -a "$log_file"
        fi

        # State distribution. In /proc/net/nf_conntrack field 4 is the
        # protocol number; the state name is field 6 and exists only for
        # TCP entries, so restrict the tally to those.
        echo "   Connection States:" | tee -a "$log_file"
        awk '$3 == "tcp" {print $6}' /proc/net/nf_conntrack | sort | uniq -c | sort -nr | head -5 |
        while read -r count state; do
            echo "     $state: $count" | tee -a "$log_file"
        done
    else
        echo "   ❌ Connection tracking not available" | tee -a "$log_file"
        issues_found=$((issues_found + 1))
    fi

    # 4. System resources
    echo -e "\n4. System Resource Check:" | tee -a "$log_file"

    # CPU load (1-minute average, normalised per core; falls back to 0 if
    # bc is unavailable)
    load_avg=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ',')
    cpu_cores=$(nproc)
    load_per_core=$(echo "scale=2; $load_avg / $cpu_cores" | bc 2>/dev/null || echo "0")

    echo "   CPU Load: $load_avg (${load_per_core} per core)" | tee -a "$log_file"

    if [ "$(echo "$load_per_core > 2" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "     ❌ Very high CPU load" | tee -a "$log_file"
        issues_found=$((issues_found + 1))
    elif [ "$(echo "$load_per_core > 1" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "     ⚠️  High CPU load" | tee -a "$log_file"
        issues_found=$((issues_found + 1))
    fi

    # Memory usage
    memory_usage=$(free | grep Mem | awk '{printf "%.1f", $3*100/$2}')
    echo "   Memory Usage: ${memory_usage}%" | tee -a "$log_file"

    if [ "$(echo "$memory_usage > 90" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "     ❌ Very high memory usage" | tee -a "$log_file"
        issues_found=$((issues_found + 1))
    elif [ "$(echo "$memory_usage > 80" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "     ⚠️  High memory usage" | tee -a "$log_file"
        issues_found=$((issues_found + 1))
    fi

    # 5. Logs
    echo -e "\n5. Log Analysis:" | tee -a "$log_file"

    # Recent iptables entries in the system log (at most the last 10 counted)
    if [ -f /var/log/messages ]; then
        recent_blocks=$(grep -i "iptables" /var/log/messages | tail -10 | wc -l)
        if [ "${recent_blocks:-0}" -gt 0 ]; then
            echo "   Recent iptables log entries: $recent_blocks" | tee -a "$log_file"
            echo "   Last 3 entries:" | tee -a "$log_file"
            grep -i "iptables" /var/log/messages | tail -3 | sed 's/^/     /' | tee -a "$log_file"
        else
            echo "   No recent iptables log entries found" | tee -a "$log_file"
        fi
    fi

    # Network-related kernel messages (at most the last 5 counted)
    network_errors=$(dmesg | grep -i -E "network|iptables|netfilter" | tail -5 | wc -l)
    if [ "${network_errors:-0}" -gt 0 ]; then
        echo "   Recent network-related kernel messages: $network_errors" | tee -a "$log_file"
        dmesg | grep -i -E "network|iptables|netfilter" | tail -3 | sed 's/^/     /' | tee -a "$log_file"
    fi

    # 6. Summary and recommendations
    echo -e "\n6. Summary and Recommendations:" | tee -a "$log_file"

    if [ "$issues_found" -eq 0 ]; then
        echo "   ✓ No critical issues detected" | tee -a "$log_file"
        echo "   System appears to be functioning normally" | tee -a "$log_file"
    else
        echo "   Found $issues_found potential issues" | tee -a "$log_file"
        echo "   Recommendations:" | tee -a "$log_file"

        if [ "$total_rules" -gt 500 ]; then
            echo "     - Consider optimizing iptables rules" | tee -a "$log_file"
            echo "     - Use ipset for large IP lists" | tee -a "$log_file"
        fi

        if [ "$(echo "$load_per_core > 1" | bc 2>/dev/null || echo 0)" = "1" ]; then
            echo "     - Monitor CPU usage and consider hardware upgrade" | tee -a "$log_file"
        fi

        if [ "$(echo "$memory_usage > 80" | bc 2>/dev/null || echo 0)" = "1" ]; then
            echo "     - Monitor memory usage and consider adding RAM" | tee -a "$log_file"
        fi

        # usage_percent stays 0 when connection tracking is unavailable.
        if [ "$usage_percent" -gt 80 ]; then
            echo "     - Increase nf_conntrack_max value" | tee -a "$log_file"
            echo "     - Reduce connection timeout values" | tee -a "$log_file"
        fi
    fi

    echo -e "\nDiagnosis report saved to: $log_file" | tee -a "$log_file"
    echo "Issues found: $issues_found" | tee -a "$log_file"

    # Exit statuses are 8-bit; cap instead of letting 256 wrap to 0.
    if [ "$issues_found" -gt 255 ]; then
        return 255
    fi
    return "$issues_found"
}

# Quick health check of the firewall host.
#
# Checks the iptables service and rule count, loopback and default-gateway
# reachability, CPU load, memory usage and connection-tracking usage, and
# prints a short report to stdout.
#
# The overall status only ever escalates (HEALTHY -> WARNING -> CRITICAL);
# a later warning never downgrades an earlier critical finding.
#
# Arguments: none
# Returns:   0 = HEALTHY, 1 = WARNING, 2 = CRITICAL
quick_health_check() {
    echo "=== Quick Health Check ==="

    local status="HEALTHY"
    local rule_count gateway
    local load_avg cpu_cores load_per_core memory_usage
    local current_conn max_conn usage_percent

    # 1. Basic services
    echo "1. Basic Service Check:"

    # iptables service (only where systemd is present)
    if command -v systemctl >/dev/null 2>&1; then
        if systemctl is-active iptables >/dev/null 2>&1; then
            echo "   ✓ iptables service is active"
        else
            echo "   ⚠️  iptables service status unknown"
        fi
    fi

    # Are rules loaded? Judged by the line count of the listing; -n avoids
    # reverse DNS lookups.
    rule_count=$(iptables -L -n | wc -l)
    if [ "${rule_count:-0}" -gt 10 ]; then
        echo "   ✓ iptables rules are loaded"
    else
        echo "   ⚠️  Very few iptables rules detected"
        if [ "$status" = "HEALTHY" ]; then
            status="WARNING"
        fi
    fi

    # 2. Connectivity
    # A bare echo prints the separating blank line; "\n" inside a plain
    # echo string would be printed literally by bash.
    echo
    echo "2. Connectivity Check:"

    # Loopback
    if ping -c 1 127.0.0.1 >/dev/null 2>&1; then
        echo "   ✓ Localhost connectivity OK"
    else
        echo "   ❌ Localhost connectivity failed"
        status="CRITICAL"
    fi

    # Default gateway
    gateway=$(ip route | grep default | awk '{print $3}' | head -1)
    if [ -n "$gateway" ]; then
        if ping -c 1 -W 3 "$gateway" >/dev/null 2>&1; then
            echo "   ✓ Gateway ($gateway) reachable"
        else
            echo "   ⚠️  Gateway ($gateway) unreachable"
            # Do not overwrite CRITICAL set by the loopback check.
            if [ "$status" = "HEALTHY" ]; then
                status="WARNING"
            fi
        fi
    else
        echo "   ❌ No default gateway found"
        status="CRITICAL"
    fi

    # 3. Resources
    echo
    echo "3. Resource Check:"

    # CPU load (per core; falls back to 0 if bc is unavailable)
    load_avg=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ',')
    cpu_cores=$(nproc)
    load_per_core=$(echo "scale=2; $load_avg / $cpu_cores" | bc 2>/dev/null || echo "0")

    if [ "$(echo "$load_per_core > 2" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "   ❌ High CPU load: $load_per_core per core"
        status="CRITICAL"
    elif [ "$(echo "$load_per_core > 1" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "   ⚠️  Moderate CPU load: $load_per_core per core"
        if [ "$status" = "HEALTHY" ]; then
            status="WARNING"
        fi
    else
        echo "   ✓ CPU load normal: $load_per_core per core"
    fi

    # Memory usage
    memory_usage=$(free | grep Mem | awk '{printf "%.1f", $3*100/$2}')
    if [ "$(echo "$memory_usage > 90" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "   ❌ High memory usage: ${memory_usage}%"
        status="CRITICAL"
    elif [ "$(echo "$memory_usage > 80" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "   ⚠️  Moderate memory usage: ${memory_usage}%"
        if [ "$status" = "HEALTHY" ]; then
            status="WARNING"
        fi
    else
        echo "   ✓ Memory usage normal: ${memory_usage}%"
    fi

    # Connection tracking
    if [ -f /proc/net/nf_conntrack ]; then
        current_conn=$(wc -l < /proc/net/nf_conntrack)
        current_conn=${current_conn:-0}
        max_conn=$(cat /proc/sys/net/netfilter/nf_conntrack_max)
        max_conn=${max_conn:-0}
        usage_percent=0
        # Guard against division by zero when the limit cannot be read.
        if [ "$max_conn" -gt 0 ]; then
            usage_percent=$((current_conn * 100 / max_conn))
        fi

        if [ "$usage_percent" -gt 90 ]; then
            echo "   ❌ Connection table near capacity: ${usage_percent}%"
            status="CRITICAL"
        elif [ "$usage_percent" -gt 80 ]; then
            echo "   ⚠️  High connection table usage: ${usage_percent}%"
            if [ "$status" = "HEALTHY" ]; then
                status="WARNING"
            fi
        else
            echo "   ✓ Connection table usage normal: ${usage_percent}%"
        fi
    fi

    # 4. Summary
    echo
    echo "4. Overall Status: $status"

    case "$status" in
        "HEALTHY")
            echo "   ✓ All systems operating normally"
            return 0
            ;;
        "WARNING")
            echo "   ⚠️  Some issues detected, monitoring recommended"
            return 1
            ;;
        "CRITICAL")
            echo "   ❌ Critical issues detected, immediate attention required"
            return 2
            ;;
    esac
}

# Command-line entry point; with no argument the help text is shown.
if [ "${1:-help}" = "diagnosis" ]; then
    auto_diagnosis
elif [ "${1:-help}" = "health" ]; then
    quick_health_check
elif [ "${1:-help}" = "help" ]; then
    printf '%s\n' \
        "Usage: $0 <command>" \
        "Commands:" \
        "  diagnosis  - Run comprehensive diagnosis" \
        "  health     - Quick health check"
else
    printf '%s\n' "Unknown command: $1"
    exit 1
fi

11.5 最佳实践和建议

11.5.1 故障排除最佳实践

1. 系统化的故障排除方法

# Print the troubleshooting checklist. The quoted here-document emits the
# text verbatim, one checklist item per line.
cat <<'EOF'
=== iptables Troubleshooting Checklist ===

1. 基础检查 (Basic Checks):
   □ 检查网络接口状态
   □ 验证 IP 地址配置
   □ 确认路由表正确
   □ 测试基本连通性

2. iptables 规则检查 (Rule Checks):
   □ 查看默认策略
   □ 检查规则顺序
   □ 验证规则语法
   □ 测试规则匹配

3. 连接跟踪检查 (Connection Tracking):
   □ 检查连接表使用情况
   □ 验证连接状态
   □ 检查超时设置
   □ 监控连接建立速率

4. 性能检查 (Performance Checks):
   □ 监控 CPU 使用率
   □ 检查内存使用情况
   □ 分析规则复杂度
   □ 测试处理速度

5. 日志分析 (Log Analysis):
   □ 检查系统日志
   □ 分析 iptables 日志
   □ 查看内核消息
   □ 监控错误模式
EOF

2. 常见问题解决方案

#!/bin/bash
# common_issues_solutions.sh

# Print a reference sheet of six common iptables problems, each with the
# commands to check and the usual remedies. Output only; nothing is changed
# on the system.
solve_common_issues() {
    cat <<'EOF'
=== Common iptables Issues and Solutions ===

1. 连接被拒绝 (Connection Refused):
   问题: 无法连接到服务
   检查:
     - iptables -L INPUT -n -v --line-numbers
     - netstat -tlnp | grep <port>
     - ss -tlnp | grep <port>
   解决:
     - 添加允许规则: iptables -I INPUT -p tcp --dport <port> -j ACCEPT
     - 检查服务是否运行
     - 验证监听地址

2. 连接超时 (Connection Timeout):
   问题: 连接建立缓慢或超时
   检查:
     - ping <target_ip>
     - traceroute <target_ip>
     - iptables -L FORWARD -n -v
   解决:
     - 检查 FORWARD 链规则
     - 验证路由配置
     - 检查 MTU 设置

3. NAT 不工作 (NAT Not Working):
   问题: 网络地址转换失败
   检查:
     - iptables -t nat -L -n -v
     - cat /proc/sys/net/ipv4/ip_forward
     - ip route show
   解决:
     - 启用 IP 转发: echo 1 > /proc/sys/net/ipv4/ip_forward
     - 添加 MASQUERADE 规则
     - 检查源/目标地址

4. 规则不匹配 (Rules Not Matching):
   问题: 规则没有按预期工作
   检查:
     - iptables -L -n -v --line-numbers
     - iptables -Z (重置计数器)
     - tcpdump 抓包分析
   解决:
     - 检查规则顺序
     - 验证匹配条件
     - 使用 LOG 目标调试

5. 性能问题 (Performance Issues):
   问题: 网络性能下降
   检查:
     - iptables-save | wc -l
     - cat /proc/net/nf_conntrack | wc -l
     - top (查看 CPU 使用)
   解决:
     - 优化规则顺序
     - 使用 ipset
     - 增加连接跟踪表大小

6. 连接跟踪表满 (Connection Table Full):
   问题: nf_conntrack: table full
   检查:
     - cat /proc/sys/net/netfilter/nf_conntrack_max
     - cat /proc/net/nf_conntrack | wc -l
   解决:
     - echo 65536 > /proc/sys/net/netfilter/nf_conntrack_max
     - 减少超时时间
     - 使用 NOTRACK 跳过跟踪

EOF
}

# Print a categorised cheat sheet of troubleshooting commands
# (connectivity, network state, iptables, packet capture, monitoring, logs).
# Output only; none of the listed commands is executed.
troubleshooting_toolkit() {
    cat <<'EOF'
=== Troubleshooting Toolkit ===

1. 网络连通性测试:
   ping -c 4 <target>          # 基本连通性
   traceroute <target>         # 路由跟踪
   mtr <target>                # 实时路由跟踪
   nc -zv <host> <port>        # 端口连通性
   telnet <host> <port>        # 交互式连接测试

2. 网络状态查看:
   ss -tuln                    # 监听端口
   ss -tuap                    # 所有连接
   netstat -rn                 # 路由表
   ip route show               # 路由信息
   ip addr show                # 接口地址

3. iptables 调试:
   iptables -L -n -v           # 查看规则和计数
   iptables -t nat -L -n -v    # NAT 表规则
   iptables -Z                 # 重置计数器
   iptables-save               # 导出规则
   iptables-restore            # 导入规则

4. 数据包分析:
   tcpdump -i any host <ip>    # 抓取特定主机
   tcpdump -i any port <port>  # 抓取特定端口
   wireshark                   # 图形化分析
   tshark                      # 命令行分析

5. 系统监控:
   top                         # CPU 和内存
   htop                        # 增强版 top
   iotop                       # I/O 监控
   iftop                       # 网络流量
   nload                       # 网络负载

6. 日志分析:
   tail -f /var/log/messages   # 实时日志
   journalctl -f               # systemd 日志
   dmesg | tail                # 内核消息
   grep iptables /var/log/*    # iptables 日志

EOF
}

# Print performance tuning advice in four groups: rule layout, connection
# tracking, system tuning, and monitoring/maintenance. Output only.
performance_optimization_tips() {
    cat <<'EOF'
=== Performance Optimization Tips ===

1. 规则优化:
   • 将最常匹配的规则放在前面
   • 使用具体的匹配条件而不是通用条件
   • 避免使用过多的扩展模块
   • 合并相似的规则
   • 删除不必要的规则

2. 连接跟踪优化:
   • 增加 nf_conntrack_max 值
   • 减少连接超时时间
   • 对不需要跟踪的流量使用 NOTRACK
   • 调整哈希表大小

3. 系统优化:
   • 启用网卡多队列
   • 调整中断亲和性
   • 优化内核参数
   • 使用高性能网卡

4. 监控和维护:
   • 定期检查规则使用情况
   • 监控系统资源使用
   • 定期清理日志文件
   • 备份重要配置

EOF
}

# Command-line entry point; with no argument the help text is shown.
if [ "${1:-help}" = "solutions" ]; then
    solve_common_issues
elif [ "${1:-help}" = "toolkit" ]; then
    troubleshooting_toolkit
elif [ "${1:-help}" = "optimization" ]; then
    performance_optimization_tips
elif [ "${1:-help}" = "help" ]; then
    printf '%s\n' \
        "Usage: $0 <command>" \
        "Commands:" \
        "  solutions     - Show common issues and solutions" \
        "  toolkit       - Display troubleshooting tools" \
        "  optimization  - Performance optimization tips"
else
    printf '%s\n' "Unknown command: $1"
    exit 1
fi

11.5.2 预防性维护

1. 定期检查脚本

#!/bin/bash
# preventive_maintenance.sh

# Periodic preventive maintenance for an iptables host.
#
# Steps: (1) report rules with zero hits and rule complexity, (2) report
# connection-tracking usage, (3) report memory/CPU/disk usage, (4) report
# log sizes and stale report files, (5) save a rules backup and keep only
# the 10 newest, (6) flag permissive policies and broad ACCEPT rules,
# (7/8) print a summary and recommendations.
#
# Arguments: none
# Outputs:   report on stdout and in /var/log/iptables_maintenance_<date>.log;
#            a rules backup under /etc/iptables/backups
preventive_maintenance() {
    local log_file
    log_file="/var/log/iptables_maintenance_$(date +%Y%m%d).log"

    local chain chain_unused unused_rules=0
    # complexity_ratio defaults to 0 so the recommendation test further
    # down is valid even when no rules are loaded.
    local complex_rules total_rules complexity_ratio=0
    local current_conn max_conn usage_percent count state time_wait_count
    local memory_usage load_avg disk_usage log_size old_logs
    local backup_dir backup_file backup_count old_backups stale
    local policy dangerous_rules

    echo "=== iptables Preventive Maintenance ===" | tee "$log_file"
    echo "Date: $(date)" | tee -a "$log_file"
    echo | tee -a "$log_file"

    # 1. Rule health
    echo "1. Rules Health Check:" | tee -a "$log_file"

    # Rules whose packet counter (column 2 with --line-numbers) is zero
    for chain in INPUT OUTPUT FORWARD; do
        chain_unused=$(iptables -L "$chain" -n -v --line-numbers | tail -n +3 |
            awk '$2 == "0" {n++} END {print n + 0}')
        unused_rules=$((unused_rules + chain_unused))
        if [ "$chain_unused" -gt 0 ]; then
            echo "   $chain chain: $chain_unused unused rules" | tee -a "$log_file"
        fi
    done

    if [ "$unused_rules" -eq 0 ]; then
        echo "   ✓ All rules are being used" | tee -a "$log_file"
    else
        echo "   ⚠️  Found $unused_rules unused rules" | tee -a "$log_file"
        echo "   Consider reviewing and removing unused rules" | tee -a "$log_file"
    fi

    # Rule complexity: a rule with three or more "-m" matches counts as complex
    complex_rules=$(iptables-save | grep -cE -- '(-m.*){3,}')
    complex_rules=${complex_rules:-0}
    total_rules=$(iptables-save | grep -c "^-A")
    total_rules=${total_rules:-0}

    if [ "$total_rules" -gt 0 ]; then
        complexity_ratio=$((complex_rules * 100 / total_rules))
        echo "   Rule complexity: $complexity_ratio% ($complex_rules/$total_rules complex rules)" | tee -a "$log_file"

        if [ "$complexity_ratio" -gt 30 ]; then
            echo "   ⚠️  High rule complexity detected" | tee -a "$log_file"
        fi
    fi

    # 2. Connection tracking
    echo -e "\n2. Connection Tracking Maintenance:" | tee -a "$log_file"

    if [ -f /proc/net/nf_conntrack ]; then
        current_conn=$(wc -l < /proc/net/nf_conntrack)
        current_conn=${current_conn:-0}
        max_conn=$(cat /proc/sys/net/netfilter/nf_conntrack_max)
        max_conn=${max_conn:-0}
        usage_percent=0
        # Guard against division by zero when the limit cannot be read.
        if [ "$max_conn" -gt 0 ]; then
            usage_percent=$((current_conn * 100 / max_conn))
        fi

        echo "   Current usage: $usage_percent% ($current_conn/$max_conn)" | tee -a "$log_file"

        # State distribution. Field 4 of an entry is the protocol number;
        # the state name is field 6 and exists only for TCP entries.
        echo "   Connection state distribution:" | tee -a "$log_file"
        awk '$3 == "tcp" {print $6}' /proc/net/nf_conntrack | sort | uniq -c | sort -nr | head -5 |
        while read -r count state; do
            echo "     $state: $count connections" | tee -a "$log_file"
        done

        # Unusually many TIME_WAIT entries
        time_wait_count=$(grep -c TIME_WAIT /proc/net/nf_conntrack)
        if [ "${time_wait_count:-0}" -gt 1000 ]; then
            echo "   ⚠️  High TIME_WAIT connections: $time_wait_count" | tee -a "$log_file"
        fi
    fi

    # 3. System resources
    echo -e "\n3. System Resource Check:" | tee -a "$log_file"

    # Memory usage
    memory_usage=$(free | grep Mem | awk '{printf "%.1f", $3*100/$2}')
    echo "   Memory usage: ${memory_usage}%" | tee -a "$log_file"

    # CPU load (1-minute average)
    load_avg=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ',')
    echo "   CPU load average: $load_avg" | tee -a "$log_file"

    # Root filesystem usage
    disk_usage=$(df / | tail -1 | awk '{print $5}' | tr -d '%')
    echo "   Root disk usage: ${disk_usage}%" | tee -a "$log_file"

    if [ "${disk_usage:-0}" -gt 80 ]; then
        echo "   ⚠️  High disk usage detected" | tee -a "$log_file"
    fi

    # 4. Log maintenance
    echo -e "\n4. Log Maintenance:" | tee -a "$log_file"

    # System log size
    if [ -f /var/log/messages ]; then
        log_size=$(du -h /var/log/messages | awk '{print $1}')
        echo "   System log size: $log_size" | tee -a "$log_file"
    fi

    # Report (but do not delete) report files older than 30 days
    old_logs=$(find /var/log -name "iptables_*.log" -mtime +30 | wc -l)
    if [ "${old_logs:-0}" -gt 0 ]; then
        echo "   Found $old_logs old log files (>30 days)" | tee -a "$log_file"
        echo "   Consider cleaning up old logs" | tee -a "$log_file"
    fi

    # 5. Configuration backup
    echo -e "\n5. Configuration Backup:" | tee -a "$log_file"

    backup_dir="/etc/iptables/backups"
    backup_file="$backup_dir/iptables_backup_$(date +%Y%m%d_%H%M%S).rules"

    # Judge success by the exit status of iptables-save: the redirection
    # creates the file even when the command itself fails, so testing for
    # the file's existence would report a broken backup as good.
    if mkdir -p "$backup_dir" && iptables-save > "$backup_file"; then
        echo "   ✓ Configuration backed up to: $backup_file" | tee -a "$log_file"

        # Keep the 10 most recent backups (by modification time)
        backup_count=$(ls -1 "$backup_dir"/iptables_backup_*.rules 2>/dev/null | wc -l)
        if [ "$backup_count" -gt 10 ]; then
            old_backups=$((backup_count - 10))
            ls -1t "$backup_dir"/iptables_backup_*.rules | tail -n "$old_backups" |
            while IFS= read -r stale; do
                rm -f -- "$stale"
            done
            echo "   Cleaned up $old_backups old backup files" | tee -a "$log_file"
        fi
    else
        # Do not leave an empty or partial backup behind.
        rm -f -- "$backup_file"
        echo "   ❌ Failed to create backup" | tee -a "$log_file"
    fi

    # 6. Security check
    echo -e "\n6. Security Check:" | tee -a "$log_file"

    # Default policies
    for chain in INPUT OUTPUT FORWARD; do
        policy=$(iptables -L "$chain" -n | head -1 | awk '{print $4}' | tr -d '()')
        echo "   $chain default policy: $policy" | tee -a "$log_file"

        if [ "$policy" = "ACCEPT" ]; then
            echo "   ⚠️  $chain chain has permissive default policy" | tee -a "$log_file"
        fi
    done

    # Rules that ACCEPT with an explicit 0.0.0.0/0 address
    dangerous_rules=$(iptables-save | grep -cE "(0\.0\.0\.0/0.*ACCEPT|ACCEPT.*0\.0\.0\.0/0)")
    dangerous_rules=${dangerous_rules:-0}
    if [ "$dangerous_rules" -gt 0 ]; then
        echo "   ⚠️  Found $dangerous_rules potentially dangerous rules (ACCEPT from/to anywhere)" | tee -a "$log_file"
    fi

    # 7. Summary
    echo -e "\n7. Maintenance Summary:" | tee -a "$log_file"
    echo "   Maintenance completed successfully" | tee -a "$log_file"
    echo "   Report saved to: $log_file" | tee -a "$log_file"

    # 8. Recommendations
    echo -e "\n8. Recommendations:" | tee -a "$log_file"

    if [ "$unused_rules" -gt 0 ]; then
        echo "   • Review and remove $unused_rules unused rules" | tee -a "$log_file"
    fi

    if [ "$complexity_ratio" -gt 30 ]; then
        echo "   • Consider simplifying complex rules" | tee -a "$log_file"
    fi

    if [ "$(echo "$memory_usage > 80" | bc 2>/dev/null || echo 0)" = "1" ]; then
        echo "   • Monitor memory usage closely" | tee -a "$log_file"
    fi

    if [ "$dangerous_rules" -gt 0 ]; then
        echo "   • Review potentially dangerous rules" | tee -a "$log_file"
    fi

    echo "   • Schedule next maintenance check" | tee -a "$log_file"
}

# Command-line entry point; with no argument the help text is shown.
if [ "${1:-help}" = "run" ]; then
    preventive_maintenance
elif [ "${1:-help}" = "help" ]; then
    printf '%s\n' \
        "Usage: $0 <command>" \
        "Commands:" \
        "  run   - Run preventive maintenance"
else
    printf '%s\n' "Unknown command: $1"
    exit 1
fi

11.6 本章小结

11.6.1 关键要点回顾

本章详细介绍了 iptables 故障排除和调试的各种技巧和方法:

1. 连接问题诊断 - 系统化的连接问题排查方法 - 网络接口和路由诊断技巧 - ICMP 和端口连通性测试 - iptables 规则检查和验证

2. 规则匹配问题 - 规则匹配调试技术 - 规则冲突分析方法 - 性能影响评估 - 规则优化建议

3. NAT 问题诊断 - SNAT 和 DNAT 故障排除 - 连接跟踪问题解决 - 路由和地址转换验证

4. 调试工具使用 - tcpdump 数据包分析 - iptables 日志配置和分析 - 连接跟踪调试技巧 - 系统资源监控

5. 自动化故障排除 - 自动化诊断脚本 - 实时监控和告警 - 性能趋势分析 - 预防性维护

11.6.2 最佳实践总结

故障排除原则: 1. 系统化方法:按层次逐步排查 2. 工具结合:多种工具交叉验证 3. 日志分析:重视日志信息 4. 性能监控:关注系统资源 5. 预防为主:定期维护检查

调试技巧: 1. 使用 LOG 目标记录关键信息 2. 重置计数器观察规则匹配 3. 临时规则快速测试 4. 数据包抓取分析流量 5. 连接跟踪状态监控

性能优化: 1. 规则顺序优化 2. 复杂规则简化 3. 连接跟踪调优 4. 系统参数优化 5. 硬件资源升级

11.6.3 故障排除检查清单

基础检查: - [ ] 网络接口状态 - [ ] IP 地址配置 - [ ] 路由表设置 - [ ] 基本连通性

iptables 检查: - [ ] 默认策略设置 - [ ] 规则语法正确性 - [ ] 规则匹配顺序 - [ ] 计数器统计

性能检查: - [ ] CPU 使用率 - [ ] 内存使用情况 - [ ] 连接跟踪使用率 - [ ] 规则处理效率

安全检查: - [ ] 危险规则识别 - [ ] 日志记录配置 - [ ] 访问控制验证 - [ ] 配置备份状态

11.6.4 下一章预告

下一章我们将学习 iptables 与其他工具的集成,包括:

  1. 与系统服务集成

    • systemd 服务配置
    • 开机自启动设置
    • 服务依赖管理
  2. 与监控工具集成

    • Nagios 监控集成
    • Zabbix 监控配置
    • Prometheus 指标收集
  3. 与自动化工具集成

    • Ansible 自动化部署
    • Puppet 配置管理
    • Chef 基础设施代码
  4. 与容器技术集成

    • Docker 网络集成
    • Kubernetes 网络策略
    • 容器安全配置
  5. 与云平台集成

    • AWS 安全组集成
    • Azure 网络安全组
    • 混合云网络配置

11.7 练习与思考

11.7.1 理论练习

  1. 故障排除流程设计

    • 设计一个完整的 iptables 故障排除流程
    • 包括检查步骤、工具使用、问题分类
  2. 性能问题分析

    • 分析可能导致 iptables 性能问题的因素
    • 提出相应的优化方案
  3. 监控指标设计

    • 设计 iptables 监控指标体系
    • 包括关键指标、告警阈值、响应策略

11.7.2 实践练习

  1. 故障模拟和排除

    ```bash
    # 练习1:模拟连接问题
    # 创建阻止 SSH 连接的规则,然后排除故障

    # 练习2:模拟 NAT 问题
    # 配置错误的 NAT 规则,分析和修复问题

    # 练习3:模拟性能问题
    # 创建大量复杂规则,分析性能影响
    ```

  2. 调试脚本开发

    ```bash
    # 开发自定义的故障诊断脚本
    # 包括自动检测、问题分类、解决建议
    ```

  3. 监控系统搭建

    ```bash
    # 搭建 iptables 监控系统
    # 包括数据收集、可视化、告警
    ```
    

    11.7.3 思考题

    1. 如何设计一个高效的 iptables 故障排除流程?

    2. 在大规模环境中,如何实现 iptables 的自动化监控和故障处理?

    3. 如何平衡 iptables 的安全性和性能?

    4. 在云环境中,iptables 故障排除有哪些特殊考虑?

    5. 如何设计 iptables 的灾难恢复方案?

      11.1.2 规则匹配问题

      1. 规则匹配调试

      #!/bin/bash
      # rule_matching_debug.sh
      # 规则匹配调试脚本
      # Print the filter-table rules together with their packet/byte counters.
      # Output sections:
      #   1. the full listing as iptables prints it,
      #   2. rules whose packet counter is zero,
      #   3. the ten rules with the highest packet counters.
      debug_rule_matching() {
          echo "=== Rule Matching Debug ==="

          # 1. Show every rule with its counters
          echo "1. Current rules with packet/byte counters:"
          iptables -L -n -v --line-numbers

          # Only lines whose first field is a rule number are rules; this skips
          # the "Chain ..." headers, column headers and blank separator lines.
          echo -e "\n2. Rules with zero packet count (potentially unused):"
          iptables -L -n -v -x --line-numbers |
              awk '$1 ~ /^[0-9]+$/ && $2 == "0" {print "   Line " $1 ": " $0}'

          # -x prints exact counters. Without it iptables abbreviates large
          # values (12K, 3M), which a numeric sort would rank as 12 and 3.
          echo -e "\n3. Most active rules (top 10):"
          iptables -L -n -v -x --line-numbers |
              awk '$1 ~ /^[0-9]+$/ {print $2 " " $0}' |
              sort -nr | head -10 | sed 's/^/   /'
      }
      # Insert a temporary LOG rule at the top of the INPUT chain so that traffic
      # matching the given tuple shows up in the kernel log.
      # Arguments:
      #   $1 - source IP
      #   $2 - destination IP
      #   $3 - destination port
      #   $4 - protocol (optional, default: tcp)
      # Returns: 0 when the rule was added, 1 when iptables rejected it.
      # Side effect: the rule is NOT removed automatically; see the printed hint.
      test_rule_matching() {
          local src_ip="$1"
          local dst_ip="$2"
          local dst_port="$3"
          local protocol="${4:-tcp}"
          local log_prefix="TEST_RULE: "
          # Human-readable form of the rule, used only for display.
          local test_rule="-s $src_ip -d $dst_ip -p $protocol --dport $dst_port -j LOG --log-prefix '$log_prefix'"
          # The rule is executed from an array: expanding a string unquoted would
          # split the quoted log prefix into "'TEST_RULE:" and "'" and make
          # iptables fail on the stray argument.
          local -a rule_args=(
              -s "$src_ip" -d "$dst_ip" -p "$protocol" --dport "$dst_port"
              -j LOG --log-prefix "$log_prefix"
          )

          echo "=== Testing Rule Matching ==="
          echo "Source: $src_ip"
          echo "Destination: $dst_ip:$dst_port"
          echo "Protocol: $protocol"
          echo

          # Create the test rule (with logging)
          echo "Adding test rule: iptables -I INPUT 1 $test_rule"
          if ! iptables -I INPUT 1 "${rule_args[@]}"; then
              echo "Failed to add test rule" >&2
              return 1
          fi

          echo "Test rule added. Generate some traffic and check logs:"
          echo "  tail -f /var/log/messages | grep 'TEST_RULE'"
          echo
          echo "To remove test rule: iptables -D INPUT 1"
      }
      # Analyze the ruleset for three kinds of problems:
      #   1. duplicate rules (same rule appended twice within one table),
      #   2. INPUT rules whose match conditions are shared by an ACCEPT and a DROP,
      #   3. INPUT rules with no source/destination/protocol restriction.
      # Reads the live ruleset through iptables-save and `iptables -S INPUT`.
      analyze_rule_conflicts() {
          local duplicates input_rules broad_rules rule condition drop_rule

          echo "=== Analyzing Rule Conflicts ==="

          echo "1. Checking for duplicate rules:"
          # iptables-save is piped directly, so no predictable temp file in /tmp
          # is needed. Only "-A" lines are rules and they are compared per table,
          # which keeps "COMMIT" lines and identical rules living in different
          # tables from being reported as duplicates.
          duplicates=$(iptables-save | awk '
              /^\*/ { table = $0; next }
              /^-A/ && ++seen[table, $0] == 2')
          if [ -n "$duplicates" ]; then
              echo "   Found duplicate rules:"
              echo "$duplicates" | sed 's/^/     /'
          else
              echo "   No duplicate rules found"
          fi

          echo -e "\n2. Checking for conflicting ACCEPT/DROP rules:"
          # INPUT chain rules without the policy line
          input_rules=$(iptables -S INPUT | grep -v "^-P")

          # Look for rules with the same conditions but a different verdict
          while IFS= read -r rule; do
              [[ "$rule" == *"-j ACCEPT"* ]] || continue
              condition=${rule/-j ACCEPT/}
              condition=${condition/-A INPUT/}
              # -F: the condition is literal text; addresses, interface names and
              # comments contain regex metacharacters such as "." and "[".
              drop_rule=$(grep -F -- "$condition" <<< "$input_rules" | grep -e "-j DROP")
              if [ -n "$drop_rule" ]; then
                  echo "   Potential conflict found:"
                  echo "     ACCEPT: $rule"
                  echo "     DROP:   $drop_rule"
              fi
          done <<< "$input_rules"

          echo -e "\n3. Checking rule order issues:"
          # Rules without any -s/-d/-p restriction may shadow more specific ones
          broad_rules=$(iptables -S INPUT | grep -E "\-j (ACCEPT|DROP)$" | grep -v "\-s" | grep -v "\-d" | grep -v "\-p")
          if [ -n "$broad_rules" ]; then
              echo "   Found broad rules that might block more specific rules:"
              echo "$broad_rules" | sed 's/^/     /'
          fi
      }
      # Rule performance analysis: prints per-chain rule counts, rules that use
      # three or more match modules, rules using slow match types, and a fixed
      # list of optimization hints.
      analyze_rule_performance() {
          local chain_name rule_total string_matches regex_matches

          echo "=== Rule Performance Analysis ==="

          # 1. Rule count per built-in filter chain (listing minus 2 header lines)
          echo "1. Rule count by chain:"
          for chain_name in INPUT OUTPUT FORWARD; do
              rule_total=$(iptables -L "$chain_name" --line-numbers | tail -n +3 | wc -l)
              echo "   $chain_name: $rule_total rules"
          done

          # 2. Rules with several match conditions
          echo -e "\n2. Complex rules (multiple conditions):"
          iptables -S | grep -E "(\-m.*){3,}" | sed 's/^/   /'

          # 3. Match types that are expensive to evaluate
          echo -e "\n3. Potentially inefficient patterns:"

          string_matches=$(iptables -S | grep "\-m string")
          if [[ -n "$string_matches" ]]; then
              echo "   String matching rules (can be slow):"
              sed 's/^/     /' <<< "$string_matches"
          fi

          regex_matches=$(iptables -S | grep "\-m regexp")
          if [[ -n "$regex_matches" ]]; then
              echo "   Regular expression rules (can be slow):"
              sed 's/^/     /' <<< "$regex_matches"
          fi

          # 4. Generic advice
          echo -e "\n4. Optimization suggestions:"
          printf '%s\n' \
              "   - Move frequently matched rules to the top" \
              "   - Use ipset for large IP lists" \
              "   - Combine similar rules where possible" \
              "   - Avoid string matching in high-traffic rules" \
              "   - Use stateful connection tracking"
      }
      # Usage example: dispatch on the first CLI argument (defaults to "help").
      case "${1:-help}" in
          debug)
              debug_rule_matching
              ;;
          test)
              # Needs <src_ip> <dst_ip> <dst_port> after the command name.
              if (( $# < 4 )); then
                  echo "Usage: $0 test <src_ip> <dst_ip> <dst_port> [protocol]"
                  exit 1
              fi
              test_rule_matching "$2" "$3" "$4" "$5"
              ;;
          conflicts)
              analyze_rule_conflicts
              ;;
          performance)
              analyze_rule_performance
              ;;
          help)
              printf '%s\n' \
                  "Usage: $0 <command>" \
                  "Commands:" \
                  "  debug       - Show rule matching debug info" \
                  "  test        - Test specific rule matching" \
                  "  conflicts   - Analyze rule conflicts" \
                  "  performance - Analyze rule performance"
              ;;
          *)
              echo "Unknown command: $1"
              exit 1
              ;;
      esac
      

11.1.3 NAT 问题诊断

1. NAT 故障排除

#!/bin/bash
# nat_troubleshooting.sh

# General NAT diagnosis: lists the nat table chains, reports whether IPv4
# forwarding is on, summarizes conntrack entries containing "nat", and prints
# the interface addresses and the routing table.
diagnose_nat() {
    local nat_chain forwarding_flag nat_total all_total
    local conntrack_path=/proc/net/nf_conntrack

    echo "=== NAT Diagnosis ==="

    # 1. Rules of each nat chain
    echo "1. NAT table rules:"
    for nat_chain in PREROUTING POSTROUTING OUTPUT; do
        echo "   $nat_chain chain:"
        iptables -t nat -L "$nat_chain" -n -v --line-numbers | sed 's/^/     /'
    done

    # 2. Kernel IP forwarding switch
    echo -e "\n2. IP forwarding status:"
    forwarding_flag=$(cat /proc/sys/net/ipv4/ip_forward)
    case "$forwarding_flag" in
        1)
            echo "   ✓ IP forwarding is enabled"
            ;;
        *)
            echo "   ✗ IP forwarding is disabled"
            echo "   To enable: echo 1 > /proc/sys/net/ipv4/ip_forward"
            ;;
    esac

    # 3. Connection tracking entries
    echo -e "\n3. Connection tracking for NAT:"
    if [ ! -f "$conntrack_path" ]; then
        echo "   Connection tracking not available"
    else
        nat_total=$(grep "nat" "$conntrack_path" | wc -l)
        all_total=$(cat "$conntrack_path" | wc -l)
        echo "   NAT connections: $nat_total / $all_total"

        # A few sample entries
        echo "   Sample NAT connections:"
        grep "nat" "$conntrack_path" | head -5 | sed 's/^/     /'
    fi

    # 4. Interfaces and their IPv4 addresses
    echo -e "\n4. Network interfaces:"
    ip addr show | grep -E "^[0-9]+:|inet " | sed 's/^/   /'

    # 5. Routing table
    echo -e "\n5. Routing table:"
    ip route | sed 's/^/   /'
}

# SNAT diagnosis for one internal network / external interface pair.
# Arguments:
#   $1 - internal network as it appears in the rules (e.g. 192.168.1.0/24)
#   $2 - external interface name (e.g. eth0)
# Prints matching POSTROUTING rules, MASQUERADE rules, the first IPv4 address
# of the external interface and the number of conntrack entries that use it.
diagnose_snat() {
    local internal_network="$1"
    local external_interface="$2"
    local postrouting snat_rules masq_rules external_ip nat_with_ip

    echo "=== SNAT Diagnosis ==="
    echo "Internal network: $internal_network"
    echo "External interface: $external_interface"
    echo

    postrouting=$(iptables -t nat -L POSTROUTING -n -v)

    # 1. SNAT rules
    echo "1. SNAT rules in POSTROUTING:"
    # -F: network and interface are literal text, not regular expressions
    # (an unescaped "." would otherwise match any character).
    snat_rules=$(grep -F -e "$internal_network" -e "$external_interface" <<< "$postrouting")
    if [ -n "$snat_rules" ]; then
        echo "$snat_rules" | sed 's/^/   /'
    else
        echo "   No SNAT rules found for specified network/interface"
    fi

    # 2. MASQUERADE rules
    echo -e "\n2. MASQUERADE rules:"
    masq_rules=$(grep MASQUERADE <<< "$postrouting")
    if [ -n "$masq_rules" ]; then
        echo "$masq_rules" | sed 's/^/   /'
    else
        echo "   No MASQUERADE rules found"
    fi

    # 3. SNAT functionality
    echo -e "\n3. Testing SNAT functionality:"

    # Only the first IPv4 address is used, so an interface with secondary
    # addresses still yields a single value.
    external_ip=$(ip addr show "$external_interface" 2>/dev/null |
        awk '/inet / { sub(/\/.*/, "", $2); print $2; exit }')
    if [ -n "$external_ip" ]; then
        echo "   External interface IP: $external_ip"

        # -w keeps 10.0.0.1 from also counting 10.0.0.10 or 110.0.0.1.
        nat_with_ip=$(grep -cwF -- "$external_ip" /proc/net/nf_conntrack 2>/dev/null)
        echo "   Active NAT connections using this IP: ${nat_with_ip:-0}"
    else
        echo "   ✗ External interface has no IP address"
    fi

    # 4. Suggestions
    echo -e "\n4. Troubleshooting suggestions:"
    echo "   - Ensure IP forwarding is enabled"
    echo "   - Check POSTROUTING rules for source network"
    echo "   - Verify external interface has valid IP"
    echo "   - Test connectivity from internal hosts"
    echo "   - Check for conflicting rules"
}

# DNAT (port forwarding) diagnosis.
# Arguments:
#   $1 - external port that is forwarded
#   $2 - internal target IP
#   $3 - internal target port
# Prints matching PREROUTING and FORWARD rules (or the FORWARD policy), probes
# the internal host with ping/nc and lists conntrack entries whose reply tuple
# originates from the internal target.
diagnose_dnat() {
    local external_port="$1"
    local internal_ip="$2"
    local internal_port="$3"
    local dnat_rules forward_rules forward_policy dnat_connections

    echo "=== DNAT Diagnosis ==="
    echo "External port: $external_port"
    echo "Internal target: $internal_ip:$internal_port"
    echo

    # 1. DNAT rules
    echo "1. DNAT rules in PREROUTING:"
    dnat_rules=$(iptables -t nat -L PREROUTING -n -v | grep ":$external_port ")
    if [ -n "$dnat_rules" ]; then
        echo "$dnat_rules" | sed 's/^/   /'
    else
        echo "   No DNAT rules found for port $external_port"
    fi

    # 2. FORWARD rules
    echo -e "\n2. FORWARD rules for internal target:"
    # The address is matched literally and the port must end at a space or
    # end of line, so port 80 does not also select dpt:8080.
    forward_rules=$(iptables -L FORWARD -n -v |
        grep -F -- "$internal_ip" | grep -E -- ":${internal_port}( |\$)")
    if [ -n "$forward_rules" ]; then
        echo "$forward_rules" | sed 's/^/   /'
    else
        echo "   No specific FORWARD rules found"
        echo "   Checking general FORWARD policy:"
        # "-S" prints "-P FORWARD <policy>"; parsing the "-L" header instead
        # yields "DROP)" because of the closing parenthesis.
        forward_policy=$(iptables -S FORWARD 2>/dev/null | awk '$1 == "-P" {print $3; exit}')
        echo "   FORWARD policy: $forward_policy"
    fi

    # 3. Internal host connectivity
    echo -e "\n3. Testing internal host connectivity:"
    if ping -c 1 -W 2 "$internal_ip" > /dev/null 2>&1; then
        echo "   ✓ Internal host $internal_ip is reachable"

        # Port probe; a missing nc must not be reported as a closed port
        if ! command -v nc > /dev/null; then
            echo "   ? nc is not installed, port test skipped"
        elif nc -z -w 2 "$internal_ip" "$internal_port" 2>/dev/null; then
            echo "   ✓ Port $internal_port is open on internal host"
        else
            echo "   ✗ Port $internal_port is closed on internal host"
        fi
    else
        echo "   ✗ Internal host $internal_ip is not reachable"
    fi

    # 4. Connection tracking
    echo -e "\n4. Connection tracking for DNAT:"
    # Conntrack entries list tuples as "src= dst= sport= dport=", never as
    # "ip:port". A DNATed flow has the internal target as source of the reply
    # (second) tuple.
    dnat_connections=$(awk -v ip="$internal_ip" -v port="$internal_port" '
        {
            tuples = 0
            for (i = 1; i + 2 <= NF; i++) {
                if ($i ~ /^src=/) {
                    tuples++
                    if (tuples == 2 && $i == "src=" ip && $(i + 2) == "sport=" port) {
                        print
                        next
                    }
                }
            }
        }' /proc/net/nf_conntrack 2>/dev/null)
    if [ -n "$dnat_connections" ]; then
        echo "   Found DNAT connections:"
        echo "$dnat_connections" | sed 's/^/     /'
    else
        echo "   No active DNAT connections found"
    fi

    # 5. Suggestions
    echo -e "\n5. Troubleshooting suggestions:"
    echo "   - Verify DNAT rule in PREROUTING chain"
    echo "   - Check FORWARD rules allow traffic to internal host"
    echo "   - Ensure internal service is running and accessible"
    echo "   - Test DNAT from external source"
    echo "   - Check for firewall rules on internal host"
}

# Usage example: dispatch on the first CLI argument (defaults to "help").
case "${1:-help}" in
    general)
        diagnose_nat
        ;;
    snat)
        # Needs <internal_network> <external_interface> after the command name.
        if (( $# < 3 )); then
            printf '%s\n' \
                "Usage: $0 snat <internal_network> <external_interface>" \
                "Example: $0 snat 192.168.1.0/24 eth0"
            exit 1
        fi
        diagnose_snat "$2" "$3"
        ;;
    dnat)
        # Needs <external_port> <internal_ip> <internal_port>.
        if (( $# < 4 )); then
            printf '%s\n' \
                "Usage: $0 dnat <external_port> <internal_ip> <internal_port>" \
                "Example: $0 dnat 80 192.168.1.100 8080"
            exit 1
        fi
        diagnose_dnat "$2" "$3" "$4"
        ;;
    help)
        printf '%s\n' \
            "Usage: $0 <command> [options]" \
            "Commands:" \
            "  general                                    - General NAT diagnosis" \
            "  snat <internal_network> <external_if>     - SNAT diagnosis" \
            "  dnat <ext_port> <int_ip> <int_port>       - DNAT diagnosis"
        ;;
    *)
        echo "Unknown command: $1"
        exit 1
        ;;
esac

11.2 调试工具使用

11.2.1 数据包跟踪工具

1. tcpdump 调试

#!/bin/bash
# tcpdump_debug.sh

# Capture packets on an interface into a pcap file.
# Arguments:
#   $1 - interface
#   $2 - capture filter expression (passed to tcpdump as one argument)
#   $3 - output file (optional, default: /tmp/capture.pcap)
# Runs until tcpdump exits (normally on Ctrl+C).
tcpdump_basic_capture() {
    local interface="$1" filter="$2"
    local output_file="${3:-/tmp/capture.pcap}"

    printf '%s\n' \
        "Starting packet capture on $interface" \
        "Filter: $filter" \
        "Output: $output_file" \
        "Press Ctrl+C to stop" \
        ""

    tcpdump -i "$interface" -w "$output_file" "$filter"
}

# Real-time packet analysis: prints one "[time] src -> dst" line per packet
# exchanged with a host and flags TCP SYN and RST packets.
# Arguments:
#   $1 - interface to capture on
#   $2 - IP address of the host of interest
# Runs until tcpdump exits (normally on Ctrl+C).
tcpdump_realtime_analysis() {
    local interface="$1"
    local target_ip="$2"
    # TCP flags are printed as e.g. "Flags [S.]"; only the bracket content is
    # inspected, since later text such as the "TS val" option also contains "S".
    local flags_re='Flags \[([^]]*)\]'
    local line timestamp src dst flags

    echo "Real-time packet analysis for $target_ip on $interface"
    echo "Press Ctrl+C to stop"
    echo

    # -l: line-buffer stdout so packets reach the loop as they arrive.
    # -v is deliberately not used: verbose mode splits each packet over two
    # lines, which breaks the "time proto src > dst" field layout parsed below.
    tcpdump -l -n -i "$interface" host "$target_ip" |
    while IFS= read -r line; do
        [[ "$line" == *">"* ]] || continue

        read -r timestamp _ src _ dst _ <<< "$line"
        dst=${dst%:}   # tcpdump appends ":" to the destination field
        echo "[$timestamp] $src -> $dst"

        if [[ "$line" =~ $flags_re ]]; then
            flags=${BASH_REMATCH[1]}
            if [[ "$flags" == *S* ]]; then
                echo "  SYN packet detected"
            elif [[ "$flags" == *R* ]]; then
                echo "  RST packet detected (connection rejected?)"
            fi
        fi
    done
}

# Trace a TCP three-way handshake between two hosts and report each step.
# Arguments:
#   $1 - source (client) IP
#   $2 - destination (server) IP
#   $3 - destination port
# Stops after the client's final ACK or after a reset is seen.
trace_connection_establishment() {
    local src_ip="$1"
    local dst_ip="$2"
    local dst_port="$3"
    # tcpdump prints TCP flags as "Flags [S]", "[S.]", "[.]", "[P.]", "[R]";
    # ACK is shown as ".", never as "A".
    local flags_re='Flags \[([^]]*)\]'
    local line flags

    echo "Tracing connection establishment: $src_ip -> $dst_ip:$dst_port"
    echo "Monitoring TCP handshake..."
    echo

    # -l: line-buffer stdout so every packet is seen immediately.
    tcpdump -l -i any -n "host $src_ip and host $dst_ip and port $dst_port" |
    while IFS= read -r line; do
        [[ "$line" =~ $flags_re ]] || continue
        flags=${BASH_REMATCH[1]}

        # Direction is decided on "<ip>.<port> > <ip>.<port>"; the trailing dot
        # keeps 10.0.0.1 from matching 10.0.0.10.
        if [[ "$flags" == *S* ]]; then
            if [[ "$line" == *" ${src_ip}."*" > ${dst_ip}."* ]]; then
                echo "1. SYN: $src_ip -> $dst_ip:$dst_port"
            elif [[ "$line" == *" ${dst_ip}."*" > ${src_ip}."* ]]; then
                echo "2. SYN-ACK: $dst_ip:$dst_port -> $src_ip"
            fi
        elif [[ "$flags" == *R* ]]; then
            # Checked before the ACK case: a "[R.]" must not count as success.
            echo "Connection reset detected!"
            break
        elif [[ "$flags" == *.* && "$line" == *" ${src_ip}."*" > ${dst_ip}."* ]]; then
            echo "3. ACK: $src_ip -> $dst_ip:$dst_port (Connection established)"
            break
        fi
    done
}

# Usage example: dispatch on the first CLI argument (defaults to "help").
case "${1:-help}" in
    capture)
        # Needs <interface> <filter>; the output file is optional.
        if (( $# < 3 )); then
            printf '%s\n' \
                "Usage: $0 capture <interface> <filter> [output_file]" \
                "Example: $0 capture eth0 'port 80' /tmp/web_traffic.pcap"
            exit 1
        fi
        tcpdump_basic_capture "$2" "$3" "$4"
        ;;
    realtime)
        if (( $# < 3 )); then
            echo "Usage: $0 realtime <interface> <target_ip>"
            exit 1
        fi
        tcpdump_realtime_analysis "$2" "$3"
        ;;
    trace)
        if (( $# < 4 )); then
            echo "Usage: $0 trace <src_ip> <dst_ip> <dst_port>"
            exit 1
        fi
        trace_connection_establishment "$2" "$3" "$4"
        ;;
    help)
        printf '%s\n' \
            "Usage: $0 <command> [options]" \
            "Commands:" \
            "  capture   - Basic packet capture" \
            "  realtime  - Real-time packet analysis" \
            "  trace     - Trace connection establishment"
        ;;
    *)
        echo "Unknown command: $1"
        exit 1
        ;;
esac

2. iptables 日志分析

#!/bin/bash
# iptables_log_analysis.sh

# Summarize iptables LOG entries found in a syslog-style file.
# Arguments:
#   $1 - log file (default: /var/log/messages)
#   $2 - look-back window: <N>h, <N>m or <N>d (default: 1h; anything else is
#        treated as 1 hour)
# Assumes traditional syslog timestamps ("Mon DD HH:MM:SS ..."); lines in any
# other format are ignored.
# Returns: 1 when the log file cannot be read, 0 otherwise.
analyze_iptables_logs() {
    local log_file="${1:-/var/log/messages}"
    local time_range="${2:-1h}"
    local since_spec since_key now_key iptables_logs total_entries
    local count value

    echo "=== IPTables Log Analysis ==="
    echo "Log file: $log_file"
    echo "Time range: last $time_range"
    echo

    # 1. Basic statistics
    echo "1. Basic statistics:"

    if [ ! -r "$log_file" ]; then
        echo "   Cannot read log file: $log_file" >&2
        return 1
    fi

    # Translate the window into a sortable numeric key (MMDDhhmmss). Comparing
    # "%b %d" text against log lines fails across months ("Feb" < "Jan") and
    # for days 1-9, which syslog pads with a space instead of a zero.
    case "$time_range" in
        *h) since_spec="${time_range%h} hours ago" ;;
        *m) since_spec="${time_range%m} minutes ago" ;;
        *d) since_spec="${time_range%d} days ago" ;;
        *)  since_spec="1 hour ago" ;;
    esac
    since_key=$(date -d "$since_spec" '+%m%d%H%M%S' 2>/dev/null) ||
        since_key=$(date -d "1 hour ago" '+%m%d%H%M%S')
    now_key=$(date '+%m%d%H%M%S')

    # Keep kernel lines carrying IN= or OUT= that fall inside the window. The
    # alternation is grouped: "kernel:.*IN=|OUT=" would accept any line that
    # merely contains "OUT=". When the window spans New Year (since > now) the
    # valid range wraps around.
    iptables_logs=$(awk -v since="$since_key" -v now="$now_key" '
        BEGIN {
            n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
            for (i = 1; i <= n; i++) month[names[i]] = i
            since = since ""
            now = now ""
        }
        ($1 in month) && /kernel:.*(IN=|OUT=)/ {
            clock = $3
            gsub(/:/, "", clock)
            key = sprintf("%02d%02d%s", month[$1], $2 + 0, substr(clock, 1, 6))
            if (since <= now) {
                if (key >= since && key <= now) print
            } else if (key >= since || key <= now) {
                print
            }
        }' "$log_file")

    if [ -z "$iptables_logs" ]; then
        echo "   No iptables logs found in the specified time range"
        return
    fi

    total_entries=$(echo "$iptables_logs" | wc -l)
    echo "   Total log entries: $total_entries"

    # 2. Entries grouped by log prefix
    echo -e "\n2. Log entries by prefix:"
    # The prefix is the text between "kernel: " (plus an optional "[uptime]"
    # stamp) and "IN=". A greedy "kernel:.*:" would run to the last colon of
    # the MAC address and never group anything.
    echo "$iptables_logs" |
        sed -E 's/^.*kernel: (\[[ 0-9.]+\] )?//; s/[[:space:]]*IN=.*$//' |
        sort | uniq -c | sort -nr | head -10 |
    while read -r count value; do
        echo "   $count entries: ${value:-(no prefix)}"
    done

    # 3. Most active source IPs
    echo -e "\n3. Top source IPs:"
    echo "$iptables_logs" | grep -o 'SRC=[0-9.]*' | cut -d'=' -f2 | sort | uniq -c | sort -nr | head -10 |
    while read -r count value; do
        echo "   $count packets from $value"
    done

    # 4. Most common destination ports
    echo -e "\n4. Top destination ports:"
    echo "$iptables_logs" | grep -o 'DPT=[0-9]*' | cut -d'=' -f2 | sort | uniq -c | sort -nr | head -10 |
    while read -r count value; do
        echo "   $count packets to port $value"
    done

    # 5. Protocol distribution
    echo -e "\n5. Protocol distribution:"
    echo "$iptables_logs" | grep -o 'PROTO=[A-Z]*' | cut -d'=' -f2 | sort | uniq -c | sort -nr |
    while read -r count value; do
        echo "   $count packets: $value"
    done

    # 6. Interface statistics. Whole tokens are inspected so that PHYSIN= /
    # PHYSOUT= are not miscounted and names such as eth0.10 stay intact.
    echo -e "\n6. Interface statistics:"
    echo "   Incoming interfaces:"
    echo "$iptables_logs" |
        awk '{ for (i = 1; i <= NF; i++) if ($i ~ /^IN=./) print substr($i, 4) }' |
        sort | uniq -c | sort -nr |
    while read -r count value; do
        echo "     $count packets on $value"
    done

    echo "   Outgoing interfaces:"
    echo "$iptables_logs" |
        awk '{ for (i = 1; i <= NF; i++) if ($i ~ /^OUT=./) print substr($i, 5) }' |
        sort | uniq -c | sort -nr |
    while read -r count value; do
        echo "     $count packets on $value"
    done
}

# Follow a log file and print a one-line summary for every iptables LOG entry.
# Arguments:
#   $1 - log file (default: /var/log/messages)
#   $2 - extra grep pattern applied to the raw log line (default: ".*")
# Output format: [timestamp] src:sport -> dst:dport (proto) [in->out]
# Runs until interrupted (tail -f).
monitor_iptables_logs() {
    local log_file="${1:-/var/log/messages}"
    local filter="${2:-.*}"
    local line mon day clock token key
    local -a tokens
    local -A field

    echo "Monitoring iptables logs in real-time"
    echo "Log file: $log_file"
    echo "Filter: $filter"
    echo "Press Ctrl+C to stop"
    echo

    # The alternation is grouped so that only kernel lines pass; the ungrouped
    # "kernel:.*IN=\|OUT=" accepts every line that merely contains "OUT=".
    tail -f "$log_file" |
        grep --line-buffered 'kernel:.*\(IN=\|OUT=\)' |
        grep --line-buffered -- "$filter" |
    while IFS= read -r line; do
        read -r mon day clock _ <<< "$line"
        read -ra tokens <<< "$line"

        # Collect the first occurrence of each KEY=value token. Parsing stops at
        # "[": ICMP errors embed the offending packet's header in brackets and
        # those inner fields must not overwrite or extend the outer ones.
        field=()
        for token in "${tokens[@]}"; do
            [[ "$token" == "["* ]] && break
            [[ "$token" == *=* ]] || continue
            key=${token%%=*}
            case "$key" in
                SRC|DST|SPT|DPT|PROTO|IN|OUT)
                    [[ -n "${field[$key]+set}" ]] || field[$key]=${token#*=}
                    ;;
            esac
        done

        # Formatted output
        printf "[%s] %s:%s -> %s:%s (%s) [%s->%s]\n" \
            "$mon $day $clock" \
            "${field[SRC]}" "${field[SPT]}" "${field[DST]}" "${field[DPT]}" \
            "${field[PROTO]}" "${field[IN]}" "${field[OUT]}"
    done
}

# Scan iptables LOG entries for simple attack signatures.
# Arguments:
#   $1 - log file (default: /var/log/messages)
#   $2 - look-back window: <N>h, <N>m or <N>d (default: 1h; anything else is
#        treated as 1 hour)
# Heuristics (packet counts per source IP inside the window):
#   > 50 packets overall          -> potential port scan
#   > 10 packets to 22/21/23/3389 -> potential brute force
#   > 100 packets overall         -> high-volume source
#   > 5 packets of a protocol other than TCP/UDP/ICMP -> unusual protocol
# Assumes traditional syslog timestamps ("Mon DD HH:MM:SS ...").
# Returns: 1 when the log file cannot be read, 0 otherwise.
detect_security_events() {
    local log_file="${1:-/var/log/messages}"
    local time_range="${2:-1h}"
    local since_spec since_key now_key iptables_logs
    local port attacks high_volume_ips unusual_protocols count value

    echo "=== Security Event Detection ==="
    echo

    if [ ! -r "$log_file" ]; then
        echo "Cannot read log file: $log_file" >&2
        return 1
    fi

    # Window start as a sortable numeric key (MMDDhhmmss); comparing "%b %d"
    # text against log lines breaks across months and for days 1-9.
    case "$time_range" in
        *h) since_spec="${time_range%h} hours ago" ;;
        *m) since_spec="${time_range%m} minutes ago" ;;
        *d) since_spec="${time_range%d} days ago" ;;
        *)  since_spec="1 hour ago" ;;
    esac
    since_key=$(date -d "$since_spec" '+%m%d%H%M%S' 2>/dev/null) ||
        since_key=$(date -d "1 hour ago" '+%m%d%H%M%S')
    now_key=$(date '+%m%d%H%M%S')

    # Kernel lines with IN=/OUT= inside the window; the range wraps when the
    # window spans New Year (since > now).
    iptables_logs=$(awk -v since="$since_key" -v now="$now_key" '
        BEGIN {
            n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
            for (i = 1; i <= n; i++) month[names[i]] = i
            since = since ""
            now = now ""
        }
        ($1 in month) && /kernel:.*(IN=|OUT=)/ {
            clock = $3
            gsub(/:/, "", clock)
            key = sprintf("%02d%02d%s", month[$1], $2 + 0, substr(clock, 1, 6))
            if (since <= now) {
                if (key >= since && key <= now) print
            } else if (key >= since || key <= now) {
                print
            }
        }' "$log_file")

    # 1. Port scan detection
    echo "1. Port scan detection:"
    echo "$iptables_logs" | grep -o 'SRC=[0-9.]*' | cut -d'=' -f2 | sort | uniq -c |
    awk '$1 > 50 {print "   Potential port scan from " $2 " (" $1 " attempts)"}'

    # 2. Brute force detection
    echo -e "\n2. Brute force detection:"
    for port in 22 21 23 3389; do
        # The port must end at a space or end of line; a bare "DPT=22" would
        # also count traffic to 220, 2222, ...
        attacks=$(echo "$iptables_logs" | grep -E "DPT=${port}( |\$)" |
            grep -o 'SRC=[0-9.]*' | cut -d'=' -f2 | sort | uniq -c | awk '$1 > 10')
        if [ -n "$attacks" ]; then
            echo "   Port $port attacks:"
            echo "$attacks" | while read -r count value; do
                echo "     $value: $count attempts"
            done
        fi
    done

    # 3. DDoS detection
    echo -e "\n3. DDoS detection:"
    high_volume_ips=$(echo "$iptables_logs" | grep -o 'SRC=[0-9.]*' | cut -d'=' -f2 | sort | uniq -c | awk '$1 > 100')
    if [ -n "$high_volume_ips" ]; then
        echo "   High volume sources:"
        echo "$high_volume_ips" | while read -r count value; do
            echo "     $value: $count packets"
        done
    else
        echo "   No high volume sources detected"
    fi

    # 4. Unusual protocol detection
    echo -e "\n4. Unusual protocol detection:"
    unusual_protocols=$(echo "$iptables_logs" | grep -o 'PROTO=[A-Z]*' | cut -d'=' -f2 | sort | uniq -c | awk '$2 !~ /^(TCP|UDP|ICMP)$/ && $1 > 5')
    if [ -n "$unusual_protocols" ]; then
        echo "   Unusual protocols:"
        echo "$unusual_protocols" | while read -r count value; do
            echo "     $value: $count packets"
        done
    else
        echo "   No unusual protocols detected"
    fi
}

# Usage example: dispatch on the first CLI argument (defaults to "help").
# The optional arguments are forwarded as-is; each function applies its own
# defaults when they are empty.
case "${1:-help}" in
    "analyze")
        analyze_iptables_logs "$2" "$3"
        ;;
    "monitor")
        monitor_iptables_logs "$2" "$3"
        ;;
    "security")
        detect_security_events "$2" "$3"
        ;;
    "help")
        echo "Usage: $0 <command> [options]"
        echo "Commands:"
        echo "  analyze [log_file] [time_range]  - Analyze iptables logs"
        echo "  monitor [log_file] [filter]      - Monitor logs in real-time"
        echo "  security [log_file] [time_range] - Detect security events"
        echo
        echo "Examples:"
        echo "  $0 analyze /var/log/messages 2h"
        # The monitor filter is a grep pattern applied to raw log lines, which
        # carry the port as "DPT=22"; a tcpdump-style "port 22" never matches.
        echo "  $0 monitor /var/log/messages 'DPT=22'"
        echo "  $0 security /var/log/messages 1d"
        ;;
    *)
        echo "Unknown command: $1"
        exit 1
        ;;
esac

11.2.2 连接跟踪调试

1. conntrack 工具使用

#!/bin/bash
# conntrack_debug.sh

# Show connection tracking status: table usage, entries per transport
# protocol, entries per state, and the main timeout settings.
# Arguments (both optional):
#   $1 - conntrack table file (default: /proc/net/nf_conntrack)
#   $2 - file holding the table size limit
#        (default: /proc/sys/net/netfilter/nf_conntrack_max)
# Returns: 1 when the conntrack table cannot be read, 0 otherwise.
conntrack_status() {
    local conntrack_file="${1:-/proc/net/nf_conntrack}"
    local max_file="${2:-/proc/sys/net/netfilter/nf_conntrack_max}"
    local current_connections max_connections usage_percent count label

    echo "=== Connection Tracking Status ==="

    # 1. Table usage
    echo "1. Connection tracking statistics:"
    if [ ! -r "$conntrack_file" ]; then
        echo "   Connection tracking not available"
        return 1
    fi

    current_connections=$(wc -l < "$conntrack_file")
    max_connections=$(cat "$max_file" 2>/dev/null)
    echo "   Current connections: $current_connections"

    # Guard the division: the limit may be unreadable, empty or zero.
    if [[ "$max_connections" =~ ^[0-9]+$ ]] && (( max_connections > 0 )); then
        usage_percent=$(( current_connections * 100 / max_connections ))
        echo "   Maximum connections: $max_connections"
        echo "   Usage: $usage_percent%"

        if (( usage_percent > 80 )); then
            echo "   ⚠️  WARNING: Connection table usage is high!"
        fi
    else
        echo "   Maximum connections: ${max_connections:-unknown}"
        echo "   Usage: N/A"
    fi

    # 2. Entries per protocol. Column 1 is the network layer (ipv4/ipv6); the
    # transport protocol name (tcp, udp, icmp, ...) is column 3.
    echo -e "\n2. Connections by protocol:"
    awk '{print $3}' "$conntrack_file" | sort | uniq -c | sort -nr |
    while read -r count label; do
        echo "   $label: $count connections"
    done

    # 3. Entries per state. The state is a bare word in column 6 (there is no
    # "state=" key); stateless entries such as UDP have "src=..." there instead.
    echo -e "\n3. Connections by state:"
    awk '$6 != "" && $6 !~ /=/ {print $6}' "$conntrack_file" | sort | uniq -c | sort -nr |
    while read -r count label; do
        echo "   $label: $count connections"
    done

    # 4. Timeout settings
    echo -e "\n4. Timeout settings:"
    if [ -d /proc/sys/net/netfilter ]; then
        echo "   TCP established: $(cat /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_established 2>/dev/null || echo 'N/A') seconds"
        echo "   TCP close wait: $(cat /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_close_wait 2>/dev/null || echo 'N/A') seconds"
        echo "   UDP timeout: $(cat /proc/sys/net/netfilter/nf_conntrack_udp_timeout 2>/dev/null || echo 'N/A') seconds"
        echo "   ICMP timeout: $(cat /proc/sys/net/netfilter/nf_conntrack_icmp_timeout 2>/dev/null || echo 'N/A') seconds"
    fi
}

# Poll the conntrack table every 2 seconds for entries between two hosts on a
# given port and print their state and remaining timeout.
# Arguments:
#   $1 - source IP
#   $2 - destination IP
#   $3 - port (matched against both sport= and dport=)
#   $4 - conntrack table file (optional, default: /proc/net/nf_conntrack)
# Runs until interrupted.
monitor_connection() {
    local src_ip="$1"
    local dst_ip="$2"
    local port="$3"
    local conntrack_file="${4:-/proc/net/nf_conntrack}"
    local connections conn state timeout

    echo "=== Monitoring Connection: $src_ip -> $dst_ip:$port ==="
    echo "Press Ctrl+C to stop"
    echo

    while true; do
        # Entries list addresses and ports as separate "src= dst= sport= dport="
        # tokens, so whole tokens are compared; a pattern like "ip.*:port " can
        # never match, and substring matching would confuse 10.0.0.1 with
        # 10.0.0.10. Either direction between the two hosts is accepted.
        connections=$(awk -v a="$src_ip" -v b="$dst_ip" -v p="$port" '
            {
                sa = sb = da = db = pm = 0
                for (i = 1; i <= NF; i++) {
                    if ($i == "src=" a) sa = 1
                    if ($i == "src=" b) sb = 1
                    if ($i == "dst=" a) da = 1
                    if ($i == "dst=" b) db = 1
                    if ($i == "sport=" p || $i == "dport=" p) pm = 1
                }
                if (pm && ((sa && db) || (sb && da))) print
            }' "$conntrack_file" 2>/dev/null)

        if [ -n "$connections" ]; then
            echo "[$(date '+%H:%M:%S')] Active connections found:"
            while IFS= read -r conn; do
                # Layout: l3proto l3num l4proto l4num timeout [state] src=...
                read -r _ _ _ _ timeout state _ <<< "$conn"
                # Stateless protocols (e.g. UDP) have "src=..." in this column.
                if [[ "$state" == *=* ]]; then
                    state="N/A"
                fi
                echo "   State: $state, Timeout: $timeout seconds"
                echo "   Full entry: $conn"
            done <<< "$connections"
        else
            echo "[$(date '+%H:%M:%S')] No active connections found"
        fi

        echo
        sleep 2
    done
}

# Remove TIME_WAIT and CLOSE_WAIT entries from the connection tracking table
# and report how many entries disappeared.
# Arguments:
#   $1 - conntrack table file used for counting
#        (optional, default: /proc/net/nf_conntrack)
# Uses the conntrack tool when available; otherwise lowers the kernel timeouts
# for both states to 60 seconds (this setting stays in effect afterwards).
# Returns: 1 when the conntrack table cannot be read, 0 otherwise.
cleanup_conntrack() {
    local conntrack_file="${1:-/proc/net/nf_conntrack}"
    local before_count after_count cleaned_count state stale_count timeout_file

    echo "=== Connection Tracking Cleanup ==="

    if [ ! -r "$conntrack_file" ]; then
        echo "Connection tracking not available"
        return 1
    fi

    # 1. State before cleanup
    before_count=$(wc -l < "$conntrack_file")
    echo "Connections before cleanup: $before_count"

    # 2. Remove closed connections
    echo "Cleaning up closed connections..."
    if command -v conntrack > /dev/null; then
        for state in TIME_WAIT CLOSE_WAIT; do
            # conntrack -L writes a summary line to stderr; keep it out of the
            # report.
            stale_count=$(conntrack -L 2>/dev/null | grep -c "$state")
            if [ "${stale_count:-0}" -gt 0 ]; then
                echo "Found $stale_count $state connections"
                conntrack -D -p tcp --state "$state" 2>/dev/null ||
                    echo "Failed to clean $state connections"
            fi
        done
    else
        echo "conntrack tool not available, using alternative method"
        # Lower the timeouts so stale entries expire sooner. The writability
        # test comes first: in "echo > file 2>/dev/null" a failing redirect is
        # reported before stderr is silenced.
        for timeout_file in \
            /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_time_wait \
            /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_close_wait; do
            if [ -w "$timeout_file" ]; then
                echo 60 > "$timeout_file"
            fi
        done
    fi

    # 3. State after cleanup
    sleep 2
    after_count=$(wc -l < "$conntrack_file")
    cleaned_count=$(( before_count - after_count ))
    # New connections may have appeared meanwhile; never report a negative count.
    if (( cleaned_count < 0 )); then
        cleaned_count=0
    fi
    echo "Connections after cleanup: $after_count"
    echo "Cleaned up: $cleaned_count connections"
}

# Connection tracking performance analysis: slab memory, hash table load,
# net growth of the table over a 5 second sample, and tuning recommendations.
# Reads /proc/slabinfo, /proc/net/nf_conntrack and the nf_conntrack_buckets /
# nf_conntrack_max sysctls; any value that is unavailable is reported as such
# instead of breaking the arithmetic.
analyze_conntrack_performance() {
    local conntrack_file="/proc/net/nf_conntrack"
    local sysctl_dir="/proc/sys/net/netfilter"
    local conntrack_mem buckets max_connections current_connections
    local initial_count final_count
    # Initialised so the recommendation checks below stay valid even when a
    # section has to be skipped.
    local avg_per_bucket=0 usage_percent=0 rate=0

    echo "=== Connection Tracking Performance Analysis ==="

    # 1. Memory usage
    echo "1. Memory usage:"
    if [ -f /proc/slabinfo ]; then
        conntrack_mem=$(grep nf_conntrack /proc/slabinfo 2>/dev/null)
        if [ -n "$conntrack_mem" ]; then
            echo "   $conntrack_mem"
        else
            echo "   Connection tracking memory info not available"
        fi
    fi

    # 2. Hash table statistics
    echo -e "\n2. Hash table statistics:"
    buckets=$(cat "$sysctl_dir/nf_conntrack_buckets" 2>/dev/null)
    if [[ "$buckets" =~ ^[0-9]+$ ]] && (( buckets > 0 )); then
        current_connections=$(cat "$conntrack_file" 2>/dev/null | wc -l)
        avg_per_bucket=$(( current_connections / buckets ))

        echo "   Hash buckets: $buckets"
        echo "   Current connections: $current_connections"
        echo "   Average per bucket: $avg_per_bucket"

        if (( avg_per_bucket > 5 )); then
            echo "   ⚠️  WARNING: High collision rate, consider increasing buckets"
        fi
    else
        echo "   Hash table information not available"
    fi

    # 3. Connection establishment rate (net change of the table size per second)
    echo -e "\n3. Connection establishment rate:"
    initial_count=$(cat "$conntrack_file" 2>/dev/null | wc -l)
    sleep 5
    final_count=$(cat "$conntrack_file" 2>/dev/null | wc -l)
    rate=$(( (final_count - initial_count) / 5 ))

    echo "   New connections per second: $rate"

    if (( rate > 100 )); then
        echo "   ⚠️  WARNING: High connection rate detected"
    fi

    # 4. Recommendations
    echo -e "\n4. Performance recommendations:"
    max_connections=$(cat "$sysctl_dir/nf_conntrack_max" 2>/dev/null)
    current_connections=$(cat "$conntrack_file" 2>/dev/null | wc -l)
    if [[ "$max_connections" =~ ^[0-9]+$ ]] && (( max_connections > 0 )); then
        usage_percent=$(( current_connections * 100 / max_connections ))
    fi

    if (( usage_percent > 80 )); then
        echo "   - Increase nf_conntrack_max value"
        echo "   - Consider reducing timeout values"
        echo "   - Implement connection limiting"
    fi

    if (( avg_per_bucket > 5 )); then
        echo "   - Increase nf_conntrack_buckets value"
    fi

    if (( rate > 100 )); then
        echo "   - Monitor for DDoS attacks"
        echo "   - Implement rate limiting"
    fi
}

# Usage example: dispatch on the first CLI argument (defaults to "help").
case "${1:-help}" in
    status)
        conntrack_status
        ;;
    monitor)
        # Needs <src_ip> <dst_ip> <port> after the command name.
        if (( $# < 4 )); then
            echo "Usage: $0 monitor <src_ip> <dst_ip> <port>"
            exit 1
        fi
        monitor_connection "$2" "$3" "$4"
        ;;
    cleanup)
        cleanup_conntrack
        ;;
    performance)
        analyze_conntrack_performance
        ;;
    help)
        printf '%s\n' \
            "Usage: $0 <command> [options]" \
            "Commands:" \
            "  status                           - Show connection tracking status" \
            "  monitor <src_ip> <dst_ip> <port> - Monitor specific connection" \
            "  cleanup                          - Clean up connection tracking table" \
            "  performance                      - Analyze performance"
        ;;
    *)
        echo "Unknown command: $1"
        exit 1
        ;;
esac

11.3 性能问题排查

11.3.1 规则性能分析

1. 规则匹配效率测试

#!/bin/bash
# rule_performance_test.sh

# Measure the latency cost of the current ruleset by comparing ping latency
# with the filter table flushed against latency with the rules restored.
# WARNING: the firewall is fully open (filter rules flushed, policies ACCEPT)
# while the baseline measurement runs.
# Safety measures:
#   - nothing is changed unless the current rules were saved successfully,
#   - the rules are restored if the run is interrupted (INT/TERM),
#   - the backup lives in a mktemp file and is kept if restoring fails.
# Relies on test_network_performance, which prints the average latency.
# Returns: 0 on success, 1 when saving or restoring the rules failed.
test_rule_performance() {
    local backup baseline_result current_result impact

    echo "=== Rule Performance Test ==="

    # 1. Baseline test - no rules
    echo "1. Baseline test (no rules):"

    # Save the current rules; abort before touching anything if that fails
    backup=$(mktemp /tmp/iptables_rules.XXXXXX) || {
        echo "   Failed to create backup file" >&2
        return 1
    }
    if ! iptables-save > "$backup"; then
        echo "   Failed to save current rules; aborting without changes" >&2
        rm -f "$backup"
        return 1
    fi

    # Put the rules back if the user interrupts the measurement
    trap "iptables-restore < '$backup'; rm -f '$backup'; exit 130" INT TERM

    # Flush the rules for the baseline measurement
    iptables -F
    iptables -P INPUT ACCEPT
    iptables -P OUTPUT ACCEPT
    iptables -P FORWARD ACCEPT

    baseline_result=$(test_network_performance)
    echo "   Baseline performance: $baseline_result"

    # 2. Restore the rules and measure again
    echo "2. Performance with current rules:"
    if ! iptables-restore < "$backup"; then
        trap - INT TERM
        echo "   Failed to restore rules! Backup kept at $backup" >&2
        return 1
    fi
    trap - INT TERM

    current_result=$(test_network_performance)
    echo "   Current performance: $current_result"

    # 3. Performance impact. The metric is latency, so the impact is the
    # relative increase over the baseline: a positive value means the rules
    # make traffic slower.
    if [ -n "$baseline_result" ] && [ -n "$current_result" ]; then
        impact=$(echo "scale=2; ($current_result - $baseline_result) / $baseline_result * 100" | bc 2>/dev/null || echo "N/A")
        echo "   Performance impact: ${impact:-N/A}%"
    fi

    rm -f "$backup"
}

# Network performance probe: pings a fixed host ten times and prints the
# average round-trip time taken from ping's summary line (empty line when
# ping produced no summary).
test_network_performance() {
    local test_host="8.8.8.8"
    local avg_latency

    # The summary reads "rtt min/avg/max/mdev = a/b/c/d ms"; splitting on "/"
    # puts the average into field 5.
    avg_latency=$(ping -c 10 -q "$test_host" 2>/dev/null | awk -F'/' '/avg/ {print $5}')
    printf '%s\n' "$avg_latency"
}

# Rule complexity analysis: prints rule counts for the built-in chains of the
# filter, nat and mangle tables, counts rules by complexity category, and
# prints recommendations when the counts exceed fixed thresholds.
analyze_rule_complexity() {
    local spec table chain count
    local complex_rules string_rules regex_rules state_rules total_rules
    # "table:chains" pairs, in the order they are reported
    local -a table_specs=(
        "filter:INPUT OUTPUT FORWARD"
        "nat:PREROUTING POSTROUTING OUTPUT"
        "mangle:PREROUTING INPUT FORWARD OUTPUT POSTROUTING"
    )

    echo "=== Rule Complexity Analysis ==="

    # 1. Rule counts (listing minus its two header lines)
    echo "1. Rule count by chain:"
    for spec in "${table_specs[@]}"; do
        table=${spec%%:*}
        echo "   Table: $table"
        for chain in ${spec#*:}; do
            count=$(iptables -t "$table" -L "$chain" --line-numbers 2>/dev/null | tail -n +3 | wc -l)
            echo "     $chain: $count rules"
        done
    done

    # 2. Rules per complexity category
    echo -e "\n2. Complex rules analysis:"

    # Rules using three or more match modules
    complex_rules=$(iptables-save | grep -cE "(\-m.*){3,}")
    echo "   Rules with 3+ match conditions: $complex_rules"

    # String matching rules
    string_rules=$(iptables-save | grep -c "\-m string")
    echo "   String matching rules: $string_rules"

    # Regular expression rules
    regex_rules=$(iptables-save | grep -c "\-m regexp")
    echo "   Regular expression rules: $regex_rules"

    # Stateful rules
    state_rules=$(iptables-save | grep -c "\-m.*state\|\-m.*conntrack")
    echo "   Stateful rules: $state_rules"

    # 3. Recommendations
    echo -e "\n3. Performance recommendations:"

    total_rules=$(iptables-save | grep -c "^\-A")
    if (( total_rules > 100 )); then
        printf '%s\n' \
            "   - Consider using ipset for large IP lists" \
            "   - Group similar rules together" \
            "   - Review rule necessity"
    fi

    if (( string_rules > 5 )); then
        printf '%s\n' \
            "   - Minimize string matching rules" \
            "   - Consider application-level filtering"
    fi

    if (( complex_rules > 20 )); then
        printf '%s\n' \
            "   - Simplify complex rules where possible" \
            "   - Split complex rules into multiple simpler ones"
    fi
}

#######################################
# Show which filter-table rules matched traffic during a sampling window.
# Zeroes the packet counters with `iptables -Z`, waits, then lists every
# rule with a non-zero packet count and counts the rules that stayed at 0.
# Arguments: $1 - sampling window in seconds (default: 60)
# Outputs:   report on stdout
#######################################
rule_matching_stats() {
    local wait_seconds="${1:-60}"
    local chain unused_in_chain
    local unused_count=0

    echo "=== Rule Matching Statistics ==="

    # 1. Reset the counters so the figures cover only the sampling window
    echo "1. Resetting rule counters..."
    iptables -Z

    echo "   Counters reset. Waiting ${wait_seconds} seconds for traffic..."
    sleep "$wait_seconds"

    # 2. Rules that matched at least one packet
    echo "2. Rule matching statistics (last ${wait_seconds} seconds):"

    for chain in INPUT OUTPUT FORWARD; do
        echo "   $chain chain:"
        # With -v and --line-numbers the columns are: num pkts bytes target ...
        # so the target is field 4. -x prints exact counters instead of
        # rounded K/M/G values. The first two lines are headers.
        iptables -L "$chain" -n -v -x --line-numbers |
            awk 'NR > 2 && NF >= 4 && $2 != "0" {
                printf "     Rule %s: %s packets -> %s\n", $1, $2, $4
            }'
    done

    # 3. Rules that never matched
    printf '\n%s\n' "3. Unused rules (zero packet count):"

    for chain in INPUT OUTPUT FORWARD; do
        unused_in_chain=$(iptables -L "$chain" -n -v -x --line-numbers |
            awk 'NR > 2 && $2 == "0" { c++ } END { print c + 0 }')
        if [ "$unused_in_chain" -gt 0 ]; then
            echo "   $chain chain: $unused_in_chain unused rules"
            unused_count=$((unused_count + unused_in_chain))
        fi
    done

    if [ "$unused_count" -eq 0 ]; then
        echo "   All rules have been matched"
    else
        echo "   Total unused rules: $unused_count"
        echo "   Consider reviewing and removing unused rules"
    fi
}

#######################################
# List the filter-table rules that matched the most packets during a
# sampling window. Counters are zeroed with `iptables -Z` first.
# Arguments: $1 - sampling window in seconds (default: 300)
#            $2 - number of rules to list (default: 10)
# Outputs:   one line per rule: chain[rule number], packets, bytes, target
#######################################
identify_hotspot_rules() {
    local duration="${1:-300}"
    local top_n="${2:-10}"
    local chain line_num packets bytes target

    echo "=== Hotspot Rules Identification ==="

    # Reset counters so only traffic from the sampling window is ranked.
    iptables -Z

    echo "Collecting data for ${duration} seconds..."
    sleep "$duration"

    echo "Top ${top_n} most active rules:"

    # -x prints exact counters; without it large values appear as e.g. "12K",
    # which a numeric sort would rank as 12. The first two lines per chain
    # are headers. Sort on the packet column only (field 3 after the chain
    # name is prepended).
    for chain in INPUT OUTPUT FORWARD; do
        iptables -L "$chain" -n -v -x --line-numbers |
            awk -v chain="$chain" 'NR > 2 && NF >= 4 { print chain, $1, $2, $3, $4 }'
    done |
        LC_ALL=C sort -k3,3nr |
        head -n "$top_n" |
        while read -r chain line_num packets bytes target; do
            echo "   ${chain}[${line_num}]: $packets packets, $bytes bytes -> $target"
        done
}

# Entry point: run the sub-command named by the first argument.
# With no argument the help text is shown; anything unrecognised exits 1.
case "${1:-help}" in
    test)       test_rule_performance ;;
    complexity) analyze_rule_complexity ;;
    stats)      rule_matching_stats ;;
    hotspot)    identify_hotspot_rules ;;
    help)
        cat <<EOF
Usage: $0 <command>
Commands:
  test       - Test rule performance impact
  complexity - Analyze rule complexity
  stats      - Show rule matching statistics
  hotspot    - Identify most active rules
EOF
        ;;
    *)
        printf '%s\n' "Unknown command: $1"
        exit 1
        ;;
esac

11.3.2 系统资源监控

1. 系统性能监控

#!/bin/bash
# system_performance_monitor.sh

#######################################
# Print one CSV row of system metrics per interval for a fixed duration.
# Columns: time, CPU figure taken from top's "Cpu(s)" line, memory used %,
# conntrack table usage % (N/A when the counters are not readable),
# number of iptables rules, RX bytes and TX bytes of one interface.
# Arguments: $1 - total duration in seconds (default: 60)
#            $2 - seconds between samples (default: 5)
#            $3 - network interface to report (default: eth0)
# Outputs:   CSV header and rows on stdout, errors on stderr
# Returns:   1 if the duration is not a whole number
#######################################
monitor_system_performance() {
    local duration="${1:-60}"
    local interval="${2:-5}"
    local interface="${3:-eth0}"
    local ct_dir="/proc/sys/net/netfilter"
    local stats_dir="/sys/class/net/$interface/statistics"
    local end_time timestamp cpu_usage memory_usage conntrack_usage
    local conn_count conn_max rule_count rx_bytes tx_bytes

    case "$duration" in
        '' | *[!0-9]*)
            echo "Error: duration must be a whole number of seconds" >&2
            return 1
            ;;
    esac

    echo "=== System Performance Monitor ==="
    echo "Duration: ${duration}s, Interval: ${interval}s"
    echo "Time,CPU%,Memory%,ConnTrack,Rules,Network_RX,Network_TX"

    # 10# forces base 10 so a value such as "08" is not parsed as octal.
    end_time=$(($(date +%s) + 10#$duration))

    while [ "$(date +%s)" -lt "$end_time" ]; do
        timestamp=$(date '+%H:%M:%S')

        # CPU: second field of top's "Cpu(s)" line, with any "%..." suffix removed.
        cpu_usage=$(top -bn1 | awk '/Cpu\(s\)/ { sub(/%.*/, "", $2); print $2; exit }')

        # Memory: used / total from the "Mem" line of free.
        memory_usage=$(free | awk '/Mem/ {
            if ($2 > 0) printf "%.1f", $3 * 100 / $2; else printf "0"
            exit
        }')

        # Connection tracking: read the kernel's entry counter rather than
        # dumping the whole table just to count its lines.
        if [ -r "$ct_dir/nf_conntrack_count" ] && [ -r "$ct_dir/nf_conntrack_max" ]; then
            conn_count=$(<"$ct_dir/nf_conntrack_count")
            conn_max=$(<"$ct_dir/nf_conntrack_max")
            conntrack_usage=$(awk -v c="$conn_count" -v m="$conn_max" 'BEGIN {
                if (m > 0) printf "%.1f", c * 100 / m; else printf "0"
            }')
        else
            conntrack_usage="N/A"
        fi

        # Number of rules currently loaded
        rule_count=$(iptables-save | grep -c -- '^-A')

        # Network traffic: per-interface sysfs counters match the interface
        # name exactly; fall back to 0 when the interface does not exist.
        if [ -r "$stats_dir/rx_bytes" ] && [ -r "$stats_dir/tx_bytes" ]; then
            rx_bytes=$(<"$stats_dir/rx_bytes")
            tx_bytes=$(<"$stats_dir/tx_bytes")
        else
            rx_bytes=0
            tx_bytes=0
        fi

        echo "$timestamp,$cpu_usage,$memory_usage,$conntrack_usage,$rule_count,$rx_bytes,$tx_bytes"

        sleep "$interval"
    done
}

#######################################
# Print a snapshot of CPU, memory and network-interface health.
# Reports ksoftirqd CPU share, load average per core, `free -h` output,
# netfilter-related slab caches and per-interface packet/error counters,
# with warnings when fixed thresholds are exceeded.
# Arguments: $1 - directory holding one sub-directory per interface
#                 (default: /sys/class/net)
# Outputs:   report on stdout
#######################################
analyze_resource_usage() {
    local net_root="${1:-/sys/class/net}"
    local ksoftirqd_cpu load_avg cpu_cores load_per_core
    local iface_dir interface stats direction packets errors error_rate
    local rx_packets tx_packets rx_errors tx_errors rx_dropped tx_dropped

    echo "=== Resource Usage Analysis ==="

    # 1. CPU
    echo "1. CPU Analysis:"

    # Sum %CPU (column 3 of `ps aux`) over the ksoftirqd kernel threads.
    # Matching on the command column keeps this pipeline itself out of the sum.
    ksoftirqd_cpu=$(ps aux | awk '$11 ~ /ksoftirqd/ { sum += $3 } END { print sum + 0 }')
    echo "   ksoftirqd CPU usage: ${ksoftirqd_cpu}%"

    if awk -v v="$ksoftirqd_cpu" 'BEGIN { exit !(v > 10) }'; then
        echo "   ⚠️  High softirq CPU usage detected"
        echo "   This may indicate high network interrupt load"
    fi

    # 1-minute load average; LC_ALL=C keeps '.' as the decimal separator.
    load_avg=$(LC_ALL=C uptime | awk -F'load average:' '{
        split($2, parts, ",")
        gsub(/ /, "", parts[1])
        print parts[1]
    }')
    cpu_cores=$(nproc)
    load_per_core=$(awk -v l="$load_avg" -v c="$cpu_cores" 'BEGIN {
        if (c > 0) printf "%.2f", l / c; else printf "0"
    }')

    echo "   Load average: $load_avg (${load_per_core} per core)"

    if awk -v v="$load_per_core" 'BEGIN { exit !(v > 1) }'; then
        echo "   ⚠️  High system load detected"
    fi

    # 2. Memory
    printf '\n%s\n' "2. Memory Analysis:"

    free -h | sed 's/^/   /'

    # Slab caches whose names contain ip_tables, xt_ or nf_.
    # Columns used: name, active_objs ($2) and objsize in bytes ($4).
    if [ -r /proc/slabinfo ]; then
        echo "   iptables memory usage:"
        awk '/ip_tables|xt_|nf_/ && $2 != "0" {
            printf "     %s: %s objects, %dKB\n", $1, $2, int($2 * $4 / 1024)
        }' /proc/slabinfo 2>/dev/null
    fi

    # 3. Network interfaces
    printf '\n%s\n' "3. Network Interface Analysis:"

    for iface_dir in "$net_root"/*; do
        interface=${iface_dir##*/}
        stats="$iface_dir/statistics"

        # Skip only the loopback device itself; names that merely contain
        # "lo" (for example wlo1) must still be reported.
        [ "$interface" = "lo" ] && continue
        [ -f "$stats/rx_packets" ] || continue

        rx_packets=$(<"$stats/rx_packets")
        tx_packets=$(<"$stats/tx_packets")
        rx_errors=$(<"$stats/rx_errors")
        tx_errors=$(<"$stats/tx_errors")
        rx_dropped=$(<"$stats/rx_dropped")
        tx_dropped=$(<"$stats/tx_dropped")

        echo "   $interface:"
        echo "     RX: $rx_packets packets, $rx_errors errors, $rx_dropped dropped"
        echo "     TX: $tx_packets packets, $tx_errors errors, $tx_dropped dropped"

        # Warn when more than 0.1% of the packets in a direction had errors.
        for direction in RX TX; do
            if [ "$direction" = "RX" ]; then
                packets=$rx_packets
                errors=$rx_errors
            else
                packets=$tx_packets
                errors=$tx_errors
            fi

            if [ "$packets" -gt 0 ]; then
                error_rate=$(awk -v e="$errors" -v p="$packets" 'BEGIN {
                    printf "%.4f", e * 100 / p
                }')
                if awk -v r="$error_rate" 'BEGIN { exit !(r > 0.1) }'; then
                    echo "     ⚠️  High $direction error rate: ${error_rate}%"
                fi
            fi
        done
    done
}

#######################################
# Check conntrack usage, ruleset size, CPU load and memory usage against
# fixed thresholds and report how many potential issues were found.
# Arguments: $1 - seconds to wait between the two conntrack samples used
#                 for the rate figure (default: 5)
# Outputs:   report on stdout, errors on stderr
# Returns:   1 if the sample interval is not a positive whole number
#######################################
detect_performance_bottlenecks() {
    local sample_seconds="${1:-5}"
    local issues_found=0
    local ct_dir="/proc/sys/net/netfilter"
    local current_conn max_conn usage_percent final_count conn_rate
    local ruleset total_rules complex_rules
    local load_avg cpu_cores load_per_core memory_usage

    case "$sample_seconds" in
        '' | *[!0-9]*) sample_seconds=0 ;;
        *) sample_seconds=$((10#$sample_seconds)) ;;
    esac
    if [ "$sample_seconds" -le 0 ]; then
        echo "Error: sample interval must be a positive whole number of seconds" >&2
        return 1
    fi

    echo "=== Performance Bottleneck Detection ==="

    # 1. Connection tracking
    echo "1. Connection Tracking Bottlenecks:"

    # Read the kernel's entry counter instead of counting the lines of the
    # full conntrack table, which has to be dumped on every read.
    if [ -r "$ct_dir/nf_conntrack_count" ] && [ -r "$ct_dir/nf_conntrack_max" ]; then
        current_conn=$(<"$ct_dir/nf_conntrack_count")
        max_conn=$(<"$ct_dir/nf_conntrack_max")
        current_conn=${current_conn:-0}
        max_conn=${max_conn:-0}

        if [ "$max_conn" -gt 0 ]; then
            usage_percent=$((current_conn * 100 / max_conn))
        else
            usage_percent=0
        fi

        echo "   Connection table usage: $usage_percent%"

        if [ "$usage_percent" -gt 80 ]; then
            echo "   ❌ Connection table near capacity"
            echo "   Recommendation: Increase nf_conntrack_max or reduce timeouts"
            issues_found=$((issues_found + 1))
        elif [ "$usage_percent" -gt 60 ]; then
            echo "   ⚠️  Connection table usage is high"
            issues_found=$((issues_found + 1))
        else
            echo "   ✓ Connection table usage is normal"
        fi

        # The rate is the net change in tracked entries per second between
        # two samples, so it can be negative when entries expire faster
        # than new ones appear.
        sleep "$sample_seconds"
        final_count=$(<"$ct_dir/nf_conntrack_count")
        final_count=${final_count:-$current_conn}
        conn_rate=$(((final_count - current_conn) / sample_seconds))

        echo "   Connection establishment rate: $conn_rate/sec"

        if [ "$conn_rate" -gt 1000 ]; then
            echo "   ❌ Very high connection rate detected"
            echo "   Recommendation: Implement rate limiting or check for attacks"
            issues_found=$((issues_found + 1))
        elif [ "$conn_rate" -gt 500 ]; then
            echo "   ⚠️  High connection rate detected"
            issues_found=$((issues_found + 1))
        fi
    else
        echo "   Connection tracking counters not available; skipped"
    fi

    # 2. Rule processing, from a single ruleset snapshot
    printf '\n%s\n' "2. Rule Processing Bottlenecks:"

    ruleset=$(iptables-save)
    total_rules=$(printf '%s\n' "$ruleset" | grep -c -- '^-A')
    echo "   Total rules: $total_rules"

    if [ "$total_rules" -gt 1000 ]; then
        echo "   ❌ Very high rule count"
        echo "   Recommendation: Optimize rules, use ipset, or implement rule grouping"
        issues_found=$((issues_found + 1))
    elif [ "$total_rules" -gt 500 ]; then
        echo "   ⚠️  High rule count"
        issues_found=$((issues_found + 1))
    else
        echo "   ✓ Rule count is reasonable"
    fi

    # Count "-m" only as a standalone word so that options such as --mark
    # are not mistaken for extra match modules.
    complex_rules=$(printf '%s\n' "$ruleset" | awk '
        /^-A/ {
            n = 0
            for (i = 1; i <= NF; i++) if ($i == "-m") n++
            if (n >= 3) c++
        }
        END { print c + 0 }')
    echo "   Complex rules (3+ conditions): $complex_rules"

    if [ "$complex_rules" -gt 50 ]; then
        echo "   ❌ Too many complex rules"
        echo "   Recommendation: Simplify rules or split into multiple simpler rules"
        issues_found=$((issues_found + 1))
    elif [ "$complex_rules" -gt 20 ]; then
        echo "   ⚠️  Many complex rules detected"
        issues_found=$((issues_found + 1))
    fi

    # 3. System resources
    printf '\n%s\n' "3. System Resource Bottlenecks:"

    # 1-minute load average; LC_ALL=C keeps '.' as the decimal separator.
    load_avg=$(LC_ALL=C uptime | awk -F'load average:' '{
        split($2, parts, ",")
        gsub(/ /, "", parts[1])
        print parts[1]
    }')
    cpu_cores=$(nproc)
    load_per_core=$(awk -v l="$load_avg" -v c="$cpu_cores" 'BEGIN {
        if (c > 0) printf "%.2f", l / c; else printf "0"
    }')

    echo "   CPU load per core: $load_per_core"

    if awk -v v="$load_per_core" 'BEGIN { exit !(v > 2) }'; then
        echo "   ❌ Very high CPU load"
        issues_found=$((issues_found + 1))
    elif awk -v v="$load_per_core" 'BEGIN { exit !(v > 1) }'; then
        echo "   ⚠️  High CPU load"
        issues_found=$((issues_found + 1))
    else
        echo "   ✓ CPU load is normal"
    fi

    # Memory: used / total from the "Mem" line of free.
    memory_usage=$(free | awk '/Mem/ {
        if ($2 > 0) printf "%.1f", $3 * 100 / $2; else printf "0"
        exit
    }')
    echo "   Memory usage: ${memory_usage}%"

    if awk -v v="$memory_usage" 'BEGIN { exit !(v > 90) }'; then
        echo "   ❌ Very high memory usage"
        issues_found=$((issues_found + 1))
    elif awk -v v="$memory_usage" 'BEGIN { exit !(v > 80) }'; then
        echo "   ⚠️  High memory usage"
        issues_found=$((issues_found + 1))
    else
        echo "   ✓ Memory usage is normal"
    fi

    # 4. Summary
    printf '\n%s\n' "4. Summary:"
    if [ "$issues_found" -eq 0 ]; then
        echo "   ✓ No performance bottlenecks detected"
    else
        echo "   Found $issues_found potential performance issues"
        echo "   Review the recommendations above for optimization"
    fi
}

# Entry point: run the sub-command named by the first argument.
# With no argument the help text is shown; anything unrecognised exits 1.
case "${1:-help}" in
    monitor)    monitor_system_performance "$2" "$3" ;;
    analyze)    analyze_resource_usage ;;
    bottleneck) detect_performance_bottlenecks ;;
    help)
        cat <<EOF
Usage: $0 <command> [options]
Commands:
  monitor [duration] [interval]  - Monitor system performance
  analyze                         - Analyze resource usage
  bottleneck                      - Detect performance bottlenecks

Examples:
  $0 monitor 300 10              - Monitor for 5 minutes, 10s interval
  $0 analyze                      - Analyze current resource usage
  $0 bottleneck                   - Check for performance issues
EOF
        ;;
    *)
        printf '%s\n' "Unknown command: $1"
        exit 1
        ;;
esac