1. 配置文件详解
1.1 配置文件优先级
# 配置文件加载顺序(优先级从高到低)
1. 命令行参数
2. 当前目录的 goaccess.conf
3. 用户主目录的 ~/.goaccessrc
4. 系统配置文件 /etc/goaccess/goaccess.conf
1.2 完整配置文件示例
# /etc/goaccess/goaccess.conf
# GoAccess完整配置文件
######################################
# 时间和日期格式
######################################
# 时间格式
time-format %H:%M:%S
# 可选格式:
# time-format %I:%M:%S %p # 12小时制(带AM/PM)
# time-format %T # 24小时制简写
# 日期格式
date-format %d/%b/%Y
# 可选格式:
# date-format %Y-%m-%d # ISO格式
# date-format %m/%d/%Y # 美式格式
######################################
# 日志格式
######################################
# Apache/Nginx Combined格式
log-format %h %^[%d:%t %^] "%r" %s %b "%R" "%u"
# 其他常用格式:
# log-format COMBINED # 预定义Combined格式
# log-format COMMON # 预定义Common格式
# log-format CLOUDFRONT # CloudFront格式
# log-format CLOUDSTORAGE # Google Cloud Storage格式
######################################
# 输出设置
######################################
# HTML输出文件
output /var/www/html/goaccess.html
# 实时HTML输出
real-time-html true
# WebSocket设置
port 7890
addr 0.0.0.0
ws-url ws://localhost:7890
# 页面标题
html-report-title GoAccess - Web Log Analyzer
# 自定义CSS
# html-custom-css /path/to/custom.css
# 自定义JavaScript
# html-custom-js /path/to/custom.js
######################################
# 数据处理设置
######################################
# 忽略的面板
# ignore-panel VISITORS
# ignore-panel REQUESTS
# ignore-panel REQUESTS_STATIC
# ignore-panel NOT_FOUND
# ignore-panel HOSTS
# ignore-panel OS
# ignore-panel BROWSERS
# ignore-panel VISIT_TIMES
# ignore-panel VIRTUAL_HOSTS
# ignore-panel REFERRERS
# ignore-panel REFERRING_SITES
# ignore-panel KEYPHRASES
# ignore-panel STATUS_CODES
# ignore-panel REMOTE_USER
# ignore-panel CACHE_STATUS
# 忽略的IP地址
# ignore-ip 127.0.0.1
# ignore-ip 192.168.1.1-192.168.1.254 # IP范围用“起始IP-结束IP”表示
# 忽略的状态码
# ignore-status 400
# ignore-status 404
# 静态文件扩展名
static-file .css
static-file .js
static-file .jpg
static-file .jpeg
static-file .png
static-file .gif
static-file .ico
static-file .pdf
static-file .txt
static-file .zip
static-file .rar
static-file .tar.gz
static-file .woff
static-file .woff2
static-file .ttf
static-file .eot
static-file .svg
static-file .mp4
static-file .mp3
static-file .avi
static-file .mov
######################################
# GeoIP设置
######################################
# GeoIP数据库路径
geoip-database /usr/share/GeoIP/GeoIP.dat
# geoip-database /usr/share/GeoIP/GeoLiteCity.dat
######################################
# 性能设置
######################################
# 启用内存映射
# enable-mmap true
# 缓存大小(行数)
# cache-lcnum 1000000
# 哈希表大小
# cache-ncnum 65536
# 启用磁盘存储
# keep-db-files true
# db-path /tmp/goaccess
######################################
# 显示设置
######################################
# 颜色主题
color-scheme 1
# 1: 单色(Monochrome)
# 2: 绿色(Green)
# 3: Monokai(需要终端支持256色)
# 不显示进度条
# no-progress true
# 不显示颜色
# no-color true
# 启用鼠标支持
# with-mouse true
######################################
# 日志处理设置
######################################
# 排除爬虫
# ignore-crawlers true
# 将带查询字符串的静态资源(如 /app.css?v=2)也计入静态文件
# all-static-files true
# 双重解码URL
# double-decode true
# 在请求条目中附带显示HTTP方法和协议(取值为 yes/no)
# http-method yes
# http-protocol yes
# 启用带宽计算
# enable-bandwidth true
# 启用访问者面板
# enable-visitors true
######################################
# 调试设置
######################################
# 调试模式
# debug-file /tmp/goaccess-debug.log
# 详细输出
# verbose true
1.3 配置文件生成脚本
#!/usr/bin/env python3
# generate_goaccess_config.py - GoAccess配置文件生成器
import os
import sys
from pathlib import Path
class GoAccessConfigGenerator:
    """Collect GoAccess settings in ``self.config`` and render them as a config file.

    ``self.config`` is a plain dict that callers may edit directly. The keys
    set by ``__init__`` are: ``time_format``, ``date_format``, ``log_format``,
    ``output_file``, ``real_time``, ``port``, ``addr``, ``geoip_db``,
    ``static_files``, ``ignore_ips``, ``ignore_panels`` and ``color_scheme``.
    An optional ``ws_url`` key, when present and non-empty, overrides the
    default ``ws://localhost:<port>`` WebSocket URL.
    """

    # Panel identifiers accepted by add_ignore_panel(); other names are dropped.
    _KNOWN_PANELS = (
        'VISITORS', 'REQUESTS', 'REQUESTS_STATIC', 'NOT_FOUND',
        'HOSTS', 'OS', 'BROWSERS', 'VISIT_TIMES', 'VIRTUAL_HOSTS',
        'REFERRERS', 'REFERRING_SITES', 'KEYPHRASES', 'STATUS_CODES',
        'REMOTE_USER', 'CACHE_STATUS',
    )

    # Named log-format presets. 'custom' has no preset: it is read from stdin.
    _LOG_FORMATS = {
        'combined': '%h %^[%d:%t %^] "%r" %s %b "%R" "%u"',
        'common': '%h %^ %^ [%d:%t %^] "%r" %s %b',
        'nginx': '%h %^[%d:%t %^] "%r" %s %b "%R" "%u"',
        'apache': '%h %^[%d:%t %^] "%r" %s %b "%R" "%u"',
        'cloudfront': '%d\t%t\t%^\t%b\t%h\t%m\t%^\t%r\t%s\t%R\t%u\t%^',
        'custom': None,
    }

    # Horizontal rule used around every section title in the generated file.
    _RULE = '######################################'

    def __init__(self):
        """Populate ``self.config`` with the default settings."""
        self.config = {
            'time_format': '%H:%M:%S',
            'date_format': '%d/%b/%Y',
            'log_format': '%h %^[%d:%t %^] "%r" %s %b "%R" "%u"',
            'output_file': '/var/www/html/goaccess.html',
            'real_time': True,
            'port': 7890,
            'addr': '0.0.0.0',
            'geoip_db': '/usr/share/GeoIP/GeoIP.dat',
            'static_files': [
                '.css', '.js', '.jpg', '.jpeg', '.png', '.gif',
                '.ico', '.pdf', '.txt', '.zip', '.rar', '.tar.gz',
                '.woff', '.woff2', '.ttf', '.eot', '.svg',
                '.mp4', '.mp3', '.avi', '.mov'
            ],
            'ignore_ips': [],
            'ignore_panels': [],
            'color_scheme': 1
        }

    def set_log_format(self, format_type):
        """Select the log format by preset name.

        ``format_type`` is one of the ``_LOG_FORMATS`` keys. ``'custom'``
        prompts on stdin for the format string. Any other name leaves the
        current format unchanged.
        """
        preset = self._LOG_FORMATS.get(format_type)
        if preset:
            self.config['log_format'] = preset
        elif format_type == 'custom':
            self.config['log_format'] = input("请输入自定义日志格式: ")

    def add_ignore_ip(self, ip):
        """Add ``ip`` to the ignore list, skipping values already present."""
        # Avoid emitting the same ignore-ip directive twice.
        if ip not in self.config['ignore_ips']:
            self.config['ignore_ips'].append(ip)

    def add_ignore_panel(self, panel):
        """Add a panel (case-insensitive) to the ignore list.

        Names that are not in ``_KNOWN_PANELS`` are ignored, as are panels
        that were already added.
        """
        name = panel.upper()
        if name in self._KNOWN_PANELS and name not in self.config['ignore_panels']:
            self.config['ignore_panels'].append(name)

    def generate_config(self, output_path=None):
        """Return the configuration file content as a single string.

        ``output_path`` is accepted for backward compatibility only; it does
        not influence the generated text.
        """
        # Local import: the timestamp used to come from shelling out to
        # `date`, which is not available on every platform.
        from datetime import datetime

        cfg = self.config
        rule = self._RULE
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        lines = [
            "# GoAccess配置文件",
            f"# 自动生成于 {stamp}",
            rule, "# 时间和日期格式", rule,
            f"time-format {cfg['time_format']}",
            f"date-format {cfg['date_format']}",
            rule, "# 日志格式", rule,
            f"log-format {cfg['log_format']}",
            rule, "# 输出设置", rule,
            f"output {cfg['output_file']}",
        ]

        if cfg['real_time']:
            # The default URL only works for a browser on the same host;
            # callers can set config['ws_url'] to override it.
            ws_url = cfg.get('ws_url') or f"ws://localhost:{cfg['port']}"
            lines += [
                "real-time-html true",
                f"port {cfg['port']}",
                f"addr {cfg['addr']}",
                f"ws-url {ws_url}",
            ]

        lines += [
            "",
            rule, "# GeoIP设置", rule,
            f"geoip-database {cfg['geoip_db']}",
            rule, "# 静态文件扩展名", rule,
        ]
        lines += [f"static-file {ext}" for ext in cfg['static_files']]

        if cfg['ignore_ips']:
            lines += ["", rule, "# 忽略的IP地址", rule, ""]
            lines += [f"ignore-ip {ip}" for ip in cfg['ignore_ips']]

        if cfg['ignore_panels']:
            lines += ["", rule, "# 忽略的面板", rule, ""]
            lines += [f"ignore-panel {panel}" for panel in cfg['ignore_panels']]

        lines += [
            "",
            rule, "# 显示设置", rule,
            f"color-scheme {cfg['color_scheme']}",
        ]
        return "\n".join(lines) + "\n"

    def save_config(self, output_path=None):
        """Write the generated configuration to ``output_path``.

        Defaults to ``/etc/goaccess/goaccess.conf``. Missing parent
        directories are created. Returns True on success; on failure an
        explanation is printed and False is returned.
        """
        if not output_path:
            output_path = '/etc/goaccess/goaccess.conf'
        config_content = self.generate_config(output_path)
        try:
            # A bare filename has an empty dirname, and os.makedirs('') raises,
            # so only create the parent when there is one.
            parent = os.path.dirname(output_path)
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(config_content)
        except PermissionError:
            print(f"权限不足,无法写入 {output_path}")
            print("请使用sudo运行或选择其他路径")
            return False
        except OSError as e:
            print(f"保存配置文件时出错: {e}")
            return False
        print(f"配置文件已保存到: {output_path}")
        return True
def interactive_config():
    """Build a configuration interactively from stdin prompts and save it.

    Asks for the log format, HTML output path, real-time settings, IPs to
    ignore and the destination path, then writes the file through
    ``GoAccessConfigGenerator.save_config``. Returns the generator so callers
    can inspect the final settings.
    """
    generator = GoAccessConfigGenerator()
    print("GoAccess配置文件生成器")
    print("=" * 30)

    # Log format selection; an unrecognised choice keeps the default format.
    print("\n选择日志格式:")
    print("1. Combined (Apache/Nginx默认)")
    print("2. Common (Apache简单格式)")
    print("3. CloudFront")
    print("4. 自定义格式")
    choice = input("请选择 (1-4): ").strip()
    format_map = {'1': 'combined', '2': 'common', '3': 'cloudfront', '4': 'custom'}
    if choice in format_map:
        generator.set_log_format(format_map[choice])

    # HTML report path; empty input keeps the default.
    output_file = input(f"\n输出HTML文件路径 [{generator.config['output_file']}]: ").strip()
    if output_file:
        generator.config['output_file'] = output_file

    # Real-time monitoring.
    real_time = input("\n启用实时监控? (y/N): ").strip().lower()
    generator.config['real_time'] = real_time == 'y'
    if generator.config['real_time']:
        port = input(f"WebSocket端口 [{generator.config['port']}]: ").strip()
        if port:
            # str.isdigit() accepts characters such as '²' that int() rejects,
            # so parse with int() and check the valid TCP port range.
            try:
                port_number = int(port)
            except ValueError:
                port_number = None
            if port_number is not None and 1 <= port_number <= 65535:
                generator.config['port'] = port_number
            else:
                print(f"无效的端口,继续使用 {generator.config['port']}")

    # IPs to ignore, one per prompt; an empty line ends the list.
    print("\n添加要忽略的IP地址 (回车结束):")
    while True:
        ip = input("IP地址: ").strip()
        if not ip:
            break
        generator.add_ignore_ip(ip)

    # Destination of the generated configuration file.
    config_path = input("\n配置文件保存路径 [./goaccess.conf]: ").strip()
    if not config_path:
        config_path = './goaccess.conf'

    if generator.save_config(config_path):
        print("\n配置文件生成完成!")
        print(f"使用方法: goaccess -p {config_path} /path/to/access.log")
    return generator
def main():
    """Script entry point.

    With ``--interactive`` as the first argument, run the interactive
    generator. Otherwise write a default configuration, with loopback
    addresses ignored, to ``./goaccess.conf``. Exits with status 1 when the
    default file cannot be written.
    """
    if len(sys.argv) > 1 and sys.argv[1] == '--interactive':
        interactive_config()
        return

    generator = GoAccessConfigGenerator()
    # Ignore loopback traffic by default.
    generator.add_ignore_ip('127.0.0.1')
    generator.add_ignore_ip('::1')

    # save_config() has already printed the reason on failure. Do not follow
    # it with a success message, and signal the failure to the shell.
    if not generator.save_config('./goaccess.conf'):
        sys.exit(1)
    print("\n默认配置文件已生成: ./goaccess.conf")
    print("使用 --interactive 参数进行交互式配置")


if __name__ == "__main__":
    main()
2. 日志格式配置
2.1 常用日志格式
Apache Combined格式
# 日志示例
127.0.0.1 - - [25/Dec/2023:10:00:01 +0000] "GET / HTTP/1.1" 200 1234 "-" "Mozilla/5.0"
# 配置
time-format %H:%M:%S
date-format %d/%b/%Y
log-format %h %^[%d:%t %^] "%r" %s %b "%R" "%u"
Nginx默认格式
# nginx.conf配置
log_format combined '$remote_addr - $remote_user [$time_local] '
'"$request" $status $body_bytes_sent '
'"$http_referer" "$http_user_agent"';
# GoAccess配置
time-format %H:%M:%S
date-format %d/%b/%Y
log-format %h %^[%d:%t %^] "%r" %s %b "%R" "%u"
自定义Nginx格式
# nginx.conf
log_format detailed '$remote_addr - $remote_user [$time_local] '
'"$request" $status $body_bytes_sent '
'"$http_referer" "$http_user_agent" '
'$request_time $upstream_response_time';
# GoAccess配置
time-format %H:%M:%S
date-format %d/%b/%Y
log-format %h %^[%d:%t %^] "%r" %s %b "%R" "%u" %T %^
2.2 日志格式检测脚本
#!/usr/bin/env python3
# detect_log_format.py - 日志格式检测工具
import re
import sys
from collections import Counter
class LogFormatDetector:
    """Guess the format of an access log by matching sample lines against regexes.

    ``self.patterns`` maps a format name to a dict with the detection regex
    (``pattern``) and the GoAccess settings to recommend for it
    (``goaccess_format``, ``time_format``, ``date_format``).
    """

    def __init__(self):
        """Register the known formats and their detection patterns."""
        self.patterns = {
            'combined': {
                'pattern': r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\S+) "([^"]+)" "([^"]+)"',
                'goaccess_format': '%h %^[%d:%t %^] "%r" %s %b "%R" "%u"',
                'time_format': '%H:%M:%S',
                'date_format': '%d/%b/%Y'
            },
            'common': {
                'pattern': r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\S+)',
                'goaccess_format': '%h %^ %^ [%d:%t %^] "%r" %s %b',
                'time_format': '%H:%M:%S',
                'date_format': '%d/%b/%Y'
            },
            'nginx_custom': {
                # The last field ($upstream_response_time) is "-" when nginx
                # answered without contacting an upstream, so accept that too.
                'pattern': r'^(\S+) - (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\S+) "([^"]+)" "([^"]+)" ([\d\.]+) (-|[\d\.]+)',
                'goaccess_format': '%h %^[%d:%t %^] "%r" %s %b "%R" "%u" %T %^',
                'time_format': '%H:%M:%S',
                'date_format': '%d/%b/%Y'
            },
            'cloudfront': {
                'pattern': r'^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)',
                'goaccess_format': '%d\t%t\t%^\t%b\t%h\t%m\t%^\t%r\t%s\t%R\t%u\t%^',
                'time_format': '%H:%M:%S',
                'date_format': '%Y-%m-%d'
            }
        }

    def _read_sample(self, log_file, limit, skip_noise=False):
        """Return up to ``limit`` whitespace-stripped lines from ``log_file``.

        With ``skip_noise`` set, blank lines and lines starting with ``#``
        are neither returned nor counted towards ``limit``. Raises OSError
        when the file cannot be opened or read.
        """
        sample = []
        with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
            for raw in f:
                if len(sample) >= limit:
                    break
                line = raw.strip()
                if skip_noise and (not line or line.startswith('#')):
                    continue
                sample.append(line)
        return sample

    def detect_format(self, log_file, sample_lines=100):
        """Detect the format of ``log_file`` from its first ``sample_lines`` entries.

        Returns one of:
        * ``(format_name, result)`` when the best format matches more than
          80% of the sampled lines; ``result`` holds ``matches``, ``total``,
          ``rate`` and ``config`` (that format's ``self.patterns`` entry);
        * ``(None, results)`` with the per-format result dicts when nothing
          reaches the threshold;
        * ``(None, message)`` with an error string when the file cannot be
          read or contains no log entries.
        """
        try:
            # Blank lines and '#' header lines (e.g. CloudFront's "#Version" /
            # "#Fields") are not log entries and must not lower the match rate.
            lines = self._read_sample(log_file, sample_lines, skip_noise=True)
        except OSError as e:
            return None, f"读取日志文件时出错: {e}"
        if not lines:
            return None, "日志文件为空"

        results = {}
        specificity = {}
        for format_name, format_info in self.patterns.items():
            regex = re.compile(format_info['pattern'])
            matches = sum(1 for line in lines if regex.match(line))
            # The number of capture groups says how many fields the pattern
            # pins down.
            specificity[format_name] = regex.groups
            results[format_name] = {
                'matches': matches,
                'total': len(lines),
                'rate': matches / len(lines),
                'config': format_info
            }

        # The patterns are prefix matches, so a line in a richer format also
        # matches the simpler ones at the same rate. Break ties in favour of
        # the pattern that accounts for the most fields.
        best_format = max(
            results,
            key=lambda name: (results[name]['rate'], specificity[name]),
        )
        best_result = results[best_format]
        if best_result['rate'] > 0.8:  # require more than 80% of lines to match
            return best_format, best_result
        return None, results

    def generate_config(self, format_name, format_info):
        """Render the recommended GoAccess settings for a detected format.

        ``format_info`` is the result dict returned by ``detect_format``
        (it must provide ``rate`` and ``config``).
        """
        settings = format_info['config']
        config = f"""# 检测到的日志格式: {format_name}
# 匹配率: {format_info['rate']:.2%}
time-format {settings['time_format']}
date-format {settings['date_format']}
log-format {settings['goaccess_format']}
"""
        return config

    def analyze_sample_lines(self, log_file, num_lines=10):
        """Print and return the first ``num_lines`` lines of ``log_file``.

        Lines are whitespace-stripped. Returns an empty list, after printing
        the error, when the file cannot be read.
        """
        try:
            sample_lines = self._read_sample(log_file, num_lines)
        except OSError as e:
            print(f"读取示例行时出错: {e}")
            return []
        print(f"\n示例日志行 (前{len(sample_lines)}行):")
        print("=" * 50)
        for i, line in enumerate(sample_lines, 1):
            print(f"{i:2d}: {line}")
        return sample_lines
def main():
    """Script entry point: detect the format of the log file named on the command line.

    Prints the first lines of the file and the detection result. On success
    the recommended settings are also written to
    ``goaccess_<format>.conf`` in the current directory. Exits with status 1
    on wrong usage, when no format is detected, or when the config file
    cannot be written.
    """
    if len(sys.argv) != 2:
        print("用法: python3 detect_log_format.py <log_file>")
        sys.exit(1)

    log_file = sys.argv[1]
    detector = LogFormatDetector()
    print(f"分析日志文件: {log_file}")
    print("=" * 50)

    # Show a few raw lines so the user can compare them with the result.
    detector.analyze_sample_lines(log_file)

    format_name, result = detector.detect_format(log_file)
    if not format_name:
        print("\n未能检测到匹配的日志格式")
        if isinstance(result, dict):
            # No format reached the threshold: show how close each one came.
            print("\n各格式匹配情况:")
            for fmt, info in result.items():
                print(f"{fmt:15s}: {info['rate']:.2%} ({info['matches']}/{info['total']})")
        else:
            # detect_format returned an error message instead of statistics.
            print(f"错误: {result}")
        print("\n请手动配置日志格式或检查日志文件格式")
        # Report the failure to calling scripts through the exit status.
        sys.exit(1)

    print(f"\n检测结果: {format_name}")
    print(f"匹配率: {result['rate']:.2%} ({result['matches']}/{result['total']})")

    config = detector.generate_config(format_name, result)
    print("\n推荐的GoAccess配置:")
    print("=" * 30)
    print(config)

    config_file = f"goaccess_{format_name}.conf"
    try:
        with open(config_file, 'w', encoding='utf-8') as f:
            f.write(config)
    except OSError as e:
        # The recommendation was already printed above; only saving failed.
        print(f"保存配置文件时出错: {e}")
        sys.exit(1)
    print(f"配置已保存到: {config_file}")


if __name__ == "__main__":
    main()
3. 基本使用方法
3.1 命令行基础用法
# Basic analysis (interactive terminal dashboard)
goaccess /var/log/nginx/access.log
# Use a specific configuration file
goaccess -p /etc/goaccess/goaccess.conf /var/log/nginx/access.log
# Specify the log format
goaccess /var/log/nginx/access.log --log-format=COMBINED
# Generate an HTML report
goaccess /var/log/nginx/access.log -o report.html
# Real-time monitoring in the terminal: the dashboard refreshes as the log
# grows, so no extra flag is needed (GoAccess has no --real-time option; the
# real-time switch is --real-time-html and applies to HTML output)
goaccess /var/log/nginx/access.log --log-format=COMBINED
# Analyse several files
goaccess /var/log/nginx/access.log* --log-format=COMBINED
# Read from standard input
tail -f /var/log/nginx/access.log | goaccess --log-format=COMBINED
# Analyse compressed files
zcat /var/log/nginx/access.log.*.gz | goaccess --log-format=COMBINED
3.2 常用参数详解
# 输出相关
-o, --output=FILE 输出文件路径
-f, --log-file=FILE 日志文件路径
--real-time-html 生成实时HTML报告
--json-pretty-print 格式化JSON输出
--no-csv-summary CSV输出中不包含汇总信息(CSV格式由 -o 文件的 .csv 扩展名决定)
# 格式相关
--log-format=FORMAT 日志格式
--date-format=FORMAT 日期格式
--time-format=FORMAT 时间格式
# 过滤相关
--ignore-panel=PANEL 忽略指定面板
--ignore-ip=IP 忽略指定IP
--ignore-status=CODE 忽略指定状态码
--ignore-crawlers 排除爬虫
# 性能相关
--no-progress 不显示进度条
--no-color 不使用颜色
--enable-mmap 启用内存映射
--cache-lcnum=NUM 缓存行数
# 实时监控相关
--port=PORT WebSocket端口
--addr=ADDR 绑定地址
--ws-url=URL WebSocket URL
# 其他
-p, --config-file=FILE 配置文件路径
--debug-file=FILE 调试日志文件
-h, --help 显示帮助
-V, --version 显示版本
3.3 使用示例脚本
#!/bin/bash
# goaccess_examples.sh - GoAccess usage examples
#
# Runs seven sample invocations against the nginx access log and writes the
# reports into $OUTPUT_DIR.

# Settings
LOG_DIR="/var/log/nginx"
OUTPUT_DIR="/var/www/html/goaccess"
CONFIG_FILE="/etc/goaccess/goaccess.conf"

# Create the output directory.
# NOTE(review): the directory is handed to www-data while the goaccess
# commands below run as the invoking user - confirm that user can write here.
sudo mkdir -p "$OUTPUT_DIR"
sudo chown www-data:www-data "$OUTPUT_DIR"

echo "GoAccess使用示例"
echo "================="

# printf is used for the headings because bash's echo prints "\n" literally
# unless it is given -e.

# Example 1: basic HTML report
printf '\n%s\n' "1. 生成基本HTML报告"
goaccess "$LOG_DIR/access.log" \
    --log-format=COMBINED \
    -o "$OUTPUT_DIR/basic_report.html"
echo "报告已生成: $OUTPUT_DIR/basic_report.html"

# Example 2: real-time HTML report (started in the background)
printf '\n%s\n' "2. 启动实时HTML报告"
goaccess "$LOG_DIR/access.log" \
    --log-format=COMBINED \
    --real-time-html \
    --ws-url=ws://localhost:7890 \
    --port=7890 \
    --addr=0.0.0.0 \
    -o "$OUTPUT_DIR/realtime_report.html" &
echo "实时报告已启动: http://localhost/goaccess/realtime_report.html"
echo "WebSocket端口: 7890"

# Example 3: JSON output
printf '\n%s\n' "3. 生成JSON报告"
goaccess "$LOG_DIR/access.log" \
    --log-format=COMBINED \
    --json-pretty-print \
    -o "$OUTPUT_DIR/report.json"
echo "JSON报告已生成: $OUTPUT_DIR/report.json"

# Example 4: several log files (the glob is left unquoted so it expands)
printf '\n%s\n' "4. 分析多个日志文件"
goaccess "$LOG_DIR"/access.log* \
    --log-format=COMBINED \
    -o "$OUTPUT_DIR/multi_files_report.html"
echo "多文件报告已生成: $OUTPUT_DIR/multi_files_report.html"

# Example 5: compressed logs, decompressed on the fly and piped in
printf '\n%s\n' "5. 分析压缩日志文件"
zcat "$LOG_DIR"/access.log.*.gz | \
    goaccess --log-format=COMBINED \
    -o "$OUTPUT_DIR/compressed_logs_report.html"
echo "压缩日志报告已生成: $OUTPUT_DIR/compressed_logs_report.html"

# Example 6: exclude specific IPs
printf '\n%s\n' "6. 过滤本地IP的报告"
goaccess "$LOG_DIR/access.log" \
    --log-format=COMBINED \
    --ignore-ip=127.0.0.1 \
    --ignore-ip=::1 \
    -o "$OUTPUT_DIR/filtered_report.html"
echo "过滤报告已生成: $OUTPUT_DIR/filtered_report.html"

# Example 7: live terminal dashboard. The terminal output refreshes on its
# own as the log grows; GoAccess has no --real-time option (the real-time
# switch is --real-time-html and applies to HTML output), so none is passed.
printf '\n%s\n' "7. 终端实时监控(按q退出)"
echo "启动实时监控..."
goaccess "$LOG_DIR/access.log" \
    --log-format=COMBINED

printf '\n%s\n' "所有示例完成!"
echo "查看报告: ls -la $OUTPUT_DIR/"
4. 输出格式详解
4.1 HTML输出
# 基本HTML输出
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
-o report.html
# 自定义HTML标题
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
--html-report-title="我的网站访问统计" \
-o report.html
# 实时HTML(推荐)
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
--real-time-html \
--ws-url=ws://example.com:7890 \
--port=7890 \
--addr=0.0.0.0 \
-o /var/www/html/goaccess.html
4.2 JSON输出
# 标准JSON输出
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
--json-pretty-print
# 保存到文件
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
--json-pretty-print \
-o report.json
4.3 CSV输出
# CSV output: the format is selected by the .csv extension of the -o file.
# GoAccess has no --csv-pretty-print option (only --json-pretty-print exists),
# so no formatting flag is passed.
goaccess /var/log/nginx/access.log \
    --log-format=COMBINED \
    -o report.csv
4.4 终端输出
# 默认终端输出
goaccess /var/log/nginx/access.log --log-format=COMBINED
# 无颜色输出
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
--no-color
# 不显示进度条
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
--no-progress
5. 实时监控设置
5.1 实时HTML监控
# 启动实时监控
goaccess /var/log/nginx/access.log \
--log-format=COMBINED \
--real-time-html \
--ws-url=ws://localhost:7890 \
--port=7890 \
--addr=0.0.0.0 \
-o /var/www/html/goaccess.html
5.2 Systemd服务配置
# /etc/systemd/system/goaccess.service
[Unit]
Description=GoAccess Real-time Web Log Analyzer
After=network.target
[Service]
Type=simple
User=www-data
Group=www-data
ExecStart=/usr/bin/goaccess /var/log/nginx/access.log \
--config-file=/etc/goaccess/goaccess.conf \
--real-time-html \
--ws-url=ws://localhost:7890 \
--port=7890 \
--addr=0.0.0.0 \
--output=/var/www/html/goaccess.html
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
# 启用和启动服务
sudo systemctl daemon-reload
sudo systemctl enable goaccess
sudo systemctl start goaccess
sudo systemctl status goaccess
5.3 Nginx反向代理配置
# /etc/nginx/sites-available/goaccess
server {
listen 80;
server_name stats.example.com;
root /var/www/html;
index goaccess.html;
location / {
try_files $uri $uri/ =404;
auth_basic "GoAccess Statistics";
auth_basic_user_file /etc/nginx/.htpasswd;
}
location /ws {
proxy_pass http://127.0.0.1:7890;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
6. 下一步
基本配置完成后,您可以:
- 学习高级过滤和分析功能
- 了解各种统计面板的含义
- 掌握性能优化技巧
- 学习与其他工具的集成
- 探索自动化和监控方案
下一章我们将详细介绍GoAccess的高级功能和分析技巧。