1. Deployment Architecture Design
1.1 Typical Deployment Architecture
                ┌─────────────────┐
                │  Load Balancer  │
                │   (HAProxy/F5)  │
                └────────┬────────┘
                         │
         ┌───────────────┼───────────────┐
         │               │               │
┌────────▼────────┐  ┌───▼────┐  ┌───────▼─────────┐
│   OpenResty-1   │  │  ...   │  │   OpenResty-N   │
│  (API Gateway)  │  │        │  │  (API Gateway)  │
└────────┬────────┘  └────────┘  └───────┬─────────┘
         │                               │
┌────────▼────────┐             ┌───────▼─────────┐
│  Backend Apps   │             │  Backend Apps   │
│ (Microservices) │             │ (Microservices) │
└────────┬────────┘             └───────┬─────────┘
         │                               │
┌────────▼────────┐             ┌───────▼─────────┐
│    Database     │             │    Database     │
│  (MySQL/Redis)  │             │  (MySQL/Redis)  │
└─────────────────┘             └─────────────────┘
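Each gateway node can also pick its backend dynamically instead of hard-coding upstream servers. Below is a minimal sketch of the body of a balancer_by_lua_block inside an upstream block, using ngx.balancer from lua-resty-core; the backend addresses and the random pick are illustrative only:
-- balancer_by_lua_block body (sketch; addresses are placeholders)
local balancer = require "ngx.balancer"
-- In practice this list would come from a shared dict or service discovery
local backends = {
{ host = "10.0.2.10", port = 8080 },
{ host = "10.0.2.11", port = 8080 },
}
local peer = backends[math.random(#backends)]
local ok, err = balancer.set_current_peer(peer.host, peer.port)
if not ok then
ngx.log(ngx.ERR, "failed to set current peer: ", err)
return ngx.exit(500)
end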
1.2 Choosing a Deployment Mode
1.2.1 Single-Node Deployment
# Use case: development, testing, and small applications
# Pros: simple and cheap
# Cons: single point of failure, limited capacity
# Deployment script
#!/bin/bash
# deploy_single.sh
set -e
APP_NAME="openresty-app"
APP_DIR="/opt/${APP_NAME}"
NGINX_CONF="/etc/openresty/nginx.conf"
LUA_DIR="${APP_DIR}/lua"
echo "Starting single node deployment..."
# 1. Stop the running service
sudo systemctl stop openresty || true
# 2. Back up the current configuration
sudo cp ${NGINX_CONF} ${NGINX_CONF}.backup.$(date +%Y%m%d_%H%M%S)
# 3. Deploy the application code
sudo mkdir -p ${APP_DIR}
sudo cp -r ./src/* ${APP_DIR}/
sudo chown -R openresty:openresty ${APP_DIR}
# 4. Update the configuration and verify it before starting
sudo cp ./config/nginx.conf ${NGINX_CONF}
sudo /usr/local/openresty/bin/openresty -t -c ${NGINX_CONF}
# 5. Start the service
sudo systemctl start openresty
sudo systemctl enable openresty
# 6. Health check
for i in {1..30}; do
if curl -f http://localhost/health; then
echo "Deployment successful!"
exit 0
fi
echo "Waiting for service to start... ($i/30)"
sleep 2
done
echo "Deployment failed - service not responding"
exit 1
1.2.2 Cluster Deployment
#!/bin/bash
# deploy_cluster.sh
set -e
# Cluster node list
NODES=("10.0.1.10" "10.0.1.11" "10.0.1.12")
USER="deploy"
APP_NAME="openresty-app"
echo "Starting cluster deployment..."
# 1. Deploy to all nodes in parallel
deploy_to_node() {
local node=$1
echo "Deploying to node: $node"
# Upload the code
rsync -avz --delete ./src/ ${USER}@${node}:/opt/${APP_NAME}/
# Upload the configuration
scp ./config/nginx.conf ${USER}@${node}:/etc/openresty/
# Run the deployment steps remotely
ssh ${USER}@${node} << 'EOF'
# Test the uploaded configuration
sudo /usr/local/openresty/bin/openresty -t -c /etc/openresty/nginx.conf
# Graceful reload
sudo systemctl reload openresty
# Health check
sleep 5
curl -f http://localhost/health || exit 1
EOF
echo "Node $node deployed successfully"
}
# Kick off all node deployments in parallel
for node in "${NODES[@]}"; do
deploy_to_node "$node" &
done
# Wait for all deployments to finish
wait
echo "Cluster deployment completed!"
# Verify cluster health
echo "Verifying cluster health..."
for node in "${NODES[@]}"; do
if curl -f http://${node}/health; then
echo "✓ Node $node is healthy"
else
echo "✗ Node $node is unhealthy"
fi
done
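Note that deploying to every node in parallel means a broken build reaches all nodes at once. For production traffic it is safer to deploy serially (or in small batches) and abort as soon as one node fails its health check.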
1.2.3 Containerized Deployment
# Dockerfile
FROM openresty/openresty:1.21.4.1-alpine
# Install dependencies
RUN apk add --no-cache \
curl \
bash \
tzdata
# Set the timezone
ENV TZ=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# Create the application directory
WORKDIR /app
# Copy the Lua libraries
COPY lua/ /usr/local/openresty/lualib/app/
# Copy the configuration file
COPY config/nginx.conf /usr/local/openresty/nginx/conf/nginx.conf
# Copy the entrypoint script
COPY scripts/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Create the log directory
RUN mkdir -p /var/log/nginx
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost/health || exit 1
# Expose ports
EXPOSE 80 443
# Entrypoint
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/usr/local/openresty/bin/openresty", "-g", "daemon off;"]
#!/bin/bash
# scripts/entrypoint.sh
set -e
# Default values for environment variables
WORKER_PROCESSES=${WORKER_PROCESSES:-auto}
WORKER_CONNECTIONS=${WORKER_CONNECTIONS:-1024}
LOG_LEVEL=${LOG_LEVEL:-info}
# Generate env directives dynamically
cat > /tmp/env.conf << EOF
env WORKER_PROCESSES;
env WORKER_CONNECTIONS;
env LOG_LEVEL;
env DATABASE_URL;
env REDIS_URL;
env JWT_SECRET;
EOF
# Insert the env directives at the top of nginx.conf (env is only valid in the main context)
sed -i '1r /tmp/env.conf' /usr/local/openresty/nginx/conf/nginx.conf
# Test the configuration
/usr/local/openresty/bin/openresty -t
# Exec the command passed to the container
exec "$@"
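The generated env directives matter because nginx clears the process environment before spawning workers; without them, os.getenv() returns nil inside *_by_lua code. A small sketch of reading these values, for example from init_by_lua_block (the fallback defaults are illustrative):
-- Reading configuration exposed through the generated env.conf
local database_url = os.getenv("DATABASE_URL") or "mysql://127.0.0.1:3306/app"
local log_level = os.getenv("LOG_LEVEL") or "info"
ngx.log(ngx.INFO, "effective log level: ", log_level)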
# docker-compose.yml
version: '3.8'
services:
openresty:
build: .
ports:
- "80:80"
- "443:443"
environment:
- WORKER_PROCESSES=auto
- WORKER_CONNECTIONS=1024
- LOG_LEVEL=info
- DATABASE_URL=mysql://user:pass@db:3306/app
- REDIS_URL=redis://redis:6379/0
- JWT_SECRET=your-secret-key
volumes:
- ./logs:/var/log/nginx
- ./ssl:/etc/ssl/certs
depends_on:
- db
- redis
restart: unless-stopped
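# NOTE: the deploy section below is only fully honored by Docker Swarm
# (docker stack deploy); outside Swarm's ingress routing, replicas > 1
# will conflict with the fixed host ports published above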
deploy:
replicas: 3
resources:
limits:
cpus: '1.0'
memory: 512M
reservations:
cpus: '0.5'
memory: 256M
db:
image: mysql:8.0
environment:
- MYSQL_ROOT_PASSWORD=rootpass
- MYSQL_DATABASE=app
- MYSQL_USER=user
- MYSQL_PASSWORD=pass
volumes:
- db_data:/var/lib/mysql
- ./sql:/docker-entrypoint-initdb.d
restart: unless-stopped
redis:
image: redis:7-alpine
command: redis-server --appendonly yes
volumes:
- redis_data:/data
restart: unless-stopped
nginx:
image: nginx:alpine
ports:
- "8080:80"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf
depends_on:
- openresty
restart: unless-stopped
volumes:
db_data:
redis_data:
networks:
default:
driver: bridge
2. Configuration Management
2.1 Environment-Specific Configuration
-- config/env.lua - environment configuration management
local env = {}
-- Environment type
local ENV_TYPE = os.getenv("ENV_TYPE") or "development"
-- Base configuration
local base_config = {
app_name = "openresty-app",
version = "1.0.0",
-- Server settings
server = {
worker_processes = "auto",
worker_connections = 1024,
keepalive_timeout = 65,
client_max_body_size = "10m"
},
-- Logging settings
logging = {
level = "info",
access_log = "/var/log/nginx/access.log",
error_log = "/var/log/nginx/error.log"
},
-- Security settings
security = {
jwt_secret = os.getenv("JWT_SECRET") or "default-secret",
session_timeout = 3600,
rate_limit = {
requests_per_minute = 60,
burst = 10
}
}
}
-- Environment-specific overrides
local env_configs = {
development = {
debug = true,
database = {
host = "localhost",
port = 3306,
database = "app_dev",
user = "dev_user",
password = "dev_pass",
pool_size = 5
},
redis = {
host = "localhost",
port = 6379,
database = 0,
pool_size = 10
},
logging = {
level = "debug"
}
},
testing = {
debug = true,
database = {
host = "test-db",
port = 3306,
database = "app_test",
user = "test_user",
password = "test_pass",
pool_size = 3
},
redis = {
host = "test-redis",
port = 6379,
database = 1,
pool_size = 5
}
},
production = {
debug = false,
database = {
host = os.getenv("DB_HOST") or "prod-db",
port = tonumber(os.getenv("DB_PORT")) or 3306,
database = os.getenv("DB_NAME") or "app_prod",
user = os.getenv("DB_USER") or "prod_user",
password = os.getenv("DB_PASSWORD") or "prod_pass",
pool_size = tonumber(os.getenv("DB_POOL_SIZE")) or 20
},
redis = {
host = os.getenv("REDIS_HOST") or "prod-redis",
port = tonumber(os.getenv("REDIS_PORT")) or 6379,
database = tonumber(os.getenv("REDIS_DB")) or 0,
pool_size = tonumber(os.getenv("REDIS_POOL_SIZE")) or 50
},
logging = {
level = "warn"
},
server = {
worker_processes = tonumber(os.getenv("WORKER_PROCESSES")) or "auto",
worker_connections = tonumber(os.getenv("WORKER_CONNECTIONS")) or 2048
}
}
}
-- Merge two config tables (override wins; nested tables are merged recursively)
function env.deep_merge(base, override)
local result = {}
-- Copy the base configuration
for k, v in pairs(base) do
if type(v) == "table" then
result[k] = env.deep_merge(v, {})
else
result[k] = v
end
end
-- Apply the overrides
for k, v in pairs(override) do
if type(v) == "table" and type(result[k]) == "table" then
result[k] = env.deep_merge(result[k], v)
else
result[k] = v
end
end
return result
end
-- Get the merged configuration for the current environment
function env.get_config()
local env_config = env_configs[ENV_TYPE] or env_configs.development
return env.deep_merge(base_config, env_config)
end
-- Get the environment type
function env.get_env_type()
return ENV_TYPE
end
-- Is this the production environment?
function env.is_production()
return ENV_TYPE == "production"
end
-- Is this the development environment?
function env.is_development()
return ENV_TYPE == "development"
end
return env
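A sketch of how the module is consumed, assuming lua_package_path makes it loadable as "config.env":
-- e.g. inside init_by_lua_block
local env = require "config.env"
local config = env.get_config()
ngx.log(ngx.INFO, "starting ", config.app_name, " v", config.version,
" in ", env.get_env_type(), " mode")
if not env.is_production() then
-- enable debug-only endpoints, verbose logging, etc.
end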
2.2 Hot Configuration Reload
-- config/hot_reload.lua - hot configuration reload
local hot_reload = {}
local cjson = require "cjson"
local lfs = require "lfs"
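-- NOTE: lfs (LuaFileSystem) is not bundled with OpenResty; install it
-- separately (e.g. via luarocks) or swap in another mtime source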
-- Config files to watch
local config_files = {
"/etc/openresty/app.conf",
"/etc/openresty/routes.conf",
"/etc/openresty/upstream.conf"
}
-- Cache of file modification times
local file_mtimes = {}
-- Parsed configuration cache
local config_cache = {}
-- Has the file changed since we last checked?
function hot_reload.check_file_modified(filepath)
local attr = lfs.attributes(filepath)
if not attr then
return false
end
local current_mtime = attr.modification
local cached_mtime = file_mtimes[filepath]
if not cached_mtime or current_mtime > cached_mtime then
file_mtimes[filepath] = current_mtime
return true
end
return false
end
-- Load and parse a JSON config file
function hot_reload.load_config_file(filepath)
local file = io.open(filepath, "r")
if not file then
ngx.log(ngx.ERR, "Failed to open config file: ", filepath)
return nil
end
local content = file:read("*all")
file:close()
local success, config = pcall(cjson.decode, content)
if not success then
ngx.log(ngx.ERR, "Failed to parse config file: ", filepath, ", error: ", config)
return nil
end
return config
end
-- Reload any configs whose files have changed
function hot_reload.reload_config()
local updated = false
for _, filepath in ipairs(config_files) do
if hot_reload.check_file_modified(filepath) then
ngx.log(ngx.INFO, "Config file modified, reloading: ", filepath)
local new_config = hot_reload.load_config_file(filepath)
if new_config then
config_cache[filepath] = new_config
updated = true
-- Fire the config-updated hook
hot_reload.on_config_updated(filepath, new_config)
end
end
end
return updated
end
-- Config update callback
function hot_reload.on_config_updated(filepath, config)
-- Dispatch the update based on the file name
if filepath:match("routes%.conf$") then
hot_reload.update_routes(config)
elseif filepath:match("upstream%.conf$") then
hot_reload.update_upstream(config)
elseif filepath:match("app%.conf$") then
hot_reload.update_app_config(config)
end
end
-- Update the routing table
function hot_reload.update_routes(routes_config)
local shared_dict = ngx.shared.routes
if not shared_dict then
ngx.log(ngx.ERR, "Routes shared dict not found")
return
end
-- Drop all existing routes
shared_dict:flush_all()
-- Load the new routes
for path, config in pairs(routes_config) do
local success, err = shared_dict:set(path, cjson.encode(config))
if not success then
ngx.log(ngx.ERR, "Failed to update route: ", path, ", error: ", err)
end
end
ngx.log(ngx.INFO, "Routes updated successfully")
end
-- Update the upstream configuration
function hot_reload.update_upstream(upstream_config)
local shared_dict = ngx.shared.upstream
if not shared_dict then
ngx.log(ngx.ERR, "Upstream shared dict not found")
return
end
for name, servers in pairs(upstream_config) do
local success, err = shared_dict:set(name, cjson.encode(servers))
if not success then
ngx.log(ngx.ERR, "Failed to update upstream: ", name, ", error: ", err)
end
end
ngx.log(ngx.INFO, "Upstream configuration updated successfully")
end
-- Update the application configuration
function hot_reload.update_app_config(app_config)
local shared_dict = ngx.shared.app_config
if not shared_dict then
ngx.log(ngx.ERR, "App config shared dict not found")
return
end
for key, value in pairs(app_config) do
local encoded_value = type(value) == "table" and cjson.encode(value) or tostring(value)
local success, err = shared_dict:set(key, encoded_value)
if not success then
ngx.log(ngx.ERR, "Failed to update app config: ", key, ", error: ", err)
end
end
ngx.log(ngx.INFO, "App configuration updated successfully")
end
-- Get a cached config by file path
function hot_reload.get_config(filepath)
return config_cache[filepath]
end
-- Initialization
function hot_reload.init()
-- Load all config files once at startup
for _, filepath in ipairs(config_files) do
local config = hot_reload.load_config_file(filepath)
if config then
config_cache[filepath] = config
hot_reload.on_config_updated(filepath, config)
end
end
end
-- Periodically check for config updates
function hot_reload.start_monitor()
local function check_config()
hot_reload.reload_config()
end
-- Check the config files every 5 seconds
local ok, err = ngx.timer.every(5, check_config)
if not ok then
ngx.log(ngx.ERR, "Failed to start config monitor: ", err)
else
ngx.log(ngx.INFO, "Config monitor started")
end
end
return hot_reload
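Wiring the module up takes one hook per worker. Because config_cache and file_mtimes are plain per-worker Lua tables, it is intended that every worker runs its own timer; the shared dicts keep results consistent across workers. A sketch, assuming routes, upstream, and app_config are declared with lua_shared_dict:
-- init_worker_by_lua_block body (sketch)
local hot_reload = require "config.hot_reload"
hot_reload.init()          -- load all config files once at startup
hot_reload.start_monitor() -- then poll for changes every 5 seconds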
3. Service Management
3.1 Systemd Service Configuration
# /etc/systemd/system/openresty.service
[Unit]
Description=OpenResty Web Server
After=network.target remote-fs.target nss-lookup.target
Wants=network.target
[Service]
Type=forking
PIDFile=/var/run/openresty.pid
ExecStartPre=/usr/local/openresty/bin/openresty -t -q -g 'daemon on; master_process on;'
ExecStart=/usr/local/openresty/bin/openresty -g 'daemon on; master_process on;'
ExecReload=/bin/kill -s HUP $MAINPID
ExecStop=/bin/kill -s TERM $MAINPID
KillSignal=SIGTERM
KillMode=mixed
PrivateTmp=true
LimitNOFILE=65536
LimitNPROC=32768
LimitCORE=infinity
Restart=on-failure
RestartSec=5
User=openresty
Group=openresty
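# Assumption: binding ports 80/443 as the non-root openresty user needs this
# capability (alternatively run the master as root and drop privileges via
# the nginx `user` directive)
AmbientCapabilities=CAP_NET_BIND_SERVICE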
# Environment variables
Environment=ENV_TYPE=production
Environment=WORKER_PROCESSES=auto
Environment=WORKER_CONNECTIONS=2048
# Security hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/log/nginx /var/cache/nginx /var/run
[Install]
WantedBy=multi-user.target
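After installing the unit file, run sudo systemctl daemon-reload once, then sudo systemctl enable --now openresty to start the service and register it to start at boot.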
3.2 Process Management Script
#!/bin/bash
# scripts/openresty-manager.sh
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_NAME="openresty-app"
PID_FILE="/var/run/openresty.pid"
LOG_DIR="/var/log/nginx"
CONF_FILE="/etc/openresty/nginx.conf"
# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Logging helpers
log_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# Check process status
check_status() {
if [ -f "$PID_FILE" ]; then
local pid=$(cat "$PID_FILE")
if kill -0 "$pid" 2>/dev/null; then
echo "running"
return 0
else
echo "dead"
return 1
fi
else
echo "stopped"
return 1
fi
}
# Start the service
start_service() {
log_info "Starting OpenResty..."
local status=$(check_status)
if [ "$status" = "running" ]; then
log_warn "OpenResty is already running"
return 0
fi
# Test the configuration
if ! /usr/local/openresty/bin/openresty -t -c "$CONF_FILE"; then
log_error "Configuration test failed"
return 1
fi
# Launch the server
/usr/local/openresty/bin/openresty -c "$CONF_FILE"
# Wait for startup
sleep 2
local status=$(check_status)
if [ "$status" = "running" ]; then
log_info "OpenResty started successfully"
return 0
else
log_error "Failed to start OpenResty"
return 1
fi
}
# Stop the service
stop_service() {
log_info "Stopping OpenResty..."
local status=$(check_status)
if [ "$status" = "stopped" ]; then
log_warn "OpenResty is not running"
return 0
fi
if [ -f "$PID_FILE" ]; then
local pid=$(cat "$PID_FILE")
# Send SIGTERM
kill -TERM "$pid" 2>/dev/null || true
# Wait for the process to exit
local count=0
while kill -0 "$pid" 2>/dev/null && [ $count -lt 30 ]; do
sleep 1
count=$((count + 1))
done
# Force-kill if the process is still alive
if kill -0 "$pid" 2>/dev/null; then
log_warn "Force killing OpenResty process"
kill -KILL "$pid" 2>/dev/null || true
fi
rm -f "$PID_FILE"
fi
log_info "OpenResty stopped"
}
# Restart the service
restart_service() {
log_info "Restarting OpenResty..."
stop_service
sleep 2
start_service
}
# Reload the configuration
reload_service() {
log_info "Reloading OpenResty configuration..."
local status=$(check_status)
if [ "$status" != "running" ]; then
log_error "OpenResty is not running"
return 1
fi
# Test the configuration
if ! /usr/local/openresty/bin/openresty -t -c "$CONF_FILE"; then
log_error "Configuration test failed"
return 1
fi
# Send SIGHUP for a graceful configuration reload
local pid=$(cat "$PID_FILE")
kill -HUP "$pid"
log_info "Configuration reloaded successfully"
}
# Show status
show_status() {
local status=$(check_status)
echo "OpenResty Status: $status"
if [ "$status" = "running" ]; then
local pid=$(cat "$PID_FILE")
echo "PID: $pid"
# Show process info
ps -p "$pid" -o pid,ppid,user,%cpu,%mem,vsz,rss,tty,stat,start,time,command
# Show listening ports
echo ""
echo "Listening ports:"
netstat -tlnp 2>/dev/null | grep "$pid" || true
fi
}
# Tail the logs
show_logs() {
local log_type=${1:-access}
local lines=${2:-50}
case $log_type in
access)
tail -n "$lines" "$LOG_DIR/access.log"
;;
error)
tail -n "$lines" "$LOG_DIR/error.log"
;;
*)
log_error "Unknown log type: $log_type (use 'access' or 'error')"
return 1
;;
esac
}
# Health check
health_check() {
local endpoint=${1:-"http://localhost/health"}
log_info "Performing health check on $endpoint"
if curl -f -s "$endpoint" > /dev/null; then
log_info "Health check passed"
return 0
else
log_error "Health check failed"
return 1
fi
}
# Show help
show_help() {
cat << EOF
Usage: $0 {start|stop|restart|reload|status|logs|health|help}
Commands:
start Start OpenResty service
stop Stop OpenResty service
restart Restart OpenResty service
reload Reload configuration without stopping
status Show service status
logs Show logs (access|error) [lines]
health Perform health check [endpoint]
help Show this help message
Examples:
$0 start
$0 logs error 100
$0 health http://localhost:8080/health
EOF
}
# Main entry point
main() {
case "${1:-}" in
start)
start_service
;;
stop)
stop_service
;;
restart)
restart_service
;;
reload)
reload_service
;;
status)
show_status
;;
logs)
show_logs "$2" "$3"
;;
health)
health_check "$2"
;;
help|--help|-h)
show_help
;;
*)
log_error "Unknown command: ${1:-}"
show_help
exit 1
;;
esac
}
# Require root except for read-only commands
if [ "$EUID" -ne 0 ] && [ "${1:-}" != "status" ] && [ "${1:-}" != "logs" ] && [ "${1:-}" != "health" ] && [ "${1:-}" != "help" ]; then
log_error "This script must be run as root for most operations"
exit 1
fi
main "$@"
4. Monitoring and Alerting
4.1 Prometheus Integration
-- lua/prometheus_metrics.lua - Prometheus metrics collection
local prometheus_metrics = {}
-- Assumes the knyar/nginx-lua-prometheus library (opm get knyar/nginx-lua-prometheus)
-- and a `lua_shared_dict prometheus_metrics 10M;` declared in the http block
local prometheus = require "prometheus"
-- Initialize Prometheus with a metric-name prefix (best done in init_worker_by_lua*)
local prom = prometheus.init("prometheus_metrics", "nginx_")
-- Metric definitions
local metrics = {
-- Request counter
requests_total = prom:counter(
"http_requests_total",
"Total number of HTTP requests",
{"method", "status", "endpoint"}
),
-- Request latency histogram
request_duration = prom:histogram(
"http_request_duration_seconds",
"HTTP request latency",
{"method", "endpoint"},
{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10}
),
-- Active connections (the "nginx_" prefix from init() yields nginx_connections_active)
active_connections = prom:gauge(
"connections_active",
"Number of active connections"
),
-- Database connection pool
db_connections = prom:gauge(
"database_connections",
"Database connection pool status",
{"pool", "status"}
),
-- Cache operation counters
cache_operations = prom:counter(
"cache_operations_total",
"Cache operations",
{"operation", "result"}
),
-- Error counter
errors_total = prom:counter(
"errors_total",
"Total number of errors",
{"type", "severity"}
),
-- Lua memory usage
memory_usage = prom:gauge(
"lua_memory_usage_bytes",
"Lua memory usage in bytes"
)
}
-- Record request metrics
function prometheus_metrics.record_request(method, status, endpoint, duration)
metrics.requests_total:inc(1, {method, tostring(status), endpoint})
metrics.request_duration:observe(duration, {method, endpoint})
end
-- Update the active-connections gauge (reading/writing/waiting reserved for future use)
function prometheus_metrics.update_connections(active, reading, writing, waiting)
metrics.active_connections:set(active)
end
-- Record database connection pool state
function prometheus_metrics.record_db_connection(pool, status, count)
metrics.db_connections:set(count, {pool, status})
end
-- Record a cache operation
function prometheus_metrics.record_cache_operation(operation, result)
metrics.cache_operations:inc(1, {operation, result})
end
-- Record an error
function prometheus_metrics.record_error(error_type, severity)
metrics.errors_total:inc(1, {error_type, severity})
end
-- Update memory usage
function prometheus_metrics.update_memory_usage()
local memory_kb = collectgarbage("count")
metrics.memory_usage:set(memory_kb * 1024)
end
-- Collect and emit all metrics. With nginx-lua-prometheus, prom:collect()
-- writes the scrape payload to the response itself, so callers must not
-- wrap this call in ngx.say()
function prometheus_metrics.collect()
-- Refresh gauge-style metrics right before the scrape
prometheus_metrics.update_memory_usage()
-- Snapshot nginx connection state
local active = ngx.var.connections_active
if active then
prometheus_metrics.update_connections(tonumber(active))
end
prom:collect()
end
return prometheus_metrics
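Defining metrics is only half the job; each request must also be recorded. The natural place is a log_by_lua hook, which runs after the response has been sent and so stays off the latency path. A sketch (using the raw URI as the endpoint label is for brevity only; a real deployment should normalize it to a route name to keep label cardinality bounded):
-- log_by_lua_block body (sketch)
local metrics = require "prometheus_metrics"
local latency = tonumber(ngx.var.request_time) or 0
metrics.record_request(ngx.var.request_method, ngx.status, ngx.var.uri, latency)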
# Metrics endpoint in the nginx configuration
location /metrics {
access_log off;
allow 127.0.0.1;
allow 10.0.0.0/8;
deny all;
content_by_lua_block {
local prometheus_metrics = require "prometheus_metrics"
ngx.header.content_type = "text/plain"
prometheus_metrics.collect()
}
}
# Status endpoint
location /status {
access_log off;
allow 127.0.0.1;
allow 10.0.0.0/8;
deny all;
content_by_lua_block {
local cjson = require "cjson"
local status = {
timestamp = ngx.time(),
version = "1.0.0",
-- assumes an app_start_time shared dict populated at startup; falls back to 0
uptime = ngx.time() - (ngx.shared.app_start_time and ngx.shared.app_start_time:get("start_time") or ngx.time()),
connections = {
active = ngx.var.connections_active,
reading = ngx.var.connections_reading,
writing = ngx.var.connections_writing,
waiting = ngx.var.connections_waiting
},
memory = {
lua_memory_kb = collectgarbage("count"),
shared_dicts = {}
}
}
-- Collect shared-dict usage
local shared_dicts = {"cache", "sessions", "rate_limit"}
for _, dict_name in ipairs(shared_dicts) do
local dict = ngx.shared[dict_name]
if dict then
status.memory.shared_dicts[dict_name] = {
capacity = dict:capacity(),
free_space = dict:free_space()
}
end
end
ngx.header.content_type = "application/json"
ngx.say(cjson.encode(status))
}
}
4.2 Health Checks
-- lua/health_check.lua - health check module
local health_check = {}
local cjson = require "cjson"
-- Health check definitions
local checks = {
database = {
name = "Database Connection",
timeout = 5,
critical = true
},
redis = {
name = "Redis Connection",
timeout = 3,
critical = true
},
external_api = {
name = "External API",
timeout = 10,
critical = false
},
disk_space = {
name = "Disk Space",
timeout = 1,
critical = true
}
}
-- Check database connectivity
function health_check.check_database()
local mysql = require "resty.mysql"
local db = mysql:new()
db:set_timeout(5000)
local ok, err = db:connect({
host = "127.0.0.1",
port = 3306,
database = "app",
user = "app_user",
password = "app_pass"
})
if not ok then
return false, "Connection failed: " .. (err or "unknown error")
end
local res, err = db:query("SELECT 1 as health_check")
db:set_keepalive(10000, 100)
if not res then
return false, "Query failed: " .. (err or "unknown error")
end
return true, "OK"
end
-- Check Redis connectivity
function health_check.check_redis()
local redis = require "resty.redis"
local red = redis:new()
red:set_timeout(3000)
local ok, err = red:connect("127.0.0.1", 6379)
if not ok then
return false, "Connection failed: " .. (err or "unknown error")
end
local res, err = red:ping()
red:set_keepalive(10000, 100)
if not res then
return false, "Ping failed: " .. (err or "unknown error")
end
return true, "OK"
end
-- Check an external API
function health_check.check_external_api()
local http = require "resty.http"
local httpc = http.new()
httpc:set_timeout(10000)
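-- Note: request_uri verifies TLS by default, which requires
-- lua_ssl_trusted_certificate in nginx.conf; disable verification
-- (ssl_verify = false) only in test environments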
local res, err = httpc:request_uri("https://api.example.com/health", {
method = "GET",
headers = {
["User-Agent"] = "OpenResty-HealthCheck/1.0"
}
})
if not res then
return false, "Request failed: " .. (err or "unknown error")
end
if res.status ~= 200 then
return false, "HTTP " .. res.status .. ": " .. (res.body or "")
end
return true, "OK"
end
-- Check disk space
function health_check.check_disk_space()
local handle = io.popen("df / | tail -1 | awk '{print $5}' | sed 's/%//'")
if not handle then
return false, "Unable to run df"
end
local usage = handle:read("*a")
handle:close()
local usage_percent = tonumber(usage)
if not usage_percent then
return false, "Unable to get disk usage"
end
if usage_percent > 90 then
return false, "Disk usage too high: " .. usage_percent .. "%"
elseif usage_percent > 80 then
return true, "Warning: Disk usage is " .. usage_percent .. "%"
end
return true, "Disk usage: " .. usage_percent .. "%"
end
-- Run every health check
function health_check.run_all_checks()
local results = {
timestamp = ngx.time(),
overall_status = "healthy",
checks = {}
}
local check_functions = {
database = health_check.check_database,
redis = health_check.check_redis,
external_api = health_check.check_external_api,
disk_space = health_check.check_disk_space
}
for check_name, check_config in pairs(checks) do
local check_func = check_functions[check_name]
if check_func then
local start_time = ngx.now()
local success, message = check_func()
local duration = ngx.now() - start_time
local check_result = {
name = check_config.name,
status = success and "healthy" or "unhealthy",
message = message,
duration_ms = duration * 1000,
critical = check_config.critical
}
results.checks[check_name] = check_result
-- Any failed critical check marks the overall status unhealthy
if not success and check_config.critical then
results.overall_status = "unhealthy"
end
end
end
return results
end
-- Lightweight health check (fast response, e.g. for load balancer probes)
function health_check.simple_check()
return {
status = "healthy",
timestamp = ngx.time(),
version = "1.0.0"
}
end
return health_check
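One way to expose the full check over HTTP (a sketch of the body of a content_by_lua_block under, say, location /health/full): returning 503 on failure lets the load balancer from section 1.1 take the node out of rotation automatically.
-- content_by_lua_block body (sketch)
local cjson = require "cjson"
local health_check = require "health_check"
local result = health_check.run_all_checks()
if result.overall_status ~= "healthy" then
ngx.status = ngx.HTTP_SERVICE_UNAVAILABLE -- 503 removes the node from the pool
end
ngx.header.content_type = "application/json"
ngx.say(cjson.encode(result))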
4.3 Alert Configuration
# prometheus/alert_rules.yml - Prometheus alerting rules
groups:
- name: openresty
rules:
# Service availability
- alert: OpenRestyDown
expr: up{job="openresty"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "OpenResty instance is down"
description: "OpenResty instance {{ $labels.instance }} has been down for more than 1 minute."
# High error rate
- alert: HighErrorRate
expr: |
(
rate(nginx_http_requests_total{status=~"5.."}[5m]) /
rate(nginx_http_requests_total[5m])
) * 100 > 5
for: 5m
labels:
severity: warning
annotations:
summary: "High error rate detected"
description: "Error rate is {{ $value }}% for instance {{ $labels.instance }}"
# High latency
- alert: HighLatency
expr: |
histogram_quantile(0.95,
rate(nginx_http_request_duration_seconds_bucket[5m])
) > 1
for: 5m
labels:
severity: warning
annotations:
summary: "High latency detected"
description: "95th percentile latency is {{ $value }}s for instance {{ $labels.instance }}"
# Memory usage
- alert: HighMemoryUsage
expr: nginx_lua_memory_usage_bytes / (1024 * 1024) > 512
for: 10m
labels:
severity: warning
annotations:
summary: "High Lua memory usage"
description: "Lua memory usage is {{ $value }}MB for instance {{ $labels.instance }}"
# Connection count
- alert: HighConnectionCount
expr: nginx_connections_active > 1000
for: 5m
labels:
severity: warning
annotations:
summary: "High connection count"
description: "Active connections: {{ $value }} for instance {{ $labels.instance }}"
# Database connections
- alert: DatabaseConnectionIssue
expr: nginx_database_connections{status="error"} > 0
for: 1m
labels:
severity: critical
annotations:
summary: "Database connection issues"
description: "Database connection errors detected for pool {{ $labels.pool }}"
# Cache hit rate
- alert: LowCacheHitRate
expr: |
(
rate(nginx_cache_operations_total{result="hit"}[10m]) /
rate(nginx_cache_operations_total{operation="get"}[10m])
) * 100 < 80
for: 15m
labels:
severity: warning
annotations:
summary: "Low cache hit rate"
description: "Cache hit rate is {{ $value }}% for instance {{ $labels.instance }}"
# Disk usage
- alert: HighDiskUsage
expr: |
(
(node_filesystem_size_bytes - node_filesystem_avail_bytes) /
node_filesystem_size_bytes
) * 100 > 85
for: 5m
labels:
severity: warning
annotations:
summary: "High disk usage"
description: "Disk usage is {{ $value }}% for instance {{ $labels.instance }}"
# alertmanager/alertmanager.yml - Alertmanager configuration
global:
smtp_smarthost: 'smtp.example.com:587'
smtp_from: 'alerts@example.com'
smtp_auth_username: 'alerts@example.com'
smtp_auth_password: 'password'
route:
group_by: ['alertname', 'cluster', 'service']
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: 'default'
routes:
- match:
severity: critical
receiver: 'critical-alerts'
- match:
severity: warning
receiver: 'warning-alerts'
receivers:
- name: 'default'
email_configs:
- to: 'ops@example.com'
subject: '[{{ .Status | toUpper }}] {{ .GroupLabels.alertname }}'
body: |
{{ range .Alerts }}
Alert: {{ .Annotations.summary }}
Description: {{ .Annotations.description }}
Labels: {{ range .Labels.SortedPairs }}{{ .Name }}={{ .Value }} {{ end }}
{{ end }}
- name: 'critical-alerts'
email_configs:
- to: 'ops@example.com,oncall@example.com'
subject: '[CRITICAL] {{ .GroupLabels.alertname }}'
body: |
CRITICAL ALERT!
{{ range .Alerts }}
Alert: {{ .Annotations.summary }}
Description: {{ .Annotations.description }}
Instance: {{ .Labels.instance }}
Time: {{ .StartsAt }}
{{ end }}
slack_configs:
- api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
channel: '#alerts'
title: 'Critical Alert: {{ .GroupLabels.alertname }}'
text: |
{{ range .Alerts }}
{{ .Annotations.summary }}
{{ .Annotations.description }}
{{ end }}
- name: 'warning-alerts'
email_configs:
- to: 'ops@example.com'
subject: '[WARNING] {{ .GroupLabels.alertname }}'
body: |
{{ range .Alerts }}
Alert: {{ .Annotations.summary }}
Description: {{ .Annotations.description }}
Instance: {{ .Labels.instance }}
{{ end }}
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'cluster', 'service']
Working through this chapter, you should now have a complete picture of running OpenResty in production: deployment architecture, configuration management, service management, and monitoring with alerting. Applied together, these practices are the foundation of a stable, reliable, and scalable OpenResty environment.