目录
索引基础概念
索引结构
graph TB
    %% Cluster -> index -> shards (primary + replica), plus a sketch of a document.
    %% Every subgraph has an explicit id: the two shard subgraphs share the same
    %% title, and without ids the title doubles as the id and the two collide.
    subgraph es_cluster["Elasticsearch集群"]
        subgraph es_index["索引 (Index)"]
            subgraph shard0["分片 (Shard)"]
                A[主分片 0]
                B[副本分片 0]
            end
            subgraph shard1["分片 (Shard)"]
                C[主分片 1]
                D[副本分片 1]
            end
        end
        subgraph doc_structure["文档结构"]
            E[文档 ID]
            F[字段 1]
            G[字段 2]
            H[字段 N]
        end
    end
    %% Dotted edges: primary -> replica
    A -.-> B
    C -.-> D
    %% Solid edges: document id -> fields
    E --> F
    F --> G
    G --> H
核心概念
索引 (Index): - 类似于关系数据库中的表(7.x 起已移除映射类型,一个索引只对应一种文档结构,不宜再类比为“数据库”) - 包含具有相似特征的文档集合 - 由一个或多个分片组成
分片 (Shard): - 索引的水平分割单位 - 每个分片是一个独立的Lucene索引 - 支持主分片和副本分片
映射 (Mapping): - 定义文档字段的数据类型和索引方式 - 类似于关系数据库中的表结构 - 支持动态和静态映射
索引命名规范
# 推荐的索引命名模式
<数据类型>-<环境>-<日期>
# 示例
logs-production-2024.01.15
metrics-staging-2024.01.15
events-development-2024.01.15
# 时间序列数据
apache-logs-2024.01.15
nginx-access-2024.01.15
app-errors-2024.01.15
索引模板管理
1. 索引模板基础
创建索引模板:
PUT _index_template/logs-template
{
"index_patterns": ["logs-*"],
"priority": 100,
"template": {
"settings": {
"number_of_shards": 2,
"number_of_replicas": 1,
"index.refresh_interval": "30s",
"index.codec": "best_compression",
"index.mapping.total_fields.limit": 2000,
"index.max_result_window": 50000
},
"mappings": {
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"level": {
"type": "keyword",
"ignore_above": 256
},
"message": {
"type": "text",
"analyzer": "standard",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 512
}
}
},
"host": {
"properties": {
"name": {
"type": "keyword"
},
"ip": {
"type": "ip"
}
}
},
"tags": {
"type": "keyword"
},
"fields": {
"type": "object",
"dynamic": true
}
}
},
"aliases": {
"logs-current": {},
"logs-all": {}
}
},
"composed_of": ["logs-mappings", "logs-settings"],
"version": 1,
"_meta": {
"description": "Template for application logs",
"created_by": "elk-admin",
"created_at": "2024-01-15"
}
}
2. 组件模板
映射组件模板(注意:组件模板必须先于引用它的索引模板创建,否则上文 logs-template 的 composed_of 会因找不到组件模板而创建失败):
PUT _component_template/logs-mappings
{
"template": {
"mappings": {
"properties": {
"@timestamp": {
"type": "date"
},
"log_level": {
"type": "keyword"
},
"source_ip": {
"type": "ip"
},
"user_agent": {
"type": "text",
"analyzer": "standard",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"response_time": {
"type": "float"
},
"status_code": {
"type": "integer"
},
"geoip": {
"properties": {
"location": {
"type": "geo_point"
},
"country_name": {
"type": "keyword"
},
"city_name": {
"type": "keyword"
}
}
}
}
}
},
"version": 1,
"_meta": {
"description": "Common mappings for log data"
}
}
设置组件模板:
PUT _component_template/logs-settings
{
"template": {
"settings": {
"index.lifecycle.name": "logs-policy",
"index.lifecycle.rollover_alias": "logs-write",
"index.refresh_interval": "30s",
"index.number_of_shards": 2,
"index.number_of_replicas": 1,
"index.codec": "best_compression",
"index.sort.field": "@timestamp",
"index.sort.order": "desc",
"analysis": {
"analyzer": {
"custom_log_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"stop",
"snowball"
]
}
}
}
}
},
"version": 1
}
3. 动态模板
PUT _index_template/dynamic-logs-template
{
"index_patterns": ["dynamic-logs-*"],
"template": {
"mappings": {
"dynamic_templates": [
{
"strings_as_keywords": {
"match_mapping_type": "string",
"match": "*_id",
"mapping": {
"type": "keyword",
"ignore_above": 256
}
}
},
{
"strings_as_text": {
"match_mapping_type": "string",
"match": "*_text",
"mapping": {
"type": "text",
"analyzer": "standard",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
{
"longs_as_integers": {
"match_mapping_type": "long",
"mapping": {
"type": "integer"
}
}
},
{
"geo_point_fields": {
"match": "*_location",
"mapping": {
"type": "geo_point"
}
}
}
]
}
}
}
索引生命周期管理
1. ILM策略配置
# ILM policy for log indices: hot (rollover) -> warm -> cold -> frozen -> delete.
# The frozen phase does not accept the "freeze" action; it is driven by
# searchable_snapshot instead. "logs-snapshot-repo" is a placeholder: register
# a snapshot repository under that name (or substitute an existing one) before
# installing this policy.
PUT _ilm/policy/logs-policy
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {
          "rollover": {
            "max_size": "10gb",
            "max_age": "1d",
            "max_docs": 10000000
          },
          "set_priority": {
            "priority": 100
          }
        }
      },
      "warm": {
        "min_age": "1d",
        "actions": {
          "allocate": {
            "number_of_replicas": 0,
            "include": {
              "box_type": "warm"
            }
          },
          "forcemerge": {
            "max_num_segments": 1
          },
          "set_priority": {
            "priority": 50
          }
        }
      },
      "cold": {
        "min_age": "7d",
        "actions": {
          "allocate": {
            "number_of_replicas": 0,
            "include": {
              "box_type": "cold"
            }
          },
          "set_priority": {
            "priority": 0
          }
        }
      },
      "frozen": {
        "min_age": "30d",
        "actions": {
          "searchable_snapshot": {
            "snapshot_repository": "logs-snapshot-repo"
          }
        }
      },
      "delete": {
        "min_age": "90d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
2. 索引别名管理
# Bootstrap the first backing index and mark it as the write index for the
# logs-write alias. The index name must differ from the alias name, and it
# ends in -000001 so that rollover can derive the next index name.
PUT logs-000001
{
  "aliases": {
    "logs-write": {
      "is_write_index": true
    },
    "logs-all": {}
  }
}
# 滚动索引
POST logs-write/_rollover
{
"conditions": {
"max_age": "1d",
"max_size": "10gb",
"max_docs": 10000000
}
}
# 查看别名
GET _alias/logs-*
# 更新别名
POST _aliases
{
"actions": [
{
"remove": {
"index": "logs-2024.01.14",
"alias": "logs-current"
}
},
{
"add": {
"index": "logs-2024.01.15",
"alias": "logs-current"
}
}
]
}
3. 数据流管理
# A data stream can only be created when a matching index template that
# contains a "data_stream" object exists. "logs-stream" also matches the
# logs-* pattern of logs-template (priority 100), so this template uses a
# higher priority to take precedence.
PUT _index_template/logs-stream-template
{
  "index_patterns": ["logs-stream*"],
  "data_stream": {},
  "priority": 200,
  "template": {
    "mappings": {
      "properties": {
        "@timestamp": {
          "type": "date"
        }
      }
    }
  }
}
# Create the data stream; this request takes no body.
PUT _data_stream/logs-stream
# 查看数据流
GET _data_stream/logs-stream
# 删除数据流
DELETE _data_stream/logs-stream
映射配置优化
1. 字段类型优化
PUT optimized-logs
{
"mappings": {
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"level": {
"type": "keyword",
"ignore_above": 32
},
"message": {
"type": "text",
"analyzer": "standard",
"store": false,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 512,
"store": false
}
}
},
"response_time": {
"type": "scaled_float",
"scaling_factor": 1000
},
"status_code": {
"type": "short"
},
"user_id": {
"type": "keyword",
"ignore_above": 64,
"doc_values": false
},
"session_id": {
"type": "keyword",
"index": false,
"doc_values": false
},
"metadata": {
"type": "object",
"enabled": false
},
"tags": {
"type": "keyword",
"ignore_above": 128
},
"location": {
"type": "geo_point"
},
"ip_address": {
"type": "ip"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
}
}
},
"settings": {
"number_of_shards": 2,
"number_of_replicas": 1,
"refresh_interval": "30s",
"max_result_window": 50000,
"mapping": {
"total_fields": {
"limit": 2000
},
"depth": {
"limit": 20
},
"nested_fields": {
"limit": 100
}
}
}
}
2. 自定义分析器
PUT logs-with-analyzer
{
"settings": {
"analysis": {
"char_filter": {
"quotes_filter": {
"type": "mapping",
"mappings": [
"\u0091=>\u0027",
"\u0092=>\u0027",
"\u0093=>\u0022",
"\u0094=>\u0022"
]
}
},
"tokenizer": {
"custom_tokenizer": {
"type": "pattern",
"pattern": "[\\W&&[^-]]+"
}
},
"filter": {
"custom_stop": {
"type": "stop",
"stopwords": ["the", "and", "or", "but"]
},
"custom_stemmer": {
"type": "stemmer",
"language": "english"
}
},
"analyzer": {
"log_analyzer": {
"type": "custom",
"char_filter": ["quotes_filter"],
"tokenizer": "custom_tokenizer",
"filter": [
"lowercase",
"custom_stop",
"custom_stemmer"
]
},
"path_analyzer": {
"type": "custom",
"tokenizer": "path_hierarchy",
"filter": ["lowercase"]
}
}
}
},
"mappings": {
"properties": {
"message": {
"type": "text",
"analyzer": "log_analyzer",
"search_analyzer": "standard"
},
"file_path": {
"type": "text",
"analyzer": "path_analyzer"
}
}
}
}
查询语法详解
1. 基础查询
匹配查询:
GET logs-*/_search
{
"query": {
"match": {
"message": "error database connection"
}
}
}
# 多字段匹配
GET logs-*/_search
{
"query": {
"multi_match": {
"query": "error",
"fields": ["message", "error_message", "description"],
"type": "best_fields",
"fuzziness": "AUTO"
}
}
}
精确匹配:
# Exact match on the log level. logs-template maps "level" directly as a
# keyword field, so it is queried as-is; there is no "level.keyword" sub-field.
GET logs-*/_search
{
  "query": {
    "term": {
      "level": "ERROR"
    }
  }
}
# 多值匹配
GET logs-*/_search
{
"query": {
"terms": {
"status_code": [400, 404, 500]
}
}
}
范围查询:
GET logs-*/_search
{
"query": {
"range": {
"@timestamp": {
"gte": "2024-01-15T00:00:00",
"lte": "2024-01-15T23:59:59",
"time_zone": "+08:00"
}
}
}
}
# 数值范围
GET logs-*/_search
{
"query": {
"range": {
"response_time": {
"gte": 100,
"lte": 1000
}
}
}
}
2. 复合查询
布尔查询:
# Bool query: recent "error" messages from production, excluding DEBUG, that
# come from at least one of the listed services.
# "level" is mapped as keyword in logs-template, so it is addressed directly.
# "service" and "environment" are not in the explicit mappings; their .keyword
# sub-fields assume default dynamic string mapping.
GET logs-*/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "message": "error"
          }
        },
        {
          "range": {
            "@timestamp": {
              "gte": "now-1h"
            }
          }
        }
      ],
      "must_not": [
        {
          "term": {
            "level": "DEBUG"
          }
        }
      ],
      "should": [
        {
          "term": {
            "service.keyword": "web-api"
          }
        },
        {
          "term": {
            "service.keyword": "auth-service"
          }
        }
      ],
      "filter": [
        {
          "term": {
            "environment.keyword": "production"
          }
        }
      ],
      "minimum_should_match": 1
    }
  }
}
嵌套查询:
GET logs-*/_search
{
"query": {
"nested": {
"path": "errors",
"query": {
"bool": {
"must": [
{
"match": {
"errors.type": "NullPointerException"
}
},
{
"range": {
"errors.count": {
"gte": 5
}
}
}
]
}
}
}
}
}
3. 高级查询
通配符和正则表达式:
GET logs-*/_search
{
"query": {
"wildcard": {
"file_path.keyword": "/var/log/app*.log"
}
}
}
# 正则表达式
GET logs-*/_search
{
"query": {
"regexp": {
"user_agent.keyword": ".*Chrome.*"
}
}
}
模糊查询:
GET logs-*/_search
{
"query": {
"fuzzy": {
"message": {
"value": "databse",
"fuzziness": "AUTO",
"max_expansions": 50
}
}
}
}
地理位置查询:
GET logs-*/_search
{
"query": {
"geo_distance": {
"distance": "100km",
"geoip.location": {
"lat": 40.7128,
"lon": -74.0060
}
}
}
}
# 地理边界查询
GET logs-*/_search
{
"query": {
"geo_bounding_box": {
"geoip.location": {
"top_left": {
"lat": 42.0,
"lon": -74.0
},
"bottom_right": {
"lat": 40.0,
"lon": -72.0
}
}
}
}
}
聚合分析
1. 指标聚合
GET logs-*/_search
{
"size": 0,
"aggs": {
"avg_response_time": {
"avg": {
"field": "response_time"
}
},
"max_response_time": {
"max": {
"field": "response_time"
}
},
"response_time_stats": {
"stats": {
"field": "response_time"
}
},
"response_time_percentiles": {
"percentiles": {
"field": "response_time",
"percents": [50, 90, 95, 99]
}
},
"unique_users": {
"cardinality": {
"field": "user_id.keyword"
}
}
}
}
2. 桶聚合
GET logs-*/_search
{
"size": 0,
"aggs": {
"status_codes": {
"terms": {
"field": "status_code",
"size": 10,
"order": {
"_count": "desc"
}
},
"aggs": {
"avg_response_time": {
"avg": {
"field": "response_time"
}
}
}
},
"time_histogram": {
"date_histogram": {
"field": "@timestamp",
"calendar_interval": "1h",
"time_zone": "+08:00",
"min_doc_count": 1
},
"aggs": {
"error_count": {
"filter": {
"range": {
"status_code": {
"gte": 400
}
}
}
}
}
},
"response_time_ranges": {
"range": {
"field": "response_time",
"ranges": [
{"to": 100},
{"from": 100, "to": 500},
{"from": 500, "to": 1000},
{"from": 1000}
]
}
}
}
}
3. 管道聚合
# Daily request counts with a day-over-day derivative and a 7-bucket linearly
# weighted moving average, plus the busiest day overall.
# moving_fn replaces the moving_avg aggregation, which was removed in 8.0.
GET logs-*/_search
{
  "size": 0,
  "aggs": {
    "daily_requests": {
      "date_histogram": {
        "field": "@timestamp",
        "calendar_interval": "1d"
      },
      "aggs": {
        "total_requests": {
          "value_count": {
            "field": "@timestamp"
          }
        },
        "requests_derivative": {
          "derivative": {
            "buckets_path": "total_requests"
          }
        },
        "requests_moving_avg": {
          "moving_fn": {
            "buckets_path": "total_requests",
            "window": 7,
            "script": "MovingFunctions.linearWeightedAvg(values)"
          }
        }
      }
    },
    "max_daily_requests": {
      "max_bucket": {
        "buckets_path": "daily_requests>total_requests"
      }
    }
  }
}
性能优化策略
1. 查询优化
使用过滤器而非查询:
# Before: the exact-match condition sits in "must", so it takes part in scoring.
# ("level" is mapped as keyword in logs-template, so it has no .keyword sub-field.)
GET logs-*/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "message": "error"
          }
        },
        {
          "term": {
            "level": "ERROR"
          }
        }
      ]
    }
  }
}
# After: the exact-match condition moves to "filter", which does not affect
# the score and can be cached.
GET logs-*/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "message": "error"
          }
        }
      ],
      "filter": [
        {
          "term": {
            "level": "ERROR"
          }
        }
      ]
    }
  }
}
限制返回字段:
GET logs-*/_search
{
"_source": ["@timestamp", "level", "message", "host.name"],
"query": {
"match": {
"message": "error"
}
}
}
# Alternatively, skip _source and retrieve values with the "fields" option.
# stored_fields is not used here: it only returns fields mapped with
# "store": true, and none of the mappings in this chapter store fields.
GET logs-*/_search
{
  "_source": false,
  "fields": ["@timestamp", "level", "message"],
  "query": {
    "match": {
      "message": "error"
    }
  }
}
2. 索引优化
分片策略:
# 计算分片数量
# 分片大小: 10-50GB
# 每个节点分片数: 不超过20个/GB堆内存
# 示例: 100GB数据,3个节点,每个节点8GB堆内存
# 推荐分片数: 100GB / 30GB = 3-4个主分片
# 每个节点最大分片数: 8GB * 20 = 160个
索引设置优化:
PUT optimized-index
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"refresh_interval": "30s",
"index.codec": "best_compression",
"index.compound_format": false,
"index.max_result_window": 50000,
"index.max_rescore_window": 10000,
"index.sort.field": "@timestamp",
"index.sort.order": "desc",
"index.routing.allocation.total_shards_per_node": 2,
"index.merge.policy.max_merged_segment": "5gb",
"index.translog.flush_threshold_size": "1gb",
"index.translog.sync_interval": "30s"
}
}
3. 搜索优化技巧
使用搜索模板:
# Stored search template for error logs.
# Params: query_string, log_level, start_time, end_time, size (optional).
# Mustache has no "{{var|default}}" syntax; a default is expressed with an
# inverted section: {{size}}{{^size}}100{{/size}} renders 100 when size is unset.
# "level" is mapped as keyword in logs-template, so it is filtered directly.
PUT _scripts/error-logs-template
{
  "script": {
    "lang": "mustache",
    "source": {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "message": "{{query_string}}"
              }
            }
          ],
          "filter": [
            {
              "term": {
                "level": "{{log_level}}"
              }
            },
            {
              "range": {
                "@timestamp": {
                  "gte": "{{start_time}}",
                  "lte": "{{end_time}}"
                }
              }
            }
          ]
        }
      },
      "size": "{{size}}{{^size}}100{{/size}}",
      "sort": [
        {
          "@timestamp": {
            "order": "desc"
          }
        }
      ]
    }
  }
}
# 使用模板
GET logs-*/_search/template
{
"id": "error-logs-template",
"params": {
"query_string": "database connection",
"log_level": "ERROR",
"start_time": "now-1h",
"end_time": "now",
"size": 50
}
}
异步搜索:
POST logs-*/_async_search
{
"size": 0,
"aggs": {
"daily_stats": {
"date_histogram": {
"field": "@timestamp",
"calendar_interval": "1d"
},
"aggs": {
"avg_response_time": {
"avg": {
"field": "response_time"
}
}
}
}
}
}
# 获取异步搜索结果
GET _async_search/FmRldE8zREeRTU59b1FiZkEBYWx0ZXJuYXRpdmVfaWQ
监控与维护
1. 集群监控
# 集群健康状态
GET _cluster/health?pretty
# 节点信息
GET _nodes/stats?pretty
# 索引统计
GET _cat/indices?v&s=store.size:desc
# 分片信息
GET _cat/shards?v&s=store:desc
# 热线程
GET _nodes/hot_threads
# 待处理任务
GET _cluster/pending_tasks
2. 性能监控脚本
#!/bin/bash
# es-monitor.sh
ES_HOST="localhost:9200"
THRESHOLD_HEAP=80
THRESHOLD_DISK=85
# Query _cluster/health on $ES_HOST and print the cluster status.
# Returns 0 when the status is "green" and 1 otherwise, including when the
# cluster cannot be reached or the response carries no status.
check_cluster_health() {
    local health
    # --max-time keeps the monitor from hanging on an unresponsive node;
    # "// empty" yields an empty string instead of the literal "null" when
    # the response has no .status field.
    health=$(curl -s --max-time 10 "$ES_HOST/_cluster/health" | jq -r '.status // empty')
    if [ -z "$health" ]; then
        health="unreachable"
    fi
    echo "Cluster Health: $health"
    if [ "$health" != "green" ]; then
        echo "WARNING: Cluster health is $health"
        return 1
    fi
}
# Print heap, disk and CPU usage for every node reported by _nodes/stats.
# Disk usage is derived as (1 - available / total) * 100 so that the value
# shown is the used share, not the free share.
check_node_resources() {
    echo "Node Resource Usage:"
    curl -s "$ES_HOST/_nodes/stats" | jq -r '
        .nodes | to_entries[] |
        "Node: \(.key)" +
        "\n Heap Used: \(.value.jvm.mem.heap_used_percent)%" +
        "\n Disk Used: \((1 - .value.fs.total.available_in_bytes / .value.fs.total.total_in_bytes) * 100 | floor)%" +
        "\n CPU Usage: \(.value.os.cpu.percent)%\n"
    '
}
# Print per-node search and indexing counters from _nodes/stats.
# These counters are running totals, not a list of slow queries and not
# limited to the last hour, so the header says so. Per-query slowness has
# to come from the search slow log instead.
check_slow_queries() {
    echo "Search/Indexing Totals (cumulative since node start):"
    curl -s "$ES_HOST/_nodes/stats" | jq -r '
        .nodes | to_entries[] |
        "Node: \(.key)" +
        "\n Search Query Time: \(.value.indices.search.query_time_in_millis)ms" +
        "\n Search Query Count: \(.value.indices.search.query_total)" +
        "\n Indexing Time: \(.value.indices.indexing.index_time_in_millis)ms\n"
    '
}
# Entry point: print a dated banner, then run the three checks in order,
# separated by blank lines and framed by two separator rules.
main() {
    local rule="===================================="
    printf 'Elasticsearch Monitoring - %s\n' "$(date)"
    printf '%s\n' "$rule"
    check_cluster_health
    printf '\n'
    check_node_resources
    printf '\n'
    check_slow_queries
    printf '%s\n' "$rule"
}

main
3. 索引维护
# 强制合并索引
POST logs-2024.01.14/_forcemerge?max_num_segments=1
# 刷新索引
POST logs-*/_refresh
# 清理缓存
POST logs-*/_cache/clear
# 重新分配分片
POST _cluster/reroute
{
"commands": [
{
"move": {
"index": "logs-2024.01.14",
"shard": 0,
"from_node": "node1",
"to_node": "node2"
}
}
]
}
# 索引恢复
GET _recovery?active_only=true
# 分片分配解释
GET _cluster/allocation/explain
{
"index": "logs-2024.01.14",
"shard": 0,
"primary": true
}
总结
本章详细介绍了Elasticsearch的索引管理与查询优化,包括:
核心要点
- 索引管理: 模板配置、生命周期管理、别名管理
- 映射优化: 字段类型选择、分析器配置、动态模板
- 查询语法: 基础查询、复合查询、高级查询技巧
- 聚合分析: 指标聚合、桶聚合、管道聚合
- 性能优化: 查询优化、索引优化、搜索优化
- 监控维护: 集群监控、性能监控、索引维护
最佳实践
- 合理设计索引模板和映射结构
- 使用ILM管理索引生命周期
- 优化查询语句,使用过滤器提高性能
- 定期监控集群健康状态和性能指标
- 建立索引维护和优化的定期任务
- 根据数据特征选择合适的分片策略
下一章我们将学习Kibana可视化与仪表板设计。