学习目标
通过本章学习,您将掌握:
- 索引的创建、删除和管理操作
- 映射(Mapping)的定义和配置
- 字段类型和属性设置
- 动态映射和静态映射
- 索引模板和组件模板
- 索引生命周期管理
1. 索引基础操作
1.1 创建索引
基本创建
# 创建简单索引
PUT /my_index
# 创建带设置的索引
PUT /my_index
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"refresh_interval": "30s"
}
}
高级创建配置
PUT /advanced_index
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"index": {
"refresh_interval": "30s",
"max_result_window": 20000,
"max_rescore_window": 10000,
"blocks": {
"read_only": false
}
},
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "stop"]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "my_custom_analyzer"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
1.2 查看索引信息
# 查看所有索引
GET /_cat/indices?v
# 查看特定索引信息
GET /my_index
# 查看索引设置
GET /my_index/_settings
# 查看索引映射
GET /my_index/_mapping
# 查看索引统计信息
GET /my_index/_stats
# 查看索引分片信息
GET /_cat/shards/my_index?v
1.3 修改索引设置
# 修改动态设置
PUT /my_index/_settings
{
"index": {
"refresh_interval": "60s",
"number_of_replicas": 2
}
}
# 关闭索引(允许修改静态设置)
POST /my_index/_close
# 修改静态设置(注意:number_of_shards 在索引创建后不可修改,即使关闭索引也不行;调整分片数需使用 _split、_shrink 或 _reindex)
PUT /my_index/_settings
{
"index": {
"codec": "best_compression"
}
}
# 重新打开索引
POST /my_index/_open
1.4 删除索引
# 删除单个索引
DELETE /my_index
# 删除多个索引
DELETE /index1,index2,index3
# 使用通配符删除
DELETE /log-*
# 删除所有索引(危险操作)
DELETE /_all
1.5 索引别名
# 创建别名
POST /_aliases
{
"actions": [
{
"add": {
"index": "my_index_v1",
"alias": "my_index"
}
}
]
}
# 原子性切换别名
POST /_aliases
{
"actions": [
{
"remove": {
"index": "my_index_v1",
"alias": "my_index"
}
},
{
"add": {
"index": "my_index_v2",
"alias": "my_index"
}
}
]
}
# 带过滤器的别名
POST /_aliases
{
"actions": [
{
"add": {
"index": "logs",
"alias": "error_logs",
"filter": {
"term": {
"level": "error"
}
}
}
}
]
}
# 查看别名
GET /_aliases
GET /my_index/_alias
2. 映射(Mapping)详解
2.1 映射基础概念
映射定义了文档及其字段的存储和索引方式,包括:
- 字段的数据类型
- 字段的索引方式
- 分析器的使用
- 字段的存储选项
2.2 字段数据类型
核心数据类型
PUT /type_examples
{
"mappings": {
"properties": {
# 文本类型
"title": {
"type": "text",
"analyzer": "standard"
},
"keyword_field": {
"type": "keyword"
},
# 数值类型
"age": {
"type": "integer"
},
"price": {
"type": "float"
},
"score": {
"type": "double"
},
"count": {
"type": "long"
},
"percentage": {
"type": "scaled_float",
"scaling_factor": 100
},
# 日期类型
"created_at": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
},
# 布尔类型
"is_active": {
"type": "boolean"
},
# 二进制类型
"binary_data": {
"type": "binary"
}
}
}
}
复杂数据类型
PUT /complex_types
{
"mappings": {
"properties": {
# 对象类型
"user": {
"type": "object",
"properties": {
"name": {
"type": "text"
},
"age": {
"type": "integer"
}
}
},
# 嵌套类型
"comments": {
"type": "nested",
"properties": {
"author": {
"type": "keyword"
},
"content": {
"type": "text"
},
"date": {
"type": "date"
}
}
},
# 数组类型(隐式支持)
"tags": {
"type": "keyword"
},
# 地理位置类型
"location": {
"type": "geo_point"
},
"area": {
"type": "geo_shape"
},
# IP地址类型
"ip_address": {
"type": "ip"
},
# 范围类型
"age_range": {
"type": "integer_range"
},
"date_range": {
"type": "date_range",
"format": "yyyy-MM-dd"
}
}
}
}
特殊字段类型
PUT /special_types
{
"mappings": {
"properties": {
# 连接类型(父子关系)
"join_field": {
"type": "join",
"relations": {
"post": "comment"
}
},
# 别名字段
"full_name": {
"type": "alias",
"path": "user.name"
},
# 搜索建议类型
"suggest": {
"type": "completion",
"analyzer": "simple",
"preserve_separators": true,
"preserve_position_increments": true,
"max_input_length": 50
},
# 令牌计数类型
"word_count": {
"type": "token_count",
"analyzer": "standard"
},
# 密集向量类型(用于机器学习)
"vector": {
"type": "dense_vector",
"dims": 128
}
}
}
}
2.3 字段属性配置
文本字段属性
PUT /text_properties
{
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "standard",
"search_analyzer": "standard",
"index": true,
"store": false,
"term_vector": "with_positions_offsets",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion"
}
}
},
"description": {
"type": "text",
"analyzer": "english",
"index_options": "positions",
"similarity": "BM25"
}
}
}
}
关键字字段属性
PUT /keyword_properties
{
"mappings": {
"properties": {
"status": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"category": {
"type": "keyword",
"null_value": "default_category",
"doc_values": true
}
}
},
"settings": {
"analysis": {
"normalizer": {
"lowercase_normalizer": {
"type": "custom",
"filter": ["lowercase"]
}
}
}
}
}
数值字段属性
PUT /numeric_properties
{
"mappings": {
"properties": {
"price": {
"type": "float",
"coerce": true,
"ignore_malformed": false,
"doc_values": true,
"index": true
},
"quantity": {
"type": "integer",
"null_value": 0
}
}
}
}
2.4 动态映射
动态映射规则
PUT /dynamic_mapping
{
"mappings": {
"dynamic": true, # 可选值:true、false、strict;dynamic_templates 仅在 dynamic 为 true 时生效(strict 会直接拒绝未定义的新字段)
"dynamic_templates": [
{
"strings_as_keywords": {
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
},
{
"integers_as_floats": {
"match_mapping_type": "long",
"mapping": {
"type": "float"
}
}
}
],
"properties": {
"title": {
"type": "text"
}
}
}
}
复杂动态模板
PUT /advanced_dynamic
{
"mappings": {
"dynamic_templates": [
{
"message_field_template": {
"path_match": "message.*",
"mapping": {
"type": "text",
"analyzer": "english"
}
}
},
{
"string_fields_template": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
{
"date_fields_template": {
"match": "*_date",
"mapping": {
"type": "date",
"format": "yyyy-MM-dd"
}
}
}
]
}
}
2.5 映射更新
添加新字段
# 添加新字段到现有映射
PUT /my_index/_mapping
{
"properties": {
"new_field": {
"type": "keyword"
},
"nested_field": {
"type": "object",
"properties": {
"sub_field": {
"type": "text"
}
}
}
}
}
更新字段属性(有限制)
# 只能更新某些属性,如ignore_above
PUT /my_index/_mapping
{
"properties": {
"existing_keyword": {
"type": "keyword",
"ignore_above": 512
}
}
}
3. 索引模板
3.1 传统索引模板
# 创建索引模板
PUT /_template/log_template
{
"index_patterns": ["log-*"],
"order": 1,
"settings": {
"number_of_shards": 2,
"number_of_replicas": 1,
"refresh_interval": "30s",
"index.lifecycle.name": "log_policy"
},
"mappings": {
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"level": {
"type": "keyword"
},
"message": {
"type": "text",
"analyzer": "standard"
},
"host": {
"type": "keyword"
},
"service": {
"type": "keyword"
}
}
},
"aliases": {
"logs": {}
}
}
# 查看模板
GET /_template/log_template
# 删除模板
DELETE /_template/log_template
3.2 组件模板(Elasticsearch 7.8+)
创建组件模板
# 设置组件模板
PUT /_component_template/logs_settings
{
"template": {
"settings": {
"number_of_shards": 2,
"number_of_replicas": 1,
"refresh_interval": "30s"
}
}
}
PUT /_component_template/logs_mappings
{
"template": {
"mappings": {
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"level": {
"type": "keyword"
},
"message": {
"type": "text"
}
}
}
}
}
PUT /_component_template/logs_aliases
{
"template": {
"aliases": {
"logs": {}
}
}
}
创建索引模板使用组件模板
PUT /_index_template/logs_template
{
"index_patterns": ["logs-*"],
"priority": 200,
"composed_of": [
"logs_settings",
"logs_mappings",
"logs_aliases"
],
"template": {
"settings": {
"index.lifecycle.name": "logs_policy"
}
},
"data_stream": {} # 数据流固定使用 @timestamp 作为时间戳字段,无需(也不能)指定 timestamp_field
}
3.3 模板优先级和继承
# 基础模板(低优先级)
PUT /_index_template/base_template
{
"index_patterns": ["*"],
"priority": 100,
"template": {
"settings": {
"number_of_replicas": 1
},
"mappings": {
"properties": {
"created_at": {
"type": "date"
}
}
}
}
}
# 应用特定模板(高优先级)
PUT /_index_template/app_template
{
"index_patterns": ["app-*"],
"priority": 200,
"template": {
"settings": {
"number_of_shards": 3,
"refresh_interval": "10s"
},
"mappings": {
"properties": {
"app_id": {
"type": "keyword"
},
"user_id": {
"type": "keyword"
}
}
}
}
}
4. 索引生命周期管理(ILM)
4.1 创建ILM策略
PUT /_ilm/policy/logs_policy
{
"policy": {
"phases": {
"hot": {
"min_age": "0ms",
"actions": {
"rollover": {
"max_size": "10GB",
"max_age": "7d",
"max_docs": 10000000
},
"set_priority": {
"priority": 100
}
}
},
"warm": {
"min_age": "7d",
"actions": {
"allocate": {
"number_of_replicas": 0,
"include": {},
"exclude": {},
"require": {
"data": "warm"
}
},
"forcemerge": {
"max_num_segments": 1
},
"set_priority": {
"priority": 50
}
}
},
"cold": {
"min_age": "30d",
"actions": {
"allocate": {
"number_of_replicas": 0,
"require": {
"data": "cold"
}
},
"set_priority": {
"priority": 0
}
}
},
"delete": {
"min_age": "90d",
"actions": {
"delete": {}
}
}
}
}
}
4.2 应用ILM策略
# 在索引模板中应用ILM策略
PUT /_index_template/logs_with_ilm
{
"index_patterns": ["logs-*"],
"template": {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1,
"index.lifecycle.name": "logs_policy",
"index.lifecycle.rollover_alias": "logs"
},
"mappings": {
"properties": {
"@timestamp": {
"type": "date"
},
"message": {
"type": "text"
}
}
}
}
}
# 创建初始索引
PUT /logs-000001
{
"aliases": {
"logs": {
"is_write_index": true
}
}
}
4.3 监控ILM状态
# 查看ILM策略
GET /_ilm/policy/logs_policy
# 查看索引ILM状态
GET /logs-*/_ilm/explain
# 重试执行失败(处于 ERROR 步骤)的ILM操作
POST /logs-*/_ilm/retry
# 移除ILM策略
POST /logs-*/_ilm/remove
5. 实践案例
5.1 电商产品索引设计
PUT /products
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"analysis": {
"analyzer": {
"product_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"stop",
"synonym_filter"
]
}
},
"filter": {
"synonym_filter": {
"type": "synonym",
"synonyms": [
"phone,mobile,smartphone",
"laptop,notebook,computer"
]
}
}
}
},
"mappings": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "text",
"analyzer": "product_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion",
"analyzer": "simple"
}
}
},
"description": {
"type": "text",
"analyzer": "product_analyzer"
},
"category": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"level": {
"type": "integer"
}
}
},
"price": {
"type": "scaled_float",
"scaling_factor": 100
},
"currency": {
"type": "keyword"
},
"brand": {
"type": "keyword"
},
"tags": {
"type": "keyword"
},
"attributes": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"inventory": {
"type": "object",
"properties": {
"quantity": {
"type": "integer"
},
"warehouse": {
"type": "keyword"
}
}
},
"rating": {
"type": "object",
"properties": {
"average": {
"type": "float"
},
"count": {
"type": "integer"
}
}
},
"images": {
"type": "keyword",
"index": false
},
"status": {
"type": "keyword"
},
"created_at": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"updated_at": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
}
}
}
}
5.2 日志分析索引设计
# 组件模板 - 通用设置
PUT /_component_template/logs_base_settings
{
"template": {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1,
"refresh_interval": "30s",
"index.codec": "best_compression"
}
}
}
# 组件模板 - 通用映射
PUT /_component_template/logs_base_mappings
{
"template": {
"mappings": {
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"level": {
"type": "keyword"
},
"message": {
"type": "text",
"analyzer": "standard",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"host": {
"type": "object",
"properties": {
"name": {
"type": "keyword"
},
"ip": {
"type": "ip"
}
}
},
"service": {
"type": "object",
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
},
"trace": {
"type": "object",
"properties": {
"id": {
"type": "keyword"
},
"span_id": {
"type": "keyword"
}
}
},
"user": {
"type": "object",
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
}
},
"http": {
"type": "object",
"properties": {
"method": {
"type": "keyword"
},
"status_code": {
"type": "integer"
},
"url": {
"type": "keyword",
"ignore_above": 1024
},
"response_time": {
"type": "float"
}
}
},
"error": {
"type": "object",
"properties": {
"type": {
"type": "keyword"
},
"message": {
"type": "text"
},
"stack_trace": {
"type": "text",
"index": false
}
}
}
}
}
}
}
# 应用日志模板
PUT /_index_template/app_logs_template
{
"index_patterns": ["app-logs-*"],
"priority": 200,
"composed_of": [
"logs_base_settings",
"logs_base_mappings"
],
"template": {
"settings": {
"index.lifecycle.name": "app_logs_policy"
},
"aliases": {
"app-logs": {}
}
}
}
5.3 时间序列数据索引
PUT /_component_template/metrics_settings
{
"template": {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"refresh_interval": "30s",
"index.codec": "best_compression",
"index.sort.field": "@timestamp",
"index.sort.order": "desc"
}
}
}
PUT /_component_template/metrics_mappings
{
"template": {
"mappings": {
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"metric": {
"type": "object",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "double"
},
"unit": {
"type": "keyword"
}
}
},
"labels": {
"type": "flattened"
},
"host": {
"type": "keyword"
},
"datacenter": {
"type": "keyword"
}
}
}
}
}
PUT /_index_template/metrics_template
{
"index_patterns": ["metrics-*"],
"priority": 200,
"composed_of": [
"metrics_settings",
"metrics_mappings"
],
"template": {
"settings": {
"index.lifecycle.name": "metrics_policy"
}
},
"data_stream": {} # 数据流固定使用 @timestamp 作为时间戳字段,无需(也不能)指定 timestamp_field
}
6. 最佳实践
6.1 映射设计原则
- 明确字段类型:避免依赖动态映射
- 合理使用嵌套类型:只在需要独立查询对象数组时使用
- 控制字段数量:避免映射爆炸,限制字段数量
- 优化存储:不需要搜索的字段设置 "index": false
- 使用多字段:为不同用途创建多个字段
6.2 性能优化建议
# 优化映射示例
PUT /optimized_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1,
"refresh_interval": "30s",
"index.codec": "best_compression",
"index.mapping.total_fields.limit": 2000,
"index.mapping.depth.limit": 20,
"index.mapping.nested_fields.limit": 50
},
"mappings": {
"dynamic": "strict",
"properties": {
"id": {
"type": "keyword",
"doc_values": false # 不需要聚合和排序
},
"title": {
"type": "text",
"store": true, # 需要高亮显示
"term_vector": "with_positions_offsets"
},
"content": {
"type": "text",
"index_options": "positions", # 不需要偏移量
"norms": false # 不需要评分标准化
},
"metadata": {
"type": "object",
"enabled": false # 只存储,不索引
},
"tags": {
"type": "keyword",
"eager_global_ordinals": true # 频繁聚合的字段
}
}
}
}
6.3 索引管理脚本
#!/usr/bin/env python3
# index_management.py
import requests
import json
from datetime import datetime, timedelta
import logging
class IndexManager:
    """Small helper around the Elasticsearch REST API for index housekeeping.

    Every public method is best-effort: HTTP and transport failures are
    logged through ``self.logger`` and reported via the return value
    (``False`` / ``None``) instead of being raised to the caller.
    """

    def __init__(self, es_host="localhost:9200", username=None, password=None,
                 timeout=30):
        """Configure the target cluster.

        Args:
            es_host: Either a bare ``host:port`` (plain HTTP is assumed) or a
                full ``http://`` / ``https://`` URL.
            username: Basic-auth user; auth is only sent when both username
                and password are truthy.
            password: Basic-auth password.
            timeout: Per-request timeout in seconds passed to ``requests``.
        """
        if es_host.startswith(("http://", "https://")):
            # A full URL was supplied; keep its scheme, drop a trailing slash
            # so that path concatenation below does not produce "//".
            self.base_url = es_host.rstrip("/")
        else:
            self.base_url = f"http://{es_host}"
        self.auth = (username, password) if username and password else None
        self.timeout = timeout
        self.logger = self._setup_logger()

    def _setup_logger(self):
        """Configure root logging at INFO level and return this module's logger."""
        logging.basicConfig(level=logging.INFO)
        return logging.getLogger(__name__)

    def _put_json(self, path, payload, success_msg, failure_label, error_label):
        """PUT ``payload`` as JSON to ``path``; log the outcome.

        Returns:
            True when the server answers HTTP 200, False otherwise
            (including when the request itself fails).
        """
        try:
            response = requests.put(
                f"{self.base_url}/{path}",
                json=payload,
                auth=self.auth,
                timeout=self.timeout,
            )
        except Exception as e:
            # Deliberately broad: the public methods promise not to raise.
            self.logger.error(f"{error_label}: {e}")
            return False
        if response.status_code == 200:
            self.logger.info(success_msg)
            return True
        self.logger.error(f"{failure_label}: {response.text}")
        return False

    def create_index_with_mapping(self, index_name, mapping_config):
        """Create ``index_name`` with the given settings/mappings body.

        Returns:
            True on HTTP 200, False on any other status or request error.
        """
        return self._put_json(
            index_name,
            mapping_config,
            f"Index {index_name} created successfully",
            "Failed to create index",
            "Error creating index",
        )

    def update_mapping(self, index_name, new_fields):
        """Add or update fields in the mapping of ``index_name``.

        Args:
            index_name: Target index.
            new_fields: Dict placed under ``"properties"`` in the request body.

        Returns:
            True on HTTP 200, False on any other status or request error.
        """
        return self._put_json(
            f"{index_name}/_mapping",
            {"properties": new_fields},
            f"Mapping updated for {index_name}",
            "Failed to update mapping",
            "Error updating mapping",
        )

    def create_index_template(self, template_name, template_config):
        """Create or replace the composable index template ``template_name``.

        Returns:
            True on HTTP 200, False on any other status or request error.
        """
        return self._put_json(
            f"_index_template/{template_name}",
            template_config,
            f"Template {template_name} created successfully",
            "Failed to create template",
            "Error creating template",
        )

    def get_index_info(self, index_pattern="*", columns=None):
        """Fetch ``_cat/indices`` rows for ``index_pattern`` as parsed JSON.

        Args:
            index_pattern: Index name or wildcard pattern.
            columns: Optional comma-separated column list sent as the ``h``
                query parameter; when omitted the server's default columns
                are returned.

        Returns:
            The decoded JSON response (a list of row dicts) on HTTP 200,
            otherwise None.
        """
        params = {"format": "json"}
        if columns:
            params["h"] = columns
        try:
            response = requests.get(
                f"{self.base_url}/_cat/indices/{index_pattern}",
                params=params,
                auth=self.auth,
                timeout=self.timeout,
            )
            if response.status_code == 200:
                return response.json()
            self.logger.error(f"Failed to get index info: {response.text}")
            return None
        except Exception as e:
            self.logger.error(f"Error getting index info: {e}")
            return None

    @staticmethod
    def _creation_datetime(index_row):
        """Convert a row's ``creation.date`` (epoch milliseconds) to a datetime.

        Returns:
            A naive local ``datetime``, or None when the column is missing or
            is not an integer number of milliseconds.
        """
        try:
            return datetime.fromtimestamp(int(index_row.get("creation.date")) / 1000)
        except (TypeError, ValueError, OverflowError, OSError):
            return None

    def delete_old_indices(self, pattern, days_old=30):
        """Delete indices matching ``pattern`` created more than ``days_old`` days ago.

        Rows whose creation date cannot be determined are skipped with a
        warning rather than deleted. Failures on one index do not stop the
        remaining deletions.
        """
        # The creation date is not among the default _cat/indices columns,
        # so it has to be requested explicitly.
        indices = self.get_index_info(pattern, columns="index,creation.date")
        if not indices:
            return
        cutoff_date = datetime.now() - timedelta(days=days_old)
        for index in indices:
            index_name = index['index']
            creation_date = self._creation_datetime(index)
            if creation_date is None:
                self.logger.warning(
                    f"Skipping {index_name}: creation date unavailable"
                )
                continue
            if creation_date >= cutoff_date:
                continue
            try:
                response = requests.delete(
                    f"{self.base_url}/{index_name}",
                    auth=self.auth,
                    timeout=self.timeout,
                )
                if response.status_code == 200:
                    self.logger.info(f"Deleted old index: {index_name}")
                else:
                    self.logger.error(f"Failed to delete {index_name}: {response.text}")
            except Exception as e:
                self.logger.error(f"Error deleting {index_name}: {e}")

    def reindex_data(self, source_index, dest_index, query=None):
        """Start an asynchronous reindex from ``source_index`` to ``dest_index``.

        Args:
            source_index: Index to read from.
            dest_index: Index to write to.
            query: Optional query DSL dict restricting the copied documents.

        Returns:
            The task ID reported by the server, or None on failure.
        """
        reindex_config = {
            "source": {
                "index": source_index
            },
            "dest": {
                "index": dest_index
            }
        }
        if query:
            reindex_config["source"]["query"] = query
        try:
            response = requests.post(
                f"{self.base_url}/_reindex",
                # Without this flag the call blocks until the copy finishes
                # and the response carries no "task" field.
                params={"wait_for_completion": "false"},
                json=reindex_config,
                auth=self.auth,
                timeout=self.timeout,
            )
            if response.status_code == 200:
                task_id = response.json().get('task')
                self.logger.info(f"Reindex started, task ID: {task_id}")
                return task_id
            self.logger.error(f"Failed to start reindex: {response.text}")
            return None
        except Exception as e:
            self.logger.error(f"Error starting reindex: {e}")
            return None
# Usage example
if __name__ == "__main__":
    index_manager = IndexManager()

    # Product index: one primary shard, one replica, three typed fields.
    product_fields = {
        "name": {"type": "text"},
        "price": {"type": "float"},
        "category": {"type": "keyword"},
    }
    product_index_body = {
        "settings": {"number_of_shards": 1, "number_of_replicas": 1},
        "mappings": {"properties": product_fields},
    }
    index_manager.create_index_with_mapping("products", product_index_body)

    # Composable template applied to every index whose name matches logs-*.
    log_fields = {
        "@timestamp": {"type": "date"},
        "level": {"type": "keyword"},
        "message": {"type": "text"},
    }
    logs_template_body = {
        "index_patterns": ["logs-*"],
        "template": {
            "settings": {"number_of_shards": 1, "number_of_replicas": 1},
            "mappings": {"properties": log_fields},
        },
    }
    index_manager.create_index_template("logs_template", logs_template_body)
本章总结
本章深入介绍了Elasticsearch的索引管理和映射配置:
- 索引操作:掌握了索引的创建、查看、修改和删除操作
- 映射配置:学习了各种字段类型和属性设置
- 动态映射:了解了动态映射规则和模板配置
- 索引模板:掌握了传统模板和组件模板的使用
- 生命周期管理:学习了ILM策略的配置和应用
- 实践案例:通过实际场景学习了索引设计最佳实践
下一章我们将学习Elasticsearch的搜索查询语法,深入了解如何高效地检索和分析数据。
练习题
- 设计一个博客系统的索引映射,包含文章、作者、标签等信息
- 创建一个日志索引模板,支持自动轮转和生命周期管理
- 实现一个索引管理脚本,自动清理过期索引
- 配置一个电商搜索索引,支持多语言和自动补全功能