4.1 索引基础概念

索引是数据库中用于快速查找数据的数据结构。它类似于书籍的目录,可以大大提高查询效率,但也会占用额外的存储空间并影响写入性能。

4.1.1 索引的工作原理

class IndexConcepts:
    def __init__(self):
        self.index_benefits = {
            "查询优化": "大幅提升SELECT查询速度",
            "排序优化": "加速ORDER BY操作",
            "分组优化": "提升GROUP BY性能",
            "连接优化": "优化JOIN操作",
            "唯一性约束": "确保数据唯一性"
        }
        
        self.index_costs = {
            "存储开销": "索引需要额外的磁盘空间",
            "维护开销": "INSERT/UPDATE/DELETE时需要维护索引",
            "内存消耗": "索引会占用内存缓冲池"
        }
    
    def explain_index_structure(self):
        """解释索引结构"""
        print("索引结构说明:")
        print("=" * 50)
        
        structure_info = """
MySQL主要使用B+树索引结构:

1. B+树特点:
   - 所有数据都存储在叶子节点
   - 叶子节点通过指针连接,支持范围查询
   - 非叶子节点只存储键值,不存储数据
   - 树的高度较低,减少磁盘I/O次数

2. 索引查找过程:
   - 从根节点开始
   - 根据键值比较,选择子节点
   - 重复过程直到叶子节点
   - 在叶子节点中找到目标数据

3. 聚簇索引 vs 非聚簇索引:
   - 聚簇索引:数据行按索引顺序存储(InnoDB主键索引)
   - 非聚簇索引:索引和数据分开存储(二级索引)
"""
        
        print(structure_info)
    
    def index_benefits_costs(self):
        """索引的优缺点"""
        print("\n索引的优点:")
        print("=" * 30)
        for benefit, description in self.index_benefits.items():
            print(f"{benefit}: {description}")
        
        print("\n索引的代价:")
        print("=" * 30)
        for cost, description in self.index_costs.items():
            print(f"{cost}: {description}")
    
    def when_to_use_indexes(self):
        """何时使用索引"""
        scenarios = {
            "适合创建索引": [
                "经常出现在WHERE子句中的列",
                "经常用于JOIN连接的列",
                "经常用于ORDER BY的列",
                "经常用于GROUP BY的列",
                "需要唯一性约束的列",
                "选择性高的列(不同值多)"
            ],
            "不适合创建索引": [
                "很少查询的列",
                "频繁更新的列",
                "选择性低的列(如性别)",
                "小表(几百行以内)",
                "数据类型为TEXT/BLOB的列"
            ]
        }
        
        print("\n索引使用场景:")
        print("=" * 30)
        
        for scenario_type, conditions in scenarios.items():
            print(f"\n{scenario_type}:")
            for condition in conditions:
                print(f"  - {condition}")

# 索引概念演示
index_concepts = IndexConcepts()
index_concepts.explain_index_structure()
index_concepts.index_benefits_costs()
index_concepts.when_to_use_indexes()

4.1.2 索引类型详解

class IndexTypes:
    def __init__(self):
        self.index_types = {
            "按数据结构分类": {
                "B+树索引": {
                    "描述": "MySQL默认索引类型,适用于大部分场景",
                    "适用场景": "等值查询、范围查询、排序",
                    "存储引擎": "InnoDB、MyISAM"
                },
                "哈希索引": {
                    "描述": "基于哈希表的索引,查找速度极快",
                    "适用场景": "等值查询",
                    "存储引擎": "Memory、NDB",
                    "限制": "不支持范围查询和排序"
                },
                "全文索引": {
                    "描述": "用于全文搜索的特殊索引",
                    "适用场景": "文本搜索、关键词匹配",
                    "存储引擎": "InnoDB、MyISAM"
                },
                "空间索引": {
                    "描述": "用于地理空间数据的索引",
                    "适用场景": "GIS应用、地理位置查询",
                    "存储引擎": "InnoDB、MyISAM"
                }
            },
            "按逻辑分类": {
                "主键索引": {
                    "描述": "主键自动创建的唯一索引",
                    "特点": "唯一、非空、聚簇索引"
                },
                "唯一索引": {
                    "描述": "保证列值唯一性的索引",
                    "特点": "唯一、可为空"
                },
                "普通索引": {
                    "描述": "最基本的索引类型",
                    "特点": "无唯一性限制"
                },
                "复合索引": {
                    "描述": "多个列组成的索引",
                    "特点": "遵循最左前缀原则"
                },
                "前缀索引": {
                    "描述": "对列的前几个字符建立索引",
                    "特点": "节省空间,适用于长字符串"
                }
            }
        }
    
    def show_index_types(self):
        """显示索引类型"""
        print("MySQL索引类型详解:")
        print("=" * 60)
        
        for category, types in self.index_types.items():
            print(f"\n{category}:")
            for index_type, info in types.items():
                print(f"  {index_type}:")
                print(f"    描述: {info['描述']}")
                if '适用场景' in info:
                    print(f"    适用场景: {info['适用场景']}")
                if '存储引擎' in info:
                    print(f"    存储引擎: {info['存储引擎']}")
                if '特点' in info:
                    print(f"    特点: {info['特点']}")
                if '限制' in info:
                    print(f"    限制: {info['限制']}")
    
    def index_creation_examples(self):
        """索引创建示例"""
        examples = """
-- 索引创建示例

-- 1. 创建表时定义索引
CREATE TABLE employees (
    employee_id INT AUTO_INCREMENT PRIMARY KEY,  -- 主键索引
    email VARCHAR(100) UNIQUE,                   -- 唯一索引
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    department_id INT,
    hire_date DATE,
    salary DECIMAL(10,2),
    
    -- 普通索引
    INDEX idx_department (department_id),
    
    -- 复合索引
    INDEX idx_name (last_name, first_name),
    
    -- 前缀索引
    INDEX idx_email_prefix (email(10))
);

-- 2. 在已存在的表上创建索引

-- 普通索引
CREATE INDEX idx_hire_date ON employees (hire_date);
ALTER TABLE employees ADD INDEX idx_salary (salary);

-- 唯一索引
CREATE UNIQUE INDEX idx_employee_code ON employees (employee_code);
ALTER TABLE employees ADD UNIQUE INDEX idx_phone (phone);

-- 复合索引
CREATE INDEX idx_dept_salary ON employees (department_id, salary);
ALTER TABLE employees ADD INDEX idx_hire_dept (hire_date, department_id);

-- 前缀索引
CREATE INDEX idx_description_prefix ON employees (description(20));

-- 全文索引
CREATE FULLTEXT INDEX idx_fulltext_desc ON employees (description);
ALTER TABLE employees ADD FULLTEXT INDEX idx_notes (notes);

-- 3. 查看索引
SHOW INDEXES FROM employees;
SHOW INDEX FROM employees;

-- 4. 删除索引
DROP INDEX idx_hire_date ON employees;
ALTER TABLE employees DROP INDEX idx_salary;
"""
        
        print("\n索引创建示例:")
        print("=" * 40)
        print(examples)
    
    def composite_index_rules(self):
        """复合索引规则"""
        print("\n复合索引最左前缀原则:")
        print("=" * 40)
        
        rules_explanation = """
复合索引 (a, b, c) 的使用规则:

1. 可以使用的查询条件组合:
   - WHERE a = ?                    ✓ 使用索引
   - WHERE a = ? AND b = ?          ✓ 使用索引
   - WHERE a = ? AND b = ? AND c = ? ✓ 使用索引
   - WHERE a = ? AND c = ?          ✓ 部分使用索引(只用到a)

2. 不能使用索引的查询条件:
   - WHERE b = ?                    ✗ 不使用索引
   - WHERE c = ?                    ✗ 不使用索引
   - WHERE b = ? AND c = ?          ✗ 不使用索引

3. 范围查询的影响:
   - WHERE a = ? AND b > ? AND c = ? 
     只能使用到 (a, b) 部分,c无法使用索引

4. 排序的使用:
   - ORDER BY a, b, c               ✓ 使用索引排序
   - ORDER BY a, b                  ✓ 使用索引排序
   - ORDER BY a DESC, b DESC, c DESC ✓ 使用索引排序
   - ORDER BY a, c                  ✗ 不能使用索引排序
   - ORDER BY a ASC, b DESC         ✗ 不能使用索引排序(MySQL 8.0前)
"""
        
        print(rules_explanation)
        
        # 实际示例
        examples = """
-- 复合索引使用示例
-- 假设有复合索引:INDEX idx_dept_salary_hire (department_id, salary, hire_date)

-- 能够使用索引的查询
SELECT * FROM employees WHERE department_id = 1;
SELECT * FROM employees WHERE department_id = 1 AND salary > 50000;
SELECT * FROM employees WHERE department_id = 1 AND salary = 60000 AND hire_date > '2020-01-01';

-- 不能完全使用索引的查询
SELECT * FROM employees WHERE salary > 50000;  -- 跳过了department_id
SELECT * FROM employees WHERE hire_date > '2020-01-01';  -- 跳过了前面的列

-- 部分使用索引的查询
SELECT * FROM employees WHERE department_id = 1 AND hire_date > '2020-01-01';
-- 只能使用department_id部分,hire_date无法使用索引
"""
        
        print("\n复合索引使用示例:")
        print(examples)

# 索引类型演示
index_types = IndexTypes()
index_types.show_index_types()
index_types.index_creation_examples()
index_types.composite_index_rules()

4.2 查询优化与EXPLAIN

4.2.1 EXPLAIN语句详解

class ExplainAnalysis:
    def __init__(self):
        self.explain_columns = {
            "id": "查询的标识符,表示SELECT的执行顺序",
            "select_type": "查询类型(SIMPLE、PRIMARY、SUBQUERY等)",
            "table": "当前行访问的表名",
            "partitions": "匹配的分区(如果有分区表)",
            "type": "访问类型,表示MySQL如何查找表中的行",
            "possible_keys": "可能使用的索引",
            "key": "实际使用的索引",
            "key_len": "使用索引的长度",
            "ref": "与索引比较的列或常量",
            "rows": "估计需要扫描的行数",
            "filtered": "按条件过滤后的行数百分比",
            "Extra": "额外信息,包含重要的性能提示"
        }
        
        self.access_types = {
            "system": "表只有一行(系统表),最快",
            "const": "通过主键或唯一索引访问,最多一行",
            "eq_ref": "唯一索引扫描,每个索引键只有一行匹配",
            "ref": "非唯一索引扫描,返回匹配某个值的所有行",
            "fulltext": "全文索引扫描",
            "ref_or_null": "类似ref,但包含NULL值的查找",
            "index_merge": "使用索引合并优化",
            "unique_subquery": "唯一子查询",
            "index_subquery": "子查询中使用索引",
            "range": "索引范围扫描",
            "index": "全索引扫描",
            "ALL": "全表扫描,最慢"
        }
    
    def explain_columns_detail(self):
        """EXPLAIN输出列详解"""
        print("EXPLAIN输出列详解:")
        print("=" * 60)
        
        for column, description in self.explain_columns.items():
            print(f"{column}: {description}")
    
    def access_types_detail(self):
        """访问类型详解"""
        print("\n访问类型(type列)详解:")
        print("=" * 40)
        print("性能从好到差的顺序:")
        
        for i, (access_type, description) in enumerate(self.access_types.items(), 1):
            print(f"{i:2d}. {access_type:15s}: {description}")
    
    def explain_examples(self):
        """EXPLAIN使用示例"""
        examples = """
-- EXPLAIN使用示例

-- 1. 基本EXPLAIN
EXPLAIN SELECT * FROM employees WHERE department_id = 1;

-- 2. EXPLAIN EXTENDED(MySQL 5.7及以前)
EXPLAIN EXTENDED SELECT * FROM employees WHERE salary > 50000;
SHOW WARNINGS;  -- 查看优化后的查询

-- 3. EXPLAIN FORMAT=JSON(更详细的信息)
EXPLAIN FORMAT=JSON 
SELECT e.first_name, d.department_name 
FROM employees e 
JOIN departments d ON e.department_id = d.department_id 
WHERE e.salary > 50000;

-- 4. EXPLAIN ANALYZE(MySQL 8.0+,实际执行并分析)
EXPLAIN ANALYZE 
SELECT * FROM employees 
WHERE department_id = 1 AND salary > 50000;

-- 5. 分析复杂查询
EXPLAIN 
SELECT 
    e.first_name,
    e.last_name,
    d.department_name,
    (SELECT AVG(salary) FROM employees e2 WHERE e2.department_id = e.department_id) as avg_dept_salary
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE e.hire_date > '2020-01-01'
ORDER BY e.salary DESC
LIMIT 10;
"""
        
        print("\nEXPLAIN使用示例:")
        print("=" * 40)
        print(examples)
    
    def explain_interpretation(self):
        """EXPLAIN结果解读"""
        interpretation_guide = """
EXPLAIN结果解读指南:

1. 关注type列:
   - 如果是ALL或index,考虑添加索引
   - 最好是const、eq_ref、ref
   - range也是可接受的

2. 关注rows列:
   - 表示估计扫描的行数
   - 数值越小越好
   - 与实际表大小对比

3. 关注key列:
   - NULL表示没有使用索引
   - 应该显示预期的索引名

4. 关注Extra列的重要信息:
   - Using index: 覆盖索引,很好
   - Using where: 使用WHERE过滤,正常
   - Using filesort: 需要额外排序,考虑优化
   - Using temporary: 使用临时表,考虑优化
   - Using index condition: 索引条件下推,好
   - Using join buffer: 连接缓冲,可能需要优化

5. 性能警告信号:
   - type为ALL且rows很大
   - Extra中出现Using filesort或Using temporary
   - possible_keys有值但key为NULL
   - 子查询的select_type为DEPENDENT SUBQUERY
"""
        
        print("\nEXPLAIN结果解读:")
        print("=" * 40)
        print(interpretation_guide)

# EXPLAIN分析演示
explain_analysis = ExplainAnalysis()
explain_analysis.explain_columns_detail()
explain_analysis.access_types_detail()
explain_analysis.explain_examples()
explain_analysis.explain_interpretation()

4.2.2 查询优化技巧

class QueryOptimization:
    def __init__(self):
        self.optimization_techniques = {
            "索引优化": [
                "为WHERE、ORDER BY、GROUP BY列创建索引",
                "使用复合索引覆盖多个查询条件",
                "避免在索引列上使用函数",
                "考虑使用覆盖索引减少回表"
            ],
            "查询重写": [
                "避免SELECT *,只查询需要的列",
                "使用LIMIT限制结果集大小",
                "将复杂查询分解为多个简单查询",
                "使用EXISTS替代IN子查询"
            ],
            "JOIN优化": [
                "确保JOIN条件有索引",
                "小表驱动大表",
                "避免笛卡尔积",
                "考虑使用STRAIGHT_JOIN控制连接顺序"
            ],
            "子查询优化": [
                "将相关子查询改写为JOIN",
                "使用EXISTS替代IN",
                "避免在SELECT列表中使用子查询"
            ]
        }
    
    def show_optimization_techniques(self):
        """显示优化技巧"""
        print("查询优化技巧:")
        print("=" * 50)
        
        for category, techniques in self.optimization_techniques.items():
            print(f"\n{category}:")
            for technique in techniques:
                print(f"  - {technique}")
    
    def index_optimization_examples(self):
        """索引优化示例"""
        examples = """
-- 索引优化示例

-- 1. 避免在索引列上使用函数
-- 不好的写法
SELECT * FROM employees WHERE YEAR(hire_date) = 2023;
-- 好的写法
SELECT * FROM employees WHERE hire_date >= '2023-01-01' AND hire_date < '2024-01-01';

-- 2. 使用覆盖索引
-- 创建覆盖索引
CREATE INDEX idx_covering ON employees (department_id, salary, hire_date);
-- 查询只需要索引中的列,无需回表
SELECT department_id, salary, hire_date FROM employees WHERE department_id = 1;

-- 3. 前缀索引优化
-- 分析前缀选择性
SELECT 
    COUNT(DISTINCT LEFT(email, 5)) / COUNT(*) AS prefix_5,
    COUNT(DISTINCT LEFT(email, 10)) / COUNT(*) AS prefix_10,
    COUNT(DISTINCT LEFT(email, 15)) / COUNT(*) AS prefix_15
FROM employees;
-- 选择合适的前缀长度创建索引
CREATE INDEX idx_email_prefix ON employees (email(10));

-- 4. 复合索引的列顺序优化
-- 将选择性高的列放在前面
-- 将经常用于等值查询的列放在前面
CREATE INDEX idx_optimized ON employees (department_id, hire_date, salary);
"""
        
        print("\n索引优化示例:")
        print("=" * 40)
        print(examples)
    
    def query_rewriting_examples(self):
        """查询重写示例"""
        examples = """
-- 查询重写优化示例

-- 1. 避免SELECT *
-- 不好的写法
SELECT * FROM employees WHERE department_id = 1;
-- 好的写法
SELECT employee_id, first_name, last_name, salary FROM employees WHERE department_id = 1;

-- 2. 使用LIMIT
-- 分页查询
SELECT * FROM employees ORDER BY hire_date DESC LIMIT 20 OFFSET 0;
-- 或者使用游标分页(性能更好)
SELECT * FROM employees WHERE employee_id > 1000 ORDER BY employee_id LIMIT 20;

-- 3. 子查询优化
-- 不好的写法(相关子查询)
SELECT * FROM employees e1 
WHERE salary > (SELECT AVG(salary) FROM employees e2 WHERE e2.department_id = e1.department_id);

-- 好的写法(JOIN)
SELECT e.* FROM employees e
JOIN (
    SELECT department_id, AVG(salary) as avg_salary 
    FROM employees 
    GROUP BY department_id
) dept_avg ON e.department_id = dept_avg.department_id
WHERE e.salary > dept_avg.avg_salary;

-- 4. IN vs EXISTS
-- 当子查询结果集较小时,使用IN
SELECT * FROM employees 
WHERE department_id IN (SELECT department_id FROM departments WHERE location = 'New York');

-- 当子查询结果集较大时,使用EXISTS
SELECT * FROM employees e
WHERE EXISTS (
    SELECT 1 FROM employee_projects ep 
    WHERE ep.employee_id = e.employee_id AND ep.status = 'active'
);

-- 5. UNION vs UNION ALL
-- 如果确定没有重复,使用UNION ALL(性能更好)
SELECT employee_id, first_name FROM employees WHERE department_id = 1
UNION ALL
SELECT employee_id, first_name FROM employees WHERE department_id = 2;
"""
        
        print("\n查询重写示例:")
        print("=" * 40)
        print(examples)
    
    def join_optimization_examples(self):
        """JOIN优化示例"""
        examples = """
-- JOIN优化示例

-- 1. 确保JOIN条件有索引
-- 在连接列上创建索引
CREATE INDEX idx_dept_id ON employees (department_id);
CREATE INDEX idx_dept_pk ON departments (department_id);

-- 2. 小表驱动大表
-- 假设departments表较小,employees表较大
-- MySQL优化器通常会自动选择,但可以使用STRAIGHT_JOIN强制
SELECT /*+ STRAIGHT_JOIN */ d.department_name, e.first_name
FROM departments d
JOIN employees e ON d.department_id = e.department_id
WHERE d.location = 'New York';

-- 3. 避免笛卡尔积
-- 不好的写法(缺少JOIN条件)
SELECT * FROM employees, departments;
-- 好的写法
SELECT * FROM employees e JOIN departments d ON e.department_id = d.department_id;

-- 4. 多表JOIN的顺序优化
-- 将过滤条件最严格的表放在前面
SELECT e.first_name, d.department_name, p.project_name
FROM employees e
JOIN departments d ON e.department_id = d.department_id
JOIN employee_projects ep ON e.employee_id = ep.employee_id
JOIN projects p ON ep.project_id = p.project_id
WHERE e.hire_date > '2023-01-01'  -- 最严格的过滤条件
AND d.location = 'New York'
AND p.status = 'active';

-- 5. 使用索引提示
-- 强制使用特定索引
SELECT * FROM employees USE INDEX (idx_department_salary) 
WHERE department_id = 1 AND salary > 50000;

-- 忽略特定索引
SELECT * FROM employees IGNORE INDEX (idx_hire_date) 
WHERE hire_date > '2023-01-01';
"""
        
        print("\nJOIN优化示例:")
        print("=" * 40)
        print(examples)
    
    def performance_monitoring(self):
        """性能监控"""
        monitoring_queries = """
-- 性能监控查询

-- 1. 查看慢查询日志状态
SHOW VARIABLES LIKE 'slow_query_log%';
SHOW VARIABLES LIKE 'long_query_time';

-- 2. 启用慢查询日志
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 2;  -- 2秒以上的查询记录到慢查询日志

-- 3. 查看当前运行的查询
SHOW PROCESSLIST;
-- 或者更详细的信息
SELECT * FROM information_schema.PROCESSLIST WHERE COMMAND != 'Sleep';

-- 4. 查看表的统计信息
SHOW TABLE STATUS LIKE 'employees';

-- 5. 查看索引使用情况
SHOW INDEX FROM employees;

-- 6. 查看查询缓存状态(MySQL 5.7及以前)
SHOW VARIABLES LIKE 'query_cache%';
SHOW STATUS LIKE 'Qcache%';

-- 7. 分析表的碎片情况
SELECT 
    table_name,
    ROUND(((data_length + index_length) / 1024 / 1024), 2) AS 'Total Size (MB)',
    ROUND((data_free / 1024 / 1024), 2) AS 'Free Space (MB)'
FROM information_schema.TABLES 
WHERE table_schema = 'your_database_name'
ORDER BY data_free DESC;

-- 8. 查看InnoDB状态
SHOW ENGINE INNODB STATUS;

-- 9. 性能模式查询(MySQL 5.6+)
-- 查看最耗时的语句
SELECT 
    DIGEST_TEXT,
    COUNT_STAR,
    AVG_TIMER_WAIT/1000000000 AS avg_time_seconds,
    SUM_TIMER_WAIT/1000000000 AS total_time_seconds
FROM performance_schema.events_statements_summary_by_digest
ORDER BY SUM_TIMER_WAIT DESC
LIMIT 10;
"""
        
        print("\n性能监控查询:")
        print("=" * 40)
        print(monitoring_queries)

# 查询优化演示
query_optimization = QueryOptimization()
query_optimization.show_optimization_techniques()
query_optimization.index_optimization_examples()
query_optimization.query_rewriting_examples()
query_optimization.join_optimization_examples()
query_optimization.performance_monitoring()

4.3 存储引擎优化

4.3.1 InnoDB优化

class InnoDBOptimization:
    def __init__(self):
        self.innodb_parameters = {
            "内存相关": {
                "innodb_buffer_pool_size": {
                    "描述": "InnoDB缓冲池大小,最重要的参数",
                    "建议值": "物理内存的70-80%",
                    "影响": "缓存数据页和索引页,减少磁盘I/O"
                },
                "innodb_buffer_pool_instances": {
                    "描述": "缓冲池实例数量",
                    "建议值": "缓冲池大小GB数,最大64",
                    "影响": "减少缓冲池争用,提高并发性能"
                },
                "innodb_log_buffer_size": {
                    "描述": "日志缓冲区大小",
                    "建议值": "16M-64M",
                    "影响": "减少日志写入的磁盘I/O"
                }
            },
            "I/O相关": {
                "innodb_io_capacity": {
                    "描述": "InnoDB后台I/O操作的IOPS限制",
                    "建议值": "SSD: 2000-4000, HDD: 200",
                    "影响": "控制后台刷新脏页的速度"
                },
                "innodb_flush_method": {
                    "描述": "数据文件刷新方法",
                    "建议值": "Linux: O_DIRECT",
                    "影响": "避免双重缓冲,提高I/O效率"
                },
                "innodb_file_per_table": {
                    "描述": "每个表使用独立的表空间文件",
                    "建议值": "ON",
                    "影响": "便于管理,支持表压缩"
                }
            },
            "事务相关": {
                "innodb_lock_wait_timeout": {
                    "描述": "锁等待超时时间",
                    "建议值": "50-120秒",
                    "影响": "避免长时间锁等待"
                },
                "innodb_deadlock_detect": {
                    "描述": "是否启用死锁检测",
                    "建议值": "ON",
                    "影响": "自动检测和解决死锁"
                }
            }
        }
    
    def show_innodb_parameters(self):
        """显示InnoDB参数"""
        print("InnoDB重要参数优化:")
        print("=" * 60)
        
        for category, params in self.innodb_parameters.items():
            print(f"\n{category}:")
            for param_name, param_info in params.items():
                print(f"  {param_name}:")
                print(f"    描述: {param_info['描述']}")
                print(f"    建议值: {param_info['建议值']}")
                print(f"    影响: {param_info['影响']}")
    
    def innodb_configuration_example(self):
        """InnoDB配置示例"""
        config_example = """
# InnoDB优化配置示例(my.cnf)

[mysqld]
# 基本设置
default-storage-engine = InnoDB
innodb_file_per_table = 1

# 内存设置(假设16GB内存)
innodb_buffer_pool_size = 12G
innodb_buffer_pool_instances = 12
innodb_log_buffer_size = 32M

# 日志设置
innodb_log_file_size = 1G
innodb_log_files_in_group = 2
innodb_flush_log_at_trx_commit = 1

# I/O设置
innodb_io_capacity = 2000
innodb_io_capacity_max = 4000
innodb_flush_method = O_DIRECT
innodb_read_io_threads = 8
innodb_write_io_threads = 8

# 锁和事务设置
innodb_lock_wait_timeout = 60
innodb_deadlock_detect = 1
innodb_rollback_on_timeout = 1

# 其他优化
innodb_thread_concurrency = 0
innodb_adaptive_hash_index = 1
innodb_change_buffering = all
innodb_old_blocks_time = 1000

# 监控设置
innodb_monitor_enable = all
innodb_print_all_deadlocks = 1
"""
        
        print("\nInnoDB配置示例:")
        print("=" * 40)
        print(config_example)
    
    def innodb_monitoring(self):
        """InnoDB监控"""
        monitoring_queries = """
-- InnoDB监控查询

-- 1. 查看InnoDB状态
SHOW ENGINE INNODB STATUS;

-- 2. 查看缓冲池使用情况
SELECT 
    POOL_ID,
    POOL_SIZE,
    FREE_BUFFERS,
    DATABASE_PAGES,
    OLD_DATABASE_PAGES,
    MODIFIED_DATABASE_PAGES
FROM information_schema.INNODB_BUFFER_POOL_STATS;

-- 3. 查看缓冲池命中率
SHOW STATUS LIKE 'Innodb_buffer_pool_%';

-- 计算命中率
SELECT 
    ROUND(
        (1 - (Innodb_buffer_pool_reads / Innodb_buffer_pool_read_requests)) * 100, 2
    ) AS buffer_pool_hit_rate
FROM (
    SELECT 
        VARIABLE_VALUE AS Innodb_buffer_pool_reads
    FROM information_schema.GLOBAL_STATUS 
    WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads'
) reads,
(
    SELECT 
        VARIABLE_VALUE AS Innodb_buffer_pool_read_requests
    FROM information_schema.GLOBAL_STATUS 
    WHERE VARIABLE_NAME = 'Innodb_buffer_pool_read_requests'
) requests;

-- 4. 查看锁等待情况
SELECT * FROM information_schema.INNODB_LOCKS;
SELECT * FROM information_schema.INNODB_LOCK_WAITS;

-- 5. 查看事务情况
SELECT * FROM information_schema.INNODB_TRX;

-- 6. 查看表空间使用情况
SELECT 
    NAME,
    FILE_SIZE/1024/1024 AS file_size_mb,
    ALLOCATED_SIZE/1024/1024 AS allocated_size_mb
FROM information_schema.INNODB_TABLESPACES;

-- 7. 查看死锁信息
SHOW ENGINE INNODB STATUS\G
-- 查看LATEST DETECTED DEADLOCK部分

-- 8. 查看InnoDB指标
SELECT 
    NAME,
    COUNT,
    MAX_COUNT,
    MIN_COUNT,
    AVG_COUNT,
    TIME_ENABLED,
    TIME_DISABLED
FROM information_schema.INNODB_METRICS 
WHERE STATUS = 'enabled'
ORDER BY NAME;
"""
        
        print("\nInnoDB监控查询:")
        print("=" * 40)
        print(monitoring_queries)
    
    def innodb_troubleshooting(self):
        """InnoDB故障排除"""
        troubleshooting_guide = """
InnoDB常见问题及解决方案:

1. 缓冲池命中率低(<95%):
   - 增加innodb_buffer_pool_size
   - 检查是否有大量全表扫描
   - 优化查询和索引

2. 锁等待超时频繁:
   - 优化事务逻辑,减少事务时间
   - 调整innodb_lock_wait_timeout
   - 检查是否有长时间运行的事务

3. 死锁频繁发生:
   - 统一事务中访问表的顺序
   - 减少事务大小
   - 使用较低的隔离级别(如READ COMMITTED)

4. 写入性能差:
   - 调整innodb_flush_log_at_trx_commit
   - 增加innodb_log_file_size
   - 使用SSD存储
   - 调整innodb_io_capacity

5. 查询性能差:
   - 检查是否使用了合适的索引
   - 分析EXPLAIN输出
   - 考虑增加innodb_buffer_pool_size

6. 表空间碎片:
   - 使用OPTIMIZE TABLE重建表
   - 考虑使用ALTER TABLE ... ENGINE=InnoDB
   - 定期维护和监控
"""
        
        print("\nInnoDB故障排除:")
        print("=" * 40)
        print(troubleshooting_guide)

# InnoDB优化演示
innodb_optimization = InnoDBOptimization()
innodb_optimization.show_innodb_parameters()
innodb_optimization.innodb_configuration_example()
innodb_optimization.innodb_monitoring()
innodb_optimization.innodb_troubleshooting()

4.3.2 MyISAM与其他存储引擎

class StorageEngineComparison:
    def __init__(self):
        self.storage_engines = {
            "InnoDB": {
                "特点": ["支持事务", "支持外键", "行级锁", "崩溃恢复"],
                "适用场景": ["OLTP应用", "高并发", "数据一致性要求高"],
                "优点": ["ACID支持", "并发性能好", "数据安全性高"],
                "缺点": ["存储空间大", "内存消耗高"]
            },
            "MyISAM": {
                "特点": ["不支持事务", "表级锁", "全文索引", "压缩表"],
                "适用场景": ["读多写少", "数据仓库", "日志系统"],
                "优点": ["查询速度快", "存储空间小", "支持全文索引"],
                "缺点": ["不支持事务", "并发性能差", "崩溃恢复差"]
            },
            "Memory": {
                "特点": ["数据存储在内存", "支持哈希索引", "表级锁"],
                "适用场景": ["临时表", "缓存", "会话存储"],
                "优点": ["访问速度极快", "支持哈希索引"],
                "缺点": ["数据易丢失", "内存限制", "不支持TEXT/BLOB"]
            },
            "Archive": {
                "特点": ["高压缩比", "只支持INSERT和SELECT", "行级锁"],
                "适用场景": ["历史数据", "日志归档", "数据仓库"],
                "优点": ["压缩比高", "存储成本低"],
                "缺点": ["不支持UPDATE/DELETE", "查询性能一般"]
            }
        }
    
    def compare_storage_engines(self):
        """存储引擎对比"""
        print("MySQL存储引擎对比:")
        print("=" * 60)
        
        for engine, details in self.storage_engines.items():
            print(f"\n{engine}:")
            for aspect, items in details.items():
                print(f"  {aspect}: {', '.join(items)}")
    
    def storage_engine_selection_guide(self):
        """存储引擎选择指南"""
        selection_guide = """
存储引擎选择指南:

1. 选择InnoDB的场景:
   - 需要事务支持的应用
   - 高并发的OLTP系统
   - 需要外键约束
   - 对数据一致性要求高
   - 需要崩溃恢复能力
   - 现代Web应用(推荐)

2. 选择MyISAM的场景:
   - 读多写少的应用
   - 不需要事务支持
   - 需要全文索引功能
   - 数据仓库和报表系统
   - 历史数据存储

3. 选择Memory的场景:
   - 临时数据存储
   - 会话信息存储
   - 缓存表
   - 需要极快访问速度的小表

4. 选择Archive的场景:
   - 历史数据归档
   - 日志数据存储
   - 只需要插入和查询的场景
   - 存储空间敏感的应用

5. 混合使用策略:
   - 主要业务表使用InnoDB
   - 日志表使用MyISAM或Archive
   - 临时表使用Memory
   - 全文搜索表使用MyISAM(MySQL 5.6前)
"""
        
        print("\n存储引擎选择指南:")
        print("=" * 40)
        print(selection_guide)
    
    def myisam_optimization(self):
        """MyISAM优化"""
        myisam_config = """
# MyISAM优化配置

[mysqld]
# MyISAM关键缓冲区大小
key_buffer_size = 256M

# MyISAM表缓存
table_open_cache = 2000

# 并发插入设置
concurrent_insert = 2

# 延迟键写入
delay_key_write = ALL

# 批量插入缓冲区大小
bulk_insert_buffer_size = 64M

# MyISAM排序缓冲区
myisam_sort_buffer_size = 128M

# MyISAM修复线程数
myisam_repair_threads = 1

# 自动修复
myisam_recover_options = BACKUP,FORCE

MyISAM优化技巧:

1. 索引优化:
   - 为经常查询的列创建索引
   - 使用复合索引覆盖多个查询条件
   - 定期使用OPTIMIZE TABLE整理索引

2. 表维护:
   - 定期运行CHECK TABLE检查表
   - 使用REPAIR TABLE修复损坏的表
   - 使用OPTIMIZE TABLE整理表碎片

3. 查询优化:
   - 避免在WHERE子句中使用函数
   - 使用LIMIT限制结果集
   - 合理使用全文索引

4. 批量操作:
   - 使用LOAD DATA INFILE批量导入
   - 禁用索引后批量插入,再启用索引
   - 使用INSERT DELAYED异步插入
"""
        
        print("\nMyISAM优化:")
        print("=" * 30)
        print(myisam_config)
    
    def storage_engine_conversion(self):
        """存储引擎转换"""
        conversion_examples = """
-- 存储引擎转换示例

-- 1. 查看表的存储引擎
SHOW TABLE STATUS WHERE Name = 'table_name';
-- 或者
SELECT ENGINE FROM information_schema.TABLES 
WHERE TABLE_SCHEMA = 'database_name' AND TABLE_NAME = 'table_name';

-- 2. 转换存储引擎
-- 将MyISAM表转换为InnoDB
ALTER TABLE table_name ENGINE = InnoDB;

-- 将InnoDB表转换为MyISAM
ALTER TABLE table_name ENGINE = MyISAM;

-- 3. 批量转换数据库中的所有表
-- 生成转换语句
SELECT CONCAT('ALTER TABLE ', TABLE_NAME, ' ENGINE = InnoDB;') AS conversion_sql
FROM information_schema.TABLES 
WHERE TABLE_SCHEMA = 'your_database' AND ENGINE = 'MyISAM';

-- 4. 转换前的准备工作
-- 备份数据
mysqldump -u username -p database_name > backup.sql

-- 检查表结构兼容性
-- InnoDB不支持的MyISAM特性:
-- - FULLTEXT索引(MySQL 5.6前)
-- - 压缩表
-- - 某些数据类型的默认值

-- 5. 转换后的验证
-- 检查转换结果
SHOW TABLE STATUS WHERE Name = 'table_name';

-- 检查数据完整性
CHECKSUM TABLE table_name;

-- 6. 性能测试
-- 转换前后进行性能对比测试
-- 监控查询性能变化
-- 检查存储空间变化
"""
        
        print("\n存储引擎转换:")
        print("=" * 40)
        print(conversion_examples)

# 存储引擎对比演示
storage_comparison = StorageEngineComparison()
storage_comparison.compare_storage_engines()
storage_comparison.storage_engine_selection_guide()
storage_comparison.myisam_optimization()
storage_comparison.storage_engine_conversion()

4.4 总结

本章深入介绍了MySQL的索引机制与性能优化,包括:

核心知识点

  1. 索引基础

    • B+树索引结构和工作原理
    • 索引的优点和代价
    • 各种索引类型的特点和适用场景
  2. 索引设计

    • 复合索引的最左前缀原则
    • 索引选择性和覆盖索引
    • 前缀索引的使用技巧
  3. 查询优化

    • EXPLAIN语句的详细解读
    • 查询重写和优化技巧
    • JOIN操作的优化策略
  4. 存储引擎优化

    • InnoDB参数调优
    • 不同存储引擎的选择策略
    • 性能监控和故障排除

最佳实践

  1. 索引策略

    • 为WHERE、ORDER BY、GROUP BY列创建索引
    • 合理设计复合索引
    • 定期监控和维护索引
  2. 查询优化

    • 避免SELECT *和全表扫描
    • 使用EXPLAIN分析查询计划
    • 优化子查询和JOIN操作
  3. 配置优化

    • 根据硬件资源调整InnoDB参数
    • 启用慢查询日志监控性能
    • 定期进行性能测试和调优

下一章我们将学习MySQL的事务处理与锁机制,这是保证数据一致性和并发控制的重要内容。