8.1 综合项目实战

8.1.1 数据分析报告自动化生成

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import warnings
warnings.filterwarnings('ignore')

class AutoReportGenerator:
    """自动化数据分析报告生成器"""
    
    def __init__(self):
        """Set the global matplotlib font options and store the report style."""
        # 'SimHei' / 'Arial Unicode MS' are requested for the Chinese labels;
        # unicode_minus is disabled so minus signs use the ASCII hyphen.
        plt.rcParams.update({
            'font.sans-serif': ['SimHei', 'Arial Unicode MS'],
            'axes.unicode_minus': False,
        })

        # Sizes and colours shared by the pages of the report.
        palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
        self.report_style = dict(
            figure_size=(12, 8),
            title_fontsize=16,
            label_fontsize=12,
            tick_fontsize=10,
            color_palette=palette,
        )
    
    def generate_sample_data(self):
        """生成示例数据"""
        np.random.seed(42)
        
        # 销售数据
        dates = pd.date_range('2023-01-01', '2023-12-31', freq='D')
        sales_data = {
            'date': dates,
            'sales': 1000 + np.cumsum(np.random.randn(len(dates)) * 50) + 
                    100 * np.sin(np.arange(len(dates)) * 2 * np.pi / 365),
            'region': np.random.choice(['北区', '南区', '东区', '西区'], len(dates)),
            'product': np.random.choice(['产品A', '产品B', '产品C', '产品D'], len(dates)),
            'customer_count': np.random.poisson(50, len(dates)),
            'marketing_spend': np.random.uniform(1000, 5000, len(dates))
        }
        
        df = pd.DataFrame(sales_data)
        
        # 添加一些计算字段
        df['month'] = df['date'].dt.month
        df['quarter'] = df['date'].dt.quarter
        df['day_of_week'] = df['date'].dt.day_name()
        df['sales_per_customer'] = df['sales'] / df['customer_count']
        df['roi'] = df['sales'] / df['marketing_spend']
        
        return df
    
    def create_executive_summary(self, data):
        """Build the executive-summary page as a 2x2 grid of overview panels.

        Panels, left to right and top to bottom: monthly sales with a fitted
        straight line, sales share per region (pie), total sales per product
        (horizontal bars) and four headline metric cards.

        Args:
            data: DataFrame with the ``date``, ``sales``, ``region``,
                ``product``, ``customer_count`` and ``roi`` columns.

        Returns:
            The matplotlib figure containing the four panels.
        """
        style = self.report_style
        palette = style['color_palette']
        heading = {'fontsize': style['title_fontsize'], 'fontweight': 'bold'}
        label_size = style['label_fontsize']

        fig, grid = plt.subplots(2, 2, figsize=(16, 12))
        (trend_ax, share_ax), (product_ax, kpi_ax) = grid
        fig.suptitle('2023年度业务执行摘要', fontsize=20, fontweight='bold', y=0.95)

        # Panel 1: monthly totals.
        per_month = data.groupby(data['date'].dt.to_period('M'))['sales'].sum()
        month_labels = per_month.index.astype(str)
        month_positions = range(len(per_month))
        trend_ax.plot(month_labels, per_month.values,
                      marker='o', linewidth=3, markersize=8, color=palette[0])
        trend_ax.set_title('月度销售额趋势', **heading)
        trend_ax.set_xlabel('月份', fontsize=label_size)
        trend_ax.set_ylabel('销售额 (万元)', fontsize=label_size)
        trend_ax.grid(True, alpha=0.3)
        trend_ax.tick_params(axis='x', rotation=45)

        # First-degree least-squares fit over the month positions 0..n-1.
        coefficients = np.polyfit(month_positions, per_month.values, 1)
        fitted_line = np.poly1d(coefficients)
        trend_ax.plot(month_labels, fitted_line(month_positions),
                      "--", alpha=0.8, color='red',
                      label=f'趋势线 (斜率: {coefficients[0]:.0f})')
        trend_ax.legend()

        # Panel 2: share of total sales per region.
        per_region = data.groupby('region')['sales'].sum()
        _, _, percent_texts = share_ax.pie(
            per_region.values, labels=per_region.index, autopct='%1.1f%%',
            colors=palette[:len(per_region)], startangle=90)
        share_ax.set_title('地区销售分布', **heading)
        for percent_text in percent_texts:
            # White bold percentages inside the wedges.
            percent_text.set_color('white')
            percent_text.set_fontweight('bold')

        # Panel 3: product totals, smallest at the bottom.
        per_product = data.groupby('product')['sales'].sum().sort_values(ascending=True)
        product_bars = product_ax.barh(per_product.index, per_product.values,
                                       color=palette[:len(per_product)])
        product_ax.set_title('产品销售额对比', **heading)
        product_ax.set_xlabel('销售额 (万元)', fontsize=label_size)
        for bar in product_bars:
            # Value label placed 1% of the bar length past the bar end.
            width = bar.get_width()
            product_ax.text(width + width*0.01, bar.get_y() + bar.get_height()/2,
                            f'{width:.0f}', ha='left', va='center', fontweight='bold')

        # Panel 4: headline metrics drawn as a 2x2 set of cards.
        kpi_ax.axis('off')
        cards = [
            ('总销售额', f"{data['sales'].sum():.0f}万元", '#1f77b4'),
            ('日均销售', f"{data['sales'].mean():.0f}万元", '#ff7f0e'),
            ('总客户数', f"{data['customer_count'].sum():.0f}人", '#2ca02c'),
            ('平均ROI', f"{data['roi'].mean():.2f}", '#d62728'),
        ]
        for position, (caption, value, color) in enumerate(cards):
            row, col = divmod(position, 2)
            x = col * 0.5 + 0.1
            y = 0.7 - row * 0.4

            card = plt.Rectangle((x, y), 0.35, 0.25, facecolor=color, alpha=0.1,
                                 edgecolor=color, linewidth=2,
                                 transform=kpi_ax.transAxes)
            kpi_ax.add_patch(card)

            # Value above, caption below, both centred horizontally in the card.
            kpi_ax.text(x + 0.175, y + 0.18, value, transform=kpi_ax.transAxes,
                        ha='center', va='center', fontsize=16,
                        fontweight='bold', color=color)
            kpi_ax.text(x + 0.175, y + 0.08, caption, transform=kpi_ax.transAxes,
                        ha='center', va='center', fontsize=12, color='black')

        kpi_ax.set_title('关键业务指标', pad=20, **heading)

        plt.tight_layout()
        return fig
    
    def create_detailed_analysis(self, data):
        """Build the detailed-analysis page on a 4x2 grid.

        Panels: quarterly sales per region (full width), customers vs sales,
        ROI histogram, mean sales per weekday, marketing spend vs sales and a
        month-by-region sales heat map (full width).

        Args:
            data: DataFrame with the ``date``, ``sales``, ``region``,
                ``quarter``, ``customer_count``, ``roi``, ``day_of_week`` and
                ``marketing_spend`` columns.

        Returns:
            The matplotlib figure containing the six panels.
        """
        style = self.report_style
        palette = style['color_palette']
        heading = {'fontsize': style['title_fontsize'], 'fontweight': 'bold'}
        label_size = style['label_fontsize']

        fig = plt.figure(figsize=(16, 20))
        layout = fig.add_gridspec(4, 2, hspace=0.3, wspace=0.3)

        def scatter_with_fit(ax, column, color):
            """Scatter ``column`` against sales and overlay a linear fit."""
            xs = data[column]
            ax.scatter(xs, data['sales'], alpha=0.6, color=color)
            fit = np.poly1d(np.polyfit(xs, data['sales'], 1))
            ax.plot(xs, fit(xs), "r--", alpha=0.8)

        # Row 0: grouped bars, one group per quarter and one bar per region.
        quarter_ax = fig.add_subplot(layout[0, :])
        per_quarter = data.groupby(['quarter', 'region'])['sales'].sum().unstack()
        per_quarter.plot(kind='bar', ax=quarter_ax, color=palette)
        quarter_ax.set_title('季度销售趋势分析', **heading)
        quarter_ax.set_xlabel('季度', fontsize=label_size)
        quarter_ax.set_ylabel('销售额 (万元)', fontsize=label_size)
        quarter_ax.legend(title='地区', bbox_to_anchor=(1.05, 1), loc='upper left')
        quarter_ax.tick_params(axis='x', rotation=0)

        # Row 1, left: customer count against sales.
        customer_ax = fig.add_subplot(layout[1, 0])
        scatter_with_fit(customer_ax, 'customer_count', palette[1])
        customer_ax.set_title('客户数量与销售额关系', **heading)
        customer_ax.set_xlabel('客户数量', fontsize=label_size)
        customer_ax.set_ylabel('销售额 (万元)', fontsize=label_size)
        customer_ax.grid(True, alpha=0.3)

        # Pearson correlation shown in a box in the panel's upper-left corner.
        pearson = np.corrcoef(data['customer_count'], data['sales'])[0, 1]
        customer_ax.text(0.05, 0.95, f'相关系数: {pearson:.3f}',
                         transform=customer_ax.transAxes,
                         bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Row 1, right: ROI histogram with the mean marked.
        roi_ax = fig.add_subplot(layout[1, 1])
        roi_ax.hist(data['roi'], bins=30, alpha=0.7, color=palette[2],
                    edgecolor='black')
        roi_ax.axvline(data['roi'].mean(), color='red', linestyle='--', linewidth=2,
                       label=f'平均ROI: {data["roi"].mean():.2f}')
        roi_ax.set_title('ROI分布分析', **heading)
        roi_ax.set_xlabel('ROI', fontsize=label_size)
        roi_ax.set_ylabel('频数', fontsize=label_size)
        roi_ax.legend()
        roi_ax.grid(True, alpha=0.3)

        # Row 2, left: mean sales per weekday, Monday first.
        weekday_ax = fig.add_subplot(layout[2, 0])
        calendar_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                          'Friday', 'Saturday', 'Sunday']
        per_weekday = data.groupby('day_of_week')['sales'].mean().reindex(calendar_order)
        slots = range(len(per_weekday))
        weekday_bars = weekday_ax.bar(slots, per_weekday.values, color=palette[3])
        weekday_ax.set_title('星期销售模式', **heading)
        weekday_ax.set_xlabel('星期', fontsize=label_size)
        weekday_ax.set_ylabel('平均销售额 (万元)', fontsize=label_size)
        weekday_ax.set_xticks(slots)
        weekday_ax.set_xticklabels(['周一', '周二', '周三', '周四', '周五', '周六', '周日'])
        for bar in weekday_bars:
            # Value label 1% of the bar height above the bar top.
            height = bar.get_height()
            weekday_ax.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
                            f'{height:.0f}', ha='center', va='bottom')

        # Row 2, right: marketing spend against sales.
        marketing_ax = fig.add_subplot(layout[2, 1])
        scatter_with_fit(marketing_ax, 'marketing_spend', palette[4])
        marketing_ax.set_title('营销支出效果分析', **heading)
        marketing_ax.set_xlabel('营销支出 (元)', fontsize=label_size)
        marketing_ax.set_ylabel('销售额 (万元)', fontsize=label_size)
        marketing_ax.grid(True, alpha=0.3)

        # Row 3: summed sales per (month, region) as a heat map.
        heat_ax = fig.add_subplot(layout[3, :])
        month_region = data.pivot_table(values='sales', index=data['date'].dt.month,
                                        columns='region', aggfunc='sum')
        image = heat_ax.imshow(month_region.values, cmap='YlOrRd', aspect='auto')
        heat_ax.set_title('月份-地区销售热力图', **heading)
        heat_ax.set_xlabel('地区', fontsize=label_size)
        heat_ax.set_ylabel('月份', fontsize=label_size)

        heat_ax.set_xticks(range(len(month_region.columns)))
        heat_ax.set_xticklabels(month_region.columns)
        heat_ax.set_yticks(range(len(month_region.index)))
        heat_ax.set_yticklabels([f'{month}月' for month in month_region.index])

        colorbar = plt.colorbar(image, ax=heat_ax, shrink=0.8)
        colorbar.set_label('销售额 (万元)', fontsize=label_size)

        # Print each cell's value at its centre (x is the column, y the row).
        for (row, col), cell in np.ndenumerate(month_region.values):
            heat_ax.text(col, row, f'{cell:.0f}',
                         ha="center", va="center", color="black", fontweight='bold')

        return fig
    
    def create_recommendations(self, data):
        """创建建议和结论页面"""
        fig, ax = plt.subplots(figsize=(12, 16))
        ax.axis('off')
        
        # 分析数据得出洞察
        insights = self.analyze_data_insights(data)
        
        # 创建建议报告
        report_text = f"""
数据分析洞察与建议报告

一、关键发现

1. 销售趋势分析
   • 年度总销售额: {data['sales'].sum():.0f}万元
   • 月均增长率: {insights['monthly_growth']:.1f}%
   • 最佳销售月份: {insights['best_month']}
   • 销售波动性: {insights['sales_volatility']:.2f}

2. 地区表现分析
   • 最佳表现地区: {insights['best_region']} ({insights['best_region_sales']:.0f}万元)
   • 地区销售差异: {insights['region_variance']:.1f}%
   • 需要关注地区: {insights['underperform_region']}

3. 产品组合分析
   • 明星产品: {insights['star_product']} ({insights['star_product_sales']:.0f}万元)
   • 产品集中度: {insights['product_concentration']:.1f}%
   • 产品多样化指数: {insights['diversity_index']:.2f}

4. 客户行为分析
   • 平均客单价: {insights['avg_order_value']:.0f}元
   • 客户获取成本: {insights['customer_acquisition_cost']:.0f}元
   • 客户价值比: {insights['customer_value_ratio']:.2f}

5. 营销效果分析
   • 平均ROI: {insights['avg_roi']:.2f}
   • 营销效率: {insights['marketing_efficiency']:.1f}%
   • 最佳营销时段: {insights['best_marketing_day']}

二、战略建议

1. 销售优化建议
   ✓ 加强{insights['best_month']}月份的销售推广活动
   ✓ 针对{insights['underperform_region']}制定专项提升计划
   ✓ 优化{insights['worst_day']}的销售策略

2. 产品策略建议
   ✓ 重点推广明星产品{insights['star_product']}
   ✓ 改进表现较差的产品线
   ✓ 考虑产品组合多样化

3. 客户管理建议
   ✓ 提升客单价,目标增长{insights['target_increase']:.0f}%
   ✓ 优化客户获取渠道
   ✓ 建立客户忠诚度计划

4. 营销投入建议
   ✓ 增加ROI较高时段的营销投入
   ✓ 优化营销渠道配置
   ✓ 建立营销效果监控体系

三、风险提示

• 销售波动性较高,需要建立稳定的销售基础
• 地区发展不均衡,存在市场集中风险
• 产品依赖度较高,需要分散化经营

四、下一步行动计划

1. 短期目标 (1-3个月)
   - 实施{insights['underperform_region']}提升计划
   - 优化{insights['worst_day']}销售流程
   - 加强明星产品推广

2. 中期目标 (3-6个月)
   - 建立客户价值管理体系
   - 优化营销投入结构
   - 完善数据监控体系

3. 长期目标 (6-12个月)
   - 实现销售稳定增长
   - 建立多元化产品组合
   - 构建数据驱动的决策体系
"""
        
        # 设置文本样式
        ax.text(0.05, 0.95, report_text, transform=ax.transAxes, fontsize=10,
                verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle='round,pad=1', facecolor='lightgray', alpha=0.1))
        
        return fig
    
    def analyze_data_insights(self, data):
        """分析数据并提取洞察"""
        insights = {}
        
        # 月度增长率
        monthly_sales = data.groupby(data['date'].dt.month)['sales'].sum()
        monthly_growth = ((monthly_sales.iloc[-1] - monthly_sales.iloc[0]) / monthly_sales.iloc[0]) * 100
        insights['monthly_growth'] = monthly_growth
        
        # 最佳月份
        insights['best_month'] = f"{monthly_sales.idxmax()}月"
        
        # 销售波动性
        insights['sales_volatility'] = data['sales'].std() / data['sales'].mean()
        
        # 地区分析
        region_sales = data.groupby('region')['sales'].sum()
        insights['best_region'] = region_sales.idxmax()
        insights['best_region_sales'] = region_sales.max()
        insights['underperform_region'] = region_sales.idxmin()
        insights['region_variance'] = (region_sales.std() / region_sales.mean()) * 100
        
        # 产品分析
        product_sales = data.groupby('product')['sales'].sum()
        insights['star_product'] = product_sales.idxmax()
        insights['star_product_sales'] = product_sales.max()
        insights['product_concentration'] = (product_sales.max() / product_sales.sum()) * 100
        insights['diversity_index'] = 1 - ((product_sales / product_sales.sum()) ** 2).sum()
        
        # 客户分析
        insights['avg_order_value'] = data['sales_per_customer'].mean() * 10000  # 转换为元
        insights['customer_acquisition_cost'] = (data['marketing_spend'] / data['customer_count']).mean()
        insights['customer_value_ratio'] = insights['avg_order_value'] / insights['customer_acquisition_cost']
        
        # 营销分析
        insights['avg_roi'] = data['roi'].mean()
        insights['marketing_efficiency'] = (data['sales'].sum() / data['marketing_spend'].sum()) * 100
        
        # 星期分析
        weekday_sales = data.groupby('day_of_week')['sales'].mean()
        insights['best_marketing_day'] = weekday_sales.idxmax()
        insights['worst_day'] = weekday_sales.idxmin()
        
        # 目标设定
        insights['target_increase'] = 15  # 15%的增长目标
        
        return insights
    
    def generate_full_report(self, output_filename='business_report.pdf'):
        """Write the three report pages into a single PDF file.

        Args:
            output_filename: Path of the PDF to create.

        Returns:
            The sample DataFrame the pages were built from.
        """
        data = self.generate_sample_data()

        # Page order: executive summary, detailed analysis, recommendations.
        page_builders = (
            self.create_executive_summary,
            self.create_detailed_analysis,
            self.create_recommendations,
        )

        with PdfPages(output_filename) as pdf:
            for build_page in page_builders:
                page = build_page(data)
                pdf.savefig(page, bbox_inches='tight', dpi=300)
                # Close each figure as soon as it has been saved.
                plt.close(page)

        print(f"报告已生成: {output_filename}")
        return data

# Usage example: these statements run at import time.
report_generator = AutoReportGenerator()

# Build the PDF report; the sample data it was generated from is returned
# and reused below.
print("生成自动化数据分析报告...")
data = report_generator.generate_full_report('2023年度业务分析报告.pdf')

# Rebuild only the executive-summary figure and display it.
print("\n单独显示执行摘要:")
fig_summary = report_generator.create_executive_summary(data)
plt.show()

8.1.2 交互式数据仪表板开发

import matplotlib.pyplot as plt
from matplotlib.widgets import Button, Slider, RadioButtons, CheckButtons
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

class InteractiveDashboard:
    """交互式数据仪表板"""
    
    def __init__(self):
        """Configure fonts, generate the data set and set the default filters."""
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS']
        plt.rcParams['axes.unicode_minus'] = False

        # Sample data: one row per (date, region, product).
        self.data = self.generate_dashboard_data()

        # Current filter selection; 'all' means no restriction.
        self.current_metric = 'sales'
        self.current_region = 'all'
        self.current_product = 'all'
        # Start/end positions in the sorted list of unique dates. This must be
        # based on the number of distinct dates, not the number of rows: each
        # date has many rows, so a row-based upper bound would point past the
        # end of the date list when ``filter_data`` looks it up.
        self.date_range = [0, self.data['date'].nunique() - 1]

        # Figure and named axes; filled in by ``create_dashboard``.
        self.fig = None
        self.axes = {}
        
    def generate_dashboard_data(self):
        """生成仪表板数据"""
        np.random.seed(42)
        
        # 创建时间序列数据
        dates = pd.date_range('2023-01-01', '2023-12-31', freq='D')
        n_days = len(dates)
        
        regions = ['北区', '南区', '东区', '西区']
        products = ['产品A', '产品B', '产品C', '产品D', '产品E']
        
        data_list = []
        
        for i, date in enumerate(dates):
            for region in regions:
                for product in products:
                    # 生成带有季节性和趋势的数据
                    base_sales = 1000 + i * 2  # 趋势
                    seasonal = 200 * np.sin(2 * np.pi * i / 365)  # 季节性
                    noise = np.random.normal(0, 100)  # 噪声
                    
                    # 地区和产品的影响因子
                    region_factor = {'北区': 1.2, '南区': 1.0, '东区': 0.9, '西区': 1.1}[region]
                    product_factor = {'产品A': 1.3, '产品B': 1.1, '产品C': 1.0, '产品D': 0.8, '产品E': 0.9}[product]
                    
                    sales = max(0, (base_sales + seasonal + noise) * region_factor * product_factor)
                    
                    data_list.append({
                        'date': date,
                        'region': region,
                        'product': product,
                        'sales': sales,
                        'profit': sales * np.random.uniform(0.1, 0.3),
                        'customers': max(1, int(sales / np.random.uniform(50, 150))),
                        'marketing_cost': sales * np.random.uniform(0.05, 0.15)
                    })
        
        df = pd.DataFrame(data_list)
        
        # 添加计算字段
        df['profit_margin'] = df['profit'] / df['sales'] * 100
        df['customer_value'] = df['sales'] / df['customers']
        df['roi'] = df['profit'] / df['marketing_cost']
        
        return df
    
    def filter_data(self):
        """根据当前选择过滤数据"""
        filtered_data = self.data.copy()
        
        # 地区过滤
        if self.current_region != 'all':
            filtered_data = filtered_data[filtered_data['region'] == self.current_region]
        
        # 产品过滤
        if self.current_product != 'all':
            filtered_data = filtered_data[filtered_data['product'] == self.current_product]
        
        # 日期范围过滤
        unique_dates = sorted(self.data['date'].unique())
        start_date = unique_dates[self.date_range[0]]
        end_date = unique_dates[self.date_range[1]]
        filtered_data = filtered_data[
            (filtered_data['date'] >= start_date) & 
            (filtered_data['date'] <= end_date)
        ]
        
        return filtered_data
    
    def create_dashboard(self):
        """Build the dashboard figure, wire up its controls and show it.

        The left part of the figure holds four chart axes (stored in
        ``self.axes`` under 'main', 'secondary', 'tertiary' and 'summary');
        the right strip holds the metric/region/product radio buttons, the
        two date sliders and the reset/export buttons.

        Returns:
            The dashboard figure (after ``plt.show()`` has returned).
        """
        self.fig = plt.figure(figsize=(20, 12))

        # Chart grid; the area right of x=0.75 is reserved for the controls.
        gs = self.fig.add_gridspec(4, 4, hspace=0.4, wspace=0.3,
                                   left=0.05, right=0.75, top=0.95, bottom=0.15)

        self.axes['main'] = self.fig.add_subplot(gs[0:2, 0:3])
        self.axes['secondary'] = self.fig.add_subplot(gs[2, 0:2])
        self.axes['tertiary'] = self.fig.add_subplot(gs[2, 2])
        self.axes['summary'] = self.fig.add_subplot(gs[3, 0:3])

        control_left = 0.78
        control_width = 0.18

        # RadioButtons takes a single list of labels. Passing the internal
        # keys positionally and the display names via ``labels=`` supplies the
        # same parameter twice and raises TypeError, so only the display
        # labels are given. The callbacks translate them back
        # (``update_metric`` via its lookup table, the other two via '全部').

        # Metric selector.
        ax_metric = plt.axes([control_left, 0.8, control_width, 0.15])
        self.metric_radio = RadioButtons(
            ax_metric, ['销售额', '利润', '客户数', '利润率(%)', 'ROI'])
        self.metric_radio.on_clicked(self.update_metric)

        # Region selector.
        ax_region = plt.axes([control_left, 0.6, control_width, 0.15])
        self.region_radio = RadioButtons(
            ax_region, ['全部', '北区', '南区', '东区', '西区'])
        self.region_radio.on_clicked(self.update_region)

        # Product selector.
        ax_product = plt.axes([control_left, 0.4, control_width, 0.15])
        self.product_radio = RadioButtons(
            ax_product, ['全部', '产品A', '产品B', '产品C', '产品D', '产品E'])
        self.product_radio.on_clicked(self.update_product)

        # Date sliders: values are positions in the sorted unique dates, so
        # they are restricted to whole numbers with ``valstep=1``.
        ax_date_start = plt.axes([control_left, 0.25, control_width, 0.03])
        ax_date_end = plt.axes([control_left, 0.2, control_width, 0.03])

        max_days = len(self.data['date'].unique()) - 1
        self.date_start_slider = Slider(ax_date_start, '开始日期', 0, max_days,
                                        valinit=0, valfmt='%d', valstep=1)
        self.date_end_slider = Slider(ax_date_end, '结束日期', 0, max_days,
                                      valinit=max_days, valfmt='%d', valstep=1)

        self.date_start_slider.on_changed(self.update_date_range)
        self.date_end_slider.on_changed(self.update_date_range)

        # Reset and export buttons share one row.
        ax_reset = plt.axes([control_left, 0.1, control_width/2, 0.05])
        self.reset_button = Button(ax_reset, '重置')
        self.reset_button.on_clicked(self.reset_filters)

        ax_export = plt.axes([control_left + control_width/2, 0.1, control_width/2, 0.05])
        self.export_button = Button(ax_export, '导出')
        self.export_button.on_clicked(self.export_data)

        # Draw the initial state before showing the window.
        self.update_charts()

        plt.show()

        return self.fig
    
    def update_metric(self, label):
        """更新选择的指标"""
        metric_map = {'销售额': 'sales', '利润': 'profit', '客户数': 'customers', 
                     '利润率(%)': 'profit_margin', 'ROI': 'roi'}
        self.current_metric = metric_map[label]
        self.update_charts()
    
    def update_region(self, label):
        """更新选择的地区"""
        self.current_region = label if label != '全部' else 'all'
        self.update_charts()
    
    def update_product(self, label):
        """更新选择的产品"""
        self.current_product = label if label != '全部' else 'all'
        self.update_charts()
    
    def update_date_range(self, val):
        """更新日期范围"""
        start_val = int(self.date_start_slider.val)
        end_val = int(self.date_end_slider.val)
        
        # 确保开始日期不大于结束日期
        if start_val > end_val:
            if val == start_val:  # 如果是开始日期变化
                self.date_end_slider.set_val(start_val)
            else:  # 如果是结束日期变化
                self.date_start_slider.set_val(end_val)
        
        self.date_range = [min(start_val, end_val), max(start_val, end_val)]
        self.update_charts()
    
    def reset_filters(self, event):
        """重置所有过滤器"""
        self.current_metric = 'sales'
        self.current_region = 'all'
        self.current_product = 'all'
        self.date_range = [0, len(self.data['date'].unique()) - 1]
        
        # 重置控件
        self.metric_radio.set_active(0)
        self.region_radio.set_active(0)
        self.product_radio.set_active(0)
        self.date_start_slider.reset()
        self.date_end_slider.reset()
        
        self.update_charts()
    
    def export_data(self, event):
        """Write the currently filtered rows to a timestamped CSV file.

        Args:
            event: Button event supplied by matplotlib; not used.
        """
        rows = self.filter_data()
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f'dashboard_export_{stamp}.csv'
        # Encoded as 'utf-8-sig', i.e. UTF-8 with a leading byte-order mark.
        rows.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"数据已导出到: {filename}")
    
    def update_charts(self):
        """更新所有图表"""
        # 获取过滤后的数据
        filtered_data = self.filter_data()
        
        if filtered_data.empty:
            # 如果没有数据,显示空图表
            for ax in self.axes.values():
                ax.clear()
                ax.text(0.5, 0.5, '没有数据', ha='center', va='center', transform=ax.transAxes)
            self.fig.canvas.draw()
            return
        
        # 清除所有图表
        for ax in self.axes.values():
            ax.clear()
        
        # 1. 主时间序列图
        daily_data = filtered_data.groupby('date')[self.current_metric].sum()
        self.axes['main'].plot(daily_data.index, daily_data.values, linewidth=2, marker='o', markersize=3)
        
        metric_labels = {
            'sales': '销售额', 'profit': '利润', 'customers': '客户数', 
            'profit_margin': '利润率(%)', 'roi': 'ROI'
        }
        
        self.axes['main'].set_title(f'{metric_labels[self.current_metric]}趋势图', fontsize=14, fontweight='bold')
        self.axes['main'].set_xlabel('日期')
        self.axes['main'].set_ylabel(metric_labels[self.current_metric])
        self.axes['main'].grid(True, alpha=0.3)
        self.axes['main'].tick_params(axis='x', rotation=45)
        
        # 2. 地区对比图
        if self.current_region == 'all':
            region_data = filtered_data.groupby('region')[self.current_metric].sum()
            bars = self.axes['secondary'].bar(region_data.index, region_data.values)
            self.axes['secondary'].set_title('地区对比', fontsize=12, fontweight='bold')
            
            # 添加数值标签
            for bar in bars:
                height = bar.get_height()
                self.axes['secondary'].text(bar.get_x() + bar.get_width()/2., height,
                                          f'{height:.0f}', ha='center', va='bottom')
        else:
            # 如果选择了特定地区,显示该地区的产品对比
            product_data = filtered_data.groupby('product')[self.current_metric].sum()
            bars = self.axes['secondary'].bar(product_data.index, product_data.values)
            self.axes['secondary'].set_title(f'{self.current_region}产品对比', fontsize=12, fontweight='bold')
            
            for bar in bars:
                height = bar.get_height()
                self.axes['secondary'].text(bar.get_x() + bar.get_width()/2., height,
                                          f'{height:.0f}', ha='center', va='bottom')
        
        self.axes['secondary'].tick_params(axis='x', rotation=45)
        
        # 3. 饼图(产品或地区分布)
        if self.current_product == 'all':
            pie_data = filtered_data.groupby('product')[self.current_metric].sum()
            self.axes['tertiary'].pie(pie_data.values, labels=pie_data.index, autopct='%1.1f%%')
            self.axes['tertiary'].set_title('产品分布', fontsize=12, fontweight='bold')
        else:
            pie_data = filtered_data.groupby('region')[self.current_metric].sum()
            self.axes['tertiary'].pie(pie_data.values, labels=pie_data.index, autopct='%1.1f%%')
            self.axes['tertiary'].set_title('地区分布', fontsize=12, fontweight='bold')
        
        # 4. 汇总统计
        self.axes['summary'].axis('off')
        
        # 计算关键统计指标
        total_value = filtered_data[self.current_metric].sum()
        avg_value = filtered_data[self.current_metric].mean()
        max_value = filtered_data[self.current_metric].max()
        min_value = filtered_data[self.current_metric].min()
        
        # 计算同比增长(如果有足够数据)
        growth_rate = 0
        if len(daily_data) > 30:
            recent_avg = daily_data.tail(30).mean()
            previous_avg = daily_data.head(30).mean()
            if previous_avg > 0:
                growth_rate = ((recent_avg - previous_avg) / previous_avg) * 100
        
        summary_text = f"""
关键指标汇总

总计: {total_value:,.0f}
平均值: {avg_value:,.0f}
最大值: {max_value:,.0f}
最小值: {min_value:,.0f}
增长率: {growth_rate:+.1f}%

数据点数: {len(filtered_data):,}
日期范围: {filtered_data['date'].min().strftime('%Y-%m-%d')} 至 {filtered_data['date'].max().strftime('%Y-%m-%d')}
        """
        
        self.axes['summary'].text(0.1, 0.9, summary_text, transform=self.axes['summary'].transAxes,
                                fontsize=11, verticalalignment='top', fontfamily='monospace',
                                bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.1))
        
        # 刷新图表
        self.fig.canvas.draw()

# Usage example: these statements run at import time.
dashboard = InteractiveDashboard()

# create_dashboard() builds the figure and calls plt.show() itself.
print("创建交互式数据仪表板...")
fig = dashboard.create_dashboard()

8.1.3 科学计算结果可视化

import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize, integrate, signal
from scipy.stats import norm, chi2
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D

class ScientificVisualization:
    """科学计算结果可视化类"""
    
    def __init__(self):
        """Set the global matplotlib sans-serif font list and turn off the Unicode minus sign."""
        plt.rcParams.update({
            'font.sans-serif': ['SimHei', 'Arial Unicode MS'],
            'axes.unicode_minus': False,
        })
        
    def optimization_visualization(self):
        """Plot a 2x2 figure illustrating BFGS minimization of a 2-D test function.

        The objective is ``x1**2 + x2**2 + 0.1*sin(10*x1) + 0.1*sin(10*x2)``.

        Panels:
            1. Contours of the objective on [-2, 2] x [-2, 2] with the iterates
               reported by BFGS from the start point (1.5, 1.5) and the solution.
            2. Objective value at each reported iterate. A log axis is used
               only when every value is positive; otherwise a linear axis is
               used, because a log axis cannot display non-positive values.
            3. Change in the objective when the solution is shifted by up to
               +/-0.5 along x1 or along x2.
            4. BFGS paths from five different start points over the contours.

        Returns:
            The matplotlib figure containing the four panels.
        """
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle('优化算法可视化分析', fontsize=16, fontweight='bold')

        def objective_function(x):
            return x[0]**2 + x[1]**2 + 0.1*np.sin(10*x[0]) + 0.1*np.sin(10*x[1])

        def run_bfgs(start):
            """Minimize the objective from ``start``; return (result, iterates)."""
            iterates = []
            outcome = optimize.minimize(
                objective_function, x0=start, method='BFGS',
                callback=lambda xk: iterates.append(xk.copy()))
            return outcome, np.array(iterates)

        # Evaluate the objective on a grid. Passing a tuple of arrays works
        # because the objective only indexes x[0] and x[1].
        x = np.linspace(-2, 2, 100)
        y = np.linspace(-2, 2, 100)
        X, Y = np.meshgrid(x, y)
        Z = objective_function((X, Y))

        # 1. Contours plus the optimization path from a single start point.
        ax1.contour(X, Y, Z, levels=20, alpha=0.6)
        ax1.contourf(X, Y, Z, levels=20, alpha=0.3, cmap='viridis')

        result, path = run_bfgs([1.5, 1.5])
        has_path = len(path) > 0

        if has_path:
            ax1.plot(path[:, 0], path[:, 1], 'ro-', linewidth=2, markersize=6,
                    label=f'优化路径 ({len(path)}步)')
            ax1.plot(result.x[0], result.x[1], 'r*', markersize=15, label='最优解')

        ax1.set_title('函数优化过程可视化')
        ax1.set_xlabel('x1')
        ax1.set_ylabel('x2')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # 2. Convergence of the objective along the recorded iterates.
        if has_path:
            objective_values = np.array([objective_function(point) for point in path])

            if objective_values.min() > 0:
                ax2.semilogy(objective_values, 'b-o', linewidth=2, markersize=4)
                ax2.set_ylabel('目标函数值 (对数尺度)')
            else:
                # This objective can go below zero near its minima; a log
                # axis would silently drop those points, so plot linearly.
                ax2.plot(objective_values, 'b-o', linewidth=2, markersize=4)
                ax2.set_ylabel('目标函数值')
            ax2.set_title('目标函数收敛过程')
            ax2.set_xlabel('迭代次数')
            ax2.grid(True, alpha=0.3)

            final_value = objective_values[-1]
            ax2.text(0.7, 0.8, f'最终值: {final_value:.6f}\n迭代次数: {len(objective_values)}',
                    transform=ax2.transAxes, bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # 3. Sensitivity: shift the solution along one coordinate at a time.
        x1_range = np.linspace(-0.5, 0.5, 50)
        best_x1, best_x2 = result.x
        sensitivity_x1 = objective_function((best_x1 + x1_range, best_x2)) - result.fun
        sensitivity_x2 = objective_function((best_x1, best_x2 + x1_range)) - result.fun

        ax3.plot(x1_range, sensitivity_x1, 'r-', linewidth=2, label='x1方向敏感性')
        ax3.plot(x1_range, sensitivity_x2, 'b-', linewidth=2, label='x2方向敏感性')
        ax3.set_title('参数敏感性分析')
        ax3.set_xlabel('参数扰动')
        ax3.set_ylabel('目标函数变化')
        ax3.legend()
        ax3.grid(True, alpha=0.3)

        # 4. Compare the paths obtained from several start points.
        start_points = [[-1.5, -1.5], [1.5, -1.5], [-1.5, 1.5], [1.5, 1.5], [0, 0]]
        colors = ['red', 'blue', 'green', 'orange', 'purple']

        for number, (start_point, color) in enumerate(zip(start_points, colors), start=1):
            result_multi, path_multi = run_bfgs(start_point)

            if len(path_multi):
                ax4.plot(path_multi[:, 0], path_multi[:, 1], color=color, linewidth=2,
                        alpha=0.7, label=f'起点{number}: ({start_point[0]}, {start_point[1]})')
                ax4.plot(result_multi.x[0], result_multi.x[1], '*', color=color, markersize=10)

        # Background contours for orientation.
        ax4.contour(X, Y, Z, levels=20, alpha=0.3)
        ax4.set_title('多起点优化比较')
        ax4.set_xlabel('x1')
        ax4.set_ylabel('x2')
        ax4.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax4.grid(True, alpha=0.3)

        plt.tight_layout()
        return fig
    
    def signal_processing_visualization(self):
        """Build a 2x2 figure: time-domain view, FFT spectrum, low-pass filtering and STFT.

        The test signal is a sum of 5, 15 and 30 Hz sines sampled at 1000
        points over [0, 2], plus normally distributed noise drawn from the
        global NumPy random state (no seed is set here).

        Returns:
            The matplotlib figure containing the four panels.
        """
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        ax_time, ax_spec, ax_filt, ax_tf = axes.ravel()
        fig.suptitle('信号处理分析可视化', fontsize=16, fontweight='bold')

        # Time base and the three tone frequencies.
        t = np.linspace(0, 2, 1000)
        freq1, freq2, freq3 = 5, 15, 30
        sample_rate = 1 / (t[1] - t[0])
        head = slice(None, 200)  # only the first 200 samples are drawn in time plots

        # Noise-free mixture of the three tones.
        tone1 = np.sin(2*np.pi*freq1*t)
        tone2 = 0.5*np.sin(2*np.pi*freq2*t)
        tone3 = 0.3*np.sin(2*np.pi*freq3*t)
        signal_clean = tone1 + tone2 + tone3

        # Noisy version of the mixture.
        signal_noisy = signal_clean + 0.2 * np.random.randn(len(t))

        # 1. Clean vs. noisy signal in the time domain.
        ax_time.plot(t[head], signal_clean[head], 'b-', linewidth=2, label='原始信号', alpha=0.8)
        ax_time.plot(t[head], signal_noisy[head], 'r-', linewidth=1, label='含噪声信号', alpha=0.6)
        ax_time.set_title('时域信号对比')
        ax_time.set_xlabel('时间 (秒)')
        ax_time.set_ylabel('幅度')
        ax_time.legend()
        ax_time.grid(True, alpha=0.3)

        # 2. Magnitude spectra; keep only the first half of the FFT bins.
        freqs = np.fft.fftfreq(len(t), t[1] - t[0])
        half = len(freqs)//2
        positive_freqs = freqs[:half]
        spectrum_clean = np.abs(np.fft.fft(signal_clean)[:half])
        spectrum_noisy = np.abs(np.fft.fft(signal_noisy)[:half])

        ax_spec.plot(positive_freqs, spectrum_clean, 'b-', linewidth=2, label='原始信号频谱')
        ax_spec.plot(positive_freqs, spectrum_noisy, 'r-', linewidth=1, label='含噪声信号频谱', alpha=0.7)
        ax_spec.set_title('频域分析 (FFT)')
        ax_spec.set_xlabel('频率 (Hz)')
        ax_spec.set_ylabel('幅度')
        ax_spec.set_xlim(0, 50)
        ax_spec.legend()
        ax_spec.grid(True, alpha=0.3)

        # Mark each tone frequency with a dashed line and a label.
        for tone_freq in (freq1, freq2, freq3):
            ax_spec.axvline(tone_freq, color='green', linestyle='--', alpha=0.7)
            ax_spec.text(tone_freq, ax_spec.get_ylim()[1]*0.8, f'{tone_freq}Hz', rotation=90,
                    ha='right', va='top', color='green')

        # 3. 4th-order Butterworth low-pass applied forward and backward.
        nyquist = 0.5 * sample_rate
        cutoff = 20  # cutoff frequency in Hz
        b, a = signal.butter(4, cutoff / nyquist, btype='low')
        signal_filtered = signal.filtfilt(b, a, signal_noisy)

        ax_filt.plot(t[head], signal_noisy[head], 'r-', linewidth=1, label='含噪声信号', alpha=0.6)
        ax_filt.plot(t[head], signal_filtered[head], 'g-', linewidth=2, label='滤波后信号')
        ax_filt.plot(t[head], signal_clean[head], 'b--', linewidth=2, label='原始信号', alpha=0.8)
        ax_filt.set_title(f'低通滤波效果 (截止频率: {cutoff}Hz)')
        ax_filt.set_xlabel('时间 (秒)')
        ax_filt.set_ylabel('幅度')
        ax_filt.legend()
        ax_filt.grid(True, alpha=0.3)

        # 4. Short-time Fourier transform of the noisy signal.
        f, t_stft, Zxx = signal.stft(signal_noisy, fs=sample_rate, window='hann', nperseg=128)

        mesh = ax_tf.pcolormesh(t_stft, f, np.abs(Zxx), shading='gouraud', cmap='viridis')
        ax_tf.set_title('时频分析 (STFT)')
        ax_tf.set_xlabel('时间 (秒)')
        ax_tf.set_ylabel('频率 (Hz)')
        ax_tf.set_ylim(0, 50)

        # Colour bar for the STFT magnitude.
        colorbar = plt.colorbar(mesh, ax=ax_tf, shrink=0.8)
        colorbar.set_label('幅度')

        plt.tight_layout()
        return fig
    
    def statistical_analysis_visualization(self):
        """Build a 2x2 figure of basic statistical analyses on synthetic data.

        Panels:
            1. Histograms of two normal samples with their generating
               densities and the result of an independent two-sample t-test.
            2. Sample mean and 95% confidence interval for sample sizes
               10, 20, ..., 200. The interval uses Student-t critical values
               because the standard deviation is estimated from the sample.
            3. Least-squares line fitted to noisy ``y = 2x + 1`` data with a
               95% prediction band (residual variance on n-2 degrees of
               freedom, widening away from the mean of x).
            4. Normal Q-Q plot of the regression residuals together with the
               Shapiro-Wilk test result.

        The global NumPy random state is reseeded with 42 on every call.

        Returns:
            The matplotlib figure containing the four panels.
        """
        from scipy.stats import linregress, probplot, shapiro, ttest_ind
        from scipy.stats import t as student_t

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle('统计分析可视化', fontsize=16, fontweight='bold')

        np.random.seed(42)
        n_samples = 1000

        # 1. Hypothesis test on two groups with different means.
        group1 = np.random.normal(100, 15, n_samples//2)
        group2 = np.random.normal(105, 15, n_samples//2)

        ax1.hist(group1, bins=30, alpha=0.7, label='组1', color='blue', density=True)
        ax1.hist(group2, bins=30, alpha=0.7, label='组2', color='red', density=True)

        # Overlay the densities the groups were drawn from.
        x_range = np.linspace(50, 150, 200)
        ax1.plot(x_range, norm.pdf(x_range, 100, 15), 'b-', linewidth=2, label='组1理论分布')
        ax1.plot(x_range, norm.pdf(x_range, 105, 15), 'r-', linewidth=2, label='组2理论分布')

        t_stat, p_value = ttest_ind(group1, group2)

        ax1.set_title(f'两组数据分布比较\nt统计量: {t_stat:.3f}, p值: {p_value:.6f}')
        ax1.set_xlabel('数值')
        ax1.set_ylabel('概率密度')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Dashed lines at the two sample means (drawn after the legend so
        # they do not appear in it).
        ax1.axvline(np.mean(group1), color='blue', linestyle='--', alpha=0.8)
        ax1.axvline(np.mean(group2), color='red', linestyle='--', alpha=0.8)

        # 2. Confidence interval of the mean versus sample size.
        sample_sizes = list(range(10, 201, 10))
        true_mean = 100
        true_std = 15

        sample_means = []
        ci_lower = []
        ci_upper = []

        for n in sample_sizes:
            sample = np.random.normal(true_mean, true_std, n)
            sample_mean = np.mean(sample)
            sample_std = np.std(sample, ddof=1)

            # Two-sided 95% interval; the t quantile accounts for estimating
            # the standard deviation, which matters for the small samples.
            t_crit = student_t.ppf(0.975, df=n - 1)
            margin_error = t_crit * sample_std / np.sqrt(n)

            sample_means.append(sample_mean)
            ci_lower.append(sample_mean - margin_error)
            ci_upper.append(sample_mean + margin_error)

        ax2.fill_between(sample_sizes, ci_lower, ci_upper, alpha=0.3, color='lightblue',
                        label='95%置信区间')
        ax2.plot(sample_sizes, sample_means, 'bo-', linewidth=2, markersize=4, label='样本均值')
        ax2.axhline(true_mean, color='red', linestyle='--', linewidth=2, label='真实均值')

        ax2.set_title('置信区间随样本量变化')
        ax2.set_xlabel('样本量')
        ax2.set_ylabel('均值估计')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # 3. Simple linear regression on noisy data.
        x_reg = np.random.uniform(0, 10, 100)
        y_true = 2 * x_reg + 1
        y_reg = y_true + np.random.normal(0, 1, 100)

        slope, intercept, r_value, p_value_reg, std_err = linregress(x_reg, y_reg)

        ax3.scatter(x_reg, y_reg, alpha=0.6, color='blue', label='观测数据')

        x_line = np.linspace(0, 10, 100)
        y_line = slope * x_line + intercept
        y_true_line = 2 * x_line + 1

        ax3.plot(x_line, y_line, 'r-', linewidth=2,
                label=f'拟合线 (y = {slope:.2f}x + {intercept:.2f})')
        ax3.plot(x_line, y_true_line, 'g--', linewidth=2, label='真实关系 (y = 2x + 1)')

        # Residuals are reused for the prediction band and the Q-Q plot.
        residuals = y_reg - (slope * x_reg + intercept)

        # 95% prediction band for a new observation at each x in x_line:
        # y_hat +/- t * s * sqrt(1 + 1/n + (x - x_mean)^2 / Sxx), where two
        # fitted parameters leave n - 2 degrees of freedom.
        n_obs = len(x_reg)
        resid_std = np.sqrt(np.sum(residuals**2) / (n_obs - 2))
        x_mean = np.mean(x_reg)
        sxx = np.sum((x_reg - x_mean)**2)
        t_crit_reg = student_t.ppf(0.975, df=n_obs - 2)
        prediction_interval = t_crit_reg * resid_std * np.sqrt(
            1 + 1 / n_obs + (x_line - x_mean)**2 / sxx)
        ax3.fill_between(x_line, y_line - prediction_interval, y_line + prediction_interval,
                        alpha=0.2, color='red', label='95%预测区间')

        ax3.set_title(f'线性回归分析\nR² = {r_value**2:.3f}, p值 = {p_value_reg:.6f}')
        ax3.set_xlabel('x')
        ax3.set_ylabel('y')
        ax3.legend()
        ax3.grid(True, alpha=0.3)

        # 4. Residual diagnostics: normal Q-Q plot and Shapiro-Wilk test.
        probplot(residuals, dist="norm", plot=ax4)
        ax4.set_title('残差正态性检验 (Q-Q图)')
        ax4.grid(True, alpha=0.3)

        shapiro_stat, shapiro_p = shapiro(residuals)
        ax4.text(0.05, 0.95, f'Shapiro-Wilk检验:\n统计量: {shapiro_stat:.4f}\np值: {shapiro_p:.6f}',
                transform=ax4.transAxes, bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
                verticalalignment='top')

        plt.tight_layout()
        return fig
    
    def numerical_integration_visualization(self):
        """Build a 2x2 figure comparing numerical integration rules.

        The integrand is ``sin(x) * exp(-x/5) + 0.1*x**2`` on [0, 10]. The
        reference value used for all error figures comes from
        ``scipy.integrate.quad`` (adaptive quadrature), not from a
        closed-form antiderivative.

        Panels:
            1. Composite trapezoidal rule with 10 intervals.
            2. Composite Simpson rule with 10 intervals, drawn as piecewise
               parabolas through consecutive point triples.
            3. Absolute error of both rules versus the number of intervals on
               log-log axes, with 1/n^2 and 1/n^4 guide lines.
            4. A simplified illustration of derivative-based step selection
               for three tolerances (this is not quad's actual node set).

        Returns:
            The matplotlib figure containing the four panels.
        """
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle('数值积分方法比较', fontsize=16, fontweight='bold')

        def f(x):
            return np.sin(x) * np.exp(-x/5) + 0.1*x**2

        # Integration interval and a dense sampling for drawing the curve.
        a, b = 0, 10
        x_fine = np.linspace(a, b, 1000)
        y_fine = f(x_fine)

        # Reference value from adaptive quadrature.
        analytical_result, _ = integrate.quad(f, a, b)

        # 1. Trapezoidal rule.
        n_trapz = 10
        x_trapz = np.linspace(a, b, n_trapz + 1)
        y_trapz = f(x_trapz)

        ax1.plot(x_fine, y_fine, 'b-', linewidth=2, label='被积函数')
        ax1.fill_between(x_fine, 0, y_fine, alpha=0.2, color='lightblue', label='积分区域')

        # One filled quadrilateral per pair of neighbouring nodes.
        for x_left, x_right, y_left, y_right in zip(x_trapz[:-1], x_trapz[1:],
                                                    y_trapz[:-1], y_trapz[1:]):
            ax1.fill([x_left, x_left, x_right, x_right], [0, y_left, y_right, 0],
                     alpha=0.3, color='red', edgecolor='black')

        # scipy.integrate.trapezoid is used instead of np.trapz, which is
        # deprecated in NumPy 2.
        trapz_result = integrate.trapezoid(y_trapz, x=x_trapz)
        error_trapz = abs(trapz_result - analytical_result)

        ax1.set_title(f'梯形法则 (n={n_trapz})\n结果: {trapz_result:.6f}, 误差: {error_trapz:.6f}')
        ax1.set_xlabel('x')
        ax1.set_ylabel('f(x)')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # 2. Simpson rule.
        n_simpson = 10  # must be even so the nodes split into triples
        x_simpson = np.linspace(a, b, n_simpson + 1)
        y_simpson = f(x_simpson)

        ax2.plot(x_fine, y_fine, 'b-', linewidth=2, label='被积函数')
        ax2.fill_between(x_fine, 0, y_fine, alpha=0.2, color='lightblue', label='积分区域')

        # Draw the parabola through each group of three consecutive nodes.
        for i in range(0, n_simpson, 2):
            x_seg = x_simpson[i:i+3]
            y_seg = y_simpson[i:i+3]

            coeffs = np.polyfit(x_seg, y_seg, 2)
            x_para = np.linspace(x_seg[0], x_seg[-1], 50)
            y_para = np.polyval(coeffs, x_para)

            ax2.plot(x_para, y_para, 'r-', linewidth=2, alpha=0.7)
            ax2.fill_between(x_para, 0, y_para, alpha=0.3, color='red')

        # x is passed by keyword: recent SciPy releases no longer accept it
        # positionally.
        simpson_result = integrate.simpson(y_simpson, x=x_simpson)
        error_simpson = abs(simpson_result - analytical_result)

        ax2.set_title(f'辛普森法则 (n={n_simpson})\n结果: {simpson_result:.6f}, 误差: {error_simpson:.6f}')
        ax2.set_xlabel('x')
        ax2.set_ylabel('f(x)')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # 3. Convergence study.
        n_values = np.logspace(1, 3, 20).astype(int)
        trapz_errors = []
        simpson_errors = []

        for n in n_values:
            x_n = np.linspace(a, b, n + 1)
            y_n = f(x_n)

            trapz_errors.append(abs(integrate.trapezoid(y_n, x=x_n) - analytical_result))

            # Simpson is only evaluated for an even number of intervals;
            # odd counts are recorded as NaN and filtered out below.
            if n % 2 == 0:
                simpson_errors.append(abs(integrate.simpson(y_n, x=x_n) - analytical_result))
            else:
                simpson_errors.append(np.nan)

        ax3.loglog(n_values, trapz_errors, 'ro-', linewidth=2, label='梯形法则')

        simpson_errors = np.array(simpson_errors)
        valid_simpson = ~np.isnan(simpson_errors)
        ax3.loglog(n_values[valid_simpson], simpson_errors[valid_simpson],
                  'bs-', linewidth=2, label='辛普森法则')

        # Guide lines with the theoretical slopes. Powers are taken in
        # floating point so n**4 cannot overflow a 32-bit platform integer.
        n_float = n_values.astype(float)
        ax3.loglog(n_values, 1e-2 / n_float**2, 'k--', alpha=0.7, label='O(h²) 收敛率')
        ax3.loglog(n_values, 1e-6 / n_float**4, 'g--', alpha=0.7, label='O(h⁴) 收敛率')

        ax3.set_title('数值积分收敛性分析')
        ax3.set_xlabel('分割数 n')
        ax3.set_ylabel('绝对误差')
        ax3.legend()
        ax3.grid(True, alpha=0.3)

        # 4. Adaptive integration illustration.
        adaptive_result, _ = integrate.quad(f, a, b, epsabs=1e-10)

        def sample_points(tol):
            """Walk from a to b with a step based on a local finite-difference slope."""
            points = [a]
            current_x = a
            h = 0.01  # spacing of the finite-difference probe
            while current_x < b:
                if current_x + h < b:
                    local_derivative = abs(f(current_x + h) - f(current_x)) / h
                    # Step is clamped to [0.05, 0.5].
                    step = min(0.5, max(0.05, tol / (local_derivative + 1e-10)))
                else:
                    step = b - current_x
                current_x = min(current_x + step, b)
                points.append(current_x)
            return np.array(points)

        tolerances = [1e-3, 1e-6, 1e-9]
        colors = ['red', 'green', 'blue']

        ax4.plot(x_fine, y_fine, 'k-', linewidth=2, label='被积函数')

        for tol, color in zip(tolerances, colors):
            x_adaptive = sample_points(tol)
            ax4.scatter(x_adaptive, f(x_adaptive), color=color, s=20, alpha=0.7,
                       label=f'自适应点 (tol={tol})')

        ax4.set_title(f'自适应积分点分布\n精确结果: {adaptive_result:.10f}')
        ax4.set_xlabel('x')
        ax4.set_ylabel('f(x)')
        ax4.legend()
        ax4.grid(True, alpha=0.3)

        plt.tight_layout()
        return fig

# Usage example: build each of the four figures in turn and display it.
sci_viz = ScientificVisualization()

print("1. 优化算法可视化")
fig1 = sci_viz.optimization_visualization()
plt.show()

print("2. 信号处理可视化")
fig2 = sci_viz.signal_processing_visualization()
plt.show()

print("3. 统计分析可视化")
fig3 = sci_viz.statistical_analysis_visualization()
plt.show()

print("4. 数值积分可视化")
fig4 = sci_viz.numerical_integration_visualization()
plt.show()

8.2 行业应用案例

8.2.1 金融数据分析与可视化

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.dates as mdates
from matplotlib.patches import Rectangle

class FinancialAnalysis:
    """金融数据分析与可视化"""
    
    def __init__(self):
        """Set the global matplotlib sans-serif font list and turn off the Unicode minus sign."""
        plt.rcParams.update({
            'font.sans-serif': ['SimHei', 'Arial Unicode MS'],
            'axes.unicode_minus': False,
        })
        
    def generate_stock_data(self, symbol='STOCK', days=252):
        """生成模拟股票数据"""
        np.random.seed(42)
        
        # 生成日期
        start_date = datetime.now() - timedelta(days=days)
        dates = pd.date_range(start=start_date, periods=days, freq='D')
        
        # 生成价格数据(几何布朗运动)
        initial_price = 100
        mu = 0.0002  # 日收益率均值
        sigma = 0.02  # 日收益率标准差
        
        # 生成随机收益率
        returns = np.random.normal(mu, sigma, days)
        
        # 添加一些市场事件
        # 模拟市场崩盘
        crash_day = days // 3
        returns[crash_day:crash_day+5] = np.random.normal(-0.05, 0.01, 5)
        
        # 模拟牛市
        bull_start = days * 2 // 3
        returns[bull_start:bull_start+30] = np.random.normal(0.01, 0.015, 30)
        
        # 计算价格
        prices = [initial_price]
        for ret in returns[1:]:
            prices.append(prices[-1] * (1 + ret))
        
        # 生成成交量(与价格变化相关)
        volume_base = 1000000
        volume = volume_base * (1 + np.abs(returns) * 10 + np.random.normal(0, 0.3, days))
        volume = np.maximum(volume, volume_base * 0.1)  # 确保最小成交量
        
        # 创建OHLC数据
        ohlc_data = []
        for i, (date, price, vol) in enumerate(zip(dates, prices, volume)):
            # 生成开高低收价格
            if i == 0:
                open_price = price
            else:
                open_price = prices[i-1]  # 前一日收盘价
            
            close_price = price
            
            # 生成高低价
            daily_range = abs(returns[i]) * price * 2
            high_price = max(open_price, close_price) + np.random.uniform(0, daily_range)
            low_price = min(open_price, close_price) - np.random.uniform(0, daily_range)
            
            ohlc_data.append({
                'date': date,
                'open': open_price,
                'high': high_price,
                'low': low_price,
                'close': close_price,
                'volume': int(vol),
                'symbol': symbol
            })
        
        return pd.DataFrame(ohlc_data)
    
    def plot_candlestick_chart(self, data, title="股票K线图"):
        """Draw a candlestick chart with moving averages above a volume bar chart.

        Candles and volume bars are red when close >= open and green
        otherwise. 5-, 20- and 60-row rolling means of the close are overlaid
        on the price panel.

        Args:
            data: DataFrame with 'date', 'open', 'high', 'low', 'close' and
                'volume' columns. It is not modified; the moving averages
                are computed locally instead of being written back as columns.
            title: Title of the price panel.

        Returns:
            The matplotlib figure with the price and volume panels.
        """
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10),
                                      gridspec_kw={'height_ratios': [3, 1]},
                                      sharex=True)

        # 1. Candlesticks: a thin wick from low to high and a body between
        # open and close. Bodies are 0.6 wide in matplotlib date units.
        candles = data[['date', 'open', 'high', 'low', 'close']]
        for date, open_price, high_price, low_price, close_price in candles.itertuples(
                index=False, name=None):
            color = 'red' if close_price >= open_price else 'green'

            ax1.plot([date, date], [low_price, high_price], color='black', linewidth=1)

            body_height = abs(close_price - open_price)
            body_bottom = min(open_price, close_price)
            rect = Rectangle((mdates.date2num(date) - 0.3, body_bottom),
                           0.6, body_height,
                           facecolor=color, edgecolor='black', alpha=0.8)
            ax1.add_patch(rect)

        # Moving averages of the close.
        for window in (5, 20, 60):
            ax1.plot(data['date'], data['close'].rolling(window=window).mean(),
                     label=f'MA{window}', linewidth=1, alpha=0.8)

        ax1.set_title(title, fontsize=14, fontweight='bold')
        ax1.set_ylabel('价格')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # 2. Volume bars, coloured like the candles.
        colors = np.where(data['close'] >= data['open'], 'red', 'green').tolist()

        ax2.bar(data['date'], data['volume'], color=colors, alpha=0.7, width=0.8)
        ax2.set_ylabel('成交量')
        ax2.set_xlabel('日期')
        ax2.grid(True, alpha=0.3)

        # One tick per month on the shared x axis, labels rotated.
        ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        ax2.xaxis.set_major_locator(mdates.MonthLocator())
        plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

        plt.tight_layout()
        return fig
    
    def technical_analysis_dashboard(self, data):
        """技术分析仪表板"""
        fig = plt.figure(figsize=(20, 16))
        gs = fig.add_gridspec(4, 3, hspace=0.3, wspace=0.3)
        
        # 1. 主K线图
        ax_main = fig.add_subplot(gs[0:2, :])
        
        # 绘制K线
        for i, row in data.iterrows():
            date_num = mdates.date2num(row['date'])
            open_price = row['open']
            high_price = row['high']
            low_price = row['low']
            close_price = row['close']
            
            color = 'red' if close_price >= open_price else 'green'
            
            # 影线
            ax_main.plot([date_num, date_num], [low_price, high_price], 
                       color='black', linewidth=0.5)
            
            # 实体
            body_height = abs(close_price - open_price)
            body_bottom = min(open_price, close_price)
            
            rect = Rectangle((date_num - 0.3, body_bottom), 0.6, body_height, 
                           facecolor=color, edgecolor='black', alpha=0.8)
            ax_main.add_patch(rect)
        
        # 布林带
        data['MA20'] = data['close'].rolling(window=20).mean()
        data['std20'] = data['close'].rolling(window=20).std()
        data['upper_band'] = data['MA20'] + 2 * data['std20']
        data['lower_band'] = data['MA20'] - 2 * data['std20']
        
        ax_main.plot(data['date'], data['MA20'], label='MA20', linewidth=2, color='blue')
        ax_main.plot(data['date'], data['upper_band'], label='上轨', linewidth=1, 
                    color='red', linestyle='--', alpha=0.7)
        ax_main.plot(data['date'], data['lower_band'], label='下轨', linewidth=1, 
                    color='green', linestyle='--', alpha=0.7)
        ax_main.fill_between(data['date'], data['upper_band'], data['lower_band'], 
                           alpha=0.1, color='gray')
        
        ax_main.set_title('技术分析 - 布林带', fontsize=14, fontweight='bold')
        ax_main.set_ylabel('价格')
        ax_main.legend()
        ax_main.grid(True, alpha=0.3)
        
        # 2. RSI指标
        ax_rsi = fig.add_subplot(gs[2, 0])
        
        # 计算RSI
        def calculate_rsi(prices, window=14):
            delta = prices.diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
            rs = gain / loss
            rsi = 100 - (100 / (1 + rs))
            return rsi
        
        data['RSI'] = calculate_rsi(data['close'])
        
        ax_rsi.plot(data['date'], data['RSI'], linewidth=2, color='purple')
        ax_rsi.axhline(y=70, color='red', linestyle='--', alpha=0.7, label='超买线')
        ax_rsi.axhline(y=30, color='green', linestyle='--', alpha=0.7, label='超卖线')
        ax_rsi.fill_between(data['date'], 70, 100, alpha=0.1, color='red')
        ax_rsi.fill_between(data['date'], 0, 30, alpha=0.1, color='green')
        
        ax_rsi.set_title('RSI指标')
        ax_rsi.set_ylabel('RSI')
        ax_rsi.set_ylim(0, 100)
        ax_rsi.legend()
        ax_rsi.grid(True, alpha=0.3)
        
        # 3. MACD指标
        ax_macd = fig.add_subplot(gs[2, 1])
        
        # 计算MACD
        exp1 = data['close'].ewm(span=12).mean()
        exp2 = data['close'].ewm(span=26).mean()
        data['MACD'] = exp1 - exp2
        data['Signal'] = data['MACD'].ewm(span=9).mean()
        data['Histogram'] = data['MACD'] - data['Signal']
        
        ax_macd.plot(data['date'], data['MACD'], label='MACD', linewidth=2, color='blue')
        ax_macd.plot(data['date'], data['Signal'], label='Signal', linewidth=2, color='red')
        
        # MACD柱状图
        colors = ['red' if x >= 0 else 'green' for x in data['Histogram']]
        ax_macd.bar(data['date'], data['Histogram'], color=colors, alpha=0.7, width=0.8)
        
        ax_macd.set_title('MACD指标')
        ax_macd.set_ylabel('MACD')
        ax_macd.legend()
        ax_macd.grid(True, alpha=0.3)
        ax_macd.axhline(y=0, color='black', linewidth=0.5)
        
        # 4. 成交量分析
        ax_volume = fig.add_subplot(gs[2, 2])
        
        # 成交量移动平均
        data['Volume_MA5'] = data['volume'].rolling(window=5).mean()
        data['Volume_MA20'] = data['volume'].rolling(window=20).mean()
        
        colors = ['red' if close >= open else 'green' 
                 for close, open in zip(data['close'], data['open'])]
        
        ax_volume.bar(data['date'], data['volume'], color=colors, alpha=0.7, width=0.8)
        ax_volume.plot(data['date'], data['Volume_MA5'], label='成交量MA5', 
                      linewidth=2, color='orange')
        ax_volume.plot(data['date'], data['Volume_MA20'], label='成交量MA20', 
                      linewidth=2, color='purple')
        
        ax_volume.set_title('成交量分析')
        ax_volume.set_ylabel('成交量')
        ax_volume.legend()
        ax_volume.grid(True, alpha=0.3)
        
        # 5. 价格分布分析
        ax_dist = fig.add_subplot(gs[3, 0])
        
        ax_dist.hist(data['close'], bins=30, alpha=0.7, color='skyblue', 
                    orientation='horizontal', density=True)
        ax_dist.axhline(y=data['close'].mean(), color='red', linestyle='--', 
                       linewidth=2, label=f'均价: {data["close"].mean():.2f}')
        ax_dist.axhline(y=data['close'].median(), color='green', linestyle='--', 
                       linewidth=2, label=f'中位数: {data["close"].median():.2f}')
        
        ax_dist.set_title('价格分布')
        ax_dist.set_ylabel('价格')
        ax_dist.set_xlabel('频率密度')
        ax_dist.legend()
        ax_dist.grid(True, alpha=0.3)
        
        # 6. 收益率分析
        ax_returns = fig.add_subplot(gs[3, 1])
        
        data['returns'] = data['close'].pct_change()
        
        ax_returns.hist(data['returns'].dropna(), bins=30, alpha=0.7, 
                       color='lightcoral', density=True)
        
        # 添加正态分布拟合
        returns_clean = data['returns'].dropna()
        mu, sigma = returns_clean.mean(), returns_clean.std()
        x = np.linspace(returns_clean.min(), returns_clean.max(), 100)
        ax_returns.plot(x, (1/(sigma * np.sqrt(2 * np.pi))) * 
                       np.exp(-0.5 * ((x - mu) / sigma) ** 2), 
                       'r-', linewidth=2, label='正态分布拟合')
        
        ax_returns.set_title('收益率分布')
        ax_returns.set_xlabel('日收益率')
        ax_returns.set_ylabel('频率密度')
        ax_returns.legend()
        ax_returns.grid(True, alpha=0.3)
        
        # 7. 风险指标
        ax_risk = fig.add_subplot(gs[3, 2])
        ax_risk.axis('off')
        
        # 计算风险指标
        returns_clean = data['returns'].dropna()
        
        # VaR计算(95%置信度)
        var_95 = np.percentile(returns_clean, 5)
        
        # 最大回撤
        cumulative_returns = (1 + returns_clean).cumprod()
        running_max = cumulative_returns.expanding().max()
        drawdown = (cumulative_returns - running_max) / running_max
        max_drawdown = drawdown.min()
        
        # 夏普比率(假设无风险利率为2%)
        risk_free_rate = 0.02 / 252  # 日无风险利率
        excess_returns = returns_clean - risk_free_rate
        sharpe_ratio = excess_returns.mean() / excess_returns.std() * np.sqrt(252)
        
        # 波动率
        volatility = returns_clean.std() * np.sqrt(252)
        
        risk_text = f"""
风险指标分析

年化收益率: {returns_clean.mean() * 252:.2%}
年化波动率: {volatility:.2%}
夏普比率: {sharpe_ratio:.3f}

95% VaR: {var_95:.2%}
最大回撤: {max_drawdown:.2%}

当前价格: {data['close'].iloc[-1]:.2f}
期间涨跌: {(data['close'].iloc[-1] / data['close'].iloc[0] - 1):.2%}
        """
        
        ax_risk.text(0.1, 0.9, risk_text, transform=ax_risk.transAxes,
                    fontsize=12, verticalalignment='top', fontfamily='monospace',
                    bbox=dict(boxstyle='round,pad=0.5', facecolor='lightgray', alpha=0.1))
        
        # 格式化日期轴
        for ax in [ax_main, ax_rsi, ax_macd, ax_volume]:
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
            ax.xaxis.set_major_locator(mdates.MonthLocator())
            plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
        
        plt.tight_layout()
        return fig

# Usage example: generate synthetic stock data, then draw both charts from it.
finance_analyzer = FinancialAnalysis()

print("生成股票数据...")
stock_data = finance_analyzer.generate_stock_data('DEMO股票', days=252)

print("1. K线图")
fig1 = finance_analyzer.plot_candlestick_chart(stock_data, "DEMO股票 - K线图")
plt.show()

print("2. 技术分析仪表板")
fig2 = finance_analyzer.technical_analysis_dashboard(stock_data)
plt.show()

8.2.2 医疗健康数据可视化

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
from matplotlib.patches import Circle, Rectangle
from matplotlib.collections import LineCollection

class MedicalDataVisualization:
    """医疗健康数据可视化"""
    
    def __init__(self):
        """Apply the Matplotlib settings used for rendering Chinese labels."""
        plt.rcParams.update({
            # Sans-serif font candidates that ship CJK glyphs.
            'font.sans-serif': ['SimHei', 'Arial Unicode MS'],
            # Keep minus signs as a plain hyphen so they render with CJK fonts.
            'axes.unicode_minus': False,
        })
        
    def generate_patient_data(self, n_patients=1000):
        """生成模拟患者数据"""
        np.random.seed(42)
        
        # 基础信息
        ages = np.random.normal(45, 15, n_patients).astype(int)
        ages = np.clip(ages, 18, 90)
        
        genders = np.random.choice(['男', '女'], n_patients, p=[0.52, 0.48])
        
        # 生理指标(考虑年龄和性别的影响)
        systolic_bp = []
        diastolic_bp = []
        heart_rate = []
        bmi = []
        cholesterol = []
        glucose = []
        
        for age, gender in zip(ages, genders):
            # 血压(随年龄增加)
            base_systolic = 110 + (age - 30) * 0.5 + np.random.normal(0, 10)
            base_diastolic = 70 + (age - 30) * 0.3 + np.random.normal(0, 8)
            
            if gender == '男':
                base_systolic += 5
                base_diastolic += 3
            
            systolic_bp.append(max(90, min(200, base_systolic)))
            diastolic_bp.append(max(60, min(120, base_diastolic)))
            
            # 心率
            base_hr = 72 + np.random.normal(0, 8)
            if gender == '女':
                base_hr += 3
            heart_rate.append(max(50, min(120, base_hr)))
            
            # BMI
            base_bmi = 24 + (age - 40) * 0.1 + np.random.normal(0, 3)
            if gender == '男':
                base_bmi += 1
            bmi.append(max(16, min(40, base_bmi)))
            
            # 胆固醇
            base_chol = 180 + (age - 30) * 1.2 + np.random.normal(0, 25)
            if gender == '男':
                base_chol += 10
            cholesterol.append(max(120, min(350, base_chol)))
            
            # 血糖
            base_glucose = 95 + (age - 30) * 0.3 + np.random.normal(0, 15)
            glucose.append(max(70, min(200, base_glucose)))
        
        # 疾病风险评估
        hypertension_risk = []
        diabetes_risk = []
        heart_disease_risk = []
        
        for i in range(n_patients):
            # 高血压风险
            hyp_risk = 0
            if systolic_bp[i] > 140 or diastolic_bp[i] > 90:
                hyp_risk += 0.4
            if ages[i] > 50:
                hyp_risk += 0.2
            if bmi[i] > 28:
                hyp_risk += 0.2
            if genders[i] == '男':
                hyp_risk += 0.1
            hyp_risk += np.random.normal(0, 0.1)
            hypertension_risk.append(max(0, min(1, hyp_risk)))
            
            # 糖尿病风险
            diab_risk = 0
            if glucose[i] > 126:
                diab_risk += 0.5
            if bmi[i] > 30:
                diab_risk += 0.2
            if ages[i] > 45:
                diab_risk += 0.15
            diab_risk += np.random.normal(0, 0.1)
            diabetes_risk.append(max(0, min(1, diab_risk)))
            
            # 心脏病风险
            heart_risk = 0
            if cholesterol[i] > 240:
                heart_risk += 0.3
            if systolic_bp[i] > 140:
                heart_risk += 0.2
            if ages[i] > 55:
                heart_risk += 0.2
            if genders[i] == '男':
                heart_risk += 0.15
            heart_risk += np.random.normal(0, 0.1)
            heart_disease_risk.append(max(0, min(1, heart_risk)))
        
        return pd.DataFrame({
            'age': ages,
            'gender': genders,
            'systolic_bp': systolic_bp,
            'diastolic_bp': diastolic_bp,
            'heart_rate': heart_rate,
            'bmi': bmi,
            'cholesterol': cholesterol,
            'glucose': glucose,
            'hypertension_risk': hypertension_risk,
            'diabetes_risk': diabetes_risk,
            'heart_disease_risk': heart_disease_risk
        })
    
    def health_dashboard(self, data):
        """Build a multi-panel dashboard figure for a patient cohort.

        The figure uses a 4x4 grid holding ten panels: age histogram, gender
        pie, BMI-category bars, blood-pressure scatter, a correlation heat map
        of the vital signs, mean disease risks, risk by age band, vital signs
        by gender, overall risk tiers, and a simulated 12-month trend.

        Args:
            data: DataFrame with the columns ``age``, ``gender``,
                ``systolic_bp``, ``diastolic_bp``, ``heart_rate``, ``bmi``,
                ``cholesterol``, ``glucose``, ``hypertension_risk``,
                ``diabetes_risk`` and ``heart_disease_risk``.  The frame is
                only read; derived values stay in local variables.

        Returns:
            The Matplotlib figure holding all panels.

        Note:
            The trend panel is synthetic: it draws noise from NumPy's global
            random state and does not depend on ``data``.
        """
        fig = plt.figure(figsize=(20, 16))
        gs = fig.add_gridspec(4, 4, hspace=0.3, wspace=0.3)

        risk_cols = ['hypertension_risk', 'diabetes_risk', 'heart_disease_risk']

        # 1. Age distribution
        ax1 = fig.add_subplot(gs[0, 0])
        ax1.hist(data['age'], bins=20, alpha=0.7, color='skyblue', edgecolor='black')
        ax1.set_title('年龄分布', fontweight='bold')
        ax1.set_xlabel('年龄')
        ax1.set_ylabel('人数')
        ax1.grid(True, alpha=0.3)

        mean_age = data['age'].mean()
        ax1.axvline(mean_age, color='red', linestyle='--', linewidth=2,
                    label=f'平均年龄: {mean_age:.1f}')
        ax1.legend()

        # 2. Gender split.  Colours are looked up per label because
        # value_counts() orders by frequency, not by a fixed gender order.
        ax2 = fig.add_subplot(gs[0, 1])
        gender_palette = {'男': 'lightblue', '女': 'lightpink'}
        gender_counts = data['gender'].value_counts()
        ax2.pie(gender_counts.values, labels=gender_counts.index,
                autopct='%1.1f%%',
                colors=[gender_palette.get(g, 'lightgray') for g in gender_counts.index],
                startangle=90)
        ax2.set_title('性别分布', fontweight='bold')

        # 3. BMI categories, always shown in clinical order with a fixed
        # colour per category.
        ax3 = fig.add_subplot(gs[0, 2])
        bmi_labels = ['偏瘦', '正常', '超重', '肥胖']
        bmi_palette = {'偏瘦': 'yellow', '正常': 'lightgreen',
                       '超重': 'orange', '肥胖': 'red'}
        # right=False gives [low, high) bands: <18.5, <24, <28, >=28.
        bmi_category = pd.cut(data['bmi'], bins=[-np.inf, 18.5, 24, 28, np.inf],
                              labels=bmi_labels, right=False)
        bmi_counts = bmi_category.value_counts().reindex(bmi_labels, fill_value=0)

        bars = ax3.bar(bmi_labels, bmi_counts.to_numpy(),
                       color=[bmi_palette[label] for label in bmi_labels], alpha=0.7)
        ax3.set_title('BMI分类分布', fontweight='bold')
        ax3.set_ylabel('人数')
        ax3.tick_params(axis='x', rotation=45)

        for bar in bars:
            height = bar.get_height()
            ax3.text(bar.get_x() + bar.get_width() / 2., height + 5,
                     f'{int(height)}', ha='center', va='bottom')

        # 4. Blood-pressure scatter, coloured by category
        ax4 = fig.add_subplot(gs[0, 3])

        def bp_color(systolic, diastolic):
            """Return the marker colour for one blood-pressure reading."""
            if systolic < 120 and diastolic < 80:
                return 'green'   # normal
            if systolic < 140 and diastolic < 90:
                return 'yellow'  # elevated
            return 'red'         # hypertensive

        bp_colors = [bp_color(systolic, diastolic)
                     for systolic, diastolic in zip(data['systolic_bp'], data['diastolic_bp'])]
        ax4.scatter(data['systolic_bp'], data['diastolic_bp'],
                    c=bp_colors, alpha=0.6, s=30)
        ax4.set_xlabel('收缩压 (mmHg)')
        ax4.set_ylabel('舒张压 (mmHg)')
        ax4.set_title('血压分布', fontweight='bold')
        ax4.grid(True, alpha=0.3)

        # Category thresholds
        ax4.axvline(120, color='orange', linestyle='--', alpha=0.7, label='收缩压120')
        ax4.axvline(140, color='red', linestyle='--', alpha=0.7, label='收缩压140')
        ax4.axhline(80, color='orange', linestyle='--', alpha=0.7, label='舒张压80')
        ax4.axhline(90, color='red', linestyle='--', alpha=0.7, label='舒张压90')
        ax4.legend(fontsize=8)

        # 5. Correlation heat map of the vital signs
        ax5 = fig.add_subplot(gs[1, :])
        numeric_cols = ['age', 'systolic_bp', 'diastolic_bp', 'heart_rate',
                        'bmi', 'cholesterol', 'glucose']
        numeric_labels = ['年龄', '收缩压', '舒张压', '心率', 'BMI', '胆固醇', '血糖']
        corr_matrix = data[numeric_cols].corr()

        im = ax5.imshow(corr_matrix, cmap='RdBu_r', aspect='auto', vmin=-1, vmax=1)
        ax5.set_xticks(range(len(numeric_cols)))
        ax5.set_yticks(range(len(numeric_cols)))
        ax5.set_xticklabels(numeric_labels)
        ax5.set_yticklabels(numeric_labels)

        # Print each coefficient inside its cell.
        for i in range(len(numeric_cols)):
            for j in range(len(numeric_cols)):
                ax5.text(j, i, f'{corr_matrix.iloc[i, j]:.2f}',
                         ha="center", va="center", color="black", fontsize=10)

        ax5.set_title('生理指标相关性分析', fontweight='bold', pad=20)
        cbar = fig.colorbar(im, ax=ax5, shrink=0.8)
        cbar.set_label('相关系数')

        # 6. Mean disease risk
        ax6 = fig.add_subplot(gs[2, 0])
        risk_means = data[risk_cols].mean()

        bars = ax6.bar(['高血压', '糖尿病', '心脏病'], risk_means,
                       color=['red', 'orange', 'purple'], alpha=0.7)
        ax6.set_title('平均疾病风险', fontweight='bold')
        ax6.set_ylabel('风险评分')
        ax6.set_ylim(0, 1)

        for bar, value in zip(bars, risk_means):
            ax6.text(bar.get_x() + bar.get_width() / 2., value + 0.02,
                     f'{value:.3f}', ha='center', va='bottom')

        # 7. Risk by age band
        ax7 = fig.add_subplot(gs[2, 1])
        # include_lowest=True keeps patients aged exactly 18 in the first
        # band; pd.cut otherwise excludes the left edge and drops them.
        age_groups = pd.cut(data['age'], bins=[18, 30, 40, 50, 60, 70, 90],
                            labels=['18-30', '30-40', '40-50', '50-60', '60-70', '70+'],
                            include_lowest=True)
        # observed=False keeps empty age bands on the axis.
        risk_by_age = data.groupby(age_groups, observed=False)[risk_cols].mean()

        x = np.arange(len(risk_by_age.index))
        width = 0.25

        ax7.bar(x - width, risk_by_age['hypertension_risk'],
                width, label='高血压', color='red', alpha=0.7)
        ax7.bar(x, risk_by_age['diabetes_risk'],
                width, label='糖尿病', color='orange', alpha=0.7)
        ax7.bar(x + width, risk_by_age['heart_disease_risk'],
                width, label='心脏病', color='purple', alpha=0.7)

        ax7.set_xlabel('年龄组')
        ax7.set_ylabel('平均风险评分')
        ax7.set_title('年龄与疾病风险关系', fontweight='bold')
        ax7.set_xticks(x)
        ax7.set_xticklabels([str(group) for group in risk_by_age.index])
        ax7.legend()
        ax7.grid(True, alpha=0.3)

        # 8. Vital signs by gender
        ax8 = fig.add_subplot(gs[2, 2])
        vital_cols = ['systolic_bp', 'diastolic_bp', 'heart_rate', 'bmi']
        # reindex so a cohort missing one gender yields NaN bars instead of
        # a KeyError.
        gender_stats = data.groupby('gender')[vital_cols].mean().reindex(['男', '女'])

        x = np.arange(len(vital_cols))
        width = 0.35

        ax8.bar(x - width / 2, gender_stats.loc['男'].to_numpy(), width,
                label='男性', color='lightblue', alpha=0.7)
        ax8.bar(x + width / 2, gender_stats.loc['女'].to_numpy(), width,
                label='女性', color='lightpink', alpha=0.7)

        ax8.set_xlabel('生理指标')
        ax8.set_ylabel('平均值')
        ax8.set_title('性别差异分析', fontweight='bold')
        ax8.set_xticks(x)
        ax8.set_xticklabels(['收缩压', '舒张压', '心率', 'BMI'])
        ax8.legend()
        ax8.grid(True, alpha=0.3)

        # 9. Overall risk tiers
        ax9 = fig.add_subplot(gs[2, 3])
        total_risk = (data['hypertension_risk'] + data['diabetes_risk']
                      + data['heart_disease_risk']) / 3

        tier_labels = ['低风险', '中风险', '高风险']
        tier_palette = {'低风险': 'green', '中风险': 'yellow', '高风险': 'red'}
        # [low, high) bands: <0.3 low, <0.6 medium, otherwise high.
        risk_tier = pd.cut(total_risk, bins=[-np.inf, 0.3, 0.6, np.inf],
                           labels=tier_labels, right=False)
        tier_counts = risk_tier.value_counts().reindex(tier_labels, fill_value=0)
        # Skip empty tiers so the pie has no zero-width wedges.
        shown = [(label, count)
                 for label, count in zip(tier_labels, tier_counts.to_numpy())
                 if count > 0]

        ax9.pie([count for _, count in shown],
                labels=[label for label, _ in shown],
                autopct='%1.1f%%',
                colors=[tier_palette[label] for label, _ in shown],
                startangle=90)
        ax9.set_title('风险分级分布', fontweight='bold')

        # 10. Simulated monthly trend of three indicators
        ax10 = fig.add_subplot(gs[3, :])

        # 'MS' (month start) replaces the deprecated 'M' alias.
        months = pd.date_range('2023-01', periods=12, freq='MS')

        # Seasonal sine wave plus noise for each indicator.
        phase = np.arange(12) * 2 * np.pi / 12
        avg_bp = 125 + 5 * np.sin(phase) + np.random.normal(0, 2, 12)
        avg_glucose = 100 + 3 * np.sin(phase + np.pi / 4) + np.random.normal(0, 1.5, 12)
        avg_cholesterol = 200 + 10 * np.sin(phase + np.pi / 2) + np.random.normal(0, 5, 12)

        ax10_twin1 = ax10.twinx()
        ax10_twin2 = ax10.twinx()

        # Push the third y-axis outward so it does not overlap the second.
        ax10_twin2.spines['right'].set_position(('outward', 60))

        line1 = ax10.plot(months, avg_bp, 'r-o', linewidth=2, label='平均血压', markersize=6)
        line2 = ax10_twin1.plot(months, avg_glucose, 'g-s', linewidth=2, label='平均血糖', markersize=6)
        line3 = ax10_twin2.plot(months, avg_cholesterol, 'b-^', linewidth=2, label='平均胆固醇', markersize=6)

        ax10.set_xlabel('月份')
        ax10.set_ylabel('血压 (mmHg)', color='red')
        ax10_twin1.set_ylabel('血糖 (mg/dL)', color='green')
        ax10_twin2.set_ylabel('胆固醇 (mg/dL)', color='blue')

        ax10.set_title('健康指标月度趋势', fontweight='bold', pad=20)

        ax10.tick_params(axis='y', labelcolor='red')
        ax10_twin1.tick_params(axis='y', labelcolor='green')
        ax10_twin2.tick_params(axis='y', labelcolor='blue')

        # One combined legend for the lines of all three axes.
        lines = line1 + line2 + line3
        ax10.legend(lines, [line.get_label() for line in lines], loc='upper left')

        ax10.grid(True, alpha=0.3)

        plt.tight_layout()
        return fig
    
    def generate_health_report(self, data):
        """生成健康报告"""
        # 计算关键统计指标
        total_patients = len(data)
        avg_age = data['age'].mean()
        
        # 血压异常比例
        hypertension_count = len(data[(data['systolic_bp'] > 140) | (data['diastolic_bp'] > 90)])
        hypertension_rate = hypertension_count / total_patients * 100
        
        # 糖尿病风险
        high_glucose_count = len(data[data['glucose'] > 126])
        diabetes_rate = high_glucose_count / total_patients * 100
        
        # BMI异常
        obesity_count = len(data[data['bmi'] > 28])
        obesity_rate = obesity_count / total_patients * 100
        
        # 高风险患者
        data['total_risk'] = (data['hypertension_risk'] + data['diabetes_risk'] + data['heart_disease_risk']) / 3
        high_risk_count = len(data[data['total_risk'] > 0.6])
        high_risk_rate = high_risk_count / total_patients * 100
        
        report = f"""
健康数据分析报告
==================

基本信息:
- 总患者数: {total_patients:,}
- 平均年龄: {avg_age:.1f}岁
- 男性比例: {len(data[data['gender']=='男'])/total_patients*100:.1f}%
- 女性比例: {len(data[data['gender']=='女'])/total_patients*100:.1f}%

健康指标异常率:
- 高血压患者: {hypertension_count}人 ({hypertension_rate:.1f}%)
- 血糖异常: {high_glucose_count}人 ({diabetes_rate:.1f}%)
- 超重/肥胖: {obesity_count}人 ({obesity_rate:.1f}%)

风险评估:
- 高风险患者: {high_risk_count}人 ({high_risk_rate:.1f}%)
- 平均高血压风险: {data['hypertension_risk'].mean():.3f}
- 平均糖尿病风险: {data['diabetes_risk'].mean():.3f}
- 平均心脏病风险: {data['heart_disease_risk'].mean():.3f}

建议:
1. 加强对{high_risk_count}名高风险患者的监测
2. 推广健康生活方式,降低肥胖率
3. 定期血压和血糖筛查
4. 针对50岁以上人群加强心血管疾病预防
        """
        
        return report

# Usage example: generate a cohort, draw the dashboard, then print the report.
medical_viz = MedicalDataVisualization()

print("生成患者数据...")
# Synthetic cohort of 1000 patients.
patient_data = medical_viz.generate_patient_data(1000)

print("创建健康数据仪表板...")
# Build the dashboard figure and display it.
fig = medical_viz.health_dashboard(patient_data)
plt.show()

print("生成健康报告...")
# Text summary of the same cohort.
report = medical_viz.generate_health_report(patient_data)
print(report)

8.2.3 教育数据分析与可视化

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
from matplotlib.patches import Rectangle
from matplotlib.sankey import Sankey

class EducationDataVisualization:
    """教育数据分析与可视化"""
    
    def __init__(self):
        """Apply the Matplotlib settings used for rendering Chinese labels."""
        plt.rcParams.update({
            # Sans-serif font candidates that ship CJK glyphs.
            'font.sans-serif': ['SimHei', 'Arial Unicode MS'],
            # Keep minus signs as a plain hyphen so they render with CJK fonts.
            'axes.unicode_minus': False,
        })
        
    def generate_student_data(self, n_students=500):
        """生成模拟学生数据"""
        np.random.seed(42)
        
        # 基础信息
        student_ids = [f'S{i:04d}' for i in range(1, n_students + 1)]
        grades = np.random.choice(['一年级', '二年级', '三年级', '四年级', '五年级', '六年级'], 
                                 n_students, p=[0.17, 0.17, 0.17, 0.16, 0.16, 0.17])
        genders = np.random.choice(['男', '女'], n_students, p=[0.52, 0.48])
        
        # 学科成绩(考虑年级差异)
        subjects = ['语文', '数学', '英语', '科学', '体育', '美术']
        scores = {}
        
        for subject in subjects:
            subject_scores = []
            for grade in grades:
                # 不同年级的基础分数不同
                grade_num = ['一年级', '二年级', '三年级', '四年级', '五年级', '六年级'].index(grade) + 1
                base_score = 70 + grade_num * 2  # 高年级基础分更高
                
                # 不同学科的难度差异
                if subject == '数学':
                    base_score -= 5  # 数学相对较难
                elif subject == '体育':
                    base_score += 10  # 体育分数普遍较高
                elif subject == '美术':
                    base_score += 8
                
                # 添加随机变化
                score = base_score + np.random.normal(0, 12)
                score = max(0, min(100, score))  # 限制在0-100范围内
                subject_scores.append(score)
            
            scores[subject] = subject_scores
        
        # 出勤率
        attendance_rates = np.random.beta(8, 2, n_students) * 100  # 大多数学生出勤率较高
        
        # 家庭背景(影响成绩)
        family_income = np.random.choice(['低收入', '中等收入', '高收入'], 
                                        n_students, p=[0.3, 0.5, 0.2])
        parent_education = np.random.choice(['初中', '高中', '大学', '研究生'], 
                                          n_students, p=[0.2, 0.4, 0.3, 0.1])
        
        # 根据家庭背景调整成绩
        for subject in subjects:
            for i, (income, edu) in enumerate(zip(family_income, parent_education)):
                adjustment = 0
                if income == '高收入':
                    adjustment += 5
                elif income == '低收入':
                    adjustment -= 3
                
                if edu == '研究生':
                    adjustment += 8
                elif edu == '大学':
                    adjustment += 4
                elif edu == '初中':
                    adjustment -= 4
                
                scores[subject][i] = max(0, min(100, scores[subject][i] + adjustment))
        
        # 计算总分和排名
        total_scores = [sum(scores[subject][i] for subject in subjects) for i in range(n_students)]
        
        # 创建DataFrame
        data = pd.DataFrame({
            'student_id': student_ids,
            'grade': grades,
            'gender': genders,
            'attendance_rate': attendance_rates,
            'family_income': family_income,
            'parent_education': parent_education,
            'total_score': total_scores
        })
        
        # 添加各科成绩
        for subject in subjects:
            data[subject] = scores[subject]
        
        return data
    
    def academic_performance_dashboard(self, data):
        """Build a multi-panel dashboard figure of academic performance.

        Panels: score box plots per subject, subject means per grade,
        subject means per gender, subject correlation heat map, total score
        by family income, total score by parent education, subject means per
        attendance band, and a pie of total-score levels.

        Args:
            data: DataFrame with the columns ``grade``, ``gender``,
                ``attendance_rate``, ``family_income``, ``parent_education``,
                ``total_score`` and one column per subject (``语文``,
                ``数学``, ``英语``, ``科学``, ``体育``, ``美术``).  The frame
                is only read; derived values stay in local variables.

        Returns:
            The Matplotlib figure holding all panels.
        """
        fig = plt.figure(figsize=(20, 16))
        gs = fig.add_gridspec(4, 4, hspace=0.3, wspace=0.3)

        subjects = ['语文', '数学', '英语', '科学', '体育', '美术']

        # groupby() sorts Chinese labels by code point, which is not their
        # natural order, so every categorical axis is reordered explicitly.
        grade_order = ['一年级', '二年级', '三年级', '四年级', '五年级', '六年级']
        income_palette = {'低收入': 'red', '中等收入': 'yellow', '高收入': 'green'}
        education_palette = {'初中': 'red', '高中': 'orange',
                             '大学': 'lightgreen', '研究生': 'green'}

        def in_preferred_order(index, preferred):
            """Return the labels of ``index``: preferred ones first, rest after."""
            known = [label for label in preferred if label in index]
            extra = [label for label in index if label not in preferred]
            return known + extra

        # 1. Score distribution per subject
        ax1 = fig.add_subplot(gs[0, :])
        positions = list(range(len(subjects)))
        bp = ax1.boxplot([data[subject] for subject in subjects],
                         positions=positions, patch_artist=True, showmeans=True)
        # Tick labels are set on the axis because boxplot's `labels` keyword
        # is deprecated in recent Matplotlib releases.
        ax1.set_xticks(positions)
        ax1.set_xticklabels(subjects)

        box_colors = ['lightblue', 'lightgreen', 'lightcoral',
                      'lightyellow', 'lightpink', 'lightgray']
        for patch, color in zip(bp['boxes'], box_colors):
            patch.set_facecolor(color)
            patch.set_alpha(0.7)

        ax1.set_title('各科成绩分布箱线图', fontsize=14, fontweight='bold')
        ax1.set_ylabel('成绩')
        ax1.grid(True, alpha=0.3)

        # Annotate each box with the subject mean.
        for position, subject in zip(positions, subjects):
            mean_score = data[subject].mean()
            ax1.text(position, mean_score + 2, f'{mean_score:.1f}',
                     ha='center', va='bottom', fontweight='bold')

        # 2. Subject means per grade
        ax2 = fig.add_subplot(gs[1, 0:2])
        grade_performance = data.groupby('grade')[subjects].mean()
        grade_performance = grade_performance.loc[
            in_preferred_order(grade_performance.index, grade_order)]

        x = np.arange(len(grade_performance.index))
        width = 0.12

        for i, subject in enumerate(subjects):
            ax2.bar(x + i * width, grade_performance[subject], width,
                    label=subject, alpha=0.8)

        ax2.set_xlabel('年级')
        ax2.set_ylabel('平均成绩')
        ax2.set_title('各年级学科成绩对比', fontweight='bold')
        # Centre the tick under each group of six bars.
        ax2.set_xticks(x + width * 2.5)
        ax2.set_xticklabels(grade_performance.index)
        ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax2.grid(True, alpha=0.3)

        # 3. Subject means per gender
        ax3 = fig.add_subplot(gs[1, 2:4])
        # reindex so data missing one gender yields NaN bars, not a KeyError.
        gender_performance = data.groupby('gender')[subjects].mean().reindex(['男', '女'])

        x = np.arange(len(subjects))
        width = 0.35

        ax3.bar(x - width / 2, gender_performance.loc['男'], width,
                label='男生', color='lightblue', alpha=0.8)
        ax3.bar(x + width / 2, gender_performance.loc['女'], width,
                label='女生', color='lightpink', alpha=0.8)

        ax3.set_xlabel('学科')
        ax3.set_ylabel('平均成绩')
        ax3.set_title('性别学科成绩差异', fontweight='bold')
        ax3.set_xticks(x)
        ax3.set_xticklabels(subjects)
        ax3.legend()
        ax3.grid(True, alpha=0.3)

        # 4. Subject correlation heat map
        ax4 = fig.add_subplot(gs[2, 0:2])
        corr_matrix = data[subjects].corr()

        ax4.imshow(corr_matrix, cmap='RdBu_r', aspect='auto', vmin=-1, vmax=1)
        ax4.set_xticks(range(len(subjects)))
        ax4.set_yticks(range(len(subjects)))
        ax4.set_xticklabels(subjects)
        ax4.set_yticklabels(subjects)

        # Print each coefficient inside its cell.
        for i in range(len(subjects)):
            for j in range(len(subjects)):
                ax4.text(j, i, f'{corr_matrix.iloc[i, j]:.2f}',
                         ha="center", va="center", color="black", fontsize=9)

        ax4.set_title('学科成绩相关性', fontweight='bold')

        # 5. Total score by family income (low -> high, red -> green)
        ax5 = fig.add_subplot(gs[2, 2])
        income_performance = data.groupby('family_income')['total_score'].mean()
        income_labels = in_preferred_order(income_performance.index, list(income_palette))
        income_performance = income_performance.loc[income_labels]

        bars = ax5.bar(income_labels, income_performance.to_numpy(),
                       color=[income_palette.get(label, 'gray') for label in income_labels],
                       alpha=0.7)
        ax5.set_title('家庭收入与成绩关系', fontweight='bold')
        ax5.set_ylabel('平均总分')
        ax5.tick_params(axis='x', rotation=45)

        for bar in bars:
            height = bar.get_height()
            ax5.text(bar.get_x() + bar.get_width() / 2., height + 5,
                     f'{height:.1f}', ha='center', va='bottom')

        # 6. Total score by parent education (lowest -> highest level)
        ax6 = fig.add_subplot(gs[2, 3])
        edu_performance = data.groupby('parent_education')['total_score'].mean()
        edu_labels = in_preferred_order(edu_performance.index, list(education_palette))
        edu_performance = edu_performance.loc[edu_labels]

        bars = ax6.bar(edu_labels, edu_performance.to_numpy(),
                       color=[education_palette.get(label, 'gray') for label in edu_labels],
                       alpha=0.7)
        ax6.set_title('父母教育水平与成绩', fontweight='bold')
        ax6.set_ylabel('平均总分')
        ax6.tick_params(axis='x', rotation=45)

        for bar in bars:
            height = bar.get_height()
            ax6.text(bar.get_x() + bar.get_width() / 2., height + 5,
                     f'{height:.1f}', ha='center', va='bottom')

        # 7. Subject means per attendance band
        ax7 = fig.add_subplot(gs[3, 0:2])
        # include_lowest=True keeps a 0% attendance rate in the first band.
        attendance_groups = pd.cut(data['attendance_rate'],
                                   bins=[0, 80, 90, 95, 100],
                                   labels=['<80%', '80-90%', '90-95%', '95-100%'],
                                   include_lowest=True)
        # observed=False keeps empty bands on the axis.
        attendance_performance = data.groupby(attendance_groups, observed=False)[subjects].mean()

        x = np.arange(len(attendance_performance.index))
        width = 0.12

        for i, subject in enumerate(subjects):
            ax7.bar(x + i * width, attendance_performance[subject], width,
                    label=subject, alpha=0.8)

        ax7.set_xlabel('出勤率区间')
        ax7.set_ylabel('平均成绩')
        ax7.set_title('出勤率与学科成绩关系', fontweight='bold')
        ax7.set_xticks(x + width * 2.5)
        ax7.set_xticklabels([str(band) for band in attendance_performance.index])
        ax7.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax7.grid(True, alpha=0.3)

        # 8. Distribution of total-score levels
        ax8 = fig.add_subplot(gs[3, 2:4])

        def get_grade_level(score):
            """Map a six-subject total score to its level label."""
            if score >= 540:  # average of 90
                return '优秀'
            elif score >= 480:  # average of 80
                return '良好'
            elif score >= 420:  # average of 70
                return '中等'
            elif score >= 360:  # average of 60
                return '及格'
            else:
                return '不及格'

        level_palette = {'优秀': 'green', '良好': 'lightgreen', '中等': 'yellow',
                         '及格': 'orange', '不及格': 'red'}
        level_counts = data['total_score'].apply(get_grade_level).value_counts()
        level_counts = level_counts.reindex(list(level_palette), fill_value=0)
        # Skip empty levels so the pie has no zero-width wedges.
        level_counts = level_counts[level_counts > 0]

        ax8.pie(level_counts.to_numpy(), labels=list(level_counts.index),
                autopct='%1.1f%%',
                colors=[level_palette[level] for level in level_counts.index],
                startangle=90)
        ax8.set_title('学生成绩等级分布', fontweight='bold')

        plt.tight_layout()
        return fig
    
    def generate_education_report(self, data):
        """生成教育分析报告"""
        subjects = ['语文', '数学', '英语', '科学', '体育', '美术']
        
        # 基本统计
        total_students = len(data)
        avg_total_score = data['total_score'].mean()
        avg_attendance = data['attendance_rate'].mean()
        
        # 各科平均分
        subject_averages = {subject: data[subject].mean() for subject in subjects}
        best_subject = max(subject_averages, key=subject_averages.get)
        worst_subject = min(subject_averages, key=subject_averages.get)
        
        # 性别差异
        gender_diff = data.groupby('gender')[subjects].mean()
        
        # 年级表现
        grade_performance = data.groupby('grade')['total_score'].mean()
        best_grade = grade_performance.idxmax()
        
        # 家庭背景影响
        income_impact = data.groupby('family_income')['total_score'].mean()
        edu_impact = data.groupby('parent_education')['total_score'].mean()
        
        # 优秀学生比例
        excellent_students = len(data[data['total_score'] >= 540])
        excellent_rate = excellent_students / total_students * 100
        
        # 需要关注的学生
        at_risk_students = len(data[data['total_score'] < 360])
        at_risk_rate = at_risk_students / total_students * 100
        
        report = f"""
教育数据分析报告
================

基本信息:
- 学生总数: {total_students}
- 平均总分: {avg_total_score:.1f}分
- 平均出勤率: {avg_attendance:.1f}%

学科表现:
- 表现最好学科: {best_subject} ({subject_averages[best_subject]:.1f}分)
- 需要提升学科: {worst_subject} ({subject_averages[worst_subject]:.1f}分)

各科平均分:
{chr(10).join([f'- {subject}: {avg:.1f}分' for subject, avg in subject_averages.items()])}

年级表现:
- 表现最好年级: {best_grade} ({grade_performance[best_grade]:.1f}分)

性别差异分析:
男生优势学科: {gender_diff.loc['男'].idxmax()} ({gender_diff.loc['男'].max():.1f}分)
女生优势学科: {gender_diff.loc['女'].idxmax()} ({gender_diff.loc['女'].max():.1f}分)

家庭背景影响:
- 高收入家庭平均分: {income_impact['高收入']:.1f}分
- 中等收入家庭平均分: {income_impact['中等收入']:.1f}分
- 低收入家庭平均分: {income_impact['低收入']:.1f}分

学生表现分布:
- 优秀学生: {excellent_students}人 ({excellent_rate:.1f}%)
- 需要关注学生: {at_risk_students}人 ({at_risk_rate:.1f}%)

建议:
1. 加强{worst_subject}学科教学,提高整体水平
2. 关注{at_risk_students}名学习困难学生,提供个性化辅导
3. 推广{best_grade}的成功经验到其他年级
4. 加强家校合作,特别关注低收入家庭学生
5. 提高学生出勤率,建立激励机制
        """
        
        return report

# Usage example: generate students, draw the dashboard, then print the report.
edu_viz = EducationDataVisualization()

print("生成学生数据...")
# Synthetic table of 500 students.
student_data = edu_viz.generate_student_data(500)

print("创建学业表现仪表板...")
# Build the dashboard figure and display it.
fig = edu_viz.academic_performance_dashboard(student_data)
plt.show()

print("生成教育分析报告...")
# Text summary of the same data.
report = edu_viz.generate_education_report(student_data)
print(report)

8.3 本章总结

8.3.1 学习要点回顾

通过本章的学习,我们掌握了以下核心技能:

1. 项目实战能力
   - 数据分析报告自动化生成
   - 交互式数据仪表板开发
   - 科学计算结果可视化
   - 完整项目的架构设计

2. 行业应用实践
   - 金融数据分析与技术指标可视化
   - 医疗健康数据的多维度分析
   - 教育数据的统计分析与洞察
   - 不同领域的数据特点和可视化需求

3. 高级可视化技术
   - 复杂仪表板的布局设计
   - 多图表联动和交互
   - 数据驱动的动态图表
   - 专业级报告生成

4. 数据分析思维
   - 从业务需求到技术实现
   - 数据质量评估和处理
   - 统计分析方法的应用
   - 结果解释和建议提出

8.3.2 实践练习

练习1:个人项目开发
- 选择一个感兴趣的领域(如体育、环境、社交媒体等)
- 收集或生成相关数据
- 设计并实现完整的可视化分析系统
- 生成专业的分析报告

练习2:行业案例扩展
- 基于本章的案例,添加新的分析维度
- 实现更复杂的交互功能
- 集成机器学习预测模型
- 优化性能和用户体验

练习3:团队协作项目
- 与同学或同事合作开发大型可视化项目
- 实践代码版本控制和模块化设计
- 进行用户测试和反馈收集
- 迭代改进产品功能

8.3.3 常见问题解答

Q1: 如何处理大规模数据的可视化?
A1:
- 使用数据采样和聚合技术
- 实现分页和懒加载
- 考虑使用专业的大数据可视化工具
- 优化算法和数据结构

Q2: 如何确保可视化的准确性和可信度?
A2:
- 严格的数据验证和清洗流程
- 多重检查和交叉验证
- 透明的数据来源和处理方法说明
- 适当的统计显著性检验

Q3: 如何平衡功能丰富性和易用性?
A3:
- 采用渐进式设计,从简单到复杂
- 提供多层次的用户界面
- 充分的用户测试和反馈
- 清晰的文档和帮助系统

Q4: 如何选择合适的可视化类型?
A4:
- 明确分析目标和受众需求
- 考虑数据的类型和分布特征
- 参考行业标准和最佳实践
- 进行A/B测试验证效果

8.3.4 进阶学习方向

1. Web可视化技术
   - 学习D3.js、Plotly.js等前端可视化库
   - 掌握React、Vue等现代前端框架
   - 了解WebGL和Canvas高性能渲染
   - 实现响应式和移动端适配

2. 大数据可视化
   - 学习Apache Superset、Grafana等企业级工具
   - 掌握流式数据处理和实时可视化
   - 了解分布式计算和可视化架构
   - 实践云平台的可视化服务

3. 机器学习可视化
   - 模型解释性可视化
   - 特征重要性和相关性分析
   - 模型性能监控仪表板
   - 自动化机器学习流程可视化

4. 商业智能应用
   - 企业级BI系统开发
   - 数据仓库和ETL流程设计
   - KPI监控和预警系统
   - 决策支持系统构建

恭喜你完成了Matplotlib数据可视化教程的学习!通过八个章节的系统学习,你已经掌握了从基础图表到高级应用的完整技能体系。继续实践和探索,你将能够创建出更加专业和有影响力的数据可视化作品。


下一步学习建议:
1. 深入学习特定行业的数据分析方法
2. 探索新兴的可视化技术和工具
3. 参与开源项目,贡献自己的代码
4. 建立个人作品集,展示你的技能
5. 持续关注数据可视化领域的最新发展