Chapter Overview
In the previous chapters we covered PyTorch's basic concepts and tensor operations. We now move on to the core of deep learning: neural networks. This chapter explains in detail how to build neural networks in PyTorch, including how to use nn.Module, how to define the common layer types, and how the forward pass works.
Learning Objectives
By the end of this chapter you will have a working grasp of:
- The basic architecture of a PyTorch neural network
- How to use the nn.Module class
- Defining and using common neural network layers
- The forward- and backward-propagation mechanisms
- Implementing custom network layers
- Parameter initialization and management
- Saving and loading models
3.1 Neural Network Fundamentals
3.1.1 The Artificial Neuron Model
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
# 简单的感知器模型
class Perceptron:
def __init__(self, input_size):
self.weights = torch.randn(input_size, requires_grad=True)
self.bias = torch.randn(1, requires_grad=True)
def forward(self, x):
return torch.sigmoid(torch.dot(x, self.weights) + self.bias)
def __call__(self, x):
return self.forward(x)
# 创建感知器实例
perceptron = Perceptron(2)
input_data = torch.tensor([1.0, 2.0])
output = perceptron(input_data)
print(f"感知器输出: {output}")
3.1.2 Multilayer Perceptron (MLP)
# 使用PyTorch构建多层感知器
class SimpleMLP(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(SimpleMLP, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, output_size)
self.relu = nn.ReLU()
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
# 创建MLP模型
model = SimpleMLP(input_size=10, hidden_size=20, output_size=1)
print(model)
# 查看模型参数
for name, param in model.named_parameters():
print(f"{name}: {param.shape}")
3.2 nn.Module in Depth
3.2.1 nn.Module Basics
nn.Module is the base class for all neural network modules in PyTorch. It provides the core functionality for building and managing networks, such as registering parameters and submodules and switching between training and evaluation modes.
# 自定义神经网络模块
class CustomModule(nn.Module):
def __init__(self):
super(CustomModule, self).__init__()
# 定义子模块
self.linear1 = nn.Linear(10, 5)
self.linear2 = nn.Linear(5, 1)
self.dropout = nn.Dropout(0.2)
# 定义参数
self.custom_param = nn.Parameter(torch.randn(5))
# 定义缓冲区(不参与梯度计算)
self.register_buffer('running_mean', torch.zeros(5))
def forward(self, x):
x = F.relu(self.linear1(x))
x = self.dropout(x)
x = self.linear2(x)
return x
# 创建模型实例
model = CustomModule()
print("模型结构:")
print(model)
# 查看所有参数
print("\n模型参数:")
for name, param in model.named_parameters():
print(f"{name}: {param.shape}, requires_grad: {param.requires_grad}")
# 查看所有缓冲区
print("\n模型缓冲区:")
for name, buffer in model.named_buffers():
print(f"{name}: {buffer.shape}")
3.2.2 Managing Module State
# 训练模式和评估模式
model = CustomModule()
# 设置为训练模式
model.train()
print(f"训练模式: {model.training}")
# 设置为评估模式
model.eval()
print(f"评估模式: {model.training}")
# 递归设置子模块的模式
model.train(True) # 设置为训练模式
for name, module in model.named_modules():
print(f"{name}: training={module.training}")
3.2.3 Parameter Management
# 参数统计
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = count_parameters(model)
print(f"可训练参数总数: {total_params}")
# 参数冻结和解冻
def freeze_model(model):
for param in model.parameters():
param.requires_grad = False
def unfreeze_model(model):
for param in model.parameters():
param.requires_grad = True
# 部分参数冻结
def freeze_layer(layer):
for param in layer.parameters():
param.requires_grad = False
# 冻结第一层
freeze_layer(model.linear1)
print("冻结第一层后的参数状态:")
for name, param in model.named_parameters():
print(f"{name}: requires_grad={param.requires_grad}")
3.3 Common Neural Network Layers
3.3.1 Linear (Fully Connected) Layers
# 线性层详解
class LinearLayerDemo(nn.Module):
def __init__(self):
super(LinearLayerDemo, self).__init__()
# 基本线性层
self.fc1 = nn.Linear(10, 5)
# 不使用偏置的线性层
self.fc2 = nn.Linear(5, 3, bias=False)
# 自定义权重初始化
self.fc3 = nn.Linear(3, 1)
nn.init.xavier_uniform_(self.fc3.weight)
nn.init.zeros_(self.fc3.bias)
def forward(self, x):
x = self.fc1(x)
x = self.fc2(x)
x = self.fc3(x)
return x
# 测试线性层
model = LinearLayerDemo()
input_tensor = torch.randn(32, 10) # batch_size=32, input_size=10
output = model(input_tensor)
print(f"输入形状: {input_tensor.shape}")
print(f"输出形状: {output.shape}")
# 查看权重和偏置
print(f"fc1权重形状: {model.fc1.weight.shape}")
print(f"fc1偏置形状: {model.fc1.bias.shape}")
3.3.2 Activation Layers
# 常用激活函数
class ActivationDemo(nn.Module):
def __init__(self):
super(ActivationDemo, self).__init__()
self.relu = nn.ReLU()
self.leaky_relu = nn.LeakyReLU(0.1)
self.elu = nn.ELU()
self.gelu = nn.GELU()
self.swish = nn.SiLU() # Swish激活函数
self.tanh = nn.Tanh()
self.sigmoid = nn.Sigmoid()
def forward(self, x):
results = {}
results['relu'] = self.relu(x)
results['leaky_relu'] = self.leaky_relu(x)
results['elu'] = self.elu(x)
results['gelu'] = self.gelu(x)
results['swish'] = self.swish(x)
results['tanh'] = self.tanh(x)
results['sigmoid'] = self.sigmoid(x)
return results
# 可视化激活函数
x = torch.linspace(-3, 3, 100)
activation_demo = ActivationDemo()
results = activation_demo(x)
plt.figure(figsize=(15, 10))
for i, (name, y) in enumerate(results.items(), 1):
plt.subplot(2, 4, i)
plt.plot(x.numpy(), y.detach().numpy())
plt.title(f'{name.upper()}')
plt.grid(True)
plt.tight_layout()
plt.show()
3.3.3 Regularization Layers
# Dropout层
class DropoutDemo(nn.Module):
def __init__(self):
super(DropoutDemo, self).__init__()
self.fc1 = nn.Linear(10, 20)
self.dropout1 = nn.Dropout(0.2)
self.fc2 = nn.Linear(20, 10)
self.dropout2 = nn.Dropout(0.5)
self.fc3 = nn.Linear(10, 1)
def forward(self, x):
x = F.relu(self.fc1(x))
x = self.dropout1(x)
x = F.relu(self.fc2(x))
x = self.dropout2(x)
x = self.fc3(x)
return x
# 测试Dropout效果
model = DropoutDemo()
input_tensor = torch.randn(5, 10)
# 训练模式下的输出(有随机性)
model.train()
print("训练模式下的输出:")
for i in range(3):
output = model(input_tensor)
print(f"第{i+1}次: {output[0].item():.4f}")
# 评估模式下的输出(确定性)
model.eval()
print("\n评估模式下的输出:")
for i in range(3):
output = model(input_tensor)
print(f"第{i+1}次: {output[0].item():.4f}")
3.3.4 Batch Normalization Layers
# 批量归一化
class BatchNormDemo(nn.Module):
def __init__(self):
super(BatchNormDemo, self).__init__()
self.fc1 = nn.Linear(10, 20)
self.bn1 = nn.BatchNorm1d(20)
self.fc2 = nn.Linear(20, 10)
self.bn2 = nn.BatchNorm1d(10)
self.fc3 = nn.Linear(10, 1)
def forward(self, x):
x = self.fc1(x)
x = self.bn1(x)
x = F.relu(x)
x = self.fc2(x)
x = self.bn2(x)
x = F.relu(x)
x = self.fc3(x)
return x
# 测试批量归一化
model = BatchNormDemo()
input_tensor = torch.randn(32, 10) # 批量大小为32
# 训练模式
model.train()
output = model(input_tensor)
print(f"训练模式输出形状: {output.shape}")
# 查看BatchNorm的统计信息
print(f"BN1 running_mean: {model.bn1.running_mean[:5]}")
print(f"BN1 running_var: {model.bn1.running_var[:5]}")
3.4 The Forward Pass
3.4.1 The Forward-Propagation Process
# 详细的前向传播示例
class DetailedForward(nn.Module):
def __init__(self):
super(DetailedForward, self).__init__()
self.fc1 = nn.Linear(4, 8)
self.fc2 = nn.Linear(8, 4)
self.fc3 = nn.Linear(4, 1)
def forward(self, x):
print(f"输入形状: {x.shape}")
# 第一层
x1 = self.fc1(x)
print(f"第一层输出形状: {x1.shape}")
x1_activated = F.relu(x1)
print(f"第一层激活后形状: {x1_activated.shape}")
# 第二层
x2 = self.fc2(x1_activated)
print(f"第二层输出形状: {x2.shape}")
x2_activated = F.relu(x2)
# 第三层
x3 = self.fc3(x2_activated)
print(f"最终输出形状: {x3.shape}")
return x3
# 测试前向传播
model = DetailedForward()
input_data = torch.randn(3, 4) # 3个样本,每个样本4个特征
output = model(input_data)
3.4.2 Hooks
# 使用钩子函数监控前向传播
class HookDemo(nn.Module):
def __init__(self):
super(HookDemo, self).__init__()
self.fc1 = nn.Linear(10, 20)
self.fc2 = nn.Linear(20, 10)
self.fc3 = nn.Linear(10, 1)
# 存储中间结果
self.activations = {}
# 注册前向钩子
self.fc1.register_forward_hook(self.get_activation('fc1'))
self.fc2.register_forward_hook(self.get_activation('fc2'))
def get_activation(self, name):
def hook(model, input, output):
self.activations[name] = output.detach()
return hook
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
# 测试钩子函数
model = HookDemo()
input_data = torch.randn(5, 10)
output = model(input_data)
print("中间层激活值:")
for name, activation in model.activations.items():
print(f"{name}: {activation.shape}, mean: {activation.mean().item():.4f}")
3.5 Custom Neural Network Layers
3.5.1 A Simple Custom Layer
# Custom linear layer (math is needed for the Kaiming-style initialization below)
import math
class CustomLinear(nn.Module):
def __init__(self, in_features, out_features, bias=True):
super(CustomLinear, self).__init__()
self.in_features = in_features
self.out_features = out_features
# 定义权重参数
self.weight = nn.Parameter(torch.randn(out_features, in_features))
# 定义偏置参数
if bias:
self.bias = nn.Parameter(torch.randn(out_features))
else:
self.register_parameter('bias', None)
# 初始化参数
self.reset_parameters()
def reset_parameters(self):
nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in)
nn.init.uniform_(self.bias, -bound, bound)
def forward(self, input):
return F.linear(input, self.weight, self.bias)
def extra_repr(self):
return f'in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}'
# Test the custom linear layer
custom_layer = CustomLinear(10, 5)
input_data = torch.randn(3, 10)
output = custom_layer(input_data)
print(f"自定义层输出: {output.shape}")
print(custom_layer)
3.5.2 A More Complex Custom Layer
# 自定义注意力层
class SimpleAttention(nn.Module):
def __init__(self, hidden_size):
super(SimpleAttention, self).__init__()
self.hidden_size = hidden_size
self.attention = nn.Linear(hidden_size, 1, bias=False)
def forward(self, hidden_states):
# hidden_states: (batch_size, seq_len, hidden_size)
# 计算注意力分数
attention_scores = self.attention(hidden_states) # (batch_size, seq_len, 1)
attention_scores = attention_scores.squeeze(-1) # (batch_size, seq_len)
# 应用softmax
attention_weights = F.softmax(attention_scores, dim=-1) # (batch_size, seq_len)
# 计算加权和
attended_output = torch.sum(hidden_states * attention_weights.unsqueeze(-1), dim=1)
# (batch_size, hidden_size)
return attended_output, attention_weights
# 测试注意力层
attention_layer = SimpleAttention(64)
hidden_states = torch.randn(2, 10, 64) # 2个样本,序列长度10,隐藏维度64
output, weights = attention_layer(hidden_states)
print(f"注意力输出形状: {output.shape}")
print(f"注意力权重形状: {weights.shape}")
print(f"注意力权重和: {weights.sum(dim=1)}") # 应该接近1
3.5.3 A Stateful Custom Layer
# 带状态的自定义层(类似LSTM的隐藏状态)
class StatefulLayer(nn.Module):
def __init__(self, input_size, hidden_size):
super(StatefulLayer, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.input_transform = nn.Linear(input_size, hidden_size)
self.hidden_transform = nn.Linear(hidden_size, hidden_size)
# 初始化隐藏状态
self.register_buffer('hidden_state', torch.zeros(1, hidden_size))
    def forward(self, x):
        # x: (batch_size, input_size)
        batch_size = x.size(0)
        # If the batch size changed, re-initialize the stored state with zeros
        # (the buffer is created with batch size 1 in __init__; expanding it in
        # place would break once the batch size shrinks)
        if self.hidden_state.size(0) != batch_size:
            self.hidden_state = self.hidden_state.new_zeros(batch_size, self.hidden_size)
        # Compute the new hidden state
        input_contribution = self.input_transform(x)
        hidden_contribution = self.hidden_transform(self.hidden_state)
        new_hidden = torch.tanh(input_contribution + hidden_contribution)
        # Update the stored state; detach so gradients do not flow across time steps
        self.hidden_state = new_hidden.detach()
        return new_hidden
    def reset_state(self, batch_size=1):
        # Keep the reset state on the same device as the current buffer
        self.hidden_state = torch.zeros(batch_size, self.hidden_size,
                                        device=self.hidden_state.device)
# 测试带状态的层
stateful_layer = StatefulLayer(10, 20)
for i in range(3):
input_data = torch.randn(1, 10)
output = stateful_layer(input_data)
print(f"步骤 {i+1} 输出均值: {output.mean().item():.4f}")
3.6 Parameter Initialization
3.6.1 Common Initialization Methods
# 参数初始化示例
class InitializationDemo(nn.Module):
def __init__(self):
super(InitializationDemo, self).__init__()
self.layers = nn.ModuleList([
nn.Linear(10, 20),
nn.Linear(20, 20),
nn.Linear(20, 10),
nn.Linear(10, 1)
])
# 应用不同的初始化方法
self.apply_initialization()
def apply_initialization(self):
for i, layer in enumerate(self.layers):
if i == 0:
# Xavier/Glorot 初始化
nn.init.xavier_uniform_(layer.weight)
nn.init.zeros_(layer.bias)
elif i == 1:
# Kaiming/He 初始化
nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
nn.init.zeros_(layer.bias)
elif i == 2:
# 正态分布初始化
nn.init.normal_(layer.weight, mean=0, std=0.1)
nn.init.constant_(layer.bias, 0.1)
else:
# 均匀分布初始化
nn.init.uniform_(layer.weight, -0.1, 0.1)
nn.init.zeros_(layer.bias)
def forward(self, x):
for i, layer in enumerate(self.layers[:-1]):
x = F.relu(layer(x))
x = self.layers[-1](x)
return x
# 分析初始化效果
model = InitializationDemo()
print("各层权重统计:")
for i, layer in enumerate(model.layers):
weight = layer.weight.data
print(f"层 {i+1}: 均值={weight.mean().item():.4f}, 标准差={weight.std().item():.4f}")
3.6.2 Custom Initialization Functions
# 自定义初始化函数
def custom_init(module):
if isinstance(module, nn.Linear):
# 对线性层使用自定义初始化
nn.init.orthogonal_(module.weight)
if module.bias is not None:
nn.init.constant_(module.bias, 0)
elif isinstance(module, nn.BatchNorm1d):
# 对批量归一化层的初始化
nn.init.constant_(module.weight, 1)
nn.init.constant_(module.bias, 0)
# 应用自定义初始化
class CustomInitModel(nn.Module):
def __init__(self):
super(CustomInitModel, self).__init__()
self.fc1 = nn.Linear(10, 20)
self.bn1 = nn.BatchNorm1d(20)
self.fc2 = nn.Linear(20, 1)
# 应用自定义初始化
self.apply(custom_init)
def forward(self, x):
x = F.relu(self.bn1(self.fc1(x)))
x = self.fc2(x)
return x
model = CustomInitModel()
print("自定义初始化后的权重:")
for name, param in model.named_parameters():
if 'weight' in name:
print(f"{name}: 均值={param.mean().item():.4f}, 标准差={param.std().item():.4f}")
3.7 Saving and Loading Models
3.7.1 Basic Saving and Loading
# 创建一个示例模型
class ExampleModel(nn.Module):
def __init__(self):
super(ExampleModel, self).__init__()
self.fc1 = nn.Linear(10, 20)
self.fc2 = nn.Linear(20, 1)
self.dropout = nn.Dropout(0.2)
def forward(self, x):
x = F.relu(self.fc1(x))
x = self.dropout(x)
x = self.fc2(x)
return x
# 创建和训练模型(简化版)
model = ExampleModel()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 模拟训练
input_data = torch.randn(100, 10)
target = torch.randn(100, 1)
criterion = nn.MSELoss()
for epoch in range(10):
optimizer.zero_grad()
output = model(input_data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
print(f"训练完成,最终损失: {loss.item():.4f}")
# 保存模型
torch.save(model.state_dict(), 'model_weights.pth')
torch.save({
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': loss.item(),
'epoch': 10
}, 'checkpoint.pth')
print("模型已保存")
3.7.2 Loading a Model
# 加载模型权重
def load_model_weights():
# 创建新的模型实例
new_model = ExampleModel()
# 加载权重
new_model.load_state_dict(torch.load('model_weights.pth'))
new_model.eval()
return new_model
# 加载完整检查点
def load_checkpoint():
new_model = ExampleModel()
new_optimizer = torch.optim.Adam(new_model.parameters(), lr=0.001)
checkpoint = torch.load('checkpoint.pth')
new_model.load_state_dict(checkpoint['model_state_dict'])
new_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
return new_model, new_optimizer, epoch, loss
# 测试加载的模型
try:
loaded_model = load_model_weights()
test_input = torch.randn(5, 10)
with torch.no_grad():
output = loaded_model(test_input)
print(f"加载的模型输出形状: {output.shape}")
print("模型加载成功!")
except FileNotFoundError:
print("模型文件不存在,请先运行保存代码")
3.8 Practical Examples
3.8.1 A Multi-Class Neural Network
# 构建多分类神经网络
class MultiClassifier(nn.Module):
def __init__(self, input_size, hidden_sizes, num_classes, dropout_rate=0.2):
super(MultiClassifier, self).__init__()
# 构建层列表
layers = []
prev_size = input_size
for hidden_size in hidden_sizes:
layers.extend([
nn.Linear(prev_size, hidden_size),
nn.BatchNorm1d(hidden_size),
nn.ReLU(),
nn.Dropout(dropout_rate)
])
prev_size = hidden_size
# 输出层
layers.append(nn.Linear(prev_size, num_classes))
self.network = nn.Sequential(*layers)
def forward(self, x):
return self.network(x)
# 创建分类器
classifier = MultiClassifier(
input_size=784, # 28x28图像展平
hidden_sizes=[512, 256, 128],
num_classes=10, # 10个类别
dropout_rate=0.3
)
print("多分类器结构:")
print(classifier)
# 测试前向传播
test_input = torch.randn(32, 784) # 32个样本
output = classifier(test_input)
print(f"输出形状: {output.shape}")
print(f"输出概率分布(第一个样本): {F.softmax(output[0], dim=0)}")
3.8.2 A Network with Residual Connections
# 实现残差块
class ResidualBlock(nn.Module):
def __init__(self, hidden_size, dropout_rate=0.1):
super(ResidualBlock, self).__init__()
self.fc1 = nn.Linear(hidden_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.bn1 = nn.BatchNorm1d(hidden_size)
self.bn2 = nn.BatchNorm1d(hidden_size)
self.dropout = nn.Dropout(dropout_rate)
def forward(self, x):
residual = x
out = self.fc1(x)
out = self.bn1(out)
out = F.relu(out)
out = self.dropout(out)
out = self.fc2(out)
out = self.bn2(out)
# 残差连接
out += residual
out = F.relu(out)
return out
# 构建残差网络
class ResNet(nn.Module):
def __init__(self, input_size, hidden_size, num_blocks, num_classes):
super(ResNet, self).__init__()
# 输入投影层
self.input_projection = nn.Linear(input_size, hidden_size)
# 残差块
self.residual_blocks = nn.ModuleList([
ResidualBlock(hidden_size) for _ in range(num_blocks)
])
# 输出层
self.output_layer = nn.Linear(hidden_size, num_classes)
def forward(self, x):
x = self.input_projection(x)
x = F.relu(x)
for block in self.residual_blocks:
x = block(x)
x = self.output_layer(x)
return x
# 创建残差网络
resnet = ResNet(input_size=100, hidden_size=256, num_blocks=4, num_classes=10)
print(f"残差网络参数数量: {count_parameters(resnet)}")
# 测试残差网络
test_input = torch.randn(16, 100)
output = resnet(test_input)
print(f"残差网络输出形状: {output.shape}")
3.9 Performance Optimization Techniques
3.9.1 Memory Optimization
# 内存高效的模型设计
class MemoryEfficientModel(nn.Module):
def __init__(self, input_size, hidden_size, num_layers):
super(MemoryEfficientModel, self).__init__()
# 使用参数共享
self.shared_layer = nn.Linear(hidden_size, hidden_size)
self.input_layer = nn.Linear(input_size, hidden_size)
self.output_layer = nn.Linear(hidden_size, 1)
self.num_layers = num_layers
def forward(self, x):
x = F.relu(self.input_layer(x))
# 重复使用同一层(参数共享)
for _ in range(self.num_layers):
x = F.relu(self.shared_layer(x))
x = self.output_layer(x)
return x
# 梯度检查点(用于大模型)
from torch.utils.checkpoint import checkpoint
class CheckpointModel(nn.Module):
def __init__(self):
super(CheckpointModel, self).__init__()
self.layer1 = nn.Linear(1000, 1000)
self.layer2 = nn.Linear(1000, 1000)
self.layer3 = nn.Linear(1000, 1000)
self.layer4 = nn.Linear(1000, 1)
    def forward(self, x):
        # Use gradient checkpointing to save memory by recomputing activations
        # during the backward pass; use_reentrant=False is the recommended mode
        # on recent PyTorch versions
        x = checkpoint(self._forward_block1, x, use_reentrant=False)
        x = checkpoint(self._forward_block2, x, use_reentrant=False)
        x = self.layer4(x)
        return x
def _forward_block1(self, x):
return F.relu(self.layer2(F.relu(self.layer1(x))))
def _forward_block2(self, x):
return F.relu(self.layer3(x))
print("内存优化模型创建完成")
3.9.2 Computation Optimization
# 使用torch.jit进行模型优化
@torch.jit.script
def optimized_activation(x):
return torch.where(x > 0, x, 0.01 * x) # Leaky ReLU的优化实现
class OptimizedModel(nn.Module):
def __init__(self):
super(OptimizedModel, self).__init__()
self.fc1 = nn.Linear(100, 200)
self.fc2 = nn.Linear(200, 100)
self.fc3 = nn.Linear(100, 1)
def forward(self, x):
x = optimized_activation(self.fc1(x))
x = optimized_activation(self.fc2(x))
x = self.fc3(x)
return x
# 编译模型以获得更好的性能
model = OptimizedModel()
scripted_model = torch.jit.script(model)
# Performance comparison (warm up first and disable gradient tracking so the
# measurement reflects inference speed rather than autograd overhead)
import time
input_data = torch.randn(1000, 100)
with torch.no_grad():
    # Warm-up runs so one-time scripting/optimization cost is not measured
    for _ in range(10):
        _ = model(input_data)
        _ = scripted_model(input_data)
    # Original model
    start_time = time.time()
    for _ in range(100):
        _ = model(input_data)
    original_time = time.time() - start_time
    # Scripted model
    start_time = time.time()
    for _ in range(100):
        _ = scripted_model(input_data)
    scripted_time = time.time() - start_time
print(f"Original model time: {original_time:.4f}s")
print(f"Scripted model time: {scripted_time:.4f}s")
print(f"Speedup: {original_time/scripted_time:.2f}x")
3.10 Debugging and Visualization
3.10.1 Model Debugging
# 模型调试工具
class DebugModel(nn.Module):
def __init__(self):
super(DebugModel, self).__init__()
self.fc1 = nn.Linear(10, 20)
self.fc2 = nn.Linear(20, 10)
self.fc3 = nn.Linear(10, 1)
# 调试标志
self.debug = True
def forward(self, x):
if self.debug:
print(f"输入: shape={x.shape}, mean={x.mean().item():.4f}, std={x.std().item():.4f}")
x1 = F.relu(self.fc1(x))
if self.debug:
print(f"第一层输出: shape={x1.shape}, mean={x1.mean().item():.4f}")
print(f"第一层激活率: {(x1 > 0).float().mean().item():.4f}")
x2 = F.relu(self.fc2(x1))
if self.debug:
print(f"第二层输出: shape={x2.shape}, mean={x2.mean().item():.4f}")
x3 = self.fc3(x2)
if self.debug:
print(f"最终输出: shape={x3.shape}, mean={x3.mean().item():.4f}")
return x3
# 测试调试模型
debug_model = DebugModel()
test_input = torch.randn(5, 10)
output = debug_model(test_input)
# 关闭调试模式
debug_model.debug = False
output = debug_model(test_input)
print("调试模式已关闭")
3.10.2 Weight Visualization
# 权重分布可视化
def visualize_weights(model, layer_name=None):
plt.figure(figsize=(15, 5))
plot_idx = 1
for name, param in model.named_parameters():
if 'weight' in name and (layer_name is None or layer_name in name):
plt.subplot(1, 3, plot_idx)
weights = param.data.cpu().numpy().flatten()
plt.hist(weights, bins=50, alpha=0.7)
plt.title(f'{name}\nMean: {weights.mean():.4f}, Std: {weights.std():.4f}')
plt.xlabel('Weight Value')
plt.ylabel('Frequency')
plot_idx += 1
if plot_idx > 3:
break
plt.tight_layout()
plt.show()
# 创建一个模型并可视化权重
model = MultiClassifier(784, [256, 128], 10)
visualize_weights(model)
Chapter Summary
In this chapter we studied the fundamentals of neural networks in PyTorch:
Core concepts
- nn.Module: the base class for PyTorch neural networks, providing parameter management, state control, and related functionality
- Forward propagation: the computation performed as data flows through the network's layers
- Parameter management: initializing, freezing, and updating weights and biases
Key components
- Linear layers: fully connected layers implementing a linear transformation
- Activation functions: nonlinearities such as ReLU, Sigmoid, and Tanh
- Regularization layers: techniques such as Dropout and BatchNorm that help prevent overfitting
- Custom layers: how to implement your own neural network layers
Practical skills
- Model building: composing different layers into more complex networks
- Parameter initialization: how different initialization strategies affect training
- Model saving/loading: persisting models for reuse and deployment
- Performance optimization: improving memory and compute efficiency
Learning outcomes
Having completed this chapter, you can now:
1. Build custom neural networks with nn.Module
2. Understand and implement the common layer types
3. Work with forward propagation and parameter management
4. Implement more complex architectures such as residual networks
5. Optimize model performance and debug network problems
Next Chapter Preview
In the next chapter, Data Processing and Loading, we will cover:
- Using Dataset and DataLoader
- Data preprocessing and augmentation
- Batching and parallel loading
- Implementing custom datasets
Exercises
Basic exercises
- Implement a three-layer MLP that includes BatchNorm and Dropout
- Create a custom activation-function layer
- Write a function that reports and visualizes a model's parameter statistics
Intermediate exercises
- Implement a deep network with skip connections
- Create a recurrent structure with shared parameters
- Implement gradient clipping and learning-rate scheduling
Project exercises
- Build a complete network for MNIST digit recognition
- Implement a configurable, general-purpose classifier
- Create a tool for analyzing and visualizing model performance
Code Example Summary
This chapter included the following key code examples:
- Basic network construction (SimpleMLP, CustomModule)
- Using common layers (Linear, activations, Dropout, BatchNorm)
- Custom layer implementations (CustomLinear, SimpleAttention, StatefulLayer)
- Parameter initialization and management
- Saving and loading models
- Performance optimization techniques
- Debugging and visualization tools
Chapter 3 complete! 🎉
You have now mastered the core concepts and implementation techniques of PyTorch neural networks. Next, we continue with Chapter 4, Data Processing and Loading, to learn how to process and load training data efficiently.