PyTorch Deep Learning Framework: A Detailed Guide
1. What Is PyTorch?
PyTorch is an open-source deep learning framework developed by Facebook (now Meta) and built on Python. It is known for dynamic computation graphs, easy debugging, and an intuitive API design, and it is one of the most popular deep learning frameworks today.
Core Features
- Dynamic computation graphs: the graph is built, and can be modified, at runtime (see the sketch after this list)
- Automatic differentiation: gradients are computed automatically, simplifying backpropagation
- GPU acceleration: seamless support for CUDA and multi-GPU training
- Rich ecosystem: tooling for computer vision, natural language processing, and more
- Easy debugging: models run as ordinary Python, so standard debugging tools work
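Because the graph is rebuilt on every forward pass, ordinary Python control flow can shape the model itself. A minimal sketch of this idea (the DynamicNet module and its data-dependent loop are illustrative, not part of any standard API):
```python
import torch
import torch.nn as nn

class DynamicNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 4)

    def forward(self, x):
        # A plain Python loop whose length depends on the input:
        # the graph is traced anew on each call, so it can differ
        # from one batch to the next.
        for _ in range(x.sum().int().item() % 3 + 1):
            x = torch.relu(self.fc(x))
        return x

net = DynamicNet()
print(net(torch.randn(2, 4)).shape)  # torch.Size([2, 4])
```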
2. Installation and Environment Setup
Basic Installation
```bash
# CPU version
pip install torch torchvision torchaudio
# GPU version (CUDA 11.8)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# GPU version (CUDA 12.1)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
```
Verifying the Installation
```python
import torch
import torchvision
print(f"PyTorch版本: {torch.__version__}")
print(f"CUDA可用: {torch.cuda.is_available()}")
if torch.cuda.is_available():
print(f"CUDA版本: {torch.version.cuda}")
print(f"GPU数量: {torch.cuda.device_count()}")
print(f"当前GPU: {torch.cuda.get_device_name()}")3. 核心概念
3.1 Tensors
```python
import torch
# Create tensors
x = torch.tensor([1, 2, 3, 4, 5])
y = torch.zeros(3, 4)
z = torch.randn(2, 3)
print(f"x: {x}")
print(f"Shape of y: {y.shape}")
print(f"dtype of z: {z.dtype}")

# Tensor operations
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([4.0, 5.0, 6.0])
# Basic operations
add_result = a + b
mul_result = a * b            # element-wise multiplication
dot_result = torch.dot(a, b)
print(f"Addition: {add_result}")
print(f"Multiplication: {mul_result}")
print(f"Dot product: {dot_result}")
```
3.2 Automatic Differentiation (Autograd)
```python
import torch
# Create tensors that require gradients
x = torch.tensor([2.0], requires_grad=True)
y = torch.tensor([3.0], requires_grad=True)
# Define the computation
z = x**2 + y**3
loss = z.sum()
# Backward pass
loss.backward()
print(f"Gradient of x: {x.grad}")  # dz/dx = 2x = 4
print(f"Gradient of y: {y.grad}")  # dz/dy = 3y^2 = 27
```
3.3 Neural Network Modules (nn.Module)
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Create the network
net = SimpleNet(784, 128, 10)
print(net)
# Inspect the parameters
for name, param in net.named_parameters():
    print(f"{name}: {param.shape}")
```
4. Practical Examples
4.1 Linear Regression
```python
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
# Generate data
np.random.seed(42)
X = np.random.randn(100, 1)
y = 2 * X + 1 + 0.1 * np.random.randn(100, 1)
# Convert to tensors
X_tensor = torch.FloatTensor(X)
y_tensor = torch.FloatTensor(y)

# Define the model
class LinearRegression(nn.Module):
    def __init__(self):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)

# Create the model, loss function, and optimizer
model = LinearRegression()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train
losses = []
for epoch in range(1000):
    # Forward pass
    predictions = model(X_tensor)
    loss = criterion(predictions, y_tensor)
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/1000], Loss: {loss.item():.4f}')

# Inspect the learned parameters
print(f"Weight: {model.linear.weight.item():.4f}")
print(f"Bias: {model.linear.bias.item():.4f}")
```
4.2 Image Classification (CNN)
```python
import torch
import torch.nn as nn
import torch.nn.functional as F  # needed for F.relu in the forward pass
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
# Load the MNIST dataset
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                      download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True)
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                     download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False)

# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 28x28 -> 14x14
        x = self.pool(F.relu(self.conv2(x)))  # 14x14 -> 7x7
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Create the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train_model(model, trainloader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(trainloader):.4f}')

# Train the model
train_model(model, trainloader, criterion, optimizer)
```
4.3 Natural Language Processing (RNN/LSTM)
```python
import torch
import torch.nn as nn
import torch.optim as optim
class TextClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim, n_layers=2):
        super(TextClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, n_layers,
                            batch_first=True, dropout=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # x shape: (batch_size, seq_length)
        embedded = self.embedding(x)  # (batch_size, seq_length, embed_dim)
        lstm_out, (hidden, _) = self.lstm(embedded)
        # Use the last layer's hidden state at the final time step
        output = self.fc(self.dropout(hidden[-1]))
        return output

# Example usage
vocab_size = 10000
embed_dim = 100
hidden_dim = 128
output_dim = 2  # binary classification
seq_length = 50
batch_size = 32

model = TextClassifier(vocab_size, embed_dim, hidden_dim, output_dim)
# Simulated data
x = torch.randint(0, vocab_size, (batch_size, seq_length))
y = torch.randint(0, output_dim, (batch_size,))
# Forward pass
output = model(x)
print(f"Output shape: {output.shape}")
```
5. Advanced Features
5.1 Custom Datasets
```python
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        # Assumes the CSV stores the features in all but the last
        # column and the label in the last column
        features = self.data.iloc[idx, :-1].values.astype('float32')
        label = self.data.iloc[idx, -1]
        sample = {'features': features, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample

# Using the custom dataset
# dataset = CustomDataset('data.csv')
# dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
```
5.2 Saving and Loading Models
```python
import torch
# Save the entire model
torch.save(model, 'model.pth')
# Save only the parameters (recommended)
torch.save(model.state_dict(), 'model_params.pth')

# Loading
# Option 1: load the entire model
model = torch.load('model.pth')
# Option 2: load the parameters (recommended)
model = CNN()  # create a model instance first
model.load_state_dict(torch.load('model_params.pth'))
model.eval()  # switch to evaluation mode
```
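When you want to resume training rather than just run inference, a common pattern is to save a checkpoint dictionary that bundles model and optimizer state. A sketch, assuming model, optimizer, and an epoch counter from a training loop (the file name is illustrative):
```python
# Save a training checkpoint (model, optimizer, epoch come from the loop)
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(checkpoint, 'checkpoint.pth')

# Restore it later
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1
```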
5.3 Learning Rate Scheduling
```python
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
# Create the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Step scheduler: multiply the LR by gamma every step_size epochs
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
# Or use an adaptive scheduler
# scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10)

# Inside the training loop
for epoch in range(num_epochs):
    # ... training code ...
    # Update the learning rate
    scheduler.step()
    # With ReduceLROnPlateau, pass the monitored metric instead:
    # scheduler.step(val_loss)
```
6. Model Evaluation and Visualization
6.1 Model Evaluation
```python
def evaluate_model(model, testloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Test accuracy: {accuracy:.2f}%')
    return accuracy

# Evaluate the model
accuracy = evaluate_model(model, testloader, device)
```
6.2 Loss Visualization
```python
import matplotlib.pyplot as plt
def plot_training_history(train_losses, val_losses=None):
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Training loss')
    if val_losses:
        plt.plot(val_losses, label='Validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training History')
    plt.show()

# Example usage
# plot_training_history(losses)
```
7. Best Practices
7.1 Efficient GPU Usage
```python
# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Move the model and data to the GPU
model = model.to(device)
inputs = inputs.to(device)
labels = labels.to(device)

# Mixed-precision training (speeds up training)
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler()
for inputs, labels in dataloader:
    optimizer.zero_grad()
    with autocast():
        outputs = model(inputs)
        loss = criterion(outputs, labels)
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
```
7.2 Memory Management
```python
# Free cached GPU memory
torch.cuda.empty_cache()

# Use gradient checkpointing to trade compute for memory
from torch.utils.checkpoint import checkpoint

def forward_with_checkpoint(model, x):
    return checkpoint(model, x)
```
7.3 Debugging Tips
```python
# Inspect gradients
def check_gradients(model):
    for name, param in model.named_parameters():
        if param.grad is not None:
            print(f"{name}: {param.grad.norm()}")

# Gradient clipping
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

# Set random seeds for reproducibility
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
```
8. Common Problems and Solutions
Problem 1: CUDA out of memory
```python
# Reduce the batch size
batch_size = 16  # reduced from 32
# Use gradient accumulation
accumulation_steps = 4
for i, (inputs, labels) in enumerate(dataloader):
    outputs = model(inputs)
    loss = criterion(outputs, labels) / accumulation_steps
    loss.backward()
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
```
Problem 2: Slow training
```python
# Use multiple DataLoader worker processes
dataloader = DataLoader(dataset, batch_size=32,
                        shuffle=True, num_workers=4,
                        pin_memory=True)
# Use compiled mode (PyTorch 2.0+)
model = torch.compile(model)
```
9. Summary
PyTorch is a powerful yet approachable deep learning framework, particularly well suited to research and rapid prototyping. Its dynamic computation graphs and native Python execution make development and debugging intuitive.
