Caltech101数据集ResNet34
本博⽂内容:
Caltech101数据集;
神经⽹络(模型、⼯具、⽬录)
编写代码
⼀、Caltech101数据集;
这个数据集包含了101类的图像,每类大约有40~800张图像,大部分类别约为50张;该数据集于2003年由李飞飞(Fei-Fei Li)等人收集,每张图像的大小大约是300x200像素。图像的类别分布:
按Top40图⽚数量从⼤到⼩的顺序展⽰:
整体数据集情况:
可以看到图⽚的数量⾮常不均衡;
像这样类别不均衡的数据,是深度学习模型表现不佳的主要原因之一
⼆、神经⽹络(模型、⼯具、⽬录)
⽹络:ResNet34
使⽤ ImageNet中预训练好的权重——迁移学习提⾼深度学习的表现⼒
对于隐藏层的权重,我们将不进⾏更新,但是我们会微调ResNet34⽹络的头部来⽀持我们的⽹络;
当进行微调的时候,我们同样也会加入Dropout层;
如何在类别不均衡的图⽚上实现较⾼的精度
图⽚类别不均衡的解决⽅法:
1)获取更多的数据
2)使⽤数据增强;
  在数据⽆法增多的情况下,数据增强效果⽐较好;数据增强使神经⽹络可以看到数据不同类型的变化,⼤⼩、⾓度、颜⾊等;但是我们现在不使⽤上述两种⽅法,事实上,我们将采⽤的是迁移学习和微调神经⽹络来实现更⾼的精度;
⼯具:
Install PyTorch.
Install pretrainedmodels. ——提供ResNet预训练模型
pip install pretrainedmodels
Install imutils  ——实现图⽚的旋转缩放等;
pip install imutils
⽬录
├───input
│   ├───101_ObjectCategories
│   │   ├───accordion
│   │   ├───airplanes
│   │   ├───anchor
│   │   ├───ant
│   │   ├───BACKGROUND_Google
│   │   ├───barrel
│   │   ├───bass
│   │   ├───beaver
│   │   ├───binocular
│   │   ├───bonsai
│   │   ├───brain
│   │   ├───brontosaurus
│   │   ...
├───outputs
│   ├───models    # 最终训练好的模型结果
│   └───plots
└───src
    └───train.py
编写代码
导⼊相关的包
# imports
import os
import random
import time

import cv2  # used to read the images into the dataset
import joblib
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pretrainedmodels
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms
from tqdm import tqdm

matplotlib.style.use('ggplot')
# --- SEED Everything ---
def seed_everything(SEED=42):
    """Seed every random number generator used by the script.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and puts cuDNN into deterministic mode so that repeated runs with the
    same seed produce the same results.

    Args:
        SEED: Integer seed applied to all generators.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    # cuDNN autotuning (benchmark=True) may select different convolution
    # algorithms from run to run, which defeats the purpose of seeding.
    # Trade a little speed for repeatable results.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


SEED = 42
seed_everything(SEED=SEED)
# --- SEED Everything ---
超参数的设置:
定义设备、epoch数以及batch size
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

epochs = 5  # number of passes over the training set
BATCH_SIZE = 16  # samples per mini-batch
准备标签和图像
# Collect every image file under the dataset root. The name of the directory
# that contains a file is used as that image's class label.
image_paths = list(paths.list_images('../input/101_ObjectCategories'))
images = []
labels = []
for image_path in image_paths:
    label = image_path.split(os.path.sep)[-2]
    # Files in the BACKGROUND_Google directory are excluded from the data.
    if label == 'BACKGROUND_Google':
        continue
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it rather than crash inside cvtColor.
        print(f"Skipping unreadable image: {image_path}")
        continue
    # OpenCV loads images as BGR; convert to RGB channel order.
    images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    labels.append(label)

# The images have different sizes, so store them in a 1-D object array
# (one element per image). Filling element by element avoids NumPy trying
# to broadcast the ragged list into a regular N-D array.
data = np.empty(len(images), dtype=object)
for idx, image in enumerate(images):
    data[idx] = image
labels = np.array(labels)
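使用One-hot编码对label进行编码(原文此处的代码清单在提取时丢失;根据前面导入的 LabelBinarizer 以及后文对 lb.classes_ 的使用,补全如下):
lb = LabelBinarizer()
labels = lb.fit_transform(labels)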
定义图像变换
# define transforms
# Per-channel statistics used for normalisation (presumably the ImageNet
# values expected by the pretrained weights; confirm against the weight source).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Each pipeline: array -> PIL image -> 224x224 -> float tensor -> normalised.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])
# Kept as a separate object so the training pipeline can be changed
# without affecting validation/test preprocessing.
val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])
一般只对训练数据进行数据增强,验证和测试数据只做必要的预处理;
所以我们在此分开写训练和验证的数据变换函数(本例中两者内容相同,分开定义便于之后单独为训练集加入增强)。
数据分割,切分为训练、验证和测试集
# divide the data into train, validation, and test set
# Step 1: hold out 20% of all samples as the validation set, keeping the
# class proportions of the full data set.
(X, x_val, Y, y_val) = train_test_split(data, labels,
                                        test_size=0.2,
                                        stratify=labels,
                                        random_state=42)
# Step 2: split the remaining 80% into train (75%) and test (25%).
# Stratify here as well so the imbalanced classes are represented in the
# same proportions in both parts, consistent with the first split.
(x_train, x_test, y_train, y_test) = train_test_split(X, Y,
                                                      test_size=0.25,
                                                      stratify=Y,
                                                      random_state=42)
print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}")
# Output:
# x_train examples: (5205,)
# x_test examples: (1736,)
# x_val examples: (1736,)
创建⾃定义数据集和Loaders
# custom dataset
class ImageDataset(Dataset):
    """Dataset over an indexable collection of samples with optional labels.

    Args:
        images: Indexable collection of samples; each sample must itself
            support full slicing (``sample[:]``).
        labels: Optional indexable collection aligned with ``images``.
            When omitted, items are returned without a label.
        transforms: Optional callable applied to every sample on access.
    """

    def __init__(self, images, labels=None, transforms=None):
        self.X = images
        self.y = labels
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, i):
        """Return sample ``i`` (transformed if configured), with its label if any."""
        data = self.X[i][:]

        # Compare against None so that a falsy-but-valid callable is still applied.
        if self.transforms is not None:
            data = self.transforms(data)

        if self.y is not None:
            return data, self.y[i]
        return data

# Wrap each split in a dataset; only the training split uses train_transform.
train_data = ImageDataset(x_train, y_train, train_transform)
val_data = ImageDataset(x_val, y_val, val_transform)
test_data = ImageDataset(x_test, y_test, val_transform)

# dataloaders
# The test loader keeps a fixed order; train and validation are shuffled.
trainloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
valloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True)
testloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)
注意:只对训练集和验证集进⾏shuffle,对测试集不进⾏shuffle
神经⽹络模型搭建;
# the resnet34 model
class ResNet34(nn.Module):
    """ResNet-34 backbone with a new linear classification head.

    Args:
        pretrained: When exactly ``True`` the backbone is created with
            ``pretrained='imagenet'``; otherwise with ``pretrained=None``.
    """

    def __init__(self, pretrained):
        super().__init__()
        weights = 'imagenet' if pretrained is True else None
        self.model = pretrainedmodels.__dict__['resnet34'](pretrained=weights)

        # change the classification layer: one output per class known to
        # the module-level label binarizer `lb`
        self.l0 = nn.Linear(512, len(lb.classes_))
        # The pooled features are a 2-D (batch, 512) tensor, so use the
        # element-wise nn.Dropout; nn.Dropout2d is meant for feature maps
        # and is deprecated for 2-D inputs.
        self.dropout = nn.Dropout(0.4)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for input ``x``."""
        # get the batch size only, ignore (c, h, w)
        batch = x.shape[0]
        x = self.model.features(x)
        # Global average pool to 1x1, then flatten to (batch, features).
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        x = self.dropout(x)
        return self.l0(x)


model = ResNet34(pretrained=True).to(device)
优化器和损失函数定义
# optimizer: Adam over all model parameters
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# loss function: cross-entropy over class indices
criterion = nn.CrossEntropyLoss()
训练函数:
# training function
def fit(model, dataloader):
    """Train ``model`` for one pass over ``dataloader``.

    Uses the module-level ``optimizer``, ``criterion`` and ``device``.
    Targets are reduced to class indices with ``torch.max(target, 1)[1]``,
    i.e. each target row is treated as a score/one-hot vector.

    Args:
        model: The network to train (switched to training mode).
        dataloader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(loss, accuracy)``: mean per-sample loss and accuracy in percent.
    """
    print('Training')
    model.train()
    running_loss = 0.0
    running_correct = 0
    # len(dataloader) is the true number of batches for whichever loader is
    # passed in, including a final partial batch.
    for batch in tqdm(dataloader, total=len(dataloader)):
        data, target = batch[0].to(device), batch[1].to(device)
        target_idx = torch.max(target, 1)[1]
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target_idx)
        # criterion returns the batch mean (nn.CrossEntropyLoss default), so
        # weight it by the batch size before dividing by the sample count.
        running_loss += loss.item() * data.size(0)
        _, preds = torch.max(outputs.data, 1)
        running_correct += (preds == target_idx).sum().item()
        loss.backward()
        optimizer.step()

    loss = running_loss / len(dataloader.dataset)
    accuracy = 100. * running_correct / len(dataloader.dataset)

    print(f"Train Loss: {loss:.4f}, Train Acc: {accuracy:.2f}")

    return loss, accuracy
验证函数:
# validation function
def validate(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` without updating weights.

    Uses the module-level ``criterion`` and ``device``. Targets are reduced
    to class indices with ``torch.max(target, 1)[1]``.

    Args:
        model: The network to evaluate (switched to evaluation mode).
        dataloader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(loss, accuracy)``: mean per-sample loss and accuracy in percent.
    """
    print('Validating')
    model.eval()
    running_loss = 0.0
    running_correct = 0
    with torch.no_grad():
        # len(dataloader) is the true number of batches for this loader.
        for batch in tqdm(dataloader, total=len(dataloader)):
            data, target = batch[0].to(device), batch[1].to(device)
            target_idx = torch.max(target, 1)[1]
            outputs = model(data)
            loss = criterion(outputs, target_idx)

            # Weight the batch-mean loss by the batch size so the final
            # division by the sample count yields a per-sample mean.
            running_loss += loss.item() * data.size(0)
            _, preds = torch.max(outputs.data, 1)
            running_correct += (preds == target_idx).sum().item()

    loss = running_loss / len(dataloader.dataset)
    accuracy = 100. * running_correct / len(dataloader.dataset)
    print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}')

    return loss, accuracy
测试函数:
# NOTE(review): the `def` line of this listing was lost in extraction; the
# name and signature below are reconstructed from the names the body uses
# (`model`, `testloader`). Confirm against the original script.
def test(model, testloader):
    """Count correct predictions of ``model`` over ``testloader``.

    Uses the module-level ``device``. Targets are reduced to class indices
    with ``torch.max(target, 1)[1]``.

    Returns:
        Tuple ``(correct, total)``: number of correct predictions and number
        of samples seen.
    """
    # Make sure dropout is disabled while measuring accuracy.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            inputs, target = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == torch.max(target, 1)[1]).sum().item()
    return correct, total
模型的训练:
# Per-epoch history, later used for the plots.
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []
print(f"Training on {len(train_data)} examples, validating on {len(val_data)} ")
start = time.time()
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_accuracy = fit(model, trainloader)
    val_epoch_loss, val_epoch_accuracy = validate(model, valloader)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
end = time.time()
print((end-start)/60, 'minutes')
# Create the output directory if needed so a missing folder does not
# discard the trained weights.
os.makedirs('../outputs/models', exist_ok=True)
torch.save(model.state_dict(), f"../outputs/models/resnet34_epochs{epochs}.pth")
# Make sure the plot directory exists before saving figures into it.
os.makedirs('../outputs/plots', exist_ok=True)

# accuracy plots
plt.figure(figsize=(10, 7))
plt.plot(train_accuracy, color='green', label='train accuracy')
plt.plot(val_accuracy, color='blue', label='validataion accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('../outputs/plots/accuracy.png')

# loss plots
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(val_loss, color='red', label='validataion loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('../outputs/plots/loss.png')
结果保存
1# save the accuracy and loss lists as pickled files
2print('Pickling accuracy and ')
3 joblib.dump(train_accuracy, '../outputs/models/train_accuracy.pkl')