Hands-On Object Detection: SSD (a must-read for beginners)

  1. Download the SSD code:

https://github.com/amdegroot/ssd.pytorch (the ssd.pytorch repository)

  1. After unpacking the downloaded code, create a new VOCdevkit folder inside the data folder, then copy the VOC2007-format dataset into it (a quick layout check is sketched below, after the weights download link).
  1. Download the pretrained weights file into the weights folder. Download link:

https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth (the fc-reduced VGG-16 base weights linked from the ssd.pytorch README)
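Before editing any code, it can help to sanity-check that the dataset from the previous step sits where the scripts expect it. A minimal sketch (the sub-folder names follow the standard VOC2007 layout; adjust if yours differ):

import os
VOC_ROOT = "data/VOCdevkit/"   # the path configured later in VOC0712.py
for sub in ("Annotations", "ImageSets/Main", "JPEGImages"):
    path = os.path.join(VOC_ROOT, "VOC2007", sub)
    print(path, "exists:", os.path.isdir(path))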

  1. Modify config.py:
# SSD300 CONFIGS
voc = {
    'num_classes': 3,  # number of classes + 1 (background)
    'lr_steps': (80000, 100000, 120000),
    'max_iter': 120000,  # number of training iterations
    'feature_maps': [38, 19, 10, 5, 3, 1],
    'min_dim': 300,
    'steps': [8, 16, 32, 64, 100, 300],
    'min_sizes': [30, 60, 111, 162, 213, 264],
    'max_sizes': [60, 111, 162, 213, 264, 315],
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
    'variance': [0.1, 0.2],
    'clip': True,
    'name': 'VOC',
}
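As a quick cross-check of the settings above: the feature map sizes and aspect ratios determine the number of default (prior) boxes, and the total of 8732 is the same figure that appears in the shape-mismatch error further below. A small sketch of the arithmetic (each cell gets 2 boxes plus 2 more per extra aspect ratio):

feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
total = sum(f * f * (2 + 2 * len(ar)) for f, ar in zip(feature_maps, aspect_ratios))
print(total)   # 8732 default boxes per image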
  1. Modify VOC0712.py:
'''
VOC_CLASSES = (  # always index 0
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor')
'''
VOC_CLASSES = (  # always index 0
    'aircraft', 'oiltank')  # change to the classes of your own dataset

# note: if you used our download scripts, this should be right
# VOC_ROOT = osp.join("", "data/VOCdevkit/")
VOC_ROOT = "data/VOCdevkit/"  # changed to the local path on my Windows 10 machine
def __init__(self, root,
             image_sets=[('2007', 'trainval'), ('2012', 'trainval')],
             transform=None, target_transform=VOCAnnotationTransform(),
             dataset_name='VOC0712'):

Change to:

def __init__(self, root,
             image_sets=[('2007', 'trainval')],
             transform=None, target_transform=VOCAnnotationTransform(),
             dataset_name='VOC2007'):
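For reference, the label index stored in the ground truth comes from the position of the class name in VOC_CLASSES (the stock VOCAnnotationTransform builds the mapping roughly like this), which is why the test output further below shows 0 for aircraft and 1 for oiltank:

VOC_CLASSES = ('aircraft', 'oiltank')
class_to_ind = dict(zip(VOC_CLASSES, range(len(VOC_CLASSES))))
print(class_to_ind)   # {'aircraft': 0, 'oiltank': 1}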
 
  1. Modify coco.py:

COCO_ROOT = 'data/'

  1. Modify ssd.py:
Change line 32:
self.cfg = (coco, voc)[num_classes == 21]
to:
self.cfg = (coco, voc)[num_classes == 3]  # 3 is my dataset's number of classes + 1
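This works because indexing a tuple with a bool picks element 0 for False and 1 for True, so with num_classes equal to 3 the VOC config is selected. A tiny illustration with placeholder strings standing in for the real config dicts:

coco, voc = 'coco_cfg', 'voc_cfg'      # placeholders for the real config dicts
num_classes = 3
print((coco, voc)[num_classes == 3])   # 'voc_cfg' -- True indexes element 1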
  1. Modify multibox_loss.py:
Before line 97:
loss_c[pos] = 0  # filter out pos boxes for now
add the line:
loss_c = loss_c.view(pos.size()[0], pos.size()[1])
Without it, the positive mask and the loss tensor have mismatched shapes, for example:
IndexError: The shape of the mask [4, 8732] at index 0 does not match the shape of the indexed tensor [34928, 1] at index 0
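Why this fixes it: during hard-negative mining loss_c is computed with one row per default box, i.e. shape [batch * 8732, 1], while the positive mask pos has shape [batch, 8732], so boolean indexing fails until loss_c is reshaped. A minimal reproduction of the shapes (values are dummies):

import torch
pos = torch.zeros(4, 8732, dtype=torch.bool)     # positive-anchor mask, shape [4, 8732]
loss_c = torch.rand(4 * 8732, 1)                 # per-box confidence loss, shape [34928, 1]
loss_c = loss_c.view(pos.size(0), pos.size(1))   # reshape to [4, 8732]
loss_c[pos] = 0                                  # mask and tensor shapes now match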
  1. Modify the global configuration parameters in train.py:

    parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'],
                        type=str, help='VOC or COCO')  # use VOC
    parser.add_argument('--batch_size', default=4, type=int,
                        help='Batch size for training')  # set according to your GPU memory
    parser.add_argument('--start_iter', default=0, type=int,
                        help='Resume training at this iter')  # starting iteration
    parser.add_argument('--num_workers', default=4, type=int,
                        help='Number of workers used in dataloading')
    parser.add_argument('--cuda', default=True, type=str2bool,
                        help='Use CUDA to train model')
    parser.add_argument('--lr', '--learning-rate', default=1e-4, type=float,
                        help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float,
                        help='Momentum value for optim')
    parser.add_argument('--weight_decay', default=5e-4, type=float,
                        help='Weight decay for SGD')
    parser.add_argument('--gamma', default=0.1, type=float,
                        help='Gamma update for SGD')
    parser.add_argument('--visdom', default=False, type=str2bool,
                        help='Use visdom for loss visualization')
    parser.add_argument('--save_folder', default='weights/',
                        help='Directory for saving checkpoint models')  # where checkpoints are saved
    args = parser.parse_args()

    In train.py, change the loss accumulation and logging code, which still indexes
    0-dim loss tensors in the old loss.data[0] style, to use .item():

    loc_loss += loss_l.item()
    conf_loss += loss_c.item()
    if iteration % 10 == 0:
        print('timer: %.4f sec.' % (t1 - t0))
        print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

    Without this change you get the error:

    IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number
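    The underlying cause is that recent PyTorch versions return each loss as a 0-dim tensor, which can no longer be indexed with [0]; .item() extracts the Python number instead. A minimal illustration:

    import torch
    loss = torch.tensor(2.5)   # a 0-dim tensor, like the losses returned by the criterion
    print(loss.item())         # 2.5 -- works
    # print(loss.data[0])      # raises the IndexError quoted above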
    
     
    
    Change line 165:

    images, targets = next(batch_iterator)

    to:

    try:
        images, targets = next(batch_iterator)
    except StopIteration:
        batch_iterator = iter(data_loader)
        images, targets = next(batch_iterator)
    
     
    
    Without this change, training aborts as soon as the data loader is exhausted:

    Traceback (most recent call last):
      File "E:/ssd.pytorch-master/ssd.pytorch-master/train.py", line 246, in <module>
        train()
      File "E:/ssd.pytorch-master/ssd.pytorch-master/train.py", line 159, in train
        images, targets = next(batch_iterator)
      File "D:\Users\WH\anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 345, in __next__
        data = self._next_data()
      File "D:\Users\WH\anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 831, in _next_data
        raise StopIteration
    StopIteration
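    The try/except simply restarts the loader once a full pass over the dataset has been consumed; next() on an exhausted iterator always raises StopIteration:

    it = iter([1, 2])
    next(it); next(it)
    # next(it)  # would raise StopIteration, exactly like the exhausted batch_iterator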
    
     
    
     
    
    If the loss turns into NaN, lower the initial learning rate.
    


    For example, change the learning rate from 1e-3 to 1e-4.
    
    Once the issues above are fixed, you can start training.
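    For example, with the stock train.py entry point and the flags shown above, training can be launched with something like: python train.py --dataset VOC --batch_size 4 --lr 1e-4 (adjust to your own setup).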
    
  2. Testing

 

You need to modify the global parameters in test.py.

parser.add_argument('--trained_model', default='weights/ssd300_COCO_60000.pth', type=str,
                    help='Trained state_dict file path to open')  # the model checkpoint to test
parser.add_argument('--save_folder', default='eval/', type=str,
                    help='Dir to save results')  # where the test results are written
parser.add_argument('--visual_threshold', default=0.6, type=float,
                    help='Final confidence threshold')  # minimum detection confidence
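These defaults can also be overridden on the command line, e.g. python test.py --trained_model weights/ssd300_COCO_60000.pth --visual_threshold 0.6 (assuming the stock test.py entry point).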

Run test.py to generate the test results:
GROUND TRUTH FOR: aircraft_27
label: 650.0 || 101.0 || 753.0 || 227.0 || 0
label: 395.0 || 351.0 || 521.0 || 464.0 || 0
label: 320.0 || 479.0 || 465.0 || 606.0 || 0
label: 276.0 || 617.0 || 432.0 || 753.0 || 0
PREDICTIONS:
1 label: aircraft score: tensor(1.0000) 317.00345 || 479.0164 || 468.34143 || 604.06415
2 label: aircraft score: tensor(1.0000) 279.88608 || 614.7385 || 434.32086 || 752.4983
3 label: aircraft score: tensor(0.9999) 397.0827 || 352.3487 || 524.27655 || 462.45944
4 label: aircraft score: tensor(0.9990) 647.89233 || 100.57587 || 758.556 || 232.56816

GROUND TRUTH FOR: oiltank_349
label: 115.0 || 340.0 || 237.0 || 466.0 || 1
label: 104.0 || 498.0 || 231.0 || 637.0 || 1
label: 92.0 || 675.0 || 224.0 || 814.0 || 1
label: 369.0 || 378.0 || 492.0 || 507.0 || 1
label: 357.0 || 543.0 || 483.0 || 678.0 || 1
label: 350.0 || 715.0 || 485.0 || 856.0 || 1
label: 534.0 || 378.0 || 662.0 || 511.0 || 1
label: 527.0 || 541.0 || 656.0 || 678.0 || 1
label: 527.0 || 712.0 || 654.0 || 857.0 || 1
PREDICTIONS:
1 label: oiltank score: tensor(1.0000) 349.36838 || 709.4552 || 481.7989 || 854.8713
2 label: oiltank score: tensor(1.0000) 528.97327 || 546.3467 || 656.2798 || 676.9037
3 label: oiltank score: tensor(1.0000) 88.82061 || 667.3125 || 220.63167 || 812.9001
4 label: oiltank score: tensor(1.0000) 358.38913 || 373.05368 || 489.21268 || 510.5563
5 label: oiltank score: tensor(1.0000) 519.9066 || 709.3708 || 658.5541 || 863.88947
6 label: oiltank score: tensor(1.0000) 115.77756 || 339.24872 || 240.00787 || 466.19022
7 label: oiltank score: tensor(1.0000) 104.77564 || 500.64545 || 230.20764 || 636.5293
8 label: oiltank score: tensor(1.0000) 357.70694 || 547.2647 || 485.0283 || 679.9714
9 label: oiltank score: tensor(0.9999) 524.7541 || 375.12167 || 668.46106 || 511.2381
  1. Visualize the test results

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import numpy as np
import cv2
from matplotlib import pyplot as plt

if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

from ssd import build_ssd

net = build_ssd('test', 300, 3)    # initialize SSD (300x300 input, 2 classes + background)
net.load_state_dict(torch.load('weights/ssd300_COCO_60000.pth'))  # load the trained model
net.eval()

image = cv2.imread('data/VOCdevkit/aircraft_27.jpg', cv2.IMREAD_COLOR)  # load a test image
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # convert BGR to RGB for display

# View the sampled input image before transform
x = cv2.resize(image, (300, 300)).astype(np.float32)
x -= (104.0, 117.0, 123.0)        # subtract the VGG mean (BGR order)
x = x.astype(np.float32)
x = x[:, :, ::-1].copy()          # reverse channels: BGR -> RGB
plt.imshow(x)

x = torch.from_numpy(x).permute(2, 0, 1)
xx = Variable(x.unsqueeze(0))     # wrap tensor in Variable
if torch.cuda.is_available():
    xx = xx.cuda()
y = net(xx)

from data import VOC_CLASSES as labels
top_k = 10                        # keep the top 10 detections
plt.figure(figsize=(10, 10))
colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
plt.imshow(rgb_image)             # plot the image for matplotlib
currentAxis = plt.gca()

detections = y.data
# scale each detection back up to the image size: [width, height, width, height]
scale = torch.Tensor(rgb_image.shape[1::-1]).repeat(2)
for i in range(detections.size(1)):
    j = 0
    while detections[0, i, j, 0] >= 0.35:          # confidence threshold
        score = detections[0, i, j, 0]
        label_name = labels[i - 1]                 # class 0 is background
        display_txt = '%s: %.2f' % (label_name, score)
        pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
        coords = (pt[0], pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
        color = colors[i]
        currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
        currentAxis.text(pt[0], pt[1], display_txt, bbox={'facecolor': color, 'alpha': 0.5})
        j += 1
plt.show()
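If you are working on a machine without a display (for example over SSH), a small variation is to write the figure to disk instead of calling plt.show(); the file name here is only an example:

plt.savefig('eval/aircraft_27_detections.png', bbox_inches='tight')   # example output path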