This commit is contained in:
Zhongdao 2019-09-27 13:37:47 +08:00
commit 28514de87b
27 changed files with 4957 additions and 0 deletions

108
.gitignore vendored Normal file
View file

@ -0,0 +1,108 @@
weights/
data/
tmp/
external/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/

1
README.md Normal file
View file

@ -0,0 +1 @@
# Towards-Realtime-MOT

22
cfg/ccmcpe.json Normal file
View file

@ -0,0 +1,22 @@
{
"train":
{
"mot17":"./data/mot17.train",
"caltech":"./data/caltech.train",
"citypersons":"./data/citypersons.train",
"cuhksysu":"./data/cuhksysu.train",
"prw":"./data/prw.train",
"eth":"./data/eth.train"
},
"test_emb":
{
"caltech":"./data/caltech.val",
"cuhksysu":"./data/cuhksysu.val",
"prw":"./data/prw.val"
},
"test":
{
"mot19.train":"./data/mot19.train"
}
}

833
cfg/yolov3.cfg Executable file
View file

@ -0,0 +1,833 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=608
height=1088
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 8,9,10,11
anchors = 8,24, 11, 34, 16,48, 23,68, 32,96, 45,135, 64,192, 90,271, 128,384, 180,540, 256,640, 512,640
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 4,5,6,7
anchors = 8,24, 11, 34, 16,48, 23,68, 32,96, 45,135, 64,192, 90,271, 128,384, 180,540, 256,640, 512,640
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 0,1,2,3
anchors = 8,24, 11,34, 16,48, 23,68, 32,96, 45,135, 64,192, 90,271, 128,384, 180,540, 256,640, 512,640
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

98
extract_ped_per_frame.py Normal file
View file

@ -0,0 +1,98 @@
import argparse
import json
import time
from pathlib import Path
from sklearn import metrics
from scipy import interpolate
import torch.nn.functional as F
from models import *
from utils.utils import *
from torchvision.transforms import transforms as T
from utils.datasets import LoadImages, JointDataset, collate_fn
def extract_ped_per_frame(
cfg,
input_root,
output_root,
weights,
batch_size=16,
img_size=416,
iou_thres=0.5,
conf_thres=0.3,
nms_thres=0.45,
print_interval=40,
nID=14455,
):
mkdir_if_missing(output_root)
# Initialize model
model = Darknet(cfg, img_size, nID)
# Load weights
if weights.endswith('.pt'): # pytorch format
model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
else: # darknet format
load_darknet_weights(model, weights)
model = torch.nn.DataParallel(model)
model.cuda().eval()
vlist = os.listdir(input_root)
vlist = [osp.join(input_root, v, 'img1') for v in vlist]
for vpath in vlist:
vroot = osp.join('/',*vpath.split('/')[:-1])
out_vroot = vroot.replace(input_root, output_root)
mkdir_if_missing(out_vroot)
dataloader = LoadImages(vpath, img_size)
for frame_id, (frame_path, frame, frame_ori) in enumerate(dataloader):
frame_ground_id = frame_path.split('/')[-1].split('.')[0]
if frame_id % 20 == 0:
print('Processing frame {} of video {}'.format(frame_id, frame_path))
blob = torch.from_numpy(frame).cuda().unsqueeze(0)
pred = model(blob)
pred = pred[pred[:,:,4] > conf_thres]
if len(pred) > 0:
dets = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0].cpu()
scale_coords(img_size, dets[:, :4], frame_ori.shape).round()
frame_dir = osp.join(out_vroot, frame_ground_id)
mkdir_if_missing(frame_dir)
dets = dets[:, :5]
for ped_id, det in enumerate(dets):
box = det[:4].int()
conf = det[4]
ped = frame_ori[box[1]:box[3], box[0]:box[2]]
ped_path = osp.join(frame_dir, ('{:04d}_'+ '{:d}_'*4 + '{:.2f}.jpg').format(ped_id, *box, conf))
cv2.imwrite(ped_path, ped)
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--batch-size', type=int, default=40, help='size of each image batch')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--weights', type=str, default='weights/mot_64/latest.pt', help='path to weights file')
parser.add_argument('--iou-thres', type=float, default=0.3, help='iou threshold required to qualify as detected')
parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.3, help='iou threshold for non-maximum suppression')
parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
parser.add_argument('--print-interval', type=int, default=10, help='size of each image dimension')
parser.add_argument('--input-root', type=str, default='/home/wangzd/datasets/youtube/data/0002/frame', help='path to input frames')
parser.add_argument('--output-root', type=str, default='/home/wangzd/datasets/youtube/data/0002/ped_per_frame', help='path to output frames')
opt = parser.parse_args()
print(opt, end='\n\n')
with torch.no_grad():
extract_ped_per_frame(
opt.cfg,
opt.input_root,
opt.output_root,
opt.weights,
opt.batch_size,
opt.img_size,
opt.iou_thres,
opt.conf_thres,
opt.nms_thres,
opt.print_interval,
)

379
models.py Normal file
View file

@ -0,0 +1,379 @@
import os
from collections import defaultdict,OrderedDict
import torch.nn as nn
from utils.parse_config import *
from utils.utils import *
from utils.syncbn import SyncBN
import time
import math
ONNX_EXPORT = False
batch_norm=SyncBN #nn.BatchNorm2d
def create_modules(module_defs):
"""
Constructs module list of layer blocks from module configuration in module_defs
"""
hyperparams = module_defs.pop(0)
output_filters = [int(hyperparams['channels'])]
module_list = nn.ModuleList()
yolo_layer_count = 0
for i, module_def in enumerate(module_defs):
modules = nn.Sequential()
if module_def['type'] == 'convolutional':
bn = int(module_def['batch_normalize'])
filters = int(module_def['filters'])
kernel_size = int(module_def['size'])
pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
modules.add_module('conv_%d' % i, nn.Conv2d(in_channels=output_filters[-1],
out_channels=filters,
kernel_size=kernel_size,
stride=int(module_def['stride']),
padding=pad,
bias=not bn))
if bn:
modules.add_module('batch_norm_%d' % i, batch_norm(filters))
if module_def['activation'] == 'leaky':
modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))
elif module_def['type'] == 'maxpool':
kernel_size = int(module_def['size'])
stride = int(module_def['stride'])
if kernel_size == 2 and stride == 1:
modules.add_module('_debug_padding_%d' % i, nn.ZeroPad2d((0, 1, 0, 1)))
maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
modules.add_module('maxpool_%d' % i, maxpool)
elif module_def['type'] == 'upsample':
# upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest') # WARNING: deprecated
upsample = Upsample(scale_factor=int(module_def['stride']))
modules.add_module('upsample_%d' % i, upsample)
elif module_def['type'] == 'route':
layers = [int(x) for x in module_def['layers'].split(',')]
filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers])
modules.add_module('route_%d' % i, EmptyLayer())
elif module_def['type'] == 'shortcut':
filters = output_filters[int(module_def['from'])]
modules.add_module('shortcut_%d' % i, EmptyLayer())
elif module_def['type'] == 'yolo':
anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
# Extract anchors
anchors = [float(x) for x in module_def['anchors'].split(',')]
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
anchors = [anchors[i] for i in anchor_idxs]
nC = int(module_def['classes']) # number of classes
img_size = (int(hyperparams['width']),int(hyperparams['height']))
# Define detection layer
yolo_layer = YOLOLayer(anchors, nC, hyperparams['nID'], img_size, yolo_layer_count, cfg=hyperparams['cfg'])
modules.add_module('yolo_%d' % i, yolo_layer)
yolo_layer_count += 1
# Register module list and number of output filters
module_list.append(modules)
output_filters.append(filters)
return hyperparams, module_list
class EmptyLayer(nn.Module):
"""Placeholder for 'route' and 'shortcut' layers"""
def __init__(self):
super(EmptyLayer, self).__init__()
def forward(self, x):
return x
class Upsample(nn.Module):
# Custom Upsample layer (nn.Upsample gives deprecated warning message)
def __init__(self, scale_factor=1, mode='nearest'):
super(Upsample, self).__init__()
self.scale_factor = scale_factor
self.mode = mode
def forward(self, x):
return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
class YOLOLayer(nn.Module):
def __init__(self, anchors, nC, nID, img_size, yolo_layer, cfg):
super(YOLOLayer, self).__init__()
self.layer = yolo_layer
nA = len(anchors)
self.anchors = torch.FloatTensor(anchors)
self.nA = nA # number of anchors (3)
self.nC = nC # number of classes (80)
self.nID = nID # number of identities
self.img_size = 0
self.emb_dim = 512
self.SmoothL1Loss = nn.SmoothL1Loss()
self.SoftmaxLoss = nn.CrossEntropyLoss(ignore_index=-1)
self.CrossEntropyLoss = nn.CrossEntropyLoss()
self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1)
self.s_c = nn.Parameter(1*torch.ones(1)) # -4.15
self.s_r = nn.Parameter(1*torch.ones(1)) # -4.85
self.s_id = nn.Parameter(1*torch.ones(1)) # -2.3
self.emb_scale = math.sqrt(2) * math.log(self.nID-1)
def forward(self, p_cat, img_size, targets=None, classifier=None, test_emb=False):
p, p_emb = p_cat[:, :24, ...], p_cat[:, 24:, ...]
nB, nGh, nGw = p.shape[0], p.shape[-2], p.shape[-1]
if self.img_size != img_size:
create_grids(self, img_size, nGh, nGw)
if p.is_cuda:
self.grid_xy = self.grid_xy.cuda()
self.anchor_wh = self.anchor_wh.cuda()
# p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 80) # (bs, anchors, grid, grid, classes + xywh)
p = p.view(nB, self.nA, self.nC + 5, nGh, nGw).permute(0, 1, 3, 4, 2).contiguous() # prediction
p_emb = p_emb.permute(0,2,3,1).contiguous()
p_box = p[..., :4]
p_conf = p[..., 4:6].permute(0, 4, 1, 2, 3) # Conf
# Training
if targets is not None:
if test_emb:
tconf, tbox, tids = build_targets_max(targets, self.anchor_vec.cuda(), self.nA, self.nC, nGh, nGw)
else:
tconf, tbox, tids = build_targets_thres(targets, self.anchor_vec.cuda(), self.nA, self.nC, nGh, nGw)
tconf, tbox, tids = tconf.cuda(), tbox.cuda(), tids.cuda()
mask = tconf > 0
# Compute losses
nT = sum([len(x) for x in targets]) # number of targets
nM = mask.sum().float() # number of anchors (assigned to targets)
nP = torch.ones_like(mask).sum().float()
if nM > 0:
lbox = self.SmoothL1Loss(p_box[mask], tbox[mask])
else:
FT = torch.cuda.FloatTensor if p_conf.is_cuda else torch.FloatTensor
lbox, lconf = FT([0]), FT([0])
lconf = self.SoftmaxLoss(p_conf, tconf)
lid = torch.Tensor(1).fill_(0).squeeze().cuda()
emb_mask,_ = mask.max(1)
# For convenience we use max(1) to decide the id, TODO: more reseanable strategy
tids,_ = tids.max(1)
tids = tids[emb_mask]
embedding = p_emb[emb_mask].contiguous()
embedding = self.emb_scale * F.normalize(embedding)
nI = emb_mask.sum().float()
if test_emb:
if np.prod(embedding.shape)==0 or np.prod(tids.shape) == 0:
return torch.zeros(0, self. emb_dim+1).cuda()
emb_and_gt = torch.cat([embedding, tids.float()], dim=1)
return emb_and_gt
if len(embedding) > 1:
logits = classifier(embedding).contiguous()
lid = self.IDLoss(logits, tids.squeeze())
# Sum loss components
loss = torch.exp(-self.s_r)*lbox + torch.exp(-self.s_c)*lconf + torch.exp(-self.s_id)*lid + \
(self.s_r + self.s_c + self.s_id)
loss *= 0.5
return loss, loss.item(), lbox.item(), lconf.item(), lid.item(), nT
else:
p_conf = torch.softmax(p_conf, dim=1)[:,1,...].unsqueeze(-1)
p_emb = p_emb.unsqueeze(1).repeat(1,self.nA,1,1,1).contiguous()
p_cls = torch.zeros(nB,self.nA,nGh,nGw,1).cuda() # Temp
p = torch.cat([p_box, p_conf, p_cls, p_emb], dim=-1)
p[..., :4] = decode_delta_map(p[..., :4], self.anchor_vec.to(p))
p[..., :4] *= self.stride
# reshape from [nB, nA, nGh, nGw, 5 + nD] to [nB, -1, 5+nD]
return p.view(nB, -1, p.shape[-1])
class Darknet(nn.Module):
"""YOLOv3 object detection model"""
def __init__(self, cfg_path, img_size=(1088, 608), nID=1591, test_emb=False):
super(Darknet, self).__init__()
self.module_defs = parse_model_cfg(cfg_path)
self.module_defs[0]['cfg'] = cfg_path
self.module_defs[0]['nID'] = nID
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.img_size = img_size
self.loss_names = ['loss', 'box', 'conf', 'id', 'nT']
self.losses = OrderedDict()
for ln in self.loss_names:
self.losses[ln] = 0
self.emb_dim = 512
self.classifier = nn.Linear(self.emb_dim, nID)
self.test_emb=test_emb
def forward(self, x, targets=None, targets_len=None):
self.losses = OrderedDict()
for ln in self.loss_names:
self.losses[ln] = 0
is_training = (targets is not None) and (not self.test_emb)
#img_size = x.shape[-1]
layer_outputs = []
output = []
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
mtype = module_def['type']
if mtype in ['convolutional', 'upsample', 'maxpool']:
x = module(x)
elif mtype == 'route':
layer_i = [int(x) for x in module_def['layers'].split(',')]
if len(layer_i) == 1:
x = layer_outputs[layer_i[0]]
else:
x = torch.cat([layer_outputs[i] for i in layer_i], 1)
elif mtype == 'shortcut':
layer_i = int(module_def['from'])
x = layer_outputs[-1] + layer_outputs[layer_i]
elif mtype == 'yolo':
if is_training: # get loss
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
x, *losses = module[0](x, self.img_size, targets, self.classifier)
for name, loss in zip(self.loss_names, losses):
self.losses[name] += loss
elif self.test_emb:
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
x = module[0](x, self.img_size, targets, self.classifier, self.test_emb)
else: # get detections
x = module[0](x, self.img_size)
output.append(x)
layer_outputs.append(x)
if is_training:
self.losses['nT'] /= 3
output = [o.squeeze() for o in output]
return sum(output), torch.Tensor(list(self.losses.values())).cuda()
elif self.test_emb:
return torch.cat(output, 0)
return torch.cat(output, 1)
def create_grids(self, img_size, nGh, nGw):
self.stride = img_size[0]/nGw
assert self.stride == img_size[1] / nGh
# build xy offsets
grid_x = torch.arange(nGw).repeat((nGh, 1)).view((1, 1, nGh, nGw)).float()
grid_y = torch.arange(nGh).repeat((nGw, 1)).transpose(0,1).view((1, 1, nGh, nGw)).float()
#grid_y = grid_x.permute(0, 1, 3, 2)
self.grid_xy = torch.stack((grid_x, grid_y), 4)
# build wh gains
self.anchor_vec = self.anchors / self.stride
self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2)
def load_darknet_weights(self, weights, cutoff=-1):
# Parses and loads the weights stored in 'weights'
# cutoff: save layers between 0 and cutoff (if cutoff = -1 all are saved)
weights_file = weights.split(os.sep)[-1]
# Try to download weights if not available locally
if not os.path.isfile(weights):
try:
os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -O ' + weights)
except IOError:
print(weights + ' not found')
# Establish cutoffs
if weights_file == 'darknet53.conv.74':
cutoff = 75
elif weights_file == 'yolov3-tiny.conv.15':
cutoff = 15
# Open the weights file
fp = open(weights, 'rb')
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
# Needed to write header when saving weights
self.header_info = header
self.seen = header[3] # number of images seen during training
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
fp.close()
ptr = 0
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
if module_def['type'] == 'convolutional':
conv_layer = module[0]
if module_def['batch_normalize']:
# Load BN bias, weights, running mean and running variance
bn_layer = module[1]
num_b = bn_layer.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias)
bn_layer.bias.data.copy_(bn_b)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight)
bn_layer.weight.data.copy_(bn_w)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean)
bn_layer.running_mean.data.copy_(bn_rm)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var)
bn_layer.running_var.data.copy_(bn_rv)
ptr += num_b
else:
# Load conv. bias
num_b = conv_layer.bias.numel()
conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias)
conv_layer.bias.data.copy_(conv_b)
ptr += num_b
# Load conv. weights
num_w = conv_layer.weight.numel()
conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight)
conv_layer.weight.data.copy_(conv_w)
ptr += num_w
"""
@:param path - path of the new weights file
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
"""
def save_weights(self, path, cutoff=-1):
fp = open(path, 'wb')
self.header_info[3] = self.seen # number of images seen during training
self.header_info.tofile(fp)
# Iterate through layers
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
if module_def['type'] == 'convolutional':
conv_layer = module[0]
# If batch norm, load bn first
if module_def['batch_normalize']:
bn_layer = module[1]
bn_layer.bias.data.cpu().numpy().tofile(fp)
bn_layer.weight.data.cpu().numpy().tofile(fp)
bn_layer.running_mean.data.cpu().numpy().tofile(fp)
bn_layer.running_var.data.cpu().numpy().tofile(fp)
# Load conv bias
else:
conv_layer.bias.data.cpu().numpy().tofile(fp)
# Load conv weights
conv_layer.weight.data.cpu().numpy().tofile(fp)
fp.close()

264
test.py Normal file
View file

@ -0,0 +1,264 @@
import argparse
import json
import time
from pathlib import Path
from sklearn import metrics
from scipy import interpolate
import torch.nn.functional as F
from models import *
from utils.utils import *
from torchvision.transforms import transforms as T
from utils.datasets import LoadImagesAndLabels, JointDataset, collate_fn
def test(
cfg,
data_cfg,
weights,
batch_size=16,
img_size=416,
iou_thres=0.5,
conf_thres=0.3,
nms_thres=0.45,
print_interval=40,
nID=14455,
):
# Configure run
f = open(data_cfg)
data_cfg_dict = json.load(f)
f.close()
#nC = int(data_cfg_dict['classes']) # number of classes (80 for COCO)
nC = 1
test_path = data_cfg_dict['test']
# Initialize model
model = Darknet(cfg, img_size, nID)
# Load weights
if weights.endswith('.pt'): # pytorch format
model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
else: # darknet format
load_darknet_weights(model, weights)
model = torch.nn.DataParallel(model)
model.cuda().eval()
# Get dataloader
transforms = T.Compose([T.ToTensor()])
dataset = JointDataset(test_path, img_size, augment=False, transforms=transforms)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
num_workers=8, drop_last=False, collate_fn=collate_fn)
mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
[], [], [], [], [], [], [], [], []
AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
coco91class = coco80_to_coco91_class()
for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
t = time.time()
output = model(imgs.cuda())
output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
for i, o in enumerate(output):
if o is not None:
output[i] = o[:, :6]
# Compute average precision for each sample
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
for si, (labels, detections) in enumerate(zip(targets, output)):
seen += 1
if detections is None:
# If there are labels but no detections mark as zero AP
if labels.size(0) != 0:
mAPs.append(0), mR.append(0), mP.append(0)
continue
# Get detections sorted by decreasing confidence scores
detections = detections.cpu().numpy()
detections = detections[np.argsort(-detections[:, 4])]
# If no labels add number of detections as incorrect
correct = []
if labels.size(0) == 0:
# correct.extend([0 for _ in range(len(detections))])
mAPs.append(0), mR.append(0), mP.append(0)
continue
else:
target_cls = labels[:, 0]
# Extract target boxes as (x1, y1, x2, y2)
target_boxes = xywh2xyxy(labels[:, 2:6])
target_boxes[:, 0] *= img_size[0]
target_boxes[:, 2] *= img_size[0]
target_boxes[:, 1] *= img_size[1]
target_boxes[:, 3] *= img_size[1]
detected = []
for *pred_bbox, conf, obj_conf in detections:
obj_pred = 0
pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
# Compute iou with target boxes
iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
# Extract index of largest overlap
best_i = np.argmax(iou)
# If overlap exceeds threshold and classification is correct mark as correct
if iou[best_i] > iou_thres and obj_pred == labels[best_i, 0] and best_i not in detected:
correct.append(1)
detected.append(best_i)
else:
correct.append(0)
# Compute Average Precision (AP) per class
AP, AP_class, R, P = ap_per_class(tp=correct,
conf=detections[:, 4],
pred_cls=np.zeros_like(detections[:, 5]), # detections[:, 6]
target_cls=target_cls)
# Accumulate AP per class
AP_accum_count += np.bincount(AP_class, minlength=nC)
AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)
# Compute mean AP across all classes in this image, and append to image list
mAPs.append(AP.mean())
mR.append(R.mean())
mP.append(P.mean())
# Means of all images
mean_mAP = np.sum(mAPs) / ( AP_accum_count + 1E-16)
mean_R = np.sum(mR) / ( AP_accum_count + 1E-16)
mean_P = np.sum(mP) / (AP_accum_count + 1E-16)
if batch_i % print_interval==0:
# Print image mAP and running mean mAP
print(('%11s%11s' + '%11.3g' * 4 + 's') %
(seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t))
# Print mAP per class
print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))
# Return mAP
return mean_mAP, mean_R, mean_P
def test_emb(
cfg,
data_cfg,
weights,
batch_size=16,
img_size=416,
iou_thres=0.5,
conf_thres=0.3,
nms_thres=0.45,
print_interval=40,
nID=14455,
):
# Configure run
f = open(data_cfg)
data_cfg_dict = json.load(f)
f.close()
test_paths = data_cfg_dict['test_emb']
# Initialize model
model = Darknet(cfg, img_size, nID, test_emb=True)
# Load weights
if weights.endswith('.pt'): # pytorch format
model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
else: # darknet format
load_darknet_weights(model, weights)
model = torch.nn.DataParallel(model)
model.cuda().eval()
# Get dataloader
transforms = T.Compose([T.ToTensor()])
dataset = JointDataset(test_paths, img_size, augment=False, transforms=transforms)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
num_workers=8, drop_last=False, collate_fn=collate_fn)
embedding, id_labels = [], []
print('Extracting pedestrain features...')
for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
t = time.time()
output = model(imgs.cuda(), targets.cuda(), targets_len.cuda()).squeeze()
for out in output:
feat, label = out[:-1], out[-1].long()
if label != -1:
embedding.append(feat)
id_labels.append(label)
if batch_i % print_interval==0:
print('Extracting {}/{}, # of instances {}, time {:.2f} sec.'.format(batch_i, len(dataloader), len(id_labels), time.time() - t))
print('Computing pairwise similairity...')
if len(embedding) <1 :
return None
embedding = torch.stack(embedding, dim=0).cuda()
id_labels = torch.LongTensor(id_labels)
n = len(id_labels)
print(n, len(embedding))
assert len(embedding) == n
embedding = F.normalize(embedding, dim=1)
pdist = torch.mm(embedding, embedding.t()).cpu().numpy()
gt = id_labels.expand(n,n).eq(id_labels.expand(n,n).t()).numpy()
up_triangle = np.where(np.triu(pdist)- np.eye(n)*pdist !=0)
pdist = pdist[up_triangle]
gt = gt[up_triangle]
far_levels = [ 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
far,tar,threshold = metrics.roc_curve(gt, pdist)
interp = interpolate.interp1d(far, tar)
tar_at_far = [interp(x) for x in far_levels]
for f,fa in enumerate(far_levels):
print('TPR@FAR={:.7f}: {:.4f}'.format(fa, tar_at_far[f]))
return tar_at_far
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--batch-size', type=int, default=40, help='size of each image batch')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='data config')
parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file')
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
parser.add_argument('--print-interval', type=int, default=10, help='size of each image dimension')
parser.add_argument('--test-emb', action='store_true', help='test embedding')
opt = parser.parse_args()
print(opt, end='\n\n')
with torch.no_grad():
if opt.test_emb:
res = test_emb(
opt.cfg,
opt.data_cfg,
opt.weights,
opt.batch_size,
opt.img_size,
opt.iou_thres,
opt.conf_thres,
opt.nms_thres,
opt.print_interval,
)
else:
mAP = test(
opt.cfg,
opt.data_cfg,
opt.weights,
opt.batch_size,
opt.img_size,
opt.iou_thres,
opt.conf_thres,
opt.nms_thres,
opt.print_interval,
)

170
track.py Normal file
View file

@ -0,0 +1,170 @@
import os
import os.path as osp
import cv2
import logging
import argparse
import motmetrics as mm
from tracker.mot_tracker_kalman import AETracker
from utils import visualization as vis
from utils.log import logger
from utils.timer import Timer
from utils.evaluation import Evaluator
import utils.datasets as datasets
import torch
def mkdirs(path):
if os.path.exists(path):
return
os.makedirs(path)
def write_results(filename, results, data_type):
if data_type == 'mot':
save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
else:
raise ValueError(data_type)
with open(filename, 'w') as f:
for frame_id, tlwhs, track_ids in results:
if data_type == 'kitti':
frame_id -= 1
for tlwh, track_id in zip(tlwhs, track_ids):
if track_id < 0:
continue
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
f.write(line)
logger.info('save results to {}'.format(filename))
def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30):
if save_dir is not None:
mkdirs(save_dir)
tracker = AETracker(opt, frame_rate=frame_rate)
timer = Timer()
results = []
frame_id = 0
for path, img, img0 in dataloader:
if frame_id % 20 == 0:
logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1./max(1e-5, timer.average_time)))
# run tracking
timer.tic()
blob = torch.from_numpy(img).cuda().unsqueeze(0)
online_targets = tracker.update(blob, img0)
online_tlwhs = []
online_ids = []
for t in online_targets:
tlwh = t.tlwh
tid = t.track_id
vertical = tlwh[2] / tlwh[3] > 1.6
if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
online_tlwhs.append(tlwh)
online_ids.append(tid)
timer.toc()
# save results
results.append((frame_id + 1, online_tlwhs, online_ids))
if show_image or save_dir is not None:
online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id,
fps=1. / timer.average_time)
if show_image:
cv2.imshow('online_im', online_im)
if save_dir is not None:
cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im)
frame_id += 1
# save results
write_results(result_filename, results, data_type)
return frame_id
def main(opt, data_root='/data/MOT16/train', det_root=None,
seqs=('MOT16-05',), exp_name='demo', save_image=False, show_image=True):
logger.setLevel(logging.INFO)
result_root = os.path.join(data_root, '..', 'results', exp_name)
mkdirs(result_root)
data_type = 'mot'
# run tracking
timer = Timer()
accs = []
n_frame = 0
timer.tic()
for seq in seqs:
output_dir = os.path.join(data_root, '..','outputs', exp_name, seq) if save_image else None
logger.info('start seq: {}'.format(seq))
dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size)
result_filename = os.path.join(result_root, '{}.txt'.format(seq))
meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read()
frame_rate = int(meta_info[meta_info.find('frameRate')+10:meta_info.find('\nseqLength')])
n_frame += eval_seq(opt, dataloader, data_type, result_filename,
save_dir=output_dir, show_image=show_image, frame_rate=frame_rate)
# eval
logger.info('Evaluate seq: {}'.format(seq))
evaluator = Evaluator(data_root, seq, data_type)
accs.append(evaluator.eval_file(result_filename))
timer.toc()
logger.info('Time elapsed: {}, FPS {}'.format(timer.average_time, n_frame / timer.average_time))
# get summary
# metrics = ['mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall']
metrics = mm.metrics.motchallenge_metrics
mh = mm.metrics.create()
summary = Evaluator.get_summary(accs, seqs, metrics)
strsummary = mm.io.render_summary(
summary,
formatters=mh.formatters,
namemap=mm.io.motchallenge_metric_names
)
print(strsummary)
Evaluator.save_summary(summary, os.path.join(result_root, 'summary_{}.xlsx'.format(exp_name)))
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--batch-size', type=int, default=8, help='size of each image batch')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file')
parser.add_argument('--img-size', type=int, default=(864,480), help='size of each image dimension')
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.4, help='iou threshold for non-maximum suppression')
parser.add_argument('--min-box-area', type=float, default=200, help='filter out tiny boxes')
parser.add_argument('--pixel-mean', type=float, default=[0,0,0], nargs='+', help='pixel mean')
parser.add_argument('--track-buffer', type=int, default=30, help='tracking buffer')
parser.add_argument('--test-mot16', action='store_true', help='tracking buffer')
parser.add_argument('--save-images', action='store_true', help='save tracking results')
opt = parser.parse_args()
print(opt, end='\n\n')
if not opt.test_mot16:
seqs_str = '''CVPR19-01
CVPR19-02
CVPR19-03
CVPR19-05'''
data_root = '/home/wangzd/datasets/MOT/MOT19/train'
else:
seqs_str = '''MOT16-01
MOT16-03
MOT16-06
MOT16-07
MOT16-08
MOT16-12
MOT16-14'''
#seqs_str = 'MOT16-14'
data_root = '/home/wangzd/datasets/MOT/MOT16/test'
seqs = [seq.strip() for seq in seqs_str.split()]
main(opt,
data_root=data_root,
seqs=seqs,
exp_name='darknet53.864x480',
show_image=False,
save_image=opt.save_images)

0
tracker/__init__.py Normal file
View file

53
tracker/basetrack.py Normal file
View file

@ -0,0 +1,53 @@
import numpy as np
from collections import OrderedDict
class TrackState(object):
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack(object):
_count = 0
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
return self.frame_id
@staticmethod
def next_id():
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
raise NotImplementedError
def predict(self):
raise NotImplementedError
def update(self, *args, **kwargs):
raise NotImplementedError
def mark_lost(self):
self.state = TrackState.Lost
def mark_removed(self):
self.state = TrackState.Removed

181
tracker/detector.py Normal file
View file

@ -0,0 +1,181 @@
import numpy as np
from numba import jit
from collections import deque
import itertools
import os
import os.path as osp
import time
import torch
from lib.utils.log import logger
from lib.tracker import matching
from lib.utils.kalman_filter import KalmanFilter
from lib.model.faster_rcnn.resnet import resnet_deploy
from lib.model.utils.config import cfg
from lib.model.rpn.bbox_transform import clip_boxes, bbox_transform_inv
from lib.model.nms.nms_wrapper import nms
from .basetrack import BaseTrack, TrackState
class STrack(BaseTrack):
def __init__(self, tlwh, score, temp_feat):
# wait activate
self._tlwh = np.asarray(tlwh, dtype=np.float)
self.is_activated = False
self.score = score
self.tracklet_len = 0
self.temp_feat = temp_feat
def activate(self, frame_id):
"""Start a new tracklet"""
self.track_id = self.next_id()
self.time_since_update = 0
self.tracklet_len = 0
self.state = TrackState.Tracked
#self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
self._tlwh = new_track.tlwh
self.temp_feat = new_track.temp_feat
self.time_since_update = 0
self.tracklet_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
def update(self, new_track, frame_id, update_feature=True):
"""
Update a matched track
:type new_track: STrack
:type frame_id: int
:type update_feature: bool
:return:
"""
self.frame_id = frame_id
self.time_since_update = 0
self.tracklet_len += 1
self._tlwh = new_track.tlwh
self.state = TrackState.Tracked
self.is_activated = True
self.score = new_track.score
if update_feature:
self.temp_feat = new_track.temp_feat
@property
@jit
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
return self._tlwh.copy()
@property
@jit
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
@jit
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
@jit
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
class JDTracker(object):
def __init__(self, checksession=3, checkepoch=24, checkpoint=663, det_thresh=0.92, frame_rate=30):
self.classes = np.asarray(['__background__', 'pedestrian'])
self.fasterRCNN = resnet_deploy(self.classes, 101, pretrained=False, class_agnostic=False)
self.fasterRCNN.create_architecture()
input_dir = osp.join('models', 'res101', 'mot17det')
if not os.path.exists(input_dir):
raise Exception('There is no input directory for loading network from ' + input_dir)
load_name = os.path.join(input_dir,
'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch, checkpoint))
print("load checkpoint %s" % (load_name))
checkpoint = torch.load(load_name)
self.fasterRCNN.load_state_dict(checkpoint['model'], strict=False)
print('load model successfully!')
self.fasterRCNN.cuda()
self.fasterRCNN.eval()
self.frame_id = 0
self.det_thresh = det_thresh
self.buffer_size = int(frame_rate / 30.0 * cfg.TRACKING_BUFFER_SIZE)
self.max_time_lost = self.buffer_size
#self.fmap_buffer = deque([], maxlen=self.buffer_size)
def update(self, im_blob):
self.frame_id += 1
'''Forward'''
im_blob = im_blob.cuda()
im_info = torch.Tensor([[im_blob.shape[1], im_blob.shape[2], 1, ],]).float().cuda()
self.im_info = im_info
boxes, temp_feat, base_feat = self.predict(im_blob, im_info)
'''Detections'''
detections = [STrack(STrack.tlbr_to_tlwh((t, l, b, r)), s, f) for (t, l, b, r, s), f in zip(boxes, temp_feat)]
return detections
def predict(self, im_blob, im_info):
im_blob = im_blob.permute(0,3,1,2)
# Trivial input
gt_boxes = torch.zeros(1, 1, 6).to(im_blob)
num_boxes = gt_boxes[:, :, 0].squeeze()
with torch.no_grad():
rois, cls_prob, bbox_pred, base_feat = self.fasterRCNN(im_blob, im_info, gt_boxes, num_boxes)
scores = cls_prob.data
inds_first = torch.nonzero(scores[0, :, 1] > self.det_thresh).view(-1)
if inds_first.numel() > 0:
rois = rois[:, inds_first]
scores = scores[:,inds_first]
bbox_pred = bbox_pred[:, inds_first]
refined_rois = self.fasterRCNN.bbox_refine(rois, bbox_pred, im_info)
template_feat = self.fasterRCNN.roi_pool(base_feat, refined_rois)
pred_boxes = refined_rois.data[:, :, 1:5]
cls_scores = scores[0, :, 1]
_, order = torch.sort(cls_scores, 0, True)
cls_boxes = pred_boxes[0]
cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
cls_dets = cls_dets[order]
temp_feat = template_feat[order]
keep_first = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS).view(-1).long()
cls_dets = cls_dets[keep_first]
temp_feat = temp_feat[keep_first]
output_box = cls_dets.cpu().numpy()
else:
output_box = []
temp_feat = []
return output_box, temp_feat, base_feat

141
tracker/matching.py Normal file
View file

@ -0,0 +1,141 @@
import cv2
import numpy as np
import scipy
from scipy.spatial.distance import cdist
from sklearn.utils import linear_assignment_
from utils.cython_bbox import bbox_ious
from utils import kalman_filter
import time
def merge_matches(m1, m2, shape):
O,P,Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
mask = M1*M2
match = mask.nonzero()
match = list(zip(match[0], match[1]))
unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
return match, unmatched_O, unmatched_Q
def _indices_to_matches(cost_matrix, indices, thresh):
matched_cost = cost_matrix[tuple(zip(*indices))]
matched_mask = (matched_cost <= thresh)
matches = indices[matched_mask]
unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
return matches, unmatched_a, unmatched_b
def linear_assignment(cost_matrix, thresh):
"""
Simple linear assignment
:type cost_matrix: np.ndarray
:type thresh: float
:return: matches, unmatched_a, unmatched_b
"""
if cost_matrix.size == 0:
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
cost_matrix[cost_matrix > thresh] = thresh + 1e-4
indices = linear_assignment_.linear_assignment(cost_matrix)
return _indices_to_matches(cost_matrix, indices, thresh)
def ious(atlbrs, btlbrs):
"""
Compute cost based on IoU
:type atlbrs: list[tlbr] | np.ndarray
:type atlbrs: list[tlbr] | np.ndarray
:rtype ious np.ndarray
"""
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
if ious.size == 0:
return ious
ious = bbox_ious(
np.ascontiguousarray(atlbrs, dtype=np.float),
np.ascontiguousarray(btlbrs, dtype=np.float)
)
return ious
def iou_distance(atracks, btracks):
"""
Compute cost based on IoU
:type atracks: list[STrack]
:type btracks: list[STrack]
:rtype cost_matrix np.ndarray
"""
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
#def embedding_distance(tracks, detections, metric='cosine'):
# """
# :param tracks: list[STrack]
# :param detections: list[BaseTrack]
# :param metric:
# :return: cost_matrix np.ndarray
# """
#
# cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
# if cost_matrix.size == 0:
# return cost_matrix
# det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
# for i, track in enumerate(tracks):
# #cost_matrix[i, :] = np.maximum(0.0, cdist(track.features, det_features, metric).min(axis=0))
# cost_matrix[i, :] = np.maximum(0.0, cdist(track.features, det_features, metric).min(axis=0))
# return cost_matrix
def embedding_distance(tracks, detections, metric='cosine'):
"""
:param tracks: list[STrack]
:param detections: list[BaseTrack]
:param metric:
:return: cost_matrix np.ndarray
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
for i, track in enumerate(tracks):
cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
return cost_matrix
def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = np.inf
return cost_matrix

473
tracker/mot_tracker.py Normal file
View file

@ -0,0 +1,473 @@
import numpy as np
from numba import jit
from collections import deque
import itertools
import os
import os.path as osp
import time
import torch
from utils.utils import *
from utils.log import logger
from models import *
from tracker import matching
from .basetrack import BaseTrack, TrackState
class STrack(BaseTrack):
def __init__(self, tlwh, score, temp_feat, buffer_size=30):
# wait activate
self._tlwh = np.asarray(tlwh, dtype=np.float)
self.is_activated = False
self.score = score
self.tracklet_len = 0
self.smooth_feat = None
self.update_features(temp_feat)
self.features = deque([], maxlen=buffer_size)
def update_features(self, feat):
print(1)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = 0.9 *self.smooth_feat + 0.1 * feat
self.features.append(temp_feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def activate(self, frame_id):
"""Start a new tracklet"""
self.track_id = self.next_id()
self.time_since_update = 0
self.tracklet_len = 0
self.state = TrackState.Tracked
#self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
self._tlwh = new_track.tlwh
#self.features.append(new_track.curr_feat)
self.update_features(new_track.curr_feat)
self.time_since_update = 0
self.tracklet_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
def update(self, new_track, frame_id, update_feature=True):
"""
Update a matched track
:type new_track: STrack
:type frame_id: int
:type update_feature: bool
:return:
"""
self.frame_id = frame_id
self.time_since_update = 0
self.tracklet_len += 1
self._tlwh = new_track.tlwh
self.state = TrackState.Tracked
self.is_activated = True
self.score = new_track.score
if update_feature:
#self.features.append( new_track.curr_feat)
self.update_features(new_track.curr_feat)
@property
@jit
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
return self._tlwh.copy()
@property
@jit
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
@jit
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
@jit
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
class IOUTracker(object):
def __init__(self, opt, frame_rate=30):
self.opt = opt
self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
#load_darknet_weights(self.model, opt.weights)
self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
self.model.cuda().eval()
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.det_thresh = opt.conf_thres
self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
self.max_time_lost = self.buffer_size
#self.fmap_buffer = deque([], maxlen=self.buffer_size)
def update(self, im_blob, img0):
self.frame_id += 1
activated_starcks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
t1 = time.time()
'''Forward'''
with torch.no_grad():
pred = self.model(im_blob)
pred = pred[pred[:, :, 4] > self.opt.conf_thres]
if len(pred) > 0:
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0]
scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
'''Detections'''
detections = [STrack(STrack.tlbr_to_tlwh((t, l, b, r)), s, None) for (t, l, b, r, s) in dets[:, :5]]
else:
detections = []
t2 = time.time()
#print('Forward: {} s'.format(t2-t1))
'''matching for tracked targets'''
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
#dists = self.track_matching(strack_pool, detections, base_feat)
dists = matching.iou_distance(strack_pool, detections)
#dists[np.where(iou_dists>0.4)] = 1.0
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
t3 = time.time()
#print('First match {} s'.format(t3-t2))
#'''Remained det/track, use IOU between dets and tracks to associate directly'''
#detections = [detections[i] for i in u_detection]
#r_tracked_stracks = [strack_pool[i] for i in u_track ]
#dists = matching.iou_distance(r_tracked_stracks, detections)
#matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
#for itracked, idet in matches:
# r_tracked_stracks[itracked].update(detections[idet], self.frame_id)
for it in u_track:
track = strack_pool[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_stracks.append(track)
"""step 4: init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_starcks.append(track)
"""step 6: update state"""
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
t4 = time.time()
#print('Ramained match {} s'.format(t4-t3))
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
#self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack]
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
self.removed_stracks.extend(removed_stracks)
self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
# get scores of lost tracks
output_stracks = [track for track in self.tracked_stracks if track.is_activated]
logger.debug('===========Frame {}=========='.format(self.frame_id))
logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
t5 = time.time()
#print('Final {} s'.format(t5-t4))
return output_stracks
class AETracker(object):
def __init__(self, opt, frame_rate=30):
self.opt = opt
self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
# load_darknet_weights(self.model, opt.weights)
self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
self.model.cuda().eval()
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.det_thresh = opt.conf_thres
self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
self.max_time_lost = self.buffer_size
def update(self, im_blob, img0):
self.frame_id += 1
activated_starcks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
t1 = time.time()
'''Forward'''
with torch.no_grad():
pred = self.model(im_blob)
pred = pred[pred[:, :, 4] > self.opt.conf_thres]
if len(pred) > 0:
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
'''Detections'''
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
(tlbrs, f) in zip(dets[:, :5], dets[:, -self.model.emb_dim:])]
else:
detections = []
t2 = time.time()
# print('Forward: {} s'.format(t2-t1))
'''matching for tracked targets'''
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
#strack_pool = tracked_stracks
dists = matching.embedding_distance(strack_pool, detections)
iou_dists = matching.iou_distance(strack_pool, detections)
dists[np.where(iou_dists>0.99)] = 1.0
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
# detections = [detections[i] for i in u_detection]
# dists = matching.embedding_distance(self.lost_stracks, detections)
# iou_dists = matching.iou_distance(self.lost_stracks, detections)
# dists[np.where(iou_dists>0.7)] = 1.0
#
# matches, u_track_lost, u_detection = matching.linear_assignment(dists, thresh=0.7)
#
# for itracked, idet in matches:
# track = self.lost_stracks[itracked]
# det = detections[idet]
# if track.state == TrackState.Tracked:
# track.update(detections[idet], self.frame_id)
# activated_starcks.append(track)
# else:
# track.re_activate(det, self.frame_id, new_id=False)
# refind_stracks.append(track)
'''Remained det/track, use IOU between dets and tracks to associate directly'''
detections = [detections[i] for i in u_detection]
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state==TrackState.Tracked ]
r_lost_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state!=TrackState.Tracked ]
dists = matching.iou_distance(r_tracked_stracks, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_stracks[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
for it in u_track:
track = r_tracked_stracks[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
# '''Remained det/track, use IOU between dets and tracks to associate directly'''
# detections = [detections[i] for i in u_detection]
# dists = matching.iou_distance(r_lost_stracks, detections)
# matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.25)
#
# for itracked, idet in matches:
# track = r_lost_stracks[itracked]
# det = detections[idet]
# if track.state == TrackState.Tracked:
# track.update(det, self.frame_id)
# activated_starcks.append(track)
# else:
# track.re_activate(det, self.frame_id, new_id=False)
# refind_stracks.append(track)
#
# for it in u_track:
# track = r_lost_stracks[it]
# if not track.state == TrackState.Lost:
# track.mark_lost()
# lost_stracks.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_starcks.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_stracks.append(track)
"""step 4: init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_starcks.append(track)
"""step 6: update state"""
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
t4 = time.time()
# print('Ramained match {} s'.format(t4-t3))
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
# self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack]
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
self.removed_stracks.extend(removed_stracks)
self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
# get scores of lost tracks
output_stracks = [track for track in self.tracked_stracks if track.is_activated]
logger.debug('===========Frame {}=========='.format(self.frame_id))
logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
t5 = time.time()
# print('Final {} s'.format(t5-t4))
return output_stracks
def joint_stracks(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_stracks(tlista, tlistb):
stracks = {}
for t in tlista:
stracks[t.track_id] = t
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
def remove_duplicate_stracks(stracksa, stracksb):
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist<0.15)
dupa, dupb = list(), list()
for p,q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i,t in enumerate(stracksa) if not i in dupa]
resb = [t for i,t in enumerate(stracksb) if not i in dupb]
return resa, resb

View file

@ -0,0 +1,466 @@
import numpy as np
from numba import jit
from collections import deque
import itertools
import os
import os.path as osp
import time
import torch
from utils.utils import *
from utils.log import logger
from utils.kalman_filter import KalmanFilter
from models import *
from tracker import matching
from .basetrack import BaseTrack, TrackState
class STrack(BaseTrack):
def __init__(self, tlwh, score, temp_feat, buffer_size=30):
# wait activate
self._tlwh = np.asarray(tlwh, dtype=np.float)
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
self.score = score
self.tracklet_len = 0
self.smooth_feat = None
self.update_features(temp_feat)
self.features = deque([], maxlen=buffer_size)
self.alpha = 0.9
def update_features(self, feat):
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha *self.smooth_feat + (1-self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
def activate(self, kalman_filter, frame_id):
"""Start a new tracklet"""
self.kalman_filter = kalman_filter
self.track_id = self.next_id()
self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))
self.tracklet_len = 0
self.state = TrackState.Tracked
#self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
)
self.update_features(new_track.curr_feat)
self.tracklet_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
def update(self, new_track, frame_id, update_feature=True):
"""
Update a matched track
:type new_track: STrack
:type frame_id: int
:type update_feature: bool
:return:
"""
self.frame_id = frame_id
self.tracklet_len += 1
new_tlwh = new_track.tlwh
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.score = new_track.score
if update_feature:
self.update_features(new_track.curr_feat)
@property
@jit
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
@property
@jit
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
@jit
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
def to_xyah(self):
return self.tlwh_to_xyah(self.tlwh)
@staticmethod
@jit
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
@jit
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
class IOUTracker(object):
def __init__(self, opt, frame_rate=30):
self.opt = opt
self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
#load_darknet_weights(self.model, opt.weights)
self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
self.model.cuda().eval()
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.det_thresh = opt.conf_thres
self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
self.max_time_lost = self.buffer_size
#self.fmap_buffer = deque([], maxlen=self.buffer_size)
def update(self, im_blob, img0):
self.frame_id += 1
activated_starcks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
t1 = time.time()
'''Forward'''
with torch.no_grad():
pred = self.model(im_blob)
pred = pred[pred[:, :, 4] > self.opt.conf_thres]
if len(pred) > 0:
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0]
scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
'''Detections'''
detections = [STrack(STrack.tlbr_to_tlwh((t, l, b, r)), s, None) for (t, l, b, r, s) in dets[:, :5]]
else:
detections = []
t2 = time.time()
#print('Forward: {} s'.format(t2-t1))
'''matching for tracked targets'''
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
#dists = self.track_matching(strack_pool, detections, base_feat)
dists = matching.iou_distance(strack_pool, detections)
#dists[np.where(iou_dists>0.4)] = 1.0
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
t3 = time.time()
#print('First match {} s'.format(t3-t2))
#'''Remained det/track, use IOU between dets and tracks to associate directly'''
#detections = [detections[i] for i in u_detection]
#r_tracked_stracks = [strack_pool[i] for i in u_track ]
#dists = matching.iou_distance(r_tracked_stracks, detections)
#matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
#for itracked, idet in matches:
# r_tracked_stracks[itracked].update(detections[idet], self.frame_id)
for it in u_track:
track = strack_pool[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_stracks.append(track)
"""step 4: init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.kalman_filter, self.frame_id)
activated_starcks.append(track)
"""step 6: update state"""
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
t4 = time.time()
#print('Ramained match {} s'.format(t4-t3))
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
#self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack]
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
self.removed_stracks.extend(removed_stracks)
self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
# get scores of lost tracks
output_stracks = [track for track in self.tracked_stracks if track.is_activated]
logger.debug('===========Frame {}=========='.format(self.frame_id))
logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
t5 = time.time()
#print('Final {} s'.format(t5-t4))
return output_stracks
class AETracker(object):
def __init__(self, opt, frame_rate=30):
self.opt = opt
self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
# load_darknet_weights(self.model, opt.weights)
self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
self.model.cuda().eval()
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.det_thresh = opt.conf_thres
self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
self.max_time_lost = self.buffer_size
self.kalman_filter = KalmanFilter()
def update(self, im_blob, img0):
self.frame_id += 1
activated_starcks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
t1 = time.time()
''' Step 1: Network forward, get detections & embeddings'''
with torch.no_grad():
pred = self.model(im_blob)
pred = pred[pred[:, :, 4] > self.opt.conf_thres]
if len(pred) > 0:
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
'''Detections'''
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
(tlbrs, f) in zip(dets[:, :5], dets[:, -self.model.emb_dim:])]
else:
detections = []
t2 = time.time()
# print('Forward: {} s'.format(t2-t1))
''' Add newly detected tracklets to tracked_stracks'''
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
''' Step 2: First association, with embedding'''
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
# Predict the current location with KF
for strack in strack_pool:
strack.predict()
dists = matching.embedding_distance(strack_pool, detections)
dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
''' Step 3: Second association, with IOU'''
detections = [detections[i] for i in u_detection]
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state==TrackState.Tracked ]
dists = matching.iou_distance(r_tracked_stracks, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_stracks[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
for it in u_track:
track = r_tracked_stracks[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_starcks.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_stracks.append(track)
""" Step 4: Init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.kalman_filter, self.frame_id)
activated_starcks.append(track)
""" Step 5: Update state"""
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
t4 = time.time()
# print('Ramained match {} s'.format(t4-t3))
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
# self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack]
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
self.removed_stracks.extend(removed_stracks)
self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
# get scores of lost tracks
output_stracks = [track for track in self.tracked_stracks if track.is_activated]
logger.debug('===========Frame {}=========='.format(self.frame_id))
logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
t5 = time.time()
# print('Final {} s'.format(t5-t4))
return output_stracks
def joint_stracks(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_stracks(tlista, tlistb):
stracks = {}
for t in tlista:
stracks[t.track_id] = t
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
def remove_duplicate_stracks(stracksa, stracksb):
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist<0.15)
dupa, dupb = list(), list()
for p,q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i,t in enumerate(stracksa) if not i in dupa]
resb = [t for i,t in enumerate(stracksb) if not i in dupb]
return resa, resb

198
train.py Normal file
View file

@ -0,0 +1,198 @@
import argparse
import json
import time
import test # Import test.py to get mAP after each epoch
from models import *
from utils.datasets import JointDataset, collate_fn
from utils.utils import *
from torchvision.transforms import transforms as T
def train(
cfg,
data_cfg,
img_size=416,
resume=False,
epochs=100,
batch_size=16,
accumulated_batches=1,
freeze_backbone=False,
var=0,
opt=None,
):
weights = 'weights' + os.sep
latest = weights + 'latest.pt'
best = weights + 'best.pt'
device = torch_utils.select_device()
torch.backends.cudnn.benchmark = True # unsuitable for multiscale
# Configure run
f = open(data_cfg)
trainset_paths = json.load(f)['train']
f.close()
transforms = T.Compose([T.ToTensor()])
# Get dataloader
dataset = JointDataset(trainset_paths, img_size, augment=True, transforms=transforms)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
num_workers=8, pin_memory=True, drop_last=True, collate_fn=collate_fn)
# Initialize model
model = Darknet(cfg, img_size, dataset.nID)
lr0 = opt.lr
cutoff = -1 # backbone reaches to cutoff layer
start_epoch = 0
best_loss = float('inf')
if resume:
checkpoint = torch.load(latest, map_location='cpu')
# Load weights to resume from
model.load_state_dict(checkpoint['model'])
model.to(device).train()
# Set optimizer
optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)
start_epoch = checkpoint['epoch'] + 1
if checkpoint['optimizer'] is not None:
optimizer.load_state_dict(checkpoint['optimizer'])
best_loss = checkpoint['best_loss']
del checkpoint # current, saved
else:
# Initialize model with backbone (optional)
if cfg.endswith('yolov3.cfg'):
load_darknet_weights(model, weights + 'darknet53.conv.74')
cutoff = 75
elif cfg.endswith('yolov3-tiny.cfg'):
load_darknet_weights(model, weights + 'yolov3-tiny.conv.15')
cutoff = 15
model.to(device).train()
# Set optimizer
optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9, weight_decay=1e-4)
model = torch.nn.DataParallel(model)
# Set scheduler
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
milestones=[int(0.5*opt.epochs), int(0.75*opt.epochs)], gamma=0.1)
# An important trick for detection: freeze bn during fine-tuning
if not opt.unfreeze_bn:
for i, (name, p) in enumerate(model.named_parameters()):
p.requires_grad = False if 'batch_norm' in name else True
model_info(model)
t0 = time.time()
for epoch in range(epochs):
epoch += start_epoch
print(('%8s%12s' + '%10s' * 6) % (
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
# Update scheduler (automatic)
scheduler.step()
# Freeze darknet53.conv.74 for first epoch
if freeze_backbone and (epoch < 2):
for i, (name, p) in enumerate(model.named_parameters()):
if int(name.split('.')[2]) < cutoff: # if layer < 75
p.requires_grad = False if (epoch == 0) else True
ui = -1
rloss = defaultdict(float) # running loss
optimizer.zero_grad()
for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
if sum([len(x) for x in targets]) < 1: # if no targets continue
continue
# SGD burn-in
burnin = min(1000, len(dataloader))
if (epoch == 0) & (i <= burnin):
lr = lr0 * (i / burnin) **4
for g in optimizer.param_groups:
g['lr'] = lr
# Compute loss, compute gradient, update parameters
loss, components = model(imgs.cuda(), targets.cuda(), targets_len.cuda())
components = torch.mean(components.view(4,-1),dim=0)
loss = torch.mean(loss)
loss.backward()
# accumulate gradient for x batches before optimizing
if ((i + 1) % accumulated_batches == 0) or (i == len(dataloader) - 1):
optimizer.step()
optimizer.zero_grad()
# Running epoch-means of tracked metrics
ui += 1
for ii, key in enumerate(model.module.loss_names):
rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)
s = ('%8s%12s' + '%10.3g' * 6) % (
'%g/%g' % (epoch, epochs - 1),
'%g/%g' % (i, len(dataloader) - 1),
rloss['box'], rloss['conf'],
rloss['id'],rloss['loss'],
rloss['nT'], time.time() - t0)
t0 = time.time()
if i % opt.print_interval == 0:
print(s)
# Save latest checkpoint
checkpoint = {'epoch': epoch,
# 'best_loss': best_loss,
'model': model.module.state_dict(),
'optimizer': optimizer.state_dict()}
torch.save(checkpoint, latest)
# Calculate mAP
if epoch % opt.test_interval ==0:
with torch.no_grad():
mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='coco.data file path')
parser.add_argument('--img-size', type=int, default=(1088, 608), help='pixels')
parser.add_argument('--resume', action='store_true', help='resume training flag')
parser.add_argument('--var', type=float, default=0, help='test variable')
parser.add_argument('--print-interval', type=int, default=40, help='print interval')
parser.add_argument('--test-interval', type=int, default=9, help='test interval')
parser.add_argument('--lr', type=float, default=1e-2, help='init lr')
parser.add_argument('--idw', type=float, default=0.1, help='loss id weight')
parser.add_argument('--unfreeze-bn', action='store_true', help='unfreeze bn')
opt = parser.parse_args()
print(opt, end='\n\n')
init_seeds()
train(
opt.cfg,
opt.data_cfg,
img_size=opt.img_size,
resume=opt.resume,
epochs=opt.epochs,
batch_size=opt.batch_size,
accumulated_batches=opt.accumulated_batches,
var=opt.var,
opt=opt,
)

362
utils/datasets.py Executable file
View file

@ -0,0 +1,362 @@
import glob
import math
import os
import random
import time
from collections import OrderedDict
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from utils.utils import xyxy2xywh
class LoadImages: # for inference
def __init__(self, path, img_size=(1088, 608)):
if os.path.isdir(path):
image_format = ['.jpg', '.jpeg', '.png', '.tif']
self.files = sorted(glob.glob('%s/*.*' % path))
self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
elif os.path.isfile(path):
self.files = [path]
self.nF = len(self.files) # number of image files
self.width = img_size[0]
self.height = img_size[1]
self.count = 0
assert self.nF > 0, 'No images found in ' + path
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if self.count == self.nF:
raise StopIteration
img_path = self.files[self.count]
# Read image
img0 = cv2.imread(img_path) # BGR
assert img0 is not None, 'Failed to load ' + img_path
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height, width=self.width)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1)
img = np.ascontiguousarray(img, dtype=np.float32)
img /= 255.0
# cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return img_path, img, img0
def __getitem__(self, idx):
idx = idx % self.nF
img_path = self.files[idx]
# Read image
img0 = cv2.imread(img_path) # BGR
assert img0 is not None, 'Failed to load ' + img_path
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height, width=self.width)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1)
img = np.ascontiguousarray(img, dtype=np.float32)
img /= 255.0
return img_path, img, img0
def __len__(self):
return self.nF # number of files
class LoadImagesAndLabels: # for training
def __init__(self, path, img_size=(1088,608), augment=False, transforms=None):
with open(path, 'r') as file:
self.img_files = file.readlines()
self.img_files = [x.replace('\n', '') for x in self.img_files]
self.img_files = list(filter(lambda x: len(x) > 0, self.img_files))
self.label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
for x in self.img_files]
self.nF = len(self.img_files) # number of image files
self.width = img_size[0]
self.height = img_size[1]
self.augment = augment
self.transforms = transforms
def __getitem__(self, files_index):
img_path = self.img_files[files_index]
label_path = self.label_files[files_index]
return self.get_data(img_path, label_path)
def get_data(self, img_path, label_path):
height = self.height
width = self.width
img = cv2.imread(img_path) # BGR
if img is None:
raise ValueError('File corrupt {}'.format(img_path))
augment_hsv = True
if self.augment and augment_hsv:
# SV augmentation by 50%
fraction = 0.50
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
S = img_hsv[:, :, 1].astype(np.float32)
V = img_hsv[:, :, 2].astype(np.float32)
a = (random.random() * 2 - 1) * fraction + 1
S *= a
if a > 1:
np.clip(S, a_min=0, a_max=255, out=S)
a = (random.random() * 2 - 1) * fraction + 1
V *= a
if a > 1:
np.clip(V, a_min=0, a_max=255, out=V)
img_hsv[:, :, 1] = S.astype(np.uint8)
img_hsv[:, :, 2] = V.astype(np.uint8)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
h, w, _ = img.shape
img, ratio, padw, padh = letterbox(img, height=height, width=width)
# Load labels
if os.path.isfile(label_path):
labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)
# Normalized xywh to pixel xyxy format
labels = labels0.copy()
labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw
labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh
labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw
labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh
else:
labels = np.array([])
# Augment image and labels
if self.augment:
img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
plotFlag = False
if plotFlag:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.figure(figsize=(50, 50))
plt.imshow(img[:, :, ::-1])
plt.plot(labels[:, [1, 3, 3, 1, 1]].T, labels[:, [2, 2, 4, 4, 2]].T, '.-')
plt.axis('off')
plt.savefig('test.jpg')
time.sleep(10)
nL = len(labels)
if nL > 0:
# convert xyxy to xywh
labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy()) #/ height
labels[:, 2] /= width
labels[:, 3] /= height
labels[:, 4] /= width
labels[:, 5] /= height
if self.augment:
# random left-right flip
lr_flip = True
if lr_flip & (random.random() > 0.5):
img = np.fliplr(img)
if nL > 0:
labels[:, 2] = 1 - labels[:, 2]
img = np.ascontiguousarray(img[ :, :, ::-1]) # BGR to RGB
if self.transforms is not None:
img = self.transforms(img)
return img, labels, img_path, (h, w)
def __len__(self):
return self.nF # number of batches
def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)): # resize a rectangular image to a padded rectangular
shape = img.shape[:2] # shape = [height, width]
ratio = min(float(height)/shape[0], float(width)/shape[1])
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height]
dw = (width - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular
return img, ratio, dw, dh
def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
border = 0 # width of added border (optional)
height = img.shape[0]
width = img.shape[1]
# Rotation and Scale
R = np.eye(3)
a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
# a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations
s = random.random() * (scale[1] - scale[0]) + scale[0]
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)
T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)
M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
borderValue=borderValue) # BGR order borderValue
# Return warped points also
if targets is not None:
if len(targets) > 0:
n = targets.shape[0]
points = targets[:, 2:6].copy()
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @ M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
np.clip(xy[:, 0], 0, width, out=xy[:, 0])
np.clip(xy[:, 2], 0, width, out=xy[:, 2])
np.clip(xy[:, 1], 0, height, out=xy[:, 1])
np.clip(xy[:, 3], 0, height, out=xy[:, 3])
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
targets = targets[i]
targets[:, 2:6] = xy[i]
return imw, targets, M
else:
return imw
def collate_fn(batch):
imgs, labels, paths, sizes = zip(*batch)
batch_size = len(labels)
imgs = torch.stack(imgs, 0)
max_box_len = max([l.shape[0] for l in labels])
labels = [torch.from_numpy(l) for l in labels]
filled_labels = torch.zeros(batch_size, max_box_len, 6)
labels_len = torch.zeros(batch_size)
for i in range(batch_size):
isize = labels[i].shape[0]
if len(labels[i])>0:
filled_labels[i, :isize, :] = labels[i]
labels_len[i] = isize
return imgs, filled_labels, paths, sizes, labels_len.unsqueeze(1)
class JointDataset(LoadImagesAndLabels): # for training
def __init__(self, paths, img_size=(1088,608), augment=False, transforms=None):
dataset_names = paths.keys()
self.img_files = OrderedDict()
self.label_files = OrderedDict()
self.tid_num = OrderedDict()
self.tid_start_index = OrderedDict()
for ds, path in paths.items():
with open(path, 'r') as file:
self.img_files[ds] = file.readlines()
self.img_files[ds] = [x.strip() for x in self.img_files[ds]]
self.img_files[ds] = list(filter(lambda x: len(x) > 0, self.img_files[ds]))
self.label_files[ds] = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
for x in self.img_files[ds]]
for ds, label_paths in self.label_files.items():
max_index = -1
for lp in label_paths:
lb = np.loadtxt(lp)
if len(lb) < 1:
continue
if len(lb.shape) < 2:
img_max = lb[1]
else:
img_max = np.max(lb[:,1])
if img_max >max_index:
max_index = img_max
self.tid_num[ds] = max_index + 1
last_index = 0
for i, (k, v) in enumerate(self.tid_num.items()):
self.tid_start_index[k] = last_index
last_index += v
self.nID = int(last_index+1)
self.nds = [len(x) for x in self.img_files.values()]
self.cds = [sum(self.nds[:i]) for i in range(len(self.nds))]
self.nF = sum(self.nds)
self.width = img_size[0]
self.height = img_size[1]
self.augment = augment
self.transforms = transforms
print('='*80)
print('dataset summary')
print(self.tid_num)
print('total # identities:', self.nID)
print('start index')
print(self.tid_start_index)
print('='*80)
def __getitem__(self, files_index):
for i, c in enumerate(self.cds):
if files_index >= c:
ds = list(self.label_files.keys())[i]
start_index = c
img_path = self.img_files[ds][files_index - start_index]
label_path = self.label_files[ds][files_index - start_index]
imgs, labels, img_path, (h, w) = self.get_data(img_path, label_path)
for i, _ in enumerate(labels):
if labels[i,1] > -1:
labels[i,1] += self.tid_start_index[ds]
return imgs, labels, img_path, (h, w)

101
utils/evaluation.py Normal file
View file

@ -0,0 +1,101 @@
import os
import numpy as np
import copy
import motmetrics as mm
from utils.io import read_results, unzip_objs
class Evaluator(object):
def __init__(self, data_root, seq_name, data_type):
self.data_root = data_root
self.seq_name = seq_name
self.data_type = data_type
self.load_annotations()
self.reset_accumulator()
def load_annotations(self):
assert self.data_type == 'mot'
gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
def reset_accumulator(self):
self.acc = mm.MOTAccumulator(auto_id=True)
def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
# results
trk_tlwhs = np.copy(trk_tlwhs)
trk_ids = np.copy(trk_ids)
# gts
gt_objs = self.gt_frame_dict.get(frame_id, [])
gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
# ignore boxes
ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
ignore_tlwhs = unzip_objs(ignore_objs)[0]
# remove ignored results
keep = np.ones(len(trk_tlwhs), dtype=bool)
iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
# get distance matrix
iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
# acc
self.acc.update(gt_ids, trk_ids, iou_distance)
if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics
else:
events = None
return events
def eval_file(self, filename):
self.reset_accumulator()
result_frame_dict = read_results(filename, self.data_type, is_gt=False)
frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
for frame_id in frames:
trk_objs = result_frame_dict.get(frame_id, [])
trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
return self.acc
@staticmethod
def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
names = copy.deepcopy(names)
if metrics is None:
metrics = mm.metrics.motchallenge_metrics
metrics = copy.deepcopy(metrics)
mh = mm.metrics.create()
summary = mh.compute_many(
accs,
metrics=metrics,
names=names,
generate_overall=True
)
return summary
@staticmethod
def save_summary(summary, filename):
import pandas as pd
writer = pd.ExcelWriter(filename)
summary.to_excel(writer)
writer.save()

112
utils/io.py Normal file
View file

@ -0,0 +1,112 @@
import os
from typing import Dict
import numpy as np
from utils.log import logger
def write_results(filename, results_dict: Dict, data_type: str):
if not filename:
return
path = os.path.dirname(filename)
if not os.path.exists(path):
os.makedirs(path)
if data_type in ('mot', 'mcmot', 'lab'):
save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
else:
raise ValueError(data_type)
with open(filename, 'w') as f:
for frame_id, frame_data in results_dict.items():
if data_type == 'kitti':
frame_id -= 1
for tlwh, track_id in frame_data:
if track_id < 0:
continue
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
f.write(line)
logger.info('Save results to {}'.format(filename))
def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
if data_type in ('mot', 'lab'):
read_fun = read_mot_results
else:
raise ValueError('Unknown data type: {}'.format(data_type))
return read_fun(filename, is_gt, is_ignore)
"""
labels={'ped', ... % 1
'person_on_vhcl', ... % 2
'car', ... % 3
'bicycle', ... % 4
'mbike', ... % 5
'non_mot_vhcl', ... % 6
'static_person', ... % 7
'distractor', ... % 8
'occluder', ... % 9
'occluder_on_grnd', ... %10
'occluder_full', ... % 11
'reflection', ... % 12
'crowd' ... % 13
};
"""
def read_mot_results(filename, is_gt, is_ignore):
valid_labels = {1}
ignore_labels = {2, 7, 8, 12}
results_dict = dict()
if os.path.isfile(filename):
with open(filename, 'r') as f:
for line in f.readlines():
linelist = line.split(',')
if len(linelist) < 7:
continue
fid = int(linelist[0])
if fid < 1:
continue
results_dict.setdefault(fid, list())
if is_gt:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
mark = int(float(linelist[6]))
if mark == 0 or label not in valid_labels:
continue
score = 1
elif is_ignore:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
vis_ratio = float(linelist[8])
if label not in ignore_labels and vis_ratio >= 0:
continue
else:
continue
score = 1
else:
score = float(linelist[6])
tlwh = tuple(map(float, linelist[2:6]))
target_id = int(linelist[1])
results_dict[fid].append((tlwh, target_id, score))
return results_dict
def unzip_objs(objs):
if len(objs) > 0:
tlwhs, ids, scores = zip(*objs)
else:
tlwhs, ids, scores = [], [], []
tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
return tlwhs, ids, scores

229
utils/kalman_filter.py Normal file
View file

@ -0,0 +1,229 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
class KalmanFilter(object):
"""
A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim)
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""Create track from unassociated measurement.
Parameters
----------
measurement : ndarray
Bounding box coordinates (x, y, a, h) with center position (x, y),
aspect ratio a, and height h.
Returns
-------
(ndarray, ndarray)
Returns the mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are initialized
to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3],
1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3],
1e-5,
10 * self._std_weight_velocity * measurement[3]]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""Run Kalman filter prediction step.
Parameters
----------
mean : ndarray
The 8 dimensional mean vector of the object state at the previous
time step.
covariance : ndarray
The 8x8 dimensional covariance matrix of the object state at the
previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted
state. Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-2,
self._std_weight_position * mean[3]]
std_vel = [
self._std_weight_velocity * mean[3],
self._std_weight_velocity * mean[3],
1e-5,
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
mean = np.dot(self._motion_mat, mean)
covariance = np.linalg.multi_dot((
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""Project state distribution to measurement space.
Parameters
----------
mean : ndarray
The state's mean vector (8 dimensional array).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
Returns
-------
(ndarray, ndarray)
Returns the projected mean and covariance matrix of the given state
estimate.
"""
std = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-1,
self._std_weight_position * mean[3]]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((
self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def update(self, mean, covariance, measurement):
"""Run Kalman filter correction step.
Parameters
----------
mean : ndarray
The predicted state's mean vector (8 dimensional).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
measurement : ndarray
The 4 dimensional measurement vector (x, y, a, h), where (x, y)
is the center position, a the aspect ratio, and h the height of the
bounding box.
Returns
-------
(ndarray, ndarray)
Returns the measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
chol_factor, lower = scipy.linalg.cho_factor(
projected_cov, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
innovation = measurement - projected_mean
new_mean = mean + np.dot(innovation, kalman_gain.T)
new_covariance = covariance - np.linalg.multi_dot((
kalman_gain, projected_cov, kalman_gain.T))
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements,
only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
mean : ndarray
Mean vector over the state distribution (8 dimensional).
covariance : ndarray
Covariance of the state distribution (8x8 dimensional).
measurements : ndarray
An Nx4 dimensional matrix of N measurements, each in
format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha

18
utils/log.py Normal file
View file

@ -0,0 +1,18 @@
import logging
def get_logger(name='root'):
formatter = logging.Formatter(
# fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger(name)
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
return logger
logger = get_logger('root')

7
utils/nms.py Normal file
View file

@ -0,0 +1,7 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from utils import _C
nms = _C.nms
# nms.__doc__ = """
# This function performs Non-maximum suppresion"""

35
utils/parse_config.py Normal file
View file

@ -0,0 +1,35 @@
def parse_model_cfg(path):
"""Parses the yolo-v3 layer configuration file and returns module definitions"""
file = open(path, 'r')
lines = file.read().split('\n')
lines = [x for x in lines if x and not x.startswith('#')]
lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
module_defs = []
for line in lines:
if line.startswith('['): # This marks the start of a new block
module_defs.append({})
module_defs[-1]['type'] = line[1:-1].rstrip()
if module_defs[-1]['type'] == 'convolutional':
module_defs[-1]['batch_normalize'] = 0
else:
key, value = line.split("=")
value = value.strip()
module_defs[-1][key.rstrip()] = value.strip()
return module_defs
def parse_data_cfg(path):
"""Parses the data configuration file"""
options = dict()
options['gpus'] = '0'
options['num_workers'] = '10'
with open(path, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.strip()
if line == '' or line.startswith('#'):
continue
key, value = line.split('=')
options[key.strip()] = value.strip()
return options

1
utils/syncbn Submodule

@ -0,0 +1 @@
Subproject commit 265a7059ebbd20c27a81c3d74d43773779fe70d7

45
utils/timer.py Executable file
View file

@ -0,0 +1,45 @@
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.
def tic(self):
# using time.time instead of time.clock because time time.clock
# does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
self.duration = self.average_time
else:
self.duration = self.diff
return self.duration
def clear(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.

25
utils/torch_utils.py Normal file
View file

@ -0,0 +1,25 @@
import torch
def init_seeds(seed=0):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def select_device(force_cpu=False):
if force_cpu:
cuda = False
device = torch.device('cpu')
else:
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
if torch.cuda.device_count() > 1:
print('WARNING Using GPU0 Only: https://github.com/ultralytics/yolov3/issues/21')
torch.cuda.set_device(0) # OPTIONAL: Set your GPU if multiple available
# print('Using ', torch.cuda.device_count(), ' GPUs')
print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
print(device)
return device

545
utils/utils.py Executable file
View file

@ -0,0 +1,545 @@
import glob
import random
import time
import os
import os.path as osp
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from utils import torch_utils
import maskrcnn_benchmark.layers.nms as nms
from external.lib.nms.cpu_nms import cpu_soft_nms
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
def mkdir_if_missing(d):
if not osp.exists(d):
os.makedirs(d)
def float3(x): # format floats to 3 decimals
return float(format(x, '.3f'))
def init_seeds(seed=0):
random.seed(seed)
np.random.seed(seed)
torch_utils.init_seeds(seed=seed)
def load_classes(path):
"""
Loads class labels at 'path'
"""
fp = open(path, 'r')
names = fp.read().split('\n')
return list(filter(None, names)) # filter removes empty strings (such as last line)
def model_info(model): # Plots a line-by-line description of a PyTorch model
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace('module_list.', '')
print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g))
def coco_class_weights(): # frequency of each class in coco train2014
weights = 1 / torch.FloatTensor(
[187437, 4955, 30920, 6033, 3838, 4332, 3160, 7051, 7677, 9167, 1316, 1372, 833, 6757, 7355, 3302, 3776, 4671,
6769, 5706, 3908, 903, 3686, 3596, 6200, 7920, 8779, 4505, 4272, 1862, 4698, 1962, 4403, 6659, 2402, 2689,
4012, 4175, 3411, 17048, 5637, 14553, 3923, 5539, 4289, 10084, 7018, 4314, 3099, 4638, 4939, 5543, 2038, 4004,
5053, 4578, 27292, 4113, 5931, 2905, 11174, 2873, 4036, 3415, 1517, 4122, 1980, 4464, 1190, 2302, 156, 3933,
1877, 17630, 4337, 4624, 1075, 3468, 135, 1380])
weights /= weights.sum()
return weights
def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
# a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
# b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
# x = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
return x
def plot_one_box(x, img, color=None, label=None, line_thickness=None): # Plots one bounding box on image img
tl = line_thickness or round(0.0004 * max(img.shape[0:2])) + 1 # line thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1) # filled
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
elif classname.find('BatchNorm2d') != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
torch.nn.init.constant_(m.bias.data, 0.0)
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape)
y[:, 0] = (x[:, 0] - x[:, 2] / 2)
y[:, 1] = (x[:, 1] - x[:, 3] / 2)
y[:, 2] = (x[:, 0] + x[:, 2] / 2)
y[:, 3] = (x[:, 1] + x[:, 3] / 2)
return y
def scale_coords(img_size, coords, img0_shape):
# Rescale x1, y1, x2, y2 from 416 to image size
gain_w = float(img_size[0]) / img0_shape[1] # gain = old / new
gain_h = float(img_size[1]) / img0_shape[0]
gain = min(gain_w, gain_h)
pad_x = (img_size[0] - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size[1] - img0_shape[0] * gain) / 2 # height padding
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, 0:4] /= gain
coords[:, :4] = torch.clamp(coords[:, :4], min=0)
return coords
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# lists/pytorch to numpy
tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(pred_cls), np.array(target_cls)
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in unique_classes:
i = pred_cls == c
n_gt = sum(target_cls == c) # Number of ground truth objects
n_p = sum(i) # Number of predicted objects
if (n_p == 0) and (n_gt == 0):
continue
elif (n_p == 0) or (n_gt == 0):
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = np.cumsum(1 - tp[i])
tpc = np.cumsum(tp[i])
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(tpc[-1] / (n_gt + 1e-16))
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def bbox_iou(box1, box2, x1y1x2y2=False):
"""
Returns the IoU of two bounding boxes
"""
N, M = len(box1), len(box2)
if x1y1x2y2:
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
else:
# Transform from center and width to exact coordinates
b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
# get the coordinates of the intersection rectangle
inter_rect_x1 = torch.max(b1_x1.unsqueeze(1), b2_x1)
inter_rect_y1 = torch.max(b1_y1.unsqueeze(1), b2_y1)
inter_rect_x2 = torch.min(b1_x2.unsqueeze(1), b2_x2)
inter_rect_y2 = torch.min(b1_y2.unsqueeze(1), b2_y2)
# Intersection area
inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0)
# Union Area
b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1))
b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).view(-1,1).expand(N,M)
b2_area = ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).view(1,-1).expand(N,M)
return inter_area / (b1_area + b2_area - inter_area + 1e-16)
def build_targets_max(target, anchor_wh, nA, nC, nGh, nGw):
"""
returns nT, nCorrect, tx, ty, tw, th, tconf, tcls
"""
nB = len(target) # number of images in batch
txy = torch.zeros(nB, nA, nGh, nGw, 2).cuda() # batch size, anchors, grid size
twh = torch.zeros(nB, nA, nGh, nGw, 2).cuda()
tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
tcls = torch.ByteTensor(nB, nA, nGh, nGw, nC).fill_(0).cuda() # nC = number of classes
tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda()
for b in range(nB):
t = target[b]
t_id = t[:, 1].clone().long().cuda()
t = t[:,[0,2,3,4,5]]
nTb = len(t) # number of targets
if nTb == 0:
continue
#gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG
gxy, gwh = t[: , 1:3].clone() , t[:, 3:5].clone()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gi = torch.clamp(gxy[:, 0], min=0, max=nGw -1).long()
gj = torch.clamp(gxy[:, 1], min=0, max=nGh -1).long()
# Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
#gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()
#gi, gj = gxy.long().t()
# iou of targets-anchors (using wh only)
box1 = gwh
box2 = anchor_wh.unsqueeze(1)
inter_area = torch.min(box1, box2).prod(2)
iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
# Select best iou_pred and anchor
iou_best, a = iou.max(0) # best anchor [0-2] for each target
# Select best unique target-anchor combinations
if nTb > 1:
_, iou_order = torch.sort(-iou_best) # best to worst
# Unique anchor selection
u = torch.stack((gi, gj, a), 0)[:, iou_order]
# _, first_unique = np.unique(u, axis=1, return_index=True) # first unique indices
first_unique = return_torch_unique_index(u, torch.unique(u, dim=1)) # torch alternative
i = iou_order[first_unique]
# best anchor must share significant commonality (iou) with target
i = i[iou_best[i] > 0.60] # TODO: examine arbitrary threshold
if len(i) == 0:
continue
a, gj, gi, t = a[i], gj[i], gi[i], t[i]
t_id = t_id[i]
if len(t.shape) == 1:
t = t.view(1, 5)
else:
if iou_best < 0.60:
continue
tc, gxy, gwh = t[:, 0].long(), t[:, 1:3].clone(), t[:, 3:5].clone()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
# XY coordinates
txy[b, a, gj, gi] = gxy - gxy.floor()
# Width and height
twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a]) # yolo method
# twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2 # power method
# One-hot encoding of label
tcls[b, a, gj, gi, tc] = 1
tconf[b, a, gj, gi] = 1
tid[b, a, gj, gi] = t_id.unsqueeze(1)
tbox = torch.cat([txy, twh], -1)
return tconf, tbox, tid
def build_targets_thres(target, anchor_wh, nA, nC, nGh, nGw):
ID_THRESH = 0.5
FG_THRESH = 0.5
BG_THRESH = 0.4
nB = len(target) # number of images in batch
assert(len(anchor_wh)==nA)
tbox = torch.zeros(nB, nA, nGh, nGw, 4).cuda() # batch size, anchors, grid size
tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda()
for b in range(nB):
t = target[b]
t_id = t[:, 1].clone().long().cuda()
t = t[:,[0,2,3,4,5]]
nTb = len(t) # number of targets
if nTb == 0:
continue
gxy, gwh = t[: , 1:3].clone() , t[:, 3:5].clone()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gxy[:, 0] = torch.clamp(gxy[:, 0], min=0, max=nGw -1)
gxy[:, 1] = torch.clamp(gxy[:, 1], min=0, max=nGh -1)
gt_boxes = torch.cat([gxy, gwh], dim=1) # Shape Ngx4 (xc, yc, w, h)
anchor_mesh = generate_anchor(nGh, nGw, anchor_wh)
anchor_list = anchor_mesh.permute(0,2,3,1).contiguous().view(-1, 4) # Shpae (nA x nGh x nGw) x 4
#print(anchor_list.shape, gt_boxes.shape)
iou_pdist = bbox_iou(anchor_list, gt_boxes) # Shape (nA x nGh x nGw) x Ng
iou_max, max_gt_index = torch.max(iou_pdist, dim=1) # Shape (nA x nGh x nGw), both
iou_map = iou_max.view(nA, nGh, nGw)
gt_index_map = max_gt_index.view(nA, nGh, nGw)
#nms_map = pooling_nms(iou_map, 3)
id_index = iou_map > ID_THRESH
fg_index = iou_map > FG_THRESH
bg_index = iou_map < BG_THRESH
ign_index = (iou_map < FG_THRESH) * (iou_map > BG_THRESH)
tconf[b][fg_index] = 1
tconf[b][bg_index] = 0
tconf[b][ign_index] = -1
gt_index = gt_index_map[fg_index]
gt_box_list = gt_boxes[gt_index]
gt_id_list = t_id[gt_index_map[id_index]]
#print(gt_index.shape, gt_index_map[id_index].shape, gt_boxes.shape)
if torch.sum(fg_index) > 0:
tid[b][id_index] = gt_id_list.unsqueeze(1)
fg_anchor_list = anchor_list.view(nA, nGh, nGw, 4)[fg_index]
delta_target = encode_delta(gt_box_list, fg_anchor_list)
tbox[b][fg_index] = delta_target
return tconf, tbox, tid
def generate_anchor(nGh, nGw, anchor_wh):
nA = len(anchor_wh)
yy, xx =torch.meshgrid(torch.arange(nGh), torch.arange(nGw))
xx, yy = xx.cuda(), yy.cuda()
mesh = torch.stack([xx, yy], dim=0) # Shape 2, nGh, nGw
mesh = mesh.unsqueeze(0).repeat(nA,1,1,1).float() # Shape nA x 2 x nGh x nGw
anchor_offset_mesh = anchor_wh.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, nGh,nGw) # Shape nA x 2 x nGh x nGw
anchor_mesh = torch.cat([mesh, anchor_offset_mesh], dim=1) # Shape nA x 4 x nGh x nGw
return anchor_mesh
def encode_delta(gt_box_list, fg_anchor_list):
px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
fg_anchor_list[:, 2], fg_anchor_list[:,3]
gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
gt_box_list[:, 2], gt_box_list[:, 3]
dx = (gx - px) / pw
dy = (gy - py) / ph
dw = torch.log(gw/pw)
dh = torch.log(gh/ph)
return torch.stack([dx, dy, dw, dh], dim=1)
def decode_delta(delta, fg_anchor_list):
px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
fg_anchor_list[:, 2], fg_anchor_list[:,3]
dx, dy, dw, dh = delta[:, 0], delta[:, 1], delta[:, 2], delta[:, 3]
gx = pw * dx + px
gy = ph * dy + py
gw = pw * torch.exp(dw)
gh = ph * torch.exp(dh)
return torch.stack([gx, gy, gw, gh], dim=1)
def decode_delta_map(delta_map, anchors):
'''
:param: delta_map, shape (nB, nA, nGh, nGw, 4)
:param: anchors, shape (nA,4)
'''
nB, nA, nGh, nGw, _ = delta_map.shape
anchor_mesh = generate_anchor(nGh, nGw, anchors)
anchor_mesh = anchor_mesh.permute(0,2,3,1).contiguous() # Shpae (nA x nGh x nGw) x 4
anchor_mesh = anchor_mesh.unsqueeze(0).repeat(nB,1,1,1,1)
pred_list = decode_delta(delta_map.view(-1,4), anchor_mesh.view(-1,4))
pred_map = pred_list.view(nB, nA, nGh, nGw, 4)
return pred_map
def pooling_nms(heatmap, kernel=1):
pad = (kernel -1 ) // 2
hmax = F.max_pool2d(heatmap, (kernel, kernel), stride=1, padding=pad)
keep = (hmax == heatmap).float()
return keep * heatmap
def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.05, method=1):
keep = cpu_soft_nms(np.ascontiguousarray(dets, dtype=np.float32),
np.float32(sigma), np.float32(Nt),
np.float32(threshold),
np.uint8(method))
return keep
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_score, class_pred)
"""
output = [None for _ in range(len(prediction))]
for image_i, pred in enumerate(prediction):
# Filter out confidence scores below threshold
# Get score and class with highest confidence
v = pred[:, 4] > conf_thres
v = v.nonzero().squeeze()
if len(v.shape) == 0:
v = v.unsqueeze(0)
pred = pred[v]
# If none are remaining => process next image
nP = pred.shape[0]
if not nP:
continue
# From (center x, center y, width, height) to (x1, y1, x2, y2)
pred[:, :4] = xywh2xyxy(pred[:, :4])
# Non-maximum suppression
if method == -1:
nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
else:
dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
nms_indices = soft_nms(dets, Nt=nms_thres, method=method)
det_max = pred[nms_indices]
if len(det_max) > 0:
# Add max detections to outputs
output[image_i] = det_max if output[image_i] is None else torch.cat((output[image_i], det_max))
return output
def return_torch_unique_index(u, uv):
n = uv.shape[1] # number of columns
first_unique = torch.zeros(n, device=u.device).long()
for j in range(n):
first_unique[j] = (uv[:, j:j + 1] == u).all(0).nonzero()[0]
return first_unique
def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
# Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
a = torch.load(filename, map_location='cpu')
a['optimizer'] = []
torch.save(a, filename.replace('.pt', '_lite.pt'))
def coco_class_count(path='../coco/labels/train2014/'):
# histogram of occurrences per class
nC = 80 # number classes
x = np.zeros(nC, dtype='int32')
files = sorted(glob.glob('%s/*.*' % path))
for i, file in enumerate(files):
labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
x += np.bincount(labels[:, 0].astype('int32'), minlength=nC)
print(i, len(files))
def coco_only_people(path='../coco/labels/val2014/'):
# find images with only people
files = sorted(glob.glob('%s/*.*' % path))
for i, file in enumerate(files):
labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
if all(labels[:, 0] == 0):
print(labels.shape[0], file)
def plot_results():
# Plot YOLO training results file 'results.txt'
# import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v1.txt')
plt.figure(figsize=(14, 7))
s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'mAP', 'Recall', 'Precision']
files = sorted(glob.glob('results*.txt'))
for f in files:
results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T # column 11 is mAP
x = range(1, results.shape[1])
for i in range(8):
plt.subplot(2, 4, i + 1)
plt.plot(x, results[i, x], marker='.', label=f)
plt.title(s[i])
if i == 0:
plt.legend()

90
utils/visualization.py Normal file
View file

@ -0,0 +1,90 @@
import numpy as np
import cv2
def tlwhs_to_tlbrs(tlwhs):
tlbrs = np.copy(tlwhs)
if len(tlbrs) == 0:
return tlbrs
tlbrs[:, 2] += tlwhs[:, 0]
tlbrs[:, 3] += tlwhs[:, 1]
return tlbrs
def get_color(idx):
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def resize_image(image, max_size=800):
if max(image.shape[:2]) > max_size:
scale = float(max_size) / max(image.shape[:2])
image = cv2.resize(image, None, fx=scale, fy=scale)
return image
def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 1 if text_scale > 1.1 else 1
line_thickness = max(1, int(image.shape[1] / 600.))
radius = max(5, int(im_w/140.))
cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
(0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2)
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
obj_id = int(obj_ids[i])
id_text = '{}'.format(int(obj_id))
if ids2 is not None:
id_text = id_text + ', {}'.format(int(ids2[i]))
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255),
thickness=text_thickness)
return im
def plot_trajectory(image, tlwhs, track_ids):
image = image.copy()
for one_tlwhs, track_id in zip(tlwhs, track_ids):
color = get_color(int(track_id))
for tlwh in one_tlwhs:
x1, y1, w, h = tuple(map(int, tlwh))
cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2)
return image
def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None):
im = np.copy(image)
text_scale = max(1, image.shape[1] / 800.)
thickness = 2 if text_scale > 1.3 else 1
for i, det in enumerate(tlbrs):
x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int)
if len(det) >= 7:
label = 'det' if det[5] > 0 else 'trk'
if ids is not None:
text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i])
cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
thickness=thickness)
else:
text = '{}# {:.2f}'.format(label, det[6])
if scores is not None:
text = '{:.2f}'.format(scores[i])
cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
thickness=thickness)
cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
return im