1. Accelerate the association step.
2. Provide more trained models with different input resolutions.
This commit is contained in:
parent
7216bcaadf
commit
c40826179b
12 changed files with 994 additions and 166 deletions
28
cfg/yolov3.cfg → cfg/yolov3_1088x608.cfg
Executable file → Normal file
28
cfg/yolov3.cfg → cfg/yolov3_1088x608.cfg
Executable file → Normal file
|
@ -1,26 +1,10 @@
|
|||
[net]
|
||||
# Testing
|
||||
#batch=1
|
||||
#subdivisions=1
|
||||
# Training
|
||||
batch=16
|
||||
subdivisions=1
|
||||
width=608
|
||||
height=1088
|
||||
width=1088
|
||||
height=608
|
||||
embedding_dim=512
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 500200
|
||||
policy=steps
|
||||
steps=400000,450000
|
||||
scales=.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
|
@ -611,7 +595,7 @@ layers = -3
|
|||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
|
@ -712,7 +696,7 @@ layers = -3
|
|||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
|
@ -815,7 +799,7 @@ layers = -3
|
|||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
817
cfg/yolov3_576x320.cfg
Normal file
817
cfg/yolov3_576x320.cfg
Normal file
|
@ -0,0 +1,817 @@
|
|||
[net]
|
||||
batch=16
|
||||
subdivisions=1
|
||||
width= 576
|
||||
height=320
|
||||
embedding_dim=512
|
||||
channels=3
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
# Downsample
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
# Downsample
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
# Downsample
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
# Downsample
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
# Downsample
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[shortcut]
|
||||
from=-3
|
||||
activation=linear
|
||||
|
||||
######################
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=24
|
||||
activation=linear
|
||||
|
||||
######### embedding ###########
|
||||
[route]
|
||||
layers = -3
|
||||
|
||||
[convolutional]
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
layers = -3, -1
|
||||
###############################
|
||||
|
||||
|
||||
[yolo]
|
||||
mask = 8,9,10,11
|
||||
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,360, 170,420, 340, 320
|
||||
classes=1
|
||||
num=12
|
||||
jitter=.3
|
||||
ignore_thresh = .7
|
||||
truth_thresh = 1
|
||||
random=1
|
||||
|
||||
|
||||
[route]
|
||||
layers = -7
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[upsample]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers = -1, 61
|
||||
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=24
|
||||
activation=linear
|
||||
|
||||
######### embedding ###########
|
||||
[route]
|
||||
layers = -3
|
||||
|
||||
[convolutional]
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
layers = -3, -1
|
||||
###############################
|
||||
|
||||
[yolo]
|
||||
mask = 4,5,6,7
|
||||
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,320, 170,320, 340,320
|
||||
classes=1
|
||||
num=12
|
||||
jitter=.3
|
||||
ignore_thresh = .7
|
||||
truth_thresh = 1
|
||||
random=1
|
||||
|
||||
|
||||
|
||||
[route]
|
||||
layers = -7
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[upsample]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers = -1, 36
|
||||
|
||||
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=256
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=256
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=256
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=24
|
||||
activation=linear
|
||||
|
||||
|
||||
|
||||
######### embedding ###########
|
||||
[route]
|
||||
layers = -3
|
||||
|
||||
[convolutional]
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
layers = -3, -1
|
||||
###############################
|
||||
|
||||
[yolo]
|
||||
mask = 0,1,2,3
|
||||
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,320, 170,320, 340,320
|
||||
classes=1
|
||||
num=12
|
||||
jitter=.3
|
||||
ignore_thresh = .7
|
||||
truth_thresh = 1
|
||||
random=1
|
|
@ -1,26 +1,10 @@
|
|||
[net]
|
||||
# Testing
|
||||
#batch=1
|
||||
#subdivisions=1
|
||||
# Training
|
||||
batch=16
|
||||
subdivisions=1
|
||||
width=480
|
||||
height=864
|
||||
width=864
|
||||
height=480
|
||||
embedding_dim=512
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 500200
|
||||
policy=steps
|
||||
steps=400000,450000
|
||||
scales=.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
|
@ -611,7 +595,7 @@ layers = -3
|
|||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
|
@ -712,7 +696,7 @@ layers = -3
|
|||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
|
@ -815,7 +799,7 @@ layers = -3
|
|||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=512
|
||||
filters=$embedding_dim
|
||||
activation=linear
|
||||
|
||||
[route]
|
||||
|
|
37
models.py
37
models.py
|
@ -74,7 +74,8 @@ def create_modules(module_defs):
|
|||
nC = int(module_def['classes']) # number of classes
|
||||
img_size = (int(hyperparams['width']),int(hyperparams['height']))
|
||||
# Define detection layer
|
||||
yolo_layer = YOLOLayer(anchors, nC, hyperparams['nID'], img_size, yolo_layer_count, cfg=hyperparams['cfg'])
|
||||
yolo_layer = YOLOLayer(anchors, nC, int(hyperparams['nID']),
|
||||
int(hyperparams['embedding_dim']), img_size, yolo_layer_count)
|
||||
modules.add_module('yolo_%d' % i, yolo_layer)
|
||||
yolo_layer_count += 1
|
||||
|
||||
|
@ -108,7 +109,7 @@ class Upsample(nn.Module):
|
|||
|
||||
|
||||
class YOLOLayer(nn.Module):
|
||||
def __init__(self, anchors, nC, nID, img_size, yolo_layer, cfg):
|
||||
def __init__(self, anchors, nC, nID, nE, img_size, yolo_layer):
|
||||
super(YOLOLayer, self).__init__()
|
||||
self.layer = yolo_layer
|
||||
nA = len(anchors)
|
||||
|
@ -117,7 +118,7 @@ class YOLOLayer(nn.Module):
|
|||
self.nC = nC # number of classes (80)
|
||||
self.nID = nID # number of identities
|
||||
self.img_size = 0
|
||||
self.emb_dim = 512
|
||||
self.emb_dim = nE
|
||||
self.shift = [1, 3, 5]
|
||||
|
||||
self.SmoothL1Loss = nn.SmoothL1Loss()
|
||||
|
@ -127,7 +128,9 @@ class YOLOLayer(nn.Module):
|
|||
self.s_c = nn.Parameter(-4.15*torch.ones(1)) # -4.15
|
||||
self.s_r = nn.Parameter(-4.85*torch.ones(1)) # -4.85
|
||||
self.s_id = nn.Parameter(-2.3*torch.ones(1)) # -2.3
|
||||
self.emb_scale = math.sqrt(2) * math.log(self.nID-1)
|
||||
|
||||
self.emb_scale = math.sqrt(2) * math.log(self.nID-1) if self.nID>1 else 1
|
||||
|
||||
|
||||
|
||||
def forward(self, p_cat, img_size, targets=None, classifier=None, test_emb=False):
|
||||
|
@ -178,7 +181,7 @@ class YOLOLayer(nn.Module):
|
|||
|
||||
if test_emb:
|
||||
if np.prod(embedding.shape)==0 or np.prod(tids.shape) == 0:
|
||||
return torch.zeros(0, self. emb_dim+1).cuda()
|
||||
return torch.zeros(0, self.emb_dim+1).cuda()
|
||||
emb_and_gt = torch.cat([embedding, tids.float()], dim=1)
|
||||
return emb_and_gt
|
||||
|
||||
|
@ -210,21 +213,23 @@ class YOLOLayer(nn.Module):
|
|||
class Darknet(nn.Module):
|
||||
"""YOLOv3 object detection model"""
|
||||
|
||||
def __init__(self, cfg_path, img_size=(1088, 608), nID=1591, test_emb=False):
|
||||
def __init__(self, cfg_dict, nID=0, test_emb=False):
|
||||
super(Darknet, self).__init__()
|
||||
|
||||
self.module_defs = parse_model_cfg(cfg_path)
|
||||
self.module_defs[0]['cfg'] = cfg_path
|
||||
if isinstance(cfg_dict, str):
|
||||
cfg_dict = parse_model_cfg(cfg_dict)
|
||||
self.module_defs = cfg_dict
|
||||
self.module_defs[0]['nID'] = nID
|
||||
self.img_size = [int(self.module_defs[0]['width']), int(self.module_defs[0]['height'])]
|
||||
self.emb_dim = int(self.module_defs[0]['embedding_dim'])
|
||||
self.hyperparams, self.module_list = create_modules(self.module_defs)
|
||||
self.img_size = img_size
|
||||
self.loss_names = ['loss', 'box', 'conf', 'id', 'nT']
|
||||
self.losses = OrderedDict()
|
||||
for ln in self.loss_names:
|
||||
self.losses[ln] = 0
|
||||
self.emb_dim = 512
|
||||
self.classifier = nn.Linear(self.emb_dim, nID)
|
||||
self.test_emb=test_emb
|
||||
self.test_emb = test_emb
|
||||
|
||||
self.classifier = nn.Linear(self.emb_dim, nID) if nID>0 else None
|
||||
|
||||
|
||||
|
||||
def forward(self, x, targets=None, targets_len=None):
|
||||
|
@ -256,7 +261,8 @@ class Darknet(nn.Module):
|
|||
for name, loss in zip(self.loss_names, losses):
|
||||
self.losses[name] += loss
|
||||
elif self.test_emb:
|
||||
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
|
||||
if targets is not None:
|
||||
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
|
||||
x = module[0](x, self.img_size, targets, self.classifier, self.test_emb)
|
||||
else: # get detections
|
||||
x = module[0](x, self.img_size)
|
||||
|
@ -282,7 +288,8 @@ def shift_tensor_vertically(t, delta):
|
|||
|
||||
def create_grids(self, img_size, nGh, nGw):
|
||||
self.stride = img_size[0]/nGw
|
||||
assert self.stride == img_size[1] / nGh
|
||||
assert self.stride == img_size[1] / nGh, \
|
||||
"{} v.s. {}/{}".format(self.stride, img_size[1], nGh)
|
||||
|
||||
# build xy offsets
|
||||
grid_x = torch.arange(nGw).repeat((nGh, 1)).view((1, 1, nGh, nGw)).float()
|
||||
|
|
15
test.py
15
test.py
|
@ -16,12 +16,10 @@ def test(
|
|||
data_cfg,
|
||||
weights,
|
||||
batch_size=16,
|
||||
img_size=416,
|
||||
iou_thres=0.5,
|
||||
conf_thres=0.3,
|
||||
nms_thres=0.45,
|
||||
print_interval=40,
|
||||
nID=14455,
|
||||
):
|
||||
|
||||
# Configure run
|
||||
|
@ -32,9 +30,11 @@ def test(
|
|||
nC = 1
|
||||
test_path = data_cfg_dict['test']
|
||||
dataset_root = data_cfg_dict['root']
|
||||
cfg_dict = parse_model_cfg(cfg)
|
||||
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
|
||||
|
||||
# Initialize model
|
||||
model = Darknet(cfg, img_size, nID)
|
||||
model = Darknet(cfg_dict, test_emb=False)
|
||||
|
||||
# Load weights
|
||||
if weights.endswith('.pt'): # pytorch format
|
||||
|
@ -149,12 +149,10 @@ def test_emb(
|
|||
data_cfg,
|
||||
weights,
|
||||
batch_size=16,
|
||||
img_size=416,
|
||||
iou_thres=0.5,
|
||||
conf_thres=0.3,
|
||||
nms_thres=0.45,
|
||||
print_interval=40,
|
||||
nID=14455,
|
||||
):
|
||||
|
||||
# Configure run
|
||||
|
@ -163,9 +161,11 @@ def test_emb(
|
|||
f.close()
|
||||
test_paths = data_cfg_dict['test_emb']
|
||||
dataset_root = data_cfg_dict['root']
|
||||
cfg_dict = parse_model_cfg(cfg)
|
||||
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
|
||||
|
||||
# Initialize model
|
||||
model = Darknet(cfg, img_size, nID, test_emb=True)
|
||||
model = Darknet(cfg_dict, test_emb=True)
|
||||
|
||||
# Load weights
|
||||
if weights.endswith('.pt'): # pytorch format
|
||||
|
@ -231,7 +231,6 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
|
||||
parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
|
||||
parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
|
||||
parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
|
||||
parser.add_argument('--print-interval', type=int, default=10, help='size of each image dimension')
|
||||
parser.add_argument('--test-emb', action='store_true', help='test embedding')
|
||||
opt = parser.parse_args()
|
||||
|
@ -244,7 +243,6 @@ if __name__ == '__main__':
|
|||
opt.data_cfg,
|
||||
opt.weights,
|
||||
opt.batch_size,
|
||||
opt.img_size,
|
||||
opt.iou_thres,
|
||||
opt.conf_thres,
|
||||
opt.nms_thres,
|
||||
|
@ -256,7 +254,6 @@ if __name__ == '__main__':
|
|||
opt.data_cfg,
|
||||
opt.weights,
|
||||
opt.batch_size,
|
||||
opt.img_size,
|
||||
opt.iou_thres,
|
||||
opt.conf_thres,
|
||||
opt.nms_thres,
|
||||
|
|
10
track.py
10
track.py
|
@ -5,13 +5,14 @@ import logging
|
|||
import argparse
|
||||
import motmetrics as mm
|
||||
|
||||
import torch
|
||||
from tracker.multitracker import JDETracker
|
||||
from utils import visualization as vis
|
||||
from utils.log import logger
|
||||
from utils.timer import Timer
|
||||
from utils.evaluation import Evaluator
|
||||
from utils.parse_config import parse_model_cfg
|
||||
import utils.datasets as datasets
|
||||
import torch
|
||||
from utils.utils import *
|
||||
|
||||
|
||||
|
@ -84,6 +85,10 @@ def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',),
|
|||
mkdir_if_missing(result_root)
|
||||
data_type = 'mot'
|
||||
|
||||
# Read config
|
||||
cfg_dict = parse_model_cfg(opt.cfg)
|
||||
opt.img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
|
||||
|
||||
# run tracking
|
||||
accs = []
|
||||
n_frame = 0
|
||||
|
@ -134,7 +139,6 @@ if __name__ == '__main__':
|
|||
parser = argparse.ArgumentParser(prog='track.py')
|
||||
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
|
||||
parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file')
|
||||
parser.add_argument('--img-size', type=int, default=[1088, 608], nargs='+', help='pixels')
|
||||
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
|
||||
parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
|
||||
parser.add_argument('--nms-thres', type=float, default=0.4, help='iou threshold for non-maximum suppression')
|
||||
|
@ -162,6 +166,8 @@ if __name__ == '__main__':
|
|||
MOT17-11-SDP
|
||||
MOT17-13-SDP
|
||||
'''
|
||||
seqs_str = '''MOT17-02-SDP
|
||||
'''
|
||||
data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
|
||||
else:
|
||||
seqs_str = '''MOT16-01
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
import cv2
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
import scipy
|
||||
from scipy.spatial.distance import cdist
|
||||
from sklearn.utils import linear_assignment_
|
||||
import lap
|
||||
|
||||
from cython_bbox import bbox_overlaps as bbox_ious
|
||||
from utils import kalman_filter
|
||||
|
@ -25,31 +27,18 @@ def merge_matches(m1, m2, shape):
|
|||
return match, unmatched_O, unmatched_Q
|
||||
|
||||
|
||||
def _indices_to_matches(cost_matrix, indices, thresh):
|
||||
matched_cost = cost_matrix[tuple(zip(*indices))]
|
||||
matched_mask = (matched_cost <= thresh)
|
||||
|
||||
matches = indices[matched_mask]
|
||||
unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
|
||||
unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
|
||||
|
||||
return matches, unmatched_a, unmatched_b
|
||||
|
||||
|
||||
def linear_assignment(cost_matrix, thresh):
|
||||
"""
|
||||
Simple linear assignment
|
||||
:type cost_matrix: np.ndarray
|
||||
:type thresh: float
|
||||
:return: matches, unmatched_a, unmatched_b
|
||||
"""
|
||||
if cost_matrix.size == 0:
|
||||
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
|
||||
|
||||
cost_matrix[cost_matrix > thresh] = thresh + 1e-4
|
||||
indices = linear_assignment_.linear_assignment(cost_matrix)
|
||||
|
||||
return _indices_to_matches(cost_matrix, indices, thresh)
|
||||
matches, unmatched_a, unmatched_b = [], [], []
|
||||
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
|
||||
for ix, mx in enumerate(x):
|
||||
if mx >= 0:
|
||||
matches.append([ix, mx])
|
||||
unmatched_a = np.where(x < 0)[0]
|
||||
unmatched_b = np.where(y < 0)[0]
|
||||
matches = np.asarray(matches)
|
||||
return matches, unmatched_a, unmatched_b
|
||||
|
||||
|
||||
def ious(atlbrs, btlbrs):
|
||||
|
@ -104,21 +93,9 @@ def embedding_distance(tracks, detections, metric='cosine'):
|
|||
if cost_matrix.size == 0:
|
||||
return cost_matrix
|
||||
det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
|
||||
for i, track in enumerate(tracks):
|
||||
cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
|
||||
return cost_matrix
|
||||
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
|
||||
cost_matrix = np.maximum(0.0, cdist(track_features, det_features)) # Nomalized features
|
||||
|
||||
|
||||
def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
|
||||
if cost_matrix.size == 0:
|
||||
return cost_matrix
|
||||
gating_dim = 2 if only_position else 4
|
||||
gating_threshold = kalman_filter.chi2inv95[gating_dim]
|
||||
measurements = np.asarray([det.to_xyah() for det in detections])
|
||||
for row, track in enumerate(tracks):
|
||||
gating_distance = kf.gating_distance(
|
||||
track.mean, track.covariance, measurements, only_position)
|
||||
cost_matrix[row, gating_distance > gating_threshold] = np.inf
|
||||
return cost_matrix
|
||||
|
||||
|
||||
|
@ -130,10 +107,7 @@ def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda
|
|||
measurements = np.asarray([det.to_xyah() for det in detections])
|
||||
for row, track in enumerate(tracks):
|
||||
gating_distance = kf.gating_distance(
|
||||
track.mean, track.covariance, measurements, only_position)
|
||||
track.mean, track.covariance, measurements, only_position, metric='maha')
|
||||
cost_matrix[row, gating_distance > gating_threshold] = np.inf
|
||||
#print(cost_matrix[row])
|
||||
#print(gating_distance)
|
||||
#print('-'*90)
|
||||
cost_matrix[row] = lambda_ * cost_matrix[row] + (1-lambda_)* gating_distance
|
||||
return cost_matrix
|
||||
|
|
|
@ -6,6 +6,7 @@ import os
|
|||
import os.path as osp
|
||||
import time
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
from utils.utils import *
|
||||
from utils.log import logger
|
||||
|
@ -16,6 +17,7 @@ from .basetrack import BaseTrack, TrackState
|
|||
|
||||
|
||||
class STrack(BaseTrack):
|
||||
shared_kalman = KalmanFilter()
|
||||
|
||||
def __init__(self, tlwh, score, temp_feat, buffer_size=30):
|
||||
|
||||
|
@ -49,6 +51,19 @@ class STrack(BaseTrack):
|
|||
mean_state[7] = 0
|
||||
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
|
||||
|
||||
@staticmethod
|
||||
def multi_predict(stracks):
|
||||
if len(stracks) > 0:
|
||||
multi_mean = np.asarray([st.mean.copy() for st in stracks])
|
||||
multi_covariance = np.asarray([st.covariance for st in stracks])
|
||||
for i,st in enumerate(stracks):
|
||||
if st.state != TrackState.Tracked:
|
||||
multi_mean[i][7] = 0
|
||||
multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
|
||||
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
|
||||
stracks[i].mean = mean
|
||||
stracks[i].covariance = cov
|
||||
|
||||
|
||||
def activate(self, kalman_filter, frame_id):
|
||||
"""Start a new tracklet"""
|
||||
|
@ -97,7 +112,7 @@ class STrack(BaseTrack):
|
|||
self.update_features(new_track.curr_feat)
|
||||
|
||||
@property
|
||||
@jit
|
||||
#@jit(nopython=True)
|
||||
def tlwh(self):
|
||||
"""Get current position in bounding box format `(top left x, top left y,
|
||||
width, height)`.
|
||||
|
@ -110,7 +125,7 @@ class STrack(BaseTrack):
|
|||
return ret
|
||||
|
||||
@property
|
||||
@jit
|
||||
#@jit(nopython=True)
|
||||
def tlbr(self):
|
||||
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
|
||||
`(top left, bottom right)`.
|
||||
|
@ -120,7 +135,7 @@ class STrack(BaseTrack):
|
|||
return ret
|
||||
|
||||
@staticmethod
|
||||
@jit
|
||||
#@jit(nopython=True)
|
||||
def tlwh_to_xyah(tlwh):
|
||||
"""Convert bounding box to format `(center x, center y, aspect ratio,
|
||||
height)`, where the aspect ratio is `width / height`.
|
||||
|
@ -134,14 +149,14 @@ class STrack(BaseTrack):
|
|||
return self.tlwh_to_xyah(self.tlwh)
|
||||
|
||||
@staticmethod
|
||||
@jit
|
||||
#@jit(nopython=True)
|
||||
def tlbr_to_tlwh(tlbr):
|
||||
ret = np.asarray(tlbr).copy()
|
||||
ret[2:] -= ret[:2]
|
||||
return ret
|
||||
|
||||
@staticmethod
|
||||
@jit
|
||||
#@jit(nopython=True)
|
||||
def tlwh_to_tlbr(tlwh):
|
||||
ret = np.asarray(tlwh).copy()
|
||||
ret[2:] += ret[:2]
|
||||
|
@ -151,10 +166,11 @@ class STrack(BaseTrack):
|
|||
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
|
||||
|
||||
|
||||
|
||||
class JDETracker(object):
|
||||
def __init__(self, opt, frame_rate=30):
|
||||
self.opt = opt
|
||||
self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
|
||||
self.model = Darknet(opt.cfg)
|
||||
# load_darknet_weights(self.model, opt.weights)
|
||||
self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'], strict=False)
|
||||
self.model.cuda().eval()
|
||||
|
@ -183,17 +199,16 @@ class JDETracker(object):
|
|||
pred = self.model(im_blob)
|
||||
pred = pred[pred[:, :, 4] > self.opt.conf_thres]
|
||||
if len(pred) > 0:
|
||||
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
|
||||
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres,
|
||||
self.opt.nms_thres)[0]
|
||||
scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
|
||||
dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy()
|
||||
'''Detections'''
|
||||
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
|
||||
(tlbrs, f) in zip(dets[:, :5], dets[:, 6:])]
|
||||
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for
|
||||
(tlbrs, f) in zip(dets, embs)]
|
||||
else:
|
||||
detections = []
|
||||
|
||||
t2 = time.time()
|
||||
# print('Forward: {} s'.format(t2-t1))
|
||||
|
||||
''' Add newly detected tracklets to tracked_stracks'''
|
||||
unconfirmed = []
|
||||
tracked_stracks = [] # type: list[STrack]
|
||||
|
@ -206,11 +221,8 @@ class JDETracker(object):
|
|||
''' Step 2: First association, with embedding'''
|
||||
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
|
||||
# Predict the current location with KF
|
||||
for strack in strack_pool:
|
||||
strack.predict()
|
||||
|
||||
STrack.multi_predict(strack_pool)
|
||||
dists = matching.embedding_distance(strack_pool, detections)
|
||||
#dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
|
||||
dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
|
||||
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
|
||||
|
||||
|
@ -271,13 +283,10 @@ class JDETracker(object):
|
|||
if self.frame_id - track.end_frame > self.max_time_lost:
|
||||
track.mark_removed()
|
||||
removed_stracks.append(track)
|
||||
t4 = time.time()
|
||||
# print('Ramained match {} s'.format(t4-t3))
|
||||
|
||||
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
|
||||
self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
|
||||
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
|
||||
# self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack]
|
||||
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
|
||||
self.lost_stracks.extend(lost_stracks)
|
||||
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
|
||||
|
@ -292,8 +301,6 @@ class JDETracker(object):
|
|||
logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
|
||||
logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
|
||||
logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
|
||||
t5 = time.time()
|
||||
# print('Final {} s'.format(t5-t4))
|
||||
return output_stracks
|
||||
|
||||
def joint_stracks(tlista, tlistb):
|
||||
|
|
21
train.py
21
train.py
|
@ -13,7 +13,6 @@ from torchvision.transforms import transforms as T
|
|||
def train(
|
||||
cfg,
|
||||
data_cfg,
|
||||
img_size=(1088,608),
|
||||
resume=False,
|
||||
epochs=100,
|
||||
batch_size=16,
|
||||
|
@ -33,16 +32,19 @@ def train(
|
|||
trainset_paths = data_config['train']
|
||||
dataset_root = data_config['root']
|
||||
f.close()
|
||||
cfg_dict = parse_model_cfg(cfg)
|
||||
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
|
||||
|
||||
|
||||
transforms = T.Compose([T.ToTensor()])
|
||||
# Get dataloader
|
||||
transforms = T.Compose([T.ToTensor()])
|
||||
dataset = JointDataset(dataset_root, trainset_paths, img_size, augment=True, transforms=transforms)
|
||||
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
|
||||
num_workers=8, pin_memory=True, drop_last=True, collate_fn=collate_fn)
|
||||
|
||||
# Initialize model
|
||||
model = Darknet(cfg, img_size, dataset.nID)
|
||||
model = Darknet(cfg_dict, dataset.nID)
|
||||
|
||||
|
||||
|
||||
cutoff = -1 # backbone reaches to cutoff layer
|
||||
start_epoch = 0
|
||||
|
@ -87,6 +89,7 @@ def train(
|
|||
p.requires_grad = False if 'batch_norm' in name else True
|
||||
|
||||
model_info(model)
|
||||
|
||||
t0 = time.time()
|
||||
for epoch in range(epochs):
|
||||
epoch += start_epoch
|
||||
|
@ -94,8 +97,6 @@ def train(
|
|||
logger.info(('%8s%12s' + '%10s' * 6) % (
|
||||
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
|
||||
|
||||
|
||||
|
||||
# Freeze darknet53.conv.74 while epoch < 2 (i.e. the first two epochs)
|
||||
if freeze_backbone and (epoch < 2):
|
||||
for i, (name, p) in enumerate(model.named_parameters()):
|
||||
|
@ -154,8 +155,8 @@ def train(
|
|||
# Calculate mAP
|
||||
if epoch % opt.test_interval ==0:
|
||||
with torch.no_grad():
|
||||
mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
|
||||
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
|
||||
mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
|
||||
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
|
||||
|
||||
|
||||
# Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
|
||||
|
@ -166,9 +167,8 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
|
||||
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
|
||||
parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
|
||||
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
|
||||
parser.add_argument('--cfg', type=str, default='cfg/yolov3_1088x608.cfg', help='cfg file path')
|
||||
parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='coco.data file path')
|
||||
parser.add_argument('--img-size', type=int, default=[1088, 608], nargs='+', help='pixels')
|
||||
parser.add_argument('--resume', action='store_true', help='resume training flag')
|
||||
parser.add_argument('--print-interval', type=int, default=40, help='print interval')
|
||||
parser.add_argument('--test-interval', type=int, default=9, help='test interval')
|
||||
|
@ -181,7 +181,6 @@ if __name__ == '__main__':
|
|||
train(
|
||||
opt.cfg,
|
||||
opt.data_cfg,
|
||||
img_size=opt.img_size,
|
||||
resume=opt.resume,
|
||||
epochs=opt.epochs,
|
||||
batch_size=opt.batch_size,
|
||||
|
|
|
@ -2,7 +2,7 @@ import os
|
|||
import numpy as np
|
||||
import copy
|
||||
import motmetrics as mm
|
||||
|
||||
mm.lap.default_solver = 'lap'
|
||||
from utils.io import read_results, unzip_objs
|
||||
|
||||
|
||||
|
@ -39,18 +39,20 @@ class Evaluator(object):
|
|||
ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
|
||||
ignore_tlwhs = unzip_objs(ignore_objs)[0]
|
||||
|
||||
|
||||
# remove ignored results
|
||||
keep = np.ones(len(trk_tlwhs), dtype=bool)
|
||||
iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
|
||||
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
|
||||
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
|
||||
match_ious = iou_distance[match_is, match_js]
|
||||
if len(iou_distance) > 0:
|
||||
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
|
||||
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
|
||||
match_ious = iou_distance[match_is, match_js]
|
||||
|
||||
match_js = np.asarray(match_js, dtype=int)
|
||||
match_js = match_js[np.logical_not(np.isnan(match_ious))]
|
||||
keep[match_js] = False
|
||||
trk_tlwhs = trk_tlwhs[keep]
|
||||
trk_ids = trk_ids[keep]
|
||||
match_js = np.asarray(match_js, dtype=int)
|
||||
match_js = match_js[np.logical_not(np.isnan(match_ious))]
|
||||
keep[match_js] = False
|
||||
trk_tlwhs = trk_tlwhs[keep]
|
||||
trk_ids = trk_ids[keep]
|
||||
|
||||
# get distance matrix
|
||||
iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# vim: expandtab:ts=4:sw=4
|
||||
import numba
|
||||
import numpy as np
|
||||
import scipy.linalg
|
||||
|
||||
|
@ -116,7 +117,7 @@ class KalmanFilter(object):
|
|||
self._std_weight_velocity * mean[3]]
|
||||
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
|
||||
|
||||
mean = np.dot(self._motion_mat, mean)
|
||||
mean = np.dot(mean, self._motion_mat.T)
|
||||
covariance = np.linalg.multi_dot((
|
||||
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
|
||||
|
||||
|
@ -151,6 +152,48 @@ class KalmanFilter(object):
|
|||
self._update_mat, covariance, self._update_mat.T))
|
||||
return mean, covariance + innovation_cov
|
||||
|
||||
def multi_predict(self, mean, covariance):
    """Run Kalman filter prediction step (Vectorized version).

    Parameters
    ----------
    mean : ndarray
        The Nx8 dimensional mean matrix of the object states at the previous
        time step, one row per object.
    covariance : ndarray
        The Nx8x8 dimensional covariance matrices of the object states at
        the previous time step.

    Returns
    -------
    (ndarray, ndarray)
        Returns the Nx8 matrix of predicted means and the Nx8x8 stack of
        predicted covariance matrices.

    """
    # Process noise is scaled by state column 3 of each object.
    # NOTE(review): column 3 is presumably the box height of an
    # (x, y, a, h, ...) state layout - confirm against the filter setup.
    scale = mean[:, 3]
    std_pos = [
        self._std_weight_position * scale,
        self._std_weight_position * scale,
        1e-2 * np.ones_like(scale),
        self._std_weight_position * scale]
    std_vel = [
        self._std_weight_velocity * scale,
        self._std_weight_velocity * scale,
        1e-5 * np.ones_like(scale),
        self._std_weight_velocity * scale]
    # np.r_ stacks the eight per-object std rows into (8, N); transposing
    # gives one row of eight variances per object.
    sqr = np.square(np.r_[std_pos, std_vel]).T

    # One diagonal process-noise matrix per object.
    motion_cov = np.asarray([np.diag(variances) for variances in sqr])

    # Row-wise equivalent of `motion_mat @ mean_k` for every object k.
    mean = np.dot(mean, self._motion_mat.T)
    # np.dot of an (8, 8) matrix with an (N, 8, 8) stack yields (8, N, 8);
    # the transpose moves the object axis back to the front.
    left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
    covariance = np.dot(left, self._motion_mat.T) + motion_cov

    return mean, covariance
|
||||
|
||||
def update(self, mean, covariance, measurement):
|
||||
"""Run Kalman filter correction step.
|
||||
|
||||
|
@ -186,7 +229,7 @@ class KalmanFilter(object):
|
|||
return new_mean, new_covariance
|
||||
|
||||
def gating_distance(self, mean, covariance, measurements,
|
||||
only_position=False):
|
||||
only_position=False, metric='maha'):
|
||||
"""Compute gating distance between state distribution and measurements.
|
||||
|
||||
A suitable distance threshold can be obtained from `chi2inv95`. If
|
||||
|
@ -220,10 +263,16 @@ class KalmanFilter(object):
|
|||
mean, covariance = mean[:2], covariance[:2, :2]
|
||||
measurements = measurements[:, :2]
|
||||
|
||||
cholesky_factor = np.linalg.cholesky(covariance)
|
||||
d = measurements - mean
|
||||
z = scipy.linalg.solve_triangular(
|
||||
cholesky_factor, d.T, lower=True, check_finite=False,
|
||||
overwrite_b=True)
|
||||
squared_maha = np.sum(z * z, axis=0)
|
||||
return squared_maha
|
||||
if metric == 'gaussian':
|
||||
return np.sum(d * d, axis=1)
|
||||
elif metric == 'maha':
|
||||
cholesky_factor = np.linalg.cholesky(covariance)
|
||||
z = scipy.linalg.solve_triangular(
|
||||
cholesky_factor, d.T, lower=True, check_finite=False,
|
||||
overwrite_b=True)
|
||||
squared_maha = np.sum(z * z, axis=0)
|
||||
return squared_maha
|
||||
else:
|
||||
raise ValueError('invalid distance metric')
|
||||
|
||||
|
|
|
@ -14,7 +14,9 @@ def parse_model_cfg(path):
|
|||
else:
|
||||
key, value = line.split("=")
|
||||
value = value.strip()
|
||||
module_defs[-1][key.rstrip()] = value.strip()
|
||||
if value[0] == '$':
|
||||
value = module_defs[0].get(value.strip('$'), None)
|
||||
module_defs[-1][key.rstrip()] = value
|
||||
|
||||
return module_defs
|
||||
|
||||
|
|
Loading…
Reference in a new issue