setup

commit 28514de87b
27 changed files with 4957 additions and 0 deletions
.gitignore (vendored, Normal file, +108 lines)
@@ -0,0 +1,108 @@
weights/
data/
tmp/
external/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
README.md (Normal file, +1 line)
@@ -0,0 +1 @@
# Towards-Realtime-MOT
cfg/ccmcpe.json (Normal file, +22 lines)
@@ -0,0 +1,22 @@
{
    "train":
    {
        "mot17": "./data/mot17.train",
        "caltech": "./data/caltech.train",
        "citypersons": "./data/citypersons.train",
        "cuhksysu": "./data/cuhksysu.train",
        "prw": "./data/prw.train",
        "eth": "./data/eth.train"
    },
    "test_emb":
    {
        "caltech": "./data/caltech.val",
        "cuhksysu": "./data/cuhksysu.val",
        "prw": "./data/prw.val"
    },

    "test":
    {
        "mot19.train": "./data/mot19.train"
    }
}
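test.py (below) consumes this config with json.load and picks the "train", "test_emb", or "test" block; each value points at a .train/.val list file under data/. A minimal sketch of that lookup, assuming (not confirmed by this commit) that each list file holds one image path per line:

# Sketch: resolve one split from cfg/ccmcpe.json. The one-path-per-line
# format of the .train/.val list files is an assumption.
import json

with open('cfg/ccmcpe.json') as f:
    data_cfg = json.load(f)

train_lists = data_cfg['train']            # dataset name -> list-file path
with open(train_lists['mot17']) as f:
    img_paths = [line.strip() for line in f if line.strip()]
print(len(img_paths), 'training images for mot17')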
cfg/yolov3.cfg (Executable file, +833 lines)
@@ -0,0 +1,833 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=608
height=1088
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

######################

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear

######### embedding ###########
[route]
layers = -3

[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear

[route]
layers = -3, -1
###############################

[yolo]
mask = 8,9,10,11
anchors = 8,24, 11,34, 16,48, 23,68, 32,96, 45,135, 64,192, 90,271, 128,384, 180,540, 256,640, 512,640
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -7

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 61

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear

######### embedding ###########
[route]
layers = -3

[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear

[route]
layers = -3, -1
###############################

[yolo]
mask = 4,5,6,7
anchors = 8,24, 11,34, 16,48, 23,68, 32,96, 45,135, 64,192, 90,271, 128,384, 180,540, 256,640, 512,640
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -7

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 36

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear

######### embedding ###########
[route]
layers = -3

[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear

[route]
layers = -3, -1
###############################

[yolo]
mask = 0,1,2,3
anchors = 8,24, 11,34, 16,48, 23,68, 32,96, 45,135, 64,192, 90,271, 128,384, 180,540, 256,640, 512,640
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
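Each [yolo] head above takes 4 of the 12 anchors (num=12, 4-entry mask) with classes=1, which fixes the width of the detection conv feeding it: filters = nA x (nC + 5) = 4 x 6 = 24. As models.py (below) slices it, the 6 channels per anchor are 4 box offsets plus a 2-way foreground/background score; the 512-filter linear conv on each embedding branch supplies the 512-D identity embedding concatenated back in by the final [route]. A quick sanity check of that arithmetic, using only values visible in this cfg:

# Sketch: head width implied by the cfg values above.
n_anchors_per_head = 4                               # each [yolo] mask lists 4 anchors
n_classes = 1                                        # classes=1 (pedestrian only)
det_channels = n_anchors_per_head * (n_classes + 5)  # 4 box + 2 conf per anchor
assert det_channels == 24                            # matches filters=24 before each head
emb_dim = 512                                        # matches filters=512 on the embedding conv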
extract_ped_per_frame.py (Normal file, +98 lines)
@@ -0,0 +1,98 @@
import argparse
import json
import os
import os.path as osp
import time
from pathlib import Path

import cv2
from sklearn import metrics
from scipy import interpolate
import torch.nn.functional as F
from models import *
from utils.utils import *
from torchvision.transforms import transforms as T
from utils.datasets import LoadImages, JointDataset, collate_fn


def extract_ped_per_frame(
        cfg,
        input_root,
        output_root,
        weights,
        batch_size=16,
        img_size=416,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
        nID=14455,
):
    mkdir_if_missing(output_root)

    # Initialize model
    model = Darknet(cfg, img_size, nID)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    model = torch.nn.DataParallel(model)
    model.cuda().eval()

    vlist = os.listdir(input_root)
    vlist = [osp.join(input_root, v, 'img1') for v in vlist]

    for vpath in vlist:
        vroot = osp.join('/', *vpath.split('/')[:-1])
        out_vroot = vroot.replace(input_root, output_root)
        mkdir_if_missing(out_vroot)
        dataloader = LoadImages(vpath, img_size)
        for frame_id, (frame_path, frame, frame_ori) in enumerate(dataloader):
            frame_ground_id = frame_path.split('/')[-1].split('.')[0]
            if frame_id % 20 == 0:
                print('Processing frame {} of video {}'.format(frame_id, frame_path))
            blob = torch.from_numpy(frame).cuda().unsqueeze(0)
            pred = model(blob)
            pred = pred[pred[:, :, 4] > conf_thres]
            if len(pred) > 0:
                dets = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0].cpu()
                scale_coords(img_size, dets[:, :4], frame_ori.shape).round()
                frame_dir = osp.join(out_vroot, frame_ground_id)
                mkdir_if_missing(frame_dir)
                dets = dets[:, :5]

                # Crop each detection and save it as
                # <ped_id>_<x1>_<y1>_<x2>_<y2>_<conf>.jpg inside the frame directory
                for ped_id, det in enumerate(dets):
                    box = det[:4].int()
                    conf = det[4]
                    ped = frame_ori[box[1]:box[3], box[0]:box[2]]
                    ped_path = osp.join(frame_dir, ('{:04d}_' + '{:d}_' * 4 + '{:.2f}.jpg').format(ped_id, *box, conf))
                    cv2.imwrite(ped_path, ped)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='extract_ped_per_frame.py')
    parser.add_argument('--batch-size', type=int, default=40, help='size of each image batch')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--weights', type=str, default='weights/mot_64/latest.pt', help='path to weights file')
    parser.add_argument('--iou-thres', type=float, default=0.3, help='iou threshold required to qualify as detected')
    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.3, help='iou threshold for non-maximum suppression')
    parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
    parser.add_argument('--print-interval', type=int, default=10, help='frequency of progress prints')
    parser.add_argument('--input-root', type=str, default='/home/wangzd/datasets/youtube/data/0002/frame', help='path to input frames')
    parser.add_argument('--output-root', type=str, default='/home/wangzd/datasets/youtube/data/0002/ped_per_frame', help='path to output frames')
    opt = parser.parse_args()
    print(opt, end='\n\n')

    with torch.no_grad():
        extract_ped_per_frame(
            opt.cfg,
            opt.input_root,
            opt.output_root,
            opt.weights,
            opt.batch_size,
            opt.img_size,
            opt.iou_thres,
            opt.conf_thres,
            opt.nms_thres,
            opt.print_interval,
        )
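The crop filename encodes the per-frame pedestrian index, the integer box, and the confidence, so a downstream consumer can recover the detection without re-reading any metadata. A small sketch of the inverse parse (parse_crop_name is a hypothetical helper, named here only for illustration):

# Sketch: parse a saved crop name such as '0003_102_40_180_260_0.87.jpg'
# back into its fields; the format matches the ped_path pattern above.
import os.path as osp

def parse_crop_name(path):
    stem = osp.basename(path).rsplit('.jpg', 1)[0]
    ped_id, x1, y1, x2, y2, conf = stem.split('_')
    return int(ped_id), (int(x1), int(y1), int(x2), int(y2)), float(conf)

print(parse_crop_name('0003_102_40_180_260_0.87.jpg'))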
models.py (Normal file, +379 lines)
@@ -0,0 +1,379 @@
import os
import math
import time
from collections import defaultdict, OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from utils.parse_config import *
from utils.utils import *
from utils.syncbn import SyncBN

ONNX_EXPORT = False

batch_norm = SyncBN  # nn.BatchNorm2d


def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    yolo_layer_count = 0
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def['type'] == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module('conv_%d' % i, nn.Conv2d(in_channels=output_filters[-1],
                                                        out_channels=filters,
                                                        kernel_size=kernel_size,
                                                        stride=int(module_def['stride']),
                                                        padding=pad,
                                                        bias=not bn))
            if bn:
                modules.add_module('batch_norm_%d' % i, batch_norm(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif module_def['type'] == 'maxpool':
            kernel_size = int(module_def['size'])
            stride = int(module_def['stride'])
            if kernel_size == 2 and stride == 1:
                modules.add_module('_debug_padding_%d' % i, nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
            modules.add_module('maxpool_%d' % i, maxpool)

        elif module_def['type'] == 'upsample':
            # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')  # WARNING: deprecated
            upsample = Upsample(scale_factor=int(module_def['stride']))
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def['type'] == 'route':
            layers = [int(x) for x in module_def['layers'].split(',')]
            filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers])
            modules.add_module('route_%d' % i, EmptyLayer())

        elif module_def['type'] == 'shortcut':
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif module_def['type'] == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            nC = int(module_def['classes'])  # number of classes
            img_size = (int(hyperparams['width']), int(hyperparams['height']))
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, nC, hyperparams['nID'], img_size, yolo_layer_count, cfg=hyperparams['cfg'])
            modules.add_module('yolo_%d' % i, yolo_layer)
            yolo_layer_count += 1

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list


class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers"""

    def __init__(self):
        super(EmptyLayer, self).__init__()

    def forward(self, x):
        return x


class Upsample(nn.Module):
    # Custom Upsample layer (nn.Upsample gives deprecated warning message)

    def __init__(self, scale_factor=1, mode='nearest'):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)


class YOLOLayer(nn.Module):
    def __init__(self, anchors, nC, nID, img_size, yolo_layer, cfg):
        super(YOLOLayer, self).__init__()
        self.layer = yolo_layer
        nA = len(anchors)
        self.anchors = torch.FloatTensor(anchors)
        self.nA = nA  # number of anchors (4 per head here)
        self.nC = nC  # number of classes (1: pedestrian)
        self.nID = nID  # number of identities
        self.img_size = 0
        self.emb_dim = 512

        self.SmoothL1Loss = nn.SmoothL1Loss()
        self.SoftmaxLoss = nn.CrossEntropyLoss(ignore_index=-1)
        self.CrossEntropyLoss = nn.CrossEntropyLoss()
        self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1)
        # Learnable task-uncertainty weights for the box, confidence and ID losses
        self.s_c = nn.Parameter(1 * torch.ones(1))  # -4.15
        self.s_r = nn.Parameter(1 * torch.ones(1))  # -4.85
        self.s_id = nn.Parameter(1 * torch.ones(1))  # -2.3
        self.emb_scale = math.sqrt(2) * math.log(self.nID - 1)

    def forward(self, p_cat, img_size, targets=None, classifier=None, test_emb=False):
        p, p_emb = p_cat[:, :24, ...], p_cat[:, 24:, ...]
        nB, nGh, nGw = p.shape[0], p.shape[-2], p.shape[-1]

        if self.img_size != img_size:
            create_grids(self, img_size, nGh, nGw)

            if p.is_cuda:
                self.grid_xy = self.grid_xy.cuda()
                self.anchor_wh = self.anchor_wh.cuda()

        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 80)  # (bs, anchors, grid, grid, classes + xywh)
        p = p.view(nB, self.nA, self.nC + 5, nGh, nGw).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        p_emb = p_emb.permute(0, 2, 3, 1).contiguous()
        p_box = p[..., :4]
        p_conf = p[..., 4:6].permute(0, 4, 1, 2, 3)  # Conf

        # Training
        if targets is not None:
            if test_emb:
                tconf, tbox, tids = build_targets_max(targets, self.anchor_vec.cuda(), self.nA, self.nC, nGh, nGw)
            else:
                tconf, tbox, tids = build_targets_thres(targets, self.anchor_vec.cuda(), self.nA, self.nC, nGh, nGw)
            tconf, tbox, tids = tconf.cuda(), tbox.cuda(), tids.cuda()
            mask = tconf > 0

            # Compute losses
            nT = sum([len(x) for x in targets])  # number of targets
            nM = mask.sum().float()  # number of anchors (assigned to targets)
            nP = torch.ones_like(mask).sum().float()
            if nM > 0:
                lbox = self.SmoothL1Loss(p_box[mask], tbox[mask])
            else:
                FT = torch.cuda.FloatTensor if p_conf.is_cuda else torch.FloatTensor
                lbox, lconf = FT([0]), FT([0])
            lconf = self.SoftmaxLoss(p_conf, tconf)
            lid = torch.Tensor(1).fill_(0).squeeze().cuda()
            emb_mask, _ = mask.max(1)

            # For convenience we use max(1) to decide the id, TODO: more reasonable strategy
            tids, _ = tids.max(1)
            tids = tids[emb_mask]
            embedding = p_emb[emb_mask].contiguous()
            embedding = self.emb_scale * F.normalize(embedding)
            nI = emb_mask.sum().float()

            if test_emb:
                if np.prod(embedding.shape) == 0 or np.prod(tids.shape) == 0:
                    return torch.zeros(0, self.emb_dim + 1).cuda()
                emb_and_gt = torch.cat([embedding, tids.float()], dim=1)
                return emb_and_gt

            if len(embedding) > 1:
                logits = classifier(embedding).contiguous()
                lid = self.IDLoss(logits, tids.squeeze())

            # Sum loss components, weighted by the learnable task uncertainties
            loss = torch.exp(-self.s_r) * lbox + torch.exp(-self.s_c) * lconf + torch.exp(-self.s_id) * lid + \
                   (self.s_r + self.s_c + self.s_id)
            loss *= 0.5

            return loss, loss.item(), lbox.item(), lconf.item(), lid.item(), nT

        else:
            p_conf = torch.softmax(p_conf, dim=1)[:, 1, ...].unsqueeze(-1)
            p_emb = p_emb.unsqueeze(1).repeat(1, self.nA, 1, 1, 1).contiguous()
            p_cls = torch.zeros(nB, self.nA, nGh, nGw, 1).cuda()  # Temp
            p = torch.cat([p_box, p_conf, p_cls, p_emb], dim=-1)
            p[..., :4] = decode_delta_map(p[..., :4], self.anchor_vec.to(p))
            p[..., :4] *= self.stride

            # reshape from [nB, nA, nGh, nGw, 5 + nD] to [nB, -1, 5+nD]
            return p.view(nB, -1, p.shape[-1])


class Darknet(nn.Module):
    """YOLOv3 object detection model"""

    def __init__(self, cfg_path, img_size=(1088, 608), nID=1591, test_emb=False):
        super(Darknet, self).__init__()

        self.module_defs = parse_model_cfg(cfg_path)
        self.module_defs[0]['cfg'] = cfg_path
        self.module_defs[0]['nID'] = nID
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.img_size = img_size
        self.loss_names = ['loss', 'box', 'conf', 'id', 'nT']
        self.losses = OrderedDict()
        for ln in self.loss_names:
            self.losses[ln] = 0
        self.emb_dim = 512
        self.classifier = nn.Linear(self.emb_dim, nID)
        self.test_emb = test_emb

    def forward(self, x, targets=None, targets_len=None):
        self.losses = OrderedDict()
        for ln in self.loss_names:
            self.losses[ln] = 0
        is_training = (targets is not None) and (not self.test_emb)
        #img_size = x.shape[-1]
        layer_outputs = []
        output = []

        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
            mtype = module_def['type']
            if mtype in ['convolutional', 'upsample', 'maxpool']:
                x = module(x)
            elif mtype == 'route':
                layer_i = [int(x) for x in module_def['layers'].split(',')]
                if len(layer_i) == 1:
                    x = layer_outputs[layer_i[0]]
                else:
                    x = torch.cat([layer_outputs[i] for i in layer_i], 1)
            elif mtype == 'shortcut':
                layer_i = int(module_def['from'])
                x = layer_outputs[-1] + layer_outputs[layer_i]
            elif mtype == 'yolo':
                if is_training:  # get loss
                    targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
                    x, *losses = module[0](x, self.img_size, targets, self.classifier)
                    for name, loss in zip(self.loss_names, losses):
                        self.losses[name] += loss
                elif self.test_emb:
                    targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
                    x = module[0](x, self.img_size, targets, self.classifier, self.test_emb)
                else:  # get detections
                    x = module[0](x, self.img_size)
                output.append(x)
            layer_outputs.append(x)

        if is_training:
            self.losses['nT'] /= 3
            output = [o.squeeze() for o in output]
            return sum(output), torch.Tensor(list(self.losses.values())).cuda()
        elif self.test_emb:
            return torch.cat(output, 0)
        return torch.cat(output, 1)


def create_grids(self, img_size, nGh, nGw):
    self.stride = img_size[0] / nGw
    assert self.stride == img_size[1] / nGh

    # build xy offsets
    grid_x = torch.arange(nGw).repeat((nGh, 1)).view((1, 1, nGh, nGw)).float()
    grid_y = torch.arange(nGh).repeat((nGw, 1)).transpose(0, 1).view((1, 1, nGh, nGw)).float()
    #grid_y = grid_x.permute(0, 1, 3, 2)
    self.grid_xy = torch.stack((grid_x, grid_y), 4)

    # build wh gains
    self.anchor_vec = self.anchors / self.stride
    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2)


def load_darknet_weights(self, weights, cutoff=-1):
    # Parses and loads the weights stored in 'weights'
    # cutoff: save layers between 0 and cutoff (if cutoff = -1 all are saved)
    weights_file = weights.split(os.sep)[-1]

    # Try to download weights if not available locally
    if not os.path.isfile(weights):
        try:
            os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -O ' + weights)
        except IOError:
            print(weights + ' not found')

    # Establish cutoffs
    if weights_file == 'darknet53.conv.74':
        cutoff = 75
    elif weights_file == 'yolov3-tiny.conv.15':
        cutoff = 15

    # Open the weights file
    fp = open(weights, 'rb')
    header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values

    # Needed to write header when saving weights
    self.header_info = header

    self.seen = header[3]  # number of images seen during training
    weights = np.fromfile(fp, dtype=np.float32)  # The rest are weights
    fp.close()

    ptr = 0
    for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
        if module_def['type'] == 'convolutional':
            conv_layer = module[0]
            if module_def['batch_normalize']:
                # Load BN bias, weights, running mean and running variance
                bn_layer = module[1]
                num_b = bn_layer.bias.numel()  # Number of biases
                # Bias
                bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias)
                bn_layer.bias.data.copy_(bn_b)
                ptr += num_b
                # Weight
                bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight)
                bn_layer.weight.data.copy_(bn_w)
                ptr += num_b
                # Running Mean
                bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean)
                bn_layer.running_mean.data.copy_(bn_rm)
                ptr += num_b
                # Running Var
                bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var)
                bn_layer.running_var.data.copy_(bn_rv)
                ptr += num_b
            else:
                # Load conv. bias
                num_b = conv_layer.bias.numel()
                conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias)
                conv_layer.bias.data.copy_(conv_b)
                ptr += num_b
            # Load conv. weights
            num_w = conv_layer.weight.numel()
            conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight)
            conv_layer.weight.data.copy_(conv_w)
            ptr += num_w


def save_weights(self, path, cutoff=-1):
    """
    @:param path   - path of the new weights file
    @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
    """
    fp = open(path, 'wb')
    self.header_info[3] = self.seen  # number of images seen during training
    self.header_info.tofile(fp)

    # Iterate through layers
    for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
        if module_def['type'] == 'convolutional':
            conv_layer = module[0]
            # If batch norm, save bn first
            if module_def['batch_normalize']:
                bn_layer = module[1]
                bn_layer.bias.data.cpu().numpy().tofile(fp)
                bn_layer.weight.data.cpu().numpy().tofile(fp)
                bn_layer.running_mean.data.cpu().numpy().tofile(fp)
                bn_layer.running_var.data.cpu().numpy().tofile(fp)
            # Save conv bias
            else:
                conv_layer.bias.data.cpu().numpy().tofile(fp)
            # Save conv weights
            conv_layer.weight.data.cpu().numpy().tofile(fp)

    fp.close()
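The per-head training loss in YOLOLayer.forward is loss = 0.5 * (exp(-s_r) * l_box + exp(-s_c) * l_conf + exp(-s_id) * l_id + s_r + s_c + s_id), with s_r, s_c, s_id learned per head, in the style of uncertainty-based multi-task weighting. A standalone sketch of that weighting, detached from the network (the three loss values are toy inputs, for illustration only):

# Sketch: the learnable task-uncertainty weighting used in YOLOLayer.forward,
# isolated from the model. lbox/lconf/lid are toy stand-ins for real losses.
import torch

s_r, s_c, s_id = (torch.nn.Parameter(torch.ones(1)) for _ in range(3))
lbox, lconf, lid = torch.tensor(0.8), torch.tensor(1.2), torch.tensor(2.5)

loss = 0.5 * (torch.exp(-s_r) * lbox + torch.exp(-s_c) * lconf
              + torch.exp(-s_id) * lid + (s_r + s_c + s_id))
loss.backward()   # gradients flow into s_* as well, so the balance is learned
print(float(loss))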
test.py (Normal file, +264 lines)
@@ -0,0 +1,264 @@
import argparse
import json
import time
from pathlib import Path

import numpy as np
from sklearn import metrics
from scipy import interpolate
import torch
import torch.nn.functional as F
from models import *
from utils.utils import *
from torchvision.transforms import transforms as T
from utils.datasets import LoadImagesAndLabels, JointDataset, collate_fn


def test(
        cfg,
        data_cfg,
        weights,
        batch_size=16,
        img_size=416,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
        nID=14455,
):
    # Configure run
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    #nC = int(data_cfg_dict['classes'])  # number of classes (80 for COCO)
    nC = 1
    test_path = data_cfg_dict['test']

    # Initialize model
    model = Darknet(cfg, img_size, nID)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    model = torch.nn.DataParallel(model)
    model.cuda().eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(test_path, img_size, augment=False, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
                                             num_workers=8, drop_last=False, collate_fn=collate_fn)

    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    coco91class = coco80_to_coco91_class()
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        output = model(imgs.cuda())
        output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
        for i, o in enumerate(output):
            if o is not None:
                output[i] = o[:, :6]

        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for si, (labels, detections) in enumerate(zip(targets, output)):
            seen += 1

            if detections is None:
                # If there are labels but no detections mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # Get detections sorted by decreasing confidence scores
            detections = detections.cpu().numpy()
            detections = detections[np.argsort(-detections[:, 4])]

            # If no labels add number of detections as incorrect
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = labels[:, 0]

                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 2:6])
                target_boxes[:, 0] *= img_size[0]
                target_boxes[:, 2] *= img_size[0]
                target_boxes[:, 1] *= img_size[1]
                target_boxes[:, 3] *= img_size[1]

                detected = []
                for *pred_bbox, conf, obj_conf in detections:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > iou_thres and obj_pred == labels[best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(tp=correct,
                                              conf=detections[:, 4],
                                              pred_cls=np.zeros_like(detections[:, 5]),  # detections[:, 6]
                                              target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

        # Means of all images
        mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
        mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
        mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t))
    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))

    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P


def test_emb(
        cfg,
        data_cfg,
        weights,
        batch_size=16,
        img_size=416,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
        nID=14455,
):
    # Configure run
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    test_paths = data_cfg_dict['test_emb']

    # Initialize model
    model = Darknet(cfg, img_size, nID, test_emb=True)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    model = torch.nn.DataParallel(model)
    model.cuda().eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(test_paths, img_size, augment=False, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
                                             num_workers=8, drop_last=False, collate_fn=collate_fn)
    embedding, id_labels = [], []
    print('Extracting pedestrian features...')
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        output = model(imgs.cuda(), targets.cuda(), targets_len.cuda()).squeeze()

        for out in output:
            feat, label = out[:-1], out[-1].long()
            if label != -1:
                embedding.append(feat)
                id_labels.append(label)

        if batch_i % print_interval == 0:
            print('Extracting {}/{}, # of instances {}, time {:.2f} sec.'.format(batch_i, len(dataloader), len(id_labels), time.time() - t))

    print('Computing pairwise similarity...')
    if len(embedding) < 1:
        return None
    embedding = torch.stack(embedding, dim=0).cuda()
    id_labels = torch.LongTensor(id_labels)
    n = len(id_labels)
    print(n, len(embedding))
    assert len(embedding) == n

    embedding = F.normalize(embedding, dim=1)
    pdist = torch.mm(embedding, embedding.t()).cpu().numpy()
    gt = id_labels.expand(n, n).eq(id_labels.expand(n, n).t()).numpy()

    up_triangle = np.where(np.triu(pdist) - np.eye(n) * pdist != 0)
    pdist = pdist[up_triangle]
    gt = gt[up_triangle]

    far_levels = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    far, tar, threshold = metrics.roc_curve(gt, pdist)
    interp = interpolate.interp1d(far, tar)
    tar_at_far = [interp(x) for x in far_levels]
    for f, fa in enumerate(far_levels):
        print('TPR@FAR={:.7f}: {:.4f}'.format(fa, tar_at_far[f]))
    return tar_at_far


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='test.py')
    parser.add_argument('--batch-size', type=int, default=40, help='size of each image batch')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='data config')
    parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
    parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
    parser.add_argument('--print-interval', type=int, default=10, help='frequency of progress prints')
    parser.add_argument('--test-emb', action='store_true', help='test embedding')
    opt = parser.parse_args()
    print(opt, end='\n\n')

    with torch.no_grad():
        if opt.test_emb:
            res = test_emb(
                opt.cfg,
                opt.data_cfg,
                opt.weights,
                opt.batch_size,
                opt.img_size,
                opt.iou_thres,
                opt.conf_thres,
                opt.nms_thres,
                opt.print_interval,
            )
        else:
            mAP = test(
                opt.cfg,
                opt.data_cfg,
                opt.weights,
                opt.batch_size,
                opt.img_size,
                opt.iou_thres,
                opt.conf_thres,
                opt.nms_thres,
                opt.print_interval,
            )
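test_emb scores embedding quality as TPR@FAR over all cross-pair cosine similarities: roc_curve produces the full (FAR, TPR) trade-off and interp1d reads off the TPR at fixed FAR levels. A self-contained sketch of the same metric on synthetic scores (the labels and scores below are made up, for illustration only):

# Sketch: TPR at fixed FAR levels, the metric printed by test_emb.
import numpy as np
from sklearn import metrics
from scipy import interpolate

rng = np.random.default_rng(0)
gt = rng.integers(0, 2, 5000)                   # 1 = same-identity pair
scores = gt * 0.5 + rng.normal(0.2, 0.3, 5000)  # positives score higher on average

far, tar, _ = metrics.roc_curve(gt, scores)
interp = interpolate.interp1d(far, tar)
for fa in [1e-3, 1e-2, 1e-1]:
    print('TPR@FAR={:.4f}: {:.4f}'.format(fa, float(interp(fa))))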
track.py (Normal file, +170 lines)
@@ -0,0 +1,170 @@
import os
import os.path as osp
import cv2
import logging
import argparse
import motmetrics as mm

from tracker.mot_tracker_kalman import AETracker
from utils import visualization as vis
from utils.log import logger
from utils.timer import Timer
from utils.evaluation import Evaluator
import utils.datasets as datasets
import torch


def mkdirs(path):
    if os.path.exists(path):
        return
    os.makedirs(path)


def write_results(filename, results, data_type):
    if data_type == 'mot':
        save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
    elif data_type == 'kitti':
        save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
    else:
        raise ValueError(data_type)

    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            if data_type == 'kitti':
                frame_id -= 1
            for tlwh, track_id in zip(tlwhs, track_ids):
                if track_id < 0:
                    continue
                x1, y1, w, h = tlwh
                x2, y2 = x1 + w, y1 + h
                line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
                f.write(line)
    logger.info('save results to {}'.format(filename))


def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30):
    if save_dir is not None:
        mkdirs(save_dir)

    tracker = AETracker(opt, frame_rate=frame_rate)
    timer = Timer()
    results = []
    frame_id = 0
    for path, img, img0 in dataloader:
        if frame_id % 20 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time)))

        # run tracking
        timer.tic()
        blob = torch.from_numpy(img).cuda().unsqueeze(0)
        online_targets = tracker.update(blob, img0)
        online_tlwhs = []
        online_ids = []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            vertical = tlwh[2] / tlwh[3] > 1.6
            if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
        timer.toc()
        # save results
        results.append((frame_id + 1, online_tlwhs, online_ids))
        if show_image or save_dir is not None:
            online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id,
                                          fps=1. / timer.average_time)
        if show_image:
            cv2.imshow('online_im', online_im)
        if save_dir is not None:
            cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im)
        frame_id += 1
    # save results
    write_results(result_filename, results, data_type)
    return frame_id


def main(opt, data_root='/data/MOT16/train', det_root=None,
         seqs=('MOT16-05',), exp_name='demo', save_image=False, show_image=True):
    logger.setLevel(logging.INFO)
    result_root = os.path.join(data_root, '..', 'results', exp_name)
    mkdirs(result_root)
    data_type = 'mot'

    # run tracking
    timer = Timer()
    accs = []
    n_frame = 0
    timer.tic()
    for seq in seqs:
        output_dir = os.path.join(data_root, '..', 'outputs', exp_name, seq) if save_image else None

        logger.info('start seq: {}'.format(seq))
        dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size)
        result_filename = os.path.join(result_root, '{}.txt'.format(seq))
        meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read()
        frame_rate = int(meta_info[meta_info.find('frameRate') + 10:meta_info.find('\nseqLength')])
        n_frame += eval_seq(opt, dataloader, data_type, result_filename,
                            save_dir=output_dir, show_image=show_image, frame_rate=frame_rate)

        # eval
        logger.info('Evaluate seq: {}'.format(seq))
        evaluator = Evaluator(data_root, seq, data_type)
        accs.append(evaluator.eval_file(result_filename))
    timer.toc()
    logger.info('Time elapsed: {}, FPS {}'.format(timer.average_time, n_frame / timer.average_time))

    # get summary
    # metrics = ['mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall']
    metrics = mm.metrics.motchallenge_metrics
    mh = mm.metrics.create()
    summary = Evaluator.get_summary(accs, seqs, metrics)
    strsummary = mm.io.render_summary(
        summary,
        formatters=mh.formatters,
        namemap=mm.io.motchallenge_metric_names
    )
    print(strsummary)
    Evaluator.save_summary(summary, os.path.join(result_root, 'summary_{}.xlsx'.format(exp_name)))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='track.py')
    parser.add_argument('--batch-size', type=int, default=8, help='size of each image batch')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file')
    parser.add_argument('--img-size', type=int, default=(864, 480), help='size of each image dimension')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
    parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.4, help='iou threshold for non-maximum suppression')
    parser.add_argument('--min-box-area', type=float, default=200, help='filter out tiny boxes')
    parser.add_argument('--pixel-mean', type=float, default=[0, 0, 0], nargs='+', help='pixel mean')
    parser.add_argument('--track-buffer', type=int, default=30, help='tracking buffer')
    parser.add_argument('--test-mot16', action='store_true', help='evaluate on the MOT16 test set instead of MOT19 train')
    parser.add_argument('--save-images', action='store_true', help='save tracking results')
    opt = parser.parse_args()
    print(opt, end='\n\n')

    if not opt.test_mot16:
        seqs_str = '''CVPR19-01
                      CVPR19-02
                      CVPR19-03
                      CVPR19-05'''
        data_root = '/home/wangzd/datasets/MOT/MOT19/train'
    else:
        seqs_str = '''MOT16-01
                      MOT16-03
                      MOT16-06
                      MOT16-07
                      MOT16-08
                      MOT16-12
                      MOT16-14'''
        #seqs_str = 'MOT16-14'
        data_root = '/home/wangzd/datasets/MOT/MOT16/test'
    seqs = [seq.strip() for seq in seqs_str.split()]

    main(opt,
         data_root=data_root,
         seqs=seqs,
         exp_name='darknet53.864x480',
         show_image=False,
         save_image=opt.save_images)
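write_results emits one line per box in the MOT challenge text format, frame,id,x,y,w,h,conf,-1,-1,-1 with top-left coordinates and 1-based frame numbers, which is what Evaluator and the MOT benchmark servers expect. For example:

# Sketch: one MOT-format result line, exactly as produced by write_results above.
save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
line = save_format.format(frame=1, id=7, x1=102.5, y1=40.0, w=78.0, h=220.0)
print(line, end='')   # -> 1,7,102.5,40.0,78.0,220.0,1,-1,-1,-1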
tracker/__init__.py (Normal file, +0 lines)
tracker/basetrack.py (Normal file, +53 lines)
@@ -0,0 +1,53 @@
import numpy as np
from collections import OrderedDict


class TrackState(object):
    New = 0
    Tracked = 1
    Lost = 2
    Removed = 3


class BaseTrack(object):
    _count = 0

    track_id = 0
    is_activated = False
    state = TrackState.New

    history = OrderedDict()
    features = []
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    # multi-camera
    location = (np.inf, np.inf)

    @property
    def end_frame(self):
        return self.frame_id

    @staticmethod
    def next_id():
        BaseTrack._count += 1
        return BaseTrack._count

    def activate(self, *args):
        raise NotImplementedError

    def predict(self):
        raise NotImplementedError

    def update(self, *args, **kwargs):
        raise NotImplementedError

    def mark_lost(self):
        self.state = TrackState.Lost

    def mark_removed(self):
        self.state = TrackState.Removed
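BaseTrack defines the track lifecycle: subclasses supply activate/predict/update and move through the TrackState machine, while next_id hands out ids from the shared class counter (STrack in tracker/detector.py below is the real implementation). A minimal, hypothetical subclass showing the contract:

# Sketch: minimal BaseTrack subclass (illustrative only; see STrack below
# for the actual tracker implementation).
from tracker.basetrack import BaseTrack, TrackState

class DummyTrack(BaseTrack):
    def __init__(self, tlwh):
        self._tlwh = tlwh

    def activate(self, frame_id):
        self.track_id = self.next_id()          # unique id from the shared counter
        self.state = TrackState.Tracked
        self.frame_id = self.start_frame = frame_id

    def predict(self):
        pass                                    # no motion model in this sketch

    def update(self, new_tlwh, frame_id):
        self._tlwh = new_tlwh
        self.frame_id = frame_id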
tracker/detector.py (Normal file, +181 lines)
|
@ -0,0 +1,181 @@
|
|||
import numpy as np
from numba import jit
from collections import deque
import itertools
import os
import os.path as osp
import time
import torch

from lib.utils.log import logger
from lib.tracker import matching
from lib.utils.kalman_filter import KalmanFilter
from lib.model.faster_rcnn.resnet import resnet_deploy
from lib.model.utils.config import cfg
from lib.model.rpn.bbox_transform import clip_boxes, bbox_transform_inv
from lib.model.nms.nms_wrapper import nms

from .basetrack import BaseTrack, TrackState


class STrack(BaseTrack):

    def __init__(self, tlwh, score, temp_feat):
        # wait activate
        self._tlwh = np.asarray(tlwh, dtype=np.float)
        self.is_activated = False
        self.score = score
        self.tracklet_len = 0
        self.temp_feat = temp_feat

    def activate(self, frame_id):
        """Start a new tracklet"""
        self.track_id = self.next_id()
        self.time_since_update = 0
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        #self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        self._tlwh = new_track.tlwh
        self.temp_feat = new_track.temp_feat
        self.time_since_update = 0
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.time_since_update = 0
        self.tracklet_len += 1

        self._tlwh = new_track.tlwh
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            self.temp_feat = new_track.temp_feat

    @property
    @jit
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        return self._tlwh.copy()

    @property
    @jit
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    @jit
    def tlbr_to_tlwh(tlbr):
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    @jit
    def tlwh_to_tlbr(tlwh):
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
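
# NOTE: a quick worked example of the box conversions above -- tlwh
# (10, 20, 30, 40) -> tlbr (10, 20, 40, 60) by adding (w, h) to the top-left
# corner; tlbr_to_tlwh is the exact inverse.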


class JDTracker(object):
    def __init__(self, checksession=3, checkepoch=24, checkpoint=663, det_thresh=0.92, frame_rate=30):
        self.classes = np.asarray(['__background__', 'pedestrian'])

        self.fasterRCNN = resnet_deploy(self.classes, 101, pretrained=False, class_agnostic=False)
        self.fasterRCNN.create_architecture()

        input_dir = osp.join('models', 'res101', 'mot17det')
        if not os.path.exists(input_dir):
            raise Exception('There is no input directory for loading network from ' + input_dir)
        load_name = os.path.join(input_dir,
                                 'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch, checkpoint))
        print("load checkpoint %s" % load_name)
        checkpoint = torch.load(load_name)
        self.fasterRCNN.load_state_dict(checkpoint['model'], strict=False)
        print('load model successfully!')
        self.fasterRCNN.cuda()
        self.fasterRCNN.eval()

        self.frame_id = 0
        self.det_thresh = det_thresh
        self.buffer_size = int(frame_rate / 30.0 * cfg.TRACKING_BUFFER_SIZE)
        self.max_time_lost = self.buffer_size
        #self.fmap_buffer = deque([], maxlen=self.buffer_size)

    def update(self, im_blob):
        self.frame_id += 1

        '''Forward'''
        im_blob = im_blob.cuda()
        im_info = torch.Tensor([[im_blob.shape[1], im_blob.shape[2], 1.]]).float().cuda()
        self.im_info = im_info
        boxes, temp_feat, base_feat = self.predict(im_blob, im_info)

        '''Detections'''
        detections = [STrack(STrack.tlbr_to_tlwh((t, l, b, r)), s, f) for (t, l, b, r, s), f in zip(boxes, temp_feat)]

        return detections

    def predict(self, im_blob, im_info):
        im_blob = im_blob.permute(0, 3, 1, 2)
        # Trivial input
        gt_boxes = torch.zeros(1, 1, 6).to(im_blob)
        num_boxes = gt_boxes[:, :, 0].squeeze()
        with torch.no_grad():
            rois, cls_prob, bbox_pred, base_feat = self.fasterRCNN(im_blob, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        inds_first = torch.nonzero(scores[0, :, 1] > self.det_thresh).view(-1)
        if inds_first.numel() > 0:
            rois = rois[:, inds_first]
            scores = scores[:, inds_first]
            bbox_pred = bbox_pred[:, inds_first]

            refined_rois = self.fasterRCNN.bbox_refine(rois, bbox_pred, im_info)
            template_feat = self.fasterRCNN.roi_pool(base_feat, refined_rois)
            pred_boxes = refined_rois.data[:, :, 1:5]

            cls_scores = scores[0, :, 1]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[0]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            temp_feat = template_feat[order]
            keep_first = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS).view(-1).long()
            cls_dets = cls_dets[keep_first]
            temp_feat = temp_feat[keep_first]
            output_box = cls_dets.cpu().numpy()
        else:
            output_box = []
            temp_feat = []
        return output_box, temp_feat, base_feat
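
# NOTE: in this revision JDTracker is detector-only -- update() returns
# per-frame STrack detections, each carrying an ROI-pooled template feature,
# and leaves the frame-to-frame association to the trackers in
# tracker/mot_tracker.py.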
141
tracker/matching.py
Normal file
@ -0,0 +1,141 @@
import cv2
import numpy as np
import scipy
from scipy.spatial.distance import cdist
from sklearn.utils import linear_assignment_

from utils.cython_bbox import bbox_ious
from utils import kalman_filter
import time


def merge_matches(m1, m2, shape):
    O, P, Q = shape
    m1 = np.asarray(m1)
    m2 = np.asarray(m2)

    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))

    mask = M1 * M2
    match = mask.nonzero()
    match = list(zip(match[0], match[1]))
    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))

    return match, unmatched_O, unmatched_Q


def _indices_to_matches(cost_matrix, indices, thresh):
    matched_cost = cost_matrix[tuple(zip(*indices))]
    matched_mask = (matched_cost <= thresh)

    matches = indices[matched_mask]
    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))

    return matches, unmatched_a, unmatched_b


def linear_assignment(cost_matrix, thresh):
    """
    Simple linear assignment
    :type cost_matrix: np.ndarray
    :type thresh: float
    :return: matches, unmatched_a, unmatched_b
    """
    if cost_matrix.size == 0:
        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))

    cost_matrix[cost_matrix > thresh] = thresh + 1e-4
    indices = linear_assignment_.linear_assignment(cost_matrix)

    return _indices_to_matches(cost_matrix, indices, thresh)
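
# NOTE: an illustrative call, assuming a 2x2 cost matrix:
#   cost = np.asarray([[0.1, 0.9],
#                      [0.8, 0.2]])
#   matches, u_a, u_b = linear_assignment(cost, thresh=0.7)
# yields matches [[0, 0], [1, 1]] and nothing unmatched; entries above thresh
# are clamped to thresh + 1e-4 first and then filtered out of `matches` by
# _indices_to_matches.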


def ious(atlbrs, btlbrs):
    """
    Compute cost based on IoU
    :type atlbrs: list[tlbr] | np.ndarray
    :type btlbrs: list[tlbr] | np.ndarray

    :rtype ious np.ndarray
    """
    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
    if ious.size == 0:
        return ious

    ious = bbox_ious(
        np.ascontiguousarray(atlbrs, dtype=np.float),
        np.ascontiguousarray(btlbrs, dtype=np.float)
    )

    return ious


def iou_distance(atracks, btracks):
    """
    Compute cost based on IoU
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray
    """
    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        atlbrs = atracks
        btlbrs = btracks
    else:
        atlbrs = [track.tlbr for track in atracks]
        btlbrs = [track.tlbr for track in btracks]
    _ious = ious(atlbrs, btlbrs)
    cost_matrix = 1 - _ious

    return cost_matrix
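
# NOTE: sanity check for the cost above -- identical boxes give iou == 1.0
# and cost 0.0, disjoint boxes give cost 1.0, so assignment at thresh 0.7
# only links pairs whose IoU exceeds 0.3.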


#def embedding_distance(tracks, detections, metric='cosine'):
#    """
#    :param tracks: list[STrack]
#    :param detections: list[BaseTrack]
#    :param metric:
#    :return: cost_matrix np.ndarray
#    """
#
#    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
#    if cost_matrix.size == 0:
#        return cost_matrix
#    det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
#    for i, track in enumerate(tracks):
#        cost_matrix[i, :] = np.maximum(0.0, cdist(track.features, det_features, metric).min(axis=0))
#    return cost_matrix

def embedding_distance(tracks, detections, metric='cosine'):
    """
    :param tracks: list[STrack]
    :param detections: list[BaseTrack]
    :param metric:
    :return: cost_matrix np.ndarray
    """
    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
    if cost_matrix.size == 0:
        return cost_matrix
    det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
    for i, track in enumerate(tracks):
        cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1, -1), det_features, metric))
    return cost_matrix
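
# NOTE: with metric='cosine', cdist returns 1 - cos(theta) between a track's
# EMA-smoothed feature and each detection embedding, so identical unit-norm
# embeddings cost ~0 and orthogonal ones ~1; np.maximum(0.0, ...) clamps tiny
# negative values caused by floating-point error.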


def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
    if cost_matrix.size == 0:
        return cost_matrix
    gating_dim = 2 if only_position else 4
    gating_threshold = kalman_filter.chi2inv95[gating_dim]
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position)
        cost_matrix[row, gating_distance > gating_threshold] = np.inf
    return cost_matrix
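
# NOTE: chi2inv95 maps measurement dimensionality to the 0.95 quantile of the
# chi-square distribution (about 9.49 for 4 degrees of freedom), so any
# pairing whose squared Mahalanobis distance from the Kalman prediction
# exceeds that quantile is vetoed with an infinite cost.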
473
tracker/mot_tracker.py
Normal file
@ -0,0 +1,473 @@
import numpy as np
from numba import jit
from collections import deque
import itertools
import os
import os.path as osp
import time
import torch

from utils.utils import *
from utils.log import logger
from models import *
from tracker import matching
from .basetrack import BaseTrack, TrackState


class STrack(BaseTrack):

    def __init__(self, tlwh, score, temp_feat, buffer_size=30):
        # wait activate
        self._tlwh = np.asarray(tlwh, dtype=np.float)
        self.is_activated = False
        self.score = score
        self.tracklet_len = 0
        self.smooth_feat = None
        self.features = deque([], maxlen=buffer_size)  # create the buffer before the first update
        self.update_features(temp_feat)

    def update_features(self, feat):
        if feat is None:  # IOUTracker builds STracks without embeddings
            self.curr_feat = None
            return
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = 0.9 * self.smooth_feat + 0.1 * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)
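
    # NOTE: the 0.9/0.1 blend above is an exponential moving average; after k
    # consecutive updates a feature's weight has decayed to 0.9**k (about
    # 0.35 at k = 10), and the re-normalisation keeps the smoothed embedding
    # on the unit sphere for cosine matching.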

    def activate(self, frame_id):
        """Start a new tracklet"""
        self.track_id = self.next_id()
        self.time_since_update = 0
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        #self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        self._tlwh = new_track.tlwh
        #self.features.append(new_track.curr_feat)
        self.update_features(new_track.curr_feat)
        self.time_since_update = 0
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.time_since_update = 0
        self.tracklet_len += 1

        self._tlwh = new_track.tlwh
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            #self.features.append(new_track.curr_feat)
            self.update_features(new_track.curr_feat)

    @property
    @jit
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        return self._tlwh.copy()

    @property
    @jit
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    @jit
    def tlbr_to_tlwh(tlbr):
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    @jit
    def tlwh_to_tlbr(tlwh):
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)


class IOUTracker(object):
    def __init__(self, opt, frame_rate=30):
        self.opt = opt
        self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
        #load_darknet_weights(self.model, opt.weights)
        self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
        self.model.cuda().eval()

        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.frame_id = 0
        self.det_thresh = opt.conf_thres
        self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
        self.max_time_lost = self.buffer_size
        #self.fmap_buffer = deque([], maxlen=self.buffer_size)

    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        t1 = time.time()
        '''Forward'''
        with torch.no_grad():
            pred = self.model(im_blob)
        pred = pred[pred[:, :, 4] > self.opt.conf_thres]
        if len(pred) > 0:
            dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0]
            scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
            '''Detections'''
            detections = [STrack(STrack.tlbr_to_tlwh((t, l, b, r)), s, None) for (t, l, b, r, s) in dets[:, :5]]
        else:
            detections = []

        t2 = time.time()
        #print('Forward: {} s'.format(t2-t1))

        '''matching for tracked targets'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        #dists = self.track_matching(strack_pool, detections, base_feat)
        dists = matching.iou_distance(strack_pool, detections)
        #dists[np.where(iou_dists>0.4)] = 1.0
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        t3 = time.time()
        #print('First match {} s'.format(t3-t2))

        #'''Remained det/track, use IOU between dets and tracks to associate directly'''
        #detections = [detections[i] for i in u_detection]
        #r_tracked_stracks = [strack_pool[i] for i in u_track]
        #dists = matching.iou_distance(r_tracked_stracks, detections)
        #matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
        #for itracked, idet in matches:
        #    r_tracked_stracks[itracked].update(detections[idet], self.frame_id)
        for it in u_track:
            track = strack_pool[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """step 4: init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_starcks.append(track)

        """step 6: update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)
        t4 = time.time()
        #print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        #self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)

        # get scores of lost tracks
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
        t5 = time.time()
        #print('Final {} s'.format(t5-t4))
        return output_stracks
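
# NOTE: IOUTracker associates on box overlap alone (cost = 1 - IoU, Hungarian
# assignment at thresh 0.7); unmatched tracks are marked Lost and kept for up
# to buffer_size frames (opt.track_buffer scaled by the frame rate) before
# removal.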


class AETracker(object):
    def __init__(self, opt, frame_rate=30):
        self.opt = opt
        self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
        # load_darknet_weights(self.model, opt.weights)
        self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
        self.model.cuda().eval()

        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.frame_id = 0
        self.det_thresh = opt.conf_thres
        self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
        self.max_time_lost = self.buffer_size

    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        t1 = time.time()
        '''Forward'''
        with torch.no_grad():
            pred = self.model(im_blob)
        pred = pred[pred[:, :, 4] > self.opt.conf_thres]
        if len(pred) > 0:
            dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
            scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
            '''Detections'''
            detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
                          (tlbrs, f) in zip(dets[:, :5], dets[:, -self.model.emb_dim:])]
        else:
            detections = []

        t2 = time.time()
        # print('Forward: {} s'.format(t2-t1))

        '''matching for tracked targets'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        #strack_pool = tracked_stracks
        dists = matching.embedding_distance(strack_pool, detections)
        iou_dists = matching.iou_distance(strack_pool, detections)
        dists[np.where(iou_dists > 0.99)] = 1.0
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # detections = [detections[i] for i in u_detection]
        # dists = matching.embedding_distance(self.lost_stracks, detections)
        # iou_dists = matching.iou_distance(self.lost_stracks, detections)
        # dists[np.where(iou_dists>0.7)] = 1.0
        #
        # matches, u_track_lost, u_detection = matching.linear_assignment(dists, thresh=0.7)
        #
        # for itracked, idet in matches:
        #     track = self.lost_stracks[itracked]
        #     det = detections[idet]
        #     if track.state == TrackState.Tracked:
        #         track.update(detections[idet], self.frame_id)
        #         activated_starcks.append(track)
        #     else:
        #         track.re_activate(det, self.frame_id, new_id=False)
        #         refind_stracks.append(track)

        '''Remained det/track, use IOU between dets and tracks to associate directly'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        r_lost_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state != TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        # '''Remained det/track, use IOU between dets and tracks to associate directly'''
        # detections = [detections[i] for i in u_detection]
        # dists = matching.iou_distance(r_lost_stracks, detections)
        # matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.25)
        #
        # for itracked, idet in matches:
        #     track = r_lost_stracks[itracked]
        #     det = detections[idet]
        #     if track.state == TrackState.Tracked:
        #         track.update(det, self.frame_id)
        #         activated_starcks.append(track)
        #     else:
        #         track.re_activate(det, self.frame_id, new_id=False)
        #         refind_stracks.append(track)
        #
        # for it in u_track:
        #     track = r_lost_stracks[it]
        #     if not track.state == TrackState.Lost:
        #         track.mark_lost()
        #         lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """step 4: init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_starcks.append(track)

        """step 6: update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)
        t4 = time.time()
        # print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)

        # get scores of lost tracks
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
        t5 = time.time()
        # print('Final {} s'.format(t5-t4))
        return output_stracks
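
# NOTE: AETracker runs a two-stage cascade -- appearance first
# (embedding_distance, with IoU used only to veto pairs that barely overlap,
# via the iou_dists > 0.99 mask), then a stricter IoU-only pass at thresh 0.5
# over the leftovers.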


def joint_stracks(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_stracks(tlista, tlistb):
    stracks = {}
    for t in tlista:
        stracks[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if stracks.get(tid, 0):
            del stracks[tid]
    return list(stracks.values())


def remove_duplicate_stracks(stracksa, stracksb):
    pdist = matching.iou_distance(stracksa, stracksb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = stracksa[p].frame_id - stracksa[p].start_frame
        timeq = stracksb[q].frame_id - stracksb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
    return resa, resb
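
# NOTE: remove_duplicate_stracks treats a tracked/lost pair with IoU > 0.85
# (cost < 0.15) as one physical target and keeps whichever track has been
# alive longer, so a re-found track cannot coexist with its lost twin.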
466
tracker/mot_tracker_kalman.py
Normal file
@ -0,0 +1,466 @@
import numpy as np
from numba import jit
from collections import deque
import itertools
import os
import os.path as osp
import time
import torch

from utils.utils import *
from utils.log import logger
from utils.kalman_filter import KalmanFilter
from models import *
from tracker import matching
from .basetrack import BaseTrack, TrackState


class STrack(BaseTrack):

    def __init__(self, tlwh, score, temp_feat, buffer_size=30):
        # wait activate
        self._tlwh = np.asarray(tlwh, dtype=np.float)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0

        self.smooth_feat = None
        self.alpha = 0.9
        self.features = deque([], maxlen=buffer_size)  # create the buffer before the first update
        self.update_features(temp_feat)

    def update_features(self, feat):
        if feat is None:  # IOUTracker builds STracks without embeddings
            self.curr_feat = None
            return
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    def predict(self):
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
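
    # NOTE: the Kalman state is the 8-vector (cx, cy, aspect, h, vx, vy, va,
    # vh); zeroing mean_state[7] freezes the height velocity of non-Tracked
    # tracks so a lost track does not keep growing or shrinking while it
    # coasts.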

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet"""
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        #self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
        )

        self.update_features(new_track.curr_feat)
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        new_tlwh = new_track.tlwh
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            self.update_features(new_track.curr_feat)

    @property
    @jit
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    @jit
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    @jit
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xyah(self):
        return self.tlwh_to_xyah(self.tlwh)
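
    # NOTE: worked example -- tlwh (10, 20, 30, 40) maps to xyah
    # (25, 40, 0.75, 40): centre = top-left + size / 2 and aspect = w / h,
    # which is the observation format the Kalman filter is initiated and
    # updated with.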

    @staticmethod
    @jit
    def tlbr_to_tlwh(tlbr):
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    @jit
    def tlwh_to_tlbr(tlwh):
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)


class IOUTracker(object):
    def __init__(self, opt, frame_rate=30):
        self.opt = opt
        self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
        #load_darknet_weights(self.model, opt.weights)
        self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
        self.model.cuda().eval()

        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.frame_id = 0
        self.det_thresh = opt.conf_thres
        self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
        self.max_time_lost = self.buffer_size
        self.kalman_filter = KalmanFilter()  # needed by STrack.activate() in step 4
        #self.fmap_buffer = deque([], maxlen=self.buffer_size)

    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        t1 = time.time()
        '''Forward'''
        with torch.no_grad():
            pred = self.model(im_blob)
        pred = pred[pred[:, :, 4] > self.opt.conf_thres]
        if len(pred) > 0:
            dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0]
            scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
            '''Detections'''
            detections = [STrack(STrack.tlbr_to_tlwh((t, l, b, r)), s, None) for (t, l, b, r, s) in dets[:, :5]]
        else:
            detections = []

        t2 = time.time()
        #print('Forward: {} s'.format(t2-t1))

        '''matching for tracked targets'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        #dists = self.track_matching(strack_pool, detections, base_feat)
        dists = matching.iou_distance(strack_pool, detections)
        #dists[np.where(iou_dists>0.4)] = 1.0
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        t3 = time.time()
        #print('First match {} s'.format(t3-t2))

        #'''Remained det/track, use IOU between dets and tracks to associate directly'''
        #detections = [detections[i] for i in u_detection]
        #r_tracked_stracks = [strack_pool[i] for i in u_track]
        #dists = matching.iou_distance(r_tracked_stracks, detections)
        #matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
        #for itracked, idet in matches:
        #    r_tracked_stracks[itracked].update(detections[idet], self.frame_id)
        for it in u_track:
            track = strack_pool[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """step 4: init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)

        """step 6: update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)
        t4 = time.time()
        #print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        #self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)

        # get scores of lost tracks
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
        t5 = time.time()
        #print('Final {} s'.format(t5-t4))
        return output_stracks


class AETracker(object):
    def __init__(self, opt, frame_rate=30):
        self.opt = opt
        self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
        # load_darknet_weights(self.model, opt.weights)
        self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'])
        self.model.cuda().eval()

        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.frame_id = 0
        self.det_thresh = opt.conf_thres
        self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
        self.max_time_lost = self.buffer_size

        self.kalman_filter = KalmanFilter()

    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        t1 = time.time()
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            pred = self.model(im_blob)
        pred = pred[pred[:, :, 4] > self.opt.conf_thres]
        if len(pred) > 0:
            dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
            scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
            '''Detections'''
            detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
                          (tlbrs, f) in zip(dets[:, :5], dets[:, -self.model.emb_dim:])]
        else:
            detections = []

        t2 = time.time()
        # print('Forward: {} s'.format(t2-t1))

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        for strack in strack_pool:
            strack.predict()

        dists = matching.embedding_distance(strack_pool, detections)
        dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)

        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)
        t4 = time.time()
        # print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)

        # get scores of lost tracks
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
        t5 = time.time()
        # print('Final {} s'.format(t5-t4))
        return output_stracks
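
# NOTE: this Kalman variant is the fuller pipeline -- Step 2 matches on
# embeddings with Mahalanobis gating (gate_cost_matrix), Step 3 falls back to
# IoU at thresh 0.5 for still-Tracked leftovers, and a detection only spawns
# a new track when its score clears det_thresh.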


def joint_stracks(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_stracks(tlista, tlistb):
    stracks = {}
    for t in tlista:
        stracks[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if stracks.get(tid, 0):
            del stracks[tid]
    return list(stracks.values())


def remove_duplicate_stracks(stracksa, stracksb):
    pdist = matching.iou_distance(stracksa, stracksb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = stracksa[p].frame_id - stracksa[p].start_frame
        timeq = stracksb[q].frame_id - stracksb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
    return resa, resb
198
train.py
Normal file
@ -0,0 +1,198 @@
import argparse
import json
import os
import time
from collections import defaultdict

import test  # Import test.py to get mAP after each epoch
from models import *
from utils.datasets import JointDataset, collate_fn
from utils.utils import *
from torchvision.transforms import transforms as T


def train(
        cfg,
        data_cfg,
        img_size=416,
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
        freeze_backbone=False,
        var=0,
        opt=None,
):
    weights = 'weights' + os.sep
    latest = weights + 'latest.pt'
    best = weights + 'best.pt'
    device = torch_utils.select_device()

    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale

    # Configure run
    with open(data_cfg) as f:
        trainset_paths = json.load(f)['train']

    transforms = T.Compose([T.ToTensor()])
    # Get dataloader
    dataset = JointDataset(trainset_paths, img_size, augment=True, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                             num_workers=8, pin_memory=True, drop_last=True, collate_fn=collate_fn)

    # Initialize model
    model = Darknet(cfg, img_size, dataset.nID)

    lr0 = opt.lr
    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_loss = float('inf')
    if resume:
        checkpoint = torch.load(latest, map_location='cpu')

        # Load weights to resume from
        model.load_state_dict(checkpoint['model'])
        model.to(device).train()

        # Set optimizer
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)

        start_epoch = checkpoint['epoch'] + 1
        if checkpoint['optimizer'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_loss = checkpoint['best_loss']

        del checkpoint  # current, saved

    else:
        # Initialize model with backbone (optional)
        if cfg.endswith('yolov3.cfg'):
            load_darknet_weights(model, weights + 'darknet53.conv.74')
            cutoff = 75
        elif cfg.endswith('yolov3-tiny.cfg'):
            load_darknet_weights(model, weights + 'yolov3-tiny.conv.15')
            cutoff = 15

        model.to(device).train()

        # Set optimizer
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9, weight_decay=1e-4)

    model = torch.nn.DataParallel(model)
    # Set scheduler
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[int(0.5 * opt.epochs), int(0.75 * opt.epochs)], gamma=0.1)
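
    # NOTE: with the default 30 epochs the LR drops 10x at epochs 15 and 22
    # (int(0.5 * epochs) and int(0.75 * epochs)), i.e. 1e-2 -> 1e-3 -> 1e-4
    # for the default --lr.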

    # An important trick for detection: freeze bn during fine-tuning
    if not opt.unfreeze_bn:
        for i, (name, p) in enumerate(model.named_parameters()):
            p.requires_grad = False if 'batch_norm' in name else True

    model_info(model)
    t0 = time.time()
    for epoch in range(epochs):
        epoch += start_epoch

        print(('%8s%12s' + '%10s' * 6) % (
            'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))

        # Update scheduler (automatic)
        scheduler.step()

        # Freeze darknet53.conv.74 for first epoch
        if freeze_backbone and (epoch < 2):
            for i, (name, p) in enumerate(model.named_parameters()):
                if int(name.split('.')[2]) < cutoff:  # if layer < 75
                    p.requires_grad = False if (epoch == 0) else True

        ui = -1
        rloss = defaultdict(float)  # running loss
        optimizer.zero_grad()
        for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
            if sum([len(x) for x in targets]) < 1:  # if no targets continue
                continue

            # SGD burn-in
            burnin = min(1000, len(dataloader))
            if (epoch == 0) and (i <= burnin):
                lr = lr0 * (i / burnin) ** 4
                for g in optimizer.param_groups:
                    g['lr'] = lr
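
            # NOTE: quartic burn-in -- halfway through warm-up (i = 500 of
            # 1000) the LR is lr0 * 0.5**4 = lr0 / 16; the full lr0 is
            # reached only at i = burnin.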

            # Compute loss, compute gradient, update parameters
            loss, components = model(imgs.cuda(), targets.cuda(), targets_len.cuda())
            components = torch.mean(components.view(4, -1), dim=0)

            loss = torch.mean(loss)
            loss.backward()

            # accumulate gradient for x batches before optimizing
            if ((i + 1) % accumulated_batches == 0) or (i == len(dataloader) - 1):
                optimizer.step()
                optimizer.zero_grad()
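
            # NOTE: with accumulated_batches = k the effective batch size is
            # k * batch_size; gradients from loss.backward() simply sum over
            # the k iterations until optimizer.step() fires.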

            # Running epoch-means of tracked metrics
            ui += 1

            for ii, key in enumerate(model.module.loss_names):
                rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)

            s = ('%8s%12s' + '%10.3g' * 6) % (
                '%g/%g' % (epoch, epochs - 1),
                '%g/%g' % (i, len(dataloader) - 1),
                rloss['box'], rloss['conf'],
                rloss['id'], rloss['loss'],
                rloss['nT'], time.time() - t0)
            t0 = time.time()
            if i % opt.print_interval == 0:
                print(s)

        # Save latest checkpoint
        checkpoint = {'epoch': epoch,
                      # 'best_loss': best_loss,
                      'model': model.module.state_dict(),
                      'optimizer': optimizer.state_dict()}
        torch.save(checkpoint, latest)

        # Calculate mAP
        if epoch % opt.test_interval == 0:
            with torch.no_grad():
                mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
                test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
    parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
    parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='data config file path')
    parser.add_argument('--img-size', type=int, default=(1088, 608), help='pixels')
    parser.add_argument('--resume', action='store_true', help='resume training flag')
    parser.add_argument('--var', type=float, default=0, help='test variable')
    parser.add_argument('--print-interval', type=int, default=40, help='print interval')
    parser.add_argument('--test-interval', type=int, default=9, help='test interval')
    parser.add_argument('--lr', type=float, default=1e-2, help='init lr')
    parser.add_argument('--idw', type=float, default=0.1, help='loss id weight')
    parser.add_argument('--unfreeze-bn', action='store_true', help='unfreeze bn')
    opt = parser.parse_args()
    print(opt, end='\n\n')

    init_seeds()

    train(
        opt.cfg,
        opt.data_cfg,
        img_size=opt.img_size,
        resume=opt.resume,
        epochs=opt.epochs,
        batch_size=opt.batch_size,
        accumulated_batches=opt.accumulated_batches,
        var=opt.var,
        opt=opt,
    )
362
utils/datasets.py
Executable file
@ -0,0 +1,362 @@
import glob
import math
import os
import random
import time
from collections import OrderedDict

import cv2
import numpy as np
import torch

from torch.utils.data import Dataset
from utils.utils import xyxy2xywh


class LoadImages:  # for inference
    def __init__(self, path, img_size=(1088, 608)):
        if os.path.isdir(path):
            image_format = ['.jpg', '.jpeg', '.png', '.tif']
            self.files = sorted(glob.glob('%s/*.*' % path))
            self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
        elif os.path.isfile(path):
            self.files = [path]

        self.nF = len(self.files)  # number of image files
        self.width = img_size[0]
        self.height = img_size[1]
        self.count = 0

        assert self.nF > 0, 'No images found in ' + path

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if self.count == self.nF:
            raise StopIteration
        img_path = self.files[self.count]

        # Read image
        img0 = cv2.imread(img_path)  # BGR
        assert img0 is not None, 'Failed to load ' + img_path

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height, width=self.width)

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return img_path, img, img0

    def __getitem__(self, idx):
        idx = idx % self.nF
        img_path = self.files[idx]

        # Read image
        img0 = cv2.imread(img_path)  # BGR
        assert img0 is not None, 'Failed to load ' + img_path

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height, width=self.width)

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        return img_path, img, img0

    def __len__(self):
        return self.nF  # number of files
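
# NOTE: the inference pipeline above is letterbox-resize -> BGR-to-RGB
# (img[:, :, ::-1]) -> HWC-to-CHW transpose -> float32 in [0, 1], which
# matches the input format the Darknet model expects.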


class LoadImagesAndLabels:  # for training
    def __init__(self, path, img_size=(1088, 608), augment=False, transforms=None):
        with open(path, 'r') as file:
            self.img_files = file.readlines()
            self.img_files = [x.replace('\n', '') for x in self.img_files]
            self.img_files = list(filter(lambda x: len(x) > 0, self.img_files))

        self.label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
                            for x in self.img_files]

        self.nF = len(self.img_files)  # number of image files
        self.width = img_size[0]
        self.height = img_size[1]
        self.augment = augment
        self.transforms = transforms

    def __getitem__(self, files_index):
        img_path = self.img_files[files_index]
        label_path = self.label_files[files_index]
        return self.get_data(img_path, label_path)

    def get_data(self, img_path, label_path):
        height = self.height
        width = self.width
        img = cv2.imread(img_path)  # BGR
        if img is None:
            raise ValueError('File corrupt {}'.format(img_path))
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            a = (random.random() * 2 - 1) * fraction + 1
            S *= a
            if a > 1:
                np.clip(S, a_min=0, a_max=255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
                np.clip(V, a_min=0, a_max=255, out=V)

            img_hsv[:, :, 1] = S.astype(np.uint8)
            img_hsv[:, :, 2] = V.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=height, width=width)

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)

            # Normalized xywh to pixel xyxy format
            labels = labels0.copy()
            labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw
            labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh
            labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw
            labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh
        else:
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))

        plotFlag = False
        if plotFlag:
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            plt.figure(figsize=(50, 50))
            plt.imshow(img[:, :, ::-1])
            plt.plot(labels[:, [1, 3, 3, 1, 1]].T, labels[:, [2, 2, 4, 4, 2]].T, '.-')
            plt.axis('off')
            plt.savefig('test.jpg')
            time.sleep(10)

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh
            labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy())  # / height
            labels[:, 2] /= width
            labels[:, 3] /= height
            labels[:, 4] /= width
            labels[:, 5] /= height
        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and (random.random() > 0.5):
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        img = np.ascontiguousarray(img[:, :, ::-1])  # BGR to RGB
        if self.transforms is not None:
            img = self.transforms(img)

        return img, labels, img_path, (h, w)

    def __len__(self):
        return self.nF  # number of images
def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)): # resize a rectangular image to a padded rectangular
|
||||
shape = img.shape[:2] # shape = [height, width]
|
||||
ratio = min(float(height)/shape[0], float(width)/shape[1])
|
||||
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height]
|
||||
dw = (width - new_shape[0]) / 2 # width padding
|
||||
dh = (height - new_shape[1]) / 2 # height padding
|
||||
top, bottom = round(dh - 0.1), round(dh + 0.1)
|
||||
left, right = round(dw - 0.1), round(dw + 0.1)
|
||||
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
|
||||
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular
|
||||
return img, ratio, dw, dh
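

# Quick sanity check of the letterbox geometry above; the 640x480 frame is a
# made-up example and 1088x608 is the network input size used throughout.
demo = np.zeros((480, 640, 3), dtype=np.uint8)
out, ratio, dw, dh = letterbox(demo, height=608, width=1088)
print(out.shape)        # (608, 1088, 3)
print(round(ratio, 3))  # min(608/480, 1088/640) = 1.267
print(dw, dh)           # (1088 - 811) / 2 = 138.5 horizontal pad, 0.0 vertical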


def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    border = 0  # width of added border (optional)
    height = img.shape[0]
    width = img.shape[1]

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined affine matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Return warped points also
    if targets is not None:
        if len(targets) > 0:
            n = targets.shape[0]
            points = targets[:, 2:6].copy()
            area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

            # warp points
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # reject warped points outside of image
            np.clip(xy[:, 0], 0, width, out=xy[:, 0])
            np.clip(xy[:, 2], 0, width, out=xy[:, 2])
            np.clip(xy[:, 1], 0, height, out=xy[:, 1])
            np.clip(xy[:, 3], 0, height, out=xy[:, 3])
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

            targets = targets[i]
            targets[:, 2:6] = xy[i]

        return imw, targets, M
    else:
        return imw
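

# Illustrative call of random_affine on a letterboxed frame; the single target
# row follows the [class, track_id, x1, y1, x2, y2] pixel layout produced in
# get_data, with made-up values.
demo_img = np.full((608, 1088, 3), 127, dtype=np.uint8)
demo_targets = np.array([[0, 5, 100., 200., 180., 420.]], dtype=np.float32)
imw, kept, M = random_affine(demo_img, demo_targets, degrees=(-5, 5),
                             translate=(0.10, 0.10), scale=(0.50, 1.20))
print(imw.shape, kept.shape, M.shape)  # warped image, surviving boxes, 3x3 affine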


def collate_fn(batch):
    imgs, labels, paths, sizes = zip(*batch)
    batch_size = len(labels)
    imgs = torch.stack(imgs, 0)
    max_box_len = max([l.shape[0] for l in labels])
    labels = [torch.from_numpy(l) for l in labels]
    filled_labels = torch.zeros(batch_size, max_box_len, 6)
    labels_len = torch.zeros(batch_size)

    for i in range(batch_size):
        isize = labels[i].shape[0]
        if len(labels[i]) > 0:
            filled_labels[i, :isize, :] = labels[i]
        labels_len[i] = isize

    return imgs, filled_labels, paths, sizes, labels_len.unsqueeze(1)


class JointDataset(LoadImagesAndLabels):  # for training
    def __init__(self, paths, img_size=(1088, 608), augment=False, transforms=None):
        dataset_names = paths.keys()
        self.img_files = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_index = OrderedDict()
        for ds, path in paths.items():
            with open(path, 'r') as file:
                self.img_files[ds] = file.readlines()
                self.img_files[ds] = [x.strip() for x in self.img_files[ds]]
                self.img_files[ds] = list(filter(lambda x: len(x) > 0, self.img_files[ds]))

            self.label_files[ds] = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
                                    for x in self.img_files[ds]]

        # Find the largest track id in each dataset
        for ds, label_paths in self.label_files.items():
            max_index = -1
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue
                if len(lb.shape) < 2:
                    img_max = lb[1]
                else:
                    img_max = np.max(lb[:, 1])
                if img_max > max_index:
                    max_index = img_max
            self.tid_num[ds] = max_index + 1

        last_index = 0
        for i, (k, v) in enumerate(self.tid_num.items()):
            self.tid_start_index[k] = last_index
            last_index += v

        self.nID = int(last_index + 1)
        self.nds = [len(x) for x in self.img_files.values()]
        self.cds = [sum(self.nds[:i]) for i in range(len(self.nds))]
        self.nF = sum(self.nds)
        self.width = img_size[0]
        self.height = img_size[1]
        self.augment = augment
        self.transforms = transforms

        print('=' * 80)
        print('dataset summary')
        print(self.tid_num)
        print('total # identities:', self.nID)
        print('start index')
        print(self.tid_start_index)
        print('=' * 80)

    def __getitem__(self, files_index):
        # Map the global index to a dataset and a local index within it
        for i, c in enumerate(self.cds):
            if files_index >= c:
                ds = list(self.label_files.keys())[i]
                start_index = c

        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]

        imgs, labels, img_path, (h, w) = self.get_data(img_path, label_path)
        # Offset per-dataset track ids so identities stay unique across datasets
        for i, _ in enumerate(labels):
            if labels[i, 1] > -1:
                labels[i, 1] += self.tid_start_index[ds]

        return imgs, labels, img_path, (h, w)
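

# Sketch of wiring the joint dataset into a padded-batch DataLoader. The cfg
# path and batch size are illustrative, and a ToTensor transform is assumed so
# that collate_fn can torch.stack the images.
import json
import torchvision.transforms as T
from torch.utils.data import DataLoader

with open('cfg/ccmcpe.json') as f:
    train_paths = json.load(f)['train']
dataset = JointDataset(train_paths, img_size=(1088, 608), augment=True,
                       transforms=T.ToTensor())
loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)
imgs, targets, _, _, target_lens = next(iter(loader))
print(imgs.shape, targets.shape, target_lens.shape)  # e.g. [16, 3, 608, 1088], [16, K, 6], [16, 1]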

utils/evaluation.py (new file, 101 lines)
@@ -0,0 +1,101 @@
import os
import numpy as np
import copy
import motmetrics as mm

from utils.io import read_results, unzip_objs


class Evaluator(object):

    def __init__(self, data_root, seq_name, data_type):
        self.data_root = data_root
        self.seq_name = seq_name
        self.data_type = data_type

        self.load_annotations()
        self.reset_accumulator()

    def load_annotations(self):
        assert self.data_type == 'mot'

        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
        self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
        self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)

    def reset_accumulator(self):
        self.acc = mm.MOTAccumulator(auto_id=True)

    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
        # results
        trk_tlwhs = np.copy(trk_tlwhs)
        trk_ids = np.copy(trk_ids)

        # gts
        gt_objs = self.gt_frame_dict.get(frame_id, [])
        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]

        # ignore boxes
        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
        ignore_tlwhs = unzip_objs(ignore_objs)[0]

        # remove ignored results
        keep = np.ones(len(trk_tlwhs), dtype=bool)
        iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
        match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
        match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
        match_ious = iou_distance[match_is, match_js]

        match_js = np.asarray(match_js, dtype=int)
        match_js = match_js[np.logical_not(np.isnan(match_ious))]
        keep[match_js] = False
        trk_tlwhs = trk_tlwhs[keep]
        trk_ids = trk_ids[keep]

        # get distance matrix
        iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)

        # acc
        self.acc.update(gt_ids, trk_ids, iou_distance)

        if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
        else:
            events = None
        return events

    def eval_file(self, filename):
        self.reset_accumulator()

        result_frame_dict = read_results(filename, self.data_type, is_gt=False)
        frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
        for frame_id in frames:
            trk_objs = result_frame_dict.get(frame_id, [])
            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)

        return self.acc

    @staticmethod
    def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
        names = copy.deepcopy(names)
        if metrics is None:
            metrics = mm.metrics.motchallenge_metrics
        metrics = copy.deepcopy(metrics)

        mh = mm.metrics.create()
        summary = mh.compute_many(
            accs,
            metrics=metrics,
            names=names,
            generate_overall=True
        )

        return summary

    @staticmethod
    def save_summary(summary, filename):
        import pandas as pd
        writer = pd.ExcelWriter(filename)
        summary.to_excel(writer)
        writer.save()
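

# Hedged usage sketch: scoring one MOT-format result file against MOT ground
# truth. The sequence name and both paths are placeholders.
accs, names = [], []
for seq in ['MOT17-02-SDP']:
    evaluator = Evaluator('./data/MOT17/train', seq, 'mot')
    accs.append(evaluator.eval_file('./results/{}.txt'.format(seq)))
    names.append(seq)
print(Evaluator.get_summary(accs, names))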

utils/io.py (new file, 112 lines)
@@ -0,0 +1,112 @@
import os
from typing import Dict
import numpy as np

from utils.log import logger


def write_results(filename, results_dict: Dict, data_type: str):
    if not filename:
        return
    path = os.path.dirname(filename)
    if not os.path.exists(path):
        os.makedirs(path)

    if data_type in ('mot', 'mcmot', 'lab'):
        save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
    elif data_type == 'kitti':
        save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
    else:
        raise ValueError(data_type)

    with open(filename, 'w') as f:
        for frame_id, frame_data in results_dict.items():
            if data_type == 'kitti':
                frame_id -= 1
            for tlwh, track_id in frame_data:
                if track_id < 0:
                    continue
                x1, y1, w, h = tlwh
                x2, y2 = x1 + w, y1 + h
                line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
                f.write(line)
    logger.info('Save results to {}'.format(filename))


def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
    if data_type in ('mot', 'lab'):
        read_fun = read_mot_results
    else:
        raise ValueError('Unknown data type: {}'.format(data_type))

    return read_fun(filename, is_gt, is_ignore)


"""
labels={'ped', ...              % 1
        'person_on_vhcl', ...   % 2
        'car', ...              % 3
        'bicycle', ...          % 4
        'mbike', ...            % 5
        'non_mot_vhcl', ...     % 6
        'static_person', ...    % 7
        'distractor', ...       % 8
        'occluder', ...         % 9
        'occluder_on_grnd', ... % 10
        'occluder_full', ...    % 11
        'reflection', ...       % 12
        'crowd' ...             % 13
        };
"""


def read_mot_results(filename, is_gt, is_ignore):
    valid_labels = {1}
    ignore_labels = {2, 7, 8, 12}
    results_dict = dict()
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            for line in f.readlines():
                linelist = line.split(',')
                if len(linelist) < 7:
                    continue
                fid = int(linelist[0])
                if fid < 1:
                    continue
                results_dict.setdefault(fid, list())

                if is_gt:
                    if 'MOT16-' in filename or 'MOT17-' in filename:
                        label = int(float(linelist[7]))
                        mark = int(float(linelist[6]))
                        if mark == 0 or label not in valid_labels:
                            continue
                    score = 1
                elif is_ignore:
                    if 'MOT16-' in filename or 'MOT17-' in filename:
                        label = int(float(linelist[7]))
                        vis_ratio = float(linelist[8])
                        if label not in ignore_labels and vis_ratio >= 0:
                            continue
                    else:
                        continue
                    score = 1
                else:
                    score = float(linelist[6])

                tlwh = tuple(map(float, linelist[2:6]))
                target_id = int(linelist[1])

                results_dict[fid].append((tlwh, target_id, score))

    return results_dict


def unzip_objs(objs):
    if len(objs) > 0:
        tlwhs, ids, scores = zip(*objs)
    else:
        tlwhs, ids, scores = [], [], []
    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)

    return tlwhs, ids, scores
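

# Minimal round-trip through the MOT text format handled above; frame ids,
# boxes and the /tmp path are made up.
demo_results = {1: [((10.0, 20.0, 50.0, 120.0), 7)],
                2: [((12.0, 21.0, 50.0, 120.0), 7)]}
write_results('/tmp/demo.txt', demo_results, data_type='mot')
frames = read_results('/tmp/demo.txt', 'mot', is_gt=False)
print(frames[1])  # [((10.0, 20.0, 50.0, 120.0), 7, 1.0)]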

utils/kalman_filter.py (new file, 229 lines)
@@ -0,0 +1,229 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg


"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}


class KalmanFilter(object):
    """
    A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space

        x, y, a, h, vx, vy, va, vh

    contains the bounding box center position (x, y), aspect ratio a, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    (x, y, a, h) is taken as direct observation of the state space (linear
    observation model).

    """

    def __init__(self):
        ndim, dt = 4, 1.

        # Create Kalman filter model matrices.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, a, h) with center position (x, y),
            aspect ratio a, and height h.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.

        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]

        std = [
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[3],
            1e-2,
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            1e-5,
            10 * self._std_weight_velocity * measurement[3]]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean, covariance):
        """Run Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The 8 dimensional mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The 8x8 dimensional covariance matrix of the object state at the
            previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state. Unobserved velocities are initialized to 0 mean.

        """
        std_pos = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-2,
            self._std_weight_position * mean[3]]
        std_vel = [
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[3],
            1e-5,
            self._std_weight_velocity * mean[3]]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        mean = np.dot(self._motion_mat, mean)
        covariance = np.linalg.multi_dot((
            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov

        return mean, covariance

    def project(self, mean, covariance):
        """Project state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector (8 dimensional array).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).

        Returns
        -------
        (ndarray, ndarray)
            Returns the projected mean and covariance matrix of the given state
            estimate.

        """
        std = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-1,
            self._std_weight_position * mean[3]]
        innovation_cov = np.diag(np.square(std))

        mean = np.dot(self._update_mat, mean)
        covariance = np.linalg.multi_dot((
            self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov

    def update(self, mean, covariance, measurement):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (8 dimensional).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).
        measurement : ndarray
            The 4 dimensional measurement vector (x, y, a, h), where (x, y)
            is the center position, a the aspect ratio, and h the height of the
            bounding box.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.

        """
        projected_mean, projected_cov = self.project(mean, covariance)

        chol_factor, lower = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
            check_finite=False).T
        innovation = measurement - projected_mean

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((
            kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, a, h) where (x, y) is the bounding box center
            position, a the aspect ratio, and h the height.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        cholesky_factor = np.linalg.cholesky(covariance)
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
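

# Minimal predict/update cycle on a single track, using the (x, y, a, h)
# measurement convention documented above; the numbers are arbitrary.
kf = KalmanFilter()
mean, cov = kf.initiate(np.array([500., 300., 0.4, 160.]))
mean, cov = kf.predict(mean, cov)
mean, cov = kf.update(mean, cov, np.array([505., 302., 0.4, 162.]))
d = kf.gating_distance(mean, cov, np.array([[505., 302., 0.4, 162.]]))
print(d, d < chi2inv95[4])  # gate at the 0.95 chi-square quantile, 4 dof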

utils/log.py (new file, 18 lines)
@@ -0,0 +1,18 @@
import logging


def get_logger(name='root'):
    formatter = logging.Formatter(
        # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
        fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

    handler = logging.StreamHandler()
    handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)
    return logger


logger = get_logger('root')

utils/nms.py (new file, 7 lines)
@@ -0,0 +1,7 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from utils import _C

nms = _C.nms
# nms.__doc__ = """
# This function performs Non-maximum suppression"""

utils/parse_config.py (new file, 35 lines)
@@ -0,0 +1,35 @@
def parse_model_cfg(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    with open(path, 'r') as file:
        lines = file.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
    lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespace
    module_defs = []
    for line in lines:
        if line.startswith('['):  # this marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs


def parse_data_cfg(path):
    """Parses the data configuration file"""
    options = dict()
    options['gpus'] = '0'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options
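

# Quick check of the parser against the bundled cfg/yolov3.cfg; note that all
# parsed values are strings and downstream code is expected to cast them.
module_defs = parse_model_cfg('cfg/yolov3.cfg')
print(module_defs[0]['type'])                             # net
print(module_defs[1]['type'], module_defs[1]['filters'])  # convolutional 32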

utils/syncbn (new submodule)
@@ -0,0 +1 @@
Subproject commit 265a7059ebbd20c27a81c3d74d43773779fe70d7

utils/timer.py (new executable file, 45 lines)
@@ -0,0 +1,45 @@
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import time


class Timer(object):
    """A simple timer."""
    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

        self.duration = 0.

    def tic(self):
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            self.duration = self.average_time
        else:
            self.duration = self.diff
        return self.duration

    def clear(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.
        self.duration = 0.
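

# Typical tic/toc usage; average=True reports the running mean over calls, and
# time.sleep stands in for the code being timed.
_t = Timer()
for _ in range(3):
    _t.tic()
    time.sleep(0.01)
    print('%.4f s (running mean)' % _t.toc(average=True))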

utils/torch_utils.py (new file, 25 lines)
@@ -0,0 +1,25 @@
import torch


def init_seeds(seed=0):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def select_device(force_cpu=False):
    if force_cpu:
        cuda = False
        device = torch.device('cpu')
    else:
        cuda = torch.cuda.is_available()
        device = torch.device('cuda:0' if cuda else 'cpu')

        if torch.cuda.device_count() > 1:
            print('WARNING Using GPU0 Only: https://github.com/ultralytics/yolov3/issues/21')
            torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
            # print('Using ', torch.cuda.device_count(), ' GPUs')

    print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
    print(device)
    return device

utils/utils.py (new executable file, 545 lines)
@@ -0,0 +1,545 @@
import glob
import random
import time
import os
import os.path as osp

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F

from utils import torch_utils
import maskrcnn_benchmark.layers.nms as nms
from external.lib.nms.cpu_nms import cpu_soft_nms

# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5


def mkdir_if_missing(d):
    if not osp.exists(d):
        os.makedirs(d)


def float3(x):  # format floats to 3 decimals
    return float(format(x, '.3f'))


def init_seeds(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch_utils.init_seeds(seed=seed)


def load_classes(path):
    """
    Loads class labels at 'path'
    """
    with open(path, 'r') as fp:
        names = fp.read().split('\n')
    return list(filter(None, names))  # filter removes empty strings (such as last line)


def model_info(model):  # Prints a line-by-line description of a PyTorch model
    n_p = sum(x.numel() for x in model.parameters())  # number of parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number of gradients
    print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
    for i, (name, p) in enumerate(model.named_parameters()):
        name = name.replace('module_list.', '')
        print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
    print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g))


def coco_class_weights():  # frequency of each class in coco train2014
    weights = 1 / torch.FloatTensor(
        [187437, 4955, 30920, 6033, 3838, 4332, 3160, 7051, 7677, 9167, 1316, 1372, 833, 6757, 7355, 3302, 3776, 4671,
         6769, 5706, 3908, 903, 3686, 3596, 6200, 7920, 8779, 4505, 4272, 1862, 4698, 1962, 4403, 6659, 2402, 2689,
         4012, 4175, 3411, 17048, 5637, 14553, 3923, 5539, 4289, 10084, 7018, 4314, 3099, 4638, 4939, 5543, 2038, 4004,
         5053, 4578, 27292, 4113, 5931, 2905, 11174, 2873, 4036, 3415, 1517, 4122, 1980, 4464, 1190, 2302, 156, 3933,
         1877, 17630, 4337, 4624, 1075, 3468, 135, 1380])
    weights /= weights.sum()
    return weights


def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
         35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
         64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
    return x


def plot_one_box(x, img, color=None, label=None, line_thickness=None):  # Plots one bounding box on image img
    tl = line_thickness or round(0.0004 * max(img.shape[0:2])) + 1  # line thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)


def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
    elif classname.find('BatchNorm2d') != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
        torch.nn.init.constant_(m.bias.data, 0.0)


def xyxy2xywh(x):
    # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
    y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2
    y[:, 2] = x[:, 2] - x[:, 0]
    y[:, 3] = x[:, 3] - x[:, 1]
    return y


def xywh2xyxy(x):
    # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
    y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape)
    y[:, 0] = (x[:, 0] - x[:, 2] / 2)
    y[:, 1] = (x[:, 1] - x[:, 3] / 2)
    y[:, 2] = (x[:, 0] + x[:, 2] / 2)
    y[:, 3] = (x[:, 1] + x[:, 3] / 2)
    return y
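

# Round-trip sanity check for the two converters above (numpy path, since the
# input dtype is float64).
demo_xyxy = np.array([[100., 200., 180., 420.]])
demo_xywh = xyxy2xywh(demo_xyxy)
print(demo_xywh)             # [[140. 310.  80. 220.]]
print(xywh2xyxy(demo_xywh))  # back to [[100. 200. 180. 420.]]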


def scale_coords(img_size, coords, img0_shape):
    # Rescale x1, y1, x2, y2 from the network input size back to the original image size
    gain_w = float(img_size[0]) / img0_shape[1]  # gain = old / new
    gain_h = float(img_size[1]) / img0_shape[0]
    gain = min(gain_w, gain_h)
    pad_x = (img_size[0] - img0_shape[1] * gain) / 2  # width padding
    pad_y = (img_size[1] - img0_shape[0] * gain) / 2  # height padding
    coords[:, [0, 2]] -= pad_x
    coords[:, [1, 3]] -= pad_y
    coords[:, 0:4] /= gain
    coords[:, :4] = torch.clamp(coords[:, :4], min=0)
    return coords


def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute the average precision, given the recall and precision curves.
    Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp: True positives (list).
        conf: Objectness value from 0-1 (list).
        pred_cls: Predicted object classes (list).
        target_cls: True object classes (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # lists/pytorch to numpy
    tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(pred_cls), np.array(target_cls)

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))

    # Create Precision-Recall curve and compute AP for each class
    ap, p, r = [], [], []
    for c in unique_classes:
        i = pred_cls == c
        n_gt = sum(target_cls == c)  # Number of ground truth objects
        n_p = sum(i)  # Number of predicted objects

        if (n_p == 0) and (n_gt == 0):
            continue
        elif (n_p == 0) or (n_gt == 0):
            ap.append(0)
            r.append(0)
            p.append(0)
        else:
            # Accumulate FPs and TPs
            fpc = np.cumsum(1 - tp[i])
            tpc = np.cumsum(tp[i])

            # Recall
            recall_curve = tpc / (n_gt + 1e-16)
            r.append(tpc[-1] / (n_gt + 1e-16))

            # Precision
            precision_curve = tpc / (tpc + fpc)
            p.append(tpc[-1] / (tpc[-1] + fpc[-1]))

            # AP from recall-precision curve
            ap.append(compute_ap(recall_curve, precision_curve))

    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall: The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under the PR curve, look for points
    # where the X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
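

# Worked example: three ranked detections against two ground-truth boxes with
# hit pattern [TP, FP, TP], so AP = 0.5 * 1.0 + 0.5 * (2/3) ~= 0.833.
demo_recall = np.array([0.5, 0.5, 1.0])
demo_precision = np.array([1.0, 0.5, 2 / 3])
print(compute_ap(demo_recall, demo_precision))  # ~0.8333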


def bbox_iou(box1, box2, x1y1x2y2=False):
    """
    Returns the NxM pairwise IoU matrix of two sets of bounding boxes
    """
    N, M = len(box1), len(box2)
    if x1y1x2y2:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
    else:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2

    # get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1.unsqueeze(1), b2_x1)
    inter_rect_y1 = torch.max(b1_y1.unsqueeze(1), b2_y1)
    inter_rect_x2 = torch.min(b1_x2.unsqueeze(1), b2_x2)
    inter_rect_y2 = torch.min(b1_y2.unsqueeze(1), b2_y2)
    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0)
    # Union area
    b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).view(-1, 1).expand(N, M)
    b2_area = ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).view(1, -1).expand(N, M)

    return inter_area / (b1_area + b2_area - inter_area + 1e-16)


def build_targets_max(target, anchor_wh, nA, nC, nGh, nGw):
    """
    returns tconf, tbox, tid
    """
    nB = len(target)  # number of images in batch

    txy = torch.zeros(nB, nA, nGh, nGw, 2).cuda()  # batch size, anchors, grid size
    twh = torch.zeros(nB, nA, nGh, nGw, 2).cuda()
    tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
    tcls = torch.ByteTensor(nB, nA, nGh, nGw, nC).fill_(0).cuda()  # nC = number of classes
    tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda()
    for b in range(nB):
        t = target[b]
        t_id = t[:, 1].clone().long().cuda()
        t = t[:, [0, 2, 3, 4, 5]]
        nTb = len(t)  # number of targets
        if nTb == 0:
            continue

        # gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG
        gxy, gwh = t[:, 1:3].clone(), t[:, 3:5].clone()
        gxy[:, 0] = gxy[:, 0] * nGw
        gxy[:, 1] = gxy[:, 1] * nGh
        gwh[:, 0] = gwh[:, 0] * nGw
        gwh[:, 1] = gwh[:, 1] * nGh
        gi = torch.clamp(gxy[:, 0], min=0, max=nGw - 1).long()
        gj = torch.clamp(gxy[:, 1], min=0, max=nGh - 1).long()

        # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
        # gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()
        # gi, gj = gxy.long().t()

        # iou of targets-anchors (using wh only)
        box1 = gwh
        box2 = anchor_wh.unsqueeze(1)
        inter_area = torch.min(box1, box2).prod(2)
        iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

        # Select best iou_pred and anchor
        iou_best, a = iou.max(0)  # best anchor [0-2] for each target

        # Select best unique target-anchor combinations
        if nTb > 1:
            _, iou_order = torch.sort(-iou_best)  # best to worst

            # Unique anchor selection
            u = torch.stack((gi, gj, a), 0)[:, iou_order]
            # _, first_unique = np.unique(u, axis=1, return_index=True)  # first unique indices
            first_unique = return_torch_unique_index(u, torch.unique(u, dim=1))  # torch alternative
            i = iou_order[first_unique]
            # best anchor must share significant commonality (iou) with target
            i = i[iou_best[i] > 0.60]  # TODO: examine arbitrary threshold
            if len(i) == 0:
                continue

            a, gj, gi, t = a[i], gj[i], gi[i], t[i]
            t_id = t_id[i]
            if len(t.shape) == 1:
                t = t.view(1, 5)
        else:
            if iou_best < 0.60:
                continue

        tc, gxy, gwh = t[:, 0].long(), t[:, 1:3].clone(), t[:, 3:5].clone()
        gxy[:, 0] = gxy[:, 0] * nGw
        gxy[:, 1] = gxy[:, 1] * nGh
        gwh[:, 0] = gwh[:, 0] * nGw
        gwh[:, 1] = gwh[:, 1] * nGh

        # XY coordinates
        txy[b, a, gj, gi] = gxy - gxy.floor()

        # Width and height
        twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a])  # yolo method
        # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2  # power method

        # One-hot encoding of label
        tcls[b, a, gj, gi, tc] = 1
        tconf[b, a, gj, gi] = 1
        tid[b, a, gj, gi] = t_id.unsqueeze(1)
    tbox = torch.cat([txy, twh], -1)
    return tconf, tbox, tid


def build_targets_thres(target, anchor_wh, nA, nC, nGh, nGw):
    ID_THRESH = 0.5
    FG_THRESH = 0.5
    BG_THRESH = 0.4
    nB = len(target)  # number of images in batch
    assert(len(anchor_wh) == nA)

    tbox = torch.zeros(nB, nA, nGh, nGw, 4).cuda()  # batch size, anchors, grid size
    tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
    tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda()
    for b in range(nB):
        t = target[b]
        t_id = t[:, 1].clone().long().cuda()
        t = t[:, [0, 2, 3, 4, 5]]
        nTb = len(t)  # number of targets
        if nTb == 0:
            continue

        gxy, gwh = t[:, 1:3].clone(), t[:, 3:5].clone()
        gxy[:, 0] = gxy[:, 0] * nGw
        gxy[:, 1] = gxy[:, 1] * nGh
        gwh[:, 0] = gwh[:, 0] * nGw
        gwh[:, 1] = gwh[:, 1] * nGh
        gxy[:, 0] = torch.clamp(gxy[:, 0], min=0, max=nGw - 1)
        gxy[:, 1] = torch.clamp(gxy[:, 1], min=0, max=nGh - 1)

        gt_boxes = torch.cat([gxy, gwh], dim=1)  # Shape Ng x 4 (xc, yc, w, h)

        anchor_mesh = generate_anchor(nGh, nGw, anchor_wh)
        anchor_list = anchor_mesh.permute(0, 2, 3, 1).contiguous().view(-1, 4)  # Shape (nA x nGh x nGw) x 4
        # print(anchor_list.shape, gt_boxes.shape)
        iou_pdist = bbox_iou(anchor_list, gt_boxes)  # Shape (nA x nGh x nGw) x Ng
        iou_max, max_gt_index = torch.max(iou_pdist, dim=1)  # Shape (nA x nGh x nGw), both

        iou_map = iou_max.view(nA, nGh, nGw)
        gt_index_map = max_gt_index.view(nA, nGh, nGw)

        # nms_map = pooling_nms(iou_map, 3)

        id_index = iou_map > ID_THRESH
        fg_index = iou_map > FG_THRESH
        bg_index = iou_map < BG_THRESH
        ign_index = (iou_map < FG_THRESH) * (iou_map > BG_THRESH)
        tconf[b][fg_index] = 1
        tconf[b][bg_index] = 0
        tconf[b][ign_index] = -1

        gt_index = gt_index_map[fg_index]
        gt_box_list = gt_boxes[gt_index]
        gt_id_list = t_id[gt_index_map[id_index]]
        # print(gt_index.shape, gt_index_map[id_index].shape, gt_boxes.shape)
        if torch.sum(fg_index) > 0:
            tid[b][id_index] = gt_id_list.unsqueeze(1)
            fg_anchor_list = anchor_list.view(nA, nGh, nGw, 4)[fg_index]
            delta_target = encode_delta(gt_box_list, fg_anchor_list)
            tbox[b][fg_index] = delta_target
    return tconf, tbox, tid


def generate_anchor(nGh, nGw, anchor_wh):
    nA = len(anchor_wh)
    yy, xx = torch.meshgrid(torch.arange(nGh), torch.arange(nGw))
    xx, yy = xx.cuda(), yy.cuda()

    mesh = torch.stack([xx, yy], dim=0)  # Shape 2 x nGh x nGw
    mesh = mesh.unsqueeze(0).repeat(nA, 1, 1, 1).float()  # Shape nA x 2 x nGh x nGw
    anchor_offset_mesh = anchor_wh.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, nGh, nGw)  # Shape nA x 2 x nGh x nGw
    anchor_mesh = torch.cat([mesh, anchor_offset_mesh], dim=1)  # Shape nA x 4 x nGh x nGw
    return anchor_mesh


def encode_delta(gt_box_list, fg_anchor_list):
    px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:, 1], \
                     fg_anchor_list[:, 2], fg_anchor_list[:, 3]
    gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
                     gt_box_list[:, 2], gt_box_list[:, 3]
    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    return torch.stack([dx, dy, dw, dh], dim=1)


def decode_delta(delta, fg_anchor_list):
    px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:, 1], \
                     fg_anchor_list[:, 2], fg_anchor_list[:, 3]
    dx, dy, dw, dh = delta[:, 0], delta[:, 1], delta[:, 2], delta[:, 3]
    gx = pw * dx + px
    gy = ph * dy + py
    gw = pw * torch.exp(dw)
    gh = ph * torch.exp(dh)
    return torch.stack([gx, gy, gw, gh], dim=1)
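

# Round-trip check of the delta parameterization above, on CPU tensors with a
# made-up anchor and ground-truth box in (xc, yc, w, h) grid units.
demo_anchor = torch.tensor([[10., 10., 4., 8.]])
demo_gt = torch.tensor([[11., 12., 6., 10.]])
demo_delta = encode_delta(demo_gt, demo_anchor)
print(demo_delta)                             # [[0.25, 0.25, log(1.5), log(1.25)]]
print(decode_delta(demo_delta, demo_anchor))  # recovers [[11., 12., 6., 10.]]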


def decode_delta_map(delta_map, anchors):
    '''
    :param: delta_map, shape (nB, nA, nGh, nGw, 4)
    :param: anchors, shape (nA, 4)
    '''
    nB, nA, nGh, nGw, _ = delta_map.shape
    anchor_mesh = generate_anchor(nGh, nGw, anchors)
    anchor_mesh = anchor_mesh.permute(0, 2, 3, 1).contiguous()  # Shape (nA x nGh x nGw) x 4
    anchor_mesh = anchor_mesh.unsqueeze(0).repeat(nB, 1, 1, 1, 1)
    pred_list = decode_delta(delta_map.view(-1, 4), anchor_mesh.view(-1, 4))
    pred_map = pred_list.view(nB, nA, nGh, nGw, 4)
    return pred_map


def pooling_nms(heatmap, kernel=1):
    pad = (kernel - 1) // 2
    hmax = F.max_pool2d(heatmap, (kernel, kernel), stride=1, padding=pad)
    keep = (hmax == heatmap).float()
    return keep * heatmap


def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.05, method=1):
    keep = cpu_soft_nms(np.ascontiguousarray(dets, dtype=np.float32),
                        np.float32(sigma), np.float32(Nt),
                        np.float32(threshold),
                        np.uint8(method))
    return keep


def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
    """
    Removes detections with object confidence below 'conf_thres', then applies
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """

    output = [None for _ in range(len(prediction))]
    for image_i, pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        v = pred[:, 4] > conf_thres
        v = v.nonzero().squeeze()
        if len(v.shape) == 0:
            v = v.unsqueeze(0)

        pred = pred[v]

        # If none are remaining => process next image
        nP = pred.shape[0]
        if not nP:
            continue
        # From (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])

        # Non-maximum suppression
        if method == -1:
            nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
        else:
            dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
            nms_indices = soft_nms(dets, Nt=nms_thres, method=method)
        det_max = pred[nms_indices]

        if len(det_max) > 0:
            # Add max detections to outputs
            output[image_i] = det_max if output[image_i] is None else torch.cat((output[image_i], det_max))

    return output


def return_torch_unique_index(u, uv):
    n = uv.shape[1]  # number of columns
    first_unique = torch.zeros(n, device=u.device).long()
    for j in range(n):
        first_unique[j] = (uv[:, j:j + 1] == u).all(0).nonzero()[0]

    return first_unique


def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
    # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
    a = torch.load(filename, map_location='cpu')
    a['optimizer'] = []
    torch.save(a, filename.replace('.pt', '_lite.pt'))


def coco_class_count(path='../coco/labels/train2014/'):
    # histogram of occurrences per class
    nC = 80  # number of classes
    x = np.zeros(nC, dtype='int32')
    files = sorted(glob.glob('%s/*.*' % path))
    for i, file in enumerate(files):
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
        x += np.bincount(labels[:, 0].astype('int32'), minlength=nC)
        print(i, len(files))


def coco_only_people(path='../coco/labels/val2014/'):
    # find images with only people
    files = sorted(glob.glob('%s/*.*' % path))
    for i, file in enumerate(files):
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
        if all(labels[:, 0] == 0):
            print(labels.shape[0], file)


def plot_results():
    # Plot YOLO training results from file 'results.txt'
    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v1.txt')

    plt.figure(figsize=(14, 7))
    s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'mAP', 'Recall', 'Precision']
    files = sorted(glob.glob('results*.txt'))
    for f in files:
        results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T  # column 11 is mAP
        x = range(1, results.shape[1])
        for i in range(8):
            plt.subplot(2, 4, i + 1)
            plt.plot(x, results[i, x], marker='.', label=f)
            plt.title(s[i])
            if i == 0:
                plt.legend()

utils/visualization.py (new file, 90 lines)
@@ -0,0 +1,90 @@
import numpy as np
import cv2


def tlwhs_to_tlbrs(tlwhs):
    tlbrs = np.copy(tlwhs)
    if len(tlbrs) == 0:
        return tlbrs
    tlbrs[:, 2] += tlwhs[:, 0]
    tlbrs[:, 3] += tlwhs[:, 1]
    return tlbrs


def get_color(idx):
    idx = idx * 3
    color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)

    return color


def resize_image(image, max_size=800):
    if max(image.shape[:2]) > max_size:
        scale = float(max_size) / max(image.shape[:2])
        image = cv2.resize(image, None, fx=scale, fy=scale)
    return image


def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None):
    im = np.ascontiguousarray(np.copy(image))
    im_h, im_w = im.shape[:2]

    top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255

    text_scale = max(1, image.shape[1] / 1600.)
    text_thickness = 2 if text_scale > 1.1 else 1
    line_thickness = max(1, int(image.shape[1] / 600.))

    radius = max(5, int(im_w / 140.))
    cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
                (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2)

    for i, tlwh in enumerate(tlwhs):
        x1, y1, w, h = tlwh
        intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
        obj_id = int(obj_ids[i])
        id_text = '{}'.format(int(obj_id))
        if ids2 is not None:
            id_text = id_text + ', {}'.format(int(ids2[i]))
        _line_thickness = 1 if obj_id <= 0 else line_thickness
        color = get_color(abs(obj_id))
        cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
        cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255),
                    thickness=text_thickness)
    return im


def plot_trajectory(image, tlwhs, track_ids):
    image = image.copy()
    for one_tlwhs, track_id in zip(tlwhs, track_ids):
        color = get_color(int(track_id))
        for tlwh in one_tlwhs:
            x1, y1, w, h = tuple(map(int, tlwh))
            cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2)

    return image


def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None):
    im = np.copy(image)
    text_scale = max(1, image.shape[1] / 800.)
    thickness = 2 if text_scale > 1.3 else 1
    for i, det in enumerate(tlbrs):
        x1, y1, x2, y2 = np.asarray(det[:4], dtype=int)
        if len(det) >= 7:
            label = 'det' if det[5] > 0 else 'trk'
            if ids is not None:
                text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i])
                cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
                            thickness=thickness)
            else:
                text = '{}# {:.2f}'.format(label, det[6])

        if scores is not None:
            text = '{:.2f}'.format(scores[i])
            cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
                        thickness=thickness)

        cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)

    return im
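

# Hedged sketch of overlaying tracker output on one frame; the black frame,
# boxes and ids are arbitrary stand-ins for real tracker state.
demo_frame = np.zeros((608, 1088, 3), dtype=np.uint8)
demo_tlwhs = [(100., 200., 80., 220.), (400., 180., 70., 210.)]
demo_ids = [1, 2]
online_im = plot_tracking(demo_frame, demo_tlwhs, demo_ids, frame_id=1, fps=25.0)
# cv2.imshow('online_im', online_im); cv2.waitKey(0)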