Tweak tracker to get better tracks

Ruben van de Ven 2024-11-20 16:59:22 +01:00
parent abc80727da
commit 389da6701f
9 changed files with 578 additions and 96 deletions

custom_bytetrack.yaml (new file, 11 lines)

@ -0,0 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.05 # threshold for the first association
track_low_thresh: 0.01 # threshold for the second association
new_track_thresh: 0.1 # threshold for init new track if the detection does not match any tracks
track_buffer: 35 # buffer to calculate the time when to remove tracks
match_thresh: 0.9 # threshold for matching tracks
fuse_score: True # Whether to fuse confidence scores with the iou distances before matching
# min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now)
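
The thresholds above sit far below the stock Ultralytics ByteTrack defaults, so weak detections are still associated into tracks instead of being discarded. A rough sketch of how this file is picked up, mirroring the _yolov8_track call in trap/tracker.py below (the input frame here is a placeholder):

import numpy as np
from ultralytics import YOLO

model = YOLO('EXPERIMENTS/yolov8x.pt')          # same weights file the repo uses
img = np.zeros((640, 1152, 3), dtype=np.uint8)  # placeholder frame (H, W, C)
# Ultralytics accepts a path to a custom tracker YAML instead of its bundled bytetrack.yaml
results = model.track(img, persist=True, tracker="custom_bytetrack.yaml",
                      classes=[0], verbose=False)
if results[0].boxes is not None and results[0].boxes.id is not None:
    print(results[0].boxes.xywh, results[0].boxes.id.int().tolist())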

poetry.lock (generated, 87 changed lines)

@ -219,6 +219,25 @@ webencodings = "*"
[package.extras]
css = ["tinycss2 (>=1.1.0,<1.3)"]
[[package]]
name = "bytetracker"
version = "0.3.2"
description = "Packaged version of the ByteTrack repository"
optional = false
python-versions = ">=3.5"
files = []
develop = false
[package.dependencies]
lapx = ">=0.5.8"
scipy = ">=1.9.3"
[package.source]
type = "git"
url = "https://github.com/rubenvandeven/bytetrack-pip"
reference = "HEAD"
resolved_reference = "7053b946af8641581999b70230ac6260d37365ae"
[[package]]
name = "cachetools"
version = "5.3.2"
@ -1366,6 +1385,72 @@ files = [
{file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"},
]
[[package]]
name = "lapx"
version = "0.5.11"
description = "Linear Assignment Problem solver (LAPJV/LAPMOD)."
optional = false
python-versions = ">=3.7"
files = [
{file = "lapx-0.5.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ad2e100a81387e958cbb2b25b09f5f4b4c7af1ba39313b4bbda9965ee85b43a2"},
{file = "lapx-0.5.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:523baec5c6a554946c843877802fbefe2230179854134e6b0f281e0f47f5b342"},
{file = "lapx-0.5.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eba714681ea77c834a740eaa43640a3072341a7418e95fb6d5aa4380b6b2069"},
{file = "lapx-0.5.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:797cda3b06e835fa6ee3e245289c03ec29d110e6dd8e333db71483f5bbd32129"},
{file = "lapx-0.5.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:29b1447d32e00a89afa90627c7e806bd7eb8e21e4d559749f4a0fc4e47989f64"},
{file = "lapx-0.5.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:71caffa000782ab265f76e47aa018368b3738a5be5d62581a870dd6e68417703"},
{file = "lapx-0.5.11-cp310-cp310-win_amd64.whl", hash = "sha256:a3c1c20c7d80fa7b6eca0ea9e10966c93ccdaf4d5286e677b199bf021a889b18"},
{file = "lapx-0.5.11-cp310-cp310-win_arm64.whl", hash = "sha256:995aea6268f0a519e536f009f44144c4f4066e0724d19239fbcc1c1ab082d7c0"},
{file = "lapx-0.5.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a665dbc34f04fe21cdb798be1c003fa0d07b0e27e9020487e7626714bad4b8a"},
{file = "lapx-0.5.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5262a868f8802e368ecb470444c830e95b960a2a3763764dd3370680a466684e"},
{file = "lapx-0.5.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824a8c50e191343096d48164f2a0e9a5d466e8d20dd8e3eff1a3c1082b4d2b2"},
{file = "lapx-0.5.11-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d648b706b9a22255a028c72f4849c97ba0d754e03d74009ff447b26dbbd9bb59"},
{file = "lapx-0.5.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f6a0b789023d80b0f5ba3f20d83c9601d02e03abe8ae209ada3a77f304d91fff"},
{file = "lapx-0.5.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:44b3c36d52db1eea6eb0c46795440adddbdfe39c22ddb0f2a86db40ab963a798"},
{file = "lapx-0.5.11-cp311-cp311-win_amd64.whl", hash = "sha256:2e21c0162b426034ff545cedb86713b642f1e7335fda43605b330ca28a107d13"},
{file = "lapx-0.5.11-cp311-cp311-win_arm64.whl", hash = "sha256:2ea0c5dbf62de0612337c4c0a3f1b5ac8cc4fabfb9f68fd1c76612e2d873a28c"},
{file = "lapx-0.5.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:18c0c2e7f7ca76527468d98b99c54cf339ea040512392de6d20d8582235b43bc"},
{file = "lapx-0.5.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ab21b4a45bf975b890ba0364bc354652e3ebb548fb69f23cca4c337ce0e72b"},
{file = "lapx-0.5.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afc0233eaef80c04f88449f1bfbe059bfb5556458bc46de54d080e3236db6588"},
{file = "lapx-0.5.11-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d48adb34670c1c548cc39c40831e44dd57d7fe640320d86d61e3d2bf179f1f79"},
{file = "lapx-0.5.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:28fb1d9f076aff6b98abbc1287aa453d6fd3be0b0a5039adb2a822e2246a2bdd"},
{file = "lapx-0.5.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:70abdb835fcfad72856f3fa27d8233b9b267a9efe534a7fa8ede456b876a696d"},
{file = "lapx-0.5.11-cp312-cp312-win_amd64.whl", hash = "sha256:905fb018952b7b6ea9ef5ac8f5600e525c2545a679d11951bfc0d7e861efbe31"},
{file = "lapx-0.5.11-cp312-cp312-win_arm64.whl", hash = "sha256:02343669611038ec2826c4110d953235397d25e5eff01a5d2cbd9986c3492691"},
{file = "lapx-0.5.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d517ce6b0d17af31c71d9230b03f2c09cb7a701d20b3ffbe02c4366ed91c3b85"},
{file = "lapx-0.5.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6078aa84f768585c6121fc1d8767d6136d9af34ccd48525174ee488c86a59964"},
{file = "lapx-0.5.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:206382e2e942e1d968cb194149d0693a293052c16d0016505788b795818bab21"},
{file = "lapx-0.5.11-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:986f6eaa5a21d5b90869d54a0b7e11e9e532cd8938b3980cbd43d43154d96ac1"},
{file = "lapx-0.5.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:394555e2245cd6aa2ad79cea58c8a5cc73f6c6f79b85f497020e6a978c346329"},
{file = "lapx-0.5.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dcd1a608c19e14d6d7fd47c885c5f3c68ce4e08f3e8ec2eecbe32fc026b0d1de"},
{file = "lapx-0.5.11-cp313-cp313-win_amd64.whl", hash = "sha256:37b6e5f4f04c477a49f7d0780fbe76513c2d5e183bcf5005396c96bfd3de15d6"},
{file = "lapx-0.5.11-cp313-cp313-win_arm64.whl", hash = "sha256:c6c84a46f94829c6d992cce7fe747bacb971bca8cb9e77ea6ff80dfbc4fea6e2"},
{file = "lapx-0.5.11-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb7175a5527a46cd6b9212da766539f256790e93747ba5503bd0507e3fd19e3c"},
{file = "lapx-0.5.11-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33769788e57b5a7fa0951a884c7e5f8a381792427357976041c1d4c3ae75ddcd"},
{file = "lapx-0.5.11-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc015da5a657dbb8822fb47f4702f5bf0b67bafff6f5aed98dad3a6204572e40"},
{file = "lapx-0.5.11-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:9b322e6e0685340f5a10ada401065165fa73e84c2db93ba17945e8e119bd17f5"},
{file = "lapx-0.5.11-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:a64bc3da09c5925efaff59d20dcfbc3febac64fd1fcc263604f7d4ccdd0e2a75"},
{file = "lapx-0.5.11-cp37-cp37m-win_amd64.whl", hash = "sha256:e6d98d31bdf7131a0ec9967068885c86357cc77cf7883d1a1335a48b24e537bb"},
{file = "lapx-0.5.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c381563bc713a6fc8a281698893a8115abb34363929105142d66e592252af196"},
{file = "lapx-0.5.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c4e10ec35b539984684ef9bfdbd0f763a441225e8a9cff5d7081b24795dd419"},
{file = "lapx-0.5.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6583c3a5a47dbfb360d312e4bb3bde509d519886da74161a46ad77653fd18dcb"},
{file = "lapx-0.5.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3963a87d44bc174eee423239dff952a922b3a8e30fbc514c00fab361f464c74"},
{file = "lapx-0.5.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5e8bf4b68f45e378ce7fc68dd407ea210926881e8f25a08dc18beb4a4a7cced0"},
{file = "lapx-0.5.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a8ee4578a898148110dd8ee877370ee99b6309d7b600bde60e204efd93858c0d"},
{file = "lapx-0.5.11-cp38-cp38-win_amd64.whl", hash = "sha256:46b8373f25c1ea85b236fc20183b077efd33112de57f57feccc61f3541f0d8f0"},
{file = "lapx-0.5.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f3c88ff301e7cf9a22a7e276e13a9154f0813a2c3f9c3619f3785def851b5f4c"},
{file = "lapx-0.5.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d07df31b37c92643b9fb6aceeafb53a13375d95dd9cbd2454d83f03941d7e137"},
{file = "lapx-0.5.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cbf42869528ac80d87e1d9019d594a619fd77c26fba0eb8e5242f6657197a57"},
{file = "lapx-0.5.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5f38aa911b4773b44d2da0046aad6ded8f08853cbc1ec60a2363b46371e16f8"},
{file = "lapx-0.5.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d04e5bd530fb04a22004c3ba1a31033c4e3deffe18fb43778fc234a627a55397"},
{file = "lapx-0.5.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:845ca48d0c313113f7ae53420d1e5af87c01355fe14e5624c2a934e1004ef43b"},
{file = "lapx-0.5.11-cp39-cp39-win_amd64.whl", hash = "sha256:0eb81393d0edd089b61de1c3c25895b8ed39bc8d91c766a6e06b761194e79894"},
{file = "lapx-0.5.11-cp39-cp39-win_arm64.whl", hash = "sha256:e12b3e0d9943e92a0cd3af7a6fa5fb8fab3aa018897beda6d02647d9ce708d5a"},
{file = "lapx-0.5.11.tar.gz", hash = "sha256:d925d4a11f436ef0f9e9684378a44e4375aa9c868b22e2f51e6ff15a3362685f"},
]
[package.dependencies]
numpy = ">=1.21.6"
[[package]]
name = "markdown"
version = "3.5.1"
@ -3640,4 +3725,4 @@ watchdog = ["watchdog (>=2.3)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10,<3.12,"
content-hash = "65e5518fd7bc216a72206768f621251e8f2ade9ec07ef0af1d3926492fc6fd70"
content-hash = "bf4feafd4afa6ceb39a1c599e3e7cdc84afbe11ab1672b49e5de99ad44568b08"

pyproject.toml

@ -8,6 +8,7 @@ readme = "README.md"
[tool.poetry.scripts]
trapserv = "trap.plumber:start"
tracker = "trap.tools:tracker_preprocess"
compare = "trap.tools:tracker_compare"
process_data = "trap.process_data:main"
@ -37,6 +38,7 @@ pyglet = "^2.0.15"
pyglet-cornerpin = "^0.3.0"
opencv-python = {file="./opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"}
setproctitle = "^1.3.3"
bytetracker = { git = "https://github.com/rubenvandeven/bytetrack-pip" }
[build-system]
requires = ["poetry-core"]
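
The bytetracker git dependency added above packages ByteTrack's BYTETracker class, which the new ByteTrackWrapper in trap/tracker.py builds on. A minimal sketch of that API as exercised by this commit; the detection row layout follows the [x1, y1, x2, y2, score, label] arrays built in _resnet_detect_persons, and the values are made up:

import numpy as np
from bytetracker import BYTETracker

# thresholds mirror TRACKER_BYTETRACK_MINIMUM / TRACKER_CONFIDENCE_MINIMUM below
tracker = BYTETracker(track_thresh=0.1, match_thresh=0.2, frame_rate=12)

detections = np.array([[120.0, 80.0, 180.0, 260.0, 0.15, 1.0]])  # one [x1,y1,x2,y2,score,label] row
tracker.update(detections)                  # associate detections with existing tracks
for strack in tracker.tracked_stracks:      # STrack objects, as read by ByteTrackWrapper
    if strack.is_activated:
        print(strack.track_id, strack.tlwh, strack.score)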


@ -30,6 +30,8 @@ from trap.preview_renderer import DrawnTrack, PROJECTION_IMG, PROJECTION_MAP
logger = logging.getLogger("trap.renderer")
COLOR_PRIMARY = (0,0,0,255)
class AnimationRenderer:
def __init__(self, config: Namespace, is_running: BaseEvent):
self.config = config
@ -96,7 +98,7 @@ class AnimationRenderer:
self.window.push_handlers(self.pins)
pyglet.gl.glClearColor(255,255,255,255)
self.fps_display = pyglet.window.FPSDisplay(window=self.window, color=(255,255,255,255))
self.fps_display = pyglet.window.FPSDisplay(window=self.window, color=COLOR_PRIMARY)
self.fps_display.label.x = self.window.width - 50
self.fps_display.label.y = self.window.height - 17
self.fps_display.label.bold = False
@ -117,11 +119,11 @@ class AnimationRenderer:
if self.config.render_debug_shapes:
self.debug_lines = [
pyglet.shapes.Line(1370, self.config.camera.h-360, 1380, 670-360, 2, (255,255,255,255), batch=self.batch_overlay),#v
pyglet.shapes.Line(0, 660-360, 1380, 670-360, 2, (255,255,255,255), batch=self.batch_overlay), #h
pyglet.shapes.Line(1140, 760-360, 1140, 675-360, 2, (255,255,255,255), batch=self.batch_overlay), #h
pyglet.shapes.Line(540, 760-360,540, 675-360, 2, (255,255,255,255), batch=self.batch_overlay), #v
pyglet.shapes.Line(0, 770-360, 1380, 770-360, 2, (255,255,255,255), batch=self.batch_overlay), #h
pyglet.shapes.Line(1370, self.config.camera.h-360, 1380, 670-360, 2, COLOR_PRIMARY, batch=self.batch_overlay),#v
pyglet.shapes.Line(0, 660-360, 1380, 670-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #h
pyglet.shapes.Line(1140, 760-360, 1140, 675-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #h
pyglet.shapes.Line(540, 760-360,540, 675-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #v
pyglet.shapes.Line(0, 770-360, 1380, 770-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #h
]
@ -197,7 +199,7 @@ class AnimationRenderer:
self.gradientLine = GradientLine
def init_labels(self):
base_color = (255,)*4
base_color = COLOR_PRIMARY
color_predictor = (255,255,0, 255)
color_info = (255,0, 255, 255)
color_tracker = (0,255, 255, 255)
@ -278,7 +280,7 @@ class AnimationRenderer:
self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg)
# transform to flipped coordinate system for pyglet
self.video_sprite.y = self.window.height - self.video_sprite.height
self.video_sprite.opacity = 10
self.video_sprite.opacity = 70
except zmq.ZMQError as e:
# idx = frame.index if frame else "NONE"
# logger.debug(f"reuse video frame {idx}")

trap/config.py

@ -4,7 +4,7 @@ import types
import numpy as np
import json
from trap.tracker import DETECTORS
from trap.tracker import DETECTORS, TRACKER_BYTETRACK, TRACKERS
from trap.frame_emitter import Camera
from pyparsing import Optional
@ -294,6 +294,11 @@ tracker_parser.add_argument("--detector",
help="Specify the detector to use",
type=str,
choices=DETECTORS)
tracker_parser.add_argument("--tracker",
help="Specify the detector to use",
type=str,
default=TRACKER_BYTETRACK,
choices=TRACKERS)
tracker_parser.add_argument("--smooth-tracks",
help="Smooth the tracker tracks before sending them to the predictor",
action='store_true')
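
The new --tracker flag selects the association back end independently of the detector. A short sketch of how the value reaches the wrapper factory added in trap/tracker.py; the command line shown in the comment is hypothetical:

# e.g. tracker --homography ... --detector fasterrcnn --tracker bytetrack
from trap.config import parser
from trap.tracker import TrackerWrapper

config = parser.parse_args()                             # config.tracker defaults to TRACKER_BYTETRACK
mot_tracker = TrackerWrapper.init_type(config.tracker)   # BYTETracker or DeepSort behind one interface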

trap/frame_emitter.py

@ -8,13 +8,15 @@ from pathlib import Path
import pickle
import sys
import time
from typing import Iterable, Optional
from typing import Iterable, List, Optional
import numpy as np
import cv2
import zmq
import os
from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack
from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState
from bytetracker.byte_tracker import STrack as ByteTrackTrack
from bytetracker.basetrack import TrackState as ByteTrackTrackState
from urllib.parse import urlparse
@ -51,6 +53,17 @@ class DetectionState(IntFlag):
return cls.Confirmed
raise RuntimeError("Should not run into Deleted entries here")
@classmethod
def from_bytetrack_track(cls, track: ByteTrackTrack):
if track.state == ByteTrackTrackState.New:
return cls.Tentative
if track.state == ByteTrackTrackState.Lost:
return cls.Lost
# if track.time_since_update > 0:
if track.state == ByteTrackTrackState.Tracked:
return cls.Confirmed
raise RuntimeError("Should not run into Deleted entries here")
class Camera:
def __init__(self, mtx, dist, w, h, H):
self.mtx = mtx
@ -71,13 +84,19 @@ class Detection:
conf: float # object detector probability
state: DetectionState
frame_nr: int
det_class: str
def get_foot_coords(self) -> list[tuple[float, float]]:
return [self.l + 0.5 * self.w, self.t+self.h]
@classmethod
def from_deepsort(cls, dstrack: DeepsortTrack):
return cls(dstrack.track_id, *dstrack.to_ltwh(), dstrack.det_conf, DetectionState.from_deepsort_track(dstrack))
def from_deepsort(cls, dstrack: DeepsortTrack, frame_nr: int):
return cls(dstrack.track_id, *dstrack.to_ltwh(), dstrack.det_conf, DetectionState.from_deepsort_track(dstrack), frame_nr, dstrack.det_class)
@classmethod
def from_bytetrack(cls, bstrack: ByteTrackTrack, frame_nr: int):
return cls(bstrack.track_id, *bstrack.tlwh, bstrack.score, DetectionState.from_bytetrack_track(bstrack), frame_nr, bstrack.cls)
def get_scaled(self, scale: float = 1):
if scale == 1:
@ -90,7 +109,9 @@ class Detection:
self.w*scale,
self.h*scale,
self.conf,
self.state)
self.state,
self.frame_nr,
self.det_class)
def to_ltwh(self):
return (int(self.l), int(self.t), int(self.w), int(self.h))
@ -107,7 +128,7 @@ class Track:
and acceleration.
"""
track_id: str = None
history: [Detection] = field(default_factory=lambda: [])
history: List[Detection] = field(default_factory=lambda: [])
predictor_history: Optional[list] = None # in image space
predictions: Optional[list] = None
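
Detection now carries the source frame number and the detected class, and can be built from either a DeepSORT track or a ByteTrack STrack. A rough illustration with made-up values (the state mapping follows from_bytetrack_track above: New -> Tentative, Lost -> Lost, Tracked -> Confirmed):

from trap.frame_emitter import Detection, DetectionState

detection = Detection(
    track_id=7,
    l=120, t=80, w=60, h=180,        # bounding box in image pixels (left, top, width, height)
    conf=0.15,                       # ByteTrack keeps boxes DeepSORT would have dropped
    state=DetectionState.Confirmed,
    frame_nr=1024,
    det_class=0,                     # COCO class 0: person
)
print(detection.get_foot_coords())   # [150.0, 260]
print(detection.to_ltwh())           # (120, 80, 60, 180)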

trap/preview_renderer.py

@ -156,7 +156,7 @@ class DrawnTrack:
if ci >= len(self.shapes):
# TODO: add color2
line = self.renderer.gradientLine(x, y, x2, y2, 3, color, color, batch=self.renderer.batch_anim)
line = pyglet.shapes.Arc(x2, y2, 10, thickness=3, color=color, batch=self.renderer.batch_anim)
line = pyglet.shapes.Arc(x2, y2, 10, thickness=2, color=color, batch=self.renderer.batch_anim)
line.opacity = 20
self.shapes.append(line)
@ -164,7 +164,7 @@ class DrawnTrack:
line = self.shapes[ci-1]
line.x, line.y = x, y
line.x2, line.y2 = x2, y2
line.radius = int(exponentialDecay(line.radius, 2, 3, dt))
line.radius = int(exponentialDecay(line.radius, 1.5, 3, dt))
line.color = color
line.opacity = int(exponentialDecay(line.opacity, 180, 8, dt))

trap/tools.py

@ -1,66 +1,95 @@
from argparse import Namespace
from pathlib import Path
import pickle
from tempfile import mktemp
import numpy as np
import pandas as pd
import trap.tracker
from trap.config import parser
from trap.frame_emitter import video_src_from_config, Frame
from trap.tracker import DETECTOR_YOLOv8, _yolov8_track, Track, TrainingDataWriter
from trap.frame_emitter import Detection, DetectionState, video_src_from_config, Frame
from trap.tracker import DETECTOR_YOLOv8, Smoother, _yolov8_track, Track, TrainingDataWriter, Tracker
from collections import defaultdict
import logging
import cv2
from typing import List, Iterable
from typing import List, Iterable, Optional
from ultralytics import YOLO
from ultralytics.engine.results import Results as YOLOResult
import tqdm
config = parser.parse_args()
logger = logging.getLogger('tools')
def tracker_preprocess():
video_srcs = video_src_from_config(config)
if not hasattr(config, "H"):
print("Set homography file with --homography param")
return
if config.detector != DETECTOR_YOLOv8:
print("Only YOLO for now...")
return
model = YOLO('EXPERIMENTS/yolov8x.pt')
with TrainingDataWriter(config.save_for_training) as writer:
for video_nr, video_path in enumerate(video_srcs):
class FrameGenerator():
def __init__(self, config):
self.video_srcs = video_src_from_config(config)
self.config = config
if not hasattr(config, "H"):
raise RuntimeError("Set homography file with --homography param")
# store current position
self.video_path = None
self.video_nr = None
self.frame_count = None
self.frame_idx = None
def __iter__(self):
n = 0
for video_nr, video_path in enumerate(self.video_srcs):
self.video_path = video_path
self.video_nr = video_nr
logger.info(f"Play from '{str(video_path)}'")
video = cv2.VideoCapture(str(video_path))
fps = video.get(cv2.CAP_PROP_FPS)
frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
i = 0
if config.video_offset:
logger.info(f"Start at frame {config.video_offset}")
video.set(cv2.CAP_PROP_POS_FRAMES, config.video_offset)
i = config.video_offset
bar = tqdm.tqdm()
tracks = defaultdict(lambda: Track())
total = 0
self.frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
self.frame_idx = 0
if self.config.video_offset:
logger.info(f"Start at frame {self.config.video_offset}")
video.set(cv2.CAP_PROP_POS_FRAMES, self.config.video_offset)
self.frame_idx = self.config.video_offset
while True:
bar.update()
ret, img = video.read()
i+=1
self.frame_idx+=1
n+=1
# seek to 0 if video has finished. Infinite loop
if not ret:
# now loading multiple files
break
frame = Frame(index=bar.n, img=img, H=config.H, camera=config.camera)
frame = Frame(index=n, img=img, H=self.config.H, camera=self.config.camera)
yield frame
def tracker_preprocess():
config = parser.parse_args()
tracker = Tracker(config)
# model = YOLO('EXPERIMENTS/yolov8x.pt')
with TrainingDataWriter(config.save_for_training) as writer:
bar = tqdm.tqdm()
tracks = defaultdict(lambda: Track())
total = 0
frames = FrameGenerator(config)
for frame in frames:
bar.update()
detections = _yolov8_track(frame, model, classes=[0])
detections = tracker.track_frame(frame)
total += len(detections)
# detections = _yolov8_track(frame, model, imgsz=1440, classes=[0])
bar.set_description(f"[{video_nr}/{len(video_srcs)}] [{i}/{frame_count}] {str(video_path)} -- Detections {len(detections)}: {[d.track_id for d in detections]} (so far {total})")
bar.set_description(f"[{frames.video_nr}/{len(frames.video_srcs)}] [{frames.frame_idx}/{frames.frame_count}] {str(frames.video_path)} -- Detections {len(detections)}: {[d.track_id for d in detections]} (so far {total})")
for detection in detections:
track = tracks[detection.track_id]
@ -72,4 +101,232 @@ def tracker_preprocess():
writer.add(frame, active_tracks.values())
logger.info("Done!")
logger.info("Done!")
bgr_colors = [
(255, 0, 0),
(0, 255, 0),
(0, 0, 255),
(0, 255, 255),
]
def detection_color(detection: Detection, i):
return bgr_colors[i % len(bgr_colors)] if detection.state != DetectionState.Lost else (100,100,100)
def to_point(coord):
return (int(coord[0]), int(coord[1]))
def tracker_compare():
config = parser.parse_args()
trackers: List[Tracker] = []
# TODO, support all tracker.DETECTORS
for tracker_id in [
trap.tracker.DETECTOR_YOLOv8,
# trap.tracker.DETECTOR_MASKRCNN,
# trap.tracker.DETECTOR_RETINANET,
trap.tracker.DETECTOR_FASTERRCNN,
]:
tracker_config = Namespace(**vars(config))
tracker_config.detector = tracker_id
trackers.append(Tracker(tracker_config))
frames = FrameGenerator(config)
bar = tqdm.tqdm(frames)
cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
cv2.setWindowProperty("frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
for frame in bar:
# frame.img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx) # try to undistort for better detections, seems not to matter at all
trackers_detections = [(t, t.track_frame(frame)) for t in trackers]
for i, tracker in enumerate(trackers):
cv2.putText(frame.img, tracker.config.detector, (10,30*(i+1)), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[i % len(bgr_colors)])
for i, (tracker, detections) in enumerate(trackers_detections):
for track_id in tracker.tracks:
history = tracker.tracks[track_id].history
cv2.putText(frame.img, f"{track_id}", to_point(history[0].get_foot_coords()), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[i % len(bgr_colors)])
for j in range(len(history)-1):
a = history[j]
b = history[j+1]
color = detection_color(b, i)
cv2.line(frame.img, to_point(a.get_foot_coords()), to_point(b.get_foot_coords()), color, 1)
for detection in detections:
color = detection_color(detection, i)
l, t, r, b = detection.to_ltrb()
cv2.rectangle(frame.img, (l, t), (r,b), color)
cv2.putText(frame.img, f"{detection.track_id}", (l, b+10), cv2.FONT_HERSHEY_DUPLEX, 1, color=color)
conf = f"{detection.conf:.3f}" if detection.conf is not None else "None"
cv2.putText(frame.img, f"{detection.det_class} - {conf}", (l, t), cv2.FONT_HERSHEY_DUPLEX, .7, color=color)
cv2.imshow('frame',cv2.resize(frame.img, (1920, 1080)))
cv2.waitKey(1)
bar.set_description(f"[{frames.video_nr}/{len(frames.video_srcs)}] [{frames.frame_idx}/{frames.frame_count}] {str(frames.video_path)}")
def interpolate_missing_frames(data: pd.DataFrame):
missing=0
old_size=len(data)
# slow way to append missing steps to the dataset
for ind, row in tqdm.tqdm(data.iterrows()):
if row['diff'] > 1:
for s in range(1, int(row['diff'])):
# add as many entries as missing
missing += 1
data.loc[len(data)] = [row['frame_id']-s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 1, 1]
# new_frame = [data.loc[ind-1]['frame_id']+s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan]
# data.loc[len(data)] = new_frame
logger.info(f'was:{old_size} added:{missing}, new length: {len(data)}')
# now sort, so that the added data is in the right place
data.sort_values(by=['track_id', 'frame_id'], inplace=True)
df=data.copy()
df = df.groupby('track_id').apply(lambda group: group.interpolate(method='linear'))
df.reset_index(drop=True, inplace=True)
# update diff, should now be 1 | NaN
data['diff'] = data.groupby(['track_id'])['frame_id'].diff()
# data = df
return df
def smooth(data: pd.DataFrame):
df=data.copy()
if 'x_raw' not in df:
df['x_raw'] = df['x']
if 'y_raw' not in df:
df['y_raw'] = df['y']
print("Running smoother")
# print(df)
# from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother
smoother = Smoother(convolution=False)
def smoothing(data):
# smoother = ConvolutionSmoother(window_len=SMOOTHING_WINDOW, window_type='ones', copy=None)
return smoother.smooth(data).tolist()
# df=df.assign(smooth_data=smoother.smooth_data[0])
# return smoother.smooth_data[0].tolist()
# operate smoothing per axis
print("smooth x")
df['x'] = df.groupby('track_id')['x_raw'].transform(smoothing)
print("smooth y")
df['y'] = df.groupby('track_id')['y_raw'].transform(smoothing)
return df
def load_tracks_from_csv(file: Path, fps: float, grid_size: Optional[int] = None, sample: Optional[int] = None):
cache_file = Path('/tmp/load_tracks-smooth-' + file.name)
if cache_file.exists():
data = pd.read_pickle(cache_file)
else:
# grid_size is in points per meter
# sample: sample to every n-th point. Thus sample=5 converts 12fps to 2.4fps, and 4 to 3fps
data = pd.read_csv(file, delimiter="\t", index_col=False, header=None)
# l,t,w,h: image space (pixels)
# x,y: world space (meters or cm depending on homography)
data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state']
data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
data['frame_id'] = data['frame_id'] // 10 # compatibility with Trajectron++
data.sort_values(by=['track_id', 'frame_id'],inplace=True)
data.set_index(['track_id', 'frame_id'])
# cm to meter
data['x'] = data['x']/100
data['y'] = data['y']/100
if grid_size is not None:
data['x'] = (data['x']*grid_size).round() / grid_size
data['y'] = (data['y']*grid_size).round() / grid_size
data['diff'] = data.groupby(['track_id'])['frame_id'].diff() #.fillna(0)
data['diff'] = pd.to_numeric(data['diff'], downcast='integer')
data = interpolate_missing_frames(data)
data = smooth(data)
data.to_pickle(cache_file)
if sample is not None:
print(f"Samping 1/{sample}, of {data.shape[0]} items")
data["idx_in_track"] = data.groupby(['track_id']).cumcount() # create index in group
groups = data.groupby(['track_id'])
# print(groups, data)
# selection = groups['idx_in_track'].apply(lambda x: x % sample == 0)
# print(selection)
selection = data["idx_in_track"].apply(lambda x: x % sample == 0)
# data = data[selection]
data = data.loc[selection].copy() # avoid errors
# # convert from e.g. 12Hz, to 2.4Hz (1/5)
# sampled_groups = []
# for name, group in data.groupby('track_id'):
# sampled_groups.append(group.iloc[::sample])
# print(f"Sampled {len(sampled_groups)} groups")
# data = pd.concat(sampled_groups, axis=1).T
print(f"Done sampling kept {data.shape[0]} items")
# String to int
data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')
# redo diff after possible sampling:
data['diff'] = data.groupby(['track_id'])['frame_id'].diff()
# timestep to seconds
data['dt'] = data['diff'] * (1/fps)
# "Deriving displacement, velocity and accelation from x and y")
data['dx'] = data.groupby(['track_id'])['x'].diff()
data['dy'] = data.groupby(['track_id'])['y'].diff()
data['vx'] = data['dx'].div(data['dt'], axis=0)
data['vy'] = data['dy'].div(data['dt'], axis=0)
data['ax'] = data.groupby(['track_id'])['vx'].diff().div(data['dt'], axis=0)
data['ay'] = data.groupby(['track_id'])['vy'].diff().div(data['dt'], axis=0)
# then we need the velocity itself
data['v'] = np.sqrt(data['vx'].pow(2) + data['vy'].pow(2))
# and derive acceleration
data['a'] = data.groupby(['track_id'])['v'].diff().div(data['dt'], axis=0)
# we can calculate heading based on the velocity components
data['heading'] = (np.arctan2(data['vy'], data['vx']) * 180 / np.pi) % 360
# and derive it to get the rate of change of the heading
data['d_heading'] = data.groupby(['track_id'])['heading'].diff().div(data['dt'], axis=0)
# we can backfill the derived parameters (v and a), assuming they were constant when entering the frame
# so that our model can make estimations, based on these assumed values
group = data.groupby(['track_id'])
for field in ['dx', 'dy', 'vx', 'vy', 'ax', 'ay', 'v', 'a', 'heading', 'd_heading']:
data[field] = group[field].bfill()
data.set_index(['track_id', 'frame_id'], inplace=True) # use for quick access
return data
def filter_short_tracks(data: pd.DataFrame, n):
return data.groupby(['track_id']).filter(lambda group: len(group) >= n) # a length of 3 is necessary to have all relevant derivatives of position
# print(filtered_data.shape[0], "items in filtered set, out of", data.shape[0], "in total set")
def normalise_position(data: pd.DataFrame):
mu = data[['x','y']].mean(axis=0)
std = data[['x','y']].std(axis=0)
data[['x_norm','y_norm']] = (data[['x','y']] - mu) / std
return data, mu, std
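
Together, the helpers above form a small offline pipeline: load the tab-separated tracker dump, interpolate missing frames, smooth, derive velocity/acceleration/heading, then filter and normalise. A hedged usage sketch; the file path, fps and parameter values are placeholders:

from pathlib import Path
from trap.tools import load_tracks_from_csv, filter_short_tracks, normalise_position

data = load_tracks_from_csv(
    Path("EXPERIMENTS/tracks.txt"),   # placeholder: tab-separated tracker dump
    fps=12,                           # recording frame rate, used for dt and the derived velocities
    grid_size=None,                   # optional snapping, in points per meter
    sample=5,                         # keep every 5th point: 12 fps becomes 2.4 fps
)
data = filter_short_tracks(data, n=3)       # at least 3 points needed for the derivatives
data, mu, std = normalise_position(data)    # adds x_norm / y_norm columns
print(data[['x', 'y', 'v', 'heading', 'x_norm', 'y_norm']].describe())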

trap/tracker.py

@ -11,10 +11,11 @@ import time
from typing import Optional, List
import numpy as np
import torch
import torchvision
import zmq
import cv2
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights, maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights, maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights, FasterRCNN_ResNet50_FPN_V2_Weights, fasterrcnn_resnet50_fpn_v2
from deep_sort_realtime.deepsort_tracker import DeepSort
from torchvision.models import ResNet50_Weights
from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack
@ -23,7 +24,7 @@ from ultralytics import YOLO
from ultralytics.engine.results import Results as YOLOResult
from trap.frame_emitter import DetectionState, Frame, Detection, Track
from bytetracker import BYTETracker
from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother
import tsmoothie.smoother
@ -45,17 +46,28 @@ DETECTOR_MASKRCNN = 'maskrcnn'
DETECTOR_FASTERRCNN = 'fasterrcnn'
DETECTOR_YOLOv8 = 'ultralytics'
DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8]
TRACKER_DEEPSORT = 'deepsort'
TRACKER_BYTETRACK = 'bytetrack'
DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8]
TRACKERS =[TRACKER_DEEPSORT, TRACKER_BYTETRACK]
TRACKER_CONFIDENCE_MINIMUM = .2
TRACKER_BYTETRACK_MINIMUM = .1 # bytetrack can track items with a lower threshold
NON_MAXIMUM_SUPRESSION = 1
RCNN_SCALE = .4 # seems to have no impact on detections in the corners
def _yolov8_track(frame: Frame, model: YOLO, **kwargs) -> List[Detection]:
results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False, **kwargs))
results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="custom_bytetrack.yaml", verbose=False, **kwargs))
if results[0].boxes is None or results[0].boxes.id is None:
# work around https://github.com/ultralytics/ultralytics/issues/5968
return []
return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]
boxes = results[0].boxes.xywh.cpu()
track_ids = results[0].boxes.id.int().cpu().tolist()
classes = results[0].boxes.cls.int().cpu().tolist()
return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index, class_id) for bbox, track_id, class_id in zip(boxes, track_ids, classes)]
class Multifile():
def __init__(self, srcs: List[Path]):
@ -79,7 +91,7 @@ class Multifile():
class TrainingDataWriter:
def __init__(self, training_path = Optional[Path]):
def __init__(self, training_path: Optional[Path]):
if training_path is None:
self.path = None
return
@ -151,6 +163,9 @@ class TrainingDataWriter:
logger.info(f"Splitting gathered data from {sources.name}")
# for source_file in source_files:
tracks_file = self.path / 'tracks.json'
tracks = defaultdict(lambda: [])
for name, line_nrs in lines.items():
dir_path = self.path / name
@ -178,25 +193,59 @@ class TrainingDataWriter:
parts[1] = str(track_id)
target_fp.write("\t".join(parts))
tracks[track_id].append(parts)
with tracks_file.open('w') as fp:
json.dump(tracks, fp)
class TrackerWrapper():
def __init__(self, tracker):
self.tracker = tracker
def track_detections(self, detections, img: cv2.Mat, frame_idx: int):
raise RuntimeError("Not implemented")
@classmethod
def init_type(cls, tracker_type: str):
if tracker_type == TRACKER_BYTETRACK:
return ByteTrackWrapper(BYTETracker(track_thresh=TRACKER_BYTETRACK_MINIMUM, match_thresh=TRACKER_CONFIDENCE_MINIMUM, frame_rate=12)) # TODO: framerate from emitter
else:
return DeepSortWrapper(DeepSort(n_init=5, max_age=30, nms_max_overlap=NON_MAXIMUM_SUPRESSION,
embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
))
class DeepSortWrapper(TrackerWrapper):
def track_detections(self, detections, img: cv2.Mat, frame_idx: int):
detections = Tracker.detect_persons_deepsort_wrapper(detections)
tracks: List[DeepsortTrack] = self.tracker.update_tracks(detections, frame=img)
active_tracks = [t for t in tracks if t.is_confirmed()]
return [Detection.from_deepsort(t, frame_idx) for t in active_tracks]
# raw_detections, embeds=None, frame=None, today=None, others=None, instance_masks=None
class ByteTrackWrapper(TrackerWrapper):
def __init__(self, tracker: BYTETracker):
self.tracker = tracker
def track_detections(self, detections: tuple[list[float,float,float,float], float, float], img: cv2.Mat, frame_idx: int):
# detections
if detections.shape[0] == 0:
detections = np.ndarray((0,0)) # needs to be 2-D
_ = self.tracker.update(detections)
active_tracks = [track for track in self.tracker.tracked_stracks if track.is_activated]
active_tracks = [track for track in active_tracks if track.start_frame < (self.tracker.frame_id - 5)]
return [Detection.from_bytetrack(track, frame_idx) for track in active_tracks]
class Tracker:
def __init__(self, config: Namespace, is_running: Event):
def __init__(self, config: Namespace):
self.config = config
self.is_running = is_running
context = zmq.Context()
self.frame_sock = context.socket(zmq.SUB)
self.frame_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
self.frame_sock.setsockopt(zmq.SUBSCRIBE, b'')
self.frame_sock.connect(config.zmq_frame_addr)
self.trajectory_socket = context.socket(zmq.PUB)
self.trajectory_socket.setsockopt(zmq.CONFLATE, 1) # only keep latest frame
self.trajectory_socket.bind(config.zmq_trajectory_addr)
# # TODO: config device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@ -206,30 +255,39 @@ class Tracker:
logger.debug(f"Load tracker: {self.config.detector}")
conf = TRACKER_BYTETRACK_MINIMUM if self.config.tracker == TRACKER_BYTETRACK else TRACKER_CONFIDENCE_MINIMUM
if self.config.detector == DETECTOR_RETINANET:
# weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT
# self.model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.2)
weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
self.model = keypointrcnn_resnet50_fpn(weights=weights, box_score_thresh=0.35)
weights = KeypointRCNN_ResNet50_FPN_Weights.COCO_V1
self.model = keypointrcnn_resnet50_fpn(weights=weights, box_score_thresh=conf)
self.model.to(self.device)
# Put the model in inference mode
self.model.eval()
# Get the transforms for the model's weights
self.preprocess = weights.transforms().to(self.device)
self.mot_tracker = DeepSort(max_iou_distance=1, max_cosine_distance=0.5, max_age=15, nms_max_overlap=0.9,
# embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
)
self.mot_tracker = TrackerWrapper.init_type(self.config.tracker)
elif self.config.detector == DETECTOR_FASTERRCNN:
# weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT
# self.model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.2)
weights = FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1
self.model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=conf)
self.model.to(self.device)
# Put the model in inference mode
self.model.eval()
# Get the transforms for the model's weights
self.preprocess = weights.transforms().to(self.device)
self.mot_tracker = TrackerWrapper.init_type(self.config.tracker)
elif self.config.detector == DETECTOR_MASKRCNN:
weights = MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1
self.model = maskrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.7)
self.model = maskrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=conf) # if we use ByteTrack we can work with low probability!
self.model.to(self.device)
# Put the model in inference mode
self.model.eval()
# Get the transforms for the model's weights
self.preprocess = weights.transforms().to(self.device)
self.mot_tracker = DeepSort(n_init=5, max_iou_distance=1, max_cosine_distance=0.5, max_age=15, nms_max_overlap=0.9,
# embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
)
# self.mot_tracker = DeepSort(n_init=5, max_iou_distance=1, max_cosine_distance=0.2, max_age=15, nms_max_overlap=0.9,
self.mot_tracker = TrackerWrapper.init_type(self.config.tracker)
elif self.config.detector == DETECTOR_YOLOv8:
self.model = YOLO('EXPERIMENTS/yolov8x.pt')
else:
@ -249,9 +307,40 @@ class Tracker:
logger.debug("Set up tracker")
def track_frame(self, frame: Frame):
if self.config.detector == DETECTOR_YOLOv8:
detections: List[Detection] = _yolov8_track(frame, self.model, classes=[0], imgsz=[1152, 640])
else :
detections: List[Detection] = self._resnet_track(frame, scale = RCNN_SCALE)
for detection in detections:
track = self.tracks[detection.track_id]
track.track_id = detection.track_id # for new tracks
track.history.append(detection) # add to history
return detections
def track(self):
def track(self, is_running: Event):
"""
Live tracking of frames coming in over zmq
"""
self.is_running = is_running
context = zmq.Context()
self.frame_sock = context.socket(zmq.SUB)
self.frame_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
self.frame_sock.setsockopt(zmq.SUBSCRIBE, b'')
self.frame_sock.connect(self.config.zmq_frame_addr)
self.trajectory_socket = context.socket(zmq.PUB)
self.trajectory_socket.setsockopt(zmq.CONFLATE, 1) # only keep latest frame
self.trajectory_socket.bind(self.config.zmq_trajectory_addr)
prev_run_time = 0
# training_fp = None
@ -312,19 +401,16 @@ class Tracker:
# logger.info(f"Frame delivery delay = {time.time()-frame.time}s")
if self.config.detector == DETECTOR_YOLOv8:
detections: [Detection] = _yolov8_track(frame, self.model, classes=[0], imgsz=[1152, 640])
else :
detections: [Detection] = self._resnet_track(frame.img, scale = 1)
detections: List[Detection] = self.track_frame(frame)
# Store detections into tracklets
projected_coordinates = []
for detection in detections:
track = self.tracks[detection.track_id]
track.track_id = detection.track_id # for new tracks
# now in track_frame()
# for detection in detections:
# track = self.tracks[detection.track_id]
# track.track_id = detection.track_id # for new tracks
track.history.append(detection) # add to history
# track.history.append(detection) # add to history
# projected_coordinates.append(track.get_projected_history(self.H)) # then get full history
# TODO: handle occlusions and disappearance
@ -383,15 +469,17 @@ class Tracker:
logger.info('Stopping')
def _resnet_track(self, img, scale: float = 1) -> [Detection]:
def _resnet_track(self, frame: Frame, scale: float = 1) -> List[Detection]:
img = frame.img
if scale != 1:
dsize = (int(img.shape[1] * scale), int(img.shape[0] * scale))
img = cv2.resize(img, dsize)
detections = self._resnet_detect_persons(img)
tracks: [DeepsortTrack] = self.mot_tracker.update_tracks(detections, frame=img)
return [Detection.from_deepsort(t).get_scaled(1/scale) for t in tracks]
tracks: List[Detection] = self.mot_tracker.track_detections(detections, img, frame.index)
# active_tracks = [t for t in tracks if t.is_confirmed()]
return [d.get_scaled(1/scale) for d in tracks]
def _resnet_detect_persons(self, frame) -> [Detection]:
def _resnet_detect_persons(self, frame) -> List[Detection]:
t = torch.from_numpy(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# change axes of loaded image to be compatible with torch.io.read_image (which has C,W,H format instead of W,H,C)
t = t.permute(2, 0, 1)
@ -406,8 +494,19 @@ class Tracker:
mask = prediction['labels'] == 1 # if we want more than one label: np.isin(prediction['labels'], [1,86])
scores = prediction['scores'][mask]
# print(scores, prediction['labels'])
labels = prediction['labels'][mask]
boxes = prediction['boxes'][mask]
# print(prediction['scores'])
if NON_MAXIMUM_SUPRESSION < 1:
nms_mask = torch.zeros(scores.shape[0]).bool()
nms_keep_ids = torchvision.ops.nms(boxes, scores, NON_MAXIMUM_SUPRESSION)
nms_mask[nms_keep_ids] = True
print(scores.shape[0], nms_keep_ids, nms_mask)
scores = scores[nms_mask]
labels = labels[nms_mask]
boxes = boxes[nms_mask]
# TODO: introduce confidence and NMS suppression: https://github.com/cfotache/pytorch_objectdetecttrack/blob/master/PyTorch_Object_Tracking.ipynb
# (which I _think_ we better do after filtering)
@ -415,7 +514,7 @@ class Tracker:
# dets - a numpy array of detections in the format [[x1,y1,x2,y2,score, label],[x1,y1,x2,y2,score, label],...]
detections = np.array([np.append(bbox, [score, label]) for bbox, score, label in zip(boxes.cpu(), scores.cpu(), labels.cpu())])
detections = self.detect_persons_deepsort_wrapper(detections)
return detections
@ -429,8 +528,8 @@ class Tracker:
def run_tracker(config: Namespace, is_running: Event):
router = Tracker(config, is_running)
router.track()
router = Tracker(config)
router.track(is_running)
@ -467,7 +566,7 @@ class Smoother:
ws = self.smoother.smooth_data[0]
self.smoother.smooth(hs)
hs = self.smoother.smooth_data[0]
new_history = [Detection(d.track_id, l, t, w, h, d.conf, d.state, d.frame_nr) for l, t, w, h, d in zip(ls,ts,ws,hs, track.history)]
new_history = [Detection(d.track_id, l, t, w, h, d.conf, d.state, d.frame_nr, d.det_class) for l, t, w, h, d in zip(ls,ts,ws,hs, track.history)]
new_track = Track(track.track_id, new_history, track.predictor_history, track.predictions)
new_tracks.append(new_track)
frame.tracks = {t.track_id: t for t in new_tracks}
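
With the zmq setup moved from __init__ into track(), a Tracker can now also be driven frame by frame outside the live loop, which is what the new tooling relies on. A minimal offline sketch mirroring trap.tools.tracker_preprocess; the required command-line arguments (homography, video source, detector, tracker) still come from trap.config's parser:

from trap.config import parser
from trap.tools import FrameGenerator
from trap.tracker import Tracker

config = parser.parse_args()
tracker = Tracker(config)                      # no sockets are opened here any more

for frame in FrameGenerator(config):           # decoded video frames with homography attached
    detections = tracker.track_frame(frame)    # detector + TrackerWrapper, also updates tracker.tracks
    for d in detections:
        print(frame.index, d.track_id, d.det_class, d.state.name)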