diff --git a/custom_bytetrack.yaml b/custom_bytetrack.yaml new file mode 100644 index 0000000..4c6fd11 --- /dev/null +++ b/custom_bytetrack.yaml @@ -0,0 +1,11 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack + +tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack'] +track_high_thresh: 0.05 # threshold for the first association +track_low_thresh: 0.01 # threshold for the second association +new_track_thresh: 0.1 # threshold for init new track if the detection does not match any tracks +track_buffer: 35 # buffer to calculate the time when to remove tracks +match_thresh: 0.9 # threshold for matching tracks +fuse_score: True # Whether to fuse confidence scores with the iou distances before matching +# min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) diff --git a/poetry.lock b/poetry.lock index 60cfe18..d99f761 100644 --- a/poetry.lock +++ b/poetry.lock @@ -219,6 +219,25 @@ webencodings = "*" [package.extras] css = ["tinycss2 (>=1.1.0,<1.3)"] +[[package]] +name = "bytetracker" +version = "0.3.2" +description = "Packaged version of the ByteTrack repository" +optional = false +python-versions = ">=3.5" +files = [] +develop = false + +[package.dependencies] +lapx = ">=0.5.8" +scipy = ">=1.9.3" + +[package.source] +type = "git" +url = "https://github.com/rubenvandeven/bytetrack-pip" +reference = "HEAD" +resolved_reference = "7053b946af8641581999b70230ac6260d37365ae" + [[package]] name = "cachetools" version = "5.3.2" @@ -1366,6 +1385,72 @@ files = [ {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, ] +[[package]] +name = "lapx" +version = "0.5.11" +description = "Linear Assignment Problem solver (LAPJV/LAPMOD)." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "lapx-0.5.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ad2e100a81387e958cbb2b25b09f5f4b4c7af1ba39313b4bbda9965ee85b43a2"}, + {file = "lapx-0.5.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:523baec5c6a554946c843877802fbefe2230179854134e6b0f281e0f47f5b342"}, + {file = "lapx-0.5.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eba714681ea77c834a740eaa43640a3072341a7418e95fb6d5aa4380b6b2069"}, + {file = "lapx-0.5.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:797cda3b06e835fa6ee3e245289c03ec29d110e6dd8e333db71483f5bbd32129"}, + {file = "lapx-0.5.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:29b1447d32e00a89afa90627c7e806bd7eb8e21e4d559749f4a0fc4e47989f64"}, + {file = "lapx-0.5.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:71caffa000782ab265f76e47aa018368b3738a5be5d62581a870dd6e68417703"}, + {file = "lapx-0.5.11-cp310-cp310-win_amd64.whl", hash = "sha256:a3c1c20c7d80fa7b6eca0ea9e10966c93ccdaf4d5286e677b199bf021a889b18"}, + {file = "lapx-0.5.11-cp310-cp310-win_arm64.whl", hash = "sha256:995aea6268f0a519e536f009f44144c4f4066e0724d19239fbcc1c1ab082d7c0"}, + {file = "lapx-0.5.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a665dbc34f04fe21cdb798be1c003fa0d07b0e27e9020487e7626714bad4b8a"}, + {file = "lapx-0.5.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5262a868f8802e368ecb470444c830e95b960a2a3763764dd3370680a466684e"}, + {file = "lapx-0.5.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824a8c50e191343096d48164f2a0e9a5d466e8d20dd8e3eff1a3c1082b4d2b2"}, + {file = "lapx-0.5.11-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d648b706b9a22255a028c72f4849c97ba0d754e03d74009ff447b26dbbd9bb59"}, + {file = "lapx-0.5.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f6a0b789023d80b0f5ba3f20d83c9601d02e03abe8ae209ada3a77f304d91fff"}, + {file = "lapx-0.5.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:44b3c36d52db1eea6eb0c46795440adddbdfe39c22ddb0f2a86db40ab963a798"}, + {file = "lapx-0.5.11-cp311-cp311-win_amd64.whl", hash = "sha256:2e21c0162b426034ff545cedb86713b642f1e7335fda43605b330ca28a107d13"}, + {file = "lapx-0.5.11-cp311-cp311-win_arm64.whl", hash = "sha256:2ea0c5dbf62de0612337c4c0a3f1b5ac8cc4fabfb9f68fd1c76612e2d873a28c"}, + {file = "lapx-0.5.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:18c0c2e7f7ca76527468d98b99c54cf339ea040512392de6d20d8582235b43bc"}, + {file = "lapx-0.5.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ab21b4a45bf975b890ba0364bc354652e3ebb548fb69f23cca4c337ce0e72b"}, + {file = "lapx-0.5.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afc0233eaef80c04f88449f1bfbe059bfb5556458bc46de54d080e3236db6588"}, + {file = "lapx-0.5.11-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d48adb34670c1c548cc39c40831e44dd57d7fe640320d86d61e3d2bf179f1f79"}, + {file = "lapx-0.5.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:28fb1d9f076aff6b98abbc1287aa453d6fd3be0b0a5039adb2a822e2246a2bdd"}, + {file = "lapx-0.5.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:70abdb835fcfad72856f3fa27d8233b9b267a9efe534a7fa8ede456b876a696d"}, + {file = "lapx-0.5.11-cp312-cp312-win_amd64.whl", hash = 
"sha256:905fb018952b7b6ea9ef5ac8f5600e525c2545a679d11951bfc0d7e861efbe31"}, + {file = "lapx-0.5.11-cp312-cp312-win_arm64.whl", hash = "sha256:02343669611038ec2826c4110d953235397d25e5eff01a5d2cbd9986c3492691"}, + {file = "lapx-0.5.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d517ce6b0d17af31c71d9230b03f2c09cb7a701d20b3ffbe02c4366ed91c3b85"}, + {file = "lapx-0.5.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6078aa84f768585c6121fc1d8767d6136d9af34ccd48525174ee488c86a59964"}, + {file = "lapx-0.5.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:206382e2e942e1d968cb194149d0693a293052c16d0016505788b795818bab21"}, + {file = "lapx-0.5.11-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:986f6eaa5a21d5b90869d54a0b7e11e9e532cd8938b3980cbd43d43154d96ac1"}, + {file = "lapx-0.5.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:394555e2245cd6aa2ad79cea58c8a5cc73f6c6f79b85f497020e6a978c346329"}, + {file = "lapx-0.5.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dcd1a608c19e14d6d7fd47c885c5f3c68ce4e08f3e8ec2eecbe32fc026b0d1de"}, + {file = "lapx-0.5.11-cp313-cp313-win_amd64.whl", hash = "sha256:37b6e5f4f04c477a49f7d0780fbe76513c2d5e183bcf5005396c96bfd3de15d6"}, + {file = "lapx-0.5.11-cp313-cp313-win_arm64.whl", hash = "sha256:c6c84a46f94829c6d992cce7fe747bacb971bca8cb9e77ea6ff80dfbc4fea6e2"}, + {file = "lapx-0.5.11-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb7175a5527a46cd6b9212da766539f256790e93747ba5503bd0507e3fd19e3c"}, + {file = "lapx-0.5.11-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33769788e57b5a7fa0951a884c7e5f8a381792427357976041c1d4c3ae75ddcd"}, + {file = "lapx-0.5.11-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc015da5a657dbb8822fb47f4702f5bf0b67bafff6f5aed98dad3a6204572e40"}, + {file = "lapx-0.5.11-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:9b322e6e0685340f5a10ada401065165fa73e84c2db93ba17945e8e119bd17f5"}, + {file = "lapx-0.5.11-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:a64bc3da09c5925efaff59d20dcfbc3febac64fd1fcc263604f7d4ccdd0e2a75"}, + {file = "lapx-0.5.11-cp37-cp37m-win_amd64.whl", hash = "sha256:e6d98d31bdf7131a0ec9967068885c86357cc77cf7883d1a1335a48b24e537bb"}, + {file = "lapx-0.5.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c381563bc713a6fc8a281698893a8115abb34363929105142d66e592252af196"}, + {file = "lapx-0.5.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c4e10ec35b539984684ef9bfdbd0f763a441225e8a9cff5d7081b24795dd419"}, + {file = "lapx-0.5.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6583c3a5a47dbfb360d312e4bb3bde509d519886da74161a46ad77653fd18dcb"}, + {file = "lapx-0.5.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3963a87d44bc174eee423239dff952a922b3a8e30fbc514c00fab361f464c74"}, + {file = "lapx-0.5.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5e8bf4b68f45e378ce7fc68dd407ea210926881e8f25a08dc18beb4a4a7cced0"}, + {file = "lapx-0.5.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a8ee4578a898148110dd8ee877370ee99b6309d7b600bde60e204efd93858c0d"}, + {file = "lapx-0.5.11-cp38-cp38-win_amd64.whl", hash = "sha256:46b8373f25c1ea85b236fc20183b077efd33112de57f57feccc61f3541f0d8f0"}, + {file = "lapx-0.5.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:f3c88ff301e7cf9a22a7e276e13a9154f0813a2c3f9c3619f3785def851b5f4c"}, + {file = "lapx-0.5.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d07df31b37c92643b9fb6aceeafb53a13375d95dd9cbd2454d83f03941d7e137"}, + {file = "lapx-0.5.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cbf42869528ac80d87e1d9019d594a619fd77c26fba0eb8e5242f6657197a57"}, + {file = "lapx-0.5.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5f38aa911b4773b44d2da0046aad6ded8f08853cbc1ec60a2363b46371e16f8"}, + {file = "lapx-0.5.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d04e5bd530fb04a22004c3ba1a31033c4e3deffe18fb43778fc234a627a55397"}, + {file = "lapx-0.5.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:845ca48d0c313113f7ae53420d1e5af87c01355fe14e5624c2a934e1004ef43b"}, + {file = "lapx-0.5.11-cp39-cp39-win_amd64.whl", hash = "sha256:0eb81393d0edd089b61de1c3c25895b8ed39bc8d91c766a6e06b761194e79894"}, + {file = "lapx-0.5.11-cp39-cp39-win_arm64.whl", hash = "sha256:e12b3e0d9943e92a0cd3af7a6fa5fb8fab3aa018897beda6d02647d9ce708d5a"}, + {file = "lapx-0.5.11.tar.gz", hash = "sha256:d925d4a11f436ef0f9e9684378a44e4375aa9c868b22e2f51e6ff15a3362685f"}, +] + +[package.dependencies] +numpy = ">=1.21.6" + [[package]] name = "markdown" version = "3.5.1" @@ -3640,4 +3725,4 @@ watchdog = ["watchdog (>=2.3)"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.12," -content-hash = "65e5518fd7bc216a72206768f621251e8f2ade9ec07ef0af1d3926492fc6fd70" +content-hash = "bf4feafd4afa6ceb39a1c599e3e7cdc84afbe11ab1672b49e5de99ad44568b08" diff --git a/pyproject.toml b/pyproject.toml index 5adac19..07c1e57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ readme = "README.md" [tool.poetry.scripts] trapserv = "trap.plumber:start" tracker = "trap.tools:tracker_preprocess" +compare = "trap.tools:tracker_compare" process_data = "trap.process_data:main" @@ -37,6 +38,7 @@ pyglet = "^2.0.15" pyglet-cornerpin = "^0.3.0" opencv-python = {file="./opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"} setproctitle = "^1.3.3" +bytetracker = { git = "https://github.com/rubenvandeven/bytetrack-pip" } [build-system] requires = ["poetry-core"] diff --git a/trap/animation_renderer.py b/trap/animation_renderer.py index 34dc744..75da0e7 100644 --- a/trap/animation_renderer.py +++ b/trap/animation_renderer.py @@ -30,6 +30,8 @@ from trap.preview_renderer import DrawnTrack, PROJECTION_IMG, PROJECTION_MAP logger = logging.getLogger("trap.renderer") +COLOR_PRIMARY = (0,0,0,255) + class AnimationRenderer: def __init__(self, config: Namespace, is_running: BaseEvent): self.config = config @@ -96,7 +98,7 @@ class AnimationRenderer: self.window.push_handlers(self.pins) pyglet.gl.glClearColor(255,255,255,255) - self.fps_display = pyglet.window.FPSDisplay(window=self.window, color=(255,255,255,255)) + self.fps_display = pyglet.window.FPSDisplay(window=self.window, color=COLOR_PRIMARY) self.fps_display.label.x = self.window.width - 50 self.fps_display.label.y = self.window.height - 17 self.fps_display.label.bold = False @@ -117,11 +119,11 @@ class AnimationRenderer: if self.config.render_debug_shapes: self.debug_lines = [ - pyglet.shapes.Line(1370, self.config.camera.h-360, 1380, 670-360, 2, (255,255,255,255), batch=self.batch_overlay),#v - pyglet.shapes.Line(0, 660-360, 1380, 670-360, 2, (255,255,255,255), batch=self.batch_overlay), #h - pyglet.shapes.Line(1140, 760-360, 1140, 675-360, 2, (255,255,255,255), batch=self.batch_overlay), 
#h - pyglet.shapes.Line(540, 760-360,540, 675-360, 2, (255,255,255,255), batch=self.batch_overlay), #v - pyglet.shapes.Line(0, 770-360, 1380, 770-360, 2, (255,255,255,255), batch=self.batch_overlay), #h + pyglet.shapes.Line(1370, self.config.camera.h-360, 1380, 670-360, 2, COLOR_PRIMARY, batch=self.batch_overlay),#v + pyglet.shapes.Line(0, 660-360, 1380, 670-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #h + pyglet.shapes.Line(1140, 760-360, 1140, 675-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #h + pyglet.shapes.Line(540, 760-360,540, 675-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #v + pyglet.shapes.Line(0, 770-360, 1380, 770-360, 2, COLOR_PRIMARY, batch=self.batch_overlay), #h ] @@ -197,7 +199,7 @@ class AnimationRenderer: self.gradientLine = GradientLine def init_labels(self): - base_color = (255,)*4 + base_color = COLOR_PRIMARY color_predictor = (255,255,0, 255) color_info = (255,0, 255, 255) color_tracker = (0,255, 255, 255) @@ -278,7 +280,7 @@ class AnimationRenderer: self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg) # transform to flipped coordinate system for pyglet self.video_sprite.y = self.window.height - self.video_sprite.height - self.video_sprite.opacity = 10 + self.video_sprite.opacity = 70 except zmq.ZMQError as e: # idx = frame.index if frame else "NONE" # logger.debug(f"reuse video frame {idx}") diff --git a/trap/config.py b/trap/config.py index a7ffd21..421b635 100644 --- a/trap/config.py +++ b/trap/config.py @@ -4,7 +4,7 @@ import types import numpy as np import json -from trap.tracker import DETECTORS +from trap.tracker import DETECTORS, TRACKER_BYTETRACK, TRACKERS from trap.frame_emitter import Camera from pyparsing import Optional @@ -294,6 +294,11 @@ tracker_parser.add_argument("--detector", help="Specify the detector to use", type=str, choices=DETECTORS) +tracker_parser.add_argument("--tracker", + help="Specify the tracker to use", + type=str, + default=TRACKER_BYTETRACK, + choices=TRACKERS) tracker_parser.add_argument("--smooth-tracks", help="Smooth the tracker tracks before sending them to the predictor", action='store_true') diff --git a/trap/frame_emitter.py b/trap/frame_emitter.py index c777239..0a0fd3b 100644 --- a/trap/frame_emitter.py +++ b/trap/frame_emitter.py @@ -8,13 +8,15 @@ from pathlib import Path import pickle import sys import time -from typing import Iterable, Optional +from typing import Iterable, List, Optional import numpy as np import cv2 import zmq import os from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState +from bytetracker.byte_tracker import STrack as ByteTrackTrack +from bytetracker.basetrack import TrackState as ByteTrackTrackState from urllib.parse import urlparse @@ -51,6 +53,17 @@ class DetectionState(IntFlag): return cls.Confirmed raise RuntimeError("Should not run into Deleted entries here") + @classmethod + def from_bytetrack_track(cls, track: ByteTrackTrack): + if track.state == ByteTrackTrackState.New: + return cls.Tentative + if track.state == ByteTrackTrackState.Lost: + return cls.Lost + # if track.time_since_update > 0: + if track.state == ByteTrackTrackState.Tracked: + return cls.Confirmed + raise RuntimeError("Should not run into Deleted entries here") + class Camera: def __init__(self, mtx, dist, w, h, H): self.mtx = mtx @@ -71,13 +84,19 @@ class Detection: conf: float # object detector probablity state: DetectionState frame_nr: int + det_class: str + def get_foot_coords(self) -> 
list[tuple[float, float]]: return [self.l + 0.5 * self.w, self.t+self.h] @classmethod - def from_deepsort(cls, dstrack: DeepsortTrack): - return cls(dstrack.track_id, *dstrack.to_ltwh(), dstrack.det_conf, DetectionState.from_deepsort_track(dstrack)) + def from_deepsort(cls, dstrack: DeepsortTrack, frame_nr: int): + return cls(dstrack.track_id, *dstrack.to_ltwh(), dstrack.det_conf, DetectionState.from_deepsort_track(dstrack), frame_nr, dstrack.det_class) + + + @classmethod + def from_bytetrack(cls, bstrack: ByteTrackTrack, frame_nr: int): + return cls(bstrack.track_id, *bstrack.tlwh, bstrack.score, DetectionState.from_bytetrack_track(bstrack), frame_nr, bstrack.cls) def get_scaled(self, scale: float = 1): if scale == 1: @@ -90,7 +109,9 @@ class Detection: self.w*scale, self.h*scale, self.conf, - self.state) + self.state, + self.frame_nr, + self.det_class) def to_ltwh(self): return (int(self.l), int(self.t), int(self.w), int(self.h)) @@ -107,7 +128,7 @@ class Track: and acceleration. """ track_id: str = None - history: [Detection] = field(default_factory=lambda: []) + history: List[Detection] = field(default_factory=lambda: []) predictor_history: Optional[list] = None # in image space predictions: Optional[list] = None diff --git a/trap/preview_renderer.py b/trap/preview_renderer.py index 22f4402..a5c8331 100644 --- a/trap/preview_renderer.py +++ b/trap/preview_renderer.py @@ -156,7 +156,7 @@ class DrawnTrack: if ci >= len(self.shapes): # TODO: add color2 line = self.renderer.gradientLine(x, y, x2, y2, 3, color, color, batch=self.renderer.batch_anim) - line = pyglet.shapes.Arc(x2, y2, 10, thickness=3, color=color, batch=self.renderer.batch_anim) + line = pyglet.shapes.Arc(x2, y2, 10, thickness=2, color=color, batch=self.renderer.batch_anim) line.opacity = 20 self.shapes.append(line) @@ -164,7 +164,7 @@ class DrawnTrack: line = self.shapes[ci-1] line.x, line.y = x, y line.x2, line.y2 = x2, y2 - line.radius = int(exponentialDecay(line.radius, 2, 3, dt)) + line.radius = int(exponentialDecay(line.radius, 1.5, 3, dt)) line.color = color line.opacity = int(exponentialDecay(line.opacity, 180, 8, dt)) diff --git a/trap/tools.py b/trap/tools.py index f935bf4..0aa5614 100644 --- a/trap/tools.py +++ b/trap/tools.py @@ -1,66 +1,95 @@ +from argparse import Namespace +from pathlib import Path +import pickle +from tempfile import mktemp + +import numpy as np +import pandas as pd +import trap.tracker from trap.config import parser -from trap.frame_emitter import video_src_from_config, Frame -from trap.tracker import DETECTOR_YOLOv8, _yolov8_track, Track, TrainingDataWriter +from trap.frame_emitter import Detection, DetectionState, video_src_from_config, Frame +from trap.tracker import DETECTOR_YOLOv8, Smoother, _yolov8_track, Track, TrainingDataWriter, Tracker from collections import defaultdict import logging import cv2 -from typing import List, Iterable +from typing import List, Iterable, Optional from ultralytics import YOLO from ultralytics.engine.results import Results as YOLOResult import tqdm -config = parser.parse_args() + logger = logging.getLogger('tools') -def tracker_preprocess(): - video_srcs = video_src_from_config(config) - if not hasattr(config, "H"): - print("Set homography file with --homography param") - return - - if config.detector != DETECTOR_YOLOv8: - print("Only YOLO for now...") - return - - model = YOLO('EXPERIMENTS/yolov8x.pt') - with TrainingDataWriter(config.save_for_training) as writer: - for video_nr, video_path in enumerate(video_srcs): + +class FrameGenerator(): + def 
__init__(self, config): + self.video_srcs = video_src_from_config(config) + self.config = config + if not hasattr(config, "H"): + raise RuntimeError("Set homography file with --homography param") + + # store current position + self.video_path = None + self.video_nr = None + self.frame_count = None + self.frame_idx = None + + def __iter__(self): + n = 0 + for video_nr, video_path in enumerate(self.video_srcs): + self.video_path = video_path + self.video_nr = video_nr logger.info(f"Play from '{str(video_path)}'") video = cv2.VideoCapture(str(video_path)) fps = video.get(cv2.CAP_PROP_FPS) - frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT) - i = 0 - if config.video_offset: - logger.info(f"Start at frame {config.video_offset}") - video.set(cv2.CAP_PROP_POS_FRAMES, config.video_offset) - i = config.video_offset - - bar = tqdm.tqdm() - tracks = defaultdict(lambda: Track()) - - total = 0 + self.frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT) + self.frame_idx = 0 + if self.config.video_offset: + logger.info(f"Start at frame {self.config.video_offset}") + video.set(cv2.CAP_PROP_POS_FRAMES, self.config.video_offset) + self.frame_idx = self.config.video_offset while True: - bar.update() ret, img = video.read() - i+=1 + self.frame_idx+=1 + n+=1 # seek to 0 if video has finished. Infinite loop if not ret: # now loading multiple files break - frame = Frame(index=bar.n, img=img, H=config.H, camera=config.camera) + frame = Frame(index=n, img=img, H=self.config.H, camera=self.config.camera) + yield frame + + +def tracker_preprocess(): + + config = parser.parse_args() + + + tracker = Tracker(config) + # model = YOLO('EXPERIMENTS/yolov8x.pt') + + with TrainingDataWriter(config.save_for_training) as writer: + + bar = tqdm.tqdm() + tracks = defaultdict(lambda: Track()) + + total = 0 + frames = FrameGenerator(config) + for frame in frames: + bar.update() - detections = _yolov8_track(frame, model, classes=[0]) + detections = tracker.track_frame(frame) total += len(detections) # detections = _yolov8_track(frame, model, imgsz=1440, classes=[0]) - bar.set_description(f"[{video_nr}/{len(video_srcs)}] [{i}/{frame_count}] {str(video_path)} -- Detections {len(detections)}: {[d.track_id for d in detections]} (so far {total})") + bar.set_description(f"[{frames.video_nr}/{len(frames.video_srcs)}] [{frames.frame_idx}/{frames.frame_count}] {str(frames.video_path)} -- Detections {len(detections)}: {[d.track_id for d in detections]} (so far {total})") for detection in detections: track = tracks[detection.track_id] @@ -72,4 +101,232 @@ def tracker_preprocess(): writer.add(frame, active_tracks.values()) - logger.info("Done!") \ No newline at end of file + logger.info("Done!") + +bgr_colors = [ + (255, 0, 0), + (0, 255, 0), + (0, 0, 255), + (0, 255, 255), +] + +def detection_color(detection: Detection, i): + return bgr_colors[i % len(bgr_colors)] if detection.state != DetectionState.Lost else (100,100,100) + +def to_point(coord): + return (int(coord[0]), int(coord[1])) + +def tracker_compare(): + config = parser.parse_args() + + trackers: List[Tracker] = [] + # TODO, support all tracker.DETECTORS + for tracker_id in [ + trap.tracker.DETECTOR_YOLOv8, + # trap.tracker.DETECTOR_MASKRCNN, + # trap.tracker.DETECTOR_RETINANET, + trap.tracker.DETECTOR_FASTERRCNN, + ]: + tracker_config = Namespace(**vars(config)) + tracker_config.detector = tracker_id + trackers.append(Tracker(tracker_config)) + + + frames = FrameGenerator(config) + bar = tqdm.tqdm(frames) + cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN) + 
cv2.setWindowProperty("frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN) + + for frame in bar: + + # frame.img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx) # try to undistort for better detections, seems not to matter at all + trackers_detections = [(t, t.track_frame(frame)) for t in trackers] + + for i, tracker in enumerate(trackers): + cv2.putText(frame.img, tracker.config.detector, (10,30*(i+1)), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[i % len(bgr_colors)]) + + for i, (tracker, detections) in enumerate(trackers_detections): + + for track_id in tracker.tracks: + history = tracker.tracks[track_id].history + cv2.putText(frame.img, f"{track_id}", to_point(history[0].get_foot_coords()), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[i % len(bgr_colors)]) + for j in range(len(history)-1): + a = history[j] + b = history[j+1] + color = detection_color(b, i) + cv2.line(frame.img, to_point(a.get_foot_coords()), to_point(b.get_foot_coords()), color, 1) + for detection in detections: + color = detection_color(detection, i) + l, t, r, b = detection.to_ltrb() + cv2.rectangle(frame.img, (l, t), (r,b), color) + cv2.putText(frame.img, f"{detection.track_id}", (l, b+10), cv2.FONT_HERSHEY_DUPLEX, 1, color=color) + conf = f"{detection.conf:.3f}" if detection.conf is not None else "None" + cv2.putText(frame.img, f"{detection.det_class} - {conf}", (l, t), cv2.FONT_HERSHEY_DUPLEX, .7, color=color) + cv2.imshow('frame',cv2.resize(frame.img, (1920, 1080))) + cv2.waitKey(1) + + bar.set_description(f"[{frames.video_nr}/{len(frames.video_srcs)}] [{frames.frame_idx}/{frames.frame_count}] {str(frames.video_path)}") + + + +def interpolate_missing_frames(data: pd.DataFrame): + missing=0 + old_size=len(data) + # slow way to append missing steps to the dataset + for ind, row in tqdm.tqdm(data.iterrows()): + if row['diff'] > 1: + for s in range(1, int(row['diff'])): + # add as many entries as missing + missing += 1 + data.loc[len(data)] = [row['frame_id']-s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 1, 1] + # new_frame = [data.loc[ind-1]['frame_id']+s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan] + # data.loc[len(data)] = new_frame + + logger.info(f'was:{old_size} added:{missing}, new length: {len(data)}') + + # now sort, so that the added data is in the right place + data.sort_values(by=['track_id', 'frame_id'], inplace=True) + + df=data.copy() + df = df.groupby('track_id').apply(lambda group: group.interpolate(method='linear')) + df.reset_index(drop=True, inplace=True) + + # update diff, should now be 1 | NaN + data['diff'] = data.groupby(['track_id'])['frame_id'].diff() + + # data = df + return df + +def smooth(data: pd.DataFrame): + + df=data.copy() + if 'x_raw' not in df: + df['x_raw'] = df['x'] + if 'y_raw' not in df: + df['y_raw'] = df['y'] + + print("Running smoother") + # print(df) + # from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother + smoother = Smoother(convolution=False) + def smoothing(data): + # smoother = ConvolutionSmoother(window_len=SMOOTHING_WINDOW, window_type='ones', copy=None) + return smoother.smooth(data).tolist() + # df=df.assign(smooth_data=smoother.smooth_data[0]) + # return smoother.smooth_data[0].tolist() + + # operate smoothing per axis + print("smooth x") + df['x'] = df.groupby('track_id')['x_raw'].transform(smoothing) + print("smooth y") + df['y'] = df.groupby('track_id')['y_raw'].transform(smoothing) + + return df + +def load_tracks_from_csv(file: Path, fps: float, 
grid_size: Optional[int] = None, sample: Optional[int] = None): + cache_file = Path('/tmp/load_tracks-smooth-' + file.name) + if cache_file.exists(): + data = pd.read_pickle(cache_file) + + else: + # grid_size is in points per meter + # sample: sample to every n-th point. Thus sample=5 converts 12fps to 2.4fps, and 4 to 3fps + data = pd.read_csv(file, delimiter="\t", index_col=False, header=None) + # l,t,w,h: image space (pixels) + # x,y: world space (meters or cm depending on homography) + data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'] + data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer') + data['frame_id'] = data['frame_id'] // 10 # compatibility with Trajectron++ + + data.sort_values(by=['track_id', 'frame_id'],inplace=True) + + data.set_index(['track_id', 'frame_id']) + + # cm to meter + data['x'] = data['x']/100 + data['y'] = data['y']/100 + + if grid_size is not None: + data['x'] = (data['x']*grid_size).round() / grid_size + data['y'] = (data['y']*grid_size).round() / grid_size + + data['diff'] = data.groupby(['track_id'])['frame_id'].diff() #.fillna(0) + data['diff'] = pd.to_numeric(data['diff'], downcast='integer') + + data = interpolate_missing_frames(data) + + + data = smooth(data) + data.to_pickle(cache_file) + + + if sample is not None: + print(f"Sampling 1/{sample} of {data.shape[0]} items") + data["idx_in_track"] = data.groupby(['track_id']).cumcount() # create index in group + groups = data.groupby(['track_id']) + # print(groups, data) + # selection = groups['idx_in_track'].apply(lambda x: x % sample == 0) + # print(selection) + selection = data["idx_in_track"].apply(lambda x: x % sample == 0) + # data = data[selection] + data = data.loc[selection].copy() # avoid errors + + # # convert from e.g. 
12Hz, to 2.4Hz (1/5) + # sampled_groups = [] + # for name, group in data.groupby('track_id'): + # sampled_groups.append(group.iloc[::sample]) + # print(f"Sampled {len(sampled_groups)} groups") + # data = pd.concat(sampled_groups, axis=1).T + print(f"Done sampling, kept {data.shape[0]} items") + + + # String to int + data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer') + + # redo diff after possible sampling: + data['diff'] = data.groupby(['track_id'])['frame_id'].diff() + # timestep to seconds + data['dt'] = data['diff'] * (1/fps) + + # Deriving displacement, velocity and acceleration from x and y + data['dx'] = data.groupby(['track_id'])['x'].diff() + data['dy'] = data.groupby(['track_id'])['y'].diff() + data['vx'] = data['dx'].div(data['dt'], axis=0) + data['vy'] = data['dy'].div(data['dt'], axis=0) + + data['ax'] = data.groupby(['track_id'])['vx'].diff().div(data['dt'], axis=0) + data['ay'] = data.groupby(['track_id'])['vy'].diff().div(data['dt'], axis=0) + + # then we need the velocity itself + data['v'] = np.sqrt(data['vx'].pow(2) + data['vy'].pow(2)) + # and derive acceleration + data['a'] = data.groupby(['track_id'])['v'].diff().div(data['dt'], axis=0) + + # we can calculate heading based on the velocity components + data['heading'] = (np.arctan2(data['vy'], data['vx']) * 180 / np.pi) % 360 + + # and derive it to get the rate of change of the heading + data['d_heading'] = data.groupby(['track_id'])['heading'].diff().div(data['dt'], axis=0) + + # we can backfill the derived parameters (v and a), assuming they were constant when entering the frame + # so that our model can make estimations, based on these assumed values + group = data.groupby(['track_id']) + for field in ['dx', 'dy', 'vx', 'vy', 'ax', 'ay', 'v', 'a', 'heading', 'd_heading']: + data[field] = group[field].bfill() + + data.set_index(['track_id', 'frame_id'], inplace=True) # use for quick access + return data + + + +def filter_short_tracks(data: pd.DataFrame, n): + return data.groupby(['track_id']).filter(lambda group: len(group) >= n) # a length of 3 is necessary to have all relevant derivatives of position + + # print(filtered_data.shape[0], "items in filtered set, out of", data.shape[0], "in total set") + +def normalise_position(data: pd.DataFrame): + mu = data[['x','y']].mean(axis=0) + std = data[['x','y']].std(axis=0) + + data[['x_norm','y_norm']] = (data[['x','y']] - mu) / std + return data, mu, std \ No newline at end of file diff --git a/trap/tracker.py b/trap/tracker.py index 26f905d..0f104c5 100644 --- a/trap/tracker.py +++ b/trap/tracker.py @@ -11,10 +11,11 @@ import time from typing import Optional, List import numpy as np import torch +import torchvision import zmq import cv2 -from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights, maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights +from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights, maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights, FasterRCNN_ResNet50_FPN_V2_Weights, fasterrcnn_resnet50_fpn_v2 from deep_sort_realtime.deepsort_tracker import DeepSort from torchvision.models import ResNet50_Weights from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack @@ -23,7 +24,7 @@ from ultralytics import YOLO from ultralytics.engine.results import Results as YOLOResult from trap.frame_emitter import 
DetectionState, Frame, Detection, Track - +from bytetracker import BYTETracker from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother import tsmoothie.smoother @@ -45,17 +46,28 @@ DETECTOR_MASKRCNN = 'maskrcnn' DETECTOR_FASTERRCNN = 'fasterrcnn' DETECTOR_YOLOv8 = 'ultralytics' -DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8] +TRACKER_DEEPSORT = 'deepsort' +TRACKER_BYTETRACK = 'bytetrack' +DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8] +TRACKERS = [TRACKER_DEEPSORT, TRACKER_BYTETRACK] + +TRACKER_CONFIDENCE_MINIMUM = .2 +TRACKER_BYTETRACK_MINIMUM = .1 # bytetrack can track items with lower threshold +NON_MAXIMUM_SUPRESSION = 1 +RCNN_SCALE = .4 # seems to have no impact on detections in the corners def _yolov8_track(frame: Frame, model: YOLO, **kwargs) -> List[Detection]: - results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False, **kwargs)) + results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="custom_bytetrack.yaml", verbose=False, **kwargs)) if results[0].boxes is None or results[0].boxes.id is None: # work around https://github.com/ultralytics/ultralytics/issues/5968 return [] - return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())] + boxes = results[0].boxes.xywh.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + classes = results[0].boxes.cls.int().cpu().tolist() + return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index, class_id) for bbox, track_id, class_id in zip(boxes, track_ids, classes)] class Multifile(): def __init__(self, srcs: List[Path]): @@ -79,7 +91,7 @@ class Multifile(): class TrainingDataWriter: - def __init__(self, training_path = Optional[Path]): + def __init__(self, training_path: Optional[Path]): if training_path is None: self.path = None return @@ -151,6 +163,9 @@ class TrainingDataWriter: logger.info(f"Splitting gathered data from {sources.name}") # for source_file in source_files: + + tracks_file = self.path / 'tracks.json' + tracks = defaultdict(lambda: []) for name, line_nrs in lines.items(): dir_path = self.path / name @@ -178,25 +193,59 @@ class TrainingDataWriter: parts[1] = str(track_id) target_fp.write("\t".join(parts)) + tracks[track_id].append(parts) + + with tracks_file.open('w') as fp: + json.dump(tracks, fp) + +class TrackerWrapper(): + def __init__(self, tracker): + self.tracker = tracker + + def track_detections(self, detections, img: cv2.Mat, frame_idx: int): + raise RuntimeError("Not implemented") + + @classmethod + def init_type(cls, tracker_type: str): + if tracker_type == TRACKER_BYTETRACK: + return ByteTrackWrapper(BYTETracker(track_thresh=TRACKER_BYTETRACK_MINIMUM, match_thresh=TRACKER_CONFIDENCE_MINIMUM, frame_rate=12)) # TODO)) Framerate from emitter + else: + return DeepSortWrapper(DeepSort(n_init=5, max_age=30, nms_max_overlap=NON_MAXIMUM_SUPRESSION, + embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth" + )) + +class DeepSortWrapper(TrackerWrapper): + def track_detections(self, detections, img: cv2.Mat, frame_idx: int): + detections = Tracker.detect_persons_deepsort_wrapper(detections) + tracks: List[DeepsortTrack] = self.tracker.update_tracks(detections, frame=img) + active_tracks = [t for t in tracks if t.is_confirmed()] + return 
[Detection.from_deepsort(t, frame_idx) for t in active_tracks] + # raw_detections, embeds=None, frame=None, today=None, others=None, instance_masks=Non + + +class ByteTrackWrapper(TrackerWrapper): + def __init__(self, tracker: BYTETracker): + self.tracker = tracker + + def track_detections(self, detections: tuple[list[float,float,float,float], float, float], img: cv2.Mat, frame_idx: int): + # detections + if detections.shape[0] == 0: + detections = np.ndarray((0,0)) # needs to be 2-D + + _ = self.tracker.update(detections) + active_tracks = [track for track in self.tracker.tracked_stracks if track.is_activated] + active_tracks = [track for track in active_tracks if track.start_frame < (self.tracker.frame_id - 5)] + return [Detection.from_bytetrack(track, frame_idx) for track in active_tracks] + class Tracker: - def __init__(self, config: Namespace, is_running: Event): + def __init__(self, config: Namespace): self.config = config - self.is_running = is_running - - context = zmq.Context() - self.frame_sock = context.socket(zmq.SUB) - self.frame_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!! - self.frame_sock.setsockopt(zmq.SUBSCRIBE, b'') - self.frame_sock.connect(config.zmq_frame_addr) - - self.trajectory_socket = context.socket(zmq.PUB) - self.trajectory_socket.setsockopt(zmq.CONFLATE, 1) # only keep latest frame - self.trajectory_socket.bind(config.zmq_trajectory_addr) + # # TODO: config device self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -206,30 +255,39 @@ class Tracker: logger.debug(f"Load tracker: {self.config.detector}") + conf = TRACKER_BYTETRACK_MINIMUM if self.config.tracker == TRACKER_BYTETRACK else TRACKER_CONFIDENCE_MINIMUM if self.config.detector == DETECTOR_RETINANET: # weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT # self.model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.2) - weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT - self.model = keypointrcnn_resnet50_fpn(weights=weights, box_score_thresh=0.35) + weights = KeypointRCNN_ResNet50_FPN_Weights.COCO_V1 + self.model = keypointrcnn_resnet50_fpn(weights=weights, box_score_thresh=conf) self.model.to(self.device) # Put the model in inference mode self.model.eval() # Get the transforms for the model's weights self.preprocess = weights.transforms().to(self.device) - self.mot_tracker = DeepSort(max_iou_distance=1, max_cosine_distance=0.5, max_age=15, nms_max_overlap=0.9, - # embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth" - ) + self.mot_tracker = TrackerWrapper.init_type(self.config.tracker) + elif self.config.detector == DETECTOR_FASTERRCNN: + # weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT + # self.model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.2) + weights = FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1 + self.model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=conf) + self.model.to(self.device) + # Put the model in inference mode + self.model.eval() + # Get the transforms for the model's weights + self.preprocess = weights.transforms().to(self.device) + self.mot_tracker = TrackerWrapper.init_type(self.config.tracker) elif self.config.detector == DETECTOR_MASKRCNN: weights = MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1 - self.model = maskrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.7) + self.model = maskrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=conf) # if we use ByteTrack we can work with low probability! 
self.model.to(self.device) # Put the model in inference mode self.model.eval() # Get the transforms for the model's weights self.preprocess = weights.transforms().to(self.device) - self.mot_tracker = DeepSort(n_init=5, max_iou_distance=1, max_cosine_distance=0.5, max_age=15, nms_max_overlap=0.9, - # embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth" - ) + # self.mot_tracker = DeepSort(n_init=5, max_iou_distance=1, max_cosine_distance=0.2, max_age=15, nms_max_overlap=0.9, + self.mot_tracker = TrackerWrapper.init_type(self.config.tracker) elif self.config.detector == DETECTOR_YOLOv8: self.model = YOLO('EXPERIMENTS/yolov8x.pt') else: @@ -249,9 +307,40 @@ class Tracker: logger.debug("Set up tracker") + + def track_frame(self, frame: Frame): + if self.config.detector == DETECTOR_YOLOv8: + detections: List[Detection] = _yolov8_track(frame, self.model, classes=[0], imgsz=[1152, 640]) + else : + detections: List[Detection] = self._resnet_track(frame, scale = RCNN_SCALE) + + for detection in detections: + track = self.tracks[detection.track_id] + track.track_id = detection.track_id # for new tracks + + track.history.append(detection) # add to history + + return detections - def track(self): + def track(self, is_running: Event): + """ + Live tracking of frames coming in over zmq + """ + + self.is_running = is_running + + + context = zmq.Context() + self.frame_sock = context.socket(zmq.SUB) + self.frame_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!! + self.frame_sock.setsockopt(zmq.SUBSCRIBE, b'') + self.frame_sock.connect(self.config.zmq_frame_addr) + + self.trajectory_socket = context.socket(zmq.PUB) + self.trajectory_socket.setsockopt(zmq.CONFLATE, 1) # only keep latest frame + self.trajectory_socket.bind(self.config.zmq_trajectory_addr) + prev_run_time = 0 # training_fp = None @@ -312,19 +401,16 @@ class Tracker: # logger.info(f"Frame delivery delay = {time.time()-frame.time}s") - if self.config.detector == DETECTOR_YOLOv8: - detections: [Detection] = _yolov8_track(frame, self.model, classes=[0], imgsz=[1152, 640]) - else : - detections: [Detection] = self._resnet_track(frame.img, scale = 1) - + detections: List[Detection] = self.track_frame(frame) # Store detections into tracklets projected_coordinates = [] - for detection in detections: - track = self.tracks[detection.track_id] - track.track_id = detection.track_id # for new tracks + # now in track_frame() + # for detection in detections: + # track = self.tracks[detection.track_id] + # track.track_id = detection.track_id # for new tracks - track.history.append(detection) # add to history + # track.history.append(detection) # add to history # projected_coordinates.append(track.get_projected_history(self.H)) # then get full history # TODO: hadle occlusions, and dissappearance @@ -383,15 +469,17 @@ class Tracker: logger.info('Stopping') - def _resnet_track(self, img, scale: float = 1) -> [Detection]: + def _resnet_track(self, frame: Frame, scale: float = 1) -> List[Detection]: + img = frame.img if scale != 1: dsize = (int(img.shape[1] * scale), int(img.shape[0] * scale)) img = cv2.resize(img, dsize) detections = self._resnet_detect_persons(img) - tracks: [DeepsortTrack] = self.mot_tracker.update_tracks(detections, frame=img) - return [Detection.from_deepsort(t).get_scaled(1/scale) for t in tracks] + tracks: List[Detection] = self.mot_tracker.track_detections(detections, img, frame.index) + # active_tracks = [t for t in tracks if t.is_confirmed()] + 
return [d.get_scaled(1/scale) for d in tracks] - def _resnet_detect_persons(self, frame) -> [Detection]: + def _resnet_detect_persons(self, frame) -> List[Detection]: t = torch.from_numpy(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) # change axes of image loaded image to be compatilbe with torch.io.read_image (which has C,W,H format instead of W,H,C) t = t.permute(2, 0, 1) @@ -406,8 +494,19 @@ class Tracker: mask = prediction['labels'] == 1 # if we want more than one label: np.isin(prediction['labels'], [1,86]) scores = prediction['scores'][mask] + # print(scores, prediction['labels']) labels = prediction['labels'][mask] boxes = prediction['boxes'][mask] + # print(prediction['scores']) + + if NON_MAXIMUM_SUPRESSION < 1: + nms_mask = torch.zeros(scores.shape[0]).bool() + nms_keep_ids = torchvision.ops.nms(boxes, scores, NON_MAXIMUM_SUPRESSION) + nms_mask[nms_keep_ids] = True + print(scores.shape[0], nms_keep_ids, nms_mask) + scores = scores[nms_mask] + labels = labels[nms_mask] + boxes = boxes[nms_mask] # TODO: introduce confidence and NMS supression: https://github.com/cfotache/pytorch_objectdetecttrack/blob/master/PyTorch_Object_Tracking.ipynb # (which I _think_ we better do after filtering) @@ -415,7 +514,7 @@ class Tracker: # dets - a numpy array of detections in the format [[x1,y1,x2,y2,score, label],[x1,y1,x2,y2,score, label],...] detections = np.array([np.append(bbox, [score, label]) for bbox, score, label in zip(boxes.cpu(), scores.cpu(), labels.cpu())]) - detections = self.detect_persons_deepsort_wrapper(detections) + return detections @@ -429,8 +528,8 @@ class Tracker: def run_tracker(config: Namespace, is_running: Event): - router = Tracker(config, is_running) - router.track() + router = Tracker(config) + router.track(is_running) @@ -467,7 +566,7 @@ class Smoother: ws = self.smoother.smooth_data[0] self.smoother.smooth(hs) hs = self.smoother.smooth_data[0] - new_history = [Detection(d.track_id, l, t, w, h, d.conf, d.state, d.frame_nr) for l, t, w, h, d in zip(ls,ts,ws,hs, track.history)] + new_history = [Detection(d.track_id, l, t, w, h, d.conf, d.state, d.frame_nr, d.det_class) for l, t, w, h, d in zip(ls,ts,ws,hs, track.history)] new_track = Track(track.track_id, new_history, track.predictor_history, track.predictions) new_tracks.append(new_track) frame.tracks = {t.track_id: t for t in new_tracks}
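
Note on the tracker API introduced in this diff: the R-CNN detectors hand the tracker a plain Nx6 array of [x1, y1, x2, y2, score, label] rows (the format built by _resnet_detect_persons), and TrackerWrapper.init_type() hides whether DeepSORT or ByteTrack performs the association. The sketch below is a minimal, hedged usage example and is not part of the diff: the box coordinates, scores, and the 20-frame loop are made-up illustration values; everything else (names, constructor arguments, the start_frame filter) mirrors the code added in trap/tracker.py.

import numpy as np
from trap.tracker import TrackerWrapper, TRACKER_BYTETRACK

# hypothetical person detections for one frame: [x1, y1, x2, y2, score, label]
dets = np.array([
    [100., 200., 150., 320., 0.42, 1.],
    [400., 180., 460., 330., 0.15, 1.],  # low-confidence box; ByteTrack keeps it for the second association pass
])

# per the diff, this builds BYTETracker(track_thresh=.1, match_thresh=.2, frame_rate=12)
wrapper = TrackerWrapper.init_type(TRACKER_BYTETRACK)

# feed successive frames; the ByteTrack wrapper never looks at the image, so img can be None here
for frame_idx in range(1, 20):
    detections = wrapper.track_detections(dets, img=None, frame_idx=frame_idx)
    # returns a List[Detection]; it stays empty for the first few frames because
    # ByteTrackWrapper only surfaces tracks with start_frame < frame_id - 5
    for d in detections:
        print(frame_idx, d.track_id, d.to_ltwh(), d.conf, d.state)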