tracker tool for fast tracking data and camera undistort

parent a0c63c4929
commit 9284ce8849

6 changed files with 272 additions and 148 deletions
pyproject.toml

@@ -7,6 +7,7 @@ readme = "README.md"

 [tool.poetry.scripts]
 trapserv = "trap.plumber:start"
+tracker = "trap.tools:tracker_preprocess"

 [tool.poetry.dependencies]
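The new script entry makes the preprocessing tool runnable as `poetry run tracker`. A minimal sketch of what that entry point resolves to (assuming the trap package is installed in the environment; see trap/tools.py below):

    # Equivalent of `poetry run tracker`: the entry point resolves to
    # trap.tools:tracker_preprocess. Note that trap.tools parses CLI
    # arguments at import time via trap.config's parser.
    from trap.tools import tracker_preprocess

    tracker_preprocess()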
@@ -38,7 +38,7 @@ class AnimationRenderer:

         self.prediction_sock = context.socket(zmq.SUB)
         self.prediction_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
         self.prediction_sock.setsockopt(zmq.SUBSCRIBE, b'')
-        self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
+        self.prediction_sock.connect(config.zmq_prediction_addr)

         self.tracker_sock = context.socket(zmq.SUB)
         self.tracker_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!

@@ -73,7 +73,7 @@ class AnimationRenderer:

         # , fullscreen=self.config.render_window

         display = pyglet.canvas.get_display()
-        screen = display.get_screens()[1]
+        screen = display.get_screens()[0]

         # self.window = pyglet.window.Window(width=self.frame_size[0], height=self.frame_size[1], config=config, fullscreen=False, screen=screens[1])
         self.window = pyglet.window.Window(width=screen.width, height=screen.height, config=config, fullscreen=True, screen=screen)

@@ -108,8 +108,10 @@ class AnimationRenderer:

         self.batch_anim = pyglet.graphics.Batch()

         self.debug_lines = [
-            pyglet.shapes.Line(1380, self.config.camera.h, 1380, 690, 2, (255,255,255,255), batch=self.batch_overlay),
-            pyglet.shapes.Line(0, 660, 1380, 675, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(1380, self.config.camera.h, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(0, 660, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(1140, 760, 1140, 675, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(0, 750, 1380, 760, 2, (255,255,255,255), batch=self.batch_overlay),
         ]
trap/frame_emitter.py

@@ -120,6 +120,7 @@ class Frame:
     time: float= field(default_factory=lambda: time.time())
     tracks: Optional[dict[str, Track]] = None
     H: Optional[np.array] = None
+    camera: Optional[Camera] = None

     def aslist(self) -> [dict]:
         return { t.track_id:

@@ -134,6 +135,13 @@ class Frame:
             } for t in self.tracks.values()
         }

+def video_src_from_config(config):
+    if config.video_loop:
+        video_srcs: Iterable[Path] = cycle(config.video_src)
+    else:
+        video_srcs: Iterable[Path] = config.video_src
+    return video_srcs
+
 class FrameEmitter:
     '''
     Emit frame in a separate threat so they can be throttled,
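A small sketch of the helper's two behaviours (paths are hypothetical): with video_loop set, itertools.cycle repeats the configured sources indefinitely; otherwise they play once in order.

    # Sketch mirroring video_src_from_config above; sources are illustrative.
    from itertools import cycle
    from pathlib import Path

    srcs = [Path('videos/a.mp4'), Path('videos/b.mp4')]
    looped = cycle(srcs)   # video_loop set: a, b, a, b, ...
    once = iter(srcs)      # no loop: a, b, then exhausted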
@@ -151,10 +159,7 @@ class FrameEmitter:

         logger.info(f"Connection socket {config.zmq_frame_addr}")

-        if self.config.video_loop:
-            self.video_srcs: Iterable[Path] = cycle(self.config.video_src)
-        else:
-            self.video_srcs: [Path] = self.config.video_src
+        self.video_srcs = video_src_from_config(self.config)

     def emit_video(self):

@@ -212,7 +217,7 @@ class FrameEmitter:

             # hack to mask out area
             cv2.rectangle(img, (0,0), (800,200), (0,0,0), -1)

-            frame = Frame(index=i, img=img, H=video_H)
+            frame = Frame(index=i, img=img, H=self.config.H, camera=self.config.camera)
             # TODO: this is very dirty, need to find another way.
             # perhaps multiprocessing Array?
             self.frame_sock.send(pickle.dumps(frame))
@@ -253,7 +253,8 @@ class PreviewRenderer:

         self.prediction_sock = context.socket(zmq.SUB)
         self.prediction_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
         self.prediction_sock.setsockopt(zmq.SUBSCRIBE, b'')
-        self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
+        # self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
+        self.prediction_sock.connect(config.zmq_prediction_addr)

         self.tracker_sock = context.socket(zmq.SUB)
         self.tracker_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
trap/tools.py (new file, 72 lines)

@@ -0,0 +1,72 @@
+from trap.config import parser
+from trap.frame_emitter import video_src_from_config, Frame
+from trap.tracker import DETECTOR_YOLOv8, _yolov8_track, Track, TrainingDataWriter
+from collections import defaultdict
+
+import logging
+import cv2
+from typing import List, Iterable
+
+from ultralytics import YOLO
+from ultralytics.engine.results import Results as YOLOResult
+import tqdm
+
+config = parser.parse_args()
+
+logger = logging.getLogger('tools')
+
+def tracker_preprocess():
+    video_srcs = video_src_from_config(config)
+    if not hasattr(config, "H"):
+        print("Set homography file with --homography param")
+        return
+
+    if config.detector != DETECTOR_YOLOv8:
+        print("Only YOLO for now...")
+        return
+
+    model = YOLO('EXPERIMENTS/yolov8x.pt')
+
+    with TrainingDataWriter(config.save_for_training) as writer:
+        for video_nr, video_path in enumerate(video_srcs):
+            logger.info(f"Play from '{str(video_path)}'")
+            video = cv2.VideoCapture(str(video_path))
+            fps = video.get(cv2.CAP_PROP_FPS)
+            frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
+            i = 0
+            if config.video_offset:
+                logger.info(f"Start at frame {config.video_offset}")
+                video.set(cv2.CAP_PROP_POS_FRAMES, config.video_offset)
+                i = config.video_offset
+
+            bar = tqdm.tqdm()
+            tracks = defaultdict(lambda: Track())
+
+            while True:
+                bar.update()
+                ret, img = video.read()
+                i+=1
+
+                # seek to 0 if video has finished. Infinite loop
+                if not ret:
+                    # now loading multiple files
+                    break
+
+                frame = Frame(index=bar.n, img=img, H=config.H, camera=config.camera)
+
+                detections = _yolov8_track(frame, model, classes=[0])
+                # detections = _yolov8_track(frame, model, imgsz=1440, classes=[0])
+
+                bar.set_description(f"[{video_nr}/{len(video_srcs)}] [{i}/{frame_count}] {str(video_path)} -- Detections {len(detections)}: {[d.conf for d in detections]}")
+
+                for detection in detections:
+                    track = tracks[detection.track_id]
+                    track.track_id = detection.track_id # for new tracks
+                    track.history.append(detection) # add to history
+
+                active_track_ids = [d.track_id for d in detections]
+                active_tracks = {t.track_id: t for t in tracks.values() if t.track_id in active_track_ids}
+
+                writer.add(frame, active_tracks.values())
+
+    logger.info("Done!")
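The tracks mapping above relies on defaultdict semantics: the first lookup of an unseen track_id constructs an empty Track, which the loop then fills. A minimal sketch of that pattern (Track import assumed to be available as in the file above):

    # Sketch of the defaultdict pattern used in tracker_preprocess().
    from collections import defaultdict
    from trap.tracker import Track

    tracks = defaultdict(lambda: Track())
    track = tracks[42]      # first access creates a fresh Track()
    track.track_id = 42     # set the id, as the detection loop does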
trap/tracker.py
@@ -8,7 +8,7 @@ from multiprocessing import Event
 from pathlib import Path
 import pickle
 import time
-from typing import Optional
+from typing import Optional, List
 import numpy as np
 import torch
 import zmq
@@ -47,8 +47,89 @@ DETECTOR_YOLOv8 = 'ultralytics'

 DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8]

+
+def _yolov8_track(frame: Frame, model: YOLO, **kwargs) -> List[Detection]:
+
+    results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False, **kwargs))
+    if results[0].boxes is None or results[0].boxes.id is None:
+        # work around https://github.com/ultralytics/ultralytics/issues/5968
+        return []
+
+    return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]
+
+
+class TrainingDataWriter:
+    def __init__(self, training_path: Optional[Path] = None):
+        if training_path is None:
+            self.path = None
+            return
+
+        if not isinstance(training_path, Path):
+            raise ValueError("save-for-training should be a path")
+        if not training_path.exists():
+            logger.info(f"Making path for training data: {training_path}")
+            training_path.mkdir(parents=True, exist_ok=False)
+        else:
+            logger.warning(f"Path for training-data exists: {training_path}. Continuing assuming that's ok.")
+
+        # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
+
+        self.path = training_path
+
+    def __enter__(self):
+        if self.path:
+            self.training_fp = open(self.path / 'all.txt', 'w')
+            # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
+            self.csv = csv.DictWriter(self.training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
+            self.count = 0
+        return self
+
+    def add(self, frame: Frame, tracks: List[Track]):
+        if not self.path:
+            # skip if disabled
+            return
+
+        self.csv.writerows([{
+            'frame_id': round(frame.index * 10., 1), # not really time
+            'track_id': t.track_id,
+            'l': float(t.history[-1].l), # to float, so we're sure it's not a torch.tensor()
+            't': float(t.history[-1].t),
+            'w': float(t.history[-1].w),
+            'h': float(t.history[-1].h),
+            'x': t.get_projected_history(frame.H, frame.camera)[-1][0],
+            'y': t.get_projected_history(frame.H, frame.camera)[-1][1],
+            'state': t.history[-1].state.value
+            # only keep _actual_detections, no lost entries
+        } for t in tracks
+            # if t.history[-1].state != DetectionState.Lost
+        ])
+        self.count += len(tracks)
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        # ... ignore exception (type, value, traceback)
+        if not self.path:
+            return
+
+        self.training_fp.close()
+        lines = {
+            'train': int(self.count * .8),
+            'val': int(self.count * .12),
+            'test': int(self.count * .08),
+        }
+        logger.info(f"Splitting gathered data from {self.training_fp.name}")
+        with open(self.training_fp.name, 'r') as source_fp:
+            for name, line_nrs in lines.items():
+                dir_path = self.path / name
+                dir_path.mkdir(exist_ok=True)
+                file = dir_path / 'tracked.txt'
+                logger.debug(f"- Write {line_nrs} lines to {file}")
+                with file.open('w') as target_fp:
+                    for i in range(line_nrs):
+                        target_fp.write(source_fp.readline())
+
+
 class Tracker:
     def __init__(self, config: Namespace, is_running: Event):
         self.config = config
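The Detection construction in _yolov8_track converts YOLO's centre-based xywh boxes to left/top/width/height. A worked sketch of that arithmetic, with illustrative values not taken from the diff:

    # Results.boxes.xywh holds (center_x, center_y, width, height);
    # Detection expects the left/top corner, hence the -.5*w and -.5*h shifts.
    cx, cy, w, h = 640.0, 360.0, 50.0, 120.0  # hypothetical box
    l, t = cx - .5 * w, cy - .5 * h           # -> (615.0, 300.0)

On exit, TrainingDataWriter splits the gathered rows 80/12/8 into train/val/test, following the Trajectron++ preprocessing layout linked above.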
@@ -98,7 +179,7 @@ class Tracker:
                 # embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
             )
         elif self.config.detector == DETECTOR_YOLOv8:
-            self.model = YOLO('EXPERIMENTS/yolov8x.pt')
+            self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0)
         else:
             raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")
@@ -120,156 +201,118 @@ class Tracker:

     def track(self):
         prev_run_time = 0

-        training_fp = None
-        training_csv = None
-        training_frames = 0
-
-        if self.config.save_for_training is not None:
-            if not isinstance(self.config.save_for_training, Path):
-                raise ValueError("save-for-training should be a path")
-            if not self.config.save_for_training.exists():
-                logger.info(f"Making path for training data: {self.config.save_for_training}")
-                self.config.save_for_training.mkdir(parents=True, exist_ok=False)
-            else:
-                logger.warning(f"Path for training-data exists: {self.config.save_for_training}. Continuing assuming that's ok.")
-            training_fp = open(self.config.save_for_training / 'all.txt', 'w')
-            # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
-            training_csv = csv.DictWriter(training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
+        # training_fp = None
+        # training_csv = None
+        # training_frames = 0
+
+        # if self.config.save_for_training is not None:
+        #     if not isinstance(self.config.save_for_training, Path):
+        #         raise ValueError("save-for-training should be a path")
+        #     if not self.config.save_for_training.exists():
+        #         logger.info(f"Making path for training data: {self.config.save_for_training}")
+        #         self.config.save_for_training.mkdir(parents=True, exist_ok=False)
+        #     else:
+        #         logger.warning(f"Path for training-data exists: {self.config.save_for_training}. Continuing assuming that's ok.")
+        #     training_fp = open(self.config.save_for_training / 'all.txt', 'w')
+        #     # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
+        #     training_csv = csv.DictWriter(training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)

         prev_frame_i = -1

-        while self.is_running.is_set():
-            # this waiting for target_dt causes frame loss. E.g. with target_dt at .1, it
-            # skips exactly 1 frame on a 10 fps video (which, it obviously should not do)
-            # so for now, timing should move to emitter
-            # this_run_time = time.time()
-            # # logger.debug(f'test {prev_run_time - this_run_time}')
-            # time.sleep(max(0, prev_run_time - this_run_time + TARGET_DT))
-            # prev_run_time = time.time()
-
-            zmq_ev = self.frame_sock.poll(timeout=2000)
-            if not zmq_ev:
-                logger.warn('skip poll after 2000ms')
-                # when there's no data after timeout, loop so that is_running is checked
-                continue
-
-            start_time = time.time()
-            frame: Frame = self.frame_sock.recv_pyobj() # frame delivery in current setup: 0.012-0.03s
-
-            if frame.index > (prev_frame_i+1):
-                logger.warn(f"Dropped {frame.index - prev_frame_i - 1} frames ({frame.index=}, {prev_frame_i=})")
-
-            prev_frame_i = frame.index
-            # load homography into frame (TODO: should this be done in emitter?)
-            if frame.H is None:
-                # logger.warning('Falling back to default H')
-                # fallback: load configured H
-                frame.H = self.H
-
-            # logger.info(f"Frame delivery delay = {time.time()-frame.time}s")
-
-            if self.config.detector == DETECTOR_YOLOv8:
-                detections: [Detection] = self._yolov8_track(frame)
-            else :
-                detections: [Detection] = self._resnet_track(frame.img, scale = 1)
-
-            # Store detections into tracklets
-            projected_coordinates = []
-            for detection in detections:
-                track = self.tracks[detection.track_id]
-                track.track_id = detection.track_id # for new tracks
-
-                track.history.append(detection) # add to history
-                # projected_coordinates.append(track.get_projected_history(self.H)) # then get full history
-
-            # TODO: hadle occlusions, and dissappearance
-            # if len(track.history) > 30: # retain 90 tracks for 90 frames
-            #     track.history.pop(0)
-
-            # trajectories = {}
-            # for detection in detections:
-            #     tid = str(detection.track_id)
-            #     track = self.tracks[detection.track_id]
-            #     coords = track.get_projected_history(self.H) # get full history
-            #     trajectories[tid] = {
-            #         "id": tid,
-            #         "det_conf": detection.conf,
-            #         "bbox": detection.to_ltwh(),
-            #         "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubles nested, fine for test
-            #     }
-            active_track_ids = [d.track_id for d in detections]
-            active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids}
-            # logger.info(f"{trajectories}")
-
-            frame.tracks = active_tracks
-
-            # if self.config.bypass_prediction:
-            #     self.trajectory_socket.send_string(json.dumps(trajectories))
-            # else:
-            #     self.trajectory_socket.send(pickle.dumps(frame))
-            if self.config.smooth_tracks:
-                frame = self.smoother.smooth_frame_tracks(frame)
-
-            self.trajectory_socket.send_pyobj(frame)
-
-            current_time = time.time()
-            logger.debug(f"Trajectories: {len(active_tracks)}. Current frame delay = {current_time-frame.time}s (trajectories: {current_time - start_time}s)")
-
-            # self.trajectory_socket.send_string(json.dumps(trajectories))
-            # provide a {ID: {id: ID, history: [[x,y],[x,y],...]}}
-            # TODO: provide a track object that actually keeps history (unlike tracker)
-
-            #TODO calculate fps (also for other loops to see asynchonity)
-            # fpsfilter=fpsfilter*.9+(1/dt)*.1 #trust value in order to stabilize fps display
-            if training_csv:
-                training_csv.writerows([{
-                    'frame_id': round(frame.index * 10., 1), # not really time
-                    'track_id': t.track_id,
-                    'l': t.history[-1].l,
-                    't': t.history[-1].t,
-                    'w': t.history[-1].w,
-                    'h': t.history[-1].h,
-                    'x': t.get_projected_history(frame.H)[-1][0],
-                    'y': t.get_projected_history(frame.H)[-1][1],
-                    'state': t.history[-1].state.value
-                    # only keep _actual_detections, no lost entries
-                } for t in active_tracks.values()
-                    # if t.history[-1].state != DetectionState.Lost
-                ])
-                training_frames += len(active_tracks)
-            # print(time.time() - start_time)
-
-        if training_fp:
-            training_fp.close()
-            lines = {
-                'train': int(training_frames * .8),
-                'val': int(training_frames * .12),
-                'test': int(training_frames * .08),
-            }
-            logger.info(f"Splitting gathered data from {training_fp.name}")
-            with open(training_fp.name, 'r') as source_fp:
-                for name, line_nrs in lines.items():
-                    dir_path = self.config.save_for_training / name
-                    dir_path.mkdir(exist_ok=True)
-                    file = dir_path / 'tracked.txt'
-                    logger.debug(f"- Write {line_nrs} lines to {file}")
-                    with file.open('w') as target_fp:
-                        for i in range(line_nrs):
-                            target_fp.write(source_fp.readline())
+        with TrainingDataWriter(self.config.save_for_training) as writer:
+            while self.is_running.is_set():
+                # this waiting for target_dt causes frame loss. E.g. with target_dt at .1, it
+                # skips exactly 1 frame on a 10 fps video (which, it obviously should not do)
+                # so for now, timing should move to emitter
+                # this_run_time = time.time()
+                # # logger.debug(f'test {prev_run_time - this_run_time}')
+                # time.sleep(max(0, prev_run_time - this_run_time + TARGET_DT))
+                # prev_run_time = time.time()
+
+                zmq_ev = self.frame_sock.poll(timeout=2000)
+                if not zmq_ev:
+                    logger.warn('skip poll after 2000ms')
+                    # when there's no data after timeout, loop so that is_running is checked
+                    continue
+
+                start_time = time.time()
+                frame: Frame = self.frame_sock.recv_pyobj() # frame delivery in current setup: 0.012-0.03s
+
+                if frame.index > (prev_frame_i+1):
+                    logger.warn(f"Dropped {frame.index - prev_frame_i - 1} frames ({frame.index=}, {prev_frame_i=})")
+
+                prev_frame_i = frame.index
+                # load homography into frame (TODO: should this be done in emitter?)
+                if frame.H is None:
+                    # logger.warning('Falling back to default H')
+                    # fallback: load configured H
+                    frame.H = self.H
+
+                # logger.info(f"Frame delivery delay = {time.time()-frame.time}s")
+
+                if self.config.detector == DETECTOR_YOLOv8:
+                    detections: [Detection] = _yolov8_track(frame, self.model)
+                else :
+                    detections: [Detection] = self._resnet_track(frame.img, scale = 1)
+
+                # Store detections into tracklets
+                projected_coordinates = []
+                for detection in detections:
+                    track = self.tracks[detection.track_id]
+                    track.track_id = detection.track_id # for new tracks
+
+                    track.history.append(detection) # add to history
+                    # projected_coordinates.append(track.get_projected_history(self.H)) # then get full history
+
+                # TODO: hadle occlusions, and dissappearance
+                # if len(track.history) > 30: # retain 90 tracks for 90 frames
+                #     track.history.pop(0)
+
+                # trajectories = {}
+                # for detection in detections:
+                #     tid = str(detection.track_id)
+                #     track = self.tracks[detection.track_id]
+                #     coords = track.get_projected_history(self.H) # get full history
+                #     trajectories[tid] = {
+                #         "id": tid,
+                #         "det_conf": detection.conf,
+                #         "bbox": detection.to_ltwh(),
+                #         "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubles nested, fine for test
+                #     }
+                active_track_ids = [d.track_id for d in detections]
+                active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids}
+                # logger.info(f"{trajectories}")
+
+                frame.tracks = active_tracks
+
+                # if self.config.bypass_prediction:
+                #     self.trajectory_socket.send_string(json.dumps(trajectories))
+                # else:
+                #     self.trajectory_socket.send(pickle.dumps(frame))
+                if self.config.smooth_tracks:
+                    frame = self.smoother.smooth_frame_tracks(frame)
+
+                self.trajectory_socket.send_pyobj(frame)
+
+                current_time = time.time()
+                logger.debug(f"Trajectories: {len(active_tracks)}. Current frame delay = {current_time-frame.time}s (trajectories: {current_time - start_time}s)")
+
+                # self.trajectory_socket.send_string(json.dumps(trajectories))
+                # provide a {ID: {id: ID, history: [[x,y],[x,y],...]}}
+                # TODO: provide a track object that actually keeps history (unlike tracker)
+
+                #TODO calculate fps (also for other loops to see asynchonity)
+                # fpsfilter=fpsfilter*.9+(1/dt)*.1 #trust value in order to stabilize fps display
+                writer.add(frame, active_tracks.values())

         logger.info('Stopping')

-    def _yolov8_track(self, frame: Frame,) -> [Detection]:
-        results: [YOLOResult] = self.model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False)
-        if results[0].boxes is None or results[0].boxes.id is None:
-            # work around https://github.com/ultralytics/ultralytics/issues/5968
-            return []
-        return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]
-
     def _resnet_track(self, img, scale: float = 1) -> [Detection]:
         if scale != 1:
             dsize = (int(img.shape[1] * scale), int(img.shape[0] * scale))
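The rewritten track() loop delegates all training-data bookkeeping to the context manager; a minimal usage sketch (output path hypothetical):

    # Sketch: TrainingDataWriter as used by Tracker.track() above.
    # __enter__ opens <path>/all.txt; add() appends one tab-separated row per
    # active track (frame_id, track_id, l, t, w, h, x, y, state); __exit__
    # closes the file and splits the rows 80/12/8 into train/val/test.
    from pathlib import Path
    from trap.tracker import TrainingDataWriter

    with TrainingDataWriter(Path('OUT/training')) as writer:
        # per processed frame:
        # writer.add(frame, active_tracks.values())
        pass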