Prep for lidar: map out of bounds checking and maps with translation

Ruben van de Ven 2025-11-05 22:24:38 +01:00
parent 218419368a
commit 858bb91244
8 changed files with 218 additions and 103 deletions


@@ -14,8 +14,13 @@ These are roughly the steps to go from datagathering to training
1. Make sure to have some recordings with a fixed camera. [UPDATE: not needed anymore, except for calibration & homography footage]
* Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
2. Follow the steps in the auxiliary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) camera matrix, lens distortion, image dimensions, and (2+3) homography
3. Run the tracker, e.g. `uv run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
* Note: You can run this right off the camera stream: `uv run tracker --eval_device cuda:0 --detector ultralytics --video-src rtsp://USER:PW@ADDRESS/STREAM --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`, each recording adding a new file to the `raw` folder.
3. Track lidar or video data:
1. Video: Run the video source & video tracker nodes:
* `uv run trap_video_source --homography ../DATASETS/hof4-test-angle/homography.json --video-src gige://../DATASETS/hof4-test-angle/gige_config.json --calibration ../DATASETS/hof4-test-angle/calibration.json` (Optionally, use recorded video with `--video-src videos/render-source-2025-10-19T21\:09.mp4 --video-offset 300`)
* `uv run trap_tracker --smooth-tracks --eval_device cuda:0 --detector ultralytics`
2. Lidar: `uv run trap_lidar --min-box-area 0 --pi LOCAL_IP --smooth-tracks`
4. Save the tracks emitted by the video or lidar tracker: `uv run trap_track_writer --output-dir EXPERIMENTS/raw/hof-lidar`
* Each recording adds a new txt file to the `raw` folder.
4. Parse tracker data to Trajectron format: `uv run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME` Optionally, smooth tracks: `--smooth-tracks`
* Optionally, add a map: ideally an RGB PNG, 3 layers of 0-255 (see the sketch after this list)
* `uv run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME --smooth-tracks --camera-fps 12 --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --filter-displacement 2 --map-img-path ../DATASETS/NAME/map.png`
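The map step above also works with a translated map: the `process_data` change in this commit looks for an optional homography file next to the map image (`map.png` → `map.json`) and falls back to a plain 5 px/m scale when it is missing. A minimal sketch of writing such a sidecar, with made-up translation values:

```python
# Hypothetical sketch: create a map homography sidecar next to map.png.
# Values are illustrative; process_data falls back to a plain 5 px/m scale
# matrix when this file does not exist.
import numpy as np

# 5 px per meter, with the world origin placed at pixel (50, 20) of the map image
H_world_to_map = np.array([
    [5.0, 0.0, 50.0],   # x_px = 5 * x_world + 50
    [0.0, 5.0, 20.0],   # y_px = 5 * y_world + 20
    [0.0, 0.0,  1.0],
])

# the sidecar is read back with np.loadtxt, so save it as plain text
np.savetxt("../DATASETS/NAME/map.json", H_world_to_map)
```

Note that despite the `.json` suffix, the commit reads this file with `np.loadtxt`, so a whitespace-separated 3x3 matrix is what the loader expects.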

File diff suppressed because one or more lines are too long


@@ -171,6 +171,8 @@ class DistortedCamera(ABC):
calibdata = json.load(fp)
if 'type' in calibdata and calibdata['type'] == 'fisheye':
camera = FisheyeCamera.from_calibdata(calibdata, H, fps)
elif 'type' in calibdata and calibdata['type'] == 'undistorted':
camera = UndistortedCamera(calibdata['fps'])
else:
camera = Camera.from_calibdata(calibdata, H, fps)
@@ -760,6 +762,8 @@ class CameraAction(argparse.Action):
data = json.load(fp)
if 'type' in data and data['type'] == 'fisheye':
camera = FisheyeCamera.from_calibfile(Path(values), namespace.H, namespace.camera_fps)
elif 'type' in data and data['type'] == 'undistorted':
camera = UndistortedCamera(namespace.camera_fps)
else:
camera = Camera.from_calibfile(Path(values), namespace.H, namespace.camera_fps)
# # print(data)
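For reference, a minimal calibration file that would take the new `undistorted` branch added above. This is a sketch showing only the keys that branch reads (`type`, and `fps` for the `from_calibdata` path), not a full calibration:

```python
# Hypothetical minimal calibration file for the new UndistortedCamera branch.
# Only the keys read by that branch are shown; calibrations for the other
# branches also carry the camera matrix and distortion coefficients.
import json
from pathlib import Path

calibdata = {
    "type": "undistorted",  # routes loading to UndistortedCamera instead of Camera/FisheyeCamera
    "fps": 12,              # used by from_calibdata; from_calibfile takes fps from the CLI namespace
}

Path("calibration.json").write_text(json.dumps(calibdata, indent=2))
```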


@@ -6,6 +6,7 @@ import pathlib
import pickle
import random
import time
from typing import List
import warnings
from argparse import ArgumentParser, Namespace
from multiprocessing import Event
@@ -14,7 +15,7 @@ import dill
import numpy as np
import torch
import zmq
from trajectron.environment import Environment, Scene
from trajectron.environment import Environment, Scene, GeometricMap
from trajectron.model.model_registrar import ModelRegistrar
from trajectron.model.online.online_trajectron import OnlineTrajectron
from trajectron.utils import prediction_output_to_trajectories
@@ -22,6 +23,7 @@ from trajectron.utils import prediction_output_to_trajectories
from trap.frame_emitter import DataclassJSONEncoder, Frame
from trap.node import Node
from trap.tracker import Smoother
from trap.utils import ImageMap
logger = logging.getLogger("trap.prediction")
@@ -61,8 +63,8 @@ def create_online_env(env, hyperparams, scene_idx, init_timestep):
robot_type=env.robot_type)
def get_maps_for_input(input_dict, scene, hyperparams, device):
scene_maps = list()
def get_maps_for_input(input_dict, scene: Scene, hyperparams, device):
scene_maps: List[ImageMap] = list()
scene_pts = list()
heading_angles = list()
patch_sizes = list()
@@ -84,10 +86,10 @@ def get_maps_for_input(input_dict, scene, hyperparams, device):
else:
heading_angle = None
scene_map = scene.map[node.type]
scene_map: ImageMap = scene.map[node.type]
# map_point = x[-1, :2]
# map_point = x[:2]
map_point = x[:2].clip(0) # prevent crash for out of map point.
map_point = x[:2]
# map_point = x[:2].clip(0) # prevent crash for out of map point.
patch_size = hyperparams['map_encoder'][node.type]['patch_size']


@@ -13,8 +13,9 @@ import pandas as pd
import dill
import tqdm
import argparse
from typing import List, Optional
from typing import Dict, List, Optional
from trap.base import Track
from trap.config import CameraAction, HomographyAction
from trap.frame_emitter import Camera
from trap.tracker import FinalDisplacementFilter, Smoother, TrackReader
@@ -101,11 +102,15 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
type_map = {}
# TODO)) For now, assume the map is a 100x scale of the world coordinates (i.e. 100px per meter)
# thus when we do a homography of 5px per meter, scale down by 20
homography_matrix = np.array([
[5, 0,0],
[0, 5,0],
[0,0,1],
]) # 100 scale
map_H_path = map_img_path.with_suffix('.json')
if map_H_path.exists():
homography_matrix = np.loadtxt(map_H_path)
else:
homography_matrix = np.array([
[5, 0,0],
[0, 5,0],
[0,0,1],
]) # 100 scale
img = cv2.imread(map_img_path)
img = cv2.resize(img, (img.shape[1]//20, img.shape[0]//20))
type_map['PEDESTRIAN'] = ImageMap(
@@ -127,9 +132,11 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
reader = TrackReader(src_dir, camera.fps)
tracks = [t for t in reader]
print(f"Unfiltered total: {len(tracks)} tracks")
if filter_displacement > 0:
filter = FinalDisplacementFilter(filter_displacement)
tracks = filter.apply(tracks, camera)
print(f"Filtered: {len(tracks)} tracks")
total = len(tracks)
bar = tqdm.tqdm(total=total)
@@ -153,7 +160,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
dt3 = RollingAverage()
dt4 = RollingAverage()
sets = {}
sets: Dict[str, List[Track]] = {}
offset = 0
for data_class, nr in destinations.items():
# TODO)) think of a way to shuffle while keeping scenes
@@ -164,6 +171,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
print(f"Camera FPS: {camera.fps}, actual fps: {camera.fps/step_size} (or {(1/camera.fps)*step_size})")
names = {}
max_pos = 0
for data_class, nr_of_items in destinations.items():
env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
@@ -210,6 +218,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
interpolated_track = track.get_with_interpolated_history()
b = time.time()
for variation_nr, iteration_settings in enumerate(variations):
if iteration_settings.smooth:
@@ -227,6 +237,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# track.get_projected_history(H=None, camera=self.config.camera)
node = track.to_trajectron_node(camera, env)
max_pos = max(node.data.data[0][0], max_pos)
data_class = time.time()
@@ -288,7 +299,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# print(scene.nodes[0].first_timestep)
print(f'Processed {len(scenes):.2f} scene for data class {data_class}')
print(f'Processed {len(scenes)} scenes with {sum([len(s.nodes) for s in scenes])} nodes for data class {data_class}')
# print("MAXIMUM!!", max_pos)
env.scenes = scenes
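To make the map scaling above explicit, here is a small sketch (with a hypothetical world point) of how the default 5 px/m homography maps world coordinates in meters onto the downscaled map image; the source image is assumed to be 100 px/m and is resized by 1/20 so that it matches this matrix:

```python
# Sketch of the world -> map-pixel mapping implied by the default homography above.
import numpy as np

H_world_to_map = np.array([
    [5.0, 0.0, 0.0],
    [0.0, 5.0, 0.0],
    [0.0, 0.0, 1.0],
])

world_pt = np.array([3.2, 7.5, 1.0])   # (x, y) in meters, homogeneous coordinates
map_px = H_world_to_map @ world_pt
map_px = map_px[:2] / map_px[2]        # -> [16.0, 37.5] pixels on the resized map
print(map_px)
```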


@@ -37,6 +37,9 @@ class TrackHistory():
def load_from_cache(self):
if self.cache_path is None:
return False
if self.cache_path.exists():
logger.debug("Load history state from cache")
with self.cache_path.open('rb') as fp:


@@ -118,6 +118,7 @@ class FinalDisplacementFilter(TrackFilter):
def filter(self, track: Track, camera: Camera):
history = track.get_projected_history(H=None, camera=camera)
displacement = np.linalg.norm(history[0]-history[-1])
return displacement > self.min_displacement
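As a standalone illustration of the filter above (with made-up positions): a track is kept only when the straight-line distance between its first and last projected position exceeds the threshold passed as `--filter-displacement`.

```python
# Standalone sketch of FinalDisplacementFilter's test, without Track/Camera objects.
import numpy as np

history = np.array([[0.0, 0.0], [0.5, 0.2], [1.8, 0.4]])  # projected positions in meters (made up)
min_displacement = 2.0                                     # e.g. --filter-displacement 2

displacement = np.linalg.norm(history[0] - history[-1])    # start-to-end distance, ~1.84 m
keep = displacement > min_displacement                     # False: this track would be dropped
print(displacement, keep)
```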


@@ -1,4 +1,5 @@
# lerp & inverse lerp from https://gist.github.com/laundmo/b224b1f4c8ef6ca5fe47e132c8deab56
from collections import namedtuple
import linecache
import math
import os
@@ -7,6 +8,7 @@ import tracemalloc
from typing import Iterable
import cv2
import numpy as np
import torch
from trajectron.environment.map import GeometricMap
def lerp(a: float, b: float, t: float) -> float:
@@ -128,6 +130,7 @@ def display_top(snapshot: tracemalloc.Snapshot, key_type='lineno', limit=5):
print("Total allocated size: %.1f KiB" % (total / 1024))
ImageMapBounds = namedtuple('ImageMapBounds', ['min_x', 'max_x', 'min_y', 'max_y'])
class ImageMap(GeometricMap): # TODO Implement for image maps -> watch flipped coordinate system
def __init__(self, img: cv2.Mat, H_world_to_map: cv2.Mat, description=None):
# homography_matrix = np.loadtxt('H.txt')
@@ -144,11 +147,56 @@ class ImageMap(GeometricMap): # TODO Implement for image maps -> watch flipped
layers = layers.copy() # copy to apply negative stride
# layers =
#scale 255
#alternatively: morph image to world space with a scale, as in trajectron/experiments/nuscenes/process_data.py
super().__init__(layers, homography_matrix, description)
self.set_bounds()
def set_bounds(self):
"""
Use homography and image to calculate the limits of positions in world coordinates
"""
print(self.data.shape)
max_x = self.data.shape[1]
max_y = self.data.shape[2]
# this assumes a map that is only scaled and translated, not skewed
points_in_map = np.array([
[0, 0],
[max_x, max_y],
])
# calculate bounds:
H_map_to_world = np.linalg.inv(self.homography)
# Convert points to homogeneous coordinates and apply the transformation
homogeneous_points = np.hstack((points_in_map, np.ones((points_in_map.shape[0], 1))))
transformed_points = np.dot(homogeneous_points, H_map_to_world.T)
# Convert back to Cartesian coordinates
transformed_points = transformed_points[:, :2]
self.bounds = ImageMapBounds(
transformed_points[0][0],
transformed_points[1][0],
transformed_points[0][1],
transformed_points[1][1]
)
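A standalone recomputation of these bounds, assuming a hypothetical map size and a scale-plus-translation homography: the two map corners are pushed through the inverse homography to obtain the world-coordinate extent.

```python
# Recompute the result of set_bounds() for made-up values.
import numpy as np

H_world_to_map = np.array([
    [5.0, 0.0, 50.0],
    [0.0, 5.0, 20.0],
    [0.0, 0.0,  1.0],
])
map_shape = (3, 400, 300)                          # (layers, x, y), mirroring self.data.shape

corners_px = np.array([[0, 0], [map_shape[1], map_shape[2]]], dtype=float)
H_map_to_world = np.linalg.inv(H_world_to_map)
world = np.hstack([corners_px, np.ones((2, 1))]) @ H_map_to_world.T
world = world[:, :2]                               # world coordinates of the two corners

print(dict(min_x=world[0][0], max_x=world[1][0], min_y=world[0][1], max_y=world[1][1]))
# {'min_x': -10.0, 'max_x': 70.0, 'min_y': -4.0, 'max_y': 56.0}
```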
@classmethod
def get_cropped_maps_from_scene_map_batch(cls, maps, scene_pts, patch_size, rotation=None, device='cpu'):
min_bounds = [maps[0].bounds.min_x, maps[0].bounds.min_y]
max_bounds = [maps[0].bounds.max_x, maps[0].bounds.max_y]
if torch.is_tensor(scene_pts):
min_bounds = torch.Tensor(min_bounds)
max_bounds = torch.Tensor(max_bounds)
scene_pts = scene_pts.clip(min=min_bounds, max=max_bounds)
return super().get_cropped_maps_from_scene_map_batch(maps, scene_pts, patch_size, rotation, device)
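This override is where the out-of-bounds handling from the commit message happens: scene points are clamped into the map's world-space bounds before cropping, replacing the old `.clip(0)` workaround in `get_maps_for_input`. A small sketch with hypothetical bounds:

```python
# Sketch of the clamping step: points outside the map are pulled back to its edge
# before the crop, instead of producing an out-of-range patch index.
import torch

min_bounds = torch.tensor([-10.0, -4.0])   # hypothetical world-space map bounds
max_bounds = torch.tensor([70.0, 56.0])

scene_pts = torch.tensor([
    [12.0, 30.0],                           # inside the map: unchanged
    [-25.0, 80.0],                          # outside: clamped to (-10.0, 56.0)
])
print(scene_pts.clip(min=min_bounds, max=max_bounds))
```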
def to_map_points(self, scene_pts):
org_shape = None