Prep for lidar: map out of bounds checking and maps with translation

Ruben van de Ven 2025-11-05 22:24:38 +01:00
parent 218419368a
commit 858bb91244
8 changed files with 218 additions and 103 deletions


@@ -14,8 +14,13 @@ These are roughly the steps to go from datagathering to training
1. Make sure to have some recordings with a fixed camera. [UPDATE: not needed anymore, except for calibration & homography footage]
* Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
2. Follow the steps in the auxiliary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) camera matrix, lens distortion, image dimensions, and (2+3) homography
3. Run the tracker, e.g. `uv run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
* Note: You can run this right off the camera stream: `uv run tracker --eval_device cuda:0 --detector ultralytics --video-src rtsp://USER:PW@ADDRESS/STREAM --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`, each recording adding a new file to the `raw` folder.
3. Track lidar or video data:
1. Video: Run the video source & video tracker nodes:
* `uv run trap_video_source --homography ../DATASETS/hof4-test-angle/homography.json --video-src gige://../DATASETS/hof4-test-angle/gige_config.json --calibration ../DATASETS/hof4-test-angle/calibration.json` (Optionally, use recorded video with `--video-src videos/render-source-2025-10-19T21\:09.mp4 --video-offset 300`)
* `uv run trap_tracker --smooth-tracks --eval_device cuda:0 --detector ultralytics`
2. Lidar: `uv run trap_lidar --min-box-area 0 --pi LOCAL_IP --smooth-tracks`
4. Save the tracks emitted by the video or lidar tracker: `uv run trap_track_writer --output-dir EXPERIMENTS/raw/hof-lidar`
* Each recording adds a new txt file to the `raw` folder.
4. Parse tracker data to Trajectron format: `uv run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME` Optionally, smooth tracks: `--smooth-tracks`
* Optionally, add a map: ideally an RGB PNG, 3 layers of 0-255 (see the sketch after this list)
* `uv run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME --smooth-tracks --camera-fps 12 --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --filter-displacement 2 --map-img-path ../DATASETS/NAME/map.png`
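The map step above also works with a translated map: the `process_data` change in this commit looks for an optional homography file next to the map image (`map.png` → `map.json`) and falls back to a plain 5 px/m scale when it is missing. A minimal sketch of writing such a sidecar, with made-up translation values:

```python
# Hypothetical sketch: create a map homography sidecar next to map.png.
# Values are illustrative; process_data falls back to a plain 5 px/m scale
# matrix when this file does not exist.
import numpy as np

# 5 px per meter, with the world origin placed at pixel (50, 20) of the map image
H_world_to_map = np.array([
    [5.0, 0.0, 50.0],   # x_px = 5 * x_world + 50
    [0.0, 5.0, 20.0],   # y_px = 5 * y_world + 20
    [0.0, 0.0,  1.0],
])

# the sidecar is read back with np.loadtxt, so save it as plain text
np.savetxt("../DATASETS/NAME/map.json", H_world_to_map)
```

Note that despite the `.json` suffix, the commit reads this file with `np.loadtxt`, so a whitespace-separated 3x3 matrix is what the loader expects.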

File diff suppressed because one or more lines are too long


@@ -171,6 +171,8 @@ class DistortedCamera(ABC):
calibdata = json.load(fp)
if 'type' in calibdata and calibdata['type'] == 'fisheye':
camera = FisheyeCamera.from_calibdata(calibdata, H, fps)
elif 'type' in calibdata and calibdata['type'] == 'undistorted':
camera = UndistortedCamera(calibdata['fps'])
else:
camera = Camera.from_calibdata(calibdata, H, fps)
@@ -760,6 +762,8 @@ class CameraAction(argparse.Action):
data = json.load(fp)
if 'type' in data and data['type'] == 'fisheye':
camera = FisheyeCamera.from_calibfile(Path(values), namespace.H, namespace.camera_fps)
elif 'type' in data and data['type'] == 'undistorted':
camera = UndistortedCamera(namespace.camera_fps)
else:
camera = Camera.from_calibfile(Path(values), namespace.H, namespace.camera_fps)
# # print(data)
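For reference, a minimal calibration file that would take the new `undistorted` branch added above. This is a sketch showing only the keys that branch reads (`type`, and `fps` for the `from_calibdata` path), not a full calibration:

```python
# Hypothetical minimal calibration file for the new UndistortedCamera branch.
# Only the keys read by that branch are shown; calibrations for the other
# branches also carry the camera matrix and distortion coefficients.
import json
from pathlib import Path

calibdata = {
    "type": "undistorted",  # routes loading to UndistortedCamera instead of Camera/FisheyeCamera
    "fps": 12,              # used by from_calibdata; from_calibfile takes fps from the CLI namespace
}

Path("calibration.json").write_text(json.dumps(calibdata, indent=2))
```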


@@ -6,6 +6,7 @@ import pathlib
import pickle
import random
import time
from typing import List
import warnings
from argparse import ArgumentParser, Namespace
from multiprocessing import Event
@@ -14,7 +15,7 @@ import dill
import numpy as np
import torch
import zmq
from trajectron.environment import Environment, Scene
from trajectron.environment import Environment, Scene, GeometricMap
from trajectron.model.model_registrar import ModelRegistrar
from trajectron.model.online.online_trajectron import OnlineTrajectron
from trajectron.utils import prediction_output_to_trajectories
@@ -22,6 +23,7 @@ from trajectron.utils import prediction_output_to_trajectories
from trap.frame_emitter import DataclassJSONEncoder, Frame
from trap.node import Node
from trap.tracker import Smoother
from trap.utils import ImageMap
logger = logging.getLogger("trap.prediction")
@@ -61,8 +63,8 @@ def create_online_env(env, hyperparams, scene_idx, init_timestep):
robot_type=env.robot_type)
def get_maps_for_input(input_dict, scene, hyperparams, device):
scene_maps = list()
def get_maps_for_input(input_dict, scene: Scene, hyperparams, device):
scene_maps: List[ImageMap] = list()
scene_pts = list()
heading_angles = list()
patch_sizes = list()
@@ -84,10 +86,10 @@ def get_maps_for_input(input_dict, scene, hyperparams, device):
else:
heading_angle = None
scene_map = scene.map[node.type]
scene_map: ImageMap = scene.map[node.type]
# map_point = x[-1, :2]
# map_point = x[:2]
map_point = x[:2].clip(0) # prevent crash for out of map point.
map_point = x[:2]
# map_point = x[:2].clip(0) # prevent crash for out of map point.
patch_size = hyperparams['map_encoder'][node.type]['patch_size']


@@ -13,8 +13,9 @@ import pandas as pd
import dill
import tqdm
import argparse
from typing import List, Optional
from typing import Dict, List, Optional
from trap.base import Track
from trap.config import CameraAction, HomographyAction
from trap.frame_emitter import Camera
from trap.tracker import FinalDisplacementFilter, Smoother, TrackReader
@@ -101,11 +102,15 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
type_map = {}
# TODO)) For now, assume the map is a 100x scale of the world coordinates (i.e. 100px per meter)
# thus when we do a homography of 5px per meter, scale down by 20
homography_matrix = np.array([
[5, 0,0],
[0, 5,0],
[0,0,1],
]) # 100 scale
map_H_path = map_img_path.with_suffix('.json')
if map_H_path.exists():
homography_matrix = np.loadtxt(map_H_path)
else:
homography_matrix = np.array([
[5, 0,0],
[0, 5,0],
[0,0,1],
]) # 100 scale
img = cv2.imread(map_img_path)
img = cv2.resize(img, (img.shape[1]//20, img.shape[0]//20))
type_map['PEDESTRIAN'] = ImageMap(
@@ -127,9 +132,11 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
reader = TrackReader(src_dir, camera.fps)
tracks = [t for t in reader]
print(f"Unfiltered total: {len(tracks)} tracks")
if filter_displacement > 0:
filter = FinalDisplacementFilter(filter_displacement)
tracks = filter.apply(tracks, camera)
print(f"Filtered: {len(tracks)} tracks")
total = len(tracks)
bar = tqdm.tqdm(total=total)
@@ -153,7 +160,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
dt3 = RollingAverage()
dt4 = RollingAverage()
sets = {}
sets: Dict[str, List[Track]] = {}
offset = 0
for data_class, nr in destinations.items():
# TODO)) think of a way to shuffle while keeping scenes
@@ -164,6 +171,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
print(f"Camera FPS: {camera.fps}, actual fps: {camera.fps/step_size} (or {(1/camera.fps)*step_size})")
names = {}
max_pos = 0
for data_class, nr_of_items in destinations.items():
env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
@@ -210,6 +218,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
interpolated_track = track.get_with_interpolated_history()
b = time.time()
for variation_nr, iteration_settings in enumerate(variations):
if iteration_settings.smooth:
@@ -227,6 +237,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# track.get_projected_history(H=None, camera=self.config.camera)
node = track.to_trajectron_node(camera, env)
max_pos = max(node.data.data[0][0], max_pos)
data_class = time.time()
@@ -288,7 +299,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# print(scene.nodes[0].first_timestep)
print(f'Processed {len(scenes):.2f} scene for data class {data_class}')
print(f'Processed {len(scenes)} scenes with {sum([len(s.nodes) for s in scenes])} nodes for data class {data_class}')
# print("MAXIMUM!!", max_pos)
env.scenes = scenes
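To make the map scaling above explicit, here is a small sketch (with a hypothetical world point) of how the default 5 px/m homography maps world coordinates in meters onto the downscaled map image; the source image is assumed to be 100 px/m and is resized by 1/20 so that it matches this matrix:

```python
# Sketch of the world -> map-pixel mapping implied by the default homography above.
import numpy as np

H_world_to_map = np.array([
    [5.0, 0.0, 0.0],
    [0.0, 5.0, 0.0],
    [0.0, 0.0, 1.0],
])

world_pt = np.array([3.2, 7.5, 1.0])   # (x, y) in meters, homogeneous coordinates
map_px = H_world_to_map @ world_pt
map_px = map_px[:2] / map_px[2]        # -> [16.0, 37.5] pixels on the resized map
print(map_px)
```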


@@ -37,6 +37,9 @@ class TrackHistory():
def load_from_cache(self):
if self.cache_path is None:
return False
if self.cache_path.exists():
logger.debug("Load history state from cache")
with self.cache_path.open('rb') as fp:


@@ -118,6 +118,7 @@ class FinalDisplacementFilter(TrackFilter):
def filter(self, track: Track, camera: Camera):
history = track.get_projected_history(H=None, camera=camera)
displacement = np.linalg.norm(history[0]-history[-1])
return displacement > self.min_displacement
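As a standalone illustration of the filter above (with made-up positions): a track is kept only when the straight-line distance between its first and last projected position exceeds the threshold passed as `--filter-displacement`.

```python
# Standalone sketch of FinalDisplacementFilter's test, without Track/Camera objects.
import numpy as np

history = np.array([[0.0, 0.0], [0.5, 0.2], [1.8, 0.4]])  # projected positions in meters (made up)
min_displacement = 2.0                                     # e.g. --filter-displacement 2

displacement = np.linalg.norm(history[0] - history[-1])    # start-to-end distance, ~1.84 m
keep = displacement > min_displacement                     # False: this track would be dropped
print(displacement, keep)
```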


@@ -1,4 +1,5 @@
# lerp & inverse lerp from https://gist.github.com/laundmo/b224b1f4c8ef6ca5fe47e132c8deab56
from collections import namedtuple
import linecache
import math
import os
@@ -7,6 +8,7 @@ import tracemalloc
from typing import Iterable
import cv2
import numpy as np
import torch
from trajectron.environment.map import GeometricMap
def lerp(a: float, b: float, t: float) -> float:
@@ -128,6 +130,7 @@ def display_top(snapshot: tracemalloc.Snapshot, key_type='lineno', limit=5):
print("Total allocated size: %.1f KiB" % (total / 1024))
ImageMapBounds = namedtuple('ImageMapBounds', ['min_x', 'max_x', 'min_y', 'max_y'])
class ImageMap(GeometricMap): # TODO Implement for image maps -> watch flipped coordinate system
def __init__(self, img: cv2.Mat, H_world_to_map: cv2.Mat, description=None):
# homography_matrix = np.loadtxt('H.txt')
@@ -144,11 +147,56 @@ class ImageMap(GeometricMap): # TODO Implement for image maps -> watch flipped
layers = layers.copy() # copy to apply negative stride
# layers =
#scale 255
#alternatively: morph image to world space with a scale, as in trajectron/experiments/nuscenes/process_data.py
super().__init__(layers, homography_matrix, description)
self.set_bounds()
def set_bounds(self):
"""
Use homography and image to calculate the limits of positions in world coordinates
"""
print(self.data.shape)
max_x = self.data.shape[1]
max_y = self.data.shape[2]
# this assumes a map that is only scaled and translated, not skewed
points_in_map = np.array([
[0, 0],
[max_x, max_y],
])
# calculate bounds:
H_map_to_world = np.linalg.inv(self.homography)
# Convert points to homogeneous coordinates and apply the transformation
homogeneous_points = np.hstack((points_in_map, np.ones((points_in_map.shape[0], 1))))
transformed_points = np.dot(homogeneous_points, H_map_to_world.T)
# Convert back to Cartesian coordinates
transformed_points = transformed_points[:, :2]
self.bounds = ImageMapBounds(
transformed_points[0][0],
transformed_points[1][0],
transformed_points[0][1],
transformed_points[1][1]
)
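A standalone recomputation of these bounds, assuming a hypothetical map size and a scale-plus-translation homography: the two map corners are pushed through the inverse homography to obtain the world-coordinate extent.

```python
# Recompute the result of set_bounds() for made-up values.
import numpy as np

H_world_to_map = np.array([
    [5.0, 0.0, 50.0],
    [0.0, 5.0, 20.0],
    [0.0, 0.0,  1.0],
])
map_shape = (3, 400, 300)                          # (layers, x, y), mirroring self.data.shape

corners_px = np.array([[0, 0], [map_shape[1], map_shape[2]]], dtype=float)
H_map_to_world = np.linalg.inv(H_world_to_map)
world = np.hstack([corners_px, np.ones((2, 1))]) @ H_map_to_world.T
world = world[:, :2]                               # world coordinates of the two corners

print(dict(min_x=world[0][0], max_x=world[1][0], min_y=world[0][1], max_y=world[1][1]))
# {'min_x': -10.0, 'max_x': 70.0, 'min_y': -4.0, 'max_y': 56.0}
```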
@classmethod
def get_cropped_maps_from_scene_map_batch(cls, maps, scene_pts, patch_size, rotation=None, device='cpu'):
min_bounds = [maps[0].bounds.min_x, maps[0].bounds.min_y]
max_bounds = [maps[0].bounds.max_x, maps[0].bounds.max_y]
if torch.is_tensor(scene_pts):
min_bounds = torch.Tensor(min_bounds)
max_bounds = torch.Tensor(max_bounds)
scene_pts = scene_pts.clip(min=min_bounds, max=max_bounds)
return super().get_cropped_maps_from_scene_map_batch(maps, scene_pts, patch_size, rotation, device)
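This override is where the out-of-bounds handling from the commit message happens: scene points are clamped into the map's world-space bounds before cropping, replacing the old `.clip(0)` workaround in `get_maps_for_input`. A small sketch with hypothetical bounds:

```python
# Sketch of the clamping step: points outside the map are pulled back to its edge
# before the crop, instead of producing an out-of-range patch index.
import torch

min_bounds = torch.tensor([-10.0, -4.0])   # hypothetical world-space map bounds
max_bounds = torch.tensor([70.0, 56.0])

scene_pts = torch.tensor([
    [12.0, 30.0],                           # inside the map: unchanged
    [-25.0, 80.0],                          # outside: clamped to (-10.0, 56.0)
])
print(scene_pts.clip(min=min_bounds, max=max_bounds))
```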
def to_map_points(self, scene_pts):
org_shape = None