First attempt to provide image map encoder

This commit is contained in:
Ruben van de Ven 2024-12-27 11:28:16 +01:00
parent cebe102e74
commit 1033516712
6 changed files with 418 additions and 14 deletions

View file

@ -17,6 +17,8 @@ These are roughly the steps to go from datagathering to training
3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
* Note: You can run this right off the camera stream: `poetry run tracker --eval_device cuda:0 --detector ultralytics --video-src rtsp://USER:PW@ADDRESS/STREAM --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`, each recording adding a new file to the `raw` folder.
4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME` Optionally, smooth tracks: `--smooth-tracks`
* Optionally, add a map: ideally an RGB PNG: 3 layers of 0-255
* `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME --smooth-tracks --camera-fps 12 --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --filter-displacement 2 --map-img-path ../DATASETS/NAME/map.png`
5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data `
6. The run!
* On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --eval_data_dict EXPERIMENTS/trajectron-data/hof2s-m_test.pkl --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/ --smooth-predictions --smooth-tracks --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here to run over an SSH connection while displaying on the local monitor)

test_trajectron_maps.ipynb Normal file

File diff suppressed because one or more lines are too long

View file

@ -394,8 +394,6 @@ class CvRenderer:
img = decorate_frame(frame, tracker_frame, prediction_frame, first_time, self.config, self.tracks, self.predictions)
img_path = (self.config.output_dir / f"{i:05d}.png").resolve()
logger.debug(f"write frame {frame.time - first_time:.3f}s")
if self.out_writer:
@ -403,6 +401,7 @@ class CvRenderer:
if self.config.render_window:
cv2.imshow('frame',cv2.resize(img, (1920, 1080)))
# cv2.imshow('frame',img)
# clear out old tracks & predictions:
@ -466,6 +465,8 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
undistorted_img = cv2.undistort(frame.img,,, None,
dst_img = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(,(,
# dst_img2 = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(, None)
# cv2.imwrite('/home/ruben/suspicion/DATASETS/hof3/camera2.png', dst_img2)
overlay = np.zeros(dst_img.shape, np.uint8)
# Fill image with red color(set each pixel to red)
@ -503,6 +504,7 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
draw_track_predictions(img, track, int(track.track_id)+1,, convert_world_points_to_img_points, anim_position=anim_position)
cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1)
base_color = (255,)*3
info_color = (255,255,0)
predictor_color = (255,0,255)

View file

@ -11,7 +11,7 @@ import pandas as pd
import dill
import tqdm
import argparse
from typing import List
from typing import List, Optional
from trap.config import CameraAction, HomographyAction
from trap.frame_emitter import Camera
@ -22,6 +22,8 @@ from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of
from trap.utils import ImageMap
FPS = 12
desired_max_time = 100
pred_indices = [2, 3]
@ -81,10 +83,29 @@ class TrackIteration:
# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:
def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int, filter_displacement:float):
def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int, filter_displacement:float, map_img_path: Optional[Path]):
name += f"-nostep" if step_size == 1 else f"-step{step_size}"
name += f"-conv{smooth_window}" if smooth_tracks else f"-nosmooth"
name += f"-f{filter_displacement}" if filter_displacement > 0 else ""
name += "-map" if map_img_path else "-nomap"
name += f"-{}"
print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")
if map_img_path:
if not map_img_path.exists():
raise RuntimeError(f"Map image does not exists {map_img_path}")
type_map = {}
type_map['PEDESTRIAN'] = ImageMap(
f"Map from {}"
type_map = None
nl = 0
l = 0
data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
@ -221,7 +242,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
last_ts = max([n.last_timestep for n in nodes])
# print(sorted([n.first_timestep for n in nodes]))
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None)
# TODO)) check use of maps:
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
# print(scene)
@ -271,6 +293,11 @@ def main():
# type=Path,
help="Image file representing a mask of a map (uses camera homography, assumes: 3 layers, values 0-255)",
# type=Path,
args = parser.parse_args()
@ -285,6 +312,7 @@ def main():

View file

@ -198,7 +198,10 @@ def transition_path_points(path: np.array, t: float):
lengths = np.sqrt(np.sum(np.diff(path, axis=0)**2, axis=1))
cum_lenghts = np.cumsum(lengths)
# distance = cum_lenghts[-1] * t
ts = np.concatenate((np.array([0.]), cum_lenghts / cum_lenghts[-1]))
# ts = np.concatenate((np.array([0.]), cum_lenghts / cum_lenghts[-1]))
# print(cum_lenghts[-1])
DRAW_SPEED = 22 # fixed speed (independent of lenght) TODO)) make variable
ts = np.concatenate((np.array([0.]), cum_lenghts / DRAW_SPEED))
new_path = [path[0]]
for a, b, t_a, t_b in zip(path[:-1], path[1:], ts[:-1], ts[1:]):
@ -209,7 +212,6 @@ def transition_path_points(path: np.array, t: float):
relative_t = inv_lerp(t_a, t_b, t)
x = lerp(a[0], b[0], relative_t)
y = lerp(a[1], b[1], relative_t)
print(relative_t, a , b, x, y)
return np.array(new_path)
@ -235,12 +237,13 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
for pred_i, pred in enumerate(track.predictions):
pred_coords = pred #cv2.perspectiveTransform(np.array([pred]), inv_H)[0].tolist()
# line_points = np.concatenate(([current_point], pred_coords)) # 'current point' is amoving target
line_points = pred_coords
# line_points = pred_coords
line_points = np.concatenate(([current_point], pred_coords)) # 'current point' is amoving target
# print(pred_coords, current_point, line_points)
line_points = transition_path_points(line_points, slide_t)
if convert_points:
line_points = convert_points(line_points)
line_points = np.rint(line_points).astype(int)
# color = (128,0,128) if pred_i else (128,128,0)
color = bgr_colors[color_index % len(bgr_colors)]
@ -260,7 +263,8 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
# start = [int(p) for p in pred_coords[ci-1]]
# end = [int(p) for p in pred_coords[ci]]
# print(np.rint(start),np.rint(end).tolist())
cv2.line(img, np.rint(start).astype(int), np.rint(end).astype(int), color, 1, lineType=cv2.LINE_AA)
cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA)
#, end, 2, color, 1, lineType=cv2.LINE_AA)
def draw_trackjectron_history(img: cv2.Mat, track: Track, color_index: int, convert_points: Optional[Callable]):

View file

@ -1,11 +1,12 @@
# lerp & inverse lerp from
import linecache
import os
from pathlib import Path
import tracemalloc
from typing import Iterable
import cv2
import numpy as np
from import GeometricMap
def lerp(a: float, b: float, t: float) -> float:
"""Linear interpolate on the scale given by a to b, using t as the point on that scale.
@ -69,3 +70,78 @@ def display_top(snapshot: tracemalloc.Snapshot, key_type='lineno', limit=5):
print("%s other: %.1f KiB" % (len(other), size / 1024))
total = sum(stat.size for stat in top_stats)
print("Total allocated size: %.1f KiB" % (total / 1024))
class ImageMap(GeometricMap): # TODO Implement for image maps -> watch flipped coordinate system
def __init__(self, image_path: Path, H_img_to_world: cv2.Mat, description=None):
# homography_matrix = np.loadtxt('H.txt')
# homography_matrix = H_img_to_world.copy()
# homography_matrix /= homography_matrix[2, 2] # normalise?
# homography_matrix = np.linalg.inv(homography_matrix)
homography_matrix = np.array([
[100, 0,0],
[0, 100,0],
# RGB png image has 3 layers
img = cv2.imread(image_path).astype(np.uint8)
img_reverse = img[::-1,:,:] # origin to bottom left, instead of top-left
layers = np.transpose(img_reverse, (2, 1, 0)) # array order: layers, x, y
# layers =
#scale 255
#alternatively: morph image to world space with a scale, as in trajectron/experiments/nuscenes/
super().__init__(layers, homography_matrix, description)
def to_map_points(self, scene_pts):
# Convert scene points to integer map coordinates by applying self.homography
# to the points in homogeneous form.
# Inputs with more than two dimensions are flattened to (-1, 2) for the
# transform and the result is reshaped back to the input's original shape.
# NOTE(review): presumably scene_pts is a numpy array whose last axis is (x, y) — confirm against callers.
org_shape = None
if len(scene_pts.shape) > 2:
org_shape = scene_pts.shape
scene_pts = scene_pts.reshape((-1, 2))
N, dims = scene_pts.shape
# Build homogeneous coordinates: one column per point, last row left at 1.
points_with_one = np.ones((dims + 1, N))
points_with_one[:dims] = scene_pts.T
# map_points = np.fliplr((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
# map_points = np.flipud((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
# Only the first `dims` components of the product are kept; there is no
# division by the homogeneous coordinate here.
# NOTE(review): the uint32 cast drops fractional parts and cannot represent
# negative values — confirm that points outside the map are never passed in.
map_points = (self.homography @ points_with_one).T[..., :dims].astype(np.uint32)
if org_shape is not None:
map_points = map_points.reshape(org_shape)
# print(scene_pts,'->', map_points)
# exit()
return map_points
# nuscener
# type_map = dict()
# canvas_size = (np.round(3 * y_size).astype(int), np.round(3 * x_size).astype(int))
# homography = np.array([[3., 0., 0.], [0., 3., 0.], [0., 0., 3.]])
# layer_names = ['lane', 'road_segment', 'drivable_area', 'road_divider', 'lane_divider', 'stop_line',
# 'ped_crossing', 'stop_line', 'ped_crossing', 'walkway']
# map_mask = (nusc_map.get_map_mask(patch_box, patch_angle, layer_names, canvas_size) * 255.0).astype(
# np.uint8)
# map_mask = np.swapaxes(map_mask, 1, 2) # x axis comes first
# map_mask_pedestrian = np.stack((map_mask[9], map_mask[8], np.max(map_mask[:3], axis=0)), axis=0)
# type_map['PEDESTRIAN'] = GeometricMap(data=map_mask_pedestrian, homography=homography, description=', '.join(layer_names))
# Notes: map_mask is a list of masks
# map_mask = []
# _line_geom_to_mask
# def mask_for_lines(...):
# map_mask = np.zeros(canvas_size, np.uint8)
# if layer_name is 'traffic_light':
# return None
# for line in layer_geom:
# new_line = line.intersection(patch)
# if not new_line.is_empty:
# new_line = affinity.affine_transform(new_line,
# [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])
# new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))
# map_mask = self.mask_for_lines(new_line, map_mask)