First attempt to provide image map encoder

This commit is contained in:
Ruben van de Ven 2024-12-27 11:28:16 +01:00
parent cebe102e74
commit 1033516712
6 changed files with 418 additions and 14 deletions

View file

@@ -17,6 +17,8 @@ These are roughly the steps to go from data gathering to training
3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
* Note: You can run this right off the camera stream: `poetry run tracker --eval_device cuda:0 --detector ultralytics --video-src rtsp://USER:PW@ADDRESS/STREAM --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`; each recording adds a new file to the `raw` folder.
4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME` Optionally, smooth tracks: `--smooth-tracks`
* Optionally, add a map: ideally an RGB PNG with 3 channels of values 0-255 (see the sketch after this list)
* `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME --smooth-tracks --camera-fps 12 --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --filter-displacement 2 --map-img-path ../DATASETS/NAME/map.png`
5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data`
6. Then run!
* On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --eval_data_dict EXPERIMENTS/trajectron-data/hof2s-m_test.pkl --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/ --smooth-predictions --smooth-tracks --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here to run over an SSH connection and display on the local monitor)
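
The map image referenced in step 4 is just a three-channel, 8-bit mask rendered in the same plane as the homography. A minimal sketch of producing one with OpenCV; the canvas size and polygon are hypothetical placeholders to be replaced with your scene's walkable area:

```python
# Sketch: build a 3-channel, 0-255 map mask for `process_data --map-img-path`.
# Canvas size and polygon coordinates are hypothetical placeholders.
import cv2
import numpy as np

w, h = 1920, 1080
img = np.zeros((h, w, 3), dtype=np.uint8)          # black = background

walkable = np.array([[200, 900], [1700, 900], [1500, 200], [400, 250]])
cv2.fillPoly(img, [walkable], (255, 255, 255))     # white = walkable area

cv2.imwrite("../DATASETS/NAME/map.png", img)       # path used in the example above
```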

test_trajectron_maps.ipynb (new file, 292 lines added)

File diff suppressed because one or more lines are too long

View file

@@ -394,8 +394,6 @@ class CvRenderer:
img = decorate_frame(frame, tracker_frame, prediction_frame, first_time, self.config, self.tracks, self.predictions)
img_path = (self.config.output_dir / f"{i:05d}.png").resolve()
logger.debug(f"write frame {frame.time - first_time:.3f}s")
if self.out_writer:
self.out_writer.write(img)
@@ -403,6 +401,7 @@ class CvRenderer:
self.streaming_process.stdin.write(img.tobytes())
if self.config.render_window:
cv2.imshow('frame',cv2.resize(img, (1920, 1080)))
# cv2.imshow('frame',img)
cv2.waitKey(1)
# clear out old tracks & predictions:
@@ -466,6 +465,8 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
undistorted_img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx)
dst_img = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H),(config.camera.w,config.camera.h))
# dst_img2 = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H), None)
# cv2.imwrite('/home/ruben/suspicion/DATASETS/hof3/camera2.png', dst_img2)
overlay = np.zeros(dst_img.shape, np.uint8)
# Fill image with red color (set each pixel to red)
@@ -503,6 +504,7 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
draw_track_predictions(img, track, int(track.track_id)+1, config.camera, convert_world_points_to_img_points, anim_position=anim_position)
cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1)
base_color = (255,)*3
info_color = (255,255,0)
predictor_color = (255,0,255)
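
For context on the `decorate_frame` hunk above: the frame is first undistorted with the camera intrinsics, then warped into (scaled) world space with the homography. A standalone sketch of that pipeline; the calibration values below are dummies standing in for what is loaded from `calibration.json` and `homography.json`:

```python
# Sketch of the undistort-then-warp step in decorate_frame, with dummy calibration.
import cv2
import numpy as np

frame = np.full((1080, 1920, 3), 127, dtype=np.uint8)   # stand-in for a video frame
mtx = np.array([[1000., 0., 960.], [0., 1000., 540.], [0., 0., 1.]])
dist = np.zeros(5)                                      # pretend: no lens distortion
newcameramtx = mtx.copy()
H = np.eye(3)                                           # image-to-world homography

undistorted = cv2.undistort(frame, mtx, dist, None, newcameramtx)
# scale world coordinates (meters) back into pixels before warping,
# roughly what convert_world_space_to_img_space does
S = np.array([[100., 0., 0.], [0., 100., 0.], [0., 0., 1.]])
dst_img = cv2.warpPerspective(undistorted, S @ H, (1920, 1080))
```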

View file

@@ -11,7 +11,7 @@ import pandas as pd
import dill
import tqdm
import argparse
from typing import List, Optional
from trap.config import CameraAction, HomographyAction
from trap.frame_emitter import Camera
@@ -22,6 +22,8 @@ from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of
from trap.utils import ImageMap
FPS = 12
desired_max_time = 100
pred_indices = [2, 3]
@@ -81,10 +83,29 @@ class TrackIteration:
# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:
def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int, filter_displacement: float, map_img_path: Optional[Path]):
name += f"-nostep" if step_size == 1 else f"-step{step_size}"
name += f"-conv{smooth_window}" if smooth_tracks else f"-nosmooth"
name += f"-f{filter_displacement}" if filter_displacement > 0 else ""
name += "-map" if map_img_path else "-nomap"
name += f"-{datetime.date.today()}" name += f"-{datetime.date.today()}"
print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}") print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")
if map_img_path:
if not map_img_path.exists():
raise RuntimeError(f"Map image does not exist: {map_img_path}")
type_map = {}
type_map['PEDESTRIAN'] = ImageMap(
map_img_path,
camera.H,
f"Map from {map_img_path.name}"
)
else:
type_map = None
nl = 0
l = 0
data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
@@ -221,7 +242,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
last_ts = max([n.last_timestep for n in nodes])
# print(sorted([n.first_timestep for n in nodes]))
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None) # TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
scene.nodes.extend(nodes)
scenes.append(scene)
# print(scene)
@@ -271,6 +293,11 @@ def main():
# type=Path,
default=0,
type=float)
parser.add_argument("--map-img-path",
help="Image file representing a mask of a map (uses camera homography, assumes: 3 layers, values 0-255)",
# type=Path,
default=None,
type=Path)
args = parser.parse_args()
@@ -285,6 +312,7 @@ def main():
args.bin_positions,
args.camera,
args.step_size,
filter_displacement=args.filter_displacement,
map_img_path=args.map_img_path
)
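
Since the map ends up pickled inside the Environment, a quick way to verify the wiring is to load the generated file back and inspect `scene.map`. A sketch, assuming the file name produced by the suffix logic above (the name is a placeholder for whatever `process_data` printed) and the usual attribute layout of Trajectron's map objects:

```python
# Sketch: check that the exported environment actually carries the image map.
# The pickle name below is a placeholder.
import dill

with open("EXPERIMENTS/trajectron-data/NAME-nostep-nosmooth-map-2024-12-27_train.pkl", "rb") as f:
    env = dill.load(f)

for scene in env.scenes:
    assert scene.map is not None and "PEDESTRIAN" in scene.map
    print(scene.name, scene.map["PEDESTRIAN"].description)
```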

View file

@@ -198,7 +198,10 @@ def transition_path_points(path: np.array, t: float):
lengths = np.sqrt(np.sum(np.diff(path, axis=0)**2, axis=1))
cum_lenghts = np.cumsum(lengths)
# distance = cum_lenghts[-1] * t
# ts = np.concatenate((np.array([0.]), cum_lenghts / cum_lenghts[-1]))
# print(cum_lenghts[-1])
DRAW_SPEED = 22 # fixed speed (independent of length) TODO)) make variable
ts = np.concatenate((np.array([0.]), cum_lenghts / DRAW_SPEED))
new_path = [path[0]]
for a, b, t_a, t_b in zip(path[:-1], path[1:], ts[:-1], ts[1:]):
@@ -209,7 +212,6 @@ def transition_path_points(path: np.array, t: float):
relative_t = inv_lerp(t_a, t_b, t)
x = lerp(a[0], b[0], relative_t)
y = lerp(a[1], b[1], relative_t)
print(relative_t, a , b, x, y)
new_path.append([x,y])
break
return np.array(new_path)
@@ -235,12 +237,13 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
for pred_i, pred in enumerate(track.predictions):
pred_coords = pred #cv2.perspectiveTransform(np.array([pred]), inv_H)[0].tolist()
# line_points = pred_coords
line_points = np.concatenate(([current_point], pred_coords)) # 'current point' is a moving target
# print(pred_coords, current_point, line_points)
line_points = transition_path_points(line_points, slide_t)
if convert_points:
line_points = convert_points(line_points)
line_points = np.rint(line_points).astype(int)
# color = (128,0,128) if pred_i else (128,128,0)
color = bgr_colors[color_index % len(bgr_colors)]
@@ -260,7 +263,8 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
# start = [int(p) for p in pred_coords[ci-1]]
# end = [int(p) for p in pred_coords[ci]]
# print(np.rint(start),np.rint(end).tolist())
cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA)
pass
# cv2.circle(img, end, 2, color, 1, lineType=cv2.LINE_AA)
def draw_trackjectron_history(img: cv2.Mat, track: Track, color_index: int, convert_points: Optional[Callable]):

View file

@@ -1,11 +1,12 @@
# lerp & inverse lerp from https://gist.github.com/laundmo/b224b1f4c8ef6ca5fe47e132c8deab56 # lerp & inverse lerp from https://gist.github.com/laundmo/b224b1f4c8ef6ca5fe47e132c8deab56
import linecache import linecache
import os import os
from pathlib import Path
import tracemalloc
from typing import Iterable
import cv2
import numpy as np
from trajectron.environment.map import GeometricMap
def lerp(a: float, b: float, t: float) -> float:
"""Linear interpolate on the scale given by a to b, using t as the point on that scale.
@@ -69,3 +70,78 @@ def display_top(snapshot: tracemalloc.Snapshot, key_type='lineno', limit=5):
print("%s other: %.1f KiB" % (len(other), size / 1024)) print("%s other: %.1f KiB" % (len(other), size / 1024))
total = sum(stat.size for stat in top_stats) total = sum(stat.size for stat in top_stats)
print("Total allocated size: %.1f KiB" % (total / 1024)) print("Total allocated size: %.1f KiB" % (total / 1024))
class ImageMap(GeometricMap): # TODO Implement for image maps -> watch flipped coordinate system
def __init__(self, image_path: Path, H_img_to_world: cv2.Mat, description=None):
# homography_matrix = np.loadtxt('H.txt')
# homography_matrix = H_img_to_world.copy()
# homography_matrix /= homography_matrix[2, 2] # normalise? https://github.com/StanfordASL/Trajectron-plus-plus/issues/14#issuecomment-637880857
# homography_matrix = np.linalg.inv(homography_matrix)
homography_matrix = np.array([
[100, 0,0],
[0, 100,0],
[0,0,1],
])
# RGB png image has 3 layers
img = cv2.imread(str(image_path)).astype(np.uint8) # str(): cv2.imread does not accept a Path
img_reverse = img[::-1,:,:] # origin to bottom left, instead of top-left
layers = np.transpose(img_reverse, (2, 1, 0)) # array order: layers, x, y
# layers =
#scale 255
#alternatively: morph image to world space with a scale, as in trajectron/experiments/nuscenes/process_data.py
super().__init__(layers, homography_matrix, description)
def to_map_points(self, scene_pts):
org_shape = None
if len(scene_pts.shape) > 2:
org_shape = scene_pts.shape
scene_pts = scene_pts.reshape((-1, 2))
N, dims = scene_pts.shape
points_with_one = np.ones((dims + 1, N))
points_with_one[:dims] = scene_pts.T
# map_points = np.fliplr((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
# map_points = np.flipud((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
map_points = (self.homography @ points_with_one).T[..., :dims].astype(np.uint32)
if org_shape is not None:
map_points = map_points.reshape(org_shape)
# print(scene_pts,'->', map_points)
# exit()
return map_points
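
`to_map_points` applies the homography in homogeneous coordinates: the points are stacked as columns with a row of ones appended, multiplied through, and the first two rows are kept as pixel indices. A worked check with the hard-coded 100x scale matrix from `__init__`:

```python
# Sketch: what to_map_points computes under the fixed 100x scale homography.
import numpy as np

H = np.array([[100, 0, 0],
              [0, 100, 0],
              [0, 0, 1]])

scene_pts = np.array([[1.5, 2.0], [0.25, 0.75]])  # world coordinates in meters
pts_h = np.ones((3, scene_pts.shape[0]))
pts_h[:2] = scene_pts.T                           # append the homogeneous row of ones

map_points = (H @ pts_h).T[:, :2].astype(np.uint32)
print(map_points)  # [[150 200], [25 75]] -> indices into the (layers, x, y) array
```

Note the `np.uint32` cast: negative world coordinates would wrap around to huge indices, which is likely part of the flipped-coordinate-system TODO above.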
# nuscenes process_data.py
# type_map = dict()
# canvas_size = (np.round(3 * y_size).astype(int), np.round(3 * x_size).astype(int))
# homography = np.array([[3., 0., 0.], [0., 3., 0.], [0., 0., 3.]])
# layer_names = ['lane', 'road_segment', 'drivable_area', 'road_divider', 'lane_divider', 'stop_line',
# 'ped_crossing', 'stop_line', 'ped_crossing', 'walkway']
# map_mask = (nusc_map.get_map_mask(patch_box, patch_angle, layer_names, canvas_size) * 255.0).astype(
# np.uint8)
# map_mask = np.swapaxes(map_mask, 1, 2) # x axis comes first
# # PEDESTRIANS
# map_mask_pedestrian = np.stack((map_mask[9], map_mask[8], np.max(map_mask[:3], axis=0)), axis=0)
#
# type_map['PEDESTRIAN'] = GeometricMap(data=map_mask_pedestrian, homography=homography, description=', '.join(layer_names))
# Notes: map_mask is a list of masks
# map_mask = []
# _line_geom_to_mask
# def mask_for_lines(...):
# map_mask = np.zeros(canvas_size, np.uint8)
# if layer_name is 'traffic_light':
# return None
# for line in layer_geom:
# new_line = line.intersection(patch)
# if not new_line.is_empty:
# new_line = affinity.affine_transform(new_line,
# [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])
# new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))
# map_mask = self.mask_for_lines(new_line, map_mask)