First attempt to provide image map encoder

2024-12-27 11:28:16 +01:00 · 2024-12-27 11:28:16 +01:00 · 1033516712
commit 1033516712
parent cebe102e74
6 changed files with 418 additions and 14 deletions
--- a/README.md
+++ b/README.md
@ -17,6 +17,8 @@ These are roughly the steps to go from datagathering to training
 3. Run the tracker, e.g. `poetry run tracker --detector ultralytics  --homography ../DATASETS/NAME/homography.json   --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json  --save-for-training EXPERIMENTS/raw/NAME/`
    * Note: You can run this right of the camera stream: `poetry run tracker --eval_device cuda:0 --detector ultralytics   --video-src  rtsp://USER:PW@ADDRESS/STREAM --homography ../DATASETS/NAME/homography.json   --calibration ../DATASETS/NAME/calibration.json  --save-for-training EXPERIMENTS/raw/NAME/`, each recording adding a new file to the `raw` folder.
 4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME` Optionally, smooth tracks: `--smooth-tracks`
+    * Optionally, add a map: ideally an RGB PNG (3 layers with values 0-255)
+        * `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME --smooth-tracks --camera-fps 12 --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --filter-displacement 2 --map-img-path ../DATASETS/NAME/map.png`
 5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data `
 6. The run!
    * On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics  --homography ../DATASETS/NAME/homography.json --eval_data_dict EXPERIMENTS/trajectron-data/hof2s-m_test.pkl  --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/--smooth-predictions  --smooth-tracks --num-samples 3  --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here to running over SSH connection and display on local monitor)
--- a/test_trajectron_maps.ipynb
+++ b/test_trajectron_maps.ipynb
--- a/trap/cv_renderer.py
+++ b/trap/cv_renderer.py
@ -394,8 +394,6 @@ class CvRenderer:

            img = decorate_frame(frame, tracker_frame, prediction_frame, first_time, self.config, self.tracks, self.predictions)

-            img_path = (self.config.output_dir / f"{i:05d}.png").resolve()
-
            logger.debug(f"write frame {frame.time - first_time:.3f}s")
            if self.out_writer:
                self.out_writer.write(img)
@ -403,6 +401,7 @@ class CvRenderer:
                self.streaming_process.stdin.write(img.tobytes())
            if self.config.render_window:
                cv2.imshow('frame',cv2.resize(img, (1920, 1080)))
+                # cv2.imshow('frame',img)
                cv2.waitKey(1)

            # clear out old tracks & predictions:
@ -466,6 +465,8 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
    
    undistorted_img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx)
    dst_img = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H),(config.camera.w,config.camera.h))
+    # dst_img2 = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H), None)
+    # cv2.imwrite('/home/ruben/suspicion/DATASETS/hof3/camera2.png', dst_img2)
    
    overlay = np.zeros(dst_img.shape, np.uint8)
    # Fill image with red color(set each pixel to red)
@ -503,6 +504,7 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
            draw_track_predictions(img, track, int(track.track_id)+1, config.camera, convert_world_points_to_img_points, anim_position=anim_position)
            cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1)
        
+
    base_color = (255,)*3
    info_color = (255,255,0)
    predictor_color = (255,0,255)
--- a/trap/process_data.py
+++ b/trap/process_data.py
@ -11,7 +11,7 @@ import pandas as pd
 import dill
 import tqdm
 import argparse
-from typing import List 
+from typing import List, Optional 

 from trap.config import CameraAction, HomographyAction
 from trap.frame_emitter import Camera
@ -22,6 +22,8 @@ from trajectron.environment import Environment, Scene, Node
 from trajectron.utils import maybe_makedirs
 from trajectron.environment import derivative_of

+from trap.utils import ImageMap
+
 FPS = 12
 desired_max_time = 100
 pred_indices = [2, 3]
@ -81,10 +83,29 @@ class TrackIteration:
 # maybe_makedirs('trajectron-data')
 # for desired_source in [ 'hof2', ]:#  ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:

-def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int, filter_displacement:float):
+def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int, filter_displacement:float, map_img_path: Optional[Path]):
+    name += f"-nostep" if step_size == 1 else f"-step{step_size}"
+    name += f"-conv{smooth_window}" if smooth_tracks else f"-nosmooth"
+    name += f"-f{filter_displacement}" if filter_displacement > 0 else ""
+    name += "-map" if map_img_path else "-nomap"
    name += f"-{datetime.date.today()}"
+
    print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")

+    if map_img_path:
+        if not map_img_path.exists():
+            raise RuntimeError(f"Map image does not exists {map_img_path}")
+        
+        type_map = {}
+        type_map['PEDESTRIAN'] = ImageMap(
+            map_img_path,
+            camera.H,
+            f"Map from {map_img_path.name}"
+        )
+    else:
+        type_map = None
+
+    
    nl = 0
    l = 0
    data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
@ -221,7 +242,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
            last_ts = max([n.last_timestep for n in nodes])

            # print(sorted([n.first_timestep for n in nodes]))
-            scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None)
+            # TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14
+            scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
            scene.nodes.extend(nodes)
            scenes.append(scene)
            # print(scene)
@ -271,6 +293,11 @@ def main():
                    # type=Path,
                    default=0,
                    type=float)
+    parser.add_argument("--map-img-path",
+                    help="Image file representing a mask of a map (uses camera homography, assumes: 3 layers, values 0-255)",
+                    # type=Path,
+                    default=None,
+                    type=Path)
    
    args = parser.parse_args()
    
@ -285,6 +312,7 @@ def main():
        args.bin_positions,
        args.camera,
        args.step_size,
-        filter_displacement=args.filter_displacement
+        filter_displacement=args.filter_displacement,
+        map_img_path=args.map_img_path
    )

--- a/trap/tools.py
+++ b/trap/tools.py
@ -198,7 +198,10 @@ def transition_path_points(path: np.array, t: float):
    lengths = np.sqrt(np.sum(np.diff(path, axis=0)**2, axis=1))
    cum_lenghts = np.cumsum(lengths)
    # distance = cum_lenghts[-1] * t
-    ts = np.concatenate((np.array([0.]), cum_lenghts / cum_lenghts[-1]))
+    # ts = np.concatenate((np.array([0.]), cum_lenghts / cum_lenghts[-1]))
+    # print(cum_lenghts[-1])
+    DRAW_SPEED = 22 # fixed speed (independent of lenght) TODO)) make variable
+    ts = np.concatenate((np.array([0.]), cum_lenghts / DRAW_SPEED))
    new_path = [path[0]]

    for a, b, t_a, t_b in zip(path[:-1], path[1:], ts[:-1], ts[1:]):
@ -209,7 +212,6 @@ def transition_path_points(path: np.array, t: float):
        relative_t = inv_lerp(t_a, t_b, t)
        x = lerp(a[0], b[0], relative_t)
        y = lerp(a[1], b[1], relative_t)
-        print(relative_t, a , b, x, y)
        new_path.append([x,y])
        break
    return np.array(new_path)
@ -235,12 +237,13 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
    
    for pred_i, pred in enumerate(track.predictions):
        pred_coords = pred #cv2.perspectiveTransform(np.array([pred]), inv_H)[0].tolist()
-        # line_points = np.concatenate(([current_point], pred_coords)) # 'current point' is amoving target
-        line_points = pred_coords
+        # line_points = pred_coords
+        line_points = np.concatenate(([current_point], pred_coords)) # 'current point' is amoving target
        # print(pred_coords, current_point, line_points)
        line_points = transition_path_points(line_points, slide_t)
        if convert_points:
            line_points = convert_points(line_points)
+        line_points = np.rint(line_points).astype(int)
        # color = (128,0,128) if pred_i else (128,128,0)

        color = bgr_colors[color_index % len(bgr_colors)]
@ -260,7 +263,8 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
            #     start = [int(p) for p in pred_coords[ci-1]]
            # end = [int(p) for p in pred_coords[ci]]
            # print(np.rint(start),np.rint(end).tolist())
-            cv2.line(img, np.rint(start).astype(int), np.rint(end).astype(int), color, 1, lineType=cv2.LINE_AA)
+            cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA)
+            pass
            # cv2.circle(img, end, 2, color, 1, lineType=cv2.LINE_AA)

 def draw_trackjectron_history(img: cv2.Mat, track: Track, color_index: int, convert_points: Optional[Callable]):
--- a/trap/utils.py
+++ b/trap/utils.py
@ -1,11 +1,12 @@
 # lerp & inverse lerp from https://gist.github.com/laundmo/b224b1f4c8ef6ca5fe47e132c8deab56
 import linecache
 import os
+from pathlib import Path
 import tracemalloc
 from typing import Iterable
 import cv2
 import numpy as np
-
+from trajectron.environment.map import GeometricMap

 def lerp(a: float, b: float, t: float) -> float:
    """Linear interpolate on the scale given by a to b, using t as the point on that scale.
@ -69,3 +70,78 @@ def display_top(snapshot: tracemalloc.Snapshot, key_type='lineno', limit=5):
        print("%s other: %.1f KiB" % (len(other), size / 1024))
    total = sum(stat.size for stat in top_stats)
    print("Total allocated size: %.1f KiB" % (total / 1024))
+
+
+class ImageMap(GeometricMap):  # TODO Implement for image maps -> watch flipped coordinate system
+    """GeometricMap whose layer data is read from an image file.
+
+    The image is loaded with OpenCV, flipped vertically and transposed so the
+    array order becomes (layers, x, y) before it is handed to GeometricMap.
+
+    NOTE(review): the homography passed to the parent is currently a fixed
+    scale of 100 on x and y; the ``H_img_to_world`` argument is accepted but
+    not used (the code that derived the matrix from it is commented out).
+    """
+
+    def __init__(self, image_path: Path, H_img_to_world: cv2.Mat, description=None):
+        """Load ``image_path`` and initialise the parent map with its layers.
+
+        image_path: path of the image file to read.
+        H_img_to_world: image-to-world homography; currently unused, see the
+            commented-out lines below.
+        description: passed through unchanged to GeometricMap.
+        """
+        # homography_matrix = np.loadtxt('H.txt')
+        # homography_matrix = H_img_to_world.copy()
+        # homography_matrix /= homography_matrix[2, 2] # normalise? https://github.com/StanfordASL/Trajectron-plus-plus/issues/14#issuecomment-637880857
+        # homography_matrix = np.linalg.inv(homography_matrix)
+        # Placeholder transform: multiplies x and y by 100 and leaves the
+        # homogeneous coordinate untouched.
+        # presumably 100 map pixels per world unit -- TODO confirm
+        homography_matrix = np.array([
+            [100, 0,0],
+            [0, 100,0],
+            [0,0,1],
+        ])
+
+        # RGB png image has 3 layers
+        # NOTE(review): cv2.imread is documented to return channels in BGR order
+        # and None when the file cannot be read (which would make .astype fail
+        # here) -- confirm the intended layer order and add a check if needed.
+        # NOTE(review): image_path is a Path; older OpenCV builds may require
+        # str(image_path) -- verify against the installed version.
+        img = cv2.imread(image_path).astype(np.uint8)
+        img_reverse = img[::-1,:,:] # origin to bottom left, instead of top-left
+        layers = np.transpose(img_reverse, (2, 1, 0)) # array order: layers, x, y
+        # layers = 
+
+        #scale 255
+        
+        #alternatively: morph image to world space with a scale, as in trajectron/experiments/nuscenes/process_data.py
+
+        super().__init__(layers, homography_matrix, description)
+        
+    def to_map_points(self, scene_pts):
+        """Transform scene points to map points using ``self.homography``.
+
+        scene_pts: array of 2D points. Inputs with more than two dimensions
+            are flattened to (-1, 2) for the computation and reshaped back to
+            their original shape afterwards.
+
+        Returns the transformed points cast to ``np.uint32``.
+
+        NOTE(review): the result is not divided by the homogeneous coordinate,
+        which is fine for the current scale-only matrix but not for a general
+        homography. Casting to uint32 also cannot represent negative
+        coordinates -- confirm inputs are always non-negative.
+        """
+        org_shape = None
+        if len(scene_pts.shape) > 2:
+            # Remember the original shape so it can be restored on return.
+            org_shape = scene_pts.shape
+            scene_pts = scene_pts.reshape((-1, 2))
+        N, dims = scene_pts.shape
+        # Build homogeneous coordinates as a (dims + 1, N) array: the point
+        # coordinates fill the first rows, the last row stays 1.
+        points_with_one = np.ones((dims + 1, N))
+        points_with_one[:dims] = scene_pts.T
+        # map_points = np.fliplr((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
+        # map_points = np.flipud((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
+        # Apply the homography, drop the homogeneous column and cast to integers.
+        # presumably self.homography is set by GeometricMap.__init__ -- verify
+        map_points = (self.homography @ points_with_one).T[..., :dims].astype(np.uint32)
+        if org_shape is not None:
+            map_points = map_points.reshape(org_shape)
+        # print(scene_pts,'->', map_points)
+        # exit()
+        return map_points
+
+
+# Reference, from the nuScenes process_data.py (trajectron/experiments/nuscenes/process_data.py):
+# type_map = dict()
+# canvas_size = (np.round(3 * y_size).astype(int), np.round(3 * x_size).astype(int))
+# homography = np.array([[3., 0., 0.], [0., 3., 0.], [0., 0., 3.]])
+# layer_names = ['lane', 'road_segment', 'drivable_area', 'road_divider', 'lane_divider', 'stop_line',
+#                'ped_crossing', 'stop_line', 'ped_crossing', 'walkway']
+# map_mask = (nusc_map.get_map_mask(patch_box, patch_angle, layer_names, canvas_size) * 255.0).astype(
+#     np.uint8)
+# map_mask = np.swapaxes(map_mask, 1, 2)  # x axis comes first
+# # PEDESTRIANS
+# map_mask_pedestrian = np.stack((map_mask[9], map_mask[8], np.max(map_mask[:3], axis=0)), axis=0)
+# 
+# type_map['PEDESTRIAN'] = GeometricMap(data=map_mask_pedestrian, homography=homography, description=', '.join(layer_names))
+
+# Notes: map_mask is a list of masks
+# map_mask = []
+# _line_geom_to_mask
+# def mask_for_lines(...):
+# map_mask = np.zeros(canvas_size, np.uint8)
+
+#         if layer_name is 'traffic_light':
+#             return None
+
+#         for line in layer_geom:
+#             new_line = line.intersection(patch)
+#             if not new_line.is_empty:
+#                 new_line = affinity.affine_transform(new_line,
+#                                                      [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])
+#                 new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))
+
+#                 map_mask = self.mask_for_lines(new_line, map_mask)