First attempt to provide image map encoder

This commit is contained in:
Ruben van de Ven 2024-12-27 11:28:16 +01:00
parent cebe102e74
commit 1033516712
6 changed files with 418 additions and 14 deletions

View file

@@ -17,6 +17,8 @@ These are roughly the steps to go from data gathering to training
3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
* Note: You can run this right off the camera stream: `poetry run tracker --eval_device cuda:0 --detector ultralytics --video-src rtsp://USER:PW@ADDRESS/STREAM --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`; each recording adds a new file to the `raw` folder.
4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME` Optionally, smooth tracks: `--smooth-tracks`
* Optionally, add a map: ideally an RGB PNG with 3 channels of values 0-255 (see the sketch after this list)
* `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME --smooth-tracks --camera-fps 12 --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --filter-displacement 2 --map-img-path ../DATASETS/NAME/map.png`
5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data`
6. Then run!
* On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --eval_data_dict EXPERIMENTS/trajectron-data/hof2s-m_test.pkl --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/ --smooth-predictions --smooth-tracks --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here to run over an SSH connection and display on the local monitor)
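
The map image referenced in step 4 is just a three-channel, 8-bit mask rendered in the same plane as the homography. A minimal sketch of producing one with OpenCV; the canvas size and polygon are hypothetical placeholders to be replaced with your scene's walkable area:

```python
# Sketch: build a 3-channel, 0-255 map mask for `process_data --map-img-path`.
# Canvas size and polygon coordinates are hypothetical placeholders.
import cv2
import numpy as np

w, h = 1920, 1080
img = np.zeros((h, w, 3), dtype=np.uint8)          # black = background

walkable = np.array([[200, 900], [1700, 900], [1500, 200], [400, 250]])
cv2.fillPoly(img, [walkable], (255, 255, 255))     # white = walkable area

cv2.imwrite("../DATASETS/NAME/map.png", img)       # path used in the example above
```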

test_trajectron_maps.ipynb (new file, 292 lines added)

File diff suppressed because one or more lines are too long

View file

@@ -394,8 +394,6 @@ class CvRenderer:
img = decorate_frame(frame, tracker_frame, prediction_frame, first_time, self.config, self.tracks, self.predictions)
img_path = (self.config.output_dir / f"{i:05d}.png").resolve()
logger.debug(f"write frame {frame.time - first_time:.3f}s")
if self.out_writer:
self.out_writer.write(img)
@@ -403,6 +401,7 @@ class CvRenderer:
self.streaming_process.stdin.write(img.tobytes())
if self.config.render_window:
cv2.imshow('frame',cv2.resize(img, (1920, 1080)))
# cv2.imshow('frame',img)
cv2.waitKey(1)
# clear out old tracks & predictions:
@@ -466,6 +465,8 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
undistorted_img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx)
dst_img = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H),(config.camera.w,config.camera.h))
# dst_img2 = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H), None)
# cv2.imwrite('/home/ruben/suspicion/DATASETS/hof3/camera2.png', dst_img2)
overlay = np.zeros(dst_img.shape, np.uint8)
# Fill image with red color (set each pixel to red)
@@ -503,6 +504,7 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
draw_track_predictions(img, track, int(track.track_id)+1, config.camera, convert_world_points_to_img_points, anim_position=anim_position)
cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1)
base_color = (255,)*3
info_color = (255,255,0)
predictor_color = (255,0,255)
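
For context on the `decorate_frame` hunk above: the frame is first undistorted with the camera intrinsics, then warped into (scaled) world space with the homography. A standalone sketch of that pipeline; the calibration values below are dummies standing in for what is loaded from `calibration.json` and `homography.json`:

```python
# Sketch of the undistort-then-warp step in decorate_frame, with dummy calibration.
import cv2
import numpy as np

frame = np.full((1080, 1920, 3), 127, dtype=np.uint8)   # stand-in for a video frame
mtx = np.array([[1000., 0., 960.], [0., 1000., 540.], [0., 0., 1.]])
dist = np.zeros(5)                                      # pretend: no lens distortion
newcameramtx = mtx.copy()
H = np.eye(3)                                           # image-to-world homography

undistorted = cv2.undistort(frame, mtx, dist, None, newcameramtx)
# scale world coordinates (meters) back into pixels before warping,
# roughly what convert_world_space_to_img_space does
S = np.array([[100., 0., 0.], [0., 100., 0.], [0., 0., 1.]])
dst_img = cv2.warpPerspective(undistorted, S @ H, (1920, 1080))
```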

View file

@@ -11,7 +11,7 @@ import pandas as pd
import dill
import tqdm
import argparse
from typing import List, Optional
from trap.config import CameraAction, HomographyAction
from trap.frame_emitter import Camera
@@ -22,6 +22,8 @@ from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of
from trap.utils import ImageMap
FPS = 12
desired_max_time = 100
pred_indices = [2, 3]
@@ -81,10 +83,29 @@ class TrackIteration:
# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:
def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int, filter_displacement: float, map_img_path: Optional[Path]):
name += f"-nostep" if step_size == 1 else f"-step{step_size}"
name += f"-conv{smooth_window}" if smooth_tracks else f"-nosmooth"
name += f"-f{filter_displacement}" if filter_displacement > 0 else ""
name += "-map" if map_img_path else "-nomap"
name += f"-{datetime.date.today()}" name += f"-{datetime.date.today()}"
print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}") print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")
if map_img_path:
if not map_img_path.exists():
raise RuntimeError(f"Map image does not exist: {map_img_path}")
type_map = {}
type_map['PEDESTRIAN'] = ImageMap(
map_img_path,
camera.H,
f"Map from {map_img_path.name}"
)
else:
type_map = None
nl = 0
l = 0
data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
@@ -221,7 +242,8 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
last_ts = max([n.last_timestep for n in nodes])
# print(sorted([n.first_timestep for n in nodes]))
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None) # TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
scene.nodes.extend(nodes)
scenes.append(scene)
# print(scene)
@@ -271,6 +293,11 @@ def main():
# type=Path,
default=0,
type=float)
parser.add_argument("--map-img-path",
help="Image file representing a mask of a map (uses camera homography, assumes: 3 layers, values 0-255)",
# type=Path,
default=None,
type=Path)
args = parser.parse_args()
@@ -285,6 +312,7 @@ def main():
args.bin_positions,
args.camera,
args.step_size,
filter_displacement=args.filter_displacement,
map_img_path=args.map_img_path
)
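
Since the map ends up pickled inside the Environment, a quick way to verify the wiring is to load the generated file back and inspect `scene.map`. A sketch, assuming the file name produced by the suffix logic above (the name is a placeholder for whatever `process_data` printed) and the usual attribute layout of Trajectron's map objects:

```python
# Sketch: check that the exported environment actually carries the image map.
# The pickle name below is a placeholder.
import dill

with open("EXPERIMENTS/trajectron-data/NAME-nostep-nosmooth-map-2024-12-27_train.pkl", "rb") as f:
    env = dill.load(f)

for scene in env.scenes:
    assert scene.map is not None and "PEDESTRIAN" in scene.map
    print(scene.name, scene.map["PEDESTRIAN"].description)
```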

View file

@@ -198,7 +198,10 @@ def transition_path_points(path: np.array, t: float):
lengths = np.sqrt(np.sum(np.diff(path, axis=0)**2, axis=1))
cum_lenghts = np.cumsum(lengths)
# distance = cum_lenghts[-1] * t
# ts = np.concatenate((np.array([0.]), cum_lenghts / cum_lenghts[-1]))
# print(cum_lenghts[-1])
DRAW_SPEED = 22 # fixed speed (independent of length) TODO)) make variable
ts = np.concatenate((np.array([0.]), cum_lenghts / DRAW_SPEED))
new_path = [path[0]]
for a, b, t_a, t_b in zip(path[:-1], path[1:], ts[:-1], ts[1:]):
@@ -209,7 +212,6 @@ def transition_path_points(path: np.array, t: float):
relative_t = inv_lerp(t_a, t_b, t)
x = lerp(a[0], b[0], relative_t)
y = lerp(a[1], b[1], relative_t)
print(relative_t, a , b, x, y)
new_path.append([x,y])
break
return np.array(new_path)
@@ -235,12 +237,13 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
for pred_i, pred in enumerate(track.predictions):
pred_coords = pred #cv2.perspectiveTransform(np.array([pred]), inv_H)[0].tolist()
# line_points = pred_coords
line_points = np.concatenate(([current_point], pred_coords)) # 'current point' is a moving target
# print(pred_coords, current_point, line_points)
line_points = transition_path_points(line_points, slide_t)
if convert_points:
line_points = convert_points(line_points)
line_points = np.rint(line_points).astype(int)
# color = (128,0,128) if pred_i else (128,128,0)
color = bgr_colors[color_index % len(bgr_colors)]
@@ -260,7 +263,8 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
# start = [int(p) for p in pred_coords[ci-1]]
# end = [int(p) for p in pred_coords[ci]]
# print(np.rint(start),np.rint(end).tolist())
cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA)
pass
# cv2.circle(img, end, 2, color, 1, lineType=cv2.LINE_AA)
def draw_trackjectron_history(img: cv2.Mat, track: Track, color_index: int, convert_points: Optional[Callable]):

View file

@@ -1,11 +1,12 @@
# lerp & inverse lerp from https://gist.github.com/laundmo/b224b1f4c8ef6ca5fe47e132c8deab56 # lerp & inverse lerp from https://gist.github.com/laundmo/b224b1f4c8ef6ca5fe47e132c8deab56
import linecache import linecache
import os import os
from pathlib import Path
import tracemalloc
from typing import Iterable
import cv2
import numpy as np
from trajectron.environment.map import GeometricMap
def lerp(a: float, b: float, t: float) -> float:
"""Linear interpolate on the scale given by a to b, using t as the point on that scale.
@@ -69,3 +70,78 @@ def display_top(snapshot: tracemalloc.Snapshot, key_type='lineno', limit=5):
print("%s other: %.1f KiB" % (len(other), size / 1024)) print("%s other: %.1f KiB" % (len(other), size / 1024))
total = sum(stat.size for stat in top_stats) total = sum(stat.size for stat in top_stats)
print("Total allocated size: %.1f KiB" % (total / 1024)) print("Total allocated size: %.1f KiB" % (total / 1024))
class ImageMap(GeometricMap): # TODO Implement for image maps -> watch flipped coordinate system
def __init__(self, image_path: Path, H_img_to_world: cv2.Mat, description=None):
# homography_matrix = np.loadtxt('H.txt')
# homography_matrix = H_img_to_world.copy()
# homography_matrix /= homography_matrix[2, 2] # normalise? https://github.com/StanfordASL/Trajectron-plus-plus/issues/14#issuecomment-637880857
# homography_matrix = np.linalg.inv(homography_matrix)
homography_matrix = np.array([
[100, 0,0],
[0, 100,0],
[0,0,1],
])
# RGB png image has 3 layers
img = cv2.imread(str(image_path)).astype(np.uint8) # str(): cv2.imread does not accept a Path
img_reverse = img[::-1,:,:] # origin to bottom left, instead of top-left
layers = np.transpose(img_reverse, (2, 1, 0)) # array order: layers, x, y
# layers =
#scale 255
#alternatively: morph image to world space with a scale, as in trajectron/experiments/nuscenes/process_data.py
super().__init__(layers, homography_matrix, description)
def to_map_points(self, scene_pts):
org_shape = None
if len(scene_pts.shape) > 2:
org_shape = scene_pts.shape
scene_pts = scene_pts.reshape((-1, 2))
N, dims = scene_pts.shape
points_with_one = np.ones((dims + 1, N))
points_with_one[:dims] = scene_pts.T
# map_points = np.fliplr((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
# map_points = np.flipud((self.homography @ points_with_one).T[..., :dims]).astype(np.uint32)
map_points = (self.homography @ points_with_one).T[..., :dims].astype(np.uint32)
if org_shape is not None:
map_points = map_points.reshape(org_shape)
# print(scene_pts,'->', map_points)
# exit()
return map_points
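
`to_map_points` applies the homography in homogeneous coordinates: the points are stacked as columns with a row of ones appended, multiplied through, and the first two rows are kept as pixel indices. A worked check with the hard-coded 100x scale matrix from `__init__`:

```python
# Sketch: what to_map_points computes under the fixed 100x scale homography.
import numpy as np

H = np.array([[100, 0, 0],
              [0, 100, 0],
              [0, 0, 1]])

scene_pts = np.array([[1.5, 2.0], [0.25, 0.75]])  # world coordinates in meters
pts_h = np.ones((3, scene_pts.shape[0]))
pts_h[:2] = scene_pts.T                           # append the homogeneous row of ones

map_points = (H @ pts_h).T[:, :2].astype(np.uint32)
print(map_points)  # [[150 200], [25 75]] -> indices into the (layers, x, y) array
```

Note the `np.uint32` cast: negative world coordinates would wrap around to huge indices, which is likely part of the flipped-coordinate-system TODO above.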
# nuscenes process_data.py
# type_map = dict()
# canvas_size = (np.round(3 * y_size).astype(int), np.round(3 * x_size).astype(int))
# homography = np.array([[3., 0., 0.], [0., 3., 0.], [0., 0., 3.]])
# layer_names = ['lane', 'road_segment', 'drivable_area', 'road_divider', 'lane_divider', 'stop_line',
# 'ped_crossing', 'stop_line', 'ped_crossing', 'walkway']
# map_mask = (nusc_map.get_map_mask(patch_box, patch_angle, layer_names, canvas_size) * 255.0).astype(
# np.uint8)
# map_mask = np.swapaxes(map_mask, 1, 2) # x axis comes first
# # PEDESTRIANS
# map_mask_pedestrian = np.stack((map_mask[9], map_mask[8], np.max(map_mask[:3], axis=0)), axis=0)
#
# type_map['PEDESTRIAN'] = GeometricMap(data=map_mask_pedestrian, homography=homography, description=', '.join(layer_names))
# Notes: map_mask is a list of masks
# map_mask = []
# _line_geom_to_mask
# def mask_for_lines(...):
# map_mask = np.zeros(canvas_size, np.uint8)
# if layer_name is 'traffic_light':
# return None
# for line in layer_geom:
# new_line = line.intersection(patch)
# if not new_line.is_empty:
# new_line = affinity.affine_transform(new_line,
# [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])
# new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))
# map_mask = self.mask_for_lines(new_line, map_mask)