Fixes to config.json for heading derivatives and map rendering

render trajectron's cropped maps in cv renderer
Map in inference
2024-12-28 21:02:07 +01:00 · 2024-12-28 19:50:44 +01:00 · 2024-12-27 16:12:50 +01:00
12 changed files with 6738 additions and 3225 deletions
--- a/EXPERIMENTS/config.json
+++ b/EXPERIMENTS/config.json
@ -0,0 +1,130 @@
+{
+    "batch_size": 512,
+    "grad_clip": 1.0,
+    "learning_rate_style": "exp",
+    "learning_rate": 0.01, 
+    "min_learning_rate": 1e-05,
+    "learning_decay_rate": 0.9999,
+    "prediction_horizon": 50,
+    "minimum_history_length": 10,
+    "maximum_history_length": 80,
+    "map_encoder": {
+        "PEDESTRIAN": {
+            "heading_state_index": [2, 3],
+            "patch_size": [
+                50,
+                10,
+                50,
+                90
+            ],
+            "map_channels": 3,
+            "hidden_channels": [
+                10,
+                20,
+                10,
+                1
+            ],
+            "output_size": 32,
+            "masks": [
+                5,
+                5,
+                5,
+                5
+            ],
+            "strides": [
+                1,
+                1,
+                1,
+                1
+            ],
+            "dropout": 0.5
+        }
+    },
+    "k": 1,
+    "k_eval": 25,
+    "kl_min": 0.07,
+    "kl_weight": 100.0,
+    "kl_weight_start": 0,
+    "kl_decay_rate": 0.99995,
+    "kl_crossover": 400,
+    "kl_sigmoid_divisor": 4,
+    "rnn_kwargs": {
+        "dropout_keep_prob": 0.75
+    },
+    "MLP_dropout_keep_prob": 0.9,
+    "enc_rnn_dim_edge": 32,
+    "enc_rnn_dim_edge_influence": 32,
+    "enc_rnn_dim_history": 32,
+    "enc_rnn_dim_future": 32,
+    "dec_rnn_dim": 128,
+    "q_z_xy_MLP_dims": null,
+    "p_z_x_MLP_dims": 32,
+    "GMM_components": 1,
+    "log_p_yt_xz_max": 6,
+    "N": 1,
+    "K": 25,
+    "tau_init": 2.0,
+    "tau_final": 0.05,
+    "tau_decay_rate": 0.997,
+    "use_z_logit_clipping": true,
+    "z_logit_clip_start": 0.05,
+    "z_logit_clip_final": 5.0,
+    "z_logit_clip_crossover": 300,
+    "z_logit_clip_divisor": 5,
+    "dynamic": {
+        "PEDESTRIAN": {
+            "name": "SingleIntegrator",
+            "distribution": false,
+            "limits": {}
+        }
+    },
+    "state": {
+        "PEDESTRIAN": {
+            "position": [
+                "x",
+                "y"
+            ],
+            "velocity": [
+                "x",
+                "y"
+            ],
+            "acceleration": [
+                "x",
+                "y"
+            ]
+        }
+    },
+    "pred_state": {
+        "PEDESTRIAN": {
+            "velocity": [
+                "x",
+                "y"
+            ]
+        }
+    },
+    "log_histograms": false,
+    "dynamic_edges": "yes",
+    "edge_state_combine_method": "sum",
+    "edge_influence_combine_method": "attention",
+    "edge_addition_filter": [
+        0.25,
+        0.5,
+        0.75,
+        1.0
+    ],
+    "edge_removal_filter": [
+        1.0,
+        0.0
+    ],
+    "offline_scene_graph": "yes",
+    "incl_robot_node": false,
+    "node_freq_mult_train": false,
+    "node_freq_mult_eval": false,
+    "scene_freq_mult_train": false,
+    "scene_freq_mult_eval": false,
+    "scene_freq_mult_viz": false,
+    "edge_encoding": true,
+    "use_map_encoding": true,
+    "augment": false,
+    "override_attention_radius": []
+}
--- a/pyproject.toml
+++ b/pyproject.toml
@ -13,7 +13,6 @@ process_data = "trap.process_data:main"
 blacklist = "trap.tools:blacklist_tracks"
 rewrite_tracks = "trap.tools:rewrite_raw_track_files"

-
 [tool.poetry.dependencies]
 python = "^3.10,<3.12,"

--- a/test_custom_rnn.ipynb
+++ b/test_custom_rnn.ipynb
--- a/test_homography.ipynb
+++ b/test_homography.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@ -15,7 +15,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@ -24,7 +24,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@ -42,13 +42,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "from os import PathLike\n",
    "\n",
+    "\n",
+    "\n",
    "def render_projection(src, dst, img: PathLike, points = []):\n",
+    "    return render_projection_frame(src, dst, cv2.imread(str(img)), points)\n",
+    "\n",
+    "def render_projection_frame(src, dst, frame, points = []):\n",
    "    x_min = min(dst[:,0])\n",
    "    if x_min < 0:\n",
    "        dst[:,0] += x_min * -1\n",
@ -66,7 +71,7 @@
    "    H, status = cv2.findHomography(src,dst)\n",
    "    f, axes = plt.subplots(1, 2, figsize=(16,8))\n",
    "\n",
-    "    img = cv2.imread(str(img))\n",
+    "    img = frame\n",
    "    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
    "\n",
    "    for i, p in enumerate(src):\n",
@ -97,7 +102,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@ -118,7 +123,7 @@
       "       [-2.89572527e-04,  1.97232411e-03,  1.00000000e+00]])"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -141,7 +146,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
@ -201,7 +206,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@ -240,7 +245,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@ -251,7 +256,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
@ -301,7 +306,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@ -346,7 +351,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
@ -371,6 +376,615 @@
    "print(f\"{minx} < x < {maxx}\")\n",
    "print(f\"{miny} < y < {maxy}\")"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Expand to multiple video files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# collect all files\n",
+    "video_paths = list(Path('../DATASETS/hof/').glob(\"*.m4v\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "../DATASETS/hof/webcam20240110-4.m4v\n",
+      "[[[101 120 129]\n",
+      "  [101 120 129]\n",
+      "  [101 120 129]\n",
+      "  ...\n",
+      "  [122 110 112]\n",
+      "  [121 120 100]\n",
+      "  [123 122 102]]\n",
+      "\n",
+      " [[101 120 129]\n",
+      "  [101 120 129]\n",
+      "  [101 120 129]\n",
+      "  ...\n",
+      "  [122 110 112]\n",
+      "  [121 120 100]\n",
+      "  [123 122 102]]\n",
+      "\n",
+      " [[100 119 128]\n",
+      "  [101 120 129]\n",
+      "  [101 120 129]\n",
+      "  ...\n",
+      "  [128 112 110]\n",
+      "  [128 120 101]\n",
+      "  [130 122 103]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[172 184 189]\n",
+      "  [172 184 189]\n",
+      "  [172 184 189]\n",
+      "  ...\n",
+      "  [149 203 245]\n",
+      "  [149 203 245]\n",
+      "  [149 203 245]]\n",
+      "\n",
+      " [[172 184 189]\n",
+      "  [172 184 189]\n",
+      "  [172 184 189]\n",
+      "  ...\n",
+      "  [151 203 245]\n",
+      "  [151 203 245]\n",
+      "  [151 203 245]]\n",
+      "\n",
+      " [[172 184 189]\n",
+      "  [172 184 189]\n",
+      "  [172 184 189]\n",
+      "  ...\n",
+      "  [151 203 245]\n",
+      "  [151 203 245]\n",
+      "  [151 203 245]]]\n",
+      "../DATASETS/hof/webcam20231103-4.m4v\n",
+      "[[[172 164 145]\n",
+      "  [172 164 145]\n",
+      "  [166 162 152]\n",
+      "  ...\n",
+      "  [146 125 104]\n",
+      "  [146 125 104]\n",
+      "  [146 125 104]]\n",
+      "\n",
+      " [[172 164 145]\n",
+      "  [172 164 145]\n",
+      "  [166 162 152]\n",
+      "  ...\n",
+      "  [146 125 104]\n",
+      "  [146 125 104]\n",
+      "  [146 125 104]]\n",
+      "\n",
+      " [[172 162 148]\n",
+      "  [172 162 148]\n",
+      "  [168 162 150]\n",
+      "  ...\n",
+      "  [146 125 104]\n",
+      "  [146 125 104]\n",
+      "  [146 125 104]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[194 220 232]\n",
+      "  [194 220 232]\n",
+      "  [194 220 232]\n",
+      "  ...\n",
+      "  [209 217 214]\n",
+      "  [209 217 214]\n",
+      "  [209 217 214]]\n",
+      "\n",
+      " [[192 222 234]\n",
+      "  [192 222 234]\n",
+      "  [192 222 234]\n",
+      "  ...\n",
+      "  [205 216 217]\n",
+      "  [205 216 217]\n",
+      "  [205 216 217]]\n",
+      "\n",
+      " [[193 223 235]\n",
+      "  [193 223 235]\n",
+      "  [193 223 235]\n",
+      "  ...\n",
+      "  [205 216 217]\n",
+      "  [205 216 217]\n",
+      "  [205 216 217]]]\n",
+      "../DATASETS/hof/webcam20231103-2.m4v\n",
+      "[[[180 173 165]\n",
+      "  [180 173 165]\n",
+      "  [180 173 165]\n",
+      "  ...\n",
+      "  [158 132 107]\n",
+      "  [158 132 107]\n",
+      "  [158 132 107]]\n",
+      "\n",
+      " [[180 173 165]\n",
+      "  [180 173 165]\n",
+      "  [180 173 165]\n",
+      "  ...\n",
+      "  [158 132 107]\n",
+      "  [158 132 107]\n",
+      "  [158 132 107]]\n",
+      "\n",
+      " [[181 174 166]\n",
+      "  [179 172 164]\n",
+      "  [180 173 165]\n",
+      "  ...\n",
+      "  [156 130 105]\n",
+      "  [156 130 105]\n",
+      "  [156 130 105]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[195 212 221]\n",
+      "  [195 212 221]\n",
+      "  [195 212 221]\n",
+      "  ...\n",
+      "  [208 213 211]\n",
+      "  [208 213 211]\n",
+      "  [208 213 211]]\n",
+      "\n",
+      " [[197 215 229]\n",
+      "  [197 215 229]\n",
+      "  [197 215 229]\n",
+      "  ...\n",
+      "  [206 214 213]\n",
+      "  [206 214 213]\n",
+      "  [206 214 213]]\n",
+      "\n",
+      " [[199 217 231]\n",
+      "  [199 217 231]\n",
+      "  [199 217 231]\n",
+      "  ...\n",
+      "  [206 214 213]\n",
+      "  [206 214 213]\n",
+      "  [206 214 213]]]\n",
+      "../DATASETS/hof/webcam20231103-3.m4v\n",
+      "[[[185 177 165]\n",
+      "  [185 177 165]\n",
+      "  [181 176 168]\n",
+      "  ...\n",
+      "  [156 142 135]\n",
+      "  [156 142 135]\n",
+      "  [156 142 135]]\n",
+      "\n",
+      " [[185 177 165]\n",
+      "  [185 177 165]\n",
+      "  [181 176 168]\n",
+      "  ...\n",
+      "  [156 142 135]\n",
+      "  [156 142 135]\n",
+      "  [156 142 135]]\n",
+      "\n",
+      " [[188 177 168]\n",
+      "  [188 177 168]\n",
+      "  [184 177 169]\n",
+      "  ...\n",
+      "  [156 142 135]\n",
+      "  [156 142 135]\n",
+      "  [156 142 135]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[189 225 233]\n",
+      "  [189 225 233]\n",
+      "  [189 225 233]\n",
+      "  ...\n",
+      "  [211 219 223]\n",
+      "  [211 219 223]\n",
+      "  [211 219 223]]\n",
+      "\n",
+      " [[197 228 225]\n",
+      "  [197 228 225]\n",
+      "  [197 228 225]\n",
+      "  ...\n",
+      "  [208 220 225]\n",
+      "  [208 220 225]\n",
+      "  [208 220 225]]\n",
+      "\n",
+      " [[197 228 225]\n",
+      "  [197 228 225]\n",
+      "  [197 228 225]\n",
+      "  ...\n",
+      "  [208 220 225]\n",
+      "  [208 220 225]\n",
+      "  [208 220 225]]]\n",
+      "../DATASETS/hof/webcam20240619-1.m4v\n",
+      "\tNo homography for ../DATASETS/hof/webcam20240619-1.m4v\n",
+      "[[[106 105 115]\n",
+      "  [108 107 117]\n",
+      "  [112 111 121]\n",
+      "  ...\n",
+      "  [214 178 141]\n",
+      "  [228 187 146]\n",
+      "  [229 188 147]]\n",
+      "\n",
+      " [[105 104 114]\n",
+      "  [107 106 116]\n",
+      "  [111 110 120]\n",
+      "  ...\n",
+      "  [215 182 144]\n",
+      "  [228 187 146]\n",
+      "  [228 187 146]]\n",
+      "\n",
+      " [[104 103 113]\n",
+      "  [105 104 114]\n",
+      "  [109 108 118]\n",
+      "  ...\n",
+      "  [224 187 148]\n",
+      "  [227 187 149]\n",
+      "  [226 186 148]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[146 133 122]\n",
+      "  [146 133 122]\n",
+      "  [146 133 122]\n",
+      "  ...\n",
+      "  [173 214 240]\n",
+      "  [175 214 240]\n",
+      "  [175 214 240]]\n",
+      "\n",
+      " [[147 134 123]\n",
+      "  [147 134 123]\n",
+      "  [147 134 123]\n",
+      "  ...\n",
+      "  [177 220 234]\n",
+      "  [179 219 234]\n",
+      "  [179 219 234]]\n",
+      "\n",
+      " [[149 136 125]\n",
+      "  [149 136 125]\n",
+      "  [149 136 125]\n",
+      "  ...\n",
+      "  [179 218 235]\n",
+      "  [181 218 235]\n",
+      "  [181 218 235]]]\n",
+      "../DATASETS/hof/webcam20240110-2.m4v\n",
+      "[[[190 227 226]\n",
+      "  [190 227 226]\n",
+      "  [190 227 226]\n",
+      "  ...\n",
+      "  [173 159 152]\n",
+      "  [183 167 159]\n",
+      "  [188 172 164]]\n",
+      "\n",
+      " [[190 227 226]\n",
+      "  [190 227 226]\n",
+      "  [190 227 226]\n",
+      "  ...\n",
+      "  [174 160 153]\n",
+      "  [182 166 158]\n",
+      "  [186 170 162]]\n",
+      "\n",
+      " [[190 227 226]\n",
+      "  [190 227 226]\n",
+      "  [190 227 226]\n",
+      "  ...\n",
+      "  [183 165 155]\n",
+      "  [186 167 154]\n",
+      "  [185 166 153]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[223 223 223]\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]\n",
+      "  ...\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]]\n",
+      "\n",
+      " [[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]]\n",
+      "\n",
+      " [[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]]]\n",
+      "../DATASETS/hof/webcam20240111-2.m4v\n",
+      "[[[ 62  77 100]\n",
+      "  [ 59  74  97]\n",
+      "  [ 62  77 100]\n",
+      "  ...\n",
+      "  [147 127  90]\n",
+      "  [150 130  93]\n",
+      "  [145 125  88]]\n",
+      "\n",
+      " [[ 75  90 113]\n",
+      "  [ 66  81 104]\n",
+      "  [ 62  77 100]\n",
+      "  ...\n",
+      "  [145 125  88]\n",
+      "  [147 127  90]\n",
+      "  [143 123  86]]\n",
+      "\n",
+      " [[ 83  91 108]\n",
+      "  [ 74  82  99]\n",
+      "  [ 70  78  95]\n",
+      "  ...\n",
+      "  [147 127  90]\n",
+      "  [150 130  93]\n",
+      "  [145 125  88]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[123 121 112]\n",
+      "  [123 121 112]\n",
+      "  [123 121 112]\n",
+      "  ...\n",
+      "  [177 178 165]\n",
+      "  [177 178 165]\n",
+      "  [177 178 165]]\n",
+      "\n",
+      " [[123 121 112]\n",
+      "  [123 121 112]\n",
+      "  [123 121 112]\n",
+      "  ...\n",
+      "  [174 172 155]\n",
+      "  [174 172 155]\n",
+      "  [174 172 155]]\n",
+      "\n",
+      " [[123 121 112]\n",
+      "  [123 121 112]\n",
+      "  [123 121 112]\n",
+      "  ...\n",
+      "  [172 170 153]\n",
+      "  [172 170 153]\n",
+      "  [172 170 153]]]\n",
+      "../DATASETS/hof/webcam20240111-1.m4v\n",
+      "[[[ 64  81 111]\n",
+      "  [ 61  78 108]\n",
+      "  [ 53  70 100]\n",
+      "  ...\n",
+      "  [151 138  86]\n",
+      "  [148 135  83]\n",
+      "  [147 134  82]]\n",
+      "\n",
+      " [[ 66  83 113]\n",
+      "  [ 62  79 109]\n",
+      "  [ 54  71 101]\n",
+      "  ...\n",
+      "  [151 138  86]\n",
+      "  [148 135  83]\n",
+      "  [147 134  82]]\n",
+      "\n",
+      " [[ 76  89 110]\n",
+      "  [ 72  85 106]\n",
+      "  [ 64  77  98]\n",
+      "  ...\n",
+      "  [151 138  86]\n",
+      "  [148 135  83]\n",
+      "  [147 134  82]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[127 126 115]\n",
+      "  [127 126 115]\n",
+      "  [127 126 115]\n",
+      "  ...\n",
+      "  [178 177 164]\n",
+      "  [178 177 164]\n",
+      "  [178 177 164]]\n",
+      "\n",
+      " [[127 126 115]\n",
+      "  [127 126 115]\n",
+      "  [127 126 115]\n",
+      "  ...\n",
+      "  [179 169 155]\n",
+      "  [178 168 154]\n",
+      "  [178 168 154]]\n",
+      "\n",
+      " [[127 126 115]\n",
+      "  [127 126 115]\n",
+      "  [127 126 115]\n",
+      "  ...\n",
+      "  [176 166 152]\n",
+      "  [175 165 151]\n",
+      "  [175 165 151]]]\n",
+      "../DATASETS/hof/webcam20240110-3.m4v\n",
+      "[[[174 201 215]\n",
+      "  [174 201 215]\n",
+      "  [173 200 214]\n",
+      "  ...\n",
+      "  [160 159 153]\n",
+      "  [163 165 158]\n",
+      "  [165 167 160]]\n",
+      "\n",
+      " [[175 202 216]\n",
+      "  [175 202 216]\n",
+      "  [174 201 215]\n",
+      "  ...\n",
+      "  [161 160 154]\n",
+      "  [163 165 158]\n",
+      "  [164 166 159]]\n",
+      "\n",
+      " [[178 205 219]\n",
+      "  [178 205 219]\n",
+      "  [177 204 218]\n",
+      "  ...\n",
+      "  [164 159 151]\n",
+      "  [165 160 152]\n",
+      "  [165 160 152]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [220 223 223]\n",
+      "  [220 223 223]\n",
+      "  [220 223 223]]\n",
+      "\n",
+      " [[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [220 223 223]\n",
+      "  [220 223 223]\n",
+      "  [220 223 223]]\n",
+      "\n",
+      " [[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [220 223 223]\n",
+      "  [220 223 223]\n",
+      "  [220 223 223]]]\n",
+      "../DATASETS/hof/webcam20240110-1.m4v\n",
+      "[[[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [190 158 136]\n",
+      "  [197 158 137]\n",
+      "  [198 159 138]]\n",
+      "\n",
+      " [[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [191 159 137]\n",
+      "  [199 160 139]\n",
+      "  [199 160 139]]\n",
+      "\n",
+      " [[224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  ...\n",
+      "  [192 160 138]\n",
+      "  [194 159 138]\n",
+      "  [194 159 138]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[223 223 223]\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]\n",
+      "  ...\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]]\n",
+      "\n",
+      " [[223 223 223]\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]\n",
+      "  ...\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]]\n",
+      "\n",
+      " [[223 223 223]\n",
+      "  [223 223 223]\n",
+      "  [223 223 223]\n",
+      "  ...\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]\n",
+      "  [224 224 224]]]\n",
+      "../DATASETS/hof/webcam20240111-3.m4v\n",
+      "[[[ 65  83 103]\n",
+      "  [ 60  78  98]\n",
+      "  [ 60  78  98]\n",
+      "  ...\n",
+      "  [152 132  90]\n",
+      "  [152 132  90]\n",
+      "  [152 132  90]]\n",
+      "\n",
+      " [[ 67  85 105]\n",
+      "  [ 62  80 100]\n",
+      "  [ 59  77  97]\n",
+      "  ...\n",
+      "  [151 131  89]\n",
+      "  [151 131  89]\n",
+      "  [151 131  89]]\n",
+      "\n",
+      " [[ 78  92 106]\n",
+      "  [ 70  84  98]\n",
+      "  [ 64  78  92]\n",
+      "  ...\n",
+      "  [151 131  89]\n",
+      "  [149 129  87]\n",
+      "  [149 129  87]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[129 125 115]\n",
+      "  [129 125 115]\n",
+      "  [129 125 115]\n",
+      "  ...\n",
+      "  [177 178 167]\n",
+      "  [177 178 167]\n",
+      "  [177 178 167]]\n",
+      "\n",
+      " [[129 125 115]\n",
+      "  [129 125 115]\n",
+      "  [129 125 115]\n",
+      "  ...\n",
+      "  [180 174 162]\n",
+      "  [180 174 162]\n",
+      "  [180 174 162]]\n",
+      "\n",
+      " [[129 125 115]\n",
+      "  [129 125 115]\n",
+      "  [129 125 115]\n",
+      "  ...\n",
+      "  [179 173 161]\n",
+      "  [179 173 161]\n",
+      "  [179 173 161]]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "for video_path in video_paths:\n",
+    "    print(video_path)\n",
+    "    video = cv2.VideoCapture(str(video_path))\n",
+    "    fps = video.get(cv2.CAP_PROP_FPS)\n",
+    "    target_frame_duration = 1./fps\n",
+    "    if '-' in video_path.stem:\n",
+    "        path_stem = video_path.stem[:video_path.stem.rfind('-')]\n",
+    "    else:\n",
+    "        path_stem = video_path.stem\n",
+    "    path_stem += \"-homography\"\n",
+    "    homography_path = video_path.with_stem(path_stem).with_suffix('.txt')\n",
+    "    if homography_path.exists():\n",
+    "        #print(f'Found custom homography file! Using {homography_path}')\n",
+    "        video_H = np.loadtxt(homography_path, delimiter=',')\n",
+    "    else:\n",
+    "        print(f\"\\tNo homography for {video_path}\")\n",
+    "\n",
+    "    _, frame = video.read()\n",
+    "    render_projection_frame()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
--- a/test_training_data.ipynb
+++ b/test_training_data.ipynb
--- a/test_trajectron_maps.ipynb
+++ b/test_trajectron_maps.ipynb
--- a/trap/cv_renderer.py
+++ b/trap/cv_renderer.py
@ -503,6 +503,23 @@ def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame,
            anim_position = get_animation_position(track, frame)
            draw_track_predictions(img, track, int(track.track_id)+1, config.camera, convert_world_points_to_img_points, anim_position=anim_position)
            cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1)
+        if prediction_frame.maps:
+            for i, m in enumerate(prediction_frame.maps):
+                map_img = np.ascontiguousarray(np.flipud(np.transpose(m[0], (2, 1, 0))*255), np.uint8)
+                cv2.circle(map_img, (10,50), 5, (0,255,0), 2)
+                cv2.line(map_img, (10,50), (10+15, 50), (0,0,255), 2)
+                cv2.rectangle(map_img, (0,0), (map_img.shape[1]-1, map_img.shape[0]-1), (255,255,255), 1)
+
+                height, width, _ = map_img.shape
+                padding= 50
+                y = img.shape[0] - padding - height
+                x = width*i
+
+                if x+width > img.shape[1]:
+                    break # stop drawing maps when there's a lot of them
+
+                img[y:y+height,x:x+width] = map_img
+                
        

    base_color = (255,)*3
--- a/trap/frame_emitter.py
+++ b/trap/frame_emitter.py
@ -406,6 +406,7 @@ class Frame:
    tracks: Optional[dict[str, Track]] = None
    H: Optional[np.array] = None
    camera: Optional[Camera] = None
+    maps: Optional[List[cv2.Mat]] = None

    def aslist(self) -> [dict]:
        return { t.track_id:
@ -421,7 +422,7 @@ class Frame:
        }

    def without_img(self):
-        return Frame(self.index, None, self.time, self.tracks, self.H, self.camera)
+        return Frame(self.index, None, self.time, self.tracks, self.H, self.camera, self.maps)

 def video_src_from_config(config) -> UrlOrPath:
    if config.video_loop:
--- a/trap/prediction_server.py
+++ b/trap/prediction_server.py
@ -67,7 +67,7 @@ def create_online_env(env, hyperparams, scene_idx, init_timestep):
                       robot_type=env.robot_type)


-def get_maps_for_input(input_dict, scene, hyperparams):
+def get_maps_for_input(input_dict, scene, hyperparams, device):
    scene_maps = list()
    scene_pts = list()
    heading_angles = list()
@ -81,15 +81,18 @@ def get_maps_for_input(input_dict, scene, hyperparams):
                heading_state_index = me_hyp['heading_state_index']
                # We have to rotate the map in the opposit direction of the agent to match them
                if type(heading_state_index) is list:  # infer from velocity or heading vector
-                    heading_angle = -np.arctan2(x[-1, heading_state_index[1]],
-                                                x[-1, heading_state_index[0]]) * 180 / np.pi
+                    # heading_angle = -np.arctan2(x[-1, heading_state_index[1]],
+                    #                             x[-1, heading_state_index[0]]) * 180 / np.pi
+                    heading_angle = -np.arctan2(x[heading_state_index[1]],
+                                                x[heading_state_index[0]]) * 180 / np.pi
                else:
                    heading_angle = -x[-1, heading_state_index] * 180 / np.pi
            else:
                heading_angle = None

            scene_map = scene.map[node.type]
-            map_point = x[-1, :2]
+            # map_point = x[-1, :2]
+            map_point = x[:2]

            patch_size = hyperparams['map_encoder'][node.type]['patch_size']

@ -104,12 +107,14 @@ def get_maps_for_input(input_dict, scene, hyperparams):
    else:
        heading_angles = torch.Tensor(heading_angles)

+    # print(scene_maps, patch_sizes, heading_angles)
    maps = scene_maps[0].get_cropped_maps_from_scene_map_batch(scene_maps,
                                                               scene_pts=torch.Tensor(scene_pts),
                                                               patch_size=patch_sizes[0],
-                                                               rotation=heading_angles)
+                                                               rotation=heading_angles,
+                                                               device='cpu')

-    maps_dict = {node: maps[[i]] for i, node in enumerate(nodes_with_maps)}
+    maps_dict = {node: maps[[i]].to(device) for i, node in enumerate(nodes_with_maps)}
    return maps_dict


@ -207,7 +212,7 @@ class PredictionServer:
        # hyperparams['maximum_history_length'] = 12 # test

        logger.info(f"Use hyperparams: {hyperparams=}")
-
+        
        with open(self.config.eval_data_dict, 'rb') as f:
            eval_env = dill.load(f, encoding='latin1')

@ -222,6 +227,7 @@ class PredictionServer:
        init_timestep = 2

        eval_scene = eval_env.scenes[scene_idx]
+        logger.info(f"Basing online env on {eval_scene=} -- loaded from {self.config.eval_data_dict}")
        online_env = create_online_env(eval_env, hyperparams, scene_idx, init_timestep)

        # auto-find highest iteration
@ -370,7 +376,8 @@ class PredictionServer:

            maps = None
            if hyperparams['use_map_encoding']:
-                maps = get_maps_for_input(input_dict, eval_scene, hyperparams)
+                maps = get_maps_for_input(input_dict, eval_scene, hyperparams, device=self.config.eval_device)
+                
            # print(maps)

            # robot_present_and_future = None
@ -472,6 +479,8 @@ class PredictionServer:
            
            if self.config.smooth_predictions:
                frame = self.smoother.smooth_frame_predictions(frame)
+            
+            frame.maps = list([m.cpu().numpy() for m in maps.values()]) if maps else None

            self.send_frame(frame)
            
--- a/trap/process_data.py
+++ b/trap/process_data.py
@ -6,6 +6,7 @@ import sys
 import os
 import time
 from attr import dataclass
+import cv2
 import numpy as np
 import pandas as pd
 import dill
@ -97,9 +98,18 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
            raise RuntimeError(f"Map image does not exists {map_img_path}")
        
        type_map = {}
+        # TODO)) For now, assume the map is a 100x scale of the world coordinates (i.e. 100px per meter)
+        # thus when we do a homography of 5px per meter, scale down by 20
+        homography_matrix = np.array([
+                [5, 0,0],
+                [0, 5,0],
+                [0,0,1],
+                ]) # 100 scale
+        img = cv2.imread(map_img_path)
+        img = cv2.resize(img, (img.shape[1]//20, img.shape[0]//20))
        type_map['PEDESTRIAN'] = ImageMap(
-            map_img_path,
-            camera.H,
+            img,
+            homography_matrix,
            f"Map from {map_img_path.name}"
        )
    else:
--- a/trap/tools.py
+++ b/trap/tools.py
@ -200,7 +200,7 @@ def transition_path_points(path: np.array, t: float):
    # distance = cum_lenghts[-1] * t
    # ts = np.concatenate((np.array([0.]), cum_lenghts / cum_lenghts[-1]))
    # print(cum_lenghts[-1])
-    DRAW_SPEED = 22 # fixed speed (independent of lenght) TODO)) make variable
+    DRAW_SPEED = 30 # fixed speed (independent of lenght) TODO)) make variable
    ts = np.concatenate((np.array([0.]), cum_lenghts / DRAW_SPEED))
    new_path = [path[0]]

@ -263,7 +263,7 @@ def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:
            #     start = [int(p) for p in pred_coords[ci-1]]
            # end = [int(p) for p in pred_coords[ci]]
            # print(np.rint(start),np.rint(end).tolist())
-            cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA)
+            cv2.line(img, start, end, color, 2, lineType=cv2.LINE_AA)
            pass
            # cv2.circle(img, end, 2, color, 1, lineType=cv2.LINE_AA)

--- a/trap/utils.py
+++ b/trap/utils.py
@ -73,21 +73,19 @@ def display_top(snapshot: tracemalloc.Snapshot, key_type='lineno', limit=5):


 class ImageMap(GeometricMap):  # TODO Implement for image maps -> watch flipped coordinate system
-    def __init__(self, image_path: Path, H_img_to_world: cv2.Mat, description=None):
+    def __init__(self, img: cv2.Mat, H_world_to_map: cv2.Mat, description=None):
        # homography_matrix = np.loadtxt('H.txt')
        # homography_matrix = H_img_to_world.copy()
        # homography_matrix /= homography_matrix[2, 2] # normalise? https://github.com/StanfordASL/Trajectron-plus-plus/issues/14#issuecomment-637880857
        # homography_matrix = np.linalg.inv(homography_matrix)
-        homography_matrix = np.array([
-            [100, 0,0],
-            [0, 100,0],
-            [0,0,1],
-        ])
+        homography_matrix = H_world_to_map

        # RGB png image has 3 layers
-        img = cv2.imread(image_path).astype(np.uint8)
+        img = img.astype(np.uint8)
+        # img = cv2.resize(img, (img.shape[1]//10, img.shape[0]//10))
        img_reverse = img[::-1,:,:] # origin to bottom left, instead of top-left
-        layers = np.transpose(img_reverse, (2, 1, 0)) # array order: layers, x, y
+        layers = np.transpose(img, (2, 1, 0)) # array order: layers, x, y 
+        layers = layers.copy() # copy to apply negative stride
        # layers = 

        #scale 255
Author	SHA1	Message	Date
Ruben van de Ven	fb83554c37	Fixes to config.json for heading derivatives and map rendering	2024-12-28 21:02:07 +01:00
Ruben van de Ven	e9defb211f	render trajectron's cropped maps in cv renderer	2024-12-28 19:50:44 +01:00
Ruben van de Ven	212712be76	Map in inference	2024-12-27 16:12:50 +01:00