From ef24bb56f5dc1a7e64d49333e6a95cb9425abd1f Mon Sep 17 00:00:00 2001
From: Ruben van de Ven
Date: Thu, 1 May 2025 21:07:13 +0200
Subject: [PATCH] Custom VAE for trajectory prediction

---
 test_training.ipynb | 875 ++++++++++++++++++++++++++++++++++++++++++++
 trap/base.py        |  40 ++
 2 files changed, 915 insertions(+)
 create mode 100644 test_training.ipynb

diff --git a/test_training.ipynb b/test_training.ipynb
new file mode 100644
index 0000000..291e8d7
--- /dev/null
+++ b/test_training.ipynb
@@ -0,0 +1,875 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from copy import deepcopy\n",
+    "from pathlib import Path\n",
+    "from typing import List\n",
+    "\n",
+    "import cv2\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "from pandas import DataFrame\n",
+    "from torch import optim\n",
+    "from torch.utils.data import DataLoader, TensorDataset\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "from trap.frame_emitter import Camera, Track\n",
+    "from trap.tracker import FinalDisplacementFilter, TrackReader\n",
+    "from trap.utils import ImageMap\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 0. Training options"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Training options\n",
+    "input_seq_length = 36\n",
+    "output_seq_length = 36\n",
+    "\n",
+    "lr = 0.00005\n",
+    "num_epochs = 100\n",
+    "batch_size = 512\n",
+    "hidden_size = 32\n",
+    "num_gru_layers = 1\n",
+    "grad_clip = 1.0\n",
+    "scheduled_sampling_decay = 10\n",
+    "dropout = 0.\n",
+    "\n",
+    "# As opposed to point-wise (assumes Gaussian)\n",
+    "# probabilistic = True\n",
+    "\n",
+    "# use_attention = True\n",
+    "\n",
+    "path = Path(\"EXPERIMENTS/raw/hof3/\")\n",
+    "calibration_path = Path(\"../DATASETS/hof3/calibration.json\")\n",
+    "homography_path = Path(\"../DATASETS/hof3/homography.json\")\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "camera = Camera.from_paths(calibration_path, homography_path, 12)\n",
+    "\n",
+    "# when using a map encoder:\n",
+    "image_path = Path(\"../DATASETS/hof3/map-undistorted-H-2.png\")\n",
+    "assert image_path.exists()\n",
+    "\n",
+    "CACHE_DIR = Path(\"/tmp/cache-custom-rnn\")\n",
+    "cache_path = Path(CACHE_DIR)\n",
+    "cache_path.mkdir(parents=True, exist_ok=True)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1. Data loading"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loaded 2379\n"
+     ]
+    }
+   ],
+   "source": [
+    "from trap.tracker import Smoother\n",
+    "\n",
+    "reader = TrackReader(path, camera.fps, exclude_whitelisted=False, include_blacklisted=False)\n",
+    "\n",
+    "smoother = Smoother()\n",
+    "\n",
+    "# make sure we have all points for all tracks\n",
+    "tracks: List[Track] = [t.get_with_interpolated_history() for t in reader]\n",
+    "track_filter = FinalDisplacementFilter(2)\n",
+    "tracks = track_filter.apply(tracks, camera)\n",
+    "tracks = [smoother.smooth_track(t) for t in tracks]\n",
+    "\n",
+    "print(f\"Loaded {len(tracks)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# split training & validation:\n",
+    "np.random.shuffle(tracks)\n",
+    "test_offset_idx = int(len(tracks) * .8)\n",
+    "\n",
+    "training_tracks, test_tracks = tracks[:test_offset_idx], tracks[test_offset_idx:]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "in_fields = ['x', 'y', 'dx', 'dy']\n",
+    "out_fields = ['dx', 'dy']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# find_id = \"109271\"\n",
+    "# for i, track in enumerate(training_tracks):\n",
+    "#     if track.track_id == find_id:\n",
+    "#         print(i)\n",
+    "#         break\n",
+    "# # print(track)\n",
+    "\n",
+    "# track.to_flat_dataframe(camera).isna().any().any()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " 0%|          | 0/1903 [00:00"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.figure(figsize=(18, 16))\n",
+    "plt.plot(past[:, 0], past[:, 1], 'b-o', label='Past')\n",
+    "plt.plot(future[:, 0], future[:, 1], 'g-o', label='Future (GT)')\n",
+    "\n",
+    "for i in range(samples.shape[0]):\n",
+    "    plt.plot(samples[i, :, 0], samples[i, :, 1], '--', label=f'Sample {i+1}')\n",
+    "\n",
+    "plt.legend()\n",
+    "plt.title('Trajectory Prediction with RP-VAE Samples')\n",
+    "plt.xlabel('X')\n",
+    "plt.ylabel('Y')\n",
+    "plt.grid(True)\n",
+    "plt.axis('equal')\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TODO\n",
+    "\n",
+    "* Stop prediction in model.forward() when a track goes out of bounds (inference mode only).\n",
+    "* Duplicate this file and check the other notebook for training improvements\n",
+    "  * Error/loss metric\n",
+    "  * Add attention to the network?\n",
+    "* Check the Trajectron paper on \"overshoot\"\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/trap/base.py b/trap/base.py
index 5676367..29052f7 100644
--- a/trap/base.py
+++ b/trap/base.py
@@ -308,6 +308,37 @@ class Track:
             pos += distance
 
         return new_path
+
+    def get_simplified_history_with_absolute_distance(self, distance: float, camera: Camera) -> list[list[float]]:
+        # Similar to get_simplified_history, but measures absolute world-space
+        # distance to the last kept point, not cumulative track length
+
+        if len(self.history) < 1:
+            return []
+
+        path = self.get_projected_history(H=None, camera=camera)
+        new_path: List[List[float]] = [path[0]]
+
+        # compare squared distances to avoid a sqrt per segment
+        distance_sq = distance**2
+
+        for a, b in zip(path[:-1], path[1:]):
+            # check if this segment contains our next point, i.e. whether
+            # point b is still closer to the last kept point
+            # than the target distance
+            b_distance_sq = (b[0]-new_path[-1][0])**2 + (b[1]-new_path[-1][1])**2
+
+            if b_distance_sq <= distance_sq:
+                continue
+
+            a_distance_sq = (a[0]-new_path[-1][0])**2 + (a[1]-new_path[-1][1])**2
+
+            relative_t = inv_lerp(a_distance_sq, b_distance_sq, distance_sq)
+            x = lerp(a[0], b[0], relative_t)
+            y = lerp(a[1], b[1], relative_t)
+            new_path.append([x, y])
+
+        return new_path
@@ -406,6 +437,15 @@ class Track:
         }
         return pd.DataFrame(data_dict, columns=data_columns)
+
+    def to_flat_dataframe(self, camera: Camera) -> pd.DataFrame:
+        positions = self.get_projected_history(None, camera)
+        data = pd.DataFrame(positions, columns=['x', 'y'])
+        # per-step displacements; backfill the NaN that diff() leaves in row 0
+        data['dx'] = data['x'].diff()
+        data['dy'] = data['y'].diff()
+
+        return data.bfill()
 
     def to_trajectron_node(self, camera: Camera, env: Environment) -> Node:
         node_data = self.to_dataframe(camera)
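---
A minimal usage sketch of the two new Track helpers, assuming `track` and
`camera` are a loaded Track and its Camera as in the notebook above (the
0.5 m step is an arbitrary example value):

    # resample the projected history at every 0.5 m of absolute
    # (straight-line) distance from the previously kept point
    points = track.get_simplified_history_with_absolute_distance(0.5, camera)

    # flat per-frame features: x, y plus dx, dy displacements
    # (to_flat_dataframe backfills the first row's NaN from diff())
    df = track.to_flat_dataframe(camera)
    features = df[['x', 'y', 'dx', 'dy']].to_numpy()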