From 49c5e66f1d7135abb81b040d38d73289c9d04092 Mon Sep 17 00:00:00 2001
From: Ruben van de Ven
Date: Sun, 29 Dec 2024 20:40:31 +0100
Subject: [PATCH] Split scenes on a 5 minute empty timeline. Preventing OOM on
 huge scenes during training (hopefully)

---
 trap/process_data.py | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/trap/process_data.py b/trap/process_data.py
index d5524da..474030f 100644
--- a/trap/process_data.py
+++ b/trap/process_data.py
@@ -1,10 +1,11 @@
 from collections import defaultdict
 import datetime
 from pathlib import Path
-from random import shuffle
+from random import seed, shuffle
 import sys
 import os
 import time
+from xml.dom.pulldom import default_bufsize
 from attr import dataclass
 import cv2
 import numpy as np
@@ -144,6 +145,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
     print(max_frame_nr)

     # separate call so cursor is kept during multiple loops
+    seed(123)
     shuffle(tracks)

     dt1 = RollingAverage()
@@ -177,7 +179,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
         # scene = None

         scene_nodes = defaultdict(lambda: [])
-        iterations = TrackIteration.iteration_variations(smooth_tracks, False, step_size)
+        variations = TrackIteration.iteration_variations(smooth_tracks, False, step_size)

         for i, track in enumerate(sets[data_class]):
             bar.update()
@@ -205,7 +207,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
             interpolated_track = track.get_with_interpolated_history()
             b = time.time()

-            for i_nr, iteration_settings in enumerate(iterations):
+            for variation_nr, iteration_settings in enumerate(variations):

                 if iteration_settings.smooth:
                     track = smoother.smooth_track(interpolated_track)
@@ -235,7 +237,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
                 # data['pos_y'] =np.digitize(data['pos_y'], bins=space_y)
                 # print(data['pos_x'])

-                scene_nodes[f"{track_source}_{i_nr}"].append(node)
+                scene_nodes[f"{track_source}_{variation_nr}"].append(node)
                 created+=1

             e = time.time()
@@ -245,18 +247,41 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
             dt3.add(data_class-c)
             dt4.add(e-data_class)

-        for scene_nr, nodes in scene_nodes.items():
-            first_ts = min([n.first_timestep for n in nodes])
+        scene_nodes_splits = defaultdict(lambda: [])
+        for scene_nr, nodes in scene_nodes.items():
+            # Some scenes grow obscenely 'large', as in, they span many timesteps
+            # Even though most might be empty. Here, split the scenes into gaps
+            # (Hopefully this prevents OOM in training)
+
+            # nodes in order of appearance
+            nodes = sorted(nodes, key= lambda n: n.first_timestep)
+            split = 0
+            last_timestep = 0
             for node in nodes:
+                if node.first_timestep > (last_timestep+5*60*camera.fps): # a little buffer of x minutes
+                    split += 1
+
+                last_timestep = max(node.last_timestep, last_timestep)
+
+                scene_nodes_splits[f"{scene_nr}_{split}"].append(node)
+
+        for scene_nr, nodes in scene_nodes_splits.items():
+            first_ts = min([n.first_timestep for n in nodes])
+            # print(first_ts)
+            for node in nodes:
+                # print(f"set ts: {node.first_timestep} to {node.first_timestep-first_ts-1}")
                 node.first_timestep -= (first_ts - 1)
+                node._last_timestep = None # reset (should now be handled by setter)
+                # print(f" -- got: {node.first_timestep}")

             last_ts = max([n.last_timestep for n in nodes])
+            first_ts = max([n.first_timestep for n in nodes])
             # print(sorted([n.first_timestep for n in nodes]))

             # TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14
             scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
             scene.nodes.extend(nodes)
             scenes.append(scene)
-            # print(scene)
+            # print(scene_nr, scene)
             # print(scene.nodes[0].first_timestep)
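
The splitting logic this patch adds reduces to: sort the nodes of a scene by first appearance, then open a new sub-scene whenever the next node starts more than five minutes (measured in frames) after every node seen so far has ended. Below is a minimal standalone sketch of that idea, assuming only that nodes expose first_timestep/last_timestep attributes as Trajectron++ nodes do; the SimpleNode class, the split_on_gaps helper, and the 10 fps figure are illustrative, not part of the patch:

from collections import defaultdict

class SimpleNode:
    """Hypothetical stand-in for a Trajectron++ Node; only the timestep bounds matter here."""
    def __init__(self, first_timestep: int, last_timestep: int):
        self.first_timestep = first_timestep
        self.last_timestep = last_timestep

def split_on_gaps(nodes, fps: float, gap_minutes: float = 5.0):
    """Group nodes into sub-scenes, starting a new group whenever a node
    begins more than gap_minutes after all previously seen nodes have ended."""
    gap_frames = gap_minutes * 60 * fps
    splits = defaultdict(list)
    split = 0
    last_timestep = 0
    # nodes in order of appearance, as in the patch
    for node in sorted(nodes, key=lambda n: n.first_timestep):
        if node.first_timestep > last_timestep + gap_frames:
            split += 1  # gap found: this node starts a new sub-scene
        last_timestep = max(node.last_timestep, last_timestep)
        splits[split].append(node)
    return splits

# Three tracks at 10 fps; the third starts 5000 frames (~8 minutes) after the
# second ends, beyond the 5*60*10 = 3000 frame threshold, so it is split off.
nodes = [SimpleNode(0, 100), SimpleNode(50, 400), SimpleNode(5400, 5600)]
for split, group in split_on_gaps(nodes, fps=10).items():
    print(split, [(n.first_timestep, n.last_timestep) for n in group])
# -> 0 [(0, 100), (50, 400)]
#    1 [(5400, 5600)]

Note that last_timestep starts at 0, so a scene whose first node only appears more than five minutes into the recording already increments split once; that merely changes the sub-scene label, since empty groups are never created.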