Split scenes wherever the timeline is empty for 5 minutes, to prevent OOM on huge scenes during training (hopefully)

This commit is contained in:
Ruben van de Ven 2024-12-29 20:40:31 +01:00
parent fb83554c37
commit 49c5e66f1d

View file

@ -1,10 +1,11 @@
from collections import defaultdict
import datetime
from pathlib import Path
from random import shuffle
from random import seed, shuffle
import sys
import os
import time
from xml.dom.pulldom import default_bufsize
from attr import dataclass
import cv2
import numpy as np
@ -144,6 +145,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
print(max_frame_nr)
# separate call so cursor is kept during multiple loops
seed(123)
shuffle(tracks)
dt1 = RollingAverage()
@ -177,7 +179,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# scene = None
scene_nodes = defaultdict(lambda: [])
iterations = TrackIteration.iteration_variations(smooth_tracks, False, step_size)
variations = TrackIteration.iteration_variations(smooth_tracks, False, step_size)
for i, track in enumerate(sets[data_class]):
bar.update()
@ -205,7 +207,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
interpolated_track = track.get_with_interpolated_history()
b = time.time()
for i_nr, iteration_settings in enumerate(iterations):
for variation_nr, iteration_settings in enumerate(variations):
if iteration_settings.smooth:
track = smoother.smooth_track(interpolated_track)
@ -235,7 +237,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# data['pos_y'] =np.digitize(data['pos_y'], bins=space_y)
# print(data['pos_x'])
scene_nodes[f"{track_source}_{i_nr}"].append(node)
scene_nodes[f"{track_source}_{variation_nr}"].append(node)
created+=1
e = time.time()
@ -245,18 +247,41 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
dt3.add(data_class-c)
dt4.add(e-data_class)
for scene_nr, nodes in scene_nodes.items():
first_ts = min([n.first_timestep for n in nodes])
scene_nodes_splits = defaultdict(lambda: [])
for scene_nr, nodes in scene_nodes.items():
# Some scenes grow obscenely 'large', as in, they span many timesteps
# Even though most might be empty. Here, split the scenes into gaps
# (Hopefully this prevents OOM in training)
# nodes in order of appearance
nodes = sorted(nodes, key= lambda n: n.first_timestep)
split = 0
last_timestep = 0
for node in nodes:
if node.first_timestep > (last_timestep+5*60*camera.fps): # a little buffer of x minutes
split += 1
last_timestep = max(node.last_timestep, last_timestep)
scene_nodes_splits[f"{scene_nr}_{split}"].append(node)
for scene_nr, nodes in scene_nodes_splits.items():
first_ts = min([n.first_timestep for n in nodes])
# print(first_ts)
for node in nodes:
# print(f"set ts: {node.first_timestep} to {node.first_timestep-first_ts-1}")
node.first_timestep -= (first_ts - 1)
node._last_timestep = None # reset (should now be handled by setter)
# print(f" -- got: {node.first_timestep}")
last_ts = max([n.last_timestep for n in nodes])
first_ts = max([n.first_timestep for n in nodes])
# print(sorted([n.first_timestep for n in nodes]))
# TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
scene.nodes.extend(nodes)
scenes.append(scene)
# print(scene)
# print(scene_nr, scene)
# print(scene.nodes[0].first_timestep)