Split scenes on a 5-minute empty timeline, to prevent OOM on huge scenes during training (hopefully)

This commit is contained in:
Ruben van de Ven 2024-12-29 20:40:31 +01:00
parent fb83554c37
commit 49c5e66f1d

View file

@ -1,10 +1,11 @@
from collections import defaultdict from collections import defaultdict
import datetime import datetime
from pathlib import Path from pathlib import Path
from random import shuffle from random import seed, shuffle
import sys import sys
import os import os
import time import time
from xml.dom.pulldom import default_bufsize
from attr import dataclass from attr import dataclass
import cv2 import cv2
import numpy as np import numpy as np
@ -144,6 +145,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
print(max_frame_nr) print(max_frame_nr)
# separate call so cursor is kept during multiple loops # separate call so cursor is kept during multiple loops
seed(123)
shuffle(tracks) shuffle(tracks)
dt1 = RollingAverage() dt1 = RollingAverage()
@ -177,7 +179,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# scene = None # scene = None
scene_nodes = defaultdict(lambda: []) scene_nodes = defaultdict(lambda: [])
iterations = TrackIteration.iteration_variations(smooth_tracks, False, step_size) variations = TrackIteration.iteration_variations(smooth_tracks, False, step_size)
for i, track in enumerate(sets[data_class]): for i, track in enumerate(sets[data_class]):
bar.update() bar.update()
@ -205,7 +207,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
interpolated_track = track.get_with_interpolated_history() interpolated_track = track.get_with_interpolated_history()
b = time.time() b = time.time()
for i_nr, iteration_settings in enumerate(iterations): for variation_nr, iteration_settings in enumerate(variations):
if iteration_settings.smooth: if iteration_settings.smooth:
track = smoother.smooth_track(interpolated_track) track = smoother.smooth_track(interpolated_track)
@ -235,7 +237,7 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
# data['pos_y'] =np.digitize(data['pos_y'], bins=space_y) # data['pos_y'] =np.digitize(data['pos_y'], bins=space_y)
# print(data['pos_x']) # print(data['pos_x'])
scene_nodes[f"{track_source}_{i_nr}"].append(node) scene_nodes[f"{track_source}_{variation_nr}"].append(node)
created+=1 created+=1
e = time.time() e = time.time()
@ -245,18 +247,41 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c
dt3.add(data_class-c) dt3.add(data_class-c)
dt4.add(e-data_class) dt4.add(e-data_class)
scene_nodes_splits = defaultdict(lambda: [])
for scene_nr, nodes in scene_nodes.items(): for scene_nr, nodes in scene_nodes.items():
first_ts = min([n.first_timestep for n in nodes]) # Some scenes grow obscenely 'large', as in, they span many timesteps
# Even though most might be empty. Here, split the scenes into gaps
# (Hopefully this prevents OOM in training)
# nodes in order of appearance
nodes = sorted(nodes, key= lambda n: n.first_timestep)
split = 0
last_timestep = 0
for node in nodes: for node in nodes:
if node.first_timestep > (last_timestep+5*60*camera.fps): # a little buffer of x minutes
split += 1
last_timestep = max(node.last_timestep, last_timestep)
scene_nodes_splits[f"{scene_nr}_{split}"].append(node)
for scene_nr, nodes in scene_nodes_splits.items():
first_ts = min([n.first_timestep for n in nodes])
# print(first_ts)
for node in nodes:
# print(f"set ts: {node.first_timestep} to {node.first_timestep-first_ts-1}")
node.first_timestep -= (first_ts - 1) node.first_timestep -= (first_ts - 1)
node._last_timestep = None # reset (should now be handled by setter)
# print(f" -- got: {node.first_timestep}")
last_ts = max([n.last_timestep for n in nodes]) last_ts = max([n.last_timestep for n in nodes])
first_ts = max([n.first_timestep for n in nodes])
# print(sorted([n.first_timestep for n in nodes])) # print(sorted([n.first_timestep for n in nodes]))
# TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14 # TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14
scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map) scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
scene.nodes.extend(nodes) scene.nodes.extend(nodes)
scenes.append(scene) scenes.append(scene)
# print(scene) # print(scene_nr, scene)
# print(scene.nodes[0].first_timestep) # print(scene.nodes[0].first_timestep)