369 lines
13 KiB
Python
369 lines
13 KiB
Python
from collections import defaultdict
|
|
import datetime
|
|
from pathlib import Path
|
|
from random import seed, shuffle
|
|
import sys
|
|
import os
|
|
import time
|
|
from xml.dom.pulldom import default_bufsize
|
|
from attr import dataclass
|
|
import cv2
|
|
import numpy as np
|
|
import pandas as pd
|
|
import dill
|
|
import tqdm
|
|
import argparse
|
|
from typing import Dict, List, Optional
|
|
|
|
from trap.base import Track
|
|
from trap.config import CameraAction, HomographyAction
|
|
from trap.frame_emitter import Camera
|
|
from trap.tracker import FinalDisplacementFilter, Smoother, TrackReader
|
|
|
|
#sys.path.append("../../")
|
|
from trajectron.environment import Environment, Scene, Node
|
|
from trajectron.utils import maybe_makedirs
|
|
from trajectron.environment import derivative_of
|
|
|
|
from trap.utils import ImageMap
|
|
|
|
# Module-wide settings for the dataset conversion.
FPS = 12
desired_max_time = 100
pred_indices = [2, 3]
state_dim = 6
frame_diff = 10
desired_frame_diff = 1
dt = 1 / FPS  # dt per frame (e.g. 1/FPS)
smooth_window = FPS  # see also tracker.py
min_track_length = 20

# Mean/std per axis for every state quantity of a PEDESTRIAN node; handed to
# the trajectron Environment as its `standardization` argument.
standardization = {
    'PEDESTRIAN': {
        quantity: {axis: {'mean': 0, 'std': std} for axis in ('x', 'y')}
        for quantity, std in (
            ('position', 1),
            ('velocity', 2),
            ('acceleration', 1),
        )
    }
}
|
|
|
|
|
|
|
|
class RollingAverage():
    """Incrementally updated arithmetic mean of every value passed to `add`.

    Attributes:
        v: the current mean (0 before any value has been added).
        n: how many values have been added so far.
    """

    def __init__(self):
        self.v = 0
        self.n = 0

    def add(self, v):
        """Fold `v` into the mean and return the updated mean."""
        count = self.n + 1
        # Re-weight the previous mean by its sample count before averaging in v.
        self.v = (self.v * self.n + v) / count
        self.n = count
        return self.v
|
|
|
|
|
|
@dataclass
class TrackIteration:
    """Settings for one pass over a track: whether to smooth it and how to subsample it."""

    # Whether the track is smoothed for this pass.
    smooth: bool
    # Sampling stride (every n-th point is taken).
    step_size: int
    # Offset at which the strided sampling starts.
    step_offset: int

    @classmethod
    def iteration_variations(cls, smooth = True, toggle_smooth=True, sample_step_size=1):
        """List every variation for the given sampling stride.

        One entry is produced per offset in ``range(sample_step_size)``; when
        `toggle_smooth` is set, each offset additionally gets an entry with the
        opposite `smooth` value, placed directly after the regular one.

        Args:
            smooth: smoothing flag of the primary entry for each offset.
            toggle_smooth: also emit an entry with ``not smooth`` per offset.
            sample_step_size: stride, and therefore the number of offsets.

        Returns:
            A list of instances of `cls` (empty when `sample_step_size` < 1).
        """
        iterations: List["TrackIteration"] = []
        for offset in range(sample_step_size):
            # Build through `cls` so subclasses get instances of their own type.
            iterations.append(cls(smooth, sample_step_size, offset))
            if toggle_smooth:
                iterations.append(cls(not smooth, sample_step_size, offset))
        return iterations
|
|
|
|
# maybe_makedirs('trajectron-data')
|
|
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:
|
|
|
|
def _load_type_map(map_img_path: Optional[Path]):
    """Build the per-node-type map dict given to every Scene, or None when no map image is set.

    Raises:
        RuntimeError: if the image file does not exist or cannot be decoded.
    """
    if not map_img_path:
        return None

    if not map_img_path.exists():
        raise RuntimeError(f"Map image does not exists {map_img_path}")

    # TODO)) For now, assume the map is a 100x scale of the world coordinates (i.e. 100px per meter)
    # thus when we do a homography of 5px per meter, scale down by 20
    map_H_path = map_img_path.with_suffix('.json')
    if map_H_path.exists():
        # NOTE(review): the sidecar has a .json suffix but is parsed with
        # np.loadtxt, so it presumably holds a plain-text matrix - confirm.
        homography_matrix = np.loadtxt(map_H_path)
    else:
        homography_matrix = np.array([
            [5, 0, 0],
            [0, 5, 0],
            [0, 0, 1],
        ])  # 100 scale

    # Pass a str: older OpenCV bindings reject pathlib.Path. imread signals
    # failure by returning None instead of raising, so check it explicitly.
    img = cv2.imread(str(map_img_path))
    if img is None:
        raise RuntimeError(f"Map image could not be read {map_img_path}")
    img = cv2.resize(img, (img.shape[1]//20, img.shape[0]//20))

    return {
        'PEDESTRIAN': ImageMap(
            img,
            homography_matrix,
            f"Map from {map_img_path.name}"
        )
    }


def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int, filter_displacement:float, map_img_path: Optional[Path]):
    """Convert tracker output into pickled trajectron Environments (train/val/test).

    Tracks are read from `src_dir`, optionally filtered on final displacement,
    shuffled and divided 80/12/8 over train/val/test (counts are truncated to
    ints, so a few tracks may end up in no split). Per split, every sufficiently
    long track is interpolated, optionally smoothed and subsampled, turned into
    a trajectron node and grouped into scenes by track source. One Environment
    per split is written with dill to `dst_dir`.

    Args:
        src_dir: directory passed to TrackReader.
        dst_dir: directory the .pkl files are written to.
        name: prefix of the output identifier; suffixes describing step size,
            smoothing, displacement filter, map usage and today's date are appended.
        smooth_tracks: smooth tracks with a `smooth_window`-sized Smoother.
        cm_to_m: currently unused by this function.
        center_data: currently unused by this function.
        bin_positions: currently unused by this function.
        camera: camera whose `fps` is used and which is handed to the track helpers.
        step_size: take every n-th point; one variation per offset is generated.
        filter_displacement: if > 0, apply FinalDisplacementFilter with this value.
        map_img_path: optional map image attached to every scene.

    Returns:
        Dict mapping split name to the target .pkl path. The path is recorded
        even when the split produced no scenes and no file was written.

    Raises:
        RuntimeError: if `map_img_path` is given but missing or unreadable.
    """
    name += "-nostep" if step_size == 1 else f"-step{step_size}"
    name += f"-conv{smooth_window}" if smooth_tracks else "-nosmooth"
    name += f"-f{filter_displacement}" if filter_displacement > 0 else ""
    name += "-map" if map_img_path else "-nomap"
    name += f"-{datetime.date.today()}"

    print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")

    type_map = _load_type_map(map_img_path)

    skipped_for_error = 0
    created = 0

    smoother = Smoother(window_len=smooth_window, convolution=True) if smooth_tracks else None

    reader = TrackReader(src_dir, camera.fps)
    tracks = [t for t in reader]
    print(f"Unfiltered total: {len(tracks)} tracks")
    if filter_displacement > 0:
        displacement_filter = FinalDisplacementFilter(filter_displacement)
        tracks = displacement_filter.apply(tracks, camera)
        print(f"Filtered: {len(tracks)} tracks")

    total = len(tracks)
    bar = tqdm.tqdm(total=total)

    destinations = {
        'train': int(total * .8),
        'val': int(total * .12),
        'test': int(total * .08),
    }

    # Informational only: last frame number of the track with the highest id.
    max_track = reader.get(str(max([int(k) for k in reader._tracks.keys()])))
    max_frame_nr = max_track.history[-1].frame_nr
    print(max_frame_nr)

    # separate call so cursor is kept during multiple loops
    # seed(123)
    shuffle(tracks)

    # Running averages of the time spent in each processing stage (progress bar only).
    dt1 = RollingAverage()
    dt2 = RollingAverage()
    dt3 = RollingAverage()
    dt4 = RollingAverage()

    sets: Dict[str, List[Track]] = {}
    offset = 0
    for data_class, nr in destinations.items():
        # TODO)) think of a way to shuffle while keeping scenes
        sets[data_class] = tracks[offset : offset+nr]
        offset += nr

    print(f"Camera FPS: {camera.fps}, actual fps: {camera.fps/step_size} (or {(1/camera.fps)*step_size})")

    names = {}
    max_pos = 0

    for data_class in destinations:
        env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
        attention_radius = dict()
        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0
        env.attention_radius = attention_radius

        scenes = []
        split_id = f"{name}_{data_class}"
        data_dict_path = dst_dir / (split_id + '.pkl')
        names[data_class] = data_dict_path

        # Nodes grouped per "<track source>_<variation number>".
        scene_nodes = defaultdict(list)
        variations = TrackIteration.iteration_variations(smooth_tracks, False, step_size)

        for track in sets[data_class]:
            bar.update()

            track_source = track.source

            tot = (dt1.v+dt2.v+dt3.v+dt4.v)
            if tot:
                bar.set_description(f"{data_dict_path.name} {track_source} ({dt1.v/tot:.4f}, {dt2.v/tot:.4f}, {dt3.v/tot:.4f}, {dt4.v/tot:.4f}) - {len(scene_nodes)}")

            if len(track.history) < min_track_length:
                continue

            a = time.time()
            interpolated_track = track.get_with_interpolated_history()
            b = time.time()

            for variation_nr, iteration_settings in enumerate(variations):

                if iteration_settings.smooth:
                    variant = smoother.smooth_track(interpolated_track)
                else:
                    variant = interpolated_track # TODO)) Copy & move smooth outside iter loop
                c = time.time()

                if iteration_settings.step_size > 1:
                    variant = variant.get_sampled(iteration_settings.step_size, iteration_settings.step_offset)
                    # redo test, it might fall out again
                    if len(variant.history) < min_track_length:
                        continue

                node = variant.to_trajectron_node(camera, env)
                max_pos = max(node.data.data[0][0], max_pos)

                # Own name for this timestamp: reusing `data_class` here would
                # clobber the split name that the summary print below reports.
                d = time.time()

                scene_nodes[f"{track_source}_{variation_nr}"].append(node)
                created += 1

                e = time.time()

                dt1.add(b-a)
                dt2.add(c-b)
                dt3.add(d-c)
                dt4.add(e-d)

        scene_nodes_splits = defaultdict(list)
        for scene_nr, nodes in scene_nodes.items():
            # Some scenes grow obscenely 'large', as in, they span many timesteps
            # Even though most might be empty. Here, split the scenes into gaps
            # (Hopefully this prevents OOM in training)

            # nodes in order of appearance
            nodes = sorted(nodes, key=lambda n: n.first_timestep)
            split = 0
            last_timestep = 0
            for node in nodes:
                if node.first_timestep > (last_timestep+5*60*camera.fps): # a little buffer of x minutes
                    split += 1

                last_timestep = max(node.last_timestep, last_timestep)

                scene_nodes_splits[f"{scene_nr}_{split}"].append(node)

        for scene_nr, nodes in scene_nodes_splits.items():
            # Rebase the timesteps so the earliest node of the scene starts at 1.
            first_ts = min(n.first_timestep for n in nodes)
            for node in nodes:
                node.first_timestep -= (first_ts - 1)
                node._last_timestep = None # reset (should now be handled by setter)
            last_ts = max(n.last_timestep for n in nodes)

            # TODO)) check use of maps: https://github.com/StanfordASL/Trajectron-plus-plus/issues/14
            scene = Scene(timesteps=last_ts, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None, map=type_map)
            scene.nodes.extend(nodes)
            scenes.append(scene)

        print(f'Processed {len(scenes)} scene with {sum(len(s.nodes) for s in scenes)} nodes for data class {data_class}')

        env.scenes = scenes

        # Only write a file for splits that actually contain scenes.
        if len(scenes) > 0:
            with open(data_dict_path, 'wb') as f:
                dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)

    print(f"error: {skipped_for_error}, used: {created}")
    return names
|
|
|
|
def main():
    """Parse the command line and run `process_data` with the resulting options.

    NOTE(review): no ``--camera`` option is declared here, yet ``args.camera`` is
    read below; presumably HomographyAction/CameraAction populate it - confirm.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--src-dir", "-s",
        type=Path,
        required=True,
        help="Directory with tracker output in .txt files",
    )
    parser.add_argument(
        "--dst-dir", "-d",
        type=Path,
        required=True,
        help="Destination directory to store parsed .pkl files (typically 'trajectron-data')",
    )
    parser.add_argument(
        "--name", "-n",
        type=str,
        required=True,
        help="Identifier to prefix the output .pkl files with (result is NAME-train.pkl, NAME-test.pkl)",
    )
    parser.add_argument(
        "--smooth-tracks",
        action='store_true',
        help=f"Enable smoother. Set to {smooth_window} frames",
    )
    parser.add_argument(
        "--cm-to-m",
        action='store_true',
        help="If homography is in cm, convert tracked points to meter for beter results",
    )
    parser.add_argument(
        "--center-data",
        action='store_true',
        help="Normalise around center",
    )
    parser.add_argument(
        "--bin-positions",
        action='store_true',
        help="Experiment to put round positions to a grid",
    )
    parser.add_argument(
        "--step-size",
        type=int,
        default=1,
        help="Take only every n-th point",
    )
    parser.add_argument(
        "--camera-fps",
        help="Camera FPS",
        type=int,
        default=12,
    )
    parser.add_argument(
        "--homography",
        help="File with homography params",
        type=Path,
        default='../DATASETS/VIRAT_subset_0102x/VIRAT_0102_homography_img2world.txt',
        action=HomographyAction,
    )
    parser.add_argument(
        "--calibration",
        help="File with camera intrinsics and lens distortion params (calibration.json)",
        default=None,
        action=CameraAction,
    )
    parser.add_argument(
        "--filter-displacement",
        help="Filter tracks with a final displacement less then the given value",
        default=0,
        type=float,
    )
    parser.add_argument(
        "--map-img-path",
        help="Image file representing a mask of a map (uses camera homography, assumes: 3 layers, values 0-255)",
        default=None,
        type=Path,
    )

    options = parser.parse_args()

    process_data(
        src_dir=options.src_dir,
        dst_dir=options.dst_dir,
        name=options.name,
        smooth_tracks=options.smooth_tracks,
        cm_to_m=options.cm_to_m,
        center_data=options.center_data,
        bin_positions=options.bin_positions,
        camera=options.camera,
        step_size=options.step_size,
        filter_displacement=options.filter_displacement,
        map_img_path=options.map_img_path,
    )
|
|
|