Initial public release.

2020-01-13 10:55:45 -08:00 · 2020-01-13 10:55:45 -08:00 · 3588a3ebea
commit 3588a3ebea
54 changed files with 6099 additions and 0 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,3 @@
+[submodule "data/nuScenes/nuscenes-devkit"]
+	path = data/nuScenes/nuscenes-devkit
+	url = https://github.com/nutonomy/nuscenes-devkit
--- a/README.md
+++ b/README.md
@ -0,0 +1,56 @@
+# Trajectron++: Multi-Agent Generative Trajectory Forecasting With Heterogeneous Data for Control #
+
+This repository contains the code for [Trajectron++: Multi-Agent Generative Trajectory Forecasting With Heterogeneous Data for Control](https://arxiv.org/abs/2001.03093) by Tim Salzmann\*, Boris Ivanovic\*, Punarjay Chakravarty, and Marco Pavone (\* denotes equal contribution).
+
+Specifically, this branch is for the Trajectron++ applied to the nuScenes autonomous driving dataset.
+
+## Installation ##
+
+### Note about Submodules ###
+When cloning this branch, make sure you clone the submodules as well, with the following command:
+```
+git clone --recurse-submodules <repository cloning URL>
+```
+Alternatively, you can clone the repository as normal and then load submodules later with:
+```
+git submodule init # Initializing our local configuration file
+git submodule update # Fetching all of the data from the submodules at the specified commits
+```
+
+### Environment Setup ###
+
+First, we'll create a conda environment to hold the dependencies.
+```
+conda create --name trajectron++ python=3.6 -y
+source activate trajectron++
+pip install -r requirements.txt
+```
+
+Then, since this project uses IPython notebooks, we'll install this conda environment as a kernel.
+```
+python -m ipykernel install --user --name trajectron++ --display-name "Python 3.6 (Trajectron++)"
+```
+
+Now, you can start a Jupyter session and view/run all the notebooks in `code/notebooks` with
+```
+jupyter notebook
+```
+
+When you're done, don't forget to deactivate the conda environment with
+```
+source deactivate
+```
+
+## Scripts ##
+
+Run any of these with a `-h` or `--help` flag to see all available command arguments.
+* `code/train.py` - Trains a new Trajectron++ model.
+* `code/notebooks/run_eval.bash` - Evaluates the performance of the Trajectron++. This script mainly collects evaluation data, which can then be visualized with `code/notebooks/NuScenes Quantitative.ipynb`.
+* `data/nuScenes/process_nuScenes.py` - Processes the nuScenes dataset into a format that the Trajectron++ can directly work with, following our internal structures for handling data (see `code/data` for more information).
+* `code/notebooks/NuScenes Qualitative.ipynb` - Visualizes the predictions that the Trajectron++ makes.
+
+## Datasets ##
+
+A sample of fully-processed scenes from the nuScenes dataset are available in this repository, in `data/processed`.
+
+If you want the *original* nuScenes dataset, you can find it here: [nuScenes Dataset](https://www.nuscenes.org/).
--- a/code/config.json
+++ b/code/config.json
@ -0,0 +1,108 @@
+{
+
+  "batch_size": 256,
+  "grad_clip": 1.0,
+
+  "learning_rate_style": "exp",
+  "learning_rate": 0.002,
+  "min_learning_rate": 0.0005,
+  "learning_decay_rate": 0.9995,
+
+  "prediction_horizon": 6,
+  "minimum_history_length": 1,
+  "maximum_history_length": 8,
+
+  "map_context": 120,
+  "map_enc_num_layers": 4,
+  "map_enc_hidden_size": 512,
+  "map_enc_output_size": 512,
+  "map_enc_dropout": 0.5,
+
+  "alpha": 1,
+  "k": 30,
+  "k_eval": 200,
+  "use_iwae": false,
+  "kl_exact": true,
+  
+  "kl_min": 0.07,
+  "kl_weight": 5.0,
+  "kl_weight_start": 0,
+  "kl_decay_rate": 0.99995,
+  "kl_crossover": 500,
+  "kl_sigmoid_divisor": 4,
+
+  "inf_warmup": 1.0,
+  "inf_warmup_start": 1.0,
+  "inf_warmup_crossover": 1500,
+  "inf_warmup_sigmoid_divisor": 4,
+  
+  "rnn_kwargs": {
+    "dropout_keep_prob": 0.5
+  },
+  "MLP_dropout_keep_prob": 0.9,
+  "rnn_io_dropout_keep_prob": 1.0,
+  "enc_rnn_dim_multiple_inputs": 8,
+  "enc_rnn_dim_edge": 8,
+  "enc_rnn_dim_edge_influence": 8,
+  "enc_rnn_dim_history": 32,
+  "enc_rnn_dim_future": 32,
+  "dec_rnn_dim": 512,
+  "dec_GMM_proj_MLP_dims": null,
+  "sample_model_during_dec": true,
+  "dec_sample_model_prob_start": 1.00,
+  "dec_sample_model_prob_final": 1.00,
+  "dec_sample_model_prob_crossover": 200,
+  "dec_sample_model_prob_divisor": 4,
+  "q_z_xy_MLP_dims": null,
+  "p_z_x_MLP_dims": 32,
+  "fuzz_factor": 0.05,
+  "GMM_components": 12,
+  "log_sigma_min": -10,
+  "log_sigma_max": 10,
+  "log_p_yt_xz_max": 50,
+  "N": 2,
+  "K": 5,
+  "tau_init": 2.0,
+  "tau_final": 0.05,
+  "tau_decay_rate": 0.997,
+  "use_z_logit_clipping": true,
+  "z_logit_clip_start": 0.05,
+  "z_logit_clip_final": 5.0,
+  "z_logit_clip_crossover": 500,
+  "z_logit_clip_divisor": 5,
+
+  "state": {
+    "PEDESTRIAN": {
+      "position": ["x", "y"],
+      "velocity": ["x", "y"],
+      "acceleration": ["x", "y"],
+      "heading": ["value"]
+    },
+    "BICYCLE": {
+      "position": ["x", "y"],
+      "velocity": ["x", "y", "m"],
+      "acceleration": ["x", "y", "m"],
+      "heading": ["value"]
+    },
+    "VEHICLE": {
+      "position": ["x", "y"],
+      "velocity": ["x", "y", "m"],
+      "acceleration": ["x", "y", "m"],
+      "heading": ["value"]
+    }
+  },
+
+  "pred_state": {
+    "PEDESTRIAN": {
+      "velocity": ["x", "y"]
+    },
+    "BICYCLE": {
+      "velocity": ["x", "y"]
+    },
+    "VEHICLE": {
+      "velocity": ["x", "y"]
+    }
+  },
+
+  "log_histograms": false
+}
--- a/code/data/init.py
+++ b/code/data/init.py
@ -0,0 +1,5 @@
+from .data_structures import Position, Velocity, Acceleration, Orientation, Map, ActuatorAngle, Scalar
+from .scene import Scene
+from .node import Node, BicycleNode
+from .scene_graph import TemporalSceneGraph
+from .environment import Environment
--- a/code/data/data_structures.py
+++ b/code/data/data_structures.py
@ -0,0 +1,163 @@
+import numpy as np
+from scipy.ndimage.interpolation import rotate
+
+
+class MotionEntity(object):
+    def __init__(self, x, y, z=None):
+        self.x = x
+        self.y = y
+        self.z = z
+        self.m = None
+
+    @property
+    def l(self):
+        if self.z is not None:
+            return np.linalg.norm(np.vstack((self.x, self.y, self.z)), axis=0)
+        else:
+            return np.linalg.norm(np.vstack((self.x, self.y)), axis=0)
+
+
+class Position(MotionEntity):
+    pass
+
+
+class Velocity(MotionEntity):
+    @staticmethod
+    def from_position(position, dt=1):
+        dx = np.zeros_like(position.x) * np.nan
+        dx[~np.isnan(position.x)] = np.gradient(position.x[~np.isnan(position.x)], dt)
+
+        dy = np.zeros_like(position.y) * np.nan
+        dy[~np.isnan(position.y)] = np.gradient(position.y[~np.isnan(position.y)], dt)
+
+        if position.z is not None:
+            dz = np.zeros_like(position.z) * np.nan
+            dz[~np.isnan(position.z)] = np.gradient(position.z[~np.isnan(position.z)], dt)
+        else:
+            dz = None
+
+        return Velocity(dx, dy, dz)
+
+
+class Acceleration(MotionEntity):
+    @staticmethod
+    def from_velocity(velocity, dt=1):
+        ddx = np.zeros_like(velocity.x) * np.nan
+        ddx[~np.isnan(velocity.x)] = np.gradient(velocity.x[~np.isnan(velocity.x)], dt)
+
+        ddy = np.zeros_like(velocity.y) * np.nan
+        ddy[~np.isnan(velocity.y)] = np.gradient(velocity.y[~np.isnan(velocity.y)], dt)
+
+        if velocity.z is not None:
+            ddz = np.zeros_like(velocity.z) * np.nan
+            ddz[~np.isnan(velocity.z)] = np.gradient(velocity.z[~np.isnan(velocity.z)], dt)
+        else:
+            ddz = None
+
+        return Acceleration(ddx, ddy, ddz)
+
+
+class ActuatorAngle(object):
+    def __init__(self):
+        pass
+
+
+class Scalar(object):
+    def __init__(self, value):
+        self.value = value
+        self.derivative = None
+
+# TODO Finish
+class Orientation(object):
+    def __init__(self, x, y, z, w):
+        self.x = x
+        self.y = y
+        self.z = z
+        self.w = w
+
+
+class Map(object):
+    def __init__(self, data=None, homography=None, description=None, data_file=""):
+        self.data = data
+        self.homography = homography
+        self.description = description
+        self.uint = False
+        self.data_file = data_file
+        self.rotated_maps_origin = None
+        self.rotated_maps = None
+        if self.data.dtype == np.uint8:
+            self.uint = True
+
+    @property
+    def fdata(self):
+        if self.uint:
+            return self.data / 255.
+        else:
+            return self.data
+
+    def to_map_points(self, world_pts):
+        org_shape = None
+        if len(world_pts.shape) > 2:
+            org_shape = world_pts.shape
+            world_pts = world_pts.reshape((-1, 2))
+        N, dims = world_pts.shape
+        points_with_one = np.ones((dims + 1, N))
+        points_with_one[:dims] = world_pts.T
+        map_points = (self.homography @ points_with_one).T[..., :dims] # TODO There was np.fliplr here for pedestrian dataset. WHY?
+        if org_shape is not None:
+            map_points = map_points.reshape(org_shape)
+        return map_points
+
+    def to_rotated_map_points(self, world_pts, rotation_angle):
+        rotation_rad = -rotation_angle * np.pi / 180
+        rot_mat = np.array([[np.cos(rotation_rad), np.sin(rotation_rad), 0.],
+                            [-np.sin(rotation_rad), np.cos(rotation_rad), 0.],
+                            [0., 0., 1.]])
+        org_map_points = self.to_map_points(world_pts) + 1
+
+        org_shape = None
+        if len(org_map_points.shape) > 2:
+            org_shape = org_map_points.shape
+            org_map_points = org_map_points.reshape((-1, 2))
+        N, dims = org_map_points.shape
+        points_with_one = np.ones((dims + 1, N))
+        points_with_one[:dims] = org_map_points.T
+        org_map_pts_rot = (rot_mat @ points_with_one).T[..., :dims]
+        if org_shape is not None:
+            org_map_pts_rot = org_map_pts_rot.reshape(org_shape)
+
+        map_pts_rot = self.rotated_maps_origin + org_map_pts_rot
+        return map_pts_rot
+
+    def calculate_rotations(self):
+        org_shape = self.data.shape
+        l = (np.ceil(np.sqrt(org_shape[0]**2 + org_shape[1]**2)) * 2).astype(int) + 1
+        rotated_maps = np.zeros((360, l, l, org_shape[2]), dtype=np.uint8)
+        o = np.array([l // 2, l // 2])
+        rotated_maps[0, o[0]+1:o[0]+org_shape[0]+1, o[1]+1:o[1]+org_shape[1]+1] = self.data
+        for i in range(1, 360):
+            rotated_maps[i] = rotate(rotated_maps[0], reshape=False, angle=i, prefilter=False)
+        rotated_maps[0] = rotate(rotated_maps[0], reshape=False, angle=0, prefilter=False)
+        self.rotated_maps_origin = o
+        self.rotated_maps = rotated_maps
+
+    # def __getstate__(self):
+    #     with open(self.data_file, 'w') as f:
+    #         np.save(f, self.rotated_maps)
+    #     self.rotated_maps = None
+    #     state = self.__dict__.copy()
+    #     return state
+    #
+    # def __setstate__(self, state):
+    #     self.__dict__.update(state)
+    #     with open(self.data_file, 'r') as f:
+    #         self.rotated_maps = np.load(f)
+
+if __name__ == "__main__":
+    img = np.zeros((103, 107, 3))
+    img[57, 84] = 255.
+    homography = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
+    m = Map(data=img, homography=homography)
+    m.calculate_rotations()
+    t = m.to_rotated_map_points(np.array([[57, 84]]), 0).astype(int)
+    print(m.rotated_maps[0, t[0, 0], t[0, 1]])
--- a/code/data/environment.py
+++ b/code/data/environment.py
@ -0,0 +1,74 @@
+import numpy as np
+from enum import Enum
+from itertools import product
+
+
+class Environment(object):
+    def __init__(self, node_type_list, standardization, scenes=None, attention_radius=None):
+        self.scenes = scenes
+        self.node_type_list = node_type_list
+        self.attention_radius = attention_radius
+        self.NodeType = Enum('NodeType', node_type_list)
+
+        self.standardization = standardization
+
+    def get_edge_types(self):
+        return [e for e in product([node_type for node_type in self.NodeType], repeat=2)]
+
+    def edge_type_str(self, edge_type):
+        return edge_type[0].name + '-' + edge_type[1].name
+
+    def get_standardize_params(self, state, node_type):
+        standardize_mean_list = list()
+        standardize_std_list = list()
+        for entity, dims in state.items():
+            for dim in dims:
+                standardize_mean_list.append(self.standardization[node_type.name][entity][dim]['mean'])
+                standardize_std_list.append(self.standardization[node_type.name][entity][dim]['std'])
+        standardize_mean = np.stack(standardize_mean_list)
+        standardize_std = np.stack(standardize_std_list)
+
+        return standardize_mean, standardize_std
+
+    def standardize(self, array, state, node_type, mean=None, std=None):
+        if mean is None and std is None:
+            mean, std = self.get_standardize_params(state, node_type)
+        elif mean is None and std is not None:
+            mean, _ = self.get_standardize_params(state, node_type)
+        elif mean is not None and std is None:
+            _, std = self.get_standardize_params(state, node_type)
+        return np.where(np.isnan(array), np.array(np.nan), (array - mean) / std)
+
+    def unstandardize(self, array, state, node_type, mean=None, std=None):
+        if mean is None and std is None:
+            mean, std = self.get_standardize_params(state, node_type)
+        elif mean is None and std is not None:
+            mean, _ = self.get_standardize_params(state, node_type)
+        elif mean is not None and std is None:
+            _, std = self.get_standardize_params(state, node_type)
+        return array * std + mean
+
+    # These two functions have to be implemented as pickle can not handle dynamic enums
+    def __getstate__(self):
+        for scene in self.scenes:
+            for node in scene.nodes:
+                node.type = node.type.name
+        attention_radius_no_enum = dict()
+        for key, value in self.attention_radius.items():
+            attention_radius_no_enum[(key[0].name, key[1].name)] = value
+        self.attention_radius = attention_radius_no_enum
+        self.NodeType = None
+        state = self.__dict__.copy()
+        return state
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self.NodeType = Enum('NodeType', self.node_type_list)
+        for scene in self.scenes:
+            for node in scene.nodes:
+                node.type = getattr(self.NodeType, node.type)
+        attention_radius_enum = dict()
+        for key, value in self.attention_radius.items():
+            attention_radius_enum[(getattr(self.NodeType, key[0]), getattr(self.NodeType, key[1]))] = value
+        self.attention_radius = attention_radius_enum
+
--- a/code/data/node.py
+++ b/code/data/node.py
@ -0,0 +1,264 @@
+import numpy as np
+from pyquaternion import Quaternion
+from . import ActuatorAngle
+from scipy.interpolate import splrep, splev, CubicSpline
+from scipy.integrate import cumtrapz
+
+
+class Node(object):
+    def __init__(self, type, position=None, velocity=None, acceleration=None, heading=None, orientation=None,
+                 length=None, width=None, height=None, first_timestep=0, is_robot=False):
+        self.type = type
+        self.position = position
+        self.heading = heading
+        self.length = length
+        self.wdith = width
+        self.height = height
+        self.orientation = orientation
+        self.velocity = velocity
+        self.acceleration = acceleration
+        self.first_timestep = first_timestep
+        self.dimensions = ['x', 'y', 'z']
+        self.is_robot = is_robot
+        self._last_timestep = None
+        self.description = ""
+
+    def __repr__(self):
+        return self.type.name
+
+    def scene_ts_to_node_ts(self, scene_ts):
+        """
+        Transforms timestamp from scene into timeframe of node data.
+        :param scene_ts: Scene timesteps
+        :return: ts: Transformed timesteps, paddingl: Number of timesteps in scene range which are not available in
+                    node data before data is available. paddingu: Number of timesteps in scene range which are not
+                    available in node data after data is available.
+        """
+        paddingl = (self.first_timestep - scene_ts[0]).clip(0)
+        paddingu = (scene_ts[1] - self.last_timestep).clip(0)
+        ts = np.array(scene_ts).clip(min=self.first_timestep, max=self.last_timestep) - self.first_timestep
+        return ts, paddingl, paddingu
+
+    def history_points_at(self, ts):
+        """
+        Number of history points in trajectory. Timestep is exclusive.
+        :param ts: Scene timestep where the number of history points are queried.
+        :return: Number of history timesteps.
+        """
+        return ts - self.first_timestep
+
+    def get_entity(self, ts_scene, entity, dims, padding=np.nan):
+        if ts_scene.size == 1:
+            ts_scene = np.array([ts_scene, ts_scene])
+        length = ts_scene[1] - ts_scene[0] + 1 # ts is inclusive
+        entity_array = np.zeros((length, len(dims))) * padding
+        ts, paddingl, paddingu = self.scene_ts_to_node_ts(ts_scene)
+        entity_array[paddingl:length - paddingu] = np.array([getattr(getattr(self, entity), d)[ts[0]:ts[1]+1] for d in dims]).T
+        return entity_array
+
+    def get(self, ts_scene, state, padding=np.nan):
+        return np.hstack([self.get_entity(ts_scene, entity, dims, padding) for entity, dims in state.items()])
+
+
+    @property
+    def timesteps(self):
+        return self.position.x.size
+
+    @property
+    def last_timestep(self):
+        if self._last_timestep is None:
+            self._last_timestep = self.first_timestep + self.timesteps - 1
+        return self._last_timestep
+
+
+class BicycleNode(Node):
+    def __init__(self, type, position=None, velocity=None, acceleration=None, heading=None, orientation=None,
+                 length=None, width=None, height=None, first_timestep=0, actuator_angle=None):
+        super().__init__(type, position=position, velocity=velocity, acceleration=acceleration, heading=heading,
+                         orientation=orientation, length=length, width=width, height=height,
+                         first_timestep=first_timestep)
+        self.actuator_angle = actuator_angle
+
+    # TODO Probably wrong. Differential of magnitude is not euqal to the  the magnitude of the differentials
+    def calculate_steering_angle_old(self, vel_tresh=0.0):
+        vel = np.linalg.norm(np.hstack((np.expand_dims(self.velocity.x, 1), np.expand_dims(self.velocity.y, 1))), axis=1)
+
+        beta = np.arctan2(self.velocity.y, self.velocity.x) - self.heading.value
+        beta[vel < vel_tresh] = 0.
+        steering_angle = np.arctan2(2 * np.sin(beta), np.cos(beta))
+        steering_angle[np.abs(steering_angle) > np.pi / 2] = 0 # Velocity Outlier
+
+        aa = ActuatorAngle()
+        aa.steering_angle = np.zeros_like(np.arctan2(2 * np.sin(beta), np.cos(beta)))
+        self.actuator_angle = aa
+
+    def calculate_steering_angle(self, dt, steering_tresh=0.0, vel_tresh=0.0):
+        t = np.arange(0, self.timesteps * dt, dt)
+        s = 0.01 * len(t)
+        #c_pos_x_g_x_tck = CubicSpline(t, np.array(pos_x_filtert))
+        #c_pos_y_g_x_tck = CubicSpline(t, np.array(pos_y_filtert))
+        #c_pos_x_g_x_tck = splrep(t, self.position.x, s=s)
+        #c_pos_y_g_x_tck = splrep(t, self.position.y, s=s)
+
+        #vel_x_g = c_pos_x_g_x_tck(t, 1)
+        #vel_y_g = c_pos_y_g_x_tck(t, 1)
+        #vel_x_g = splev(t, c_pos_x_g_x_tck, der=1)
+        #vel_y_g = splev(t, c_pos_y_g_x_tck, der=1)
+
+        vel_x_g = self.velocity.x
+        vel_y_g = self.velocity.y
+
+        v_x_ego = []
+        h = []
+        for t in range(self.timesteps):
+            dh_max = 1.2 / self.length * (np.linalg.norm(np.array([vel_x_g[t], vel_y_g[t]])))
+            heading = np.arctan2(vel_y_g[t], vel_x_g[t])
+            #if len(h) > 0 and np.abs(heading - h[-1]) > dh_max:
+            #    heading = h[-1]
+            h.append(heading)
+            q = Quaternion(axis=(0.0, 0.0, 1.0), radians=heading)
+            v_x_ego_t = q.inverse.rotate(np.array([vel_x_g[t], vel_y_g[t], 1]))[0]
+            if v_x_ego_t < 0.0:
+                v_x_ego_t = 0.
+            v_x_ego.append(v_x_ego_t)
+
+        v_x_ego = np.stack(v_x_ego, axis=0)
+        h = np.stack(h, axis=0)
+
+        dh = np.gradient(h, dt)
+
+        sa = np.arctan2(dh * self.length, v_x_ego)
+        sa[(dh == 0.) | (v_x_ego == 0.)] = 0.
+        sa = sa.clip(min=-steering_tresh, max=steering_tresh)
+
+        a = np.gradient(v_x_ego, dt)
+        # int = self.integrate_bicycle_model(np.array([a]),
+        #                                    sa,
+        #                                    np.array([h[0]]),
+        #                                    np.array([self.position.x[0],
+        #                                              self.position.y[0]]),
+        #                                    v_x_ego[0],
+        #                                    self.length, 0.5)
+        # p = np.stack((self.position.x, self.position.y), axis=1)
+        #
+        # #assert ((int[0] - p) < 1.0).all()
+
+        aa = ActuatorAngle()
+        aa.steering_angle = sa
+        self.acceleration.m = a
+        self.actuator_angle = aa
+
+    def inverse_np_gradient(self, f, dx, F0=0.):
+        N = f.shape[0]
+        l = f.shape[-1]
+        l2 = np.ceil(l / 2).astype(int)
+        return (F0 +
+                ((2 * dx) *
+                 np.c_['-1',
+                       np.r_['-1', np.zeros((N, 1)), f[..., 1:-1:2].cumsum(axis=-1)],
+                       f[..., ::2].cumsum(axis=-1) - f[..., [0]] / 2]
+                 ).reshape((N, 2, l2)).reshape(N, 2 * l2, order='F')[:, :l]
+                )
+
+    def integrate_trajectory(self, v, x0, dt):
+        xd_ = self.inverse_np_gradient(v[..., 0], dx=dt, F0=x0[0])
+        yd_ = self.inverse_np_gradient(v[..., 1], dx=dt, F0=x0[1])
+        integrated = np.stack([xd_, yd_], axis=2)
+        return integrated
+
+    def integrate_bicycle_model(self, a, sa, h0, x0, v0, l, dt):
+        v_m = self.inverse_np_gradient(a, dx=0.5, F0=v0)
+
+        dh = (np.tan(sa) / l) * v_m[0]
+        h = self.inverse_np_gradient(np.array([dh]), dx=dt, F0=h0)
+
+        vx = np.cos(h) * v_m
+        vy = np.sin(h) * v_m
+
+        v = np.stack((vx, vy), axis=2)
+        return self.integrate_trajectory(v, x0, dt)
+
+    def calculate_steering_angle_keep(self, dt, steering_tresh=0.0, vel_tresh=0.0):
+
+        vel_approx = np.linalg.norm(np.stack((self.velocity.x, self.velocity.y), axis=0), axis=0)
+        mask = np.ones_like(vel_approx)
+        mask[vel_approx < vel_tresh] = 0
+
+        t = np.arange(0, self.timesteps * dt, dt)
+        pos_x_filtert = []
+        pos_y_filtert = []
+        s = None
+        for i in range(mask.size):
+            if mask[i] == 0 and s is None:
+                s = i
+            elif mask[i] != 0 and s is not None:
+                t_start = t[s-1]
+                pos_x_start = self.position.x[s-1]
+                pos_y_start = self.position.y[s-1]
+                t_mean = t[s:i].mean()
+                pos_x_mean = self.position.x[s:i].mean()
+                pos_y_mean = self.position.y[s:i].mean()
+                t_end = t[i]
+                pos_x_end = self.position.x[i]
+                pos_y_end = self.position.y[i]
+                for step in range(s, i+1):
+                    if t[step] <= t_mean:
+                        pos_x_filtert.append(pos_x_start + ((t[step] - t_start) / (t_mean - t_start)) * (pos_x_mean - pos_x_start))
+                        pos_y_filtert.append(pos_y_start + ((t[step] - t_start) / (t_mean - t_start)) * (pos_y_mean - pos_y_start))
+                    else:
+                        pos_x_filtert.append(pos_x_mean + ((t[step] - t_end) / (t_end - t_mean)) * (pos_x_end - pos_x_mean))
+                        pos_y_filtert.append(pos_y_mean + ((t[step] - t_end) / (t_end - t_mean)) * (pos_y_end - pos_y_mean))
+                s = None
+            elif mask[i] != 0 and s is None:
+                pos_x_filtert.append(self.position.x[i].mean())
+                pos_y_filtert.append(self.position.y[i].mean())
+        if s is not None:
+            t_start = t[s - 1]
+            pos_x_start = self.position.x[s - 1]
+            pos_y_start = self.position.y[s - 1]
+            t_mean = t[s:i].max()
+            pos_x_mean = self.position.x[s:i].mean()
+            pos_y_mean = self.position.y[s:i].mean()
+            for step in range(s, i+1):
+                pos_x_filtert.append(
+                    pos_x_start + ((t[step] - t_start) / (t_mean - t_start)) * (pos_x_mean - pos_x_start))
+                pos_y_filtert.append(
+                    pos_y_start + ((t[step] - t_start) / (t_mean - t_start)) * (pos_y_mean - pos_y_start))
+
+        s = 0.001 * len(t)
+        #c_pos_x_g_x_tck = CubicSpline(t, np.array(pos_x_filtert))
+        #c_pos_y_g_x_tck = CubicSpline(t, np.array(pos_y_filtert))
+        c_pos_x_g_x_tck = splrep(t, np.array(pos_x_filtert), s=s)
+        c_pos_y_g_x_tck = splrep(t, np.array(pos_y_filtert), s=s)
+
+        #vel_x_g = c_pos_x_g_x_tck(t, 1)
+        #vel_y_g = c_pos_y_g_x_tck(t, 1)
+        vel_x_g = splev(t, c_pos_x_g_x_tck, der=1)
+        vel_y_g = splev(t, c_pos_y_g_x_tck, der=1)
+
+        v_x_ego = []
+        h = []
+        for t in range(self.timesteps):
+            dh_max = 1.19 / self.length * (np.linalg.norm(np.array([vel_x_g[t], vel_y_g[t]])))
+            heading = np.arctan2(vel_y_g[t], vel_x_g[t])
+            if len(h) > 0 and np.abs(heading - h[-1]) > dh_max:
+                heading = h[-1]
+            h.append(heading)
+            q = Quaternion(axis=(0.0, 0.0, 1.0), radians=heading)
+            v_x_ego_t = q.inverse.rotate(np.array([vel_x_g[t], vel_y_g[t], 1]))[0]
+            if v_x_ego_t < 0.0:
+                v_x_ego_t = 0.
+            v_x_ego.append(v_x_ego_t)
+
+        v_x_ego = np.stack(v_x_ego, axis=0)
+        h = np.stack(h, axis=0)
+
+        dh = np.gradient(h, dt)
+
+        sa = np.arctan2(dh * self.length, v_x_ego)
+        sa[dh == 0.] = 0.
+
+        aa = ActuatorAngle()
+        aa.steering_angle = sa
+        self.actuator_angle = aa
+
--- a/code/data/scene.py
+++ b/code/data/scene.py
@ -0,0 +1,102 @@
+import numpy as np
+from .scene_graph import TemporalSceneGraph
+
+
+class Scene(object):
+    def __init__(self, map=None, timesteps=0, dt=1, name=""):
+        self.map = map
+        self.timesteps = timesteps
+        self.dt = dt
+        self.name = name
+
+        self.nodes = []
+
+        self.robot = None
+
+        self.temporal_scene_graph = None
+
+        self.description = ""
+
+    def get_scene_graph(self, timestep, attention_radius=None, edge_addition_filter=None, edge_removal_filter=None):
+        if self.temporal_scene_graph is None:
+            timestep_range = np.array([timestep - len(edge_addition_filter), timestep + len(edge_removal_filter)])
+            node_pos_dict = dict()
+            present_nodes = self.present_nodes(np.array([timestep]))
+
+            for node in present_nodes[timestep]:
+                node_pos_dict[node] = np.squeeze(node.get(timestep_range, {'position': ['x', 'y']}))
+            tsg = TemporalSceneGraph.create_from_temp_scene_dict(node_pos_dict,
+                                                                 attention_radius,
+                                                                 duration=(len(edge_addition_filter) +
+                                                                       len(edge_removal_filter) + 1),
+                                                                 edge_addition_filter=edge_addition_filter,
+                                                                 edge_removal_filter=edge_removal_filter
+                                                                 )
+
+            return tsg.to_scene_graph(t=len(edge_addition_filter),
+                                      t_hist=len(edge_addition_filter),
+                                      t_fut=len(edge_removal_filter))
+        else:
+            return self.temporal_scene_graph.to_scene_graph(timestep,
+                                                            len(edge_addition_filter),
+                                                            len(edge_removal_filter))
+
+    def calculate_scene_graph(self, attention_radius, state, edge_addition_filter=None, edge_removal_filter=None):
+        timestep_range = np.array([0, self.timesteps-1])
+        node_pos_dict = dict()
+
+        for node in self.nodes:
+            node_pos_dict[node] = np.squeeze(node.get(timestep_range, {'position': ['x', 'y']}))
+
+        self.temporal_scene_graph = TemporalSceneGraph.create_from_temp_scene_dict(node_pos_dict,
+                                                                                   attention_radius,
+                                                                                   duration=self.timesteps,
+                                                                                   edge_addition_filter=edge_addition_filter,
+                                                                                   edge_removal_filter=edge_removal_filter)
+
+    def length(self):
+        return self.timesteps * self.dt
+
+    def present_nodes(self, timesteps, type=None, min_history_timesteps=0, min_future_timesteps=0, include_robot=True, max_nodes=None, curve=False): # TODO REMOVE
+        present_nodes = {}
+
+        picked_nodes = 0
+
+        rand_idx = np.random.choice(len(self.nodes), len(self.nodes), replace=False)
+
+        for i in rand_idx:
+            node = self.nodes[i]
+            if node.is_robot and not include_robot:
+                continue
+            if type is None or node.type == type:
+                if curve and node.type.name == 'VEHICLE':
+                    if 'curve' not in node.description and np.random.rand() > 0.1:
+                        continue
+                lower_bound = timesteps - min_history_timesteps
+                upper_bound = timesteps + min_future_timesteps
+                mask = (node.first_timestep <= lower_bound) & (upper_bound <= node.last_timestep)
+                if mask.any():
+                    timestep_indices_present = np.nonzero(mask)[0]
+                    for timestep_index_present in timestep_indices_present:
+                        if timesteps[timestep_index_present] in present_nodes.keys():
+                            present_nodes[timesteps[timestep_index_present]].append(node)
+                        else:
+                            present_nodes[timesteps[timestep_index_present]] = [node]
+                        picked_nodes += 1
+                        if max_nodes is not None and picked_nodes >= max_nodes:
+                            break
+
+            if max_nodes is not None and picked_nodes >= max_nodes:
+                break
+
+        return present_nodes
+
+    def sample_timesteps(self, batch_size, min_future_timesteps=0):
+        if batch_size > self.timesteps:
+            batch_size = self.timesteps
+        return np.random.choice(np.arange(0, self.timesteps-min_future_timesteps), size=batch_size, replace=False)
+
+    def __repr__(self):
+        return f"Scene: Duration: {self.length()}s," \
+               f" Nodes: {len(self.nodes)}," \
+               f" Map: {'Yes' if self.map is not None else 'No'}."
--- a/code/data/scene_graph.py
+++ b/code/data/scene_graph.py
@ -0,0 +1,173 @@
+import numpy as np
+from scipy.spatial.distance import pdist, squareform
+import scipy.signal as ss
+from collections import defaultdict
+import warnings
+
+
+class TemporalSceneGraph(object):
+    def __init__(self,
+                 edge_radius,
+                 nodes=None,
+                 adj_cube=np.zeros((1, 0, 0)),
+                 weight_cube=np.zeros((1, 0, 0)),
+                 node_type_mat=np.zeros((0, 0)),
+                 edge_scaling=None):
+        self.edge_radius = edge_radius
+        self.nodes = nodes
+        if nodes is None:
+            self.nodes = np.array([])
+        self.adj_cube = adj_cube
+        self.weight_cube = weight_cube
+        self.node_type_mat = node_type_mat
+        self.adj_mat = np.max(self.adj_cube, axis=0).clip(max=1.0)
+        self.edge_scaling = edge_scaling
+        self.node_index_lookup = None
+        self.calculate_node_index_lookup()
+
+    def calculate_node_index_lookup(self):
+        node_index_lookup = dict()
+        for i, node in enumerate(self.nodes):
+            node_index_lookup[node] = i
+
+        self.node_index_lookup = node_index_lookup
+
+    def get_num_edges(self, t=0):
+        return np.sum(self.adj_cube[t]) // 2
+
+    def get_index(self, node):
+        return self.node_index_lookup[node]
+
+    @staticmethod
+    def get_edge_type(n1, n2):
+        return '-'.join(sorted([str(n1), str(n2)]))
+
+    @classmethod
+    def create_from_temp_scene_dict(cls,
+                                    scene_temp_dict,
+                                    attention_radius,
+                                    duration=1,
+                                    edge_addition_filter=None,
+                                    edge_removal_filter=None):
+        """
+        Construct a spatiotemporal graph from agent positions in a dataset.
+
+        returns: sg: An aggregate SceneGraph of the dataset.
+        """
+        nodes = scene_temp_dict.keys()
+        N = len(nodes)
+        total_timesteps = duration
+
+        position_cube = np.zeros((total_timesteps, N, 2))
+
+        adj_cube = np.zeros((total_timesteps, N, N), dtype=np.int8)
+        dist_cube = np.zeros((total_timesteps, N, N), dtype=np.float)
+
+        node_type_mat = np.zeros((N, N), dtype=np.int8)
+        node_attention_mat = np.zeros((N, N), dtype=np.float)
+
+        for node_idx, node in enumerate(nodes):
+            position_cube[:, node_idx] = scene_temp_dict[node]
+            node_type_mat[:, node_idx] = node.type.value
+            for node_idx_from, node_from in enumerate(nodes):
+                node_attention_mat[node_idx_from, node_idx] = attention_radius[(node_from.type, node.type)]
+
+        np.fill_diagonal(node_type_mat, 0)
+        agg_adj_matrix = np.zeros((N, N), dtype=np.int8)
+
+        for timestep in range(position_cube.shape[0]):
+            dists = squareform(pdist(position_cube[timestep], metric='euclidean'))
+
+            # Put a 1 for all agent pairs which are closer than the edge_radius.
+            # Can produce a warning as dists can be nan if no data for node is available.
+            # This is accepted as nan <= x evaluates to False
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                adj_matrix = (dists <= node_attention_mat).astype(np.int8) * node_type_mat
+
+            # Remove self-loops.
+            np.fill_diagonal(adj_matrix, 0)
+
+            agg_adj_matrix |= adj_matrix
+
+            adj_cube[timestep] = adj_matrix
+            dist_cube[timestep] = dists
+
+        dist_cube[np.isnan(dist_cube)] = 0.
+        weight_cube = np.divide(1.,
+                                dist_cube,
+                                out=np.zeros_like(dist_cube),
+                                where=(dist_cube > 0.))
+        edge_scaling = None
+        if edge_addition_filter is not None and edge_removal_filter is not None:
+            edge_scaling = cls.calculate_edge_scaling(adj_cube, edge_addition_filter, edge_removal_filter)
+        sg = cls(attention_radius, np.array(list(nodes)), adj_cube, weight_cube, node_type_mat, edge_scaling=edge_scaling)
+        return sg
+
+    @staticmethod
+    def calculate_edge_scaling(adj_cube, edge_addition_filter, edge_removal_filter):
+        new_edges = np.minimum(
+            ss.convolve(adj_cube, np.reshape(edge_addition_filter, (-1, 1, 1)), 'same'), 1.
+        )
+
+        old_edges = np.minimum(
+            ss.convolve(adj_cube, np.reshape(edge_removal_filter, (-1, 1, 1)), 'same'), 1.
+        )
+
+        return np.minimum(new_edges + old_edges, 1.)
+
+    def to_scene_graph(self, t, t_hist=0, t_fut=0):
+        lower_t = np.clip(t-t_hist, a_min=0, a_max=None)
+        higher_t = np.clip(t + t_fut + 1, a_min=None, a_max=self.adj_cube.shape[0] + 1)
+        adj_mat = np.max(self.adj_cube[lower_t:higher_t], axis=0)
+        weight_mat = np.max(self.weight_cube[lower_t:higher_t], axis=0)
+        return SceneGraph(self.edge_radius,
+                          self.nodes,
+                          adj_mat,
+                          weight_mat,
+                          self.node_type_mat,
+                          self.node_index_lookup,
+                          edge_scaling=self.edge_scaling[t])
+
+
+class SceneGraph(object):
+    def __init__(self,
+                 edge_radius,
+                 nodes=None,
+                 adj_mat=np.zeros((0, 0)),
+                 weight_mat=np.zeros((0, 0)),
+                 node_type_mat=np.zeros((0, 0)),
+                 node_index_lookup=None,
+                 edge_scaling=None):
+        self.edge_radius = edge_radius
+        self.nodes = nodes
+        if nodes is None:
+            self.nodes = np.array([])
+        self.node_type_mat = node_type_mat
+        self.adj_mat = adj_mat
+        self.weight_mat = weight_mat
+        self.edge_scaling = edge_scaling
+        self.node_index_lookup = node_index_lookup
+
+    def get_index(self, node):
+        return self.node_index_lookup[node]
+
+    def get_neighbors(self, node, type):
+        node_index = self.get_index(node)
+        connection_mask = self.adj_mat[node_index].astype(bool)
+        mask = ((self.node_type_mat[node_index] == type.value) * connection_mask)
+        return self.nodes[mask]
+
+    def get_edge_scaling(self, node=None):
+        if node is None:
+            return self.edge_scaling
+        else:
+            node_index = self.get_index(node)
+            return self.edge_scaling[node_index, self.adj_mat[node_index] > 0.]
+
+    def get_edge_weight(self, node=None):
+        if node is None:
+            return self.weight_mat
+        else:
+            node_index = self.get_index(node)
+            return self.weight_mat[node_index, self.adj_mat[node_index] > 0.]
--- a/code/evaluation/init.py
+++ b/code/evaluation/init.py
@ -0,0 +1 @@
+from .evaluation import compute_batch_statistics, log_batch_errors
--- a/code/evaluation/evaluation.py
+++ b/code/evaluation/evaluation.py
@ -0,0 +1,106 @@
+import numpy as np
+from scipy.interpolate import RectBivariateSpline
+from scipy.ndimage import binary_dilation
+from scipy.stats import gaussian_kde
+from utils import prediction_output_to_trajectories
+import visualization
+from matplotlib import pyplot as plt
+
+
+def compute_ade(predicted_trajs, gt_traj):
+    error = np.linalg.norm(predicted_trajs - gt_traj, axis=-1)
+    ade = np.mean(error, axis=-1)
+    return ade
+
+
+def compute_fde(predicted_trajs, gt_traj):
+    final_error = np.linalg.norm(predicted_trajs[:, -1] - gt_traj[-1], axis=-1)
+    return final_error
+
+
+def compute_kde_nll(predicted_trajs, gt_traj):
+    kde_ll = 0.
+    log_pdf_lower_bound = -20
+    num_timesteps = gt_traj.shape[0]
+
+    for timestep in range(num_timesteps): # TODO First timestep
+        kde = gaussian_kde(predicted_trajs[:, timestep].T)
+        pdf = np.clip(kde.logpdf(gt_traj[timestep].T), a_min=log_pdf_lower_bound, a_max=None)[0]
+        kde_ll += pdf / num_timesteps
+
+    return -kde_ll
+
+
+def compute_obs_violations(predicted_trajs, map):
+    obs_map = 1 - map.fdata[..., 0]
+
+    interp_obs_map = RectBivariateSpline(range(obs_map.shape[0]),
+                                         range(obs_map.shape[1]),
+                                         obs_map,
+                                         kx=1, ky=1)
+
+    old_shape = predicted_trajs.shape
+    pred_trajs_map = map.to_map_points(predicted_trajs.reshape((-1, 2)))
+
+    traj_obs_values = interp_obs_map(pred_trajs_map[:, 0], pred_trajs_map[:, 1], grid=False)
+    traj_obs_values = traj_obs_values.reshape((old_shape[0], old_shape[1]))
+    num_viol_trajs = np.sum(traj_obs_values.max(axis=1) > 0, dtype=float)
+
+    return num_viol_trajs
+
+
+def compute_batch_statistics(prediction_output_dict, dt, max_hl, ph, node_type_enum, kde=True, obs=False, map=None):
+
+    (prediction_dict,
+     _,
+     futures_dict) = prediction_output_to_trajectories(prediction_output_dict, dt, max_hl, ph)
+
+    batch_error_dict = dict()
+    for node_type in node_type_enum:
+        batch_error_dict[node_type] =  {'ade': list(), 'fde': list(), 'kde': list(), 'obs_viols': list()}
+
+    for t in prediction_dict.keys():
+        for node in prediction_dict[t].keys():
+            ade_errors = compute_ade(prediction_dict[t][node], futures_dict[t][node])
+            fde_errors = compute_fde(prediction_dict[t][node], futures_dict[t][node])
+            if kde:
+                kde_ll = compute_kde_nll(prediction_dict[t][node], futures_dict[t][node])
+            else:
+                kde_ll = 0
+            if obs:
+                obs_viols = compute_obs_violations(prediction_dict[t][node], map)
+            else:
+                obs_viols = 0
+            batch_error_dict[node.type]['ade'].extend(list(ade_errors))
+            batch_error_dict[node.type]['fde'].extend(list(fde_errors))
+            batch_error_dict[node.type]['kde'].extend([kde_ll])
+            batch_error_dict[node.type]['obs_viols'].extend([obs_viols])
+
+    return batch_error_dict
+
+
+def log_batch_errors(batch_errors_list, log_writer, namespace, curr_iter, bar_plot=[], box_plot=[]):
+    for node_type in batch_errors_list[0].keys():
+        for metric in batch_errors_list[0][node_type].keys():
+            metric_batch_error = []
+            for batch_errors in batch_errors_list:
+                metric_batch_error.extend(batch_errors[node_type][metric])
+
+            if len(metric_batch_error) > 0:
+                log_writer.add_histogram(f"{node_type.name}/{namespace}/{metric}", metric_batch_error, curr_iter)
+                log_writer.add_scalar(f"{node_type.name}/{namespace}/{metric}_mean", np.mean(metric_batch_error), curr_iter)
+                log_writer.add_scalar(f"{node_type.name}/{namespace}/{metric}_median", np.median(metric_batch_error), curr_iter)
+
+                if metric in bar_plot:
+                    pd = {'dataset': [namespace] * len(metric_batch_error),
+                                  metric: metric_batch_error}
+                    kde_barplot_fig, ax = plt.subplots(figsize=(5, 5))
+                    visualization.visualization_utils.plot_barplots(ax, pd, 'dataset', metric)
+                    log_writer.add_figure(f"{node_type.name}/{namespace}/{metric}_bar_plot", kde_barplot_fig, curr_iter)
+
+                if metric in box_plot:
+                    mse_fde_pd = {'dataset': [namespace] * len(metric_batch_error),
+                                  metric: metric_batch_error}
+                    fig, ax = plt.subplots(figsize=(5, 5))
+                    visualization.visualization_utils.plot_boxplots(ax, mse_fde_pd, 'dataset', metric)
+                    log_writer.add_figure(f"{node_type.name}/{namespace}/{metric}_box_plot", fig, curr_iter)
--- a/code/model/init.py
+++ b/code/model/init.py
--- a/code/model/components/init.py
+++ b/code/model/components/init.py
@ -0,0 +1,4 @@
+from .discrete_latent import DiscreteLatent
+from .gmm2d import GMM2D
+from .map_encoder import CNNMapEncoder
+from .additive_attention import AdditiveAttention, TemporallyBatchedAdditiveAttention
--- a/code/model/components/additive_attention.py
+++ b/code/model/components/additive_attention.py
@ -0,0 +1,67 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class AdditiveAttention(nn.Module):
+    # Implementing the attention module of Bahdanau et al. 2015 where
+    # score(h_j, s_(i-1)) = v . tanh(W_1 h_j + W_2 s_(i-1))
+    def __init__(self, encoder_hidden_state_dim, decoder_hidden_state_dim, internal_dim=None):
+        super(AdditiveAttention, self).__init__()
+
+        if internal_dim is None:
+            internal_dim = int((encoder_hidden_state_dim + decoder_hidden_state_dim) / 2)
+
+        self.w1 = nn.Linear(encoder_hidden_state_dim, internal_dim, bias=False)
+        self.w2 = nn.Linear(decoder_hidden_state_dim, internal_dim, bias=False)
+        self.v = nn.Linear(internal_dim, 1, bias=False)
+
+    def score(self, encoder_state, decoder_state):
+        # encoder_state is of shape (batch, enc_dim)
+        # decoder_state is of shape (batch, dec_dim)
+        # return value should be of shape (batch, 1)
+        return self.v(torch.tanh(self.w1(encoder_state) + self.w2(decoder_state)))
+
+    def forward(self, encoder_states, decoder_state):
+        # encoder_states is of shape (batch, num_enc_states, enc_dim)
+        # decoder_state is of shape (batch, dec_dim)
+        score_vec = torch.cat([self.score(encoder_states[:, i], decoder_state) for i in range(encoder_states.shape[1])],
+                              dim=1)
+        # score_vec is of shape (batch, num_enc_states)
+
+        attention_probs = torch.unsqueeze(F.softmax(score_vec, dim=1), dim=2)
+        # attention_probs is of shape (batch, num_enc_states, 1)
+
+        final_context_vec = torch.sum(attention_probs * encoder_states, dim=1)
+        # final_context_vec is of shape (batch, enc_dim)
+
+        return final_context_vec, attention_probs
+
+
+class TemporallyBatchedAdditiveAttention(AdditiveAttention):
+    # Implementing the attention module of Bahdanau et al. 2015 where
+    # score(h_j, s_(i-1)) = v . tanh(W_1 h_j + W_2 s_(i-1))
+    def __init__(self, encoder_hidden_state_dim, decoder_hidden_state_dim, internal_dim=None):
+        super(TemporallyBatchedAdditiveAttention, self).__init__(encoder_hidden_state_dim,
+                                                                 decoder_hidden_state_dim,
+                                                                 internal_dim)
+
+    def score(self, encoder_state, decoder_state):
+        # encoder_state is of shape (batch, num_enc_states, max_time, enc_dim)
+        # decoder_state is of shape (batch, max_time, dec_dim)
+        # return value should be of shape (batch, num_enc_states, max_time, 1)
+        return self.v(torch.tanh(self.w1(encoder_state) + torch.unsqueeze(self.w2(decoder_state), dim=1)))
+
+    def forward(self, encoder_states, decoder_state):
+        # encoder_states is of shape (batch, num_enc_states, max_time, enc_dim)
+        # decoder_state is of shape (batch, max_time, dec_dim)
+        score_vec = self.score(encoder_states, decoder_state)
+        # score_vec is of shape (batch, num_enc_states, max_time, 1)
+
+        attention_probs = F.softmax(score_vec, dim=1)
+        # attention_probs is of shape (batch, num_enc_states, max_time, 1)
+
+        final_context_vec = torch.sum(attention_probs * encoder_states, dim=1)
+        # final_context_vec is of shape (batch, max_time, enc_dim)
+
+        return final_context_vec, torch.squeeze(torch.transpose(attention_probs, 1, 2), dim=3)
--- a/code/model/components/discrete_latent.py
+++ b/code/model/components/discrete_latent.py
@ -0,0 +1,107 @@
+import torch
+import torch.distributions as td
+import numpy as np
+from model.model_utils import ModeKeys, tile
+
+
+class DiscreteLatent(object):
+    def __init__(self, hyperparams, device):
+        self.hyperparams = hyperparams
+        self.z_dim = hyperparams['N'] * hyperparams['K']
+        self.N = hyperparams['N']
+        self.K = hyperparams['K']
+        self.kl_min = hyperparams['kl_min']
+        self.device = device
+        self.temp = None  # filled in by MultimodalGenerativeCVAE.set_annealing_params
+        self.z_logit_clip = None  # filled in by MultimodalGenerativeCVAE.set_annealing_params
+        self.p_dist = None  # filled in by MultimodalGenerativeCVAE.encoder
+        self.q_dist = None  # filled in by MultimodalGenerativeCVAE.encoder
+
+    def dist_from_h(self, h, mode):
+        logits_separated = torch.reshape(h, (-1, self.N, self.K))
+        logits_separated_mean_zero = logits_separated - torch.mean(logits_separated, dim=-1, keepdim=True)
+        if self.z_logit_clip is not None and mode == ModeKeys.TRAIN:
+            c = self.z_logit_clip
+            logits = torch.clamp(logits_separated_mean_zero, min=-c, max=c)
+        else:
+            logits = logits_separated_mean_zero
+
+        return td.OneHotCategorical(logits=logits)
+
+    def sample_q(self, k, mode):
+        if mode == ModeKeys.TRAIN:
+            z_dist = td.RelaxedOneHotCategorical(self.temp, logits=self.q_dist.logits)
+            z_NK = z_dist.rsample((k,))
+        elif mode == ModeKeys.EVAL:
+            z_NK = self.q_dist.sample((k,))
+        return torch.reshape(z_NK, (k, -1, self.z_dim))
+
+    def sample_p(self, num_samples_z, mode, num_samples_gmm=1, most_likely=False, all_z=False):
+        if all_z:
+            bs = self.p_dist.probs.size()[0]
+            z_NK = torch.from_numpy(self.all_one_hot_combinations(self.N, self.K)).float().to(self.device).repeat(1, bs)
+            num_samples_z = self.K ** self.N
+
+        elif most_likely:
+            # Sampling the most likely z from p(z|x).
+            eye_mat = torch.eye(self.p_dist.event_shape[-1], device=self.device)
+            argmax_idxs = torch.argmax(self.p_dist.probs, dim=2)
+            z_NK = torch.unsqueeze(eye_mat[argmax_idxs], dim=0).expand(num_samples_z, -1, -1, -1)
+
+        else:
+            z_NK = self.p_dist.sample((num_samples_z,))
+
+        z_NK = tile(z_NK, 0, num_samples_gmm, device=self.device)
+        k = num_samples_z * num_samples_gmm
+
+        if mode == ModeKeys.PREDICT:
+            return torch.reshape(z_NK, (k, -1, self.N * self.K)), num_samples_z
+        else:
+            return torch.reshape(z_NK, (k, -1, self.N * self.K))
+
+    def kl_q_p(self, log_writer=None, prefix=None, curr_iter=None):
+        kl_separated = td.kl_divergence(self.q_dist, self.p_dist)
+        if len(kl_separated.size()) < 2:
+            kl_separated = torch.unsqueeze(kl_separated, dim=0)
+
+        kl_minibatch = torch.mean(kl_separated, dim=0, keepdim=True)
+
+        if log_writer is not None:
+            log_writer.add_scalar(prefix + '/true_kl', torch.sum(kl_minibatch), curr_iter)
+
+        if self.kl_min > 0:
+            kl_lower_bounded = torch.clamp(kl_minibatch, min=self.kl_min)
+            kl = torch.sum(kl_lower_bounded)
+        else:
+            kl = torch.sum(kl_minibatch)
+
+        return kl
+
+    def q_log_prob(self, z):
+        k = z.size()[0]
+        z_NK = torch.reshape(z, [k, -1, self.N, self.K])
+        return torch.sum(self.q_dist.log_prob(z_NK), dim=2)
+
+    def p_log_prob(self, z):
+        k = z.size()[0]
+        z_NK = torch.reshape(z, [k, -1, self.N, self.K])
+        return torch.sum(self.p_dist.log_prob(z_NK), dim=2)
+
+    def get_p_dist_probs(self):
+        return self.p_dist.probs
+
+    @staticmethod
+    def all_one_hot_combinations(N, K):
+        return np.eye(K).take(np.reshape(np.indices([K] * N), [N, -1]).T, axis=0).reshape(-1, N * K)  # [K**N, N*K]
+
+    def summarize_for_tensorboard(self, log_writer, prefix, curr_iter):
+        log_writer.add_histogram(prefix + "/latent/p_z_x", self.p_dist.probs, curr_iter)
+        log_writer.add_histogram(prefix + "/latent/q_z_xy", self.q_dist.probs, curr_iter)
+        log_writer.add_histogram(prefix + "/latent/p_z_x_logits", self.p_dist.logits, curr_iter)
+        log_writer.add_histogram(prefix + "/latent/q_z_xy_logits", self.q_dist.logits, curr_iter)
+        if self.z_dim <= 9:
+            for i in range(self.N):
+                for j in range(self.K):
+                    log_writer.add_histogram(prefix + "/latent/q_z_xy_logit{0}{1}".format(i, j),
+                                             self.q_dist.logits[:, i, j],
+                                             curr_iter)
--- a/code/model/components/gmm2d.py
+++ b/code/model/components/gmm2d.py
@ -0,0 +1,61 @@
+import torch
+import torch.distributions as td
+import numpy as np
+from model.model_utils import to_one_hot
+
+
+class GMM2D(object):
+    def __init__(self, log_pis, mus, log_sigmas, corrs, pred_state_length, device,
+                 clip_lo=-10, clip_hi=10):
+        self.device = device
+        self.pred_state_length = pred_state_length
+
+        # input shapes
+        # pis: [..., GMM_c]
+        # mus: [..., GMM_c*2]
+        # sigmas: [..., GMM_c*2]
+        # corrs: [..., GMM_c]
+        GMM_c = log_pis.shape[-1]
+
+        # Sigma = [s1^2    p*s1*s2      L = [s1   0
+        #          p*s1*s2 s2^2 ]            p*s2 sqrt(1-p^2)*s2]
+        log_pis = log_pis - torch.logsumexp(log_pis, dim=-1, keepdim=True)
+        mus = self.reshape_to_components(mus, GMM_c)         # [..., GMM_c, 2]
+        log_sigmas = self.reshape_to_components(torch.clamp(log_sigmas, min=clip_lo, max=clip_hi), GMM_c)
+        sigmas = torch.exp(log_sigmas)                       # [..., GMM_c, 2]
+        one_minus_rho2 = 1 - corrs**2                        # [..., GMM_c]
+
+        self.L1 = sigmas*torch.stack([torch.ones_like(corrs, device=self.device), corrs], dim=-1)
+        self.L2 = sigmas*torch.stack([torch.zeros_like(corrs, device=self.device), torch.sqrt(one_minus_rho2)], dim=-1)
+
+        self.batch_shape = log_pis.shape[:-1]
+        self.GMM_c = GMM_c
+        self.log_pis = log_pis        # [..., GMM_c]
+        self.mus = mus                # [..., GMM_c, 2]
+        self.log_sigmas = log_sigmas  # [..., GMM_c, 2]
+        self.sigmas = sigmas          # [..., GMM_c, 2]
+        self.corrs = corrs            # [..., GMM_c]
+        self.one_minus_rho2 = one_minus_rho2  # [..., GMM_c]
+        self.cat = td.Categorical(logits=log_pis)
+
+    def sample(self):
+        MVN_samples = (self.mus
+                       + self.L1*torch.unsqueeze(torch.randn_like(self.corrs, device=self.device), dim=-1)     # [..., GMM_c, 2]
+                       + self.L2*torch.unsqueeze(torch.randn_like(self.corrs, device=self.device), dim=-1))    # (manual 2x2 matmul)
+        cat_samples = self.cat.sample()    # [...]
+        selector = torch.unsqueeze(to_one_hot(cat_samples, self.GMM_c, self.device), dim=-1)
+        return torch.sum(MVN_samples*selector, dim=-2)
+
+    def log_prob(self, x):
+        # x: [..., 2]
+        x = torch.unsqueeze(x, dim=-2)    # [..., 1, 2]
+        dx = x - self.mus            # [..., GMM_c, 2]
+        z = (torch.sum((dx/self.sigmas)**2, dim=-1) -
+             2*self.corrs*torch.prod(dx, dim=-1)/torch.prod(self.sigmas, dim=-1))    # [..., GMM_c]
+        component_log_p = -(torch.log(self.one_minus_rho2) + 2*torch.sum(self.log_sigmas, dim=-1) +
+                            z/self.one_minus_rho2 +
+                            2*np.log(2*np.pi))/2
+        return torch.logsumexp(self.log_pis + component_log_p, dim=-1)
+
+    def reshape_to_components(self, tensor, GMM_c):
+        return torch.reshape(tensor, list(tensor.shape[:-1]) + [GMM_c, self.pred_state_length])
--- a/code/model/components/map_encoder.py
+++ b/code/model/components/map_encoder.py
@ -0,0 +1,20 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class CNNMapEncoder(nn.Module):
+    def __init__(self, input_size, hidden_size, output_size):
+        super(CNNMapEncoder, self).__init__()
+        self.conv1 = nn.Conv2d(3, 128, 5, stride=2)
+        self.conv2 = nn.Conv2d(128, 256, 5, stride=3)
+        self.conv3 = nn.Conv2d(256, 64, 5, stride=2)
+        self.fc = nn.Linear(7 * 7 * 64, 512)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        x = F.relu(self.conv2(x))
+        x = F.relu(self.conv3(x))
+        x = x.view(-1, 7 * 7 * 64)
+        x = F.relu(self.fc(x))
+        return x
--- a/code/model/dyn_stg.py
+++ b/code/model/dyn_stg.py
@ -0,0 +1,454 @@
+import numpy as np
+import torch
+from model.node_model import MultimodalGenerativeCVAE
+
+
+class SpatioTemporalGraphCVAEModel(object):
+    def __init__(self, model_registrar,
+                 hyperparams, log_writer,
+                 device):
+        super(SpatioTemporalGraphCVAEModel, self).__init__()
+        self.hyperparams = hyperparams
+        self.log_writer = log_writer
+        self.device = device
+        self.curr_iter = 0
+
+        self.model_registrar = model_registrar
+        self.node_models_dict = dict()
+        self.nodes = set()
+
+        self.env = None
+
+        self.min_hl = self.hyperparams['minimum_history_length']
+        self.max_hl = self.hyperparams['maximum_history_length']
+        self.ph = self.hyperparams['prediction_horizon']
+        self.state = self.hyperparams['state']
+        self.state_length = dict()
+        for type in self.state.keys():
+            self.state_length[type] = int(np.sum([len(entity_dims) for entity_dims in self.state[type].values()]))
+        self.pred_state = self.hyperparams['pred_state']
+
+    def set_scene_graph(self, env):
+        self.env = env
+
+        self.node_models_dict.clear()
+
+        edge_types = env.get_edge_types()
+
+        for node_type in env.NodeType:
+            self.node_models_dict[node_type] = MultimodalGenerativeCVAE(env,
+                                                                        node_type,
+                                                                        self.model_registrar,
+                                                                        self.hyperparams,
+                                                                        self.device,
+                                                                        edge_types,
+                                                                        log_writer=self.log_writer)
+
+    def set_curr_iter(self, curr_iter):
+        self.curr_iter = curr_iter
+        for node_str, model in self.node_models_dict.items():
+            model.set_curr_iter(curr_iter)
+
+    def set_annealing_params(self):
+        for node_str, model in self.node_models_dict.items():
+            model.set_annealing_params()
+
+    def step_annealers(self):
+        for node in self.node_models_dict:
+            self.node_models_dict[node].step_annealers()
+
+    def get_input(self, scene, timesteps, node_type, min_future_timesteps, max_nodes=None, curve=False):  # Curve is there to resample during training
+        inputs = list()
+        labels = list()
+        first_history_indices = list()
+        nodes = list()
+        node_scene_graph_batched = list()
+        timesteps_in_scene = list()
+        nodes_per_ts = scene.present_nodes(timesteps,
+                                           type=node_type,
+                                           min_history_timesteps=self.min_hl,
+                                           min_future_timesteps=min_future_timesteps,
+                                           include_robot=not self.hyperparams['incl_robot_node'],
+                                           max_nodes=max_nodes,
+                                           curve=curve)
+
+        # Get Inputs for each node present in Scene
+        for timestep in timesteps:
+            if timestep in nodes_per_ts.keys():
+                present_nodes = nodes_per_ts[timestep]
+                timestep_range = np.array([timestep - self.max_hl, timestep + min_future_timesteps])
+                scene_graph_t = scene.get_scene_graph(timestep,
+                                                      self.env.attention_radius,
+                                                      self.hyperparams['edge_addition_filter'],
+                                                      self.hyperparams['edge_removal_filter'])
+
+                for node in present_nodes:
+                    timesteps_in_scene.append(timestep)
+                    input = node.get(timestep_range, self.state[node.type.name])
+                    label = node.get(timestep_range, self.pred_state[node.type.name])
+                    first_history_index = (self.max_hl - node.history_points_at(timestep)).clip(0)
+                    inputs.append(input)
+                    labels.append(label)
+                    first_history_indices.append(first_history_index)
+                    nodes.append(node)
+
+                    node_scene_graph_batched.append((node, scene_graph_t))
+
+        return inputs, labels, first_history_indices, timesteps_in_scene, node_scene_graph_batched, nodes
+
+    def train_loss(self, scene, timesteps, max_nodes=None):
+        losses = dict()
+        for node_type in self.env.NodeType:
+            losses[node_type] = []
+
+        for node_type in self.env.NodeType:
+            # Get Input data for node type and given timesteps
+            (inputs,
+             labels,
+             first_history_indices,
+             timesteps_in_scene,
+             node_scene_graph_batched, _) = self.get_input(scene,
+                                                           timesteps,
+                                                           node_type,
+                                                           self.ph,
+                                                           max_nodes=max_nodes,
+                                                           curve=True)  # Curve is there to resample during training
+
+            # There are no nodes of type present for timestep
+            if len(inputs) == 0:
+                continue
+
+            uniform_t = self.max_hl
+
+            inputs = np.array(inputs)
+            labels = np.array(labels)
+
+            # Vehicles are rotated such that the x axis is lateral
+            if node_type == self.env.NodeType.VEHICLE:
+                # transform x y to ego.
+                pos = inputs[..., 0:2]
+                pos_org = pos.copy()
+                vel = inputs[..., 2:4]
+                acc = inputs[..., 5:7]
+                heading = inputs[:, uniform_t, -1]
+                rot_mat = np.zeros((pos.shape[0], pos.shape[1], 3, 3))
+                rot_mat[:, :, 0, 0] = np.cos(heading)[:, np.newaxis]
+                rot_mat[:, :, 0, 1] = np.sin(heading)[:, np.newaxis]
+                rot_mat[:, :, 1, 0] = -np.sin(heading)[:, np.newaxis]
+                rot_mat[:, :, 1, 1] = np.cos(heading)[:, np.newaxis]
+                rot_mat[:, :, 2, 2] = 1.
+
+                pos = pos - pos[:, uniform_t, np.newaxis, :]
+
+                pos_with_one = np.ones((pos.shape[0], pos.shape[1], 3, 1))
+                pos_with_one[:, :, :2] = pos[..., np.newaxis]
+                pos_rot = np.squeeze(rot_mat @ pos_with_one, axis=-1)[..., :2]
+
+                vel_with_one = np.ones((vel.shape[0], vel.shape[1], 3, 1))
+                vel_with_one[:, :, :2] = vel[..., np.newaxis]
+                vel_rot = np.squeeze(rot_mat @ vel_with_one, axis=-1)[..., :2]
+
+                acc_with_one = np.ones((acc.shape[0], acc.shape[1], 3, 1))
+                acc_with_one[:, :, :2] = acc[..., np.newaxis]
+                acc_rot = np.squeeze(rot_mat @ acc_with_one, axis=-1)[..., :2]
+
+                inputs[..., 0:2] = pos_rot
+                inputs[..., 2:4] = vel_rot
+                inputs[..., 5:7] = acc_rot
+
+                l_vel_with_one = np.ones((labels.shape[0], labels.shape[1], 3, 1))
+                l_vel_with_one[:, :, :2] = labels[..., np.newaxis]
+                labels = np.squeeze(rot_mat @ l_vel_with_one, axis=-1)[..., :2]
+
+            # Standardize, Position is standardized relative to current pos and attention_radius for node_type-node_type
+            _, std = self.env.get_standardize_params(self.state[node_type.name], node_type=node_type)
+            # std[0:2] = self.env.attention_radius[(node_type, node_type)]
+            rel_state = np.array(inputs)[:, uniform_t]
+            rel_state = np.hstack((rel_state, np.zeros_like(rel_state)))
+            rel_state = np.expand_dims(rel_state, 1)
+            std = np.tile(std, 2)
+            inputs = np.tile(inputs, 2)
+            inputs[..., self.state_length[node_type.name]:self.state_length[node_type.name]+2] = 0.
+            inputs_st = self.env.standardize(inputs,
+                                             self.state[node_type.name],
+                                             mean=rel_state,
+                                             std=std,
+                                             node_type=node_type)
+            labels_st = self.env.standardize(labels, self.pred_state[node_type.name], node_type=node_type)
+
+            if node_type == self.env.NodeType.VEHICLE:
+                inputs[..., 0:2] = pos_org
+
+            # Convert to torch tensors
+            inputs = torch.tensor(inputs).float().to(self.device)
+            inputs_st = torch.tensor(inputs_st).float().to(self.device)
+            first_history_indices = torch.tensor(first_history_indices).float().to(self.device).long()
+            labels = torch.tensor(labels).float().to(self.device)
+            labels_st = torch.tensor(labels_st).float().to(self.device)
+
+            # Run forward pass
+            model = self.node_models_dict[node_type]
+            loss = model.train_loss(inputs,
+                                    inputs_st,
+                                    first_history_indices,
+                                    labels,
+                                    labels_st,
+                                    scene,
+                                    node_scene_graph_batched,
+                                    timestep=uniform_t,
+                                    timesteps_in_scene=timesteps_in_scene,
+                                    prediction_horizon=self.ph)
+
+            losses[node_type].append(loss)
+
+        for node_type in self.env.NodeType:
+            losses[node_type] = torch.mean(torch.stack(losses[node_type])) if len(losses[node_type]) > 0 else None
+        return losses
+
+    def eval_loss(self, scene, timesteps, max_nodes=None):
+        losses = dict()
+        for node_type in self.env.NodeType:
+            losses[node_type] = {'nll_q_is': list(), 'nll_p': list(), 'nll_exact': list(), 'nll_sampled': list()}
+        for node_type in self.env.NodeType:
+            # Get Input data for node type and given timesteps
+            (inputs,
+             labels,
+             first_history_indices,
+             timesteps_in_scene,
+             node_scene_graph_batched, _) = self.get_input(scene, timesteps, node_type, self.ph, max_nodes=max_nodes)
+
+            # There are no nodes of type present for timestep
+            if len(inputs) == 0:
+                continue
+
+            uniform_t = self.max_hl
+
+            inputs = np.array(inputs)
+            labels = np.array(labels)
+
+            # Vehicles are rotated such that the x axis is lateral
+            if node_type == self.env.NodeType.VEHICLE:
+                # transform x y to ego.
+                pos = inputs[..., 0:2]
+                pos_org = pos.copy()
+                vel = inputs[..., 2:4]
+                acc = inputs[..., 5:7]
+                heading = inputs[:, uniform_t, -1]
+                rot_mat = np.zeros((pos.shape[0], pos.shape[1], 3, 3))
+                rot_mat[:, :, 0, 0] = np.cos(heading)[:, np.newaxis]
+                rot_mat[:, :, 0, 1] = np.sin(heading)[:, np.newaxis]
+                rot_mat[:, :, 1, 0] = -np.sin(heading)[:, np.newaxis]
+                rot_mat[:, :, 1, 1] = np.cos(heading)[:, np.newaxis]
+                rot_mat[:, :, 2, 2] = 1.
+
+                pos = pos - pos[:, uniform_t, np.newaxis, :]
+
+                pos_with_one = np.ones((pos.shape[0], pos.shape[1], 3, 1))
+                pos_with_one[:, :, :2] = pos[..., np.newaxis]
+                pos_rot = np.squeeze(rot_mat @ pos_with_one, axis=-1)[..., :2]
+
+                vel_with_one = np.ones((vel.shape[0], vel.shape[1], 3, 1))
+                vel_with_one[:, :, :2] = vel[..., np.newaxis]
+                vel_rot = np.squeeze(rot_mat @ vel_with_one, axis=-1)[..., :2]
+
+                acc_with_one = np.ones((acc.shape[0], acc.shape[1], 3, 1))
+                acc_with_one[:, :, :2] = acc[..., np.newaxis]
+                acc_rot = np.squeeze(rot_mat @ acc_with_one, axis=-1)[..., :2]
+
+                inputs[..., 0:2] = pos_rot
+                inputs[..., 2:4] = vel_rot
+                inputs[..., 5:7] = acc_rot
+
+                l_vel_with_one = np.ones((labels.shape[0], labels.shape[1], 3, 1))
+                l_vel_with_one[:, :, :2] = labels[..., np.newaxis]
+                labels = np.squeeze(rot_mat @ l_vel_with_one, axis=-1)[..., :2]
+
+            # Standardize, Position is standardized relative to current pos and attention_radius for node_type-node_type
+            _, std = self.env.get_standardize_params(self.state[node_type.name], node_type=node_type)
+            rel_state = np.array(inputs)[:, uniform_t]
+            rel_state = np.hstack((rel_state, np.zeros_like(rel_state)))
+            rel_state = np.expand_dims(rel_state, 1)
+            std = np.tile(std, 2)
+            inputs = np.tile(inputs, 2)
+            inputs[..., self.state_length[node_type.name]:self.state_length[node_type.name]+2] = 0.
+            inputs_st = self.env.standardize(inputs,
+                                             self.state[node_type.name],
+                                             mean=rel_state,
+                                             std=std,
+                                             node_type=node_type)
+            labels_st = self.env.standardize(labels, self.pred_state[node_type.name], node_type=node_type)
+
+            if node_type == self.env.NodeType.VEHICLE:
+                inputs[..., 0:2] = pos_org
+
+            # Convert to torch tensors
+            inputs = torch.tensor(inputs).float().to(self.device)
+            inputs_st = torch.tensor(inputs_st).float().to(self.device)
+            first_history_indices = torch.tensor(first_history_indices).float().to(self.device).long()
+            labels = torch.tensor(labels).float().to(self.device)
+            labels_st = torch.tensor(labels_st).float().to(self.device)
+
+            # Run forward pass
+            model = self.node_models_dict[node_type]
+            (nll_q_is, nll_p, nll_exact, nll_sampled) = model.eval_loss(inputs,
+                                                                        inputs_st,
+                                                                        first_history_indices,
+                                                                        labels,
+                                                                        labels_st,
+                                                                        scene,
+                                                                        node_scene_graph_batched,
+                                                                        timestep=uniform_t,
+                                                                        timesteps_in_scene=timesteps_in_scene,
+                                                                        prediction_horizon=self.ph)
+
+            if nll_q_is is not None:
+                losses[node_type]['nll_q_is'].append(nll_q_is.cpu().numpy())
+                losses[node_type]['nll_p'].append(nll_p.cpu().numpy())
+                losses[node_type]['nll_exact'].append(nll_exact.cpu().numpy())
+                losses[node_type]['nll_sampled'].append(nll_sampled.cpu().numpy())
+
+        return losses
+
+    def predict(self,
+                scene,
+                timesteps,
+                ph,
+                num_samples_z=1,
+                num_samples_gmm=1,
+                min_future_timesteps=0,
+                most_likely_z=False,
+                most_likely_gmm=False,
+                all_z=False,
+                max_nodes=None):
+
+        predictions_dict = {}
+        for node_type in self.env.NodeType:
+            # Get Input data for node type and given timesteps
+            (inputs,
+             labels,
+             first_history_indices,
+             timesteps_in_scene,
+             node_scene_graph_batched,
+             nodes) = self.get_input(scene, timesteps, node_type, min_future_timesteps, max_nodes=max_nodes)
+
+            # There are no nodes of type present for timestep
+            if len(inputs) == 0:
+                continue
+
+            uniform_t = self.max_hl
+
+            inputs = np.array(inputs)
+            labels = np.array(labels)
+
+            # Vehicles are rotated such that the x axis is lateral
+            if node_type == self.env.NodeType.VEHICLE:
+                # transform x y to ego.
+                pos = inputs[..., 0:2]
+                pos_org = pos.copy()
+                vel = inputs[..., 2:4]
+                acc = inputs[..., 5:7]
+                heading = inputs[:, uniform_t, -1]
+                rot_mat = np.zeros((pos.shape[0], pos.shape[1], 3, 3))
+                rot_mat[:, :, 0, 0] = np.cos(heading)[:, np.newaxis]
+                rot_mat[:, :, 0, 1] = np.sin(heading)[:, np.newaxis]
+                rot_mat[:, :, 1, 0] = -np.sin(heading)[:, np.newaxis]
+                rot_mat[:, :, 1, 1] = np.cos(heading)[:, np.newaxis]
+                rot_mat[:, :, 2, 2] = 1.
+
+                pos = pos - pos[:, uniform_t, np.newaxis, :]
+
+                pos_with_one = np.ones((pos.shape[0], pos.shape[1], 3, 1))
+                pos_with_one[:, :, :2] = pos[..., np.newaxis]
+                pos_rot = np.squeeze(rot_mat @ pos_with_one, axis=-1)[..., :2]
+
+                vel_with_one = np.ones((vel.shape[0], vel.shape[1], 3, 1))
+                vel_with_one[:, :, :2] = vel[..., np.newaxis]
+                vel_rot = np.squeeze(rot_mat @ vel_with_one, axis=-1)[..., :2]
+
+                acc_with_one = np.ones((acc.shape[0], acc.shape[1], 3, 1))
+                acc_with_one[:, :, :2] = acc[..., np.newaxis]
+                acc_rot = np.squeeze(rot_mat @ acc_with_one, axis=-1)[..., :2]
+
+                inputs[..., 0:2] = pos_rot
+                inputs[..., 2:4] = vel_rot
+                inputs[..., 5:7] = acc_rot
+
+                l_vel_with_one = np.ones((labels.shape[0], labels.shape[1], 3, 1))
+                l_vel_with_one[:, :, :2] = labels[..., np.newaxis]
+                labels = np.squeeze(rot_mat @ l_vel_with_one, axis=-1)[..., :2]
+
+            # Standardize, Position is standardized relative to current pos and attention_radius for node_type-node_type
+            _, std = self.env.get_standardize_params(self.state[node_type.name], node_type=node_type)
+            rel_state = np.array(inputs)[:, uniform_t]
+            rel_state = np.hstack((rel_state, np.zeros_like(rel_state)))
+            rel_state = np.expand_dims(rel_state, 1)
+            std = np.tile(std, 2)
+            inputs = np.tile(inputs, 2)
+            inputs[..., self.state_length[node_type.name]:self.state_length[node_type.name]+2] = 0.
+            inputs_st = self.env.standardize(inputs,
+                                             self.state[node_type.name],
+                                             mean=rel_state,
+                                             std=std,
+                                             node_type=node_type)
+            labels_st = self.env.standardize(labels, self.pred_state[node_type.name], node_type=node_type)
+
+            if node_type == self.env.NodeType.VEHICLE:
+                inputs[..., 0:2] = pos_org
+
+            # Convert to torch tensors
+            inputs = torch.tensor(inputs).float().to(self.device)
+            inputs_st = torch.tensor(inputs_st).float().to(self.device)
+            first_history_indices = torch.tensor(first_history_indices).float().to(self.device).long()
+            labels = torch.tensor(labels).float().to(self.device)
+            labels_st = torch.tensor(labels_st).float().to(self.device)
+
+            # Run forward pass
+            model = self.node_models_dict[node_type]
+            predictions = model.predict(inputs,
+                                        inputs_st,
+                                        labels,
+                                        labels_st,
+                                        first_history_indices,
+                                        scene,
+                                        node_scene_graph_batched,
+                                        timestep=uniform_t,
+                                        timesteps_in_scene=timesteps_in_scene,
+                                        prediction_horizon=ph,
+                                        num_samples_z=num_samples_z,
+                                        num_samples_gmm=num_samples_gmm,
+                                        most_likely_z=most_likely_z,
+                                        most_likely_gmm=most_likely_gmm,
+                                        all_z=all_z)
+
+            predictions_uns = self.env.unstandardize(predictions.cpu().detach().numpy(),
+                                                     self.pred_state[node_type.name],
+                                                     node_type)
+
+            # Vehicles are rotated such that the x axis is lateral. For output rotation has to be reversed
+            if node_type == self.env.NodeType.VEHICLE:
+                heading = inputs.cpu().detach().numpy()[:, uniform_t, -1]
+                rot_mat = np.zeros((predictions_uns.shape[0],
+                                    predictions_uns.shape[1],
+                                    predictions_uns.shape[2],
+                                    predictions_uns.shape[3], 3, 3))
+                rot_mat[:, :, :, :, 0, 0] = np.cos(-heading)[:, np.newaxis]
+                rot_mat[:, :, :, :, 0, 1] = np.sin(-heading)[:, np.newaxis]
+                rot_mat[:, :, :, :, 1, 0] = -np.sin(-heading)[:, np.newaxis]
+                rot_mat[:, :, :, :, 1, 1] = np.cos(-heading)[:, np.newaxis]
+                rot_mat[:, :, :, :, 2, 2] = 1.
+
+                p_vel_with_one = np.ones((predictions_uns.shape[0],
+                                          predictions_uns.shape[1],
+                                          predictions_uns.shape[2],
+                                          predictions_uns.shape[3], 3, 1))
+                p_vel_with_one[:, :, :, :, :2] = predictions_uns[..., np.newaxis]
+                predictions_uns = np.squeeze(rot_mat @ p_vel_with_one, axis=-1)[..., :2]
+
+            # Assign predictions to node
+            for i, ts in enumerate(timesteps_in_scene):
+                if not ts in predictions_dict.keys():
+                    predictions_dict[ts] = dict()
+                predictions_dict[ts][nodes[i]] = predictions_uns[:, :, i]
+
+        return predictions_dict
+
--- a/code/model/model_registrar.py
+++ b/code/model/model_registrar.py
@ -0,0 +1,70 @@
+import os
+import torch
+import torch.nn as nn
+
+
+def get_model_device(model):
+    return next(model.parameters()).device
+
+
+class ModelRegistrar(nn.Module):
+    def __init__(self, model_dir, device):
+        super(ModelRegistrar, self).__init__()
+        self.model_dict = nn.ModuleDict()
+        self.model_dir = model_dir
+        self.device = device
+
+
+    def forward(self):
+        raise NotImplementedError('Although ModelRegistrar is a nn.Module, it is only to store parameters.')
+
+
+    def get_model(self, name, model_if_absent=None):
+        # 4 cases: name in self.model_dict and model_if_absent is None         (OK)
+        #          name in self.model_dict and model_if_absent is not None     (OK)
+        #          name not in self.model_dict and model_if_absent is not None (OK)
+        #          name not in self.model_dict and model_if_absent is None     (NOT OK)
+
+        if name in self.model_dict:
+            return self.model_dict[name]
+
+        elif model_if_absent is not None:
+            self.model_dict[name] = model_if_absent.to(self.device)
+            return self.model_dict[name]
+
+        else:
+            raise ValueError(f'{name} was never initialized in this Registrar!')
+
+
+    def print_model_names(self):
+        print(self.model_dict.keys())
+
+
+    def save_models(self, curr_iter):
+        # Create the model directiory if it's not present.
+        save_path = os.path.join(self.model_dir,
+                                 'model_registrar-%d.pt' % curr_iter)
+        print('')
+        print('Saving to ' + save_path)
+        torch.save(self.model_dict, save_path)
+        print('Saved!')
+        print('')
+
+
+    def load_models(self, iter_num):
+        self.model_dict.clear()
+        
+        save_path = os.path.join(self.model_dir,
+                                 'model_registrar-%d.pt' % iter_num)
+
+        print('')
+        print('Loading from ' + save_path)
+        self.model_dict = torch.load(save_path, map_location=self.device)
+        print('Loaded!')
+        print('')
+
+
+    def to(self, device):
+        for name, model in self.model_dict.items():
+            if get_model_device(model) != device:
+                model.to(device)
--- a/code/model/model_utils.py
+++ b/code/model/model_utils.py
@ -0,0 +1,169 @@
+import torch
+import torch.nn.utils.rnn as rnn
+from enum import Enum
+import functools
+import numpy as np
+import math
+from scipy.ndimage import interpolation
+
+
+class ModeKeys(Enum):
+    TRAIN = 1
+    EVAL = 2
+    PREDICT = 3
+
+def cyclical_lr(stepsize, min_lr=3e-4, max_lr=3e-3, decay=1.):
+
+    # Lambda function to calculate the LR
+    lr_lambda = lambda it: min_lr + (max_lr - min_lr) * relative(it, stepsize) * decay**it
+
+    # Additional function to see where on the cycle we are
+    def relative(it, stepsize):
+        cycle = math.floor(1 + it / (2 * stepsize))
+        x = abs(it / stepsize - 2 * cycle + 1)
+        return max(0, (1 - x))
+
+    return lr_lambda
+
+
+def tile(a, dim, n_tile, device='cpu'):
+    init_dim = a.size(dim)
+    repeat_idx = [1] * a.dim()
+    repeat_idx[dim] = n_tile
+    a = a.repeat(*(repeat_idx))
+    order_index = torch.LongTensor(np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)])).to(device)
+    return torch.index_select(a, dim, order_index)
+
+
+def to_one_hot(labels, n_labels, device):
+    return torch.eye(n_labels, device=device)[labels]
+
+
+def exp_anneal(anneal_kws):
+    device = anneal_kws['device']
+    start = torch.tensor(anneal_kws['start'], device=device)
+    finish = torch.tensor(anneal_kws['finish'], device=device)
+    rate = torch.tensor(anneal_kws['rate'], device=device)
+    return lambda step: finish - (finish - start)*torch.pow(rate, torch.tensor(step, dtype=torch.float, device=device))
+
+
+def sigmoid_anneal(anneal_kws):
+    device = anneal_kws['device']
+    start = torch.tensor(anneal_kws['start'], device=device)
+    finish = torch.tensor(anneal_kws['finish'], device=device)
+    center_step = torch.tensor(anneal_kws['center_step'], device=device, dtype=torch.float)
+    steps_lo_to_hi = torch.tensor(anneal_kws['steps_lo_to_hi'], device=device, dtype=torch.float)
+    return lambda step: start + (finish - start)*torch.sigmoid((torch.tensor(float(step), device=device) - center_step) * (1./steps_lo_to_hi))
+
+
+class CustomLR(torch.optim.lr_scheduler.LambdaLR):
+    def __init__(self, optimizer, lr_lambda, last_epoch=-1):
+        super(CustomLR, self).__init__(optimizer, lr_lambda, last_epoch)
+
+    def get_lr(self):
+        return [lmbda(self.last_epoch)
+                for lmbda, base_lr in zip(self.lr_lambdas, self.base_lrs)]
+
+
+def run_lstm_on_variable_length_seqs(lstm_module, original_seqs, break_indices, lower_indices, total_length):
+    # This is done so that we can just pass in self.prediction_timesteps
+    # (which we want to INCLUDE, so this will exclude the next timestep).
+    inclusive_break_indices = break_indices + 1
+
+    pad_list = list()
+    for i, seq_len in enumerate(inclusive_break_indices):
+        pad_list.append(original_seqs[i, lower_indices[i]:seq_len])
+
+    packed_seqs = rnn.pack_sequence(pad_list, enforce_sorted=False)
+    packed_output, (h_n, c_n) = lstm_module(packed_seqs)
+    output, _ = rnn.pad_packed_sequence(packed_output,
+                                        batch_first=True,
+                                        total_length=total_length)
+
+    return output, (h_n, c_n)
+
+
+def extract_subtensor_per_batch_element(tensor, indices):
+    batch_idxs = torch.arange(start=0, end=len(indices))
+
+    batch_idxs = batch_idxs[~torch.isnan(indices)]
+    indices = indices[~torch.isnan(indices)]
+    if indices.size == 0:
+        return None
+    else:
+        indices = indices.long()
+    if tensor.is_cuda:
+        batch_idxs = batch_idxs.to(tensor.get_device())
+        indices = indices.to(tensor.get_device())
+    return tensor[batch_idxs, indices]
+
+
+def unpack_RNN_state(state_tuple):
+    # PyTorch returned LSTM states have 3 dims:
+    # (num_layers * num_directions, batch, hidden_size)
+
+    state = torch.cat(state_tuple, dim=0).permute(1, 0, 2)
+    # Now state is (batch, 2 * num_layers * num_directions, hidden_size)
+
+    state_size = state.size()
+    return torch.reshape(state, (-1, state_size[1] * state_size[2]))
+
+
+def rsetattr(obj, attr, val):
+    pre, _, post = attr.rpartition('.')
+    return setattr(rgetattr(obj, pre) if pre else obj, post, val)
+
+
+# using wonder's beautiful simplification:
+# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427
+def rgetattr(obj, attr, *args):
+    def _getattr(obj, attr):
+        return getattr(obj, attr, *args)
+    return functools.reduce(_getattr, [obj] + attr.split('.'))
+
+
+def get_cropped_maps(world_pts, map, context_size=50):
+    """world_pts: N x 2 array of positions relative to the world."""
+    expanded_obs_img = np.full((map.data.shape[0] + context_size, map.data.shape[1] + context_size, map.data.shape[2]), False, dtype=np.float32)
+    expanded_obs_img[context_size//2:-context_size//2, context_size//2:-context_size//2] = map.fdata.astype(np.float32)
+    img_pts = context_size//2 + np.round(map.to_map_points(world_pts)).astype(int)
+    return np.stack([expanded_obs_img[img_pts[i, 0] - context_size//2 : img_pts[i, 0] + context_size//2,
+                                      img_pts[i, 1] - context_size//2 : img_pts[i, 1] + context_size//2]
+                      for i in range(world_pts.shape[0])], axis=0)
+
+def get_cropped_maps_heading(world_pts, map, context_size=50, heading=None):
+    """world_pts: N x 2 array of positions relative to the world."""
+    rotations = np.round((heading) / (np.pi / 2)).astype(int)
+
+    expanded_obs_img = np.full((map.data.shape[0] + context_size, map.data.shape[1] + context_size, map.data.shape[2]),
+                               False, dtype=np.float32)
+    expanded_obs_img[context_size // 2:-context_size // 2, context_size // 2:-context_size // 2] = map.fdata.astype(
+        np.float32)
+    img_pts = context_size // 2 + np.round(map.to_map_points(world_pts)).astype(int)
+    map_h =  np.stack([expanded_obs_img[img_pts[i, 0] - context_size // 2: img_pts[i, 0] + context_size // 2,
+                     img_pts[i, 1] - context_size // 2: img_pts[i, 1] + context_size // 2]
+                     for i in range(world_pts.shape[0])], axis=0)
+
+    map_h[rotations == 1] = np.rot90(map_h[rotations == 1], -1, axes=(1, 2))
+    map_h[rotations == 2] = np.rot90(map_h[rotations == 2], 2, axes=(1, 2))
+    map_h[rotations == -1] = np.rot90(map_h[rotations == -1], 1, axes=(1, 2))
+    map_h[rotations == -2] = np.rot90(map_h[rotations == -2], 2, axes=(1, 2))
+    return map_h
+
+def get_cropped_maps_heading_exact(world_pts, map, context_size=50, heading=None):
+    """world_pts: N x 2 array of positions relative to the world."""
+    angles = -heading * 180 / np.pi
+
+    expanded_obs_img = np.full((map.data.shape[0] + context_size, map.data.shape[1] + context_size, map.data.shape[2]),
+                               False, dtype=np.float32)
+    expanded_obs_img[context_size // 2:-context_size // 2, context_size // 2:-context_size // 2] = map.fdata.astype(
+        np.float32)
+    img_pts = context_size // 2 + np.round(map.to_map_points(world_pts)).astype(int)
+    map_h = np.stack([expanded_obs_img[img_pts[i, 0] - context_size // 2: img_pts[i, 0] + context_size // 2,
+                      img_pts[i, 1] - context_size // 2: img_pts[i, 1] + context_size // 2]
+                      for i in range(world_pts.shape[0])], axis=0)
+
+    for i in range(angles.shape[0]):
+        map_h[i] = interpolation.rotate(map_h[i], reshape=False, angle=angles[i], prefilter=False)
+
+    return map_h
--- a/code/model/node_model.py
+++ b/code/model/node_model.py
--- a/code/notebooks/Car
+++ b/code/notebooks/Car
--- a/code/notebooks/Car
+++ b/code/notebooks/Car
--- a/code/notebooks/Car
+++ b/code/notebooks/Car
--- a/code/notebooks/Car
+++ b/code/notebooks/Car
--- a/code/notebooks/Car
+++ b/code/notebooks/Car
--- a/code/notebooks/Car
+++ b/code/notebooks/Car
--- a/code/notebooks/NuScenes
+++ b/code/notebooks/NuScenes
--- a/code/notebooks/NuScenes
+++ b/code/notebooks/NuScenes
--- a/code/notebooks/helper.py
+++ b/code/notebooks/helper.py
@ -0,0 +1,288 @@
+import os
+import json
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.offsetbox import OffsetImage, AnnotationBbox
+import matplotlib.patheffects as pe
+from scipy.ndimage import rotate
+import seaborn as sns
+
+from model.model_registrar import ModelRegistrar
+from model.dyn_stg import SpatioTemporalGraphCVAEModel
+from utils import prediction_output_to_trajectories
+
+from scipy.integrate import cumtrapz
+
+line_colors = ['#80CBE5', '#375397', '#F05F78', '#ABCB51', '#C8B0B0']
+
+cars = [plt.imread('Car TOP_VIEW 80CBE5.png'),
+        plt.imread('Car TOP_VIEW 375397.png'),
+        plt.imread('Car TOP_VIEW F05F78.png'),
+        plt.imread('Car TOP_VIEW ABCB51.png'),
+        plt.imread('Car TOP_VIEW C8B0B0.png')]
+
+robot = plt.imread('Car TOP_VIEW ROBOT.png')
+
+
+def load_model(model_dir, env, ts=3999):
+    model_registrar = ModelRegistrar(model_dir, 'cpu')
+    model_registrar.load_models(ts)
+    with open(os.path.join(model_dir, 'config.json'), 'r') as config_json:
+        hyperparams = json.load(config_json)
+
+    hyperparams['map_enc_dropout'] = 0.0
+    if 'incl_robot_node' not in hyperparams:
+        hyperparams['incl_robot_node'] = False
+
+    stg = SpatioTemporalGraphCVAEModel(model_registrar,
+                                       hyperparams,
+                                       None, 'cpu')
+
+    stg.set_scene_graph(env)
+
+    stg.set_annealing_params()
+
+    return stg, hyperparams
+
+
+def plot_vehicle_nice(ax, predictions, dt, max_hl=10, ph=6, map=None, x_min=0, y_min=0):
+    prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories(predictions,
+                                                                                      dt,
+                                                                                      max_hl,
+                                                                                      ph,
+                                                                                      map=map)
+    assert (len(prediction_dict.keys()) <= 1)
+    if len(prediction_dict.keys()) == 0:
+        return
+    ts_key = list(prediction_dict.keys())[0]
+
+    prediction_dict = prediction_dict[ts_key]
+    histories_dict = histories_dict[ts_key]
+    futures_dict = futures_dict[ts_key]
+
+    if map is not None:
+        ax.imshow(map.fdata, origin='lower', alpha=0.5)
+
+    cmap = ['k', 'b', 'y', 'g', 'r']
+    line_alpha = 0.7
+    line_width = 0.2
+    edge_width = 2
+    circle_edge_width = 0.5
+    node_circle_size = 0.3
+    a = []
+    i = 0
+    node_list = sorted(histories_dict.keys(), key=lambda x: x.length)
+    for node in node_list:
+        history = histories_dict[node] + np.array([x_min, y_min])
+        future = futures_dict[node] + np.array([x_min, y_min])
+        predictions = prediction_dict[node] + np.array([x_min, y_min])
+        if node.type.name == 'VEHICLE':
+            # ax.plot(history[:, 0], history[:, 1], 'ko-', linewidth=1)
+
+            ax.plot(future[:, 0],
+                    future[:, 1],
+                    'w--o',
+                    linewidth=4,
+                    markersize=3,
+                    zorder=650,
+                    path_effects=[pe.Stroke(linewidth=5, foreground='k'), pe.Normal()])
+
+            for t in range(predictions.shape[1]):
+                sns.kdeplot(predictions[:, t, 0], predictions[:, t, 1],
+                            ax=ax, shade=True, shade_lowest=False,
+                            color=line_colors[i % len(line_colors)], zorder=600, alpha=0.8)
+
+            vel = node.get(ts_key, {'velocity': ['x', 'y']})
+            h = np.arctan2(vel[0, 1], vel[0, 0])
+            r_img = rotate(cars[i % len(cars)], node.get(ts_key, {'heading': ['value']})[0, 0] * 180 / np.pi,
+                           reshape=True)
+            oi = OffsetImage(r_img, zoom=0.035, zorder=700)
+            veh_box = AnnotationBbox(oi, (history[-1, 0], history[-1, 1]), frameon=False)
+            veh_box.zorder = 700
+            ax.add_artist(veh_box)
+            i += 1
+        else:
+            # ax.plot(history[:, 0], history[:, 1], 'k--')
+
+            for t in range(predictions.shape[1]):
+                sns.kdeplot(predictions[:, t, 0], predictions[:, t, 1],
+                            ax=ax, shade=True, shade_lowest=False,
+                            color='b', zorder=600, alpha=0.8)
+
+            ax.plot(future[:, 0],
+                    future[:, 1],
+                    'w--',
+                    zorder=650,
+                    path_effects=[pe.Stroke(linewidth=edge_width, foreground='k'), pe.Normal()])
+            # Current Node Position
+            circle = plt.Circle((history[-1, 0],
+                                 history[-1, 1]),
+                                node_circle_size,
+                                facecolor='g',
+                                edgecolor='k',
+                                lw=circle_edge_width,
+                                zorder=3)
+            ax.add_artist(circle)
+
+
+def plot_vehicle_mm(ax, predictions, dt, max_hl=10, ph=6, map=None, x_min=0, y_min=0):
+    prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories(predictions,
+                                                                                      dt,
+                                                                                      max_hl,
+                                                                                      ph,
+                                                                                      map=map)
+    assert (len(prediction_dict.keys()) <= 1)
+    if len(prediction_dict.keys()) == 0:
+        return
+    ts_key = list(prediction_dict.keys())[0]
+
+    prediction_dict = prediction_dict[ts_key]
+    histories_dict = histories_dict[ts_key]
+    futures_dict = futures_dict[ts_key]
+
+    if map is not None:
+        ax.imshow(map.fdata, origin='lower', alpha=0.5)
+
+    cmap = ['k', 'b', 'y', 'g', 'r']
+    line_alpha = 0.7
+    line_width = 0.2
+    edge_width = 2
+    circle_edge_width = 0.5
+    node_circle_size = 0.5
+    a = []
+    i = 0
+    node_list = sorted(histories_dict.keys(), key=lambda x: x.length)
+    for node in node_list:
+        history = histories_dict[node] + np.array([x_min, y_min])
+        future = futures_dict[node] + np.array([x_min, y_min])
+
+        predictions = prediction_dict[node] + np.array([x_min, y_min])
+        if node.type.name == 'VEHICLE':
+            for sample_num in range(prediction_dict[node].shape[0]):
+                ax.plot(predictions[sample_num, :, 0], predictions[sample_num, :, 1], 'ko-',
+                        zorder=620,
+                        markersize=5,
+                        linewidth=3, alpha=0.7)
+        else:
+            for sample_num in range(prediction_dict[node].shape[0]):
+                ax.plot(predictions[sample_num, :, 0], predictions[sample_num, :, 1], 'ko-',
+                        zorder=620,
+                        markersize=2,
+                        linewidth=1, alpha=0.7)
+
+
+def plot_vehicle_nice_mv(ax, predictions, dt, max_hl=10, ph=6, map=None, x_min=0, y_min=0):
+    prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories(predictions,
+                                                                                      dt,
+                                                                                      max_hl,
+                                                                                      ph,
+                                                                                      map=map)
+    assert (len(prediction_dict.keys()) <= 1)
+    if len(prediction_dict.keys()) == 0:
+        return
+    ts_key = list(prediction_dict.keys())[0]
+
+    prediction_dict = prediction_dict[ts_key]
+    histories_dict = histories_dict[ts_key]
+    futures_dict = futures_dict[ts_key]
+
+    if map is not None:
+        ax.imshow(map.fdata, origin='lower', alpha=0.5)
+
+    cmap = ['k', 'b', 'y', 'g', 'r']
+    line_alpha = 0.7
+    line_width = 0.2
+    edge_width = 2
+    circle_edge_width = 0.5
+    node_circle_size = 0.3
+    a = []
+    i = 0
+    node_list = sorted(histories_dict.keys(), key=lambda x: x.length)
+    for node in node_list:
+        h = node.get(ts_key, {'heading': ['value']})[0, 0]
+        history_org = histories_dict[node] + np.array([x_min, y_min])
+        history = histories_dict[node] + np.array([x_min, y_min]) + node.length * np.array([np.cos(h), np.sin(h)])
+        future = futures_dict[node] + np.array([x_min, y_min]) + node.length * np.array([np.cos(h), np.sin(h)])
+        predictions = prediction_dict[node] + np.array([x_min, y_min]) + node.length * np.array([np.cos(h), np.sin(h)])
+        if node.type.name == 'VEHICLE':
+            for t in range(predictions.shape[1]):
+                sns.kdeplot(predictions[:, t, 0], predictions[:, t, 1],
+                            ax=ax, shade=True, shade_lowest=False,
+                            color=line_colors[i % len(line_colors)], zorder=600, alpha=1.0)
+
+            r_img = rotate(cars[i % len(cars)], node.get(ts_key, {'heading': ['value']})[0, 0] * 180 / np.pi,
+                           reshape=True)
+            oi = OffsetImage(r_img, zoom=0.08, zorder=700)
+            veh_box = AnnotationBbox(oi, (history_org[-1, 0], history_org[-1, 1]), frameon=False)
+            veh_box.zorder = 700
+            ax.add_artist(veh_box)
+            i += 1
+        else:
+
+            for t in range(predictions.shape[1]):
+                sns.kdeplot(predictions[:, t, 0], predictions[:, t, 1],
+                            ax=ax, shade=True, shade_lowest=False,
+                            color='b', zorder=600, alpha=0.8)
+
+            # Current Node Position
+            circle = plt.Circle((history[-1, 0],
+                                 history[-1, 1]),
+                                node_circle_size,
+                                facecolor='g',
+                                edgecolor='k',
+                                lw=circle_edge_width,
+                                zorder=3)
+            ax.add_artist(circle)
+
+
+def plot_vehicle_nice_mv_robot(ax, predictions, dt, max_hl=10, ph=6, map=None, x_min=0, y_min=0):
+    prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories(predictions,
+                                                                                      dt,
+                                                                                      max_hl,
+                                                                                      ph,
+                                                                                      map=map)
+    assert (len(prediction_dict.keys()) <= 1)
+    if len(prediction_dict.keys()) == 0:
+        return
+    ts_key = list(prediction_dict.keys())[0]
+
+    prediction_dict = prediction_dict[ts_key]
+    histories_dict = histories_dict[ts_key]
+    futures_dict = futures_dict[ts_key]
+
+    if map is not None:
+        ax.imshow(map.fdata, origin='lower', alpha=0.5)
+
+    cmap = ['k', 'b', 'y', 'g', 'r']
+    line_alpha = 0.7
+    line_width = 0.2
+    edge_width = 2
+    circle_edge_width = 0.5
+    node_circle_size = 0.3
+
+    node_list = sorted(histories_dict.keys(), key=lambda x: x.length)
+    for node in node_list:
+        h = node.get(ts_key, {'heading': ['value']})[0, 0]
+        history_org = histories_dict[node] + np.array([x_min, y_min]) + node.length / 2 * np.array(
+            [np.cos(h), np.sin(h)])
+        future = futures_dict[node] + np.array([x_min, y_min]) + node.length * np.array([np.cos(h), np.sin(h)])
+
+        ax.plot(future[:, 0],
+                future[:, 1],
+                '--o',
+                c='#F05F78',
+                linewidth=4,
+                markersize=3,
+                zorder=650,
+                path_effects=[pe.Stroke(linewidth=5, foreground='k'), pe.Normal()])
+
+        r_img = rotate(robot, node.get(ts_key, {'heading': ['value']})[0, 0] * 180 / np.pi, reshape=True)
+        oi = OffsetImage(r_img, zoom=0.08, zorder=700)
+        veh_box = AnnotationBbox(oi, (history_org[-1, 0], history_org[-1, 1]), frameon=False)
+        veh_box.zorder = 700
+        ax.add_artist(veh_box)
+
+
+def integrate(f, dx, F0=0.):
+    N = f.shape[0]
+    return F0 + np.hstack((np.zeros((N, 1)), cumtrapz(f, axis=1, dx=dx)))
--- a/code/notebooks/model_to_metric_nuScenes.py
+++ b/code/notebooks/model_to_metric_nuScenes.py
@ -0,0 +1,182 @@
+import sys
+sys.path.append('../../code')
+import os
+import pickle
+import json
+import argparse
+import torch
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+from model.model_registrar import ModelRegistrar
+from model.dyn_stg import SpatioTemporalGraphCVAEModel
+import evaluation
+from utils import prediction_output_to_trajectories
+from scipy.interpolate import RectBivariateSpline
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--model", help="model full path", type=str)
+parser.add_argument("--checkpoint", help="model checkpoint to evaluate", type=int)
+parser.add_argument("--data", help="full path to data file", type=str)
+parser.add_argument("--output", help="full path to output csv file", type=str)
+parser.add_argument("--node_type", help="Node Type to evaluate", type=str)
+parser.add_argument("--prediction_horizon", nargs='+', help="prediction horizon", type=int, default=None)
+args = parser.parse_args()
+
+
+def compute_obs_violations(predicted_trajs, map):
+    obs_map = 1 - map.fdata[..., 0]
+
+    interp_obs_map = RectBivariateSpline(range(obs_map.shape[0]),
+                                         range(obs_map.shape[1]),
+                                         obs_map,
+                                         kx=1, ky=1)
+
+    old_shape = predicted_trajs.shape
+    pred_trajs_map = map.to_map_points(predicted_trajs.reshape((-1, 2)))
+
+    traj_obs_values = interp_obs_map(pred_trajs_map[:, 0], pred_trajs_map[:, 1], grid=False)
+    traj_obs_values = traj_obs_values.reshape((old_shape[0], old_shape[1]))
+    num_viol_trajs = np.sum(traj_obs_values.max(axis=1) > 0, dtype=float)
+
+    return num_viol_trajs
+
+def compute_heading_error(prediction_output_dict, dt, max_hl, ph, node_type_enum, kde=True, obs=False, map=None):
+
+    heading_error = list()
+
+    for t in prediction_output_dict.keys():
+        for node in prediction_output_dict[t].keys():
+            if node.type.name == 'VEHICLE':
+                gt_vel = node.get(t + ph - 1, {'velocity': ['x', 'y']})[0]
+                gt_heading = np.arctan2(gt_vel[1], gt_vel[0])
+                our_heading = np.arctan2(prediction_output_dict[t][node][..., -2, 1], prediction_output_dict[t][node][..., -2,  0])
+                he = np.mean(np.abs(gt_heading - our_heading)) % (2 * np.pi)
+                heading_error.append(he)
+
+    return heading_error
+
+
+def load_model(model_dir, env, ts=99):
+    model_registrar = ModelRegistrar(model_dir, 'cpu')
+    model_registrar.load_models(ts)
+    with open(os.path.join(model_dir, 'config.json'), 'r') as config_json:
+        hyperparams = json.load(config_json)
+
+    stg = SpatioTemporalGraphCVAEModel(model_registrar,
+                                       hyperparams,
+                                       None, 'cuda:0')
+    hyperparams['incl_robot_node'] = False
+
+    stg.set_scene_graph(env)
+    stg.set_annealing_params()
+    return stg, hyperparams
+
+
+if __name__ == "__main__":
+    with open(args.data, 'rb') as f:
+        env = pickle.load(f, encoding='latin1')
+    scenes = env.scenes
+
+    eval_stg, hyperparams = load_model(args.model, env, ts=args.checkpoint)
+
+    print("-- Preparing Node Graph")
+    for scene in tqdm(scenes):
+        scene.calculate_scene_graph(hyperparams['edge_radius'],
+                                    hyperparams['state'],
+                                    hyperparams['edge_addition_filter'],
+                                    hyperparams['edge_removal_filter'])
+
+    if args.prediction_horizon is None:
+        args.prediction_horizon = [hyperparams['prediction_horizon']]
+
+    for ph in args.prediction_horizon:
+        print(f"Prediction Horizon: {ph}")
+        max_hl = hyperparams['maximum_history_length']
+        node_type = env.NodeType[args.node_type]
+        print(f"Node Type: {node_type.name}")
+        print(f"Edge Radius: {hyperparams['edge_radius']}")
+
+        with torch.no_grad():
+            eval_ade_batch_errors = np.array([])
+            eval_fde_batch_errors = np.array([])
+            eval_kde_nll = np.array([])
+            eval_obs_viols = np.array([])
+            print("-- Evaluating Full")
+            for i, scene in enumerate(tqdm(scenes)):
+                for timestep in range(scene.timesteps):
+                    predictions = eval_stg.predict(scene,
+                                                   np.array([timestep]),
+                                                   ph,
+                                                   num_samples_z=2000,
+                                                   most_likely_z=False,
+                                                   min_future_timesteps=8)
+
+                    if not predictions:
+                        continue
+
+                    eval_error_dict = evaluation.compute_batch_statistics(predictions,
+                                                                          scene.dt,
+                                                                          node_type_enum=env.NodeType,
+                                                                          max_hl=max_hl,
+                                                                          ph=ph,
+                                                                          map=scene.map[node_type.name],
+                                                                          obs=True)
+
+                    eval_ade_batch_errors = np.hstack((eval_ade_batch_errors, eval_error_dict[node_type]['ade']))
+                    eval_fde_batch_errors = np.hstack((eval_fde_batch_errors, eval_error_dict[node_type]['fde']))
+                    eval_kde_nll = np.hstack((eval_kde_nll, eval_error_dict[node_type]['kde']))
+                    eval_obs_viols = np.hstack((eval_obs_viols, eval_error_dict[node_type]['obs_viols']))
+
+                    del predictions
+                    del eval_error_dict
+
+            print(f"Final Mean Displacement Error @{ph * scene.dt}s: {np.mean(eval_fde_batch_errors)}")
+            print(f"Road Violations @{ph * scene.dt}s: {100 * np.sum(eval_obs_viols) / (eval_obs_viols.shape[0] * 2000)}%")
+            pd.DataFrame({'error_value': eval_ade_batch_errors, 'error_type': 'ade', 'type': 'full', 'ph': ph}).to_csv(args.output + '_ade_full_' + str(ph)+'ph' + '.csv')
+            pd.DataFrame({'error_value': eval_fde_batch_errors, 'error_type': 'fde', 'type': 'full', 'ph': ph}).to_csv(args.output + '_fde_full' + str(ph)+'ph' + '.csv')
+            pd.DataFrame({'error_value': eval_kde_nll, 'error_type': 'kde', 'type': 'full', 'ph': ph}).to_csv(args.output + '_kde_full' + str(ph)+'ph' + '.csv')
+            pd.DataFrame({'error_value': eval_obs_viols, 'error_type': 'obs', 'type': 'full', 'ph': ph}).to_csv(args.output + '_obs_full' + str(ph)+'ph' + '.csv')
+
+            eval_ade_batch_errors = np.array([])
+            eval_fde_batch_errors = np.array([])
+            eval_heading_err = np.array([])
+            eval_obs_viols = np.array([])
+            print("-- Evaluating most likely Z and GMM")
+            for i, scene in enumerate(scenes):
+                print(f"---- Evaluating Scene {i+1}/{len(scenes)}")
+                for t in np.arange(0, scene.timesteps, 20):
+                    timesteps = np.arange(t, t+20)
+
+                    predictions = eval_stg.predict(scene,
+                                                   timesteps,
+                                                   ph,
+                                                   num_samples_z=1,
+                                                   most_likely_z=True,
+                                                   most_likely_gmm=True,
+                                                   min_future_timesteps=8)
+
+                    eval_error_dict = evaluation.compute_batch_statistics(predictions,
+                                                              scene.dt,
+                                                                          node_type_enum=env.NodeType,
+                                                              max_hl=max_hl,
+                                                              ph=ph,
+                                                              map=1 - scene.map[node_type.name].fdata[..., 0],
+                                                              kde=False)
+                    eval_ade_batch_errors = np.hstack((eval_ade_batch_errors, eval_error_dict[node_type]['ade']))
+                    eval_fde_batch_errors = np.hstack((eval_fde_batch_errors, eval_error_dict[node_type]['fde']))
+                    eval_obs_viols = np.hstack((eval_obs_viols, eval_error_dict[node_type]['obs_viols']))
+
+                    heading_error = compute_heading_error(predictions,
+                                                              scene.dt,
+                                                            node_type_enum=env.NodeType,
+                                                              max_hl=max_hl,
+                                                              ph=ph,
+                                                              map=1 - scene.map[node_type.name].fdata[..., 0],
+                                                              kde=False)
+                    eval_heading_err = np.hstack((eval_heading_err, heading_error))
+
+            print(f"Final Displacement Error @{ph * scene.dt}s: {np.mean(eval_fde_batch_errors)}")
+            pd.DataFrame({'error_value': eval_ade_batch_errors, 'error_type': 'ade', 'type': 'mm', 'ph': ph}).to_csv(args.output + '_ade_mm' + str(ph)+'ph' + '.csv')
+            pd.DataFrame({'error_value': eval_fde_batch_errors, 'error_type': 'fde', 'type': 'mm', 'ph': ph}).to_csv(args.output + '_fde_mm' + str(ph)+'ph' + '.csv')
+            pd.DataFrame({'error_value': eval_obs_viols, 'error_type': 'obs', 'type': 'mm', 'ph': ph}).to_csv( args.output + '_obs_mm' + str(ph)+'ph' + '.csv')
--- a/code/notebooks/run_eval.bash
+++ b/code/notebooks/run_eval.bash
@ -0,0 +1,6 @@
+#!/bin/bash
+python model_to_metric_nuScenes.py --model "../../data/nuScenes/models/full" --checkpoint 1 --data "../../data/processed/nuScenes_test.pkl" --output "./csv/full_veh" --node_type "VEHICLE" --prediction_horizon 2 4 6 8 > full_out.txt
+python model_to_metric_nuScenes.py --model "../../data/nuScenes/models/me_demo" --checkpoint 1 --data "../../data/processed/nuScenes_test.pkl" --output "./csv/me_veh" --node_type "VEHICLE" --prediction_horizon 2 4 6 8 > me_out.txt
+python model_to_metric_nuScenes.py --model "../../data/nuScenes/models/edge" --checkpoint 1 --data "../../data/processed/nuScenes_test.pkl" --output "./csv/edge_veh" --node_type "VEHICLE" --prediction_horizon 2 4 6 8  > edge_out.txt
+python model_to_metric_nuScenes.py --model "../../data/nuScenes/models/baseline" --checkpoint 1 --data "../../data/processed/nuScenes_test.pkl" --output "./csv/baseline_veh" --node_type "VEHICLE" --prediction_horizon 2 4 6 8  > baseline_out.txt
+python model_to_metric_nuScenes.py --model "../../data/nuScenes/models/full" --checkpoint 1 --data "../../data/processed/nuScenes_test.pkl" --output "./csv/full_ped" --node_type "PEDESTRIAN" --prediction_horizon 2 4 6 8  > full_out_ped.txt
--- a/code/train.py
+++ b/code/train.py
@ -0,0 +1,476 @@
+import torch
+from torch import nn, optim
+import numpy as np
+import os
+import time
+import psutil
+import pickle
+import json
+import random
+import argparse
+import pathlib
+import visualization
+import evaluation
+import matplotlib.pyplot as plt
+from model.dyn_stg import SpatioTemporalGraphCVAEModel
+from model.model_registrar import ModelRegistrar
+from model.model_utils import cyclical_lr
+from tensorboardX import SummaryWriter
+#torch.autograd.set_detect_anomaly(True) # TODO Remove for speed
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--conf", help="path to json config file for hyperparameters",
+                    type=str, default='config.json')
+parser.add_argument("--offline_scene_graph", help="whether to precompute the scene graphs offline, options are 'no' and 'yes'",
+                    type=str, default='yes')
+parser.add_argument("--dynamic_edges", help="whether to use dynamic edges or not, options are 'no' and 'yes'",
+                    type=str, default='yes')
+parser.add_argument("--edge_radius", help="the radius (in meters) within which two nodes will be connected by an edge",
+                    type=float, default=3.0)
+parser.add_argument("--edge_state_combine_method", help="the method to use for combining edges of the same type",
+                    type=str, default='sum')
+parser.add_argument("--edge_influence_combine_method", help="the method to use for combining edge influences",
+                    type=str, default='attention')
+parser.add_argument('--edge_addition_filter', nargs='+', help="what scaling to use for edges as they're created",
+                    type=float, default=[0.25, 0.5, 0.75, 1.0]) # We automatically pad left with 0.0
+parser.add_argument('--edge_removal_filter', nargs='+', help="what scaling to use for edges as they're removed",
+                    type=float, default=[1.0, 0.0]) # We automatically pad right with 0.0
+parser.add_argument('--incl_robot_node', help="whether to include a robot node in the graph or simply model all agents",
+                    action='store_true')
+parser.add_argument('--use_map_encoding', help="Whether to use map encoding or not",
+                    action='store_true')
+
+parser.add_argument("--data_dir", help="what dir to look in for data",
+                    type=str, default='../data/processed')
+parser.add_argument("--train_data_dict", help="what file to load for training data",
+                    type=str, default='nuScenes_train.pkl')
+parser.add_argument("--eval_data_dict", help="what file to load for evaluation data",
+                    type=str, default='nuScenes_val.pkl')
+parser.add_argument("--log_dir", help="what dir to save training information (i.e., saved models, logs, etc)",
+                    type=str, default='../data/nuScenes/logs')
+parser.add_argument("--log_tag", help="tag for the log folder",
+                    type=str, default='')
+
+parser.add_argument('--device', help='what device to perform training on',
+                    type=str, default='cuda:1')
+parser.add_argument("--eval_device", help="what device to use during evaluation",
+                    type=str, default=None)
+
+parser.add_argument("--num_iters", help="number of iterations to train for",
+                    type=int, default=2000)
+parser.add_argument('--batch_multiplier', help='how many minibatches to run per iteration of training',
+                    type=int, default=1)
+parser.add_argument('--batch_size', help='training batch size',
+                    type=int, default=256)
+parser.add_argument('--eval_batch_size', help='evaluation batch size',
+                    type=int, default=256)
+parser.add_argument('--k_eval', help='how many samples to take during evaluation',
+                    type=int, default=50)
+
+parser.add_argument('--seed', help='manual seed to use, default is 123',
+                    type=int, default=123)
+parser.add_argument('--eval_every', help='how often to evaluate during training, never if None',
+                    type=int, default=50)
+parser.add_argument('--vis_every', help='how often to visualize during training, never if None',
+                    type=int, default=50)
+parser.add_argument('--save_every', help='how often to save during training, never if None',
+                    type=int, default=100)
+args = parser.parse_args()
+
+if not torch.cuda.is_available() or args.device == 'cpu':
+    args.device = torch.device('cpu')
+else:
+    if torch.cuda.device_count() == 1:
+        # If you have CUDA_VISIBLE_DEVICES set, which you should,
+        # then this will prevent leftover flag arguments from
+        # messing with the device allocation.
+        args.device = 'cuda:0'
+
+    args.device = torch.device(args.device)
+
+if args.eval_device is None:
+    args.eval_device = 'cpu'
+
+if args.seed is not None:
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(args.seed)
+
+def main():
+    # Load hyperparameters from json
+    if not os.path.exists(args.conf):
+        print('Config json not found!')
+    with open(args.conf, 'r') as conf_json:
+        hyperparams = json.load(conf_json)
+
+    # Add hyperparams from arguments
+    hyperparams['dynamic_edges'] = args.dynamic_edges
+    hyperparams['edge_state_combine_method'] = args.edge_state_combine_method
+    hyperparams['edge_influence_combine_method'] = args.edge_influence_combine_method
+    hyperparams['edge_radius'] = args.edge_radius
+    hyperparams['use_map_encoding'] = args.use_map_encoding
+    hyperparams['edge_addition_filter'] = args.edge_addition_filter
+    hyperparams['edge_removal_filter'] = args.edge_removal_filter
+    hyperparams['batch_size'] = args.batch_size
+    hyperparams['k_eval'] = args.k_eval
+    hyperparams['offline_scene_graph'] = args.offline_scene_graph
+    hyperparams['incl_robot_node'] = args.incl_robot_node
+
+    print('-----------------------')
+    print('| TRAINING PARAMETERS |')
+    print('-----------------------')
+    print('| iterations: %d' % args.num_iters)
+    print('| batch_size: %d' % args.batch_size)
+    print('| batch_multiplier: %d' % args.batch_multiplier)
+    print('| effective batch size: %d (= %d * %d)' % (args.batch_size * args.batch_multiplier, args.batch_size, args.batch_multiplier))
+    print('| device: %s' % args.device)
+    print('| eval_device: %s' % args.eval_device)
+    print('| Offline Scene Graph Calculation: %s' % args.offline_scene_graph)
+    print('| edge_radius: %s' % args.edge_radius)
+    print('| EE state_combine_method: %s' % args.edge_state_combine_method)
+    print('| EIE scheme: %s' % args.edge_influence_combine_method)
+    print('| dynamic_edges: %s' % args.dynamic_edges)
+    print('| robot node: %s' % args.incl_robot_node)
+    print('| map encoding: %s' % args.use_map_encoding)
+    print('| edge_addition_filter: %s' % args.edge_addition_filter)
+    print('| edge_removal_filter: %s' % args.edge_removal_filter)
+    print('| MHL: %s' % hyperparams['minimum_history_length'])
+    print('| PH: %s' % hyperparams['prediction_horizon'])
+    print('-----------------------')
+
+    # Create the log and model directiory if they're not present.
+    model_dir = os.path.join(args.log_dir, 'models_' + time.strftime('%d_%b_%Y_%H_%M_%S', time.localtime()) + args.log_tag)
+    pathlib.Path(model_dir).mkdir(parents=True, exist_ok=True)
+
+    # Save config to model directory
+    with open(os.path.join(model_dir, 'config.json'), 'w') as conf_json:
+        json.dump(hyperparams, conf_json)
+
+    log_writer = SummaryWriter(log_dir=model_dir)
+
+    train_scenes = []
+    train_data_path = os.path.join(args.data_dir, args.train_data_dict)
+    with open(train_data_path, 'rb') as f:
+        train_env = pickle.load(f, encoding='latin1')
+    train_scenes = train_env.scenes
+    print('Loaded training data from %s' % (train_data_path,))
+
+    eval_scenes = []
+    if args.eval_every is not None:
+        eval_data_path = os.path.join(args.data_dir, args.eval_data_dict)
+        with open(eval_data_path, 'rb') as f:
+            eval_env = pickle.load(f, encoding='latin1')
+        eval_scenes = eval_env.scenes
+        print('Loaded evaluation data from %s' % (eval_data_path, ))
+
+    # Calculate Scene Graph
+    if hyperparams['offline_scene_graph'] == 'yes':
+        print(f"Offline calculating scene graphs")
+        for i, scene in enumerate(train_scenes):
+            scene.calculate_scene_graph(train_env.attention_radius,
+                                        hyperparams['state'],
+                                        hyperparams['edge_addition_filter'],
+                                        hyperparams['edge_removal_filter'])
+            print(f"Created Scene Graph for Scene {i}")
+
+        for i, scene in enumerate(eval_scenes):
+            scene.calculate_scene_graph(eval_env.attention_radius,
+                                        hyperparams['state'],
+                                        hyperparams['edge_addition_filter'],
+                                        hyperparams['edge_removal_filter'])
+            print(f"Created Scene Graph for Scene {i}")
+
+    model_registrar = ModelRegistrar(model_dir, args.device)
+
+    # We use pre trained weights for the map CNN
+    if args.use_map_encoding:
+        inf_encoder_registrar = os.path.join(args.log_dir, 'weight_trans/model_registrar-1499.pt')
+        model_dict = torch.load(inf_encoder_registrar, map_location=args.device)
+
+        for key in model_dict.keys():
+            if 'map_encoder' in key:
+                model_registrar.model_dict[key] = model_dict[key]
+                assert model_registrar.get_model(key) is model_dict[key]
+
+    stg = SpatioTemporalGraphCVAEModel(model_registrar,
+                                       hyperparams,
+                                       log_writer, args.device)
+    stg.set_scene_graph(train_env)
+    stg.set_annealing_params()
+    print('Created training STG model.')
+
+    eval_stg = None
+    if args.eval_every is not None or args.vis_ervery is not None:
+        eval_stg = SpatioTemporalGraphCVAEModel(model_registrar,
+                                                hyperparams,
+                                                log_writer, args.device)
+        eval_stg.set_scene_graph(eval_env)
+        eval_stg.set_annealing_params() # TODO Check if necessary
+    if hyperparams['learning_rate_style'] == 'const':
+        optimizer = optim.Adam(model_registrar.parameters(), lr=hyperparams['learning_rate'])
+        lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=1.0)
+    elif hyperparams['learning_rate_style'] == 'exp':
+        optimizer = optim.Adam(model_registrar.parameters(), lr=hyperparams['learning_rate'])
+        lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=hyperparams['learning_decay_rate'])
+    elif hyperparams['learning_rate_style'] == 'triangle':
+        optimizer = optim.Adam(model_registrar.parameters(), lr=1.0)
+        clr = cyclical_lr(100, min_lr=hyperparams['min_learning_rate'], max_lr=hyperparams['learning_rate'], decay=hyperparams['learning_decay_rate'])
+        lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, [clr])
+
+    print_training_header(newline_start=True)
+    for curr_iter in range(args.num_iters):
+        # Necessary because we flip the weights contained between GPU and CPU sometimes.
+        model_registrar.to(args.device)
+
+        # Setting the current iterator value for internal logging.
+        stg.set_curr_iter(curr_iter)
+        if args.vis_every is not None:
+            eval_stg.set_curr_iter(curr_iter)
+
+        # Stepping forward the learning rate scheduler and annealers.
+        lr_scheduler.step()
+        log_writer.add_scalar('train/learning_rate',
+                              lr_scheduler.get_lr()[0],
+                              curr_iter)
+        stg.step_annealers()
+
+        # Zeroing gradients for the upcoming iteration.
+        optimizer.zero_grad()
+        train_losses = dict()
+        for node_type in train_env.NodeType:
+            train_losses[node_type] = []
+        for scene in np.random.choice(train_scenes, 10):
+            for mb_num in range(args.batch_multiplier):
+                # Obtaining the batch's training loss.
+                timesteps = scene.sample_timesteps(hyperparams['batch_size'])
+
+                # Compute the training loss.
+                train_loss_by_type = stg.train_loss(scene, timesteps, max_nodes=hyperparams['batch_size'])
+                for node_type, train_loss in train_loss_by_type.items():
+                    if train_loss is not None:
+                        train_loss = train_loss / (args.batch_multiplier * 10)
+                        train_losses[node_type].append(train_loss.item())
+
+                        # Calculating gradients.
+                        train_loss.backward()
+
+        # Print training information. Also, no newline here. It's added in at a later line.
+        print('{:9} | '.format(curr_iter), end='', flush=True)
+        for node_type in train_env.NodeType:
+            print('{}:{:10} | '.format(node_type.name[0], '%.2f' % sum(train_losses[node_type])), end='', flush=True)
+
+        for node_type in train_env.NodeType:
+            if len(train_losses[node_type]) > 0:
+                log_writer.add_histogram(f"{node_type.name}/train/minibatch_losses", np.asarray(train_losses[node_type]), curr_iter)
+                log_writer.add_scalar(f"{node_type.name}/train/loss", sum(train_losses[node_type]), curr_iter)
+
+        # Clipping gradients.
+        if hyperparams['grad_clip'] is not None:
+            nn.utils.clip_grad_value_(model_registrar.parameters(), hyperparams['grad_clip'])
+
+        # Performing a gradient step.
+        optimizer.step()
+
+        del train_loss  # TODO Necessary?
+
+        if args.vis_every is not None and (curr_iter + 1) % args.vis_every == 0:
+            max_hl = hyperparams['maximum_history_length']
+            ph = hyperparams['prediction_horizon']
+            with torch.no_grad():
+                # Predict random timestep to plot for train data set
+                scene = np.random.choice(train_scenes)
+                timestep = scene.sample_timesteps(1, min_future_timesteps=ph)
+                predictions = stg.predict(scene,
+                                          timestep,
+                                          ph,
+                                          num_samples_z=100,
+                                          most_likely_z=False,
+                                          all_z=False)
+
+                # Plot predicted timestep for random scene
+                fig, ax = plt.subplots(figsize=(5, 5))
+                visualization.visualize_prediction(ax,
+                                                   predictions,
+                                                   scene.dt,
+                                                   max_hl=max_hl,
+                                                   ph=ph)
+                ax.set_title(f"{scene.name}-t: {timestep}")
+                log_writer.add_figure('train/prediction', fig, curr_iter)
+
+                # Predict random timestep to plot for eval data set
+                scene = np.random.choice(eval_scenes)
+                timestep = scene.sample_timesteps(1, min_future_timesteps=ph)
+                predictions = eval_stg.predict(scene,
+                                               timestep,
+                                               ph,
+                                               num_samples_z=100,
+                                               most_likely_z=False,
+                                               all_z=False,
+                                               max_nodes=4 * args.eval_batch_size)
+
+                # Plot predicted timestep for random scene
+                fig, ax = plt.subplots(figsize=(5, 5))
+                visualization.visualize_prediction(ax,
+                                                   predictions,
+                                                   scene.dt,
+                                                   max_hl=max_hl,
+                                                   ph=ph)
+                ax.set_title(f"{scene.name}-t: {timestep}")
+                log_writer.add_figure('eval/prediction', fig, curr_iter)
+
+                # Plot predicted timestep for random scene in map
+                fig, ax = plt.subplots(figsize=(15, 15))
+                visualization.visualize_prediction(ax,
+                                                   predictions,
+                                                   scene.dt,
+                                                   max_hl=max_hl,
+                                                   ph=ph,
+                                                   map=scene.map['PLOT'])
+                ax.set_title(f"{scene.name}-t: {timestep}")
+                log_writer.add_figure('eval/prediction_map', fig, curr_iter)
+
+                # Predict random timestep to plot for eval data set
+                predictions = eval_stg.predict(scene,
+                                               timestep,
+                                               ph,
+                                               num_samples_gmm=50,
+                                               most_likely_z=False,
+                                               all_z=True,
+                                               max_nodes=4 * args.eval_batch_size)
+
+                # Plot predicted timestep for random scene
+                fig, ax = plt.subplots(figsize=(5, 5))
+                visualization.visualize_prediction(ax,
+                                                   predictions,
+                                                   scene.dt,
+                                                   max_hl=max_hl,
+                                                   ph=ph)
+                ax.set_title(f"{scene.name}-t: {timestep}")
+                log_writer.add_figure('eval/prediction_all_z', fig, curr_iter)
+
+        if args.eval_every is not None and (curr_iter + 1) % args.eval_every == 0:
+            max_hl = hyperparams['maximum_history_length']
+            ph = hyperparams['prediction_horizon']
+            with torch.no_grad():
+                # Predict batch timesteps for training dataset evaluation
+                train_batch_errors = []
+                max_scenes = np.min([len(train_scenes), 5])
+                for scene in np.random.choice(train_scenes, max_scenes):
+                    timesteps = scene.sample_timesteps(args.eval_batch_size)
+                    predictions = stg.predict(scene,
+                                              timesteps,
+                                              ph,
+                                              num_samples_z=100,
+                                              min_future_timesteps=ph,
+                                              max_nodes=4*args.eval_batch_size)
+
+                    train_batch_errors.append(evaluation.compute_batch_statistics(predictions,
+                                                                                  scene.dt,
+                                                                                  max_hl=max_hl,
+                                                                                  ph=ph,
+                                                                                  node_type_enum=train_env.NodeType,
+                                                                                  map=scene.map))
+
+                evaluation.log_batch_errors(train_batch_errors,
+                                            log_writer,
+                                            'train',
+                                            curr_iter,
+                                            bar_plot=['kde'],
+                                            box_plot=['ade', 'fde'])
+
+                # Predict batch timesteps for evaluation dataset evaluation
+                eval_batch_errors = []
+                for scene in eval_scenes:
+                    timesteps = scene.sample_timesteps(args.eval_batch_size)
+
+                    predictions = eval_stg.predict(scene,
+                                                   timesteps,
+                                                   ph,
+                                                   num_samples_z=100,
+                                                   min_future_timesteps=ph,
+                                                   max_nodes=4 * args.eval_batch_size)
+
+                    eval_batch_errors.append(evaluation.compute_batch_statistics(predictions,
+                                                                                 scene.dt,
+                                                                                 max_hl=max_hl,
+                                                                                 ph=ph,
+                                                                                 node_type_enum=eval_env.NodeType,
+                                                                                 map=scene.map))
+
+                evaluation.log_batch_errors(eval_batch_errors,
+                                            log_writer,
+                                            'eval',
+                                            curr_iter,
+                                            bar_plot=['kde'],
+                                            box_plot=['ade', 'fde'])
+
+
+                # Predict maximum likelihood batch timesteps for evaluation dataset evaluation
+                eval_batch_errors_ml = []
+                for scene in eval_scenes:
+                    timesteps = scene.sample_timesteps(scene.timesteps)
+
+                    predictions = eval_stg.predict(scene,
+                                                   timesteps,
+                                                   ph,
+                                                   num_samples_z=1,
+                                                   min_future_timesteps=ph,
+                                                   most_likely_z=True,
+                                                   most_likely_gmm=True)
+
+                    eval_batch_errors_ml.append(evaluation.compute_batch_statistics(predictions,
+                                                              scene.dt,
+                                                              max_hl=max_hl,
+                                                              ph=ph,
+                                                              map=scene.map,
+                                                              node_type_enum=eval_env.NodeType,
+                                                              kde=False))
+
+                evaluation.log_batch_errors(eval_batch_errors_ml,
+                                            log_writer,
+                                            'eval/ml',
+                                            curr_iter)
+
+                eval_loss = []
+                max_scenes = np.min([len(eval_scenes), 25])
+                for scene in np.random.choice(eval_scenes, max_scenes):
+                    eval_loss.append(eval_stg.eval_loss(scene, timesteps))
+
+                evaluation.log_batch_errors(eval_loss,
+                                            log_writer,
+                                            'eval/loss',
+                                            curr_iter)
+
+
+        else:
+            print('{:15} | {:10} | {:14}'.format('', '', ''),
+                  end='', flush=True)
+
+        # Here's the newline that ends the current training information printing.
+        print('')
+
+        if args.save_every is not None and (curr_iter + 1) % args.save_every == 0:
+            model_registrar.save_models(curr_iter)
+            print_training_header()
+
+
+def print_training_header(newline_start=False):
+    if newline_start:
+        print('')
+
+    print('Iteration | Train Loss | Eval NLL Q (IS) | Eval NLL P | Eval NLL Exact')
+    print('----------------------------------------------------------------------')
+
+
+def memInUse():
+    pid = os.getpid()
+    py = psutil.Process(pid)
+    memoryUse = py.memory_info()[0] / 2. ** 30  # memory use in GB...I think
+    print('memory GB:', memoryUse)
+
+
+if __name__ == '__main__':
+    main()
--- a/code/utils/init.py
+++ b/code/utils/init.py
@ -0,0 +1 @@
+from .trajectory_utils import integrate_trajectory, prediction_output_to_trajectories
--- a/code/utils/trajectory_utils.py
+++ b/code/utils/trajectory_utils.py
@ -0,0 +1,72 @@
+import numpy as np
+from scipy.integrate import cumtrapz
+
+
+def integrate(f, dx, F0=0.):
+    N = f.shape[0]
+    return F0 + np.hstack((np.zeros((N, 1)), cumtrapz(f, axis=1, dx=dx)))
+
+
+def integrate_trajectory(v, x0, dt):
+    xd_ = integrate(v[..., 0], dx=dt, F0=x0[0])
+    yd_ = integrate(v[..., 1], dx=dt, F0=x0[1])
+    integrated = np.stack([xd_, yd_], axis=2)
+    return integrated
+
+
+def prediction_output_to_trajectories(prediction_output_dict,
+                                      dt,
+                                      max_h,
+                                      ph,
+                                      map=None,
+                                      gmm_agg='mean',
+                                      prune_ph_to_future=False):
+
+    prediction_timesteps = prediction_output_dict.keys()
+
+    output_dict = dict()
+    histories_dict = dict()
+    futures_dict = dict()
+
+    for t in prediction_timesteps:
+        histories_dict[t] = dict()
+        output_dict[t] = dict()
+        futures_dict[t] = dict()
+        prediction_nodes = prediction_output_dict[t].keys()
+        for node in prediction_nodes:
+            predictions_output = prediction_output_dict[t][node]
+            position_state = {'position': ['x', 'y']}
+            velocity_state = {'velocity': ['x', 'y']}
+            acceleration_state = {'acceleration': ['m']}
+            history = node.get(np.array([t - max_h, t]), position_state)  # History includes current pos
+            history = history[~np.isnan(history.sum(axis=1))]
+
+            future = node.get(np.array([t + 1, t + ph]), position_state)
+            future = future[~np.isnan(future.sum(axis=1))]
+
+            current_pos = node.get(t, position_state)[0]  # List with single item
+            current_vel = node.get(t, velocity_state)[0]  # List with single item
+
+            predictions_output = getattr(predictions_output, gmm_agg)(axis=1)
+
+            if prune_ph_to_future:
+                predictions_output = predictions_output[:, :future.shape[0]]
+                if predictions_output.shape[1] == 0:
+                    continue
+
+            vel_broad = np.expand_dims(np.broadcast_to(current_vel,
+                                                       (predictions_output.shape[0],
+                                                        current_vel.shape[-1])), axis=-2)
+            vel = np.concatenate((vel_broad, predictions_output), axis=1)
+            trajectory = integrate_trajectory(vel, current_pos, dt=dt)[:, 1:]
+
+            if map is None:
+                histories_dict[t][node] = history
+                output_dict[t][node] = trajectory
+                futures_dict[t][node] = future
+            else:
+                histories_dict[t][node] = map.to_map_points(history)
+                output_dict[t][node] = map.to_map_points(trajectory)
+                futures_dict[t][node] = map.to_map_points(future)
+
+    return output_dict, histories_dict, futures_dict
--- a/code/visualization/init.py
+++ b/code/visualization/init.py
@ -0,0 +1,2 @@
+from .visualization import visualize_prediction
+from .visualization_utils import plot_boxplots
--- a/code/visualization/visualization.py
+++ b/code/visualization/visualization.py
@ -0,0 +1,101 @@
+from utils import prediction_output_to_trajectories
+import matplotlib.pyplot as plt
+import matplotlib.patheffects as pe
+
+
+def plot_trajectories(ax,
+                      prediction_dict,
+                      histories_dict,
+                      futures_dict,
+                      line_alpha=0.7,
+                      line_width=0.2,
+                      edge_width=2,
+                      circle_edge_width=0.5,
+                      node_circle_size=0.3):
+
+    cmap = ['k', 'b', 'y', 'g', 'r']
+
+    for node in histories_dict:
+        history = histories_dict[node]
+        future = futures_dict[node]
+        predictions = prediction_dict[node]
+
+        ax.plot(history[:, 1], history[:, 0], 'k--')
+
+        for sample_num in range(prediction_dict[node].shape[0]):
+            ax.plot(predictions[sample_num, :, 1], predictions[sample_num, :, 0],
+                    color=cmap[node.type.value],
+                    linewidth=line_width, alpha=line_alpha)
+
+            ax.plot(future[:, 1],
+                    future[:, 0],
+                    'w--',
+                    path_effects=[pe.Stroke(linewidth=edge_width, foreground='k'), pe.Normal()])
+
+            # Current Node Position
+            circle = plt.Circle((history[-1, 1],
+                                 history[-1, 0]),
+                                node_circle_size,
+                                facecolor='g',
+                                edgecolor='k',
+                                lw=circle_edge_width,
+                                zorder=3)
+            ax.add_artist(circle)
+
+    # Robot Node # TODO Robot Node
+    # if robot_node is not None:
+    #     prefix_earliest_idx = max(0, t_predict - predict_horizon)
+    #     robot_prefix = inputs[robot_node][0, prefix_earliest_idx : t_predict + 1, 0:2].cpu().numpy()
+    #     robot_future = inputs[robot_node][0, t_predict + 1 : min(t_predict + predict_horizon + 1, traj_length), 0:2].cpu().numpy()
+    #
+    #     prefix_all_zeros = not np.any(robot_prefix)
+    #     future_all_zeros = not np.any(robot_future)
+    #     if not (prefix_all_zeros and future_all_zeros):
+    #         ax.plot(robot_prefix[:, 0], robot_prefix[:, 1], 'k--')
+    #         ax.plot(robot_future[:, 0], robot_future[:, 1], 'w--',
+    #                 path_effects=[pe.Stroke(linewidth=edge_width, foreground='k'), pe.Normal()])
+    #
+    #         circle = plt.Circle((robot_prefix[-1, 0],
+    #                              robot_prefix[-1, 1]),
+    #                             node_circle_size,
+    #                             facecolor='g',
+    #                             edgecolor='k',
+    #                             lw=circle_edge_width,
+    #                             zorder=3)
+    #         ax.add_artist(circle)
+    #
+    #         # Radius of influence
+    #         if robot_circle:
+    #             circle = plt.Circle((robot_prefix[-1, 0], robot_prefix[-1, 1]), test_stg.hyperparams['edge_radius'],
+    #                                 fill=False, color='r', linestyle='--', zorder=3)
+    #             ax.plot([], [], 'r--', label='Edge Radius')
+    #             ax.add_artist(circle)
+
+
+def visualize_prediction(ax,
+                         prediction_output_dict,
+                         dt,
+                         max_hl,
+                         ph,
+                         robot_node=None,
+                         map=None,
+                         **kwargs):
+
+    prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories(prediction_output_dict,
+                                                                                      dt,
+                                                                                      max_hl,
+                                                                                      ph,
+                                                                                      map=map)
+
+    assert(len(prediction_dict.keys()) <= 1)
+    if len(prediction_dict.keys()) == 0:
+        return
+    ts_key = list(prediction_dict.keys())[0]
+
+    prediction_dict = prediction_dict[ts_key]
+    histories_dict = histories_dict[ts_key]
+    futures_dict = futures_dict[ts_key]
+
+    if map is not None:
+        ax.imshow(map.fdata, origin='lower', alpha=0.5)
+    plot_trajectories(ax, prediction_dict, histories_dict, futures_dict, *kwargs)
--- a/code/visualization/visualization_utils.py
+++ b/code/visualization/visualization_utils.py
@ -0,0 +1,20 @@
+import numpy as np
+import pandas as pd
+import seaborn as sns
+
+
+def plot_boxplots(ax, perf_dict_for_pd, x_label, y_label):
+    perf_df = pd.DataFrame.from_dict(perf_dict_for_pd)
+    our_mean_color = sns.color_palette("muted")[9]
+    marker_size = 7
+    mean_markers = 'X'
+    with sns.color_palette("muted"):
+        sns.boxplot(x=x_label, y=y_label, data=perf_df, ax=ax, showfliers=False)
+        ax.plot([0], [np.mean(perf_df[y_label])], color=our_mean_color, marker=mean_markers,
+                markeredgecolor='#545454', markersize=marker_size, zorder=10)
+
+
+def plot_barplots(ax, perf_dict_for_pd, x_label, y_label):
+    perf_df = pd.DataFrame.from_dict(perf_dict_for_pd)
+    with sns.color_palette("muted"):
+        sns.barplot(x=x_label, y=y_label, ax=ax, data=perf_df)
--- a/data/nuScenes/download_file.py
+++ b/data/nuScenes/download_file.py
@ -0,0 +1,11 @@
+#!/usr/bin/python
+
+import sys
+import urllib.request
+
+def main():
+    # print command line arguments
+    urllib.request.urlretrieve(sys.argv[1], sys.argv[2])
+
+if __name__ == "__main__":
+    main()
--- a/data/nuScenes/kalman_filter.py
+++ b/data/nuScenes/kalman_filter.py
@ -0,0 +1,191 @@
+import numpy as np
+
+
+class LinearPointMass:
+    """Linear Kalman Filter for an autonomous point mass system, assuming constant velocity"""
+
+    def __init__(self, dt, sPos=None, sVel=None, sMeasurement=None):
+        """
+        input matrices must be numpy arrays
+        :param A: state transition matrix
+        :param B: state control matrix
+        :param C: measurement matrix
+        :param Q: covariance of the Gaussian error in state transition
+        :param R: covariance of the Gaussain error in measurement
+        """
+        self.dt = dt
+
+        # matrices of state transition and measurement
+        self.A = np.array([[1, dt, 0, 0], [0, 1, 0, 0], [0, 0, 1, dt], [0, 0, 0, 1]])
+        self.B = np.array([[0, 0], [dt, 0], [0, 0], [0, dt]])
+        self.C = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
+
+        # default noise covariance
+        if (sPos is None) and (sVel is None) and (sMeasurement is None):
+            # sPos = 0.5 * 5 * dt ** 2  # assume 5m/s2 as maximum acceleration
+            # sVel = 5.0 * dt  # assume 8.8m/s2 as maximum acceleration
+            sPos = 1.3*self.dt   # assume 5m/s2 as maximum acceleration
+            sVel = 4*self.dt  # assume 8.8m/s2 as maximum acceleration
+            sMeasurement = 0.2 # 68% of the measurement is within [-sMeasurement, sMeasurement]
+
+        # state transition noise
+        self.Q = np.diag([sPos ** 2, sVel ** 2, sPos ** 2, sVel ** 2])
+        # measurement noise
+        self.R = np.diag([sMeasurement ** 2, sMeasurement ** 2])
+
+    def predict_and_update(self, x_vec_est, u_vec, P_matrix, z_new):
+        """
+        for background please refer to wikipedia: https://en.wikipedia.org/wiki/Kalman_filter
+        :param x_vec_est:
+        :param u_vec:
+        :param P_matrix:
+        :param z_new:
+        :return:
+        """
+
+        ## Prediction Step
+        # predicted state estimate
+        x_pred = self.A.dot(x_vec_est) + self.B.dot(u_vec)
+        # predicted error covariance
+        P_pred = self.A.dot(P_matrix.dot(self.A.transpose())) + self.Q
+
+        ## Update Step
+        # innovation or measurement pre-fit residual
+        y_telda = z_new - self.C.dot(x_pred)
+        # innovation covariance
+        S = self.C.dot(P_pred.dot(self.C.transpose())) + self.R
+        # optimal Kalman gain
+        K = P_pred.dot(self.C.transpose().dot(np.linalg.inv(S)))
+        # updated (a posteriori) state estimate
+        x_vec_est_new = x_pred + K.dot(y_telda)
+        # updated (a posteriori) estimate covariance
+        P_matrix_new = np.dot((np.identity(4) - K.dot(self.C)), P_pred)
+
+        return x_vec_est_new, P_matrix_new
+
+
+class NonlinearKinematicBicycle:
+    """
+    Nonlinear Kalman Filter for a kinematic bicycle model, assuming constant longitudinal speed
+    and constant heading angle
+    """
+
+    def __init__(self, lf, lr, dt, sPos=None, sHeading=None, sVel=None, sMeasurement=None):
+        self.dt = dt
+
+        # params for state transition
+        self.lf = lf
+        self.lr = lr
+        # measurement matrix
+        self.C = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
+
+        # default noise covariance
+        if (sPos is None) and (sHeading is None) and (sVel is None) and (sMeasurement is None):
+            # TODO need to further check
+            # sPos = 0.5 * 8.8 * dt ** 2  # assume 8.8m/s2 as maximum acceleration
+            # sHeading = 0.5 * dt  # assume 0.5rad/s as maximum turn rate
+            # sVel = 8.8 * dt  # assume 8.8m/s2 as maximum acceleration
+            # sMeasurement = 1.0
+            sPos = 16 * self.dt  # assume 8.8m/s2 as maximum acceleration
+            sHeading = np.pi/2 * self.dt  # assume 0.5rad/s as maximum turn rate
+            sVel = 8 * self.dt  # assume 8.8m/s2 as maximum acceleration
+            sMeasurement = 0.8
+        # state transition noise
+        self.Q = np.diag([sPos ** 2, sPos ** 2, sHeading ** 2, sVel ** 2])
+        # measurement noise
+        self.R = np.diag([sMeasurement ** 2, sMeasurement ** 2, sMeasurement ** 2, sMeasurement ** 2])
+
+    def predict_and_update(self, x_vec_est, u_vec, P_matrix, z_new):
+        """
+        for background please refer to wikipedia: https://en.wikipedia.org/wiki/Extended_Kalman_filter
+        :param x_vec_est:
+        :param u_vec:
+        :param P_matrix:
+        :param z_new:
+        :return:
+        """
+
+        ## Prediction Step
+        # predicted state estimate
+        x_pred = self._kinematic_bicycle_model_rearCG(x_vec_est, u_vec)
+        # Compute Jacobian to obtain the state transition matrix
+        A = self._cal_state_Jacobian(x_vec_est, u_vec)
+        # predicted error covariance
+        P_pred = A.dot(P_matrix.dot(A.transpose())) + self.Q
+
+        ## Update Step
+        # innovation or measurement pre-fit residual
+        y_telda = z_new - self.C.dot(x_pred)
+        # innovation covariance
+        S = self.C.dot(P_pred.dot(self.C.transpose())) + self.R
+        # near-optimal Kalman gain
+        K = P_pred.dot(self.C.transpose().dot(np.linalg.inv(S)))
+        # updated (a posteriori) state estimate
+        x_vec_est_new = x_pred + K.dot(y_telda)
+        # updated (a posteriori) estimate covariance
+        P_matrix_new = np.dot((np.identity(4) - K.dot(self.C)), P_pred)
+
+        return x_vec_est_new, P_matrix_new
+
+    def _kinematic_bicycle_model_rearCG(self, x_old, u):
+        """
+        :param x: vehicle state vector = [x position, y position, heading, velocity]
+        :param u: control vector = [acceleration, steering angle]
+        :param dt:
+        :return:
+        """
+
+        acc = u[0]
+        delta = u[1]
+
+        x = x_old[0]
+        y = x_old[1]
+        psi = x_old[2]
+        vel = x_old[3]
+
+        x_new = np.array([[0.], [0.], [0.], [0.]])
+
+        beta = np.arctan(self.lr * np.tan(delta) / (self.lf + self.lr))
+
+        x_new[0] = x + self.dt * vel * np.cos(psi + beta)
+        x_new[1] = y + self.dt * vel * np.sin(psi + beta)
+        x_new[2] = psi + self.dt * vel * np.cos(beta) / (self.lf + self.lr) * np.tan(delta)
+        #x_new[2] = _heading_angle_correction(x_new[2])
+        x_new[3] = vel + self.dt * acc
+
+        return x_new
+
+    def _cal_state_Jacobian(self, x_vec, u_vec):
+        acc = u_vec[0]
+        delta = u_vec[1]
+
+        x = x_vec[0]
+        y = x_vec[1]
+        psi = x_vec[2]
+        vel = x_vec[3]
+
+        beta = np.arctan(self.lr * np.tan(delta) / (self.lf + self.lr))
+
+        a13 = -self.dt * vel * np.sin(psi + beta)
+        a14 = self.dt * np.cos(psi + beta)
+        a23 = self.dt * vel * np.cos(psi + beta)
+        a24 = self.dt * np.sin(psi + beta)
+        a34 = self.dt * np.cos(beta) / (self.lf + self.lr) * np.tan(delta)
+
+        JA = np.array([[1.0, 0.0, a13[0], a14[0]],
+                       [0.0, 1.0, a23[0], a24[0]],
+                       [0.0, 0.0, 1.0, a34[0]],
+                       [0.0, 0.0, 0.0, 1.0]])
+
+        return JA
+
+
+def _heading_angle_correction(theta):
+    """
+    correct heading angle so that it always remains in [-pi, pi]
+    :param theta:
+    :return:
+    """
+    theta_corrected = (theta + np.pi) % (2.0 * np.pi) - np.pi
+    return theta_corrected
+
--- a/data/nuScenes/models/baseline/config.json
+++ b/data/nuScenes/models/baseline/config.json
@ -0,0 +1 @@
+{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "exp", "learning_rate": 0.003, "min_learning_rate": 0.0005, "learning_decay_rate": 0.9995, "prediction_horizon": 6, "minimum_history_length": 1, "maximum_history_length": 8, "map_context": 120, "map_enc_num_layers": 4, "map_enc_hidden_size": 512, "map_enc_output_size": 512, "map_enc_dropout": 0.3, "alpha": 1, "k": 30, "k_eval": 50, "use_iwae": false, "kl_exact": true, "kl_min": 0.07, "kl_weight": 5.0, "kl_weight_start": 0, "kl_decay_rate": 0.99995, "kl_crossover": 500, "kl_sigmoid_divisor": 4, "inf_warmup": 1.0, "inf_warmup_start": 1.0, "inf_warmup_crossover": 1500, "inf_warmup_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.7}, "MLP_dropout_keep_prob": 0.9, "rnn_io_dropout_keep_prob": 1.0, "enc_rnn_dim_multiple_inputs": 8, "enc_rnn_dim_edge": 8, "enc_rnn_dim_edge_influence": 8, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 512, "dec_GMM_proj_MLP_dims": null, "sample_model_during_dec": true, "dec_sample_model_prob_start": 1.0, "dec_sample_model_prob_final": 1.0, "dec_sample_model_prob_crossover": 200, "dec_sample_model_prob_divisor": 4, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "fuzz_factor": 0.05, "GMM_components": 12, "log_sigma_min": -10, "log_sigma_max": 10, "log_p_yt_xz_max": 50, "N": 2, "K": 5, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 500, "z_logit_clip_divisor": 5, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"], "heading": ["value"]}, "BICYCLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}, "VEHICLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}}, "pred_state": {"PEDESTRIAN": {"velocity": ["x", "y"]}, "BICYCLE": {"velocity": ["x", "y"]}, "VEHICLE": {"velocity": ["x", "y"]}}, "log_histograms": false, "dynamic_edges": "yes", "edge_state_combine_method": "sum", "edge_influence_combine_method": "attention", "edge_radius": 0.0, "use_map_encoding": false, "edge_addition_filter": [0.25, 0.5, 0.75, 1.0], "edge_removal_filter": [1.0, 0.0], "offline_scene_graph": "yes"}
--- a/data/nuScenes/models/baseline/model_registrar-1.pt
+++ b/data/nuScenes/models/baseline/model_registrar-1.pt
--- a/data/nuScenes/models/edge/config.json
+++ b/data/nuScenes/models/edge/config.json
@ -0,0 +1 @@
+{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "exp", "learning_rate": 0.003, "min_learning_rate": 0.0005, "learning_decay_rate": 0.9995, "prediction_horizon": 6, "minimum_history_length": 1, "maximum_history_length": 8, "map_context": 120, "map_enc_num_layers": 4, "map_enc_hidden_size": 512, "map_enc_output_size": 512, "map_enc_dropout": 0.15, "alpha": 1, "k": 30, "k_eval": 50, "use_iwae": false, "kl_exact": true, "kl_min": 0.07, "kl_weight": 5.0, "kl_weight_start": 0, "kl_decay_rate": 0.99995, "kl_crossover": 500, "kl_sigmoid_divisor": 4, "inf_warmup": 1.0, "inf_warmup_start": 1.0, "inf_warmup_crossover": 1500, "inf_warmup_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.7}, "MLP_dropout_keep_prob": 0.9, "rnn_io_dropout_keep_prob": 1.0, "enc_rnn_dim_multiple_inputs": 8, "enc_rnn_dim_edge": 8, "enc_rnn_dim_edge_influence": 8, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 512, "dec_GMM_proj_MLP_dims": null, "sample_model_during_dec": true, "dec_sample_model_prob_start": 1.0, "dec_sample_model_prob_final": 1.0, "dec_sample_model_prob_crossover": 200, "dec_sample_model_prob_divisor": 4, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "fuzz_factor": 0.05, "GMM_components": 12, "log_sigma_min": -10, "log_sigma_max": 10, "log_p_yt_xz_max": 50, "N": 2, "K": 5, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 500, "z_logit_clip_divisor": 5, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"], "heading": ["value"]}, "BICYCLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}, "VEHICLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}}, "pred_state": {"PEDESTRIAN": {"velocity": ["x", "y"]}, "BICYCLE": {"velocity": ["x", "y"]}, "VEHICLE": {"velocity": ["x", "y"]}}, "log_histograms": false, "dynamic_edges": "yes", "edge_state_combine_method": "sum", "edge_influence_combine_method": "attention", "edge_radius": 20.0, "use_map_encoding": false, "edge_addition_filter": [0.25, 0.5, 0.75, 1.0], "edge_removal_filter": [1.0, 0.0], "offline_scene_graph": "yes"}
--- a/data/nuScenes/models/edge/model_registrar-1.pt
+++ b/data/nuScenes/models/edge/model_registrar-1.pt
--- a/data/nuScenes/models/full/config.json
+++ b/data/nuScenes/models/full/config.json
@ -0,0 +1 @@
+{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "const", "learning_rate": 0.002, "min_learning_rate": 0.0005, "learning_decay_rate": 0.9995, "prediction_horizon": 6, "minimum_history_length": 1, "maximum_history_length": 8, "map_context": 120, "map_enc_num_layers": 4, "map_enc_hidden_size": 512, "map_enc_output_size": 512, "map_enc_dropout": 0.5, "alpha": 1, "k": 30, "k_eval": 50, "use_iwae": false, "kl_exact": true, "kl_min": 0.07, "kl_weight": 5.0, "kl_weight_start": 0, "kl_decay_rate": 0.99995, "kl_crossover": 500, "kl_sigmoid_divisor": 4, "inf_warmup": 1.0, "inf_warmup_start": 1.0, "inf_warmup_crossover": 1500, "inf_warmup_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.5}, "MLP_dropout_keep_prob": 0.9, "rnn_io_dropout_keep_prob": 1.0, "enc_rnn_dim_multiple_inputs": 8, "enc_rnn_dim_edge": 8, "enc_rnn_dim_edge_influence": 8, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 512, "dec_GMM_proj_MLP_dims": null, "sample_model_during_dec": true, "dec_sample_model_prob_start": 1.0, "dec_sample_model_prob_final": 1.0, "dec_sample_model_prob_crossover": 200, "dec_sample_model_prob_divisor": 4, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "fuzz_factor": 0.05, "GMM_components": 12, "log_sigma_min": -10, "log_sigma_max": 10, "log_p_yt_xz_max": 50, "N": 2, "K": 5, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 500, "z_logit_clip_divisor": 5, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"], "heading": ["value"]}, "BICYCLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}, "VEHICLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}}, "pred_state": {"PEDESTRIAN": {"velocity": ["x", "y"]}, "BICYCLE": {"velocity": ["x", "y"]}, "VEHICLE": {"velocity": ["x", "y"]}}, "log_histograms": false, "dynamic_edges": "yes", "edge_state_combine_method": "sum", "edge_influence_combine_method": "attention", "edge_radius": 20.0, "use_map_encoding": true, "edge_addition_filter": [0.25, 0.5, 0.75, 1.0], "edge_removal_filter": [1.0, 0.0], "offline_scene_graph": "yes", "incl_robot_node": false}
--- a/data/nuScenes/models/full/model_registrar-1.pt
+++ b/data/nuScenes/models/full/model_registrar-1.pt
--- a/data/nuScenes/models/me_demo/config.json
+++ b/data/nuScenes/models/me_demo/config.json
@ -0,0 +1 @@
+{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "exp", "learning_rate": 0.002, "min_learning_rate": 0.0005, "learning_decay_rate": 0.9995, "prediction_horizon": 6, "minimum_history_length": 1, "maximum_history_length": 8, "map_context": 120, "map_enc_num_layers": 4, "map_enc_hidden_size": 512, "map_enc_output_size": 512, "map_enc_dropout": 0.5, "alpha": 1, "k": 30, "k_eval": 50, "use_iwae": false, "kl_exact": true, "kl_min": 0.07, "kl_weight": 5.0, "kl_weight_start": 0, "kl_decay_rate": 0.99995, "kl_crossover": 500, "kl_sigmoid_divisor": 4, "inf_warmup": 1.0, "inf_warmup_start": 1.0, "inf_warmup_crossover": 1500, "inf_warmup_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.5}, "MLP_dropout_keep_prob": 0.9, "rnn_io_dropout_keep_prob": 1.0, "enc_rnn_dim_multiple_inputs": 8, "enc_rnn_dim_edge": 8, "enc_rnn_dim_edge_influence": 8, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 512, "dec_GMM_proj_MLP_dims": null, "sample_model_during_dec": true, "dec_sample_model_prob_start": 1.0, "dec_sample_model_prob_final": 1.0, "dec_sample_model_prob_crossover": 200, "dec_sample_model_prob_divisor": 4, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "fuzz_factor": 0.05, "GMM_components": 12, "log_sigma_min": -10, "log_sigma_max": 10, "log_p_yt_xz_max": 50, "N": 2, "K": 5, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 500, "z_logit_clip_divisor": 5, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"], "heading": ["value"]}, "BICYCLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}, "VEHICLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}}, "pred_state": {"PEDESTRIAN": {"velocity": ["x", "y"]}, "BICYCLE": {"velocity": ["x", "y"]}, "VEHICLE": {"velocity": ["x", "y"]}}, "log_histograms": false, "dynamic_edges": "yes", "edge_state_combine_method": "sum", "edge_influence_combine_method": "attention", "edge_radius": 0.0, "use_map_encoding": true, "edge_addition_filter": [0.25, 0.5, 0.75, 1.0], "edge_removal_filter": [1.0, 0.0], "offline_scene_graph": "yes"}
--- a/data/nuScenes/models/me_demo/model_registrar-1.pt
+++ b/data/nuScenes/models/me_demo/model_registrar-1.pt
--- a/data/nuScenes/models/robot_demo/config.json
+++ b/data/nuScenes/models/robot_demo/config.json
@ -0,0 +1 @@
+{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "exp", "learning_rate": 0.003, "min_learning_rate": 0.0005, "learning_decay_rate": 0.9995, "prediction_horizon": 6, "minimum_history_length": 1, "maximum_history_length": 8, "map_context": 120, "map_enc_num_layers": 4, "map_enc_hidden_size": 512, "map_enc_output_size": 512, "map_enc_dropout": 0.5, "alpha": 1, "k": 30, "k_eval": 50, "use_iwae": false, "kl_exact": true, "kl_min": 0.07, "kl_weight": 5.0, "kl_weight_start": 0, "kl_decay_rate": 0.99995, "kl_crossover": 500, "kl_sigmoid_divisor": 4, "inf_warmup": 1.0, "inf_warmup_start": 1.0, "inf_warmup_crossover": 1500, "inf_warmup_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.5}, "MLP_dropout_keep_prob": 0.9, "rnn_io_dropout_keep_prob": 1.0, "enc_rnn_dim_multiple_inputs": 8, "enc_rnn_dim_edge": 8, "enc_rnn_dim_edge_influence": 8, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 512, "dec_GMM_proj_MLP_dims": null, "sample_model_during_dec": true, "dec_sample_model_prob_start": 1.0, "dec_sample_model_prob_final": 1.0, "dec_sample_model_prob_crossover": 200, "dec_sample_model_prob_divisor": 4, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "fuzz_factor": 0.05, "GMM_components": 12, "log_sigma_min": -10, "log_sigma_max": 10, "log_p_yt_xz_max": 50, "N": 2, "K": 5, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 500, "z_logit_clip_divisor": 5, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"], "heading": ["value"]}, "BICYCLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}, "VEHICLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}}, "pred_state": {"PEDESTRIAN": {"velocity": ["x", "y"]}, "BICYCLE": {"velocity": ["x", "y"]}, "VEHICLE": {"velocity": ["x", "y"]}}, "log_histograms": false, "dynamic_edges": "yes", "edge_state_combine_method": "sum", "edge_influence_combine_method": "attention", "edge_radius": 20.0, "use_map_encoding": false, "edge_addition_filter": [0.25, 0.5, 0.75, 1.0], "edge_removal_filter": [1.0, 0.0], "offline_scene_graph": "yes", "incl_robot_node": true}
--- a/data/nuScenes/models/robot_demo/model_registrar-1.pt
+++ b/data/nuScenes/models/robot_demo/model_registrar-1.pt
--- a/data/nuScenes/nuscenes-devkit
+++ b/data/nuScenes/nuscenes-devkit
@ -0,0 +1 @@
+Subproject commit f3594b967cbf42396da5c6cb08bd714437b53111
--- a/data/nuScenes/process_nuScenes.py
+++ b/data/nuScenes/process_nuScenes.py
@ -0,0 +1,491 @@
+import sys
+import os
+import numpy as np
+import pandas as pd
+import pickle
+import json
+from tqdm import tqdm
+from pyquaternion import Quaternion
+from kalman_filter import LinearPointMass, NonlinearKinematicBicycle
+from scipy.integrate import cumtrapz
+from scipy.ndimage.morphology import binary_dilation, generate_binary_structure
+
+nu_path = './nuscenes-devkit/python-sdk/'
+#op_path = './pytorch-openpose/python/'
+sys.path.append(nu_path)
+sys.path.append("../../code")
+#sys.path.append(op_path)
+from nuscenes.nuscenes import NuScenes
+from nuscenes.map_expansion.map_api import NuScenesMap
+from data import Environment, Scene, Node, BicycleNode, Position, Velocity, Acceleration, ActuatorAngle, Map, Scalar
+
+scene_blacklist = [3, 12, 18, 19, 33, 35, 36, 41, 45, 50, 54, 55, 61, 120, 121, 123, 126, 132, 133, 134, 149,
+                           154, 159, 196, 268, 278, 351, 365, 367, 368, 369, 372, 376, 377, 382, 385, 499, 515, 517,
+                           945, 947, 952, 955, 962, 963, 968] + [969]
+
+
+types = ['PEDESTRIAN',
+         'BICYCLE',
+         'VEHICLE']
+
+standardization = {
+    'PEDESTRIAN': {
+        'position': {
+            'x': {'mean': 0, 'std': 25},
+            'y': {'mean': 0, 'std': 25}
+        },
+        'velocity': {
+            'x': {'mean': 0, 'std': 2},
+            'y': {'mean': 0, 'std': 2}
+        },
+        'acceleration': {
+            'x': {'mean': 0, 'std': 1},
+            'y': {'mean': 0, 'std': 1}
+        },
+        'heading': {
+            'value': {'mean': 0, 'std': np.pi},
+            'derivative': {'mean': 0, 'std': np.pi / 4}
+        }
+    },
+    'BICYCLE': {
+        'position': {
+            'x': {'mean': 0, 'std': 50},
+            'y': {'mean': 0, 'std': 50}
+        },
+        'velocity': {
+            'x': {'mean': 0, 'std': 6},
+            'y': {'mean': 0, 'std': 6},
+            'm': {'mean': 0, 'std': 6}
+        },
+        'acceleration': {
+            'x': {'mean': 0, 'std': 4},
+            'y': {'mean': 0, 'std': 4},
+            'm': {'mean': 0, 'std': 4}
+        },
+        'actuator_angle': {
+            'steering_angle': {'mean': 0, 'std': np.pi/2}
+        },
+        'heading': {
+            'value': {'mean': 0, 'std': np.pi},
+            'derivative': {'mean': 0, 'std': np.pi / 4}
+        }
+    },
+    'VEHICLE': {
+        'position': {
+            'x': {'mean': 0, 'std': 100},
+            'y': {'mean': 0, 'std': 100}
+        },
+        'velocity': {
+            'x': {'mean': 0, 'std': 20},
+            'y': {'mean': 0, 'std': 20},
+            'm': {'mean': 0, 'std': 20}
+        },
+        'acceleration': {
+            'x': {'mean': 0, 'std': 4},
+            'y': {'mean': 0, 'std': 4},
+            'm': {'mean': 0, 'std': 4}
+        },
+        'actuator_angle': {
+            'steering_angle': {'mean': 0, 'std': np.pi/2}
+        },
+        'heading': {
+            'value': {'mean': 0, 'std': np.pi},
+            'derivative': {'mean': 0, 'std': np.pi / 4}
+        }
+    }
+}
+
+def inverse_np_gradient(f, dx, F0=0.):
+    N = f.shape[0]
+    return F0 + np.hstack((np.zeros((N, 1)), cumtrapz(f, axis=1, dx=dx)))
+
+def integrate_trajectory(v, x0, dt):
+    xd_ = inverse_np_gradient(v[..., 0], dx=dt, F0=x0[0])
+    yd_ = inverse_np_gradient(v[..., 1], dx=dt, F0=x0[1])
+    integrated = np.stack([xd_, yd_], axis=2)
+    return integrated
+
+def integrate_heading_model(a, dh, h0, x0, v0, dt):
+    h = inverse_np_gradient(dh, dx=dt, F0=h0)
+    v_m = inverse_np_gradient(a, dx=dt, F0=v0)
+
+    vx = np.cos(h) * v_m
+    vy = np.sin(h) * v_m
+
+    v = np.stack((vx, vy), axis=2)
+    return integrate_trajectory(v, x0, dt)
+
+if __name__ == "__main__":
+    num_global_straight = 0
+    num_global_curve = 0
+
+    test = False
+    if sys.argv[1] == 'mini':
+        data_path = './raw_data/mini'
+        nusc = NuScenes(version='v1.0-mini', dataroot=data_path, verbose=True)
+        add = "_mini"
+        train_scenes = nusc.scene[0:7]
+        val_scenes = nusc.scene[7:]
+        test_scenes = []
+    elif sys.argv[1] == 'test':
+        test = True
+        data_path = './raw_data'
+        nusc = NuScenes(version='v1.0-test', dataroot=data_path, verbose=True)
+        train_scenes = []
+        val_scenes = []
+        test_scenes = nusc.scene
+        with open(os.path.join('./raw_data/results_test_megvii.json'), 'r') as test_json:
+            test_annotations = json.load(test_json)
+    else:
+        data_path = '/home/timsal/Documents/code/GenTrajectron_nuScenes_ssh/data/nuScenes/raw_data'
+        nusc = NuScenes(version='v1.0-trainval', dataroot=data_path, verbose=True)
+        add = ""
+        train_scenes = nusc.scene[0:]
+        val_scenes = nusc.scene[700:]
+        test_scenes = []
+
+    for data_class, nuscenes in [('train', train_scenes), ('val', val_scenes), ('test', test_scenes)]:
+        print(f"Processing data class {data_class}")
+        data_dict_path = os.path.join('../processed', '_'.join(['nuScenes', data_class])+ 'samp.pkl')
+        env = Environment(node_type_list=types, standardization=standardization)
+        attention_radius = dict()
+        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 3.0
+        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.VEHICLE)] = 20.0
+        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.BICYCLE)] = 10.0
+        attention_radius[(env.NodeType.VEHICLE, env.NodeType.PEDESTRIAN)] = 20.0
+        attention_radius[(env.NodeType.VEHICLE, env.NodeType.VEHICLE)] = 20.0
+        attention_radius[(env.NodeType.VEHICLE, env.NodeType.BICYCLE)] = 20.0
+        attention_radius[(env.NodeType.BICYCLE, env.NodeType.PEDESTRIAN)] = 10.0
+        attention_radius[(env.NodeType.BICYCLE, env.NodeType.VEHICLE)] = 20.0
+        attention_radius[(env.NodeType.BICYCLE, env.NodeType.BICYCLE)] = 10.0
+        env.attention_radius = attention_radius
+        scenes = []
+        pbar = tqdm(nuscenes, ncols=100)
+        for nuscene in pbar:
+            scene_id = int(nuscene['name'].replace('scene-', ''))
+            if scene_id in scene_blacklist: # Some scenes have bad localization
+                continue
+
+            if not (scene_id == 1002 or scene_id == 234):
+                continue
+
+            data = pd.DataFrame(columns=['frame_id',
+                                         'type',
+                                         'node_id',
+                                         'robot',
+                                         'x', 'y', 'z',
+                                         'length',
+                                         'width',
+                                         'height',
+                                         'heading',
+                                         'orientation'])
+
+            sample_token = nuscene['first_sample_token']
+            sample = nusc.get('sample', sample_token)
+            frame_id = 0
+            while sample['next']:
+                if not test:
+                    annotation_tokens = sample['anns']
+                else:
+                    annotation_tokens = test_annotations['results'][sample['token']]
+                for annotation_token in annotation_tokens:
+                    if not test:
+                        annotation = nusc.get('sample_annotation', annotation_token)
+                        category = annotation['category_name']
+                        if len(annotation['attribute_tokens']):
+                            attribute = nusc.get('attribute', annotation['attribute_tokens'][0])['name']
+
+                        if 'pedestrian' in category and not 'stroller' in category and not 'wheelchair' in category:
+                            our_category = env.NodeType.PEDESTRIAN
+                        elif ('vehicle.bicycle' in category) and 'with_rider' in attribute:
+                            continue
+                            our_category = env.NodeType.BICYCLE
+                        elif 'vehicle' in category and 'bicycle' not in category and 'motorcycle' not in category and 'parked' not in attribute:
+                            our_category = env.NodeType.VEHICLE
+                        # elif ('vehicle.motorcycle' in category) and 'with_rider' in attribute:
+                        # our_category = env.NodeType.VEHICLE
+                        else:
+                            continue
+                    else:
+                        annotation = annotation_token
+                        category = annotation['tracking_name']
+                        attribute = ""#annotation['attribute_name']
+
+                        if 'pedestrian' in category :
+                            our_category = env.NodeType.PEDESTRIAN
+                        elif (('car' in category or 'bus' in category or 'construction_vehicle' in category) and 'parked' not in attribute):
+                            our_category = env.NodeType.VEHICLE
+                        # elif ('vehicle.motorcycle' in category) and 'with_rider' in attribute:
+                        # our_category = env.NodeType.VEHICLE
+                        else:
+                            continue
+
+
+                    data_point = pd.Series({'frame_id': frame_id,
+                                            'type': our_category,
+                                            'node_id': annotation['instance_token'] if not test else annotation['tracking_id'],
+                                            'robot': False,
+                                            'x': annotation['translation'][0],
+                                            'y': annotation['translation'][1],
+                                            'z': annotation['translation'][2],
+                                            'length': annotation['size'][0],
+                                            'width': annotation['size'][1],
+                                            'height': annotation['size'][2],
+                                            'heading': Quaternion(annotation['rotation']).yaw_pitch_roll[0],
+                                            'orientation': None})
+                    data = data.append(data_point, ignore_index=True)
+
+                # Ego Vehicle
+                our_category = env.NodeType.VEHICLE
+                sample_data = nusc.get('sample_data', sample['data']['CAM_FRONT'])
+                annotation = nusc.get('ego_pose', sample_data['ego_pose_token'])
+                data_point = pd.Series({'frame_id': frame_id,
+                                        'type': our_category,
+                                        'node_id': 'ego',
+                                        'robot': True,
+                                        'x': annotation['translation'][0],
+                                        'y': annotation['translation'][1],
+                                        'z': annotation['translation'][2],
+                                        'length': 4,
+                                        'width': 1.7,
+                                        'height': 1.5,
+                                        'heading': Quaternion(annotation['rotation']).yaw_pitch_roll[0],
+                                        'orientation': None})
+                data = data.append(data_point, ignore_index=True)
+
+                sample = nusc.get('sample', sample['next'])
+                frame_id += 1
+
+            if len(data.index) == 0:
+                continue
+
+            data.sort_values('frame_id', inplace=True)
+            max_timesteps = data['frame_id'].max()
+
+            x_min = np.round(data['x'].min() - 50)
+            x_max = np.round(data['x'].max() + 50)
+            y_min = np.round(data['y'].min() - 50)
+            y_max = np.round(data['y'].max() + 50)
+
+            data['x'] = data['x'] - x_min
+            data['y'] = data['y'] - y_min
+
+            scene = Scene(timesteps=max_timesteps + 1, dt=0.5, name=str(scene_id))
+
+            # Generate Maps
+            map_name = nusc.get('log', nuscene['log_token'])['location']
+            nusc_map = NuScenesMap(dataroot=data_path, map_name=map_name)
+
+            type_map = dict()
+            x_size = x_max - x_min
+            y_size = y_max - y_min
+            patch_box = (x_min + 0.5 * (x_max - x_min), y_min + 0.5 * (y_max - y_min), y_size, x_size)
+            patch_angle = 0  # Default orientation where North is up
+            canvas_size = (np.round(3 * y_size).astype(int), np.round(3 * x_size).astype(int))
+            homography = np.array([[3., 0., 0.], [0., 3., 0.], [0., 0., 3.]])
+            layer_names = ['lane', 'road_segment', 'drivable_area', 'road_divider', 'lane_divider', 'stop_line',
+                           'ped_crossing', 'stop_line', 'ped_crossing', 'walkway']
+            map_mask = (nusc_map.get_map_mask(patch_box, patch_angle, layer_names, canvas_size) * 255.0).astype(
+                np.uint8)
+            map_mask = np.swapaxes(map_mask, 1, 2) # x axis comes first
+            # PEDESTRIANS
+            map_mask_pedestrian = np.stack((map_mask[9], map_mask[8], np.max(map_mask[:3], axis=0)), axis=2)
+            type_map['PEDESTRIAN'] = Map(data=map_mask_pedestrian, homography=homography,
+                                         description=', '.join(layer_names))
+            # Bicycles
+            map_mask_bicycles = np.stack((map_mask[9], map_mask[8], np.max(map_mask[:3], axis=0)), axis=2)
+            type_map['BICYCLE'] = Map(data=map_mask_bicycles, homography=homography, description=', '.join(layer_names))
+            # VEHICLES
+            map_mask_vehicle = np.stack((np.max(map_mask[:3], axis=0), map_mask[3], map_mask[4]), axis=2)
+            type_map['VEHICLE'] = Map(data=map_mask_vehicle, homography=homography, description=', '.join(layer_names))
+
+            map_mask_plot = np.stack(((np.max(map_mask[:3], axis=0) - (map_mask[3] + 0.5 * map_mask[4]).clip(
+                max=255)).clip(min=0).astype(np.uint8), map_mask[8], map_mask[9]), axis=2)
+            type_map['PLOT'] = Map(data=map_mask_plot, homography=homography, description=', '.join(layer_names))
+
+            scene.map = type_map
+            del map_mask
+            del map_mask_pedestrian
+            del map_mask_vehicle
+            del map_mask_bicycles
+            del map_mask_plot
+
+            for node_id in pd.unique(data['node_id']):
+                node_df = data[data['node_id'] == node_id]
+
+                if node_df['x'].shape[0] < 2:
+                    continue
+
+                if not np.all(np.diff(node_df['frame_id']) == 1):
+                    #print('Occlusion')
+                    continue # TODO Make better
+
+                node_values = node_df['x'].values
+                if node_df.iloc[0]['type'] == env.NodeType.PEDESTRIAN:
+                    node = Node(type=node_df.iloc[0]['type'])
+                else:
+                    node = BicycleNode(type=node_df.iloc[0]['type'])
+                node.first_timestep = node_df['frame_id'].iloc[0]
+                node.position = Position(node_df['x'].values, node_df['y'].values)
+                node.velocity = Velocity.from_position(node.position, scene.dt)
+                node.velocity.m = np.linalg.norm(np.vstack((node.velocity.x, node.velocity.y)), axis=0)
+                node.acceleration = Acceleration.from_velocity(node.velocity, scene.dt)
+                node.heading = Scalar(node_df['heading'].values)
+                heading_t = node_df['heading'].values.copy()
+                shifted_heading = np.zeros_like(node.heading.value)
+                shifted_heading[0] = node.heading.value[0]
+                for i in range(1, len(node.heading.value)):
+                    if not (np.sign(node.heading.value[i]) == np.sign(node.heading.value[i - 1])) and np.abs(
+                            node.heading.value[i]) > np.pi / 2:
+                        shifted_heading[i] = shifted_heading[i - 1] + (
+                                node.heading.value[i] - node.heading.value[i - 1]) - np.sign(
+                            (node.heading.value[i] - node.heading.value[i - 1])) * 2 * np.pi
+                    else:
+                        shifted_heading[i] = shifted_heading[i - 1] + (
+                                node.heading.value[i] - node.heading.value[i - 1])
+                node.heading.value = shifted_heading
+                node.length = node_df.iloc[0]['length']
+                node.width = node_df.iloc[0]['width']
+
+                if node_df.iloc[0]['robot'] == True:
+                    node.is_robot = True
+
+                if node_df.iloc[0]['type'] == env.NodeType.PEDESTRIAN:
+                    filter_ped = LinearPointMass(dt=scene.dt)
+                    for i in range(len(node.position.x)):
+                        if i == 0:  # initalize KF
+                            P_matrix = np.identity(4)
+                        elif i < len(node.position.x):
+                            # assign new est values
+                            node.position.x[i] = x_vec_est_new[0][0]
+                            node.velocity.x[i] = x_vec_est_new[1][0]
+                            node.position.y[i] = x_vec_est_new[2][0]
+                            node.velocity.y[i] = x_vec_est_new[3][0]
+
+                        if i < len(node.position.x) - 1:  # no action on last data
+                            # filtering
+                            x_vec_est = np.array([[node.position.x[i]],
+                                                  [node.velocity.x[i]],
+                                                  [node.position.y[i]],
+                                                  [node.velocity.y[i]]])
+                            z_new = np.array([[node.position.x[i+1]],
+                                              [node.position.y[i+1]]])
+                            x_vec_est_new, P_matrix_new = filter_ped.predict_and_update(
+                                x_vec_est=x_vec_est,
+                                u_vec=np.array([[0.], [0.]]),
+                                P_matrix=P_matrix,
+                                z_new=z_new
+                            )
+                            P_matrix = P_matrix_new
+                else:
+                    filter_veh = NonlinearKinematicBicycle(lf=node.length*0.6, lr=node.length*0.4, dt=scene.dt)
+                    for i in range(len(node.position.x)):
+                        if i == 0:  # initalize KF
+                            # initial P_matrix
+                            P_matrix = np.identity(4)
+                        elif i < len(node.position.x):
+                            # assign new est values
+                            node.position.x[i] = x_vec_est_new[0][0]
+                            node.position.y[i] = x_vec_est_new[1][0]
+                            node.heading.value[i] = x_vec_est_new[2][0]
+                            node.velocity.m[i] = x_vec_est_new[3][0]
+
+                        if i < len(node.position.x) - 1:  # no action on last data
+                            # filtering
+                            x_vec_est = np.array([[node.position.x[i]],
+                                                  [node.position.y[i]],
+                                                  [node.heading.value[i]],
+                                                  [node.velocity.m[i]]])
+                            z_new = np.array([[node.position.x[i+1]],
+                                              [node.position.y[i+1]],
+                                              [node.heading.value[i+1]],
+                                              [node.velocity.m[i+1]]])
+                            x_vec_est_new, P_matrix_new = filter_veh.predict_and_update(
+                                x_vec_est=x_vec_est,
+                                u_vec=np.array([[0.], [0.]]),
+                                P_matrix=P_matrix,
+                                z_new=z_new
+                            )
+                            P_matrix = P_matrix_new
+
+                v_tmp = node.velocity.m
+                node.velocity = Velocity.from_position(node.position, scene.dt)
+                node.velocity.m = v_tmp
+                #if (np.abs(np.linalg.norm(np.vstack((node.velocity.x, node.velocity.y)), axis=0) - v_tmp) > 0.4).any():
+                #    print(np.abs(np.linalg.norm(np.vstack((node.velocity.x, node.velocity.y)), axis=0) - v_tmp))
+
+                node.acceleration = Acceleration.from_velocity(node.velocity, scene.dt)
+                node.acceleration.m = np.gradient(v_tmp, scene.dt)
+                node.heading.derivative = np.gradient(node.heading.value, scene.dt)
+                node.heading.value = (node.heading.value + np.pi) % (2.0 * np.pi) - np.pi
+
+                if node_df.iloc[0]['type'] == env.NodeType.VEHICLE:
+                    node_pos = np.stack((node.position.x, node.position.y), axis=1)
+                    node_pos_map = scene.map[env.NodeType.VEHICLE.name].to_map_points(node_pos)
+                    node_pos_int = np.round(node_pos_map).astype(int)
+                    dilated_map = binary_dilation(scene.map[env.NodeType.VEHICLE.name].data[..., 0], generate_binary_structure(2, 2))
+                    if np.sum((dilated_map[node_pos_int[:, 0], node_pos_int[:, 1]] == 0))/node_pos_int.shape[0] > 0.1:
+                        del node
+                        continue # Out of map
+
+
+
+                if not node_df.iloc[0]['type'] == env.NodeType.PEDESTRIAN:
+                    # Re Integrate:
+                    i_pos = integrate_heading_model(np.array([node.acceleration.m[1:]]),
+                                            np.array([node.heading.derivative[1:]]),
+                                            node.heading.value[0],
+                                            np.vstack((node.position.x[0], node.position.y[0])),
+                                            node.velocity.m[0], 0.5)
+
+
+                    #if (np.abs(node.heading.derivative) > np.pi/8).any():
+                    #    print(np.abs(node.heading.derivative).max())
+                scene.nodes.append(node)
+                if node.is_robot is True:
+                    scene.robot = node
+
+            robot = False
+            num_heading_changed = 0
+            num_moving_vehicles = 0
+            for node in scene.nodes:
+                node.description = "straight"
+                num_global_straight += 1
+                if node.type == env.NodeType.VEHICLE:
+                    if np.linalg.norm((node.position.x[0] - node.position.x[-1], node.position.y[0] - node.position.y[-1])) > 10:
+                        num_moving_vehicles += 1
+                    if np.abs(node.heading.value[0] - node.heading.value[-1]) > np.pi / 6:
+                        if not np.sign(node.heading.value[0]) == np.sign(node.heading.value[-1]) and np.abs(node.heading.value[0] > 1/2 * np.pi):
+                            if (node.heading.value[0] - node.heading.value[-1]) - np.sign((node.heading.value[0] - node.heading.value[-1])) * 2 * np.pi > np.pi / 6:
+                                node.description = "curve"
+                                num_global_curve += 1
+                                num_global_straight -= 1
+                                num_heading_changed += 1
+                        else:
+                            node.description = "curve"
+                            num_global_curve += 1
+                            num_global_straight -= 1
+                            num_heading_changed += 1
+
+                if node.is_robot:
+                    robot = True
+
+            if num_moving_vehicles > 0 and num_heading_changed / num_moving_vehicles > 0.4:
+                scene.description = "curvy"
+            else:
+                scene.description = "straight"
+
+            if robot: # If we dont have a ego vehicle there was bad localization
+                pbar.set_description(str(scene))
+                scenes.append(scene)
+
+            del data
+
+        env.scenes = scenes
+
+        if len(scenes) > 0:
+            with open(data_dict_path, 'wb') as f:
+                pickle.dump(env, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    print(num_global_straight)
+    print(num_global_curve)
--- a/data/processed/nuScenes_samples.pkl
+++ b/data/processed/nuScenes_samples.pkl
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,13 @@
+matplotlib
+numpy==1.16.4
+pandas==0.25.1
+psutil==5.6.3
+scipy==1.3.1
+seaborn==0.9.0
+tensorboard==1.14.0
+tensorboardX==1.8
+tensorflow==1.14.0
+tensorflow-estimator==1.14.0
+torch==1.2.0
+Pillow==6.1.0
+pyquaternion==0.9.5
				`@ -0,0 +1 @@`
				`from .evaluation import compute_batch_statistics, log_batch_errors`
				`@ -0,0 +1 @@`
				`from .trajectory_utils import integrate_trajectory, prediction_output_to_trajectories`
				`@ -0,0 +1 @@`
				{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "exp", "learning_rate": 0.003, "min_learning_rate": 0.0005, "learning_decay_rate": 0.9995, "prediction_horizon": 6, "minimum_history_length": 1, "maximum_history_length": 8, "map_context": 120, "map_enc_num_layers": 4, "map_enc_hidden_size": 512, "map_enc_output_size": 512, "map_enc_dropout": 0.3, "alpha": 1, "k": 30, "k_eval": 50, "use_iwae": false, "kl_exact": true, "kl_min": 0.07, "kl_weight": 5.0, "kl_weight_start": 0, "kl_decay_rate": 0.99995, "kl_crossover": 500, "kl_sigmoid_divisor": 4, "inf_warmup": 1.0, "inf_warmup_start": 1.0, "inf_warmup_crossover": 1500, "inf_warmup_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.7}, "MLP_dropout_keep_prob": 0.9, "rnn_io_dropout_keep_prob": 1.0, "enc_rnn_dim_multiple_inputs": 8, "enc_rnn_dim_edge": 8, "enc_rnn_dim_edge_influence": 8, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 512, "dec_GMM_proj_MLP_dims": null, "sample_model_during_dec": true, "dec_sample_model_prob_start": 1.0, "dec_sample_model_prob_final": 1.0, "dec_sample_model_prob_crossover": 200, "dec_sample_model_prob_divisor": 4, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "fuzz_factor": 0.05, "GMM_components": 12, "log_sigma_min": -10, "log_sigma_max": 10, "log_p_yt_xz_max": 50, "N": 2, "K": 5, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 500, "z_logit_clip_divisor": 5, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"], "heading": ["value"]}, "BICYCLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}, "VEHICLE": {"position": ["x", "y"], "velocity": ["x", "y", "m"], "acceleration": ["x", "y", "m"], "heading": ["value"]}}, "pred_state": {"PEDESTRIAN": {"velocity": ["x", "y"]}, "BICYCLE": {"velocity": ["x", "y"]}, "VEHICLE": {"velocity": ["x", "y"]}}, "log_histograms": false, "dynamic_edges": "yes", "edge_state_combine_method": "sum", "edge_influence_combine_method": "attention", "edge_radius": 0.0, "use_map_encoding": false, "edge_addition_filter": [0.25, 0.5, 0.75, 1.0], "edge_removal_filter": [1.0, 0.0], "offline_scene_graph": "yes"}
				`@ -0,0 +1 @@`
				`Subproject commit f3594b967cbf42396da5c6cb08bd714437b53111`