Use DeepSORT instead of SORT for tracking

Based on ZQPei's repository, I replace SORT with DeepSORT:

"Deep sort is basicly the same with sort but added a CNN model to extract features in image of human part bounded by a detector." (ZQPei)

Other additions:

  • Use a generator function (a programming construct) for video analysis and detection per frame; see the sketch right after this list.
  • This also allows for caching of intermediate steps.
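
As a rough illustration of that generator-plus-cache pattern (a sketch only; cached_per_frame, frames and process_frame are made-up names, the real implementation is detect_persons() further down):

import pickle
from pathlib import Path

def cached_per_frame(cache_path: Path, frames, process_frame):
    # replay previously computed per-frame results if a cache exists
    if cache_path.exists():
        with cache_path.open('rb') as fp:
            for result in pickle.load(fp):
                yield result
    else:
        results = []
        for frame in frames:
            result = process_frame(frame)  # e.g. run the detector on one frame
            results.append(result)
            yield result                   # the caller consumes results lazily
        # write the cache only after the whole video has been processed
        with cache_path.open('wb') as fp:
            pickle.dump(results, fp)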
In [1]:
import cv2
from pathlib import Path
import numpy as np
from PIL import Image
import torch
from torchvision.io.video import read_video
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights
import tempfile        
In [2]:
source = Path('../DATASETS/VIRAT_subset_0102x')
videos = list(source.glob('*.mp4'))
tmpdir = Path(tempfile.gettempdir()) / 'trajpred'
tmpdir.mkdir(exist_ok=True)
In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
Out[3]:
device(type='cuda')
In [4]:
weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT
model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.35)
model.to(device)
# Put the model in inference mode
model.eval()
# Get the transforms for the model's weights
preprocess = weights.transforms().to(device)

The score_thresh argument sets the confidence threshold for detections: an output is only kept as a detection of a class if the model is at least 35% confident that the object belongs to that class.

The result from a single prediction coming from model(batch) looks like:

{'boxes': tensor([[5.7001e+02, 2.5786e+02, 6.3138e+02, 3.6970e+02],
         [5.0109e+02, 2.4508e+02, 5.5308e+02, 3.4852e+02],
         [3.4096e+02, 2.7015e+02, 3.6156e+02, 3.1857e+02],
         [5.0219e-01, 3.7588e+02, 9.7911e+01, 7.2000e+02],
         [3.4096e+02, 2.7015e+02, 3.6156e+02, 3.1857e+02],
         [8.3241e+01, 5.8410e+02, 1.7502e+02, 7.1743e+02]]),
 'scores': tensor([0.8525, 0.6491, 0.5985, 0.4999, 0.3753, 0.3746]),
 'labels': tensor([64, 64,  1, 64, 18, 86])}
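
As a small illustration of how such a prediction is consumed further down (a sketch; prediction is one element of the list returned by model(batch)):

# keep only 'person' detections (COCO label 1) from one prediction dict;
# the same mask-based filtering is used in detect_persons() below
mask = prediction['labels'] == 1
person_boxes = prediction['boxes'][mask]    # [x1, y1, x2, y2] per detection
person_scores = prediction['scores'][mask]  # confidence, already >= score_thresh (0.35)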
In [5]:
%matplotlib inline


import pylab as pl
from IPython import display
from utils.timer import Timer
from sort_cfotache import Sort
import pickle

def detect_persons(video_path: Path):
    """
    returns detections as structure: [[x1,y1,x2,y2,score],...]
    """
    video = cv2.VideoCapture(str(video_path))


    cachefile = tmpdir / f"detections-{video_path.name}.pcl"
    if cachefile.exists():
        with cachefile.open('rb') as fp:
            all_detections = pickle.load(fp)
            for detections in all_detections:
                yield detections
    else:
        all_detections = []
        while True:
            ret, frame = video.read()
            
            if not ret:
                # print("Can't receive frame (stream end?). Exiting ...")
                break

            t = torch.from_numpy(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # change axes of the loaded image to be compatible with torchvision.io.read_image (which uses C,H,W format instead of H,W,C)
            t = t.permute(2, 0, 1)

            batch = preprocess(t)[None, :].to(device)
            # no_grad can be used on inference, should be slightly faster
            with torch.no_grad():
                predictions = model(batch)
            prediction = predictions[0] # we feed only one frame at a time

            mask = prediction['labels'] == 1 # if we want more than one: np.isin(prediction['labels'], [1,86])

            scores = prediction['scores'][mask]
            labels = prediction['labels'][mask]
            boxes = prediction['boxes'][mask]
            
            # TODO: introduce confidence and NMS supression: https://github.com/cfotache/pytorch_objectdetecttrack/blob/master/PyTorch_Object_Tracking.ipynb
            # (which I _think_ we better do after filtering)
            # alternatively look at Soft-NMS https://towardsdatascience.com/non-maximum-suppression-nms-93ce178e177c

            #  dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
            detections = np.array([np.append(bbox, [score, label]) for bbox, score, label in zip(boxes.cpu(), scores.cpu(), labels.cpu())])
            
            all_detections.append(detections)
            
            yield detections
    
        with cachefile.open('wb') as fp:
            pickle.dump(all_detections, fp)

def track_video(video_path: Path):
    """Generator: yields tracked boxes per frame as [x1, y1, x2, y2, obj_id, ...]."""
    mot_tracker = Sort()
    

    for detections in detect_persons(video_path):
        # tracks structure: [[x1, y1, x2, y2, obj_id, obj_class], ...] (obj_id at index 4)
        tracks = mot_tracker.update(detections)

        # convert back to boxes and labels (leftover from drawing with draw_bounding_boxes; unused here)
        # print(tracks)
        boxes = np.array([t[:4] for t in tracks])
        # initialize empty tensor with the necessary dimensions to work around a draw_bounding_boxes glitch
        t_boxes = torch.from_numpy(boxes) if len(boxes) else torch.Tensor().new_empty([0, 6])
        labels = [str(int(t[4])) for t in tracks]
        # print(t_boxes, boxes, labels)

        for track in tracks:
            yield track
            

    #     display.clear_output(wait=True)
/home/ruben/suspicion/trajpred/sort_cfotache.py:36: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  def iou(bb_test,bb_gt):
In [8]:
def track_videos(video_paths: list[Path]) -> dict:
    """
    returns tracked instances as dict with lists:
      {'obj_id': [ [x1, y1, x2, y2, obj_id,  obj_class ], ...]}
    """
    # collect instances of all videos with unique key
    video_paths = list(video_paths)
    tracked_instances = {}
    timer = Timer()
    for i, p in enumerate(video_paths):
        print(f"{i}/{len(video_paths)}: {p}")

        cachefile = tmpdir / (p.name + '.pcl')
        if cachefile.exists():
            print('\tLoad pickle')
            with cachefile.open('rb') as fp:
                new_instances = pickle.load(fp)
        else:
            #continue # to quickly test from cache
            new_instances = {}
            timer.tic()
            for track in track_video(p):
                track_id = f"{i}_{str(int(track[4]))}"
                if track_id not in new_instances:
                    new_instances[track_id] = []
                new_instances[track_id].append(track)
            with cachefile.open('wb') as fp:
                pickle.dump(new_instances, fp)
            print(" time for video: ", timer.toc())
        tracked_instances.update(new_instances)
        
    return tracked_instances
In [10]:
tracked_instances = track_videos(videos)
len(tracked_instances)
0/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_00_000060_000218.mp4
	Load pickle
1/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_09_001285_001336.mp4
	Load pickle
2/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_08_000895_000975.mp4
 time for video:  68.54228949546814
3/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010205_04_000545_000576.mp4
 time for video:  44.32873034477234
4/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_04_000929_000954.mp4
 time for video:  33.93052832285563
5/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_10_000923_000959.mp4
 time for video:  32.10865515470505
6/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010205_06_000830_000904.mp4
 time for video:  37.853862571716306
7/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_08_001308_001332.mp4
 time for video:  34.11902721722921
8/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_09_001484_001510.mp4
 time for video:  31.546590941292898
9/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_00_000047_000139.mp4
 time for video:  37.09887546300888
10/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010205_03_000370_000395.mp4
 time for video:  34.8637207614051
11/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010206_02_000414_000439.mp4
 time for video:  33.25210754871368
12/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_03_000865_000911.mp4
 time for video:  33.80455418066545
13/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_09_000857_000886.mp4
 time for video:  32.45200256506602
14/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_09_001010_001036.mp4
 time for video:  31.583646572553196
15/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_00_000000_000053.mp4
 time for video:  32.641018697193694
16/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_05_000499_000527.mp4
 time for video:  32.00097533861796
17/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_03_000400_000435.mp4
 time for video:  31.541199699044228
18/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_08_000705_000739.mp4
 time for video:  31.2182135441724
19/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_01_000712_000752.mp4
 time for video:  31.049288577503628
20/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_06_000671_000744.mp4
 time for video:  32.444181505002476
21/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_05_000856_000890.mp4
 time for video:  31.905359435081483
22/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_06_000620_000760.mp4
 time for video:  36.00819862456549
23/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_04_000374_000469.mp4
 time for video:  38.11233546517112
24/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_03_000270_000359.mp4
 time for video:  39.8342572917109
25/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_04_000646_000754.mp4
 time for video:  42.07712532083193
26/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010202_00_000001_000033.mp4
 time for video:  41.18428315162659
27/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_08_000838_000867.mp4
 time for video:  40.349428882965675
28/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_03_000606_000632.mp4
 time for video:  39.4015819673185
29/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010205_02_000301_000345.mp4
 time for video:  39.33332359790802
30/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_05_000515_000593.mp4
 time for video:  40.141791606771534
31/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010205_01_000207_000288.mp4
 time for video:  41.12426764170329
32/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_07_000942_000989.mp4
 time for video:  40.93559175152932
33/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_02_000167_000197.mp4
 time for video:  40.19536356627941
34/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_05_001013_001038.mp4
 time for video:  39.39239246195013
35/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010202_06_000784_000873.mp4
 time for video:  40.31660431974075
36/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_02_000347_000397.mp4
 time for video:  40.229051855632235
37/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_01_000072_000225.mp4
 time for video:  42.85438562101788
38/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_02_000349_000398.mp4
 time for video:  42.71105306857341
39/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010206_01_000124_000206.mp4
 time for video:  43.457573558154856
40/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010202_02_000161_000189.mp4
 time for video:  42.839277823766075
41/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010206_03_000546_000580.mp4
 time for video:  42.360220515727995
42/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_06_000702_000744.mp4
 time for video:  42.07845686121685
43/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_00_000000_000049.mp4
 time for video:  42.10020278749012
44/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_01_000125_000152.mp4
 time for video:  41.67217009566551
45/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_04_000568_000620.mp4
 time for video:  41.65529489517212
46/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_07_000748_000837.mp4
 time for video:  42.41890318128798
47/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_07_001195_001260.mp4
 time for video:  42.65501337466033
48/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_07_000601_000697.mp4
 time for video:  43.47806889452833
49/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_09_000886_000915.mp4
 time for video:  43.088951567808785
50/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_10_001372_001395.mp4
 time for video:  42.53280181300883
51/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_05_000658_000700.mp4
 time for video:  42.25315068721771
52/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010202_03_000313_000355.mp4
 time for video:  41.96725109979218
53/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_02_000790_000816.mp4
 time for video:  41.553693734682525
54/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_04_000457_000511.mp4
 time for video:  41.64384494187697
55/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010202_01_000055_000147.mp4
 time for video:  42.249081523330126
56/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_11_001524_001607.mp4
 time for video:  42.495637898011644
57/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_01_000254_000322.mp4
 time for video:  42.69913638489587
58/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_05_000591_000631.mp4
 time for video:  42.50969683914854
59/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_10_000904_000991.mp4
 time for video:  43.049635936473976
60/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_06_001064_001097.mp4
 time for video:  42.65656978397046
61/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_02_000150_000180.mp4
 time for video:  42.29242134888967
62/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010206_04_000720_000767.mp4
 time for video:  42.12751168501182
63/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_08_000807_000831.mp4
 time for video:  41.69687400325652
64/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_10_001092_001121.mp4
 time for video:  41.40194404692877
65/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010206_05_000797_000823.mp4
 time for video:  41.02733028307557
66/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_06_000550_000600.mp4
 time for video:  41.03235809986408
67/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010201_09_000770_000801.mp4
 time for video:  40.803716746243566
68/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010206_00_000007_000035.mp4
 time for video:  40.51510126910993
69/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_00_000030_000059.mp4
 time for video:  40.27838978697272
70/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_03_000201_000232.mp4
 time for video:  39.99749055461607
71/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010204_06_000913_000939.mp4
 time for video:  39.69219965934754
72/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010207_10_001549_001596.mp4
 time for video:  39.65503925001118
73/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010203_07_000775_000869.mp4
 time for video:  40.16896542575624
74/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_03_000470_000567.mp4
 time for video:  40.67782217835727
75/76: ../DATASETS/VIRAT_subset_0102x/VIRAT_S_010208_07_000768_000791.mp4
 time for video:  40.35599481092917
Out[10]:
5952

Project / Homography

Now that all trajectories are captured, these can be projected onto a flat surface by a homography. The necessary $H$ matrix is already provided by VIRAT in the homographies folder of their online data repository.

In [11]:
homography = list(source.glob('*img2world.txt'))[0]
H = np.loadtxt(homography, delimiter=',')

The homography matrix helps to transform points from image space to a flat world plane. The README_homography.txt from VIRAT describes:

Roughly estimated 3-by-3 homographies are included for convenience. Each homography H provides a mapping from image coordinate to scene-dependent world coordinate.

[xw,yw,zw]' = H*[xi,yi,1]'

xi: horizontal axis on image with left top corner as origin, increases right.
yi: vertical axis on image with left top corner as origin, increases downward.

xw/zw: world x coordinate
yw/zw: world y coordinate
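
To make the division by zw explicit, applying H to a single point by hand looks roughly like this (a sketch with a made-up pixel coordinate; cv2.perspectiveTransform, used below, performs the same normalization for whole arrays):

# project one image point (xi, yi) onto the world plane using H
xi, yi = 600.0, 400.0                      # hypothetical pixel coordinate
xw, yw, zw = H @ np.array([xi, yi, 1.0])   # [xw, yw, zw]' = H * [xi, yi, 1]'
world_x, world_y = xw / zw, yw / zw        # normalize to obtain the world position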

In [12]:
print(Image.open("../DATASETS/VIRAT_subset_0102x/VIRAT_0102_homography_img2world.png").size)
Image.open("../DATASETS/VIRAT_subset_0102x/VIRAT_0102_homography_img2world.png")
(1200, 900)
Out[12]:
In [41]:
from matplotlib import pyplot as plt

fig = plt.figure(figsize=(20,8))
ax1, ax2 = fig.subplots(1,2)

ax1.set_aspect(1)
ax2.imshow(Image.open("../DATASETS/VIRAT_subset_0102x/VIRAT_S_0102.jpg"))

for bboxes in tracked_instances.values():
    # use the bottom-center of each bounding box as the person's position on the ground
    traj = np.array([[[0.5 * (det[0]+det[2]), det[3]]] for det in bboxes])
    projected_traj = cv2.perspectiveTransform(traj,H)
    # plt.plot(projected_traj[:,0])
    ax1.plot(projected_traj[:,:,0].reshape(-1), projected_traj[:,:,1].reshape(-1))
    ax2.plot(traj[:,:,0].reshape(-1), traj[:,:,1].reshape(-1))
    
plt.show()

One of the things that stands out from these plots is the detections at the edges of the image, in particular along the bottom edge (which ends up on the right-hand side in the projected image). The 'anchor' of a person detection follows the bounding box, and at the edge the box is no longer moving but merely growing or shrinking.

Let's apply a simple filter to ditch the detections close to the edge.

In [47]:
def filter_edges(tracked_instances):
    filtered_tracks =  {}
    for track_id in tracked_instances:
        bboxes = tracked_instances[track_id]
        # keep boxes whose top (y1) and bottom (y2) stay above y=710, i.e. away from the bottom edge of the (presumably 720 px high) frame
        track = list(filter(lambda bbox: bbox[1] < 710 and bbox[3] < 710, bboxes))
        if len(track):
            filtered_tracks[track_id] = track
    return filtered_tracks

filtered_tracks = filter_edges(tracked_instances)

# validate it works:
bbox_count = sum([len(t) for t in tracked_instances.values()])
bbox_count_filtered = sum([len(t) for t in filtered_tracks.values()])

print(f"removed {((bbox_count-bbox_count_filtered)/bbox_count)*100:.2f}% of bounding boxes")
removed 2.87% of bounding boxes

Below we plot the filtered trajectories.

In [48]:
from matplotlib import pyplot as plt

fig = plt.figure(figsize=(20,8))
ax1, ax2 = fig.subplots(1,2)

ax1.set_aspect(1)
ax2.imshow(Image.open("../DATASETS/VIRAT_subset_0102x/VIRAT_S_0102.jpg"))

for bboxes in filtered_tracks.values():
    traj = np.array([[[0.5 * (det[0]+det[2]), det[3]]] for det in bboxes])
    projected_traj = cv2.perspectiveTransform(traj,H)
    # plt.plot(projected_traj[:,0])
    ax1.plot(projected_traj[:,:,0].reshape(-1), projected_traj[:,:,1].reshape(-1))
    ax2.plot(traj[:,:,0].reshape(-1), traj[:,:,1].reshape(-1))
    
plt.show()

What if the projection is a heatmap of where people are? For this I reuse the above plot and apply the blurring (agg filter) effects from the matplotlib documentation.

Note that person tracking would not really be necessary for this to work. Detection on a frame-by-frame basis would be sufficient to achieve something similar.

The plots below use two slightly different ways of plotting. The first shows the tracks as transparent lines, the second plots the detections as points. This last way of plotting would not strictly require the tracking, as only individual detections are aggregated.

In [49]:
from matplotlib import gridspec
import matplotlib.cm as cm
import matplotlib.transforms as mtransforms
from matplotlib.colors import LightSource
from matplotlib.artist import Artist


def smooth1d(x, window_len):
    # copied from https://scipy-cookbook.readthedocs.io/items/SignalSmooth.html
    s = np.r_[2*x[0] - x[window_len:1:-1], x, 2*x[-1] - x[-1:-window_len:-1]]
    w = np.hanning(window_len)
    y = np.convolve(w/w.sum(), s, mode='same')
    return y[window_len-1:-window_len+1]


def smooth2d(A, sigma=3):
    window_len = max(int(sigma), 3) * 2 + 1
    A = np.apply_along_axis(smooth1d, 0, A, window_len)
    A = np.apply_along_axis(smooth1d, 1, A, window_len)
    return A


class BaseFilter:

    def get_pad(self, dpi):
        return 0

    def process_image(self, padded_src, dpi):
        raise NotImplementedError("Should be overridden by subclasses")

    def __call__(self, im, dpi):
        pad = self.get_pad(dpi)
        padded_src = np.pad(im, [(pad, pad), (pad, pad), (0, 0)], "constant")
        tgt_image = self.process_image(padded_src, dpi)
        return tgt_image, -pad, -pad



class GaussianFilter(BaseFilter):
    """Simple Gaussian filter."""

    def __init__(self, sigma, alpha=0.5, color=(0, 0, 0)):
        self.sigma = sigma
        self.alpha = alpha
        self.color = color

    def get_pad(self, dpi):
        return int(self.sigma*3 / 72 * dpi)

    def process_image(self, padded_src, dpi):
        tgt_image = np.empty_like(padded_src)
        tgt_image[:, :, :3] = self.color
        tgt_image[:, :, 3] = smooth2d(padded_src[:, :, 3] * self.alpha,
                                      self.sigma / 72 * dpi)
        return tgt_image

gauss = GaussianFilter(2)

fig = plt.figure(figsize=(20,12))


# Create 2x2 sub plots
gs = gridspec.GridSpec(2, 2)

# (ax1, ax2), (ax3, ax4) = fig.subplots(2,2)
ax1 = fig.add_subplot(gs[0,0])
ax3 = fig.add_subplot(gs[1,0])
ax2 = fig.add_subplot(gs[:,1])

ax1.set_aspect(1)
ax3.set_aspect(1)

# show the image from the dataset on ax2
ax2.imshow(Image.open("../DATASETS/VIRAT_subset_0102x/VIRAT_S_0102.jpg"))

for bboxes in filtered_tracks.values():
    traj = np.array([[[0.5 * (det[0]+det[2]), det[3]]] for det in bboxes])
    projected_traj = cv2.perspectiveTransform(traj,H)
    # plt.plot(projected_traj[:,0])
    
    # option1: draw the tracks as lines
    line, = ax1.plot(projected_traj[:,:,0].reshape(-1), projected_traj[:,:,1].reshape(-1), color=(0,0,0,0.05))
    line.set_agg_filter(gauss)
    line.set_rasterized(True) # "to support mixed-mode renderers"

    # option2: draw the tracks merely as individual detection points (for which no tracking would have been necessary)
    points = ax3.scatter(projected_traj[:,:,0].reshape(-1), projected_traj[:,:,1].reshape(-1), color=(0,0,0,0.01))
    points.set_agg_filter(gauss)
    points.set_rasterized(True) # "to support mixed-mode renderers"

    ax2.plot(traj[:,:,0].reshape(-1), traj[:,:,1].reshape(-1))
    
plt.show()
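
As noted above, the point-based variant does not really need tracking. A plain 2D histogram over all projected detection points gives a similar density map without the agg-filter machinery (a sketch, assuming filtered_tracks and H as defined above; the bin count of 200 is an arbitrary choice):

# accumulate all projected detection points into a 2D histogram heatmap
points = []
for bboxes in filtered_tracks.values():
    traj = np.array([[[0.5 * (det[0] + det[2]), det[3]]] for det in bboxes])  # bottom-center anchors
    points.append(cv2.perspectiveTransform(traj, H).reshape(-1, 2))
points = np.concatenate(points)

heat, xedges, yedges = np.histogram2d(points[:, 0], points[:, 1], bins=200)
plt.figure(figsize=(8, 8))
plt.imshow(heat.T, origin='lower', extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
plt.show()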