# Visualise embeddings of the JDE model

This notebook embeds images from the training data using the JDE model. It then collects all embeddings and projects them to two dimensions using different techniques (e.g. UMAP, PCA). These projections are plotted; hover over a point in the plot to see the detection it originates from.

In a second step these images are drawn onto a canvas using the projected points.

In [1]:
import glob
import pickle
from typing import TypedDict
from tqdm.auto import tqdm

import os
import numpy as np

import logging
import argparse
logger = logging.getLogger(__name__)

In [2]:
from track import eval_seq

from utils.parse_config import parse_model_cfg
from utils.utils import mkdir_if_missing
import utils.datasets as datasets
from utils.log import logger as trmlog # we need to override this...


trmlog.setLevel(logging.INFO)

2023-04-05 13:28:53 [INFO]: generated new fontManager


In [3]:
import umap # should provide better results than t-SNE



In [70]:
# import matplotlib.pyplot as plt

from bokeh.io import curdoc, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.layouts import column, row, gridplot
from bokeh.plotting import figure, show
from bokeh.models import (PanTool,
                           ResetTool,
                          HoverTool, WheelZoomTool, BoxZoomTool)

# load bokeh
output_notebook()


In [5]:
# p1 = figure(plot_width=250, plot_height=250)
# r1 = p1.circle([1,2,3],[3,2,1], size=20)

# t = show(p1, notebook_handle=True)

In [6]:
# MOT16 test sequences to process; each entry is a path fragment that other
# cells join onto a base directory.
_MOT16_TEST_IDS = (1, 3, 6, 7, 8, 12, 14)
img_seqs = [f"MOT16/test/MOT16-{seq_id:02d}/" for seq_id in _MOT16_TEST_IDS]

In [49]:
# Data types

from dataclasses import dataclass
from pathlib import Path


Tlwh = list[float, float, float, float] #top left width height
Embedding = np.array
TrackerFrameEmbedding = list[Tlwh, Embedding]

@dataclass
class FrameEmbedding():
    pcl_filename: str
    tlwh: Tlwh
    embedding: Embedding
    umap: Embedding = None
    pca: Embedding = None
    
    @classmethod
    def from_tracker_embedding(cls, pcl_filename: str | Path, tfe: TrackerFrameEmbedding):
        return cls(pcl_filename=pcl_filename, tlwh=tfe[0], embedding=tfe[1])
    
    @property
    def img_filename(self):
        return self.pcl_filename[:-4] + '.jpg'
       


In [50]:
# load options; quick'n'dirty copy from track.py (as the Namespace object is used in the multitracker)

# Build the same option namespace that track.py would produce from its command line.
parser = argparse.ArgumentParser(prog='visualise_embeddings.py')

# (flag, type, default, help) for every supported option
option_specs = (
    ('--cfg', str, 'cfg/yolov3_1088x608.cfg', 'cfg file path'),
    ('--weights', str, 'jde.1088x608.uncertainty.pt', 'path to weights file'),
    ('--iou-thres', float, 0.5, 'iou threshold required to qualify as detected'),
    ('--conf-thres', float, 0.5, 'object confidence threshold'),
    ('--nms-thres', float, 0.4, 'iou threshold for non-maximum suppression'),
    ('--min-box-area', float, 200, 'filter out tiny boxes'),
    ('--track-buffer', int, 30, 'tracking buffer'),
    ('--dataset-dir', str, "/datasets", 'Path to directory with datasets'),
    ('--experiment-name', str, "embedding_test", "name to prefix output directory with"),
    ('--output-dir', str, "./OUT", "directory for results"),
)
for flag, value_type, default_value, help_text in option_specs:
    parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

# There is no command line inside a notebook: parse an empty list to get the defaults.
opt = parser.parse_args([])

logger.setLevel(logging.INFO)
data_type = 'mot'

# All results of this experiment live below <output-dir>/<experiment-name>.
result_path = os.path.join(opt.output_dir, opt.experiment_name)
mkdir_if_missing(result_path)

# Read the model config; the first entry supplies the network input size,
# which is stored on opt so it is passed on to JDETracker.
cfg_dict = parse_model_cfg(opt.cfg)
opt.img_size = [int(cfg_dict[0][dimension]) for dimension in ('width', 'height')]


In [None]:
 # run tracking on all img_seqs

for seq in tqdm(img_seqs):
    logger.info('start seq: {}'.format(seq))
    seq_dir = os.path.join(opt.dataset_dir, seq)
    dataloader = datasets.LoadImages(os.path.join(seq_dir, 'img1'), opt.img_size)
    # result_filename = os.path.join(result_path, '{}.txt'.format(seq))
    # Read the sequence metadata inside a context manager so the file handle
    # is closed right away instead of being left to the garbage collector.
    with open(os.path.join(seq_dir, 'seqinfo.ini')) as meta_fp:
        meta_info = meta_fp.read()
    # The frame rate is the text between "frameRate=" (10 characters) and the
    # line break preceding "seqLength".
    frame_rate = int(meta_info[meta_info.find('frameRate')+10:meta_info.find('\nseqLength')])
    # NOTE(review): presumably eval_seq writes the per-detection files into save_dir
    # that the loading cell later globs for ("*-*.pcl") - confirm in track.py.
    nf, ta, tc = eval_seq(opt, dataloader, data_type, None,
                          save_dir=os.path.join(result_path, seq), save_figures=True, save_img=False, show_image=False, frame_rate=frame_rate)


In [51]:
# Re-derive the experiment directory so this cell does not depend on the tracking cell,
# and place the cache file for the projected embeddings directly inside it.
result_path = os.path.join(opt.output_dir, opt.experiment_name)
embedding_pcl = os.path.join(opt.output_dir, opt.experiment_name, 'umap_embeddings.pcl')

Load all pre-calculated embeddings from disk and project them. The collection of embeddings and their projections is saved to disk, so that later stages (and re-runs of this notebook) can load it instead of recomputing the projections.

In [84]:
from sklearn import decomposition

frame_embeddings: list[FrameEmbedding] = []

if os.path.exists(embedding_pcl):
    # NOTE: unpickling executes arbitrary code - only load cache files written by this notebook.
    with open(embedding_pcl, 'rb') as fp:
        frame_embeddings = pickle.load(fp)
    logger.info(f'loaded {len(frame_embeddings)} embeddings')
else:
    for seq in tqdm(img_seqs):
        seq_path = os.path.join(result_path, seq)
        # glob yields files in arbitrary order; sort so the subsample below
        # selects the same files on every run.
        frame_paths = sorted(glob.glob(f"{seq_path}/*-*.pcl"))
        # TODO skip 50% for now
        for frame_path in tqdm(frame_paths[::2], desc=seq, leave=False):
            with open(frame_path, 'rb') as fp:
                tracker_embedding = pickle.load(fp)
            fe = FrameEmbedding.from_tracker_embedding(frame_path, tracker_embedding)
            frame_embeddings.append(fe)

    logger.info(f'loaded {len(frame_embeddings)} embeddings')
    if not frame_embeddings:
        # Fail with a clear message instead of a cryptic error from UMAP/PCA.
        raise FileNotFoundError(
            f"no '*-*.pcl' embedding files found below {result_path}; run the tracking cell first"
        )

    # Collect the raw vectors once; both projections are fitted on the same input.
    embedding_vectors = [e.embedding for e in frame_embeddings]

    logger.info('transform using UMAP')
    # UMAP is stochastic: fix the seed so that the plot and the cached file are reproducible.
    reducer = umap.UMAP(n_components=2, random_state=42)
    umap_embeddings = reducer.fit_transform(embedding_vectors)
    for fe, projected in zip(frame_embeddings, umap_embeddings):
        fe.umap = projected

    logger.info('transform using PCA')
    pca = decomposition.PCA(n_components=2)
    pca_embeddings = pca.fit_transform(embedding_vectors)
    for fe, projected in zip(frame_embeddings, pca_embeddings):
        fe.pca = projected

    with open(embedding_pcl, 'wb') as fp:
        pickle.dump(frame_embeddings, fp)
    # Log only after the dump succeeded.
    logger.info('saved transformed embeddings')

  0%|          | 0/7 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

2023-04-05 14:27:09 [INFO]: loaded 73232 embeddings
2023-04-05 14:27:09 [INFO]: transform using UMAP
2023-04-05 14:27:37 [INFO]: transform using PCA
2023-04-05 14:27:38 [INFO]: saved transformed embeddings


In [63]:

# print(embeddings)
# Stack the per-detection projections into one array each, one row per entry
# of frame_embeddings, in the same order.
umap_embeddings, pca_embeddings = (
    np.array([getattr(fe, projection) for fe in frame_embeddings])
    for projection in ('umap', 'pca')
)
   

In [64]:

import base64
import cv2


def b64_image_files(frame_embeddings: "list[FrameEmbedding]", scale: float = .3):
    """Encode the image of every frame embedding as a base64 PNG data URL.

    Each image is read from ``fe.img_filename``, resized by ``scale`` and
    PNG-encoded.

    Args:
        frame_embeddings: objects exposing an ``img_filename`` attribute.
        scale: factor applied to both image dimensions before encoding.

    Returns:
        A list with exactly one entry per input, in the same order: the data
        URL, or ``None`` if the image could not be read or encoded. The
        failure is printed, and keeping a placeholder keeps the list aligned
        with ``frame_embeddings``.
    """
    urls = []
    for i, fe in enumerate(frame_embeddings):
        url = None
        try:
            im = cv2.imread(fe.img_filename)
            if im is None:
                # cv2.imread signals an unreadable/missing file with None rather than raising
                raise FileNotFoundError('could not read image')
            new_dimension = (int(im.shape[1] * scale), int(im.shape[0] * scale))
            im = cv2.resize(im, new_dimension)
            encoded_ok, byte_data = cv2.imencode('.png', im)
            if not encoded_ok:
                raise ValueError('could not encode image as PNG')
            # Build the URL inside the try block, so a failure can never reuse
            # the bytes of a previously encoded image.
            url = 'data:image/png;base64,' + base64.b64encode(byte_data).decode('utf-8')
        except Exception as e:
            print(i, fe.img_filename, e)

        urls.append(url)
    return urls

In [65]:
 
# source = ColumnDataSource(data={'x': embeddings[:, 0], 'y': embeddings[:, 1], 'b64': b64_image_files(frame_embeddings), 'fn': [e.img_filename for e in frame_embeddings] })
# One row per detection: the UMAP coordinates ('x', 'y'), the PCA coordinates
# ('pca_x', 'pca_y') and the image file name ('fn').
projection_columns = {
    'x': umap_embeddings[:, 0],
    'y': umap_embeddings[:, 1],
    'pca_x': pca_embeddings[:, 0],
    'pca_y': pca_embeddings[:, 1],
    'fn': [fe.img_filename for fe in frame_embeddings],
}
source = ColumnDataSource(data=projection_columns)


In [85]:


# thanks to https://github.com/jni/blob-explorer/blob/bd9fa676a2a23317e2ea84bdf48b19e71b9e75d4/picker.py#L24
# who uses base64 encoding, but in VScode we can just use the _path_ to the file
tooltip = """
            <img height=100 src='@fn'>
              """


def _navigation_and_hover_tools():
    """Return a fresh list of zoom/pan/reset tools plus the image hover tool."""
    return [BoxZoomTool(), PanTool(), WheelZoomTool(), ResetTool(), HoverTool(tooltips=tooltip)]


# Each figure gets its own tool instances.
tools1 = _navigation_and_hover_tools()
tools2 = _navigation_and_hover_tools()


In [86]:

# Scatter plot of the UMAP projection; the 'x' and 'y' columns of `source`
# hold the UMAP coordinates and are named explicitly here.
p_umap = figure(title='UMAP projection', width=800, height=800, tools=tools1)
r_umap = p_umap.circle('x', 'y', source=source, size=10, color="navy", alpha=0.5)

In [87]:
# Scatter plot of the PCA projection, drawn from the same data source as the UMAP figure.
p_pca = figure(title='PCA projection', width=800, height=800, tools=tools2)
r_pca = p_pca.circle(x='pca_x', y='pca_y', source=source, size=10, color="red", alpha=0.5)

In [83]:
# Show both projections side by side and keep the notebook handle of the rendered output.
projection_grid = gridplot([[p_umap, p_pca]])
handle = show(projection_grid, notebook_handle=True)

# Stage 2: Generate a grid of images from the projections

Now that we have individual points, we can perhaps make better sense of the patterns if we see all points at once. As rendering every image at once is virtually impossible, we instead render a grid of images: for each cell of the grid, find the point closest to the cell's center and draw that image.

In [88]:
from scipy import spatial

In [89]:
# Index the 2-D UMAP points in a KD-tree, so nearest-point lookups are fast.
umap_spatial_tree = spatial.KDTree(data=umap_embeddings)


In [92]:
# Bounding box of the UMAP projection. The vectorised NumPy reductions replace
# the built-in min()/max(), which iterate the array element by element in Python.
min_x, min_y = umap_embeddings[:, 0].min(), umap_embeddings[:, 1].min()
max_x, max_y = umap_embeddings[:, 0].max(), umap_embeddings[:, 1].max()

min_x, max_x, min_y, max_y

(-7.052247, 25.377386, -12.119581, 18.601715)

In [96]:
grid_size = 30      # edge length of one grid cell on the canvas, in pixels
grid_items = 100    # number of grid cells along each axis
# Black square canvas with 3 uint8 channels, large enough for grid_items x grid_items cells.
canvas = np.zeros((grid_size * grid_items, grid_size*grid_items, 3), np.uint8)

# NOTE(review): the original cell ended in a dangling `scale_x = ` (a SyntaxError).
# It is completed here as the extent of one grid cell in UMAP units, matching the
# cell-centre formula used by the grid lookup loop - confirm the intended definition.
scale_x = (max_x - min_x) / grid_items
scale_y = (max_y - min_y) / grid_items


In [99]:
# Extent of the UMAP projection along each axis and half a cell in pixels,
# computed once up front.
span_x = max_x - min_x
span_y = max_y - min_y
half_cell = grid_size/2

for ix in range(grid_items):
    # centre of this grid column, in canvas pixels (not used yet)
    cx = ix * grid_size + half_cell
    # the same centre expressed in UMAP coordinates
    embedding_x = (ix+0.5) / grid_items * span_x + min_x

    for iy in range(grid_items):
        cy = iy * grid_size + half_cell
        embedding_y = (iy+0.5) / grid_items * span_y + min_y

        # nearest projected detection to the centre of cell (ix, iy)
        cell_centre = [embedding_x, embedding_y]
        distance, index = umap_spatial_tree.query(cell_centre)
        nearest = frame_embeddings[index]
        print(distance, nearest.img_filename)

6.300341327870013 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
6.036267520128621 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
5.776461712056195 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
5.521526406397165 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
5.272168188860236 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
5.029216696078724 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
4.793646143594954 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
4.566598924066698 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
4.349410043439232 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
4.143629969972174 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
3.9510417030868856 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
3.773665507223767 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
3.613742075838796 ./OUT/embedding_test/MOT16/test/MOT16-03/00481-64.jpg
3.473682747512149 ./OUT/embedding_test/MOT16/test/MOT16-03/0048