trajpred/02_track_objects.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "from pathlib import Path\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "import torch\n",
    "from torchvision.io.video import read_video\n",
    "import matplotlib.pyplot as plt\n",
    "from torchvision.utils import draw_bounding_boxes\n",
    "from torchvision.transforms.functional import to_pil_image\n",
    "from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "source = Path('../DATASETS/VIRAT_subset_0102x')\n",
    "# sorted lists instead of one-shot generators: the order is reproducible and the names can be used more than once\n",
    "videos = sorted(source.glob('*.mp4'))\n",
    "\n",
    "# image -> world homography, stored as comma separated text next to the videos\n",
    "homography_files = sorted(source.glob('*img2world.txt'))\n",
    "if not homography_files:\n",
    "    raise FileNotFoundError(f\"no '*img2world.txt' homography file found in {source}\")\n",
    "homography = homography_files[0]\n",
    "H = np.loadtxt(homography, delimiter=',')"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The homography matrix helps to transform points from image space to a flat world plane. The `README_homography.txt` from VIRAT describes:\n",
    "\n",
    "> Roughly estimated 3-by-3 homographies are included for convenience. \n",
    "> Each homography H provides a mapping from image coordinate to scene-dependent world coordinate.\n",
    ">   \n",
    "> [xw,yw,zw]' = H*[xi,yi,1]'\n",
    "> \n",
    "> xi: horizontal axis on image with left top corner as origin, increases right.\n",
    "> yi: vertical axis on image with left top corner as origin, increases downward.\n",
    "> \n",
    "> xw/zw: world x coordinate\n",
    "> yw/zw: world y coordinate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# H.dot(np.array([20,300, 1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The clip to analyse is pinned explicitly, so every run processes the same video.\n",
    "# (The former `list(videos)[0]` was overwritten right away and raised IndexError once `videos` had been consumed.)\n",
    "video_path = Path(\"../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_00_000060_000218.mp4\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PosixPath('../DATASETS/VIRAT_subset_0102x/VIRAT_S_010200_00_000060_000218.mp4')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "video_path"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Suggestions from: https://stackabuse.com/retinanet-object-detection-with-pytorch-and-torchvision/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "device(type='cuda')"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# use the GPU when torch reports one as available, otherwise stay on the CPU\n",
    "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
    "device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT\n",
    "# Build the detector, move it to the chosen device and switch it to inference mode.\n",
    "# .to() and .eval() both hand back the module itself, so the calls can be chained.\n",
    "model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.35).to(device).eval()\n",
    "# The preprocessing transform that belongs to these weights\n",
    "preprocess = weights.transforms().to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hub.set_dir()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "video = cv2.VideoCapture(str(video_path))\n",
    "# Constructing a VideoCapture does not raise for a missing or undecodable file;\n",
    "# without this check the frame loop would just report the end of the stream immediately.\n",
    "if not video.isOpened():\n",
    "    raise RuntimeError(f\"could not open video: {video_path}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> The score_thresh argument defines the threshold at which an object is detected as an object of a class. Intuitively, it's the confidence threshold, and we won't classify an object to belong to a class if the model is less than 35% confident that it belongs to a class."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The result from a single prediction coming from `model(batch)` looks like:\n",
    "\n",
    "```python\n",
    "{'boxes': tensor([[5.7001e+02, 2.5786e+02, 6.3138e+02, 3.6970e+02],\n",
    "         [5.0109e+02, 2.4508e+02, 5.5308e+02, 3.4852e+02],\n",
    "         [3.4096e+02, 2.7015e+02, 3.6156e+02, 3.1857e+02],\n",
    "         [5.0219e-01, 3.7588e+02, 9.7911e+01, 7.2000e+02],\n",
    "         [3.4096e+02, 2.7015e+02, 3.6156e+02, 3.1857e+02],\n",
    "         [8.3241e+01, 5.8410e+02, 1.7502e+02, 7.1743e+02]]),\n",
    " 'scores': tensor([0.8525, 0.6491, 0.5985, 0.4999, 0.3753, 0.3746]),\n",
    " 'labels': tensor([64, 64,  1, 64, 18, 86])}\n",
    "```"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Now with SORT tracking\n",
    "\n",
    "Using a sort implementation originally by Alex Bewley, but adapted by [Chris Fotache](https://github.com/cfotache/pytorch_objectdetecttrack/blob/master/README.md). For an example implementation, see [his notebook](https://github.com/cfotache/pytorch_objectdetecttrack/blob/master/PyTorch_Object_Tracking.ipynb).\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sort_cfotache import Sort\n",
    "\n",
    "# when True, the frame loop renders every processed frame with its track boxes\n",
    "display_image = True\n",
    "\n",
    "# tracker with its default settings; the frame loop calls update() on it once per frame\n",
    "mot_tracker = Sort()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "# track id (as str) -> list of the rows the tracker returned for that id, in frame order\n",
    "tracked_instances = dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[58], line 29\u001b[0m\n\u001b[1;32m     27\u001b[0m \u001b[39m# no_grad can be used on inference, should be slightly faster\u001b[39;00m\n\u001b[1;32m     28\u001b[0m \u001b[39mwith\u001b[39;00m torch\u001b[39m.\u001b[39mno_grad():\n\u001b[0;32m---> 29\u001b[0m     predictions \u001b[39m=\u001b[39m model(batch)\n\u001b[1;32m     30\u001b[0m prediction \u001b[39m=\u001b[39m predictions[\u001b[39m0\u001b[39m] \u001b[39m# we feed only one frame at the once\u001b[39;00m\n\u001b[1;32m     32\u001b[0m mask \u001b[39m=\u001b[39m prediction[\u001b[39m'\u001b[39m\u001b[39mlabels\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m \u001b[39m# if we want more than one: np.isin(prediction['labels'], [1,86])\u001b[39;00m\n",
      "File \u001b[0;32m~/suspicion/trajpred/.venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1496\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1497\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1498\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1499\u001b[0m         \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1500\u001b[0m         \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m     \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m   1502\u001b[0m \u001b[39m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[39m=\u001b[39m [], []\n",
      "File \u001b[0;32m~/suspicion/trajpred/.venv/lib/python3.9/site-packages/torchvision/models/detection/retinanet.py:663\u001b[0m, in \u001b[0;36mRetinaNet.forward\u001b[0;34m(self, images, targets)\u001b[0m\n\u001b[1;32m    660\u001b[0m     split_anchors \u001b[39m=\u001b[39m [\u001b[39mlist\u001b[39m(a\u001b[39m.\u001b[39msplit(num_anchors_per_level)) \u001b[39mfor\u001b[39;00m a \u001b[39min\u001b[39;00m anchors]\n\u001b[1;32m    662\u001b[0m     \u001b[39m# compute the detections\u001b[39;00m\n\u001b[0;32m--> 663\u001b[0m     detections \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpostprocess_detections(split_head_outputs, split_anchors, images\u001b[39m.\u001b[39;49mimage_sizes)\n\u001b[1;32m    664\u001b[0m     detections \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransform\u001b[39m.\u001b[39mpostprocess(detections, images\u001b[39m.\u001b[39mimage_sizes, original_image_sizes)\n\u001b[1;32m    666\u001b[0m \u001b[39mif\u001b[39;00m torch\u001b[39m.\u001b[39mjit\u001b[39m.\u001b[39mis_scripting():\n",
      "File \u001b[0;32m~/suspicion/trajpred/.venv/lib/python3.9/site-packages/torchvision/models/detection/retinanet.py:531\u001b[0m, in \u001b[0;36mRetinaNet.postprocess_detections\u001b[0;34m(self, head_outputs, anchors, image_shapes)\u001b[0m\n\u001b[1;32m    529\u001b[0m scores_per_level \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39msigmoid(logits_per_level)\u001b[39m.\u001b[39mflatten()\n\u001b[1;32m    530\u001b[0m keep_idxs \u001b[39m=\u001b[39m scores_per_level \u001b[39m>\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscore_thresh\n\u001b[0;32m--> 531\u001b[0m scores_per_level \u001b[39m=\u001b[39m scores_per_level[keep_idxs]\n\u001b[1;32m    532\u001b[0m topk_idxs \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mwhere(keep_idxs)[\u001b[39m0\u001b[39m]\n\u001b[1;32m    534\u001b[0m \u001b[39m# keep only topk scoring predictions\u001b[39;00m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# TODO make into loop\n",
    "%matplotlib inline\n",
    "\n",
    "\n",
    "import pylab as pl\n",
    "from IPython import display\n",
    "from utils.timer import Timer\n",
    "\n",
    "i=0\n",
    "timer = Timer()\n",
    "while True:\n",
    "    timer.tic()\n",
    "    ret, frame = video.read()\n",
    "    i+=1\n",
    "    \n",
    "    if not ret:\n",
    "        print(\"Can't receive frame (stream end?). Exiting ...\")\n",
    "        break\n",
    "\n",
    "    t = torch.from_numpy(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))\n",
    "    t.shape\n",
    "    # image = image[np.newaxis, :] \n",
    "    t = t.permute(2, 0, 1)\n",
    "    t.shape\n",
    "\n",
    "    batch = preprocess(t)[None, :].to(device)\n",
    "    # no_grad can be used on inference, should be slightly faster\n",
    "    with torch.no_grad():\n",
    "        predictions = model(batch)\n",
    "    prediction = predictions[0] # we feed only one frame at the once\n",
    "\n",
    "    mask = prediction['labels'] == 1 # if we want more than one: np.isin(prediction['labels'], [1,86])\n",
    "\n",
    "    scores = prediction['scores'][mask]\n",
    "    labels = prediction['labels'][mask]\n",
    "    boxes = prediction['boxes'][mask]\n",
    "    \n",
    "    # TODO: introduce confidence and NMS supression: https://github.com/cfotache/pytorch_objectdetecttrack/blob/master/PyTorch_Object_Tracking.ipynb\n",
    "    # (which I _think_ we better do after filtering)\n",
    "    # alternatively look at Soft-NMS https://towardsdatascience.com/non-maximum-suppression-nms-93ce178e177c\n",
    "\n",
    "    \n",
    "    #  dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]\n",
    "    detections = np.array([np.append(bbox, [score, label]) for bbox, score, label in zip(boxes.cpu(), scores.cpu(), labels.cpu())])\n",
    "    # print(detections)\n",
    "    tracks = mot_tracker.update(detections)\n",
    "\n",
    "    # now convert back to boxes and labels\n",
    "    # print(tracks)\n",
    "    boxes = np.array([t[:4] for t in tracks])\n",
    "    # initialize empty with the necesserary dimensions for drawing_bounding_boxes glitch\n",
    "    t_boxes = torch.from_numpy(boxes) if len(boxes) else torch.Tensor().new_empty([0, 6])\n",
    "    labels = [str(int(t[4])) for t in tracks]\n",
    "    # print(t_boxes, boxes, labels)\n",
    "\n",
    "\n",
    "    for track in tracks:\n",
    "        # TODO add to tracked_instances\n",
    "        track_id = str(int(track[4]))\n",
    "        if track_id not in tracked_instances:\n",
    "            tracked_instances[track_id] = []\n",
    "        tracked_instances[track_id].append(track)\n",
    "\n",
    "    \n",
    "    # labels = [weights.meta[\"categories\"][i] for i in labels]\n",
    "\n",
    "    if display_image:\n",
    "        box = draw_bounding_boxes(t, boxes=t_boxes,\n",
    "                                labels=labels,\n",
    "                                colors=\"cyan\",\n",
    "                                width=2, \n",
    "                                font_size=30,\n",
    "                                # font='Arial'\n",
    "                                )\n",
    "\n",
    "        im = to_pil_image(box.detach())\n",
    "\n",
    "        display.display(im, f\"frame {i}\")\n",
    "    print(prediction)\n",
    "    print(\"time for frame: \", timer.toc(), \", avg:\", 1/timer.average_time, \"fps\")\n",
    "\n",
    "    display.clear_output(wait=True)\n",
    "\n",
    "    # break # for now\n",
    "    # pl.clf()\n",
    "    # # pl.plot(pl.randn(100))\n",
    "    # pl.figure(figsize=(24,50))\n",
    "    # # fig.axes[0].imshow(img)\n",
    "    # pl.imshow(im)\n",
    "    # display.display(pl.gcf(), f\"frame {i}\")\n",
    "    # display.clear_output(wait=True)\n",
    "    # time.sleep(1.0)\n",
    "\n",
    "    # fig, ax = plt.subplots(figsize=(16, 12))\n",
    "    # ax.imshow(im)\n",
    "    # plt.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['22', '24', '26', '27', '30', '31', '32', '33', '37'])"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# open the file once and reuse the image for both the size print-out and the inline preview\n",
    "homography_img = Image.open(\"../DATASETS/VIRAT_subset_0102x/VIRAT_0102_homography_img2world.png\")\n",
    "print(homography_img.size)\n",
    "homography_img"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "44\n",
      "43\n",
      "42\n",
      "45\n",
      "46\n",
      "49\n",
      "52\n",
      "53\n",
      "56\n",
      "57\n",
      "59\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA9QAAAH5CAYAAABgeXZFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOz9eawt21Xfj35mU81qdnva25zre91hG4xNbGKaoPdofqEJiDaJ9UDPjzxBFJ6DiJVEOAogQoKVhIcIKAKBRGIQSEQhpHko1ssz+QUIBmMbDLZxf9tz7+l3v9aqqtm8P+asWmufs8/1vcc+PjZ3fNDx3XvtWlW1qmYt5neO7xhDxRgjgiAIgiAIgiAIgiA8L/S9PgFBEARBEARBEARB+HxEBLUgCIIgCIIgCIIg3AEiqAVBEARBEARBEAThDhBBLQiCIAiCIAiCIAh3gAhqQRAEQRAEQRAEQbgDRFALgiAIgiAIgiAIwh0ggloQBEEQBEEQBEEQ7gB7r0/g2Qgh8PTTT7O2toZS6l6fjiAIgiAQY+Tg4ID7778frWVdWhAEQRBeyHxOC+qnn36aCxcu3OvTEARBEIRbePLJJ3nwwQfv9WkIgiAIgnAP+ZwW1Gtra0CatKyvr9/jsxEEQRAE2N/f58KFC8P/jxIEQRAE4YXL57Sg7m3e6+vrIqgFQRCEzykkFUkQBEEQBEn+EgRBEARBEARBEIQ7QAS1IAiCIAiCIAiCINwBIqgFQRAEQRAEQRAE4Q4QQS0IgiAIgiAIgiAId4AIakEQBEEQBEEQBEG4A0RQC4IgCIIgCIIgCMIdIIJaEARBEARBEARBEO4AEdSCIAiCIAiCIAiCcAeIoBYEQRAEQRAEQRCEO0AEtSAIgiAIgiAIgiDcASKoBUEQBEEQBEEQBOEOEEEtCIIgCIIgCIIgCHeACGpBEARBEARBEARBuANEUAuCIAiCIAiCIAjCHSCCWhAEQRAEQRAEQRDuABHUgiAIgiAIgiAIgnAHiKAWBEEQBEEQBEEQhDtABLXwabPoPG/+9ffxvf/u3ff6VARBEARBEARBED5r3FVB7b3nR37kR3jkkUcYjUa85CUv4Sd+4ieIMd7NwwqfZT559Yj/z589w//8yFV+5V2P3evTEQRBEARBEARB+Kxg7+bO/+W//Jf8/M//PG9/+9v5wi/8Qt7znvfwvd/7vWxsbPCDP/iDd/PQwmeRV963Nvz87//3Y/zfv/zhe3cygiAIgiAIgiAInyXuaoT6D/7gD/jWb/1W/sbf+Bs8/PDDfNd3fRd//a//dd79brEG/2VCKcW/+PYvAiDESAjiQBAEQRAEQRAE4S8/d1VQf8VXfAXvfOc7+ehHPwrA+9//fn7/93+fb/zGbzxx+6Zp2N/fP/ZP+Pzg2177AGuV5bHrM37/49fu9ekIgiAIgiAIgiDcde6qoP7hH/5h3vjGN/KKV7yCoij4ki/5En7oh36I7/7u7z5x+7e97W1sbGwM/y5cuHA3T0/4DDKpLN/5ugcB+JV3PX6Pz0YQBEEQBEEQBOHuc1cF9X/4D/+BX/u1X+PXf/3Xed/73sfb3/52fuqnfoq3v/3tJ27/1re+lb29veHfk08+eTdPT/gM8z1f9iIAfufDl3nyxuwen40gCIIgCIIgCMLd5a4WJftH/+gfDVFqgFe/+tU8/vjjvO1tb+NNb3rTLdtXVUVVVXfzlIS7yEvPTvnKl57if3/8Or/2R0/ww9/4int9SoIgCIIgCIIgCHeNuxqhns1maH38EMYYQgh387DCPeRNucL3f3jPk7RO7rMgCIIgCIIgCH95uauC+lu+5Vv4F//iX/Dbv/3bPPbYY/zWb/0WP/3TP823f/u3383DCveQr3nFWc6tV9w4avm3//Pj9/p0BEEQBEEQBEEQ7hp3VVD/3M/9HN/1Xd/FD/zAD/DKV76Sf/gP/yF/9+/+XX7iJ37ibh5WuIdYo/kn3/RKAP6/
H7osUWpBEARBEARBEP7SomKMn7NNg/f399nY2GBvb4/19fV7fTrC8+A/vvcpvvmL76MuzL0+FUEQhM8o8v+bBEEQBEHouatFyYQXLt+VW2gJgiAIgiAIgiD8ZeWuWr4FQRAEQRAEQRAE4S8rIqgFQRAEQRAEQRAE4Q4QQS0IgiAIgiAIgiAId4AIakEQBEEQBEEQBEG4A0RQC4IgCIIgCIIgCMIdIIJaEARBEARBEARBEO4AEdSCIAiCIAiCIAiCcAeIoBYEQRAEQRAEQRCEO0AEtSAIgiAIgiAIgiDcASKoBUEQBEEQBEEQBOEOEEEtCIIgCIIgCIIgCHeACGpBEARBEARBEARBuANEUAuCIAiCIAiCIAjCHSCCWhAEQRAEQRAEQRDuABHUgiAIgiAIgiAIgnAHiKAWBEEQBEEQBEEQhDtABLUgCIIgCIIgCIIg3AEiqAVBEARBEARBEAThDhBBLQiCIAiCIAiCIAh3gAhqQRAEQRAEQRAEQbgDRFALgiAIgiAIgiAIwh0ggloQBEEQBEEQBEEQ7gAR1IIgCIIgCIIgCIJwB4igFgRBEARBEARBEIQ7QAS1IAiCIAiCIAiCINwBIqgFQRAEQRAEQRAE4Q4QQS0IgiAIgiAIgiAId4AIakEQBEEQBEEQBEG4A0RQC4IgCIIgCIIgCMIdIIJaEARBEARBEARBEO4AEdSCIAiCIAiCIAiCcAeIoBYEQRAEQRAEQRCEO0AEtSAIgiAIgiAIgiDcASKoBUEQBEEQBEEQBOEOEEEtCIIgCIIgCIIgCHeACGpBEARBEARBEARBuANEUAuCIAiCIAiCIAjCHSCCWhAEQRAEQRAEQRDuABHUgiAIgiAIgiAIgnAHiKAWBEEQBEEQBEEQhDtABLUgCIIgCIIgCIIg3AEiqAVBEARBEARBEAThDhBBLQiCIAiCIAiCIAh3gAhq4QXFHz92g5/5/32Ua4fNvT4VQRAEQRAEQRA+z7H3+gQE4bPJj/2XD/KhZ/Z55PSEb33tA/f6dARBEARBEARB+DxGItTCC4qvevlpAP7Lnz59j89EEARBEARBEITPd0RQCy8o3vilDwHwOx++wn94z5P3+GwEQRAEQRAEQfh8RgS18ILikdMTKpuG/T/+j3/GzlF7j89IEARBEARBEITPV0RQCy84fvsH/9rwcxfCPTwTQRAEQRAEQRA+n3nBFCULITCbzZhOp/f6VIR7zEvPrvGxf/GNHC4cW5PyXp+OIAiCIAiCIAifp7xgBPU73vEO3v3ud7O9vc0P/uAP3uvTEe4xhdEipgVBEARBEARB+LR4wVi+nXMA3Lhxg1/8xV+8x2cjCIIgCIIgCIIgfL7zghHU3/It3zL8/PTTT/PLv/zL9/BsBEEQBEEQBEEQhM93XjCCWinFj/7ojw6/P/HEE/zSL/3SPTwjQRAEQRAEQRAE4fOZF4ygBtBa85a3vGX4/eLFi/zJn/zJPTwjQRAEQRAEQRAE4fOVF5SgBlhfX+c7v/M7h9//y3/5L/zZn/3ZPTwjQRAEQRAEQRAE4fORF5ygBnj1q1/Nt3/7tw+//6f/9J/4wAc+8Gnt89B5vv+Dj/HYvPl0T08QBEEQBEEQBEH4POAFKagBXvOa1/AN3/ANw+//8T/+R46Ojp7Te4+cJ8Q4/P5fr+zy0t/7c/7rlV2+7A//gvfuPbf9CIIgCIIgCIIgCJ+/vGAFNcCXfdmX8VVf9VXD74899tinfM+/eewyL/m9P+f+//P9/D8/8Cj/70cv8Xc/ePx9f+N9H+NDh/PP8NkKgiAIgiAIgiAIn0u8oAU1wNd8zdfw+te/HoDf/M3f5BOf+MSzbv/R2WL4+bev7vGvH7tEPGG7/3nj4DN5moIgCIIgCIIgCMLnGC94Qa2U4pu+6Zt41ateRQiB3/iN3+DixYsnbrvwgR99yf3HXvvq7bVjv3//g2f4lVc/wvc9ePqunbMgCIIgCIIgCIJw73nBC2pI7bS+4zu+g0ce
eYS2bfm1X/s1rl27dmyb/351l4d/9894zR98kP/XQ2eH17/21Dr/80u/gC9dnzAxmv/HA6f566c3KLVcWkEQBEEQBEEQhL/
      "text/plain": [
       "<Figure size 1200x600 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "\n",
    "fig = plt.figure(figsize=(12,6))\n",
    "ax1, ax2 = fig.subplots(1,2)\n",
    "\n",
    "ax2.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))\n",
    "\n",
    "for track_id in tracked_instances:\n",
    "    # print(track_id)\n",
    "    bboxes = tracked_instances[track_id]\n",
    "    traj = np.array([[[0.5 * (det[0]+det[2]), det[3]]] for det in bboxes])\n",
    "    projected_traj = cv2.perspectiveTransform(traj,H)\n",
    "    # plt.plot(projected_traj[:,0])\n",
    "    ax1.plot(projected_traj[:,:,0].reshape(-1), projected_traj[:,:,1].reshape(-1))\n",
    "    ax2.plot(traj[:,:,0].reshape(-1), traj[:,:,1].reshape(-1))\n",
    "    # for detection in trajectory:\n",
    "    #     x1, y1, x2, y2 = detection[:4]\n",
    "    #     # take the point at the center bottom of the bbox\n",
    "    #     center_foot_xyz = [[.5*x1 + .5*x2, y2]]\n",
    "    #     print(center_foot_xyz)\n",
    "    #     projected_bbox = cv2.perspectiveTransform(np.array([center_foot_xyz]),H)\n",
    "\n",
    "    #     print(projected_bbox)\n",
    "plt.show()        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(720, 1280, 3)"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "1135f674f58caf91385e41dd32dc418daf761a3c5d4526b1ac3bad0b893c2eb5"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}