Documentation (#95)
* Added documentation
* Added docstrings and comments
* Removed unused imports
* Added functionality for saving checkpoints during the training process
* Update train.py
* Update multitracker.py

parent 0a0665e682
commit 24f351d1b5

8 changed files with 184 additions and 89 deletions
demo.py (10 changes)

@@ -24,23 +24,13 @@ Todo:
     * More documentation
 """
 
-import os
-import os.path as osp
-import cv2
 import logging
 import argparse
-import motmetrics as mm
-
-from tracker.multitracker import JDETracker
-from utils import visualization as vis
 from utils.utils import *
-from utils.io import read_results
 from utils.log import logger
 from utils.timer import Timer
-from utils.evaluation import Evaluator
 from utils.parse_config import parse_model_cfg
 import utils.datasets as datasets
-import torch
 from track import eval_seq
track.py (35 changes)

@@ -39,6 +39,41 @@ def write_results(filename, results, data_type):
 
 
 def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30):
+    '''
+    Processes the given video sequence and writes the tracking results to the result file.
+
+    It uses the JDE model to obtain information about the online targets present.
+
+    Parameters
+    ----------
+    opt : Namespace
+        Contains information passed as command-line arguments.
+
+    dataloader : LoadVideo
+        Instance of the LoadVideo class used for fetching the image sequence and associated data.
+
+    data_type : str
+        Type of dataset corresponding to the given video.
+
+    result_filename : str
+        Name (path) of the file for storing the results.
+
+    save_dir : str
+        Path to the folder for storing the result frames (frames annotated with bounding boxes).
+
+    show_image : bool
+        Option for showing individual frames during run-time.
+
+    frame_rate : int
+        Frame rate of the given video.
+
+    Returns
+    -------
+    frame_id : int
+        Sequence number of the last processed frame. (The return values are not otherwise significant here.)
+    '''
     if save_dir:
         mkdir_if_missing(save_dir)
     tracker = JDETracker(opt, frame_rate=frame_rate)
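A minimal sketch of how eval_seq might be driven, in the spirit of demo.py. The option values and the video path are hypothetical; LoadVideo's signature and frame_rate attribute are assumptions about utils.datasets:

    import argparse
    import utils.datasets as datasets
    from track import eval_seq

    # Hypothetical option namespace; real runs build this with argparse.
    opt = argparse.Namespace(cfg='cfg/yolov3_1088x608.cfg', weights='weights/latest.pt',
                             img_size=(1088, 608), conf_thres=0.5, nms_thres=0.4,
                             track_buffer=30, min_box_area=200)
    dataloader = datasets.LoadVideo('input.mp4', opt.img_size)
    # Writes MOT-format results and, if save_dir is given, annotated frames.
    last_frame_id = eval_seq(opt, dataloader, 'mot', 'results.txt',
                             save_dir='outputs/frames', show_image=False,
                             frame_rate=dataloader.frame_rate)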
tracker/matching.py

@@ -1,6 +1,3 @@
-import cv2
-import torch
-import torch.nn.functional as F
 import numpy as np
 import scipy
 from scipy.spatial.distance import cdist
@@ -8,7 +5,6 @@ import lap
 
 from cython_bbox import bbox_overlaps as bbox_ious
 from utils import kalman_filter
-import time
 
 
 def merge_matches(m1, m2, shape):
     O,P,Q = shape
tracker/multitracker.py

@@ -1,15 +1,6 @@
-import numpy as np
 from numba import jit
 from collections import deque
-import itertools
-import os
-import os.path as osp
-import time
 import torch
-import torch.nn.functional as F
-
-from utils.utils import *
-from utils.log import logger
 from utils.kalman_filter import KalmanFilter
 from models import *
 from tracker import matching
@@ -17,7 +8,6 @@ from .basetrack import BaseTrack, TrackState
 
 
 class STrack(BaseTrack):
-    shared_kalman = KalmanFilter()
 
     def __init__(self, tlwh, score, temp_feat, buffer_size=30):
@@ -64,7 +54,6 @@ class STrack(BaseTrack):
             stracks[i].mean = mean
             stracks[i].covariance = cov
 
-
     def activate(self, kalman_filter, frame_id):
         """Start a new tracklet"""
         self.kalman_filter = kalman_filter
@@ -112,7 +101,7 @@ class STrack(BaseTrack):
         self.update_features(new_track.curr_feat)
 
     @property
-    #@jit(nopython=True)
+    @jit
     def tlwh(self):
         """Get current position in bounding box format `(top left x, top left y,
                 width, height)`.
@@ -125,7 +114,7 @@ class STrack(BaseTrack):
         return ret
 
     @property
-    #@jit(nopython=True)
+    @jit
     def tlbr(self):
         """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
         `(top left, bottom right)`.
@@ -135,7 +124,7 @@ class STrack(BaseTrack):
         return ret
 
     @staticmethod
-    #@jit(nopython=True)
+    @jit
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
@@ -149,14 +138,14 @@ class STrack(BaseTrack):
         return self.tlwh_to_xyah(self.tlwh)
 
     @staticmethod
-    #@jit(nopython=True)
+    @jit
     def tlbr_to_tlwh(tlbr):
         ret = np.asarray(tlbr).copy()
         ret[2:] -= ret[:2]
         return ret
 
     @staticmethod
-    #@jit(nopython=True)
+    @jit
     def tlwh_to_tlbr(tlwh):
         ret = np.asarray(tlwh).copy()
         ret[2:] += ret[:2]
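For reference, a quick worked example of the box-format conversions above (plain NumPy, matching the arithmetic in tlbr_to_tlwh and tlwh_to_xyah; the numbers are made up):

    import numpy as np

    tlbr = np.array([100., 50., 180., 250.])  # (min x, min y, max x, max y)
    tlwh = tlbr.copy()
    tlwh[2:] -= tlwh[:2]                      # -> [100, 50, 80, 200]: top-left, width, height
    xyah = tlwh.copy()
    xyah[:2] += xyah[2:] / 2                  # center -> [140, 150]
    xyah[2] /= xyah[3]                        # aspect ratio w/h -> 0.4
    # xyah == [140, 150, 0.4, 200]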
@@ -166,11 +155,10 @@ class STrack(BaseTrack):
         return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
 
 
-
 class JDETracker(object):
     def __init__(self, opt, frame_rate=30):
         self.opt = opt
-        self.model = Darknet(opt.cfg)
+        self.model = Darknet(opt.cfg, opt.img_size, nID=30)
         # load_darknet_weights(self.model, opt.weights)
         self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'], strict=False)
         self.model.cuda().eval()
@@ -187,61 +175,106 @@ class JDETracker(object):
         self.kalman_filter = KalmanFilter()
 
     def update(self, im_blob, img0):
+        """
+        Processes the image frame and finds bounding boxes (detections).
+
+        Associates the detections with the corresponding tracklets, and also handles lost, removed, refound and active tracklets.
+
+        Parameters
+        ----------
+        im_blob : torch.float32
+            Tensor whose shape depends on the size of the image. By default, the shape of this tensor is [1, 3, 608, 1088].
+
+        img0 : ndarray
+            ndarray whose shape depends on the input image sequence. By default, the shape is [608, 1080, 3].
+
+        Returns
+        -------
+        output_stracks : list of STrack instances
+            Information about the online tracklets for the received image tensor.
+        """
+
         self.frame_id += 1
-        activated_starcks = []
-        refind_stracks = []
-        lost_stracks = []
+        activated_starcks = []  # for storing active tracks, for the current frame
+        refind_stracks = []  # lost tracks whose detections are obtained in the current frame
+        lost_stracks = []  # tracks missing in the current frame but not yet removed (lost for less time than the removal threshold)
         removed_stracks = []
 
         t1 = time.time()
         ''' Step 1: Network forward, get detections & embeddings'''
         with torch.no_grad():
             pred = self.model(im_blob)
+        # pred is a tensor of all the proposals (default number of proposals: 54264). Proposals carry bounding-box and embedding information.
         pred = pred[pred[:, :, 4] > self.opt.conf_thres]
+        # pred now holds fewer proposals; the rest were rejected on the basis of the object confidence score
         if len(pred) > 0:
-            dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres,
-                                       self.opt.nms_thres)[0]
+            dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
+            # Final proposals are obtained in dets; bounding-box and embedding information is also included
+            # The next step changes the detection scales
             scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
-            dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy()
-            '''Detections'''
-            detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for
-                          (tlbrs, f) in zip(dets, embs)]
+            '''Detections is a list of (x1, y1, x2, y2, object_conf, class_score, class_pred)'''
+            # class_pred holds the embeddings.
+            detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
+                          (tlbrs, f) in zip(dets[:, :5], dets[:, 6:])]
         else:
             detections = []
 
+        t2 = time.time()
+        # print('Forward: {} s'.format(t2-t1))
+
         ''' Add newly detected tracklets to tracked_stracks'''
         unconfirmed = []
         tracked_stracks = []  # type: list[STrack]
         for track in self.tracked_stracks:
             if not track.is_activated:
+                # previous tracks which are not active in the current frame are added to the unconfirmed list
                 unconfirmed.append(track)
+                # print("Should not be here, in unconfirmed")
             else:
+                # active tracks are added to the local list 'tracked_stracks'
                 tracked_stracks.append(track)
 
         ''' Step 2: First association, with embedding'''
+        # Combine currently tracked_stracks and lost_stracks
         strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
         # Predict the current location with KF
         STrack.multi_predict(strack_pool)
 
         dists = matching.embedding_distance(strack_pool, detections)
+        # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
         dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
+        # dists is the matrix of distances between the detections and the tracks in strack_pool
         matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
+        # matches is the array of matched (track, detection) index pairs over strack_pool
 
         for itracked, idet in matches:
+            # itracked is the index of the track and idet is the index of the detection
             track = strack_pool[itracked]
             det = detections[idet]
             if track.state == TrackState.Tracked:
+                # If the track is active, add the detection to the track
                 track.update(detections[idet], self.frame_id)
                 activated_starcks.append(track)
             else:
+                # A detection was obtained for a track which is not active, so put the track in the refind_stracks list
                 track.re_activate(det, self.frame_id, new_id=False)
                 refind_stracks.append(track)
 
+        # None of the steps below happen if there are no undetected tracks.
         ''' Step 3: Second association, with IOU'''
         detections = [detections[i] for i in u_detection]
-        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state==TrackState.Tracked ]
+        # detections is now a list of the unmatched detections
+        r_tracked_stracks = []  # container for stracks which were tracked till the
+        # previous frame but for which no detection was found in the current frame
+        for i in u_track:
+            if strack_pool[i].state == TrackState.Tracked:
+                r_tracked_stracks.append(strack_pool[i])
         dists = matching.iou_distance(r_tracked_stracks, detections)
         matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
+        # matches is the list of detections which matched with the corresponding tracks by the IOU distance method
         for itracked, idet in matches:
             track = r_tracked_stracks[itracked]
             det = detections[idet]
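As a toy illustration of the assignment step above: this sketch uses scipy's Hungarian solver as a stand-in (the repo's matching.linear_assignment wraps lap.lapjv, but the cost-threshold idea is the same); the matrix values are made up:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    dists = np.array([[0.2, 0.9],   # track 0 vs detections 0, 1
                      [0.8, 0.3]])  # track 1 vs detections 0, 1
    rows, cols = linear_sum_assignment(dists)
    matches = [(r, c) for r, c in zip(rows, cols) if dists[r, c] <= 0.7]
    # -> [(0, 0), (1, 1)]; any pair costlier than the threshold stays unmatched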
@@ -251,12 +284,14 @@ class JDETracker(object):
             else:
                 track.re_activate(det, self.frame_id, new_id=False)
                 refind_stracks.append(track)
+        # The same process is applied to the unmatched detections, but now with IOU distance as the measure
 
         for it in u_track:
             track = r_tracked_stracks[it]
             if not track.state == TrackState.Lost:
                 track.mark_lost()
                 lost_stracks.append(track)
+        # If no detection is obtained for a track (u_track), the track is added to lost_stracks and marked lost
 
         '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
         detections = [detections[i] for i in u_detection]
@@ -265,11 +300,14 @@ class JDETracker(object):
         for itracked, idet in matches:
             unconfirmed[itracked].update(detections[idet], self.frame_id)
             activated_starcks.append(unconfirmed[itracked])
 
+        # The tracks which are still not matched
         for it in u_unconfirmed:
             track = unconfirmed[it]
             track.mark_removed()
             removed_stracks.append(track)
 
+        # After all these association steps, any remaining new detection is initialized as a new track
         """ Step 4: Init new stracks"""
         for inew in u_detection:
             track = detections[inew]
@@ -279,14 +317,18 @@ class JDETracker(object):
                 activated_starcks.append(track)
 
         """ Step 5: Update state"""
+        # If a track has been lost for more frames than the threshold, it is removed.
         for track in self.lost_stracks:
             if self.frame_id - track.end_frame > self.max_time_lost:
                 track.mark_removed()
                 removed_stracks.append(track)
+        # print('Remained match {} s'.format(t4-t3))
 
+        # Update self.tracked_stracks and self.lost_stracks with the results of this step.
         self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
         self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
         self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
+        # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]
         self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
         self.lost_stracks.extend(lost_stracks)
         self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
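A note on the removal threshold used above: the sketch below mirrors how JDETracker.__init__ appears to scale the track buffer with the video frame rate (the concrete numbers are illustrative, and the scaling formula is an assumption, not shown in this diff):

    frame_rate, track_buffer = 25, 30
    max_time_lost = int(frame_rate / 30.0 * track_buffer)  # 25 frames, i.e. one second at 25 fps
    # a lost track is removed once frame_id - track.end_frame > max_time_lost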
@@ -301,6 +343,7 @@ class JDETracker(object):
         logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
         logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
         logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
+        # print('Final {} s'.format(t5-t4))
         return output_stracks
 
 def joint_stracks(tlista, tlistb):
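Putting the update() flow together, a hedged per-frame tracking loop (it assumes a dataloader that yields the preprocessed image alongside the original frame, the way utils.datasets.LoadVideo does; the unpacked names are illustrative):

    tracker = JDETracker(opt, frame_rate=30)
    results = []
    for frame_id, (path, img, img0) in enumerate(dataloader):
        blob = torch.from_numpy(img).cuda().unsqueeze(0)  # [1, 3, H, W] network input
        online_targets = tracker.update(blob, img0)       # list of active STrack instances
        for t in online_targets:
            x, y, w, h = t.tlwh                           # top-left x/y, width, height
            results.append((frame_id + 1, t.track_id, x, y, w, h))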
train.py (80 changes)

@@ -1,9 +1,10 @@
 import argparse
 import json
 import time
+from time import gmtime, strftime
 import test
 from models import *
+from shutil import copyfile
 from utils.datasets import JointDataset, collate_fn
 from utils.utils import *
 from utils.log import logger
@@ -13,6 +14,10 @@ from torchvision.transforms import transforms as T
 def train(
         cfg,
         data_cfg,
+        weights_from="",
+        weights_to="",
+        save_every=10,
+        img_size=(1088, 608),
         resume=False,
         epochs=100,
         batch_size=16,
@@ -20,9 +25,16 @@ def train(
         freeze_backbone=False,
         opt=None,
 ):
-    weights = 'weights'
-    mkdir_if_missing(weights)
-    latest = osp.join(weights, 'latest.pt')
+    # The function starts
+    timme = strftime("%Y-%d-%m %H:%M:%S", gmtime())
+    timme = timme[5:-3].replace('-', '_')
+    timme = timme.replace(' ', '_')
+    timme = timme.replace(':', '_')
+    weights_to = osp.join(weights_to, 'run' + timme)
+    mkdir_if_missing(weights_to)
+    if resume:
+        latest_resume = osp.join(weights_from, 'latest.pt')
 
     torch.backends.cudnn.benchmark = True  # unsuitable for multiscale
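For illustration, the timestamp mangling above turns a gmtime of 2020-07-15 09:30:00 into a run folder such as weights/run15_07_09_30. A standalone re-run of the same string operations:

    from time import gmtime, strftime

    timme = strftime("%Y-%d-%m %H:%M:%S", gmtime())  # e.g. '2020-15-07 09:30:00'
    timme = timme[5:-3].replace('-', '_')            # drop year and seconds -> '15_07 09:30'
    timme = timme.replace(' ', '_').replace(':', '_')
    print('weights/run' + timme)                     # e.g. 'weights/run15_07_09_30'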
@@ -32,24 +44,19 @@ def train(
     trainset_paths = data_config['train']
     dataset_root = data_config['root']
     f.close()
-    cfg_dict = parse_model_cfg(cfg)
-    img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
 
-    # Get dataloader
     transforms = T.Compose([T.ToTensor()])
+    # Get dataloader
     dataset = JointDataset(dataset_root, trainset_paths, img_size, augment=True, transforms=transforms)
     dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                              num_workers=8, pin_memory=True, drop_last=True, collate_fn=collate_fn)
 
     # Initialize model
-    model = Darknet(cfg_dict, dataset.nID)
+    model = Darknet(cfg, dataset.nID)
 
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
     if resume:
-        checkpoint = torch.load(latest, map_location='cpu')
+        checkpoint = torch.load(latest_resume, map_location='cpu')
 
         # Load weights to resume from
         model.load_state_dict(checkpoint['model'])
@@ -67,33 +74,33 @@ def train(
     else:
         # Initialize model with backbone (optional)
         if cfg.endswith('yolov3.cfg'):
-            load_darknet_weights(model, osp.join(weights, 'darknet53.conv.74'))
+            load_darknet_weights(model, osp.join(weights_from, 'darknet53.conv.74'))
             cutoff = 75
         elif cfg.endswith('yolov3-tiny.cfg'):
-            load_darknet_weights(model, osp.join(weights, 'yolov3-tiny.conv.15'))
+            load_darknet_weights(model, osp.join(weights_from, 'yolov3-tiny.conv.15'))
             cutoff = 15
 
     model.cuda().train()
 
     # Set optimizer
-    optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=opt.lr, momentum=.9, weight_decay=1e-4)
+    optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=opt.lr, momentum=.9,
+                                weight_decay=1e-4)
 
     model = torch.nn.DataParallel(model)
     # Set scheduler
     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
-                                                     milestones=[int(0.5*opt.epochs), int(0.75*opt.epochs)], gamma=0.1)
+                                                     milestones=[int(0.5 * opt.epochs), int(0.75 * opt.epochs)],
+                                                     gamma=0.1)
 
     # An important trick for detection: freeze bn during fine-tuning
     if not opt.unfreeze_bn:
         for i, (name, p) in enumerate(model.named_parameters()):
             p.requires_grad = False if 'batch_norm' in name else True
 
-    model_info(model)
+    # model_info(model)
 
     t0 = time.time()
     for epoch in range(epochs):
         epoch += start_epoch
 
         logger.info(('%8s%12s' + '%10s' * 6) % (
             'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
@@ -120,7 +127,6 @@ def train(
             # Compute loss, compute gradient, update parameters
             loss, components = model(imgs.cuda(), targets.cuda(), targets_len.cuda())
             components = torch.mean(components.view(-1, 5), dim=0)
-
             loss = torch.mean(loss)
             loss.backward()
 
@@ -135,6 +141,7 @@ def train(
             for ii, key in enumerate(model.module.loss_names):
                 rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)
 
+            # rloss holds running loss values whose mean is updated at every iteration
             s = ('%8s%12s' + '%10.3g' * 6) % (
                 '%g/%g' % (epoch, epochs - 1),
                 '%g/%g' % (i, len(dataloader) - 1),
@@ -149,26 +156,45 @@ def train(
             checkpoint = {'epoch': epoch,
                           'model': model.module.state_dict(),
                           'optimizer': optimizer.state_dict()}
-            torch.save(checkpoint, latest)
+
+            copyfile(cfg, weights_to + '/cfg/yolo3.cfg')
+            copyfile(data_cfg, weights_to + '/cfg/ccmcpe.json')
+
+            latest = osp.join(weights_to, 'latest.pt')
+            torch.save(checkpoint, latest)
+            if epoch % save_every == 0 and epoch != 0:
+                # making the checkpoint lite
+                checkpoint["optimizer"] = []
+                torch.save(checkpoint, osp.join(weights_to, "weights_epoch_" + str(epoch) + ".pt"))
 
         # Calculate mAP
         if epoch % opt.test_interval == 0:
             with torch.no_grad():
-                mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
-                test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
+                mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size,
+                                      print_interval=40, nID=dataset.nID)
+                test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size,
+                              print_interval=40, nID=dataset.nID)
 
         # Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
         scheduler.step()
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
     parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
     parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
-    parser.add_argument('--cfg', type=str, default='cfg/yolov3_1088x608.cfg', help='cfg file path')
+    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
+    parser.add_argument('--weights-from', type=str, default='weights/',
+                        help='Path for getting the trained model for resuming training (Should only be used with '
+                             '--resume)')
+    parser.add_argument('--weights-to', type=str, default='weights/',
+                        help='Store the trained weights after resuming training session. It will create a new folder '
+                             'with timestamp in the given path')
+    parser.add_argument('--save-model-after', type=int, default=10,
+                        help='Save a checkpoint of model at given interval of epochs')
     parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='coco.data file path')
+    parser.add_argument('--img-size', type=int, default=[1088, 608], nargs='+', help='pixels')
     parser.add_argument('--resume', action='store_true', help='resume training flag')
     parser.add_argument('--print-interval', type=int, default=40, help='print interval')
     parser.add_argument('--test-interval', type=int, default=9, help='test interval')
@@ -181,6 +207,10 @@ if __name__ == '__main__':
     train(
         opt.cfg,
         opt.data_cfg,
+        weights_from=opt.weights_from,
+        weights_to=opt.weights_to,
+        save_every=opt.save_model_after,
+        img_size=opt.img_size,
         resume=opt.resume,
         epochs=opt.epochs,
         batch_size=opt.batch_size,
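The checkpoint logic added above keeps a full latest.pt for --resume while the periodic per-epoch snapshots drop the optimizer state to stay small. A self-contained sketch of the same idea (the function name, paths and save_every value are illustrative):

    import os.path as osp
    import torch

    def save_checkpoints(model, optimizer, epoch, weights_to, save_every=10):
        checkpoint = {'epoch': epoch,
                      'model': model.state_dict(),
                      'optimizer': optimizer.state_dict()}
        # Full checkpoint, overwritten every epoch; used when resuming.
        torch.save(checkpoint, osp.join(weights_to, 'latest.pt'))
        if epoch % save_every == 0 and epoch != 0:
            checkpoint['optimizer'] = []  # "lite" checkpoint: weights only
            torch.save(checkpoint, osp.join(weights_to, 'weights_epoch_' + str(epoch) + '.pt'))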
utils/datasets.py

@@ -393,6 +393,9 @@ class JointDataset(LoadImagesAndLabels):  # for training
 
 
     def __getitem__(self, files_index):
+        """
+        Iterator function for the train dataset
+        """
         for i, c in enumerate(self.cds):
             if files_index >= c:
                 ds = list(self.label_files.keys())[i]
utils/kalman_filter.py

@@ -1,5 +1,4 @@
 # vim: expandtab:ts=4:sw=4
-import numba
 import numpy as np
 import scipy.linalg
@@ -1,6 +1,5 @@
 import glob
 import random
-import time
 import os
 import os.path as osp