# surveilling-surveillance/detection/main.py

import os
import fire
from pytorch_lightning import Trainer
import torch

from util.nni import run_nni
from util import init_exp_folder, Args
from util import constants as C
from lightning import (get_task,
                       load_task,
                       get_ckpt_callback,
                       get_early_stop_callback,
                       get_logger)


def train(save_dir=C.SANDBOX_PATH,
          tb_path=C.TB_PATH,
          exp_name="DemoExperiment",
          model="FasterRCNN",
          task='detection',
          gpus=1,
          pretrained=True,
          batch_size=8,
          accelerator="gpu",
          strategy="ddp",
          gradient_clip_val=0.5,
          max_epochs=100,
          learning_rate=1e-5,
          patience=30,
          limit_train_batches=1.0,
          limit_val_batches=1.0,
          limit_test_batches=1.0,
          weights_summary=None,
          ):
    """
    Build a task from the given settings and fit it with a Lightning Trainer.

    All arguments, plus the fixed `num_classes` and `dataset_name` values set
    at the top of the body, are bundled into an `Args` object. That object is
    what `init_exp_folder` and `get_task` receive, so arguments that are not
    used directly below (`tb_path`, `model`, `task`, `pretrained`,
    `batch_size`, `learning_rate`) only take effect through it.

    Args:
        save_dir: Path to save the checkpoints and logs
        tb_path: Path to global tb folder
        exp_name: Name of the experiment; also the sub-folder of `save_dir`
            used as the Trainer's `default_root_dir`
        model: Model name (see 'detection/models/detection/detectron.py'
            for the supported names -- TODO confirm the accepted spellings)
        task: Name of the task to build (handed to `get_task` via `Args`)
        gpus: int. (ie: 2 gpus)
             OR list to specify which GPUs [0, 1] OR '0,1'
             OR '-1' / -1 to use all available gpus
        pretrained: Whether or not to use the pretrained model
        batch_size: Batch size (handed to the task via `Args`)
        accelerator: Accelerator type forwarded to the Trainer
            ("cpu", "gpu", "tpu", "ipu", "hpu", "mps", "auto")
        strategy: Training strategy forwarded to the Trainer (e.g. "ddp")
        gradient_clip_val: Clip value of gradient norm
        max_epochs: Max number of epochs
        learning_rate: Learning rate (handed to the task via `Args`)
        patience: Number of epochs with no improvement after which training
            will be stopped
        limit_train_batches: Proportion of training data to use
        limit_val_batches: Proportion of validation data to use
        limit_test_batches: Proportion of test data to use
        weights_summary: Accepted and recorded in `Args`, but not forwarded
            to the Trainer.

    Returns:
        The tuple `(save_dir, exp_name)` identifying where the run was saved.

    """
    # Fixed experiment settings. They must be bound as locals *before* the
    # `locals()` snapshot below, and nothing else may be bound before it,
    # because that snapshot is the full configuration seen by the task.
    num_classes = 2
    dataset_name = "camera-detection-new"

    args = Args(locals())
    init_exp_folder(args)
    task = get_task(args)

    experiment_logger = get_logger(save_dir, exp_name)
    callbacks = [
        get_early_stop_callback(patience),
        # Keep the checkpoint with the highest validation mAP.
        get_ckpt_callback(save_dir, exp_name, monitor="mAP", mode="max"),
    ]
    run_dir = os.path.join(save_dir, exp_name)

    # `weights_summary` is intentionally not passed to the Trainer here.
    trainer = Trainer(
        gpus=gpus,
        accelerator=accelerator,
        strategy=strategy,
        logger=experiment_logger,
        callbacks=callbacks,
        default_root_dir=run_dir,
        gradient_clip_val=gradient_clip_val,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
        max_epochs=max_epochs,
    )
    trainer.fit(task)
    return save_dir, exp_name


def test(ckpt_path,
         visualize=False,
         deploy=False,
         limit_test_batches=1.0,
         gpus=1,
         deploy_meta_path="/home/haosheng/dataset/camera/deployment/16cityp1.csv",
         test_batch_size=1,
         **kwargs):
    """
    Load a task from a checkpoint and run `Trainer.test` on it.

    Args:
        ckpt_path: Path for the experiment to load
        visualize: Forwarded to `load_task`
        deploy: Forwarded to `load_task`
        limit_test_batches: Proportion of test data to use
        gpus: int. (ie: 2 gpus)
             OR list to specify which GPUs [0, 1] OR '0,1'
             OR '-1' / -1 to use all available gpus
        deploy_meta_path: Forwarded to `load_task`; the default is a
            machine-specific absolute path, so callers on other machines
            will presumably need to override it when deploying
        test_batch_size: Forwarded to `load_task`
        **kwargs: Any further keyword options, forwarded unchanged to
            `load_task`
    Returns: None

    """
    # Collect the explicit options first, then append caller extras; the
    # explicit names can never reappear in `kwargs` because they are named
    # parameters of this function.
    task_options = {
        "visualize": visualize,
        "deploy": deploy,
        "deploy_meta_path": deploy_meta_path,
        "test_batch_size": test_batch_size,
    }
    task_options.update(kwargs)
    task = load_task(ckpt_path, **task_options)

    runner = Trainer(gpus=gpus, limit_test_batches=limit_test_batches)
    runner.test(task)


def nni():
    """
    Hand this module's `train` and `test` entry points to `run_nni`.

    `run_nni` is defined in `util.nni`; presumably it drives an NNI
    hyper-parameter search using these two functions -- see that module
    for the details.

    Returns: None

    """
    run_nni(train, test)


if __name__ == "__main__":
    # Fire is called without a component, so the command line is built from
    # this module's contents: the functions above become sub-commands and
    # their keyword arguments become flags, e.g.
    #   python main.py train --exp_name=MyRun
    #   python main.py test --ckpt_path=<checkpoint>
    fire.Fire()