slicing slower than split_cat
Hi, I used the following code to test the FPS of the “slicing” and “split_cat” patterns on an RTX 2060 (10 rounds of testing in total; each round warms up 20 times and then runs 100 timed inferences, so the final result is the average over the 10 × 100 inference times). The results are shown below. Unexpectedly, I found that “slicing” was slower than “split_cat”. Have you ever run into this problem? Thank you~
split_cat:{'fps': 246.7, 'time_mean': 4.1, 'time_std': 0.2}
slicing:{'fps': 227.0, 'time_mean': 4.4, 'time_std': 0.3}
import time
from argparse import ArgumentParser

import numpy as np
import torch

from utils.utils import *
from utils.fuse_conv_bn import fuse_conv_bn
from data.data_api import LitDataModule
from models.model_api import LitModel


def fps_mm(model, repetitions, num_warmup, infer_epoch):
    # load the model onto the GPU
    device = torch.device("cuda:0")
    model.to(device)
    model.eval()
    torch.backends.cudnn.benchmark = True

    # initialize a dummy input image
    data = torch.randn(1, 3, 224, 224, dtype=torch.float).to(device)

    result_average = {'fps': 0, 'time_mean': 0, 'time_std': 0}
    for _ in range(infer_epoch):
        result = {}
        infer_time = []
        for i in range(repetitions):
            torch.cuda.synchronize()
            start_time = time.perf_counter()
            # infer
            with torch.no_grad():
                model(data)
            torch.cuda.synchronize()
            elapsed = (time.perf_counter() - start_time)
            if i >= num_warmup:
                infer_time.append(elapsed)
        result['fps'] = (repetitions - num_warmup) / sum(infer_time)
        result['time_mean'] = np.mean(infer_time) * 1000
        result['time_std'] = np.std(infer_time) * 1000
        result_average['fps'] += result['fps']
        result_average['time_mean'] += result['time_mean']
        result_average['time_std'] += result['time_std']
        for key, value in result.items():
            result[key] = round(value, 1)
        print(result)
    for key, value in result_average.items():
        result_average[key] = round(value / infer_epoch, 1)
    print("result_average:")
    print(result_average)


def main(args):
    # Init data pipeline
    dm, _ = LitDataModule(hparams=args)

    # Init LitModel
    if args.checkpoint_path is not None:
        PATH = args.checkpoint_path
        if PATH[-5:] == '.ckpt':
            model = LitModel.load_from_checkpoint(PATH, map_location='cpu', num_classes=dm.num_classes, hparams=args)
            print('Successfully load the pl checkpoint file.')
            if args.pl_ckpt_2_torch_pth:
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                model = model.model.to(device)
                torch.save(model.state_dict(), PATH[:-5] + '.pth')
                exit()
        elif PATH[-4:] == '.pth':
            model = LitModel(num_classes=dm.num_classes, hparams=args)
            missing_keys, unexpected_keys = model.model.load_state_dict(torch.load(PATH), False)
            # show for debug
            print('missing_keys: ', missing_keys)
            print('unexpected_keys: ', unexpected_keys)
        else:
            raise TypeError
    else:
        model = LitModel(num_classes=dm.num_classes, hparams=args)

    if args.fuse_conv_bn:
        fuse_conv_bn(model.model)

    if args.measure_latency:
        model = model.model
        fps_mm(model, repetitions=120, num_warmup=20, infer_epoch=10)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('-c', '--cfg', type=str, default='cfg/fasternet_t0.yaml')
    parser.add_argument('-g', "--gpus", type=str, default="0",
                        help="Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node.")
    parser.add_argument('-d', "--dev", type=int, default=0, help='fast_dev_run for debug')
    parser.add_argument("--num_nodes", type=int, default=1)
    parser.add_argument('-n', "--num_workers", type=int, default=4)
    parser.add_argument('-b', "--batch_size", type=int, default=2048)
    parser.add_argument('-e', "--batch_size_eva", type=int, default=1, help='batch_size for evaluation')
    parser.add_argument("--model_ckpt_dir", type=str, default="./model_ckpt/")
    parser.add_argument("--data_dir", type=str, default="../../data/imagenet")
    parser.add_argument('--pin_memory', action='store_true')
    parser.add_argument("--checkpoint_path", type=str, default=None)
    parser.add_argument("--pconv_fw_type", type=str, default='slicing',
                        help="use 'split_cat' for training/inference and 'slicing' only for inference")
    parser.add_argument('--measure_latency', action='store_true', help='measure latency or throughput')
    parser.add_argument('--test_phase', action='store_true')
    parser.add_argument('--fuse_conv_bn', action='store_true')
    parser.add_argument("--wandb_project_name", type=str, default="fasternet")
    parser.add_argument('--wandb_offline', action='store_true')
    parser.add_argument('--wandb_save_dir', type=str, default='./')
    parser.add_argument('--pl_ckpt_2_torch_pth', action='store_true',
                        help='convert pl .ckpt file to torch .pth file, and then exit')

    args = parser.parse_args()
    cfg = load_cfg(args.cfg)
    args = merge_args_cfg(args, cfg)

    # please change {WANDB_API_KEY} to your personal api_key before using wandb
    # os.environ["WANDB_API_KEY"] = "{WANDB_API_KEY}"

    main(args)
@wsy-yjys Hi, the slicing mode can be slower because of the feature-map clone (see the code x = x.clone()). Such a clone is necessary to avoid modifying the input in place, because the same input is reused later for the shortcut addition. However, if the shortcut is not taken exactly before the PConv, the slicing mode can be faster, since the input clone can then be removed.
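For reference, here is a minimal sketch of what the two forward paths look like. The class and attribute names are illustrative (loosely modeled on the Partial_conv3 module and the pconv_fw_type option mentioned in this thread), not copied verbatim from the repository:

import torch
import torch.nn as nn

class PartialConv3Sketch(nn.Module):
    """Sketch of a partial 3x3 conv with the two forward modes discussed above."""

    def __init__(self, dim, n_div):
        super().__init__()
        self.dim_conv3 = dim // n_div              # channels that get convolved
        self.dim_untouched = dim - self.dim_conv3  # channels passed through unchanged
        self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False)

    def forward_slicing(self, x):
        # Inference-only path: write the conv result back into a slice of x.
        # The clone keeps the caller's tensor intact, because the same tensor
        # is reused later as the residual shortcut.
        x = x.clone()
        x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :])
        return x

    def forward_split_cat(self, x):
        # Training/inference path: split, convolve the first chunk, concatenate.
        # No in-place write, so no clone is needed.
        x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1)
        x1 = self.partial_conv3(x1)
        return torch.cat((x1, x2), dim=1)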
Oh…… I got it, thank you~
Hi, I have rethought your code implementation: why not change shortcut = x to shortcut = x.clone()? Then we could comment out x = x.clone() in Partial_conv3?


Good question, waiting for a reply.
@wsy-yjys Hi, shortcut = x is a shallow copy (it just binds another reference to the same tensor), which is much faster than the deep copy x = x.clone(). Therefore, the implementation you suggested would (see the sketch after this list):
- Speed up the inference a bit or negligibly for the slicing mode.
- Slow down the inference (maybe considerably) for the concatenation mode.
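To make the trade-off concrete, here is a minimal sketch (illustrative names only; it assumes a block of the form shortcut -> PConv -> MLP -> residual add, with spatial_mixing standing in for the PConv):

# Sketch only: 'block' is assumed to expose spatial_mixing (the PConv) and mlp.

def block_forward_current(block, x):
    shortcut = x                    # shallow copy: just a new reference, essentially free
    x = block.spatial_mixing(x)     # slicing mode clones x internally before its in-place write
    return shortcut + block.mlp(x)  # the untouched input is still available here

def block_forward_suggested(block, x):
    shortcut = x.clone()            # deep copy: the whole feature map is copied in BOTH modes
    x = block.spatial_mixing(x)     # slicing mode could then drop its internal clone
    return shortcut + block.mlp(x)  # split_cat mode now pays for a copy it never needed

Moving the clone out of the PConv shifts the copy cost from the slicing path alone onto every path, which is why the concatenation mode would get slower.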
@JierunChen Hi, thank you for your reply, I see now. I appreciate how thorough and responsive you are.