"slicing" is slower than "split_cat"
Hi, I used the following code to measure the FPS of the "slicing" and "split_cat" patterns on an RTX 2060. I ran 10 rounds of testing in total; each round first warms up for 20 iterations and then runs inference 100 times, and the final result is the average over the 10 × 100 timed inferences. The results are shown below. Unexpectedly, I found that "slicing" was slower than "split_cat". Have you ever run into this problem? Thank you!
split_cat:{'fps': 246.7, 'time_mean': 4.1, 'time_std': 0.2}
slicing:{'fps': 227.0, 'time_mean': 4.4, 'time_std': 0.3}
from argparse import ArgumentParser
from utils.utils import *
from utils.fuse_conv_bn import fuse_conv_bn
from data.data_api import LitDataModule
from models.model_api import LitModel
def fps_mm(model, repetitions, num_warmup, infer_epoch, device=None):
    """Benchmark single-image inference speed of ``model``.

    The benchmark runs ``infer_epoch`` rounds. In every round the model is
    called ``repetitions`` times on one random ``1x3x224x224`` float tensor;
    the first ``num_warmup`` calls of each round are executed but excluded
    from the statistics. Per-round FPS, mean latency and latency standard
    deviation are computed and then averaged over all rounds.

    Args:
        model: Callable ``torch.nn.Module``. It is moved to ``device`` and
            switched to eval mode (this mutates the caller's module).
        repetitions: Forward passes per round, warm-up included.
        num_warmup: Leading passes of each round that are not timed into
            the statistics. Must be smaller than ``repetitions``.
        infer_epoch: Number of rounds to average over. Must be positive.
        device: Device to benchmark on. ``None`` (default) selects
            ``cuda:0`` when CUDA is available and falls back to the CPU
            otherwise.

    Returns:
        dict with keys ``'fps'``, ``'time_mean'`` and ``'time_std'``
        (the latter two in milliseconds), averaged over the rounds and
        rounded to one decimal place.

    Raises:
        ValueError: If no pass would be timed (``repetitions <= num_warmup``)
            or ``infer_epoch`` is not positive.
    """
    if infer_epoch <= 0:
        raise ValueError(f"infer_epoch must be positive, got {infer_epoch}")
    if repetitions <= num_warmup:
        raise ValueError(
            f"repetitions ({repetitions}) must exceed num_warmup ({num_warmup})"
        )

    # Load the model onto the benchmark device.
    if device is None:
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)
    on_cuda = device.type == "cuda"
    torch.backends.cudnn.benchmark = True
    model = model.to(device).eval()

    # Initialise the input image.
    data = torch.randn(1, 3, 224, 224, dtype=torch.float).to(device)

    result_average = {'fps': 0.0, 'time_mean': 0.0, 'time_std': 0.0}
    for _ in range(infer_epoch):
        infer_time = []
        for i in range(repetitions):
            # CUDA kernels are launched asynchronously: without a
            # synchronize on both sides of the timer only the launch
            # overhead would be measured, not the actual compute time.
            if on_cuda:
                torch.cuda.synchronize(device)
            start_time = time.perf_counter()
            # infer
            with torch.no_grad():
                model(data)
            if on_cuda:
                torch.cuda.synchronize(device)
            elapsed = time.perf_counter() - start_time

            # Warm-up passes run (cuDNN autotune, caches) but are not recorded.
            if i >= num_warmup:
                infer_time.append(elapsed)

        result = {
            'fps': len(infer_time) / sum(infer_time),
            'time_mean': float(np.mean(infer_time)) * 1000,
            'time_std': float(np.std(infer_time)) * 1000,
        }
        # Accumulate the unrounded values; rounding is for display only.
        for key, value in result.items():
            result_average[key] += value
        print({key: round(value, 1) for key, value in result.items()})

    for key, value in result_average.items():
        result_average[key] = round(value / infer_epoch, 1)
    print(result_average)
    return result_average
def main(args):
    """Build the data module and model, then optionally benchmark latency.

    Model construction depends on ``args.checkpoint_path``:

    * ``None``   -- a fresh ``LitModel`` is created.
    * ``*.ckpt`` -- a Lightning checkpoint is loaded; when
      ``args.pl_ckpt_2_torch_pth`` is set, the inner network's weights are
      saved next to it as ``*.pth`` and the process exits.
    * ``*.pth``  -- a fresh ``LitModel`` is created and the state dict is
      loaded non-strictly into ``model.model``.

    Args:
        args: Parsed/merged options. Reads ``checkpoint_path``,
            ``pl_ckpt_2_torch_pth``, ``fuse_conv_bn`` and
            ``measure_latency``; the whole object is also forwarded as
            ``hparams``.

    Raises:
        TypeError: If ``checkpoint_path`` has an unsupported extension.
        SystemExit: After a ``.ckpt`` -> ``.pth`` conversion.
    """
    # Init data pipeline
    dm, _ = LitDataModule(hparams=args)

    # Init LitModel
    ckpt_path = args.checkpoint_path
    if ckpt_path is None:
        model = LitModel(num_classes=dm.num_classes, hparams=args)
    elif ckpt_path.endswith('.ckpt'):
        model = LitModel.load_from_checkpoint(
            ckpt_path, map_location='cpu', num_classes=dm.num_classes, hparams=args
        )
        print('Successfully load the pl checkpoint file.')
        if args.pl_ckpt_2_torch_pth:
            # NOTE(review): this statement was garbled in the original
            # ("model =, PATH[:-5]+'.pth')"); reconstructed from the
            # --pl_ckpt_2_torch_pth help text ("convert ... and then exit").
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = model.model.to(device)
            torch.save(model.state_dict(), ckpt_path[:-5] + '.pth')
            raise SystemExit(0)
    elif ckpt_path.endswith('.pth'):
        model = LitModel(num_classes=dm.num_classes, hparams=args)
        # map_location='cpu' lets GPU-saved weights load on any machine;
        # load_state_dict copies them into the model's own parameters.
        missing_keys, unexpected_keys = model.model.load_state_dict(
            torch.load(ckpt_path, map_location='cpu'), strict=False
        )
        # show for debug
        print('missing_keys: ', missing_keys)
        print('unexpected_keys: ', unexpected_keys)
    else:
        raise TypeError(
            f"Unsupported checkpoint file {ckpt_path!r}: expected '.ckpt' or '.pth'"
        )

    if args.fuse_conv_bn:
        # NOTE(review): the body of this branch was missing in the original;
        # presumably fuse_conv_bn fuses the layers in place -- confirm
        # against utils/fuse_conv_bn.py.
        fuse_conv_bn(model)

    if args.measure_latency:
        # Benchmark the bare network rather than the Lightning wrapper.
        model = model.model
        fps_mm(model, repetitions=120, num_warmup=20, infer_epoch=10)
if __name__ == "__main__":
    # Script entry: declare the command-line options, parse them, and
    # combine them with the config file named by --cfg.
    # NOTE(review): no call to main(args) is visible in this excerpt.
    parser = ArgumentParser()

    # -- run / hardware -----------------------------------------------------
    parser.add_argument("-c", "--cfg", type=str, default="cfg/fasternet_t0.yaml")
    parser.add_argument(
        "-g",
        "--gpus",
        type=str,
        default="0",
        help="Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node.",
    )
    parser.add_argument(
        "-d", "--dev", type=int, default=0, help="fast_dev_run for debug"
    )
    parser.add_argument("--num_nodes", type=int, default=1)

    # -- data loading ---------------------------------------------------------
    parser.add_argument("-n", "--num_workers", type=int, default=4)
    parser.add_argument("-b", "--batch_size", type=int, default=2048)
    parser.add_argument(
        "-e",
        "--batch_size_eva",
        type=int,
        default=1,
        help="batch_size for evaluation",
    )
    parser.add_argument("--model_ckpt_dir", type=str, default="./model_ckpt/")
    parser.add_argument("--data_dir", type=str, default="../../data/imagenet")
    parser.add_argument("--pin_memory", action="store_true")

    # -- model / evaluation -----------------------------------------------------
    parser.add_argument("--checkpoint_path", type=str, default=None)
    parser.add_argument(
        "--pconv_fw_type",
        type=str,
        default="slicing",
        help="use 'split_cat' for training/inference and 'slicing' only for inference",
    )
    parser.add_argument(
        "--measure_latency",
        action="store_true",
        help="measure latency or throughput",
    )
    parser.add_argument("--test_phase", action="store_true")
    parser.add_argument("--fuse_conv_bn", action="store_true")

    # -- logging ------------------------------------------------------------------
    parser.add_argument("--wandb_project_name", type=str, default="fasternet")
    parser.add_argument("--wandb_offline", action="store_true")
    parser.add_argument("--wandb_save_dir", type=str, default="./")

    # -- checkpoint conversion ------------------------------------------------------
    parser.add_argument(
        "--pl_ckpt_2_torch_pth",
        action="store_true",
        help="convert pl .ckpt file to torch .pth file, and then exit",
    )

    args = parser.parse_args()
    # Load the config file and merge it with the parsed arguments
    # (merge semantics are defined by merge_args_cfg in the project utils).
    cfg = load_cfg(args.cfg)
    args = merge_args_cfg(args, cfg)

    # please change {WANDB_API_KEY} to your personal api_key before using wandb
    # os.environ["WANDB_API_KEY"] = "{WANDB_API_KEY}"