rl
rl copied to clipboard
[Feature] Faster RNNs (no split)
from torchrl.modules import LSTM
import torch
from torch.utils.benchmark import Timer
from torchrl.modules.tensordict_module.rnn import _get_num_per_traj_init, _split_and_pad_sequence
# Benchmark configuration: the input `x` is created below with shape (b, t, c).
b = 10  # size of the first dimension of `x` (the LSTM is built with batch_first=True)
t = 100  # size of the second dimension of `x`
c = 32  # passed as both positional size arguments of the LSTM
device = "cuda"


def split_and_run(x, done):
    """Run the module-level ``lstm`` on ``x`` after splitting it at ``done``.

    An ``is_init`` mask is derived from ``done`` by shifting it one step along
    dim 1 and marking the first step of every row as an initial step. The mask
    is then handed to ``_get_num_per_traj_init`` and the resulting ``splits``
    to ``_split_and_pad_sequence``.

    Args:
        x: input tensor; only ever called here with shape ``(b, t, c)``.
        done: boolean tensor; only ever called here with shape ``(b, t, 1)``.

    Returns:
        Whatever ``lstm`` returns for the split-and-padded input.
    """
    is_init = torch.cat([torch.ones_like(done[:, :1]), done[:, :-1]], 1)
    # Drop only the trailing singleton dimension: a bare ``squeeze()`` would
    # also remove the batch/time dimension whenever it happens to have size 1.
    # NOTE(review): presumably the helper expects a (batch, time) mask -- confirm.
    splits = _get_num_per_traj_init(is_init.squeeze(-1))
    x_split = _split_and_pad_sequence(x, splits)
    return lstm(x_split)


# Everything below runs inside the device context so that tensors and
# parameters created without an explicit device are placed on `device`.
with torch.device(device):
    lstm = LSTM(c, c, batch_first=True)
    x = torch.randn(b, t, c)

    # Resample the mask until at least one entry is True (each entry is drawn
    # with probability 0.05), so the benchmark never runs on an all-False mask.
    while True:
        done = torch.zeros(b, t, 1, dtype=torch.bool).bernoulli_(0.05)
        if done.any():
            break

    # 1) LSTM called directly with the `done` mask.
    print(
        Timer(
            "lstm(x, None, done)",
            globals={"lstm": lstm, "done": done, "x": x},
        ).adaptive_autorange()
    )

    # 2) Input split and padded first, then fed to the LSTM.
    print(
        Timer(
            "split_and_run(x, done)",
            globals={"split_and_run": split_and_run, "done": done, "x": x},
        ).adaptive_autorange()
    )

    # 3) Same call as (1), on torch.compile'd versions of the module.
    for mode in ["default", "reduce-overhead"]:
        c_lstm = torch.compile(lstm, mode=mode)
        # Call once before timing so the first invocation of the compiled
        # module happens outside the measured runs.
        c_lstm(x, None, done)
        print(
            mode,
            Timer(
                "lstm(x, None, done)",
                globals={"lstm": c_lstm, "done": done, "x": x},
            ).adaptive_autorange(),
        )
:link: Helpful Links
:test_tube: See artifacts and rendered test results at hud.pytorch.org/pr/pytorch/rl/2009
- :page_facing_up: Preview Python docs built from this PR
Note: Links to docs will display an error until the docs builds have been completed.
:x: 3 New Failures
As of commit 43a87b9b89bda8ed65843b207d383f374b44de2e with merge base 07eb02d05ee541db6564ef95f29901a60a05a987:
NEW FAILURES - The following jobs have failed:
- Unit-tests on Linux / tests-gpu (3.8, 12.1) / linux-job (gh)
  test/test_cost.py::TestValues::test_gae_multidim[False-feature_dim1-dtype1-3-N1-0.99-0.99-device0]
- Unit-tests on Linux / tests-optdeps (3.9, 12.1) / linux-job (gh)
  test/test_cost.py::TestValues::test_gae_multidim[False-feature_dim1-dtype1-3-N1-0.99-0.99-device0]
- Unit-tests on Linux / tests-stable-gpu (3.8, 11.8) / linux-job (gh)
  test/test_cost.py::TestValues::test_gae_multidim[False-feature_dim1-dtype1-100-N1-0.1-0.1-device0]
This comment was automatically generated by Dr. CI and updates every 15 minutes.
$\color{#D29922}\textsf{\Large⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests
Total Benchmarks: 91. Improved: $\large\color{#35bf28}9$. Worsened: $\large\color{#d91a1a}11$.
Expand to view detailed results
Name | Max | Mean | Ops | Ops on Repo HEAD | Change |
---|---|---|---|---|---|
test_single | 60.9637ms | 60.5021ms | 16.5284 Ops/s | 17.5356 Ops/s | $\textbf{\color{#d91a1a}-5.74\%}$ |
test_sync | 32.3769ms | 31.9850ms | 31.2647 Ops/s | 33.3982 Ops/s | $\textbf{\color{#d91a1a}-6.39\%}$ |
test_async | 66.6913ms | 32.4582ms | 30.8089 Ops/s | 35.9085 Ops/s | $\textbf{\color{#d91a1a}-14.20\%}$ |
test_simple | 0.4808s | 0.4299s | 2.3259 Ops/s | 3.0553 Ops/s | $\textbf{\color{#d91a1a}-23.87\%}$ |
test_transformed | 0.6275s | 0.5802s | 1.7235 Ops/s | 2.1212 Ops/s | $\textbf{\color{#d91a1a}-18.75\%}$ |
test_serial | 1.3429s | 1.2979s | 0.7705 Ops/s | 0.8498 Ops/s | $\textbf{\color{#d91a1a}-9.33\%}$ |
test_parallel | 1.1613s | 1.1157s | 0.8963 Ops/s | 0.9733 Ops/s | $\textbf{\color{#d91a1a}-7.91\%}$ |
test_step_mdp_speed[True-True-True-True-True] | 0.1226ms | 20.7906μs | 48.0987 KOps/s | 46.7732 KOps/s | $\color{#35bf28}+2.83\%$ |
test_step_mdp_speed[True-True-True-True-False] | 46.0260μs | 12.6336μs | 79.1541 KOps/s | 77.1652 KOps/s | $\color{#35bf28}+2.58\%$ |
test_step_mdp_speed[True-True-True-False-True] | 37.9810μs | 12.3657μs | 80.8687 KOps/s | 78.4269 KOps/s | $\color{#35bf28}+3.11\%$ |
test_step_mdp_speed[True-True-True-False-False] | 41.3470μs | 7.3416μs | 136.2095 KOps/s | 129.6577 KOps/s | $\textbf{\color{#35bf28}+5.05\%}$ |
test_step_mdp_speed[True-True-False-True-True] | 56.8670μs | 22.1063μs | 45.2359 KOps/s | 43.8294 KOps/s | $\color{#35bf28}+3.21\%$ |
test_step_mdp_speed[True-True-False-True-False] | 41.4580μs | 13.8601μs | 72.1498 KOps/s | 69.5989 KOps/s | $\color{#35bf28}+3.67\%$ |
test_step_mdp_speed[True-True-False-False-True] | 37.0190μs | 13.4851μs | 74.1557 KOps/s | 72.1862 KOps/s | $\color{#35bf28}+2.73\%$ |
test_step_mdp_speed[True-True-False-False-False] | 44.0920μs | 8.6233μs | 115.9644 KOps/s | 112.2817 KOps/s | $\color{#35bf28}+3.28\%$ |
test_step_mdp_speed[True-False-True-True-True] | 55.3340μs | 23.7378μs | 42.1268 KOps/s | 41.1643 KOps/s | $\color{#35bf28}+2.34\%$ |
test_step_mdp_speed[True-False-True-True-False] | 69.2500μs | 15.5319μs | 64.3836 KOps/s | 63.4547 KOps/s | $\color{#35bf28}+1.46\%$ |
test_step_mdp_speed[True-False-True-False-True] | 48.3900μs | 13.4366μs | 74.4238 KOps/s | 71.7516 KOps/s | $\color{#35bf28}+3.72\%$ |
test_step_mdp_speed[True-False-True-False-False] | 30.4070μs | 8.5753μs | 116.6136 KOps/s | 111.6215 KOps/s | $\color{#35bf28}+4.47\%$ |
test_step_mdp_speed[True-False-False-True-True] | 67.6260μs | 24.3775μs | 41.0214 KOps/s | 39.3512 KOps/s | $\color{#35bf28}+4.24\%$ |
test_step_mdp_speed[True-False-False-True-False] | 41.3470μs | 16.2063μs | 61.7045 KOps/s | 59.5165 KOps/s | $\color{#35bf28}+3.68\%$ |
test_step_mdp_speed[True-False-False-False-True] | 51.4160μs | 14.4414μs | 69.2455 KOps/s | 66.3971 KOps/s | $\color{#35bf28}+4.29\%$ |
test_step_mdp_speed[True-False-False-False-False] | 33.3830μs | 9.6696μs | 103.4170 KOps/s | 98.5458 KOps/s | $\color{#35bf28}+4.94\%$ |
test_step_mdp_speed[False-True-True-True-True] | 54.0610μs | 23.3293μs | 42.8645 KOps/s | 41.3666 KOps/s | $\color{#35bf28}+3.62\%$ |
test_step_mdp_speed[False-True-True-True-False] | 47.6590μs | 15.1219μs | 66.1291 KOps/s | 64.2725 KOps/s | $\color{#35bf28}+2.89\%$ |
test_step_mdp_speed[False-True-True-False-True] | 50.8750μs | 15.3997μs | 64.9364 KOps/s | 62.1638 KOps/s | $\color{#35bf28}+4.46\%$ |
test_step_mdp_speed[False-True-True-False-False] | 37.5700μs | 9.6998μs | 103.0944 KOps/s | 99.0524 KOps/s | $\color{#35bf28}+4.08\%$ |
test_step_mdp_speed[False-True-False-True-True] | 40.0450μs | 24.8244μs | 40.2830 KOps/s | 39.1203 KOps/s | $\color{#35bf28}+2.97\%$ |
test_step_mdp_speed[False-True-False-True-False] | 60.4030μs | 16.1872μs | 61.7772 KOps/s | 59.7702 KOps/s | $\color{#35bf28}+3.36\%$ |
test_step_mdp_speed[False-True-False-False-True] | 45.1240μs | 16.5502μs | 60.4222 KOps/s | 57.8772 KOps/s | $\color{#35bf28}+4.40\%$ |
test_step_mdp_speed[False-True-False-False-False] | 33.5120μs | 10.8325μs | 92.3145 KOps/s | 87.9551 KOps/s | $\color{#35bf28}+4.96\%$ |
test_step_mdp_speed[False-False-True-True-True] | 65.1620μs | 25.4563μs | 39.2830 KOps/s | 37.5430 KOps/s | $\color{#35bf28}+4.63\%$ |
test_step_mdp_speed[False-False-True-True-False] | 44.2030μs | 17.5159μs | 57.0909 KOps/s | 55.3564 KOps/s | $\color{#35bf28}+3.13\%$ |
test_step_mdp_speed[False-False-True-False-True] | 43.5410μs | 16.5094μs | 60.5715 KOps/s | 58.5484 KOps/s | $\color{#35bf28}+3.46\%$ |
test_step_mdp_speed[False-False-True-False-False] | 46.8670μs | 10.7644μs | 92.8986 KOps/s | 88.4212 KOps/s | $\textbf{\color{#35bf28}+5.06\%}$ |
test_step_mdp_speed[False-False-False-True-True] | 55.2230μs | 26.6206μs | 37.5649 KOps/s | 36.6278 KOps/s | $\color{#35bf28}+2.56\%$ |
test_step_mdp_speed[False-False-False-True-False] | 46.8470μs | 18.4605μs | 54.1697 KOps/s | 52.9629 KOps/s | $\color{#35bf28}+2.28\%$ |
test_step_mdp_speed[False-False-False-False-True] | 48.0590μs | 17.4886μs | 57.1801 KOps/s | 52.9840 KOps/s | $\textbf{\color{#35bf28}+7.92\%}$ |
test_step_mdp_speed[False-False-False-False-False] | 41.2170μs | 11.8948μs | 84.0703 KOps/s | 81.1447 KOps/s | $\color{#35bf28}+3.61\%$ |
test_values[generalized_advantage_estimate-True-True] | 12.4048ms | 9.4362ms | 105.9752 Ops/s | 106.6747 Ops/s | $\color{#d91a1a}-0.66\%$ |
test_values[vec_generalized_advantage_estimate-True-True] | 35.8098ms | 32.8933ms | 30.4013 Ops/s | 30.3001 Ops/s | $\color{#35bf28}+0.33\%$ |
test_values[td0_return_estimate-False-False] | 0.2476ms | 0.1672ms | 5.9798 KOps/s | 6.1014 KOps/s | $\color{#d91a1a}-1.99\%$ |
test_values[td1_return_estimate-False-False] | 23.5324ms | 23.2258ms | 43.0557 Ops/s | 42.6392 Ops/s | $\color{#35bf28}+0.98\%$ |
test_values[vec_td1_return_estimate-False-False] | 34.8936ms | 33.0134ms | 30.2907 Ops/s | 30.1346 Ops/s | $\color{#35bf28}+0.52\%$ |
test_values[td_lambda_return_estimate-True-False] | 37.7016ms | 33.9723ms | 29.4357 Ops/s | 29.2514 Ops/s | $\color{#35bf28}+0.63\%$ |
test_values[vec_td_lambda_return_estimate-True-False] | 34.1361ms | 32.8988ms | 30.3963 Ops/s | 30.1684 Ops/s | $\color{#35bf28}+0.76\%$ |
test_gae_speed[generalized_advantage_estimate-False-1-512] | 8.4608ms | 8.1867ms | 122.1494 Ops/s | 120.5404 Ops/s | $\color{#35bf28}+1.33\%$ |
test_gae_speed[vec_generalized_advantage_estimate-True-1-512] | 2.0737ms | 1.8968ms | 527.1963 Ops/s | 550.0661 Ops/s | $\color{#d91a1a}-4.16\%$ |
test_gae_speed[vec_generalized_advantage_estimate-False-1-512] | 0.4880ms | 0.3530ms | 2.8329 KOps/s | 2.8098 KOps/s | $\color{#35bf28}+0.82\%$ |
test_gae_speed[vec_generalized_advantage_estimate-True-32-512] | 41.7744ms | 41.0360ms | 24.3689 Ops/s | 23.1003 Ops/s | $\textbf{\color{#35bf28}+5.49\%}$ |
test_gae_speed[vec_generalized_advantage_estimate-False-32-512] | 3.5923ms | 3.0288ms | 330.1595 Ops/s | 332.7066 Ops/s | $\color{#d91a1a}-0.77\%$ |
test_dqn_speed | 1.8059ms | 1.3533ms | 738.9497 Ops/s | 728.1733 Ops/s | $\color{#35bf28}+1.48\%$ |
test_ddpg_speed | 2.8545ms | 2.6920ms | 371.4773 Ops/s | 374.4995 Ops/s | $\color{#d91a1a}-0.81\%$ |
test_sac_speed | 73.3868ms | 8.7531ms | 114.2449 Ops/s | 123.0610 Ops/s | $\textbf{\color{#d91a1a}-7.16\%}$ |
test_redq_speed | 14.1630ms | 13.0268ms | 76.7648 Ops/s | 75.0926 Ops/s | $\color{#35bf28}+2.23\%$ |
test_redq_deprec_speed | 13.6232ms | 12.9572ms | 77.1771 Ops/s | 72.6623 Ops/s | $\textbf{\color{#35bf28}+6.21\%}$ |
test_td3_speed | 8.6359ms | 8.1117ms | 123.2784 Ops/s | 122.5047 Ops/s | $\color{#35bf28}+0.63\%$ |
test_cql_speed | 36.8167ms | 35.8305ms | 27.9092 Ops/s | 27.9821 Ops/s | $\color{#d91a1a}-0.26\%$ |
test_a2c_speed | 8.3996ms | 7.2991ms | 137.0033 Ops/s | 136.9256 Ops/s | $\color{#35bf28}+0.06\%$ |
test_ppo_speed | 7.9875ms | 7.5694ms | 132.1117 Ops/s | 132.6493 Ops/s | $\color{#d91a1a}-0.41\%$ |
test_reinforce_speed | 7.7225ms | 6.5463ms | 152.7575 Ops/s | 154.0636 Ops/s | $\color{#d91a1a}-0.85\%$ |
test_iql_speed | 33.3152ms | 32.1272ms | 31.1263 Ops/s | 31.0966 Ops/s | $\color{#35bf28}+0.10\%$ |
test_rb_sample[TensorDictReplayBuffer-ListStorage-RandomSampler-4000] | 2.3436ms | 2.1173ms | 472.2903 Ops/s | 448.9410 Ops/s | $\textbf{\color{#35bf28}+5.20\%}$ |
test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] | 1.0198ms | 0.4960ms | 2.0159 KOps/s | 2.0161 KOps/s | $-0.01\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] | 0.6929ms | 0.4708ms | 2.1240 KOps/s | 2.1365 KOps/s | $\color{#d91a1a}-0.58\%$ |
test_rb_sample[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] | 3.2545ms | 2.1076ms | 474.4693 Ops/s | 456.5760 Ops/s | $\color{#35bf28}+3.92\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] | 0.7862ms | 0.4894ms | 2.0432 KOps/s | 2.0737 KOps/s | $\color{#d91a1a}-1.47\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] | 3.5758ms | 0.4669ms | 2.1418 KOps/s | 2.0372 KOps/s | $\textbf{\color{#35bf28}+5.13\%}$ |
test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-sampler6-10000] | 1.5688ms | 1.3004ms | 769.0014 Ops/s | 775.1283 Ops/s | $\color{#d91a1a}-0.79\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-sampler7-10000] | 4.1489ms | 1.2403ms | 806.2393 Ops/s | 819.6216 Ops/s | $\color{#d91a1a}-1.63\%$ |
test_rb_sample[TensorDictPrioritizedReplayBuffer-ListStorage-None-4000] | 3.2905ms | 2.2581ms | 442.8598 Ops/s | 430.0656 Ops/s | $\color{#35bf28}+2.97\%$ |
test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] | 1.1700ms | 0.6125ms | 1.6328 KOps/s | 1.6423 KOps/s | $\color{#d91a1a}-0.58\%$ |
test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] | 0.8835ms | 0.5875ms | 1.7022 KOps/s | 1.7173 KOps/s | $\color{#d91a1a}-0.88\%$ |
test_rb_iterate[TensorDictReplayBuffer-ListStorage-RandomSampler-4000] | 3.1528ms | 2.0497ms | 487.8685 Ops/s | 451.9710 Ops/s | $\textbf{\color{#35bf28}+7.94\%}$ |
test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] | 0.6218ms | 0.4948ms | 2.0212 KOps/s | 2.0148 KOps/s | $\color{#35bf28}+0.32\%$ |
test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] | 0.8881ms | 0.4726ms | 2.1161 KOps/s | 2.1349 KOps/s | $\color{#d91a1a}-0.88\%$ |
test_rb_iterate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] | 2.2402ms | 2.0799ms | 480.7976 Ops/s | 460.1857 Ops/s | $\color{#35bf28}+4.48\%$ |
test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] | 0.9805ms | 0.5347ms | 1.8701 KOps/s | 2.0591 KOps/s | $\textbf{\color{#d91a1a}-9.18\%}$ |
test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] | 0.6147ms | 0.4646ms | 2.1526 KOps/s | 2.1381 KOps/s | $\color{#35bf28}+0.68\%$ |
test_rb_iterate[TensorDictPrioritizedReplayBuffer-ListStorage-None-4000] | 3.4659ms | 2.2279ms | 448.8437 Ops/s | 431.3121 Ops/s | $\color{#35bf28}+4.06\%$ |
test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] | 0.8969ms | 0.6148ms | 1.6267 KOps/s | 1.6510 KOps/s | $\color{#d91a1a}-1.48\%$ |
test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] | 90.1428ms | 0.7063ms | 1.4159 KOps/s | 1.7186 KOps/s | $\textbf{\color{#d91a1a}-17.61\%}$ |
test_rb_populate[TensorDictReplayBuffer-ListStorage-RandomSampler-400] | 79.3715ms | 5.1322ms | 194.8491 Ops/s | 144.5542 Ops/s | $\textbf{\color{#35bf28}+34.79\%}$ |
test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-400] | 15.6371ms | 11.9192ms | 83.8983 Ops/s | 84.3836 Ops/s | $\color{#d91a1a}-0.58\%$ |
test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-400] | 1.4999ms | 1.0285ms | 972.3079 Ops/s | 988.1546 Ops/s | $\color{#d91a1a}-1.60\%$ |
test_rb_populate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-400] | 83.9017ms | 6.7488ms | 148.1735 Ops/s | 148.3113 Ops/s | $\color{#d91a1a}-0.09\%$ |
test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-400] | 14.2095ms | 11.8850ms | 84.1399 Ops/s | 83.9646 Ops/s | $\color{#35bf28}+0.21\%$ |
test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-400] | 1.5017ms | 1.0308ms | 970.0763 Ops/s | 939.1129 Ops/s | $\color{#35bf28}+3.30\%$ |
test_rb_populate[TensorDictPrioritizedReplayBuffer-ListStorage-None-400] | 86.8732ms | 7.2778ms | 137.4042 Ops/s | 181.5859 Ops/s | $\textbf{\color{#d91a1a}-24.33\%}$ |
test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-400] | 14.2319ms | 12.1280ms | 82.4540 Ops/s | 82.1569 Ops/s | $\color{#35bf28}+0.36\%$ |
test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-400] | 1.8059ms | 1.3798ms | 724.7206 Ops/s | 732.1254 Ops/s | $\color{#d91a1a}-1.01\%$ |
$\color{#D29922}\textsf{\Large⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests
Total Benchmarks: 94. Improved: $\large\color{#35bf28}1$. Worsened: $\large\color{#d91a1a}10$.
Expand to view detailed results
Name | Max | Mean | Ops | Ops on Repo HEAD | Change |
---|---|---|---|---|---|
test_single | 0.1139s | 0.1138s | 8.7887 Ops/s | 9.1754 Ops/s | $\color{#d91a1a}-4.21\%$ |
test_sync | 95.8251ms | 95.1042ms | 10.5148 Ops/s | 10.9562 Ops/s | $\color{#d91a1a}-4.03\%$ |
test_async | 0.1796s | 90.9924ms | 10.9899 Ops/s | 11.2618 Ops/s | $\color{#d91a1a}-2.41\%$ |
test_single_pixels | 0.1948s | 0.1320s | 7.5772 Ops/s | 8.8751 Ops/s | $\textbf{\color{#d91a1a}-14.62\%}$ |
test_sync_pixels | 82.0569ms | 80.4134ms | 12.4357 Ops/s | 14.9118 Ops/s | $\textbf{\color{#d91a1a}-16.60\%}$ |
test_async_pixels | 0.1523s | 64.0684ms | 15.6083 Ops/s | 17.9357 Ops/s | $\textbf{\color{#d91a1a}-12.98\%}$ |
test_simple | 0.8215s | 0.8182s | 1.2222 Ops/s | 1.4075 Ops/s | $\textbf{\color{#d91a1a}-13.16\%}$ |
test_transformed | 1.1077s | 1.0540s | 0.9487 Ops/s | 1.0899 Ops/s | $\textbf{\color{#d91a1a}-12.95\%}$ |
test_serial | 2.3332s | 2.2764s | 0.4393 Ops/s | 0.4633 Ops/s | $\textbf{\color{#d91a1a}-5.17\%}$ |
test_parallel | 2.0202s | 1.9419s | 0.5150 Ops/s | 0.5484 Ops/s | $\textbf{\color{#d91a1a}-6.10\%}$ |
test_step_mdp_speed[True-True-True-True-True] | 71.4610μs | 33.9048μs | 29.4943 KOps/s | 29.5266 KOps/s | $\color{#d91a1a}-0.11\%$ |
test_step_mdp_speed[True-True-True-True-False] | 70.0220μs | 19.8774μs | 50.3084 KOps/s | 50.8578 KOps/s | $\color{#d91a1a}-1.08\%$ |
test_step_mdp_speed[True-True-True-False-True] | 42.6810μs | 18.9178μs | 52.8602 KOps/s | 51.7050 KOps/s | $\color{#35bf28}+2.23\%$ |
test_step_mdp_speed[True-True-True-False-False] | 32.1510μs | 11.3971μs | 87.7414 KOps/s | 90.0214 KOps/s | $\color{#d91a1a}-2.53\%$ |
test_step_mdp_speed[True-True-False-True-True] | 65.0820μs | 34.9864μs | 28.5825 KOps/s | 27.9496 KOps/s | $\color{#35bf28}+2.26\%$ |
test_step_mdp_speed[True-True-False-True-False] | 44.3810μs | 21.8162μs | 45.8376 KOps/s | 46.5803 KOps/s | $\color{#d91a1a}-1.59\%$ |
test_step_mdp_speed[True-True-False-False-True] | 51.1400μs | 20.9281μs | 47.7826 KOps/s | 47.3631 KOps/s | $\color{#35bf28}+0.89\%$ |
test_step_mdp_speed[True-True-False-False-False] | 47.8910μs | 13.0466μs | 76.6482 KOps/s | 75.9765 KOps/s | $\color{#35bf28}+0.88\%$ |
test_step_mdp_speed[True-False-True-True-True] | 62.2010μs | 37.0022μs | 27.0254 KOps/s | 26.4842 KOps/s | $\color{#35bf28}+2.04\%$ |
test_step_mdp_speed[True-False-True-True-False] | 51.0110μs | 23.9470μs | 41.7588 KOps/s | 42.5059 KOps/s | $\color{#d91a1a}-1.76\%$ |
test_step_mdp_speed[True-False-True-False-True] | 44.8210μs | 20.9052μs | 47.8350 KOps/s | 47.5473 KOps/s | $\color{#35bf28}+0.61\%$ |
test_step_mdp_speed[True-False-True-False-False] | 69.8000μs | 12.9831μs | 77.0230 KOps/s | 75.7613 KOps/s | $\color{#35bf28}+1.67\%$ |
test_step_mdp_speed[True-False-False-True-True] | 63.1200μs | 38.7489μs | 25.8072 KOps/s | 25.3183 KOps/s | $\color{#35bf28}+1.93\%$ |
test_step_mdp_speed[True-False-False-True-False] | 56.6900μs | 25.3722μs | 39.4132 KOps/s | 39.5616 KOps/s | $\color{#d91a1a}-0.37\%$ |
test_step_mdp_speed[True-False-False-False-True] | 42.4600μs | 22.4340μs | 44.5751 KOps/s | 44.1087 KOps/s | $\color{#35bf28}+1.06\%$ |
test_step_mdp_speed[True-False-False-False-False] | 39.1600μs | 14.8576μs | 67.3058 KOps/s | 66.6024 KOps/s | $\color{#35bf28}+1.06\%$ |
test_step_mdp_speed[False-True-True-True-True] | 65.2800μs | 37.0329μs | 27.0030 KOps/s | 26.4746 KOps/s | $\color{#35bf28}+2.00\%$ |
test_step_mdp_speed[False-True-True-True-False] | 46.4410μs | 23.9756μs | 41.7091 KOps/s | 42.0645 KOps/s | $\color{#d91a1a}-0.84\%$ |
test_step_mdp_speed[False-True-True-False-True] | 55.3700μs | 24.8903μs | 40.1763 KOps/s | 39.5606 KOps/s | $\color{#35bf28}+1.56\%$ |
test_step_mdp_speed[False-True-True-False-False] | 34.7110μs | 15.0521μs | 66.4361 KOps/s | 67.2739 KOps/s | $\color{#d91a1a}-1.25\%$ |
test_step_mdp_speed[False-True-False-True-True] | 63.1120μs | 39.4706μs | 25.3353 KOps/s | 25.3191 KOps/s | $\color{#35bf28}+0.06\%$ |
test_step_mdp_speed[False-True-False-True-False] | 65.8910μs | 25.2719μs | 39.5697 KOps/s | 39.4762 KOps/s | $\color{#35bf28}+0.24\%$ |
test_step_mdp_speed[False-True-False-False-True] | 58.4200μs | 26.4667μs | 37.7833 KOps/s | 36.9986 KOps/s | $\color{#35bf28}+2.12\%$ |
test_step_mdp_speed[False-True-False-False-False] | 38.7900μs | 16.9940μs | 58.8444 KOps/s | 59.3730 KOps/s | $\color{#d91a1a}-0.89\%$ |
test_step_mdp_speed[False-False-True-True-True] | 69.4110μs | 39.9105μs | 25.0561 KOps/s | 24.1250 KOps/s | $\color{#35bf28}+3.86\%$ |
test_step_mdp_speed[False-False-True-True-False] | 51.3710μs | 27.2023μs | 36.7616 KOps/s | 36.6574 KOps/s | $\color{#35bf28}+0.28\%$ |
test_step_mdp_speed[False-False-True-False-True] | 51.2710μs | 26.1739μs | 38.2060 KOps/s | 37.0073 KOps/s | $\color{#35bf28}+3.24\%$ |
test_step_mdp_speed[False-False-True-False-False] | 42.4700μs | 16.6033μs | 60.2290 KOps/s | 59.0955 KOps/s | $\color{#35bf28}+1.92\%$ |
test_step_mdp_speed[False-False-False-True-True] | 92.3010μs | 42.0345μs | 23.7900 KOps/s | 23.4655 KOps/s | $\color{#35bf28}+1.38\%$ |
test_step_mdp_speed[False-False-False-True-False] | 62.4100μs | 28.7607μs | 34.7697 KOps/s | 34.2941 KOps/s | $\color{#35bf28}+1.39\%$ |
test_step_mdp_speed[False-False-False-False-True] | 60.2800μs | 28.1434μs | 35.5323 KOps/s | 34.9905 KOps/s | $\color{#35bf28}+1.55\%$ |
test_step_mdp_speed[False-False-False-False-False] | 38.9510μs | 18.4370μs | 54.2387 KOps/s | 53.7129 KOps/s | $\color{#35bf28}+0.98\%$ |
test_values[generalized_advantage_estimate-True-True] | 26.8979ms | 25.5855ms | 39.0846 Ops/s | 39.3967 Ops/s | $\color{#d91a1a}-0.79\%$ |
test_values[vec_generalized_advantage_estimate-True-True] | 84.6726ms | 3.2706ms | 305.7585 Ops/s | 302.1893 Ops/s | $\color{#35bf28}+1.18\%$ |
test_values[td0_return_estimate-False-False] | 0.1039ms | 66.6850μs | 14.9959 KOps/s | 15.2672 KOps/s | $\color{#d91a1a}-1.78\%$ |
test_values[td1_return_estimate-False-False] | 56.6339ms | 55.7660ms | 17.9321 Ops/s | 18.3514 Ops/s | $\color{#d91a1a}-2.28\%$ |
test_values[vec_td1_return_estimate-False-False] | 2.0815ms | 1.7747ms | 563.4709 Ops/s | 566.3065 Ops/s | $\color{#d91a1a}-0.50\%$ |
test_values[td_lambda_return_estimate-True-False] | 90.8781ms | 88.9471ms | 11.2426 Ops/s | 11.4778 Ops/s | $\color{#d91a1a}-2.05\%$ |
test_values[vec_td_lambda_return_estimate-True-False] | 2.1229ms | 1.7729ms | 564.0372 Ops/s | 566.4775 Ops/s | $\color{#d91a1a}-0.43\%$ |
test_gae_speed[generalized_advantage_estimate-False-1-512] | 25.3956ms | 24.9207ms | 40.1273 Ops/s | 42.1383 Ops/s | $\color{#d91a1a}-4.77\%$ |
test_gae_speed[vec_generalized_advantage_estimate-True-1-512] | 0.8989ms | 0.7111ms | 1.4062 KOps/s | 1.4048 KOps/s | $\color{#35bf28}+0.10\%$ |
test_gae_speed[vec_generalized_advantage_estimate-False-1-512] | 0.7336ms | 0.6710ms | 1.4902 KOps/s | 1.5259 KOps/s | $\color{#d91a1a}-2.34\%$ |
test_gae_speed[vec_generalized_advantage_estimate-True-32-512] | 1.5430ms | 1.4661ms | 682.0829 Ops/s | 683.8613 Ops/s | $\color{#d91a1a}-0.26\%$ |
test_gae_speed[vec_generalized_advantage_estimate-False-32-512] | 0.9536ms | 0.6798ms | 1.4710 KOps/s | 1.4772 KOps/s | $\color{#d91a1a}-0.42\%$ |
test_dqn_speed | 1.9531ms | 1.4350ms | 696.8431 Ops/s | 689.3707 Ops/s | $\color{#35bf28}+1.08\%$ |
test_ddpg_speed | 2.8951ms | 2.7429ms | 364.5829 Ops/s | 367.8574 Ops/s | $\color{#d91a1a}-0.89\%$ |
test_sac_speed | 8.7645ms | 8.0797ms | 123.7668 Ops/s | 124.1927 Ops/s | $\color{#d91a1a}-0.34\%$ |
test_redq_speed | 11.2746ms | 10.2908ms | 97.1740 Ops/s | 98.0889 Ops/s | $\color{#d91a1a}-0.93\%$ |
test_redq_deprec_speed | 11.6244ms | 11.1205ms | 89.9243 Ops/s | 89.3690 Ops/s | $\color{#35bf28}+0.62\%$ |
test_td3_speed | 8.1099ms | 8.0115ms | 124.8211 Ops/s | 124.2748 Ops/s | $\color{#35bf28}+0.44\%$ |
test_cql_speed | 0.1033s | 27.2276ms | 36.7275 Ops/s | 39.2186 Ops/s | $\textbf{\color{#d91a1a}-6.35\%}$ |
test_a2c_speed | 5.8159ms | 5.6161ms | 178.0609 Ops/s | 176.8037 Ops/s | $\color{#35bf28}+0.71\%$ |
test_ppo_speed | 6.1599ms | 5.9806ms | 167.2063 Ops/s | 167.5196 Ops/s | $\color{#d91a1a}-0.19\%$ |
test_reinforce_speed | 4.7482ms | 4.5424ms | 220.1460 Ops/s | 220.9254 Ops/s | $\color{#d91a1a}-0.35\%$ |
test_iql_speed | 20.5088ms | 19.7068ms | 50.7439 Ops/s | 51.5529 Ops/s | $\color{#d91a1a}-1.57\%$ |
test_rb_sample[TensorDictReplayBuffer-ListStorage-RandomSampler-4000] | 3.0711ms | 2.8808ms | 347.1270 Ops/s | 343.9531 Ops/s | $\color{#35bf28}+0.92\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] | 1.0481ms | 0.5448ms | 1.8355 KOps/s | 1.8526 KOps/s | $\color{#d91a1a}-0.93\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] | 0.7138ms | 0.5182ms | 1.9298 KOps/s | 1.9344 KOps/s | $\color{#d91a1a}-0.23\%$ |
test_rb_sample[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] | 3.0136ms | 2.8719ms | 348.2007 Ops/s | 343.6263 Ops/s | $\color{#35bf28}+1.33\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] | 1.0718ms | 0.5332ms | 1.8756 KOps/s | 1.8879 KOps/s | $\color{#d91a1a}-0.65\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] | 0.6834ms | 0.5111ms | 1.9565 KOps/s | 1.9760 KOps/s | $\color{#d91a1a}-0.98\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-sampler6-10000] | 1.8062ms | 1.5433ms | 647.9669 Ops/s | 664.4524 Ops/s | $\color{#d91a1a}-2.48\%$ |
test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-sampler7-10000] | 5.4332ms | 1.4750ms | 677.9601 Ops/s | 687.7930 Ops/s | $\color{#d91a1a}-1.43\%$ |
test_rb_sample[TensorDictPrioritizedReplayBuffer-ListStorage-None-4000] | 3.0994ms | 2.9764ms | 335.9801 Ops/s | 331.0377 Ops/s | $\color{#35bf28}+1.49\%$ |
test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] | 0.8545ms | 0.6692ms | 1.4943 KOps/s | 1.3356 KOps/s | $\textbf{\color{#35bf28}+11.88\%}$ |
test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] | 0.1084s | 0.7412ms | 1.3491 KOps/s | 1.5759 KOps/s | $\textbf{\color{#d91a1a}-14.39\%}$ |
test_rb_iterate[TensorDictReplayBuffer-ListStorage-RandomSampler-4000] | 2.9781ms | 2.8613ms | 349.4901 Ops/s | 342.8888 Ops/s | $\color{#35bf28}+1.93\%$ |
test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] | 1.4278ms | 0.5512ms | 1.8143 KOps/s | 1.8403 KOps/s | $\color{#d91a1a}-1.42\%$ |
test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] | 0.6991ms | 0.5273ms | 1.8965 KOps/s | 1.9231 KOps/s | $\color{#d91a1a}-1.38\%$ |
test_rb_iterate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] | 3.1984ms | 2.8732ms | 348.0425 Ops/s | 343.8408 Ops/s | $\color{#35bf28}+1.22\%$ |
test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] | 0.6571ms | 0.5389ms | 1.8556 KOps/s | 1.8676 KOps/s | $\color{#d91a1a}-0.65\%$ |
test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] | 0.1037s | 0.6598ms | 1.5156 KOps/s | 1.9168 KOps/s | $\textbf{\color{#d91a1a}-20.93\%}$ |
test_rb_iterate[TensorDictPrioritizedReplayBuffer-ListStorage-None-4000] | 3.1137ms | 3.0009ms | 333.2301 Ops/s | 327.5774 Ops/s | $\color{#35bf28}+1.73\%$ |
test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] | 0.7911ms | 0.6702ms | 1.4922 KOps/s | 1.5088 KOps/s | $\color{#d91a1a}-1.10\%$ |
test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] | 0.8712ms | 0.6476ms | 1.5443 KOps/s | 1.5528 KOps/s | $\color{#d91a1a}-0.55\%$ |
test_rb_populate[TensorDictReplayBuffer-ListStorage-RandomSampler-400] | 0.1074s | 8.7814ms | 113.8773 Ops/s | 110.4880 Ops/s | $\color{#35bf28}+3.07\%$ |
test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-400] | 17.0949ms | 14.8120ms | 67.5130 Ops/s | 66.8837 Ops/s | $\color{#35bf28}+0.94\%$ |
test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-400] | 2.2559ms | 1.1647ms | 858.5849 Ops/s | 861.1578 Ops/s | $\color{#d91a1a}-0.30\%$ |
test_rb_populate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-400] | 0.1000s | 6.6939ms | 149.3891 Ops/s | 149.0980 Ops/s | $\color{#35bf28}+0.20\%$ |
test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-400] | 17.0107ms | 14.6903ms | 68.0723 Ops/s | 66.5034 Ops/s | $\color{#35bf28}+2.36\%$ |
test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-400] | 2.2976ms | 1.1264ms | 887.7765 Ops/s | 875.0294 Ops/s | $\color{#35bf28}+1.46\%$ |
test_rb_populate[TensorDictPrioritizedReplayBuffer-ListStorage-None-400] | 0.1010s | 8.9521ms | 111.7055 Ops/s | 110.9746 Ops/s | $\color{#35bf28}+0.66\%$ |
test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-400] | 17.6142ms | 15.0572ms | 66.4134 Ops/s | 65.4327 Ops/s | $\color{#35bf28}+1.50\%$ |
test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-400] | 7.3065ms | 1.6121ms | 620.2925 Ops/s | 619.1883 Ops/s | $\color{#35bf28}+0.18\%$ |