rl
rl copied to clipboard
[Refactor] Refactor multi-sync collectors
Description
Refactors collectors:
- MultiSyncCollector can (and should) concatenate data along the time dimension, but the old behaviour is kept for consistency.
Because preemption now requires some reshaping, it could break with LazyStacked tensordicts. A warning should be raised when preemption and LazyStacked tensordicts are used together.
$\color{#D29922}\textsf{\Large⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests
Total Benchmarks: 89. Improved: $\large\color{#35bf28}3$. Worsened: $\large\color{#d91a1a}6$.
Expand to view detailed results
| Name | Max | Mean | Ops | Ops on Repo HEAD | Change |
|---|---|---|---|---|---|
| test_single | 77.4586ms | 76.9546ms | 12.9947 Ops/s | 12.9608 Ops/s | $\color{#35bf28}+0.26\%$ |
| test_sync | 0.1181s | 47.2612ms | 21.1590 Ops/s | 22.7220 Ops/s | $\textbf{\color{#d91a1a}-6.88\%}$ |
| test_async | 0.1499s | 40.0755ms | 24.9529 Ops/s | 24.8438 Ops/s | $\color{#35bf28}+0.44\%$ |
| test_simple | 0.7091s | 0.6384s | 1.5664 Ops/s | 1.5462 Ops/s | $\color{#35bf28}+1.31\%$ |
| test_transformed | 0.8846s | 0.8322s | 1.2016 Ops/s | 1.2020 Ops/s | $\color{#d91a1a}-0.03\%$ |
| test_serial | 1.8682s | 1.8101s | 0.5525 Ops/s | 0.5502 Ops/s | $\color{#35bf28}+0.41\%$ |
| test_parallel | 1.6358s | 1.5451s | 0.6472 Ops/s | 0.6694 Ops/s | $\color{#d91a1a}-3.32\%$ |
| test_step_mdp_speed[True-True-True-True-True] | 0.1452ms | 44.6093μs | 22.4169 KOps/s | 22.5386 KOps/s | $\color{#d91a1a}-0.54\%$ |
| test_step_mdp_speed[True-True-True-True-False] | 90.4030μs | 25.2372μs | 39.6240 KOps/s | 39.3150 KOps/s | $\color{#35bf28}+0.79\%$ |
| test_step_mdp_speed[True-True-True-False-True] | 60.5020μs | 31.5400μs | 31.7058 KOps/s | 31.7882 KOps/s | $\color{#d91a1a}-0.26\%$ |
| test_step_mdp_speed[True-True-True-False-False] | 44.4000μs | 19.2278μs | 52.0079 KOps/s | 56.6121 KOps/s | $\textbf{\color{#d91a1a}-8.13\%}$ |
| test_step_mdp_speed[True-True-False-True-True] | 81.3010μs | 45.8951μs | 21.7888 KOps/s | 21.6153 KOps/s | $\color{#35bf28}+0.80\%$ |
| test_step_mdp_speed[True-True-False-True-False] | 0.1623ms | 26.8492μs | 37.2450 KOps/s | 37.0071 KOps/s | $\color{#35bf28}+0.64\%$ |
| test_step_mdp_speed[True-True-False-False-True] | 62.9010μs | 33.4127μs | 29.9287 KOps/s | 30.0769 KOps/s | $\color{#d91a1a}-0.49\%$ |
| test_step_mdp_speed[True-True-False-False-False] | 55.2010μs | 19.6293μs | 50.9443 KOps/s | 50.5721 KOps/s | $\color{#35bf28}+0.74\%$ |
| test_step_mdp_speed[True-False-True-True-True] | 75.3010μs | 47.9834μs | 20.8405 KOps/s | 20.7511 KOps/s | $\color{#35bf28}+0.43\%$ |
| test_step_mdp_speed[True-False-True-True-False] | 89.2010μs | 28.7933μs | 34.7303 KOps/s | 34.2912 KOps/s | $\color{#35bf28}+1.28\%$ |
| test_step_mdp_speed[True-False-True-False-True] | 75.8010μs | 33.7416μs | 29.6370 KOps/s | 29.6684 KOps/s | $\color{#d91a1a}-0.11\%$ |
| test_step_mdp_speed[True-False-True-False-False] | 55.2010μs | 19.9217μs | 50.1966 KOps/s | 51.2747 KOps/s | $\color{#d91a1a}-2.10\%$ |
| test_step_mdp_speed[True-False-False-True-True] | 77.8020μs | 49.7824μs | 20.0874 KOps/s | 19.6797 KOps/s | $\color{#35bf28}+2.07\%$ |
| test_step_mdp_speed[True-False-False-True-False] | 96.5020μs | 30.7219μs | 32.5500 KOps/s | 32.3284 KOps/s | $\color{#35bf28}+0.69\%$ |
| test_step_mdp_speed[True-False-False-False-True] | 61.1010μs | 35.3157μs | 28.3160 KOps/s | 28.3718 KOps/s | $\color{#d91a1a}-0.20\%$ |
| test_step_mdp_speed[True-False-False-False-False] | 57.4010μs | 21.2550μs | 47.0478 KOps/s | 46.7368 KOps/s | $\color{#35bf28}+0.67\%$ |
| test_step_mdp_speed[False-True-True-True-True] | 74.1010μs | 47.8813μs | 20.8850 KOps/s | 20.9068 KOps/s | $\color{#d91a1a}-0.10\%$ |
| test_step_mdp_speed[False-True-True-True-False] | 93.0020μs | 29.0283μs | 34.4491 KOps/s | 34.2922 KOps/s | $\color{#35bf28}+0.46\%$ |
| test_step_mdp_speed[False-True-True-False-True] | 88.8010μs | 37.3768μs | 26.7546 KOps/s | 26.9900 KOps/s | $\color{#d91a1a}-0.87\%$ |
| test_step_mdp_speed[False-True-True-False-False] | 3.3336ms | 21.5280μs | 46.4511 KOps/s | 46.2263 KOps/s | $\color{#35bf28}+0.49\%$ |
| test_step_mdp_speed[False-True-False-True-True] | 0.1155ms | 50.0476μs | 19.9810 KOps/s | 20.1902 KOps/s | $\color{#d91a1a}-1.04\%$ |
| test_step_mdp_speed[False-True-False-True-False] | 59.7010μs | 30.5396μs | 32.7444 KOps/s | 32.3478 KOps/s | $\color{#35bf28}+1.23\%$ |
| test_step_mdp_speed[False-True-False-False-True] | 66.2010μs | 38.7711μs | 25.7924 KOps/s | 25.7444 KOps/s | $\color{#35bf28}+0.19\%$ |
| test_step_mdp_speed[False-True-False-False-False] | 79.2010μs | 23.3987μs | 42.7373 KOps/s | 42.4279 KOps/s | $\color{#35bf28}+0.73\%$ |
| test_step_mdp_speed[False-False-True-True-True] | 0.1526ms | 52.3100μs | 19.1168 KOps/s | 19.4445 KOps/s | $\color{#d91a1a}-1.69\%$ |
| test_step_mdp_speed[False-False-True-True-False] | 57.1010μs | 32.4559μs | 30.8110 KOps/s | 30.1471 KOps/s | $\color{#35bf28}+2.20\%$ |
| test_step_mdp_speed[False-False-True-False-True] | 85.0010μs | 39.0961μs | 25.5780 KOps/s | 25.8086 KOps/s | $\color{#d91a1a}-0.89\%$ |
| test_step_mdp_speed[False-False-True-False-False] | 81.6010μs | 22.9935μs | 43.4906 KOps/s | 42.5238 KOps/s | $\color{#35bf28}+2.27\%$ |
| test_step_mdp_speed[False-False-False-True-True] | 0.1543ms | 52.6399μs | 18.9970 KOps/s | 19.0448 KOps/s | $\color{#d91a1a}-0.25\%$ |
| test_step_mdp_speed[False-False-False-True-False] | 57.6010μs | 33.8048μs | 29.5816 KOps/s | 29.1488 KOps/s | $\color{#35bf28}+1.48\%$ |
| test_step_mdp_speed[False-False-False-False-True] | 95.1010μs | 40.1893μs | 24.8823 KOps/s | 25.1063 KOps/s | $\color{#d91a1a}-0.89\%$ |
| test_step_mdp_speed[False-False-False-False-False] | 54.2010μs | 24.7259μs | 40.4435 KOps/s | 40.1666 KOps/s | $\color{#35bf28}+0.69\%$ |
| test_values[generalized_advantage_estimate-True-True] | 14.8568ms | 14.0291ms | 71.2805 Ops/s | 71.1744 Ops/s | $\color{#35bf28}+0.15\%$ |
| test_values[vec_generalized_advantage_estimate-True-True] | 45.8303ms | 40.9381ms | 24.4271 Ops/s | 24.1802 Ops/s | $\color{#35bf28}+1.02\%$ |
| test_values[td0_return_estimate-False-False] | 0.3594ms | 0.2202ms | 4.5419 KOps/s | 4.7881 KOps/s | $\textbf{\color{#d91a1a}-5.14\%}$ |
| test_values[td1_return_estimate-False-False] | 14.6029ms | 13.7348ms | 72.8076 Ops/s | 74.2988 Ops/s | $\color{#d91a1a}-2.01\%$ |
| test_values[vec_td1_return_estimate-False-False] | 46.3706ms | 41.1601ms | 24.2953 Ops/s | 24.4148 Ops/s | $\color{#d91a1a}-0.49\%$ |
| test_values[td_lambda_return_estimate-True-False] | 34.4865ms | 32.9751ms | 30.3259 Ops/s | 30.3533 Ops/s | $\color{#d91a1a}-0.09\%$ |
| test_values[vec_td_lambda_return_estimate-True-False] | 46.8890ms | 40.9904ms | 24.3959 Ops/s | 24.5983 Ops/s | $\color{#d91a1a}-0.82\%$ |
| test_gae_speed[generalized_advantage_estimate-False-1-512] | 13.2673ms | 12.3604ms | 80.9038 Ops/s | 80.5795 Ops/s | $\color{#35bf28}+0.40\%$ |
| test_gae_speed[vec_generalized_advantage_estimate-True-1-512] | 4.8546ms | 3.3845ms | 295.4644 Ops/s | 301.3508 Ops/s | $\color{#d91a1a}-1.95\%$ |
| test_gae_speed[vec_generalized_advantage_estimate-False-1-512] | 3.4673ms | 0.4714ms | 2.1211 KOps/s | 1.9121 KOps/s | $\textbf{\color{#35bf28}+10.93\%}$ |
| test_gae_speed[vec_generalized_advantage_estimate-True-32-512] | 60.9873ms | 57.0212ms | 17.5373 Ops/s | 18.1515 Ops/s | $\color{#d91a1a}-3.38\%$ |
| test_gae_speed[vec_generalized_advantage_estimate-False-32-512] | 9.8213ms | 2.9037ms | 344.3940 Ops/s | 361.8373 Ops/s | $\color{#d91a1a}-4.82\%$ |
| test_dqn_speed | 10.0235ms | 1.8391ms | 543.7500 Ops/s | 549.8016 Ops/s | $\color{#d91a1a}-1.10\%$ |
| test_ddpg_speed | 7.9931ms | 2.7435ms | 364.5015 Ops/s | 362.1283 Ops/s | $\color{#35bf28}+0.66\%$ |
| test_sac_speed | 15.4276ms | 8.1606ms | 122.5405 Ops/s | 123.3769 Ops/s | $\color{#d91a1a}-0.68\%$ |
| test_redq_speed | 20.4717ms | 15.8468ms | 63.1042 Ops/s | 61.9023 Ops/s | $\color{#35bf28}+1.94\%$ |
| test_redq_deprec_speed | 17.2694ms | 12.6852ms | 78.8318 Ops/s | 78.0242 Ops/s | $\color{#35bf28}+1.03\%$ |
| test_td3_speed | 11.3889ms | 10.2046ms | 97.9951 Ops/s | 99.0616 Ops/s | $\color{#d91a1a}-1.08\%$ |
| test_cql_speed | 30.3091ms | 25.7877ms | 38.7782 Ops/s | 36.5886 Ops/s | $\textbf{\color{#35bf28}+5.98\%}$ |
| test_a2c_speed | 21.6334ms | 5.3343ms | 187.4664 Ops/s | 185.8832 Ops/s | $\color{#35bf28}+0.85\%$ |
| test_ppo_speed | 6.1723ms | 5.5613ms | 179.8126 Ops/s | 175.9214 Ops/s | $\color{#35bf28}+2.21\%$ |
| test_reinforce_speed | 4.5468ms | 4.0340ms | 247.8926 Ops/s | 236.3897 Ops/s | $\color{#35bf28}+4.87\%$ |
| test_iql_speed | 28.1491ms | 21.4196ms | 46.6862 Ops/s | 45.2548 Ops/s | $\color{#35bf28}+3.16\%$ |
| test_sample_rb[TensorDictReplayBuffer-ListStorage-RandomSampler-4000] | 3.5634ms | 2.7455ms | 364.2325 Ops/s | 366.2248 Ops/s | $\color{#d91a1a}-0.54\%$ |
| test_sample_rb[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] | 4.5245ms | 2.8225ms | 354.2984 Ops/s | 345.8563 Ops/s | $\color{#35bf28}+2.44\%$ |
| test_sample_rb[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] | 0.1453s | 3.2884ms | 304.1019 Ops/s | 340.6786 Ops/s | $\textbf{\color{#d91a1a}-10.74\%}$ |
| test_sample_rb[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] | 3.7841ms | 2.7292ms | 366.4119 Ops/s | 358.4209 Ops/s | $\color{#35bf28}+2.23\%$ |
| test_sample_rb[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] | 4.4826ms | 2.8558ms | 350.1612 Ops/s | 345.5966 Ops/s | $\color{#35bf28}+1.32\%$ |
| test_sample_rb[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] | 5.4908ms | 2.9075ms | 343.9357 Ops/s | 340.3965 Ops/s | $\color{#35bf28}+1.04\%$ |
| test_sample_rb[TensorDictPrioritizedReplayBuffer-ListStorage-None-4000] | 3.7135ms | 2.7544ms | 363.0550 Ops/s | 367.2304 Ops/s | $\color{#d91a1a}-1.14\%$ |
| test_sample_rb[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] | 5.4109ms | 2.8707ms | 348.3476 Ops/s | 347.8740 Ops/s | $\color{#35bf28}+0.14\%$ |
| test_sample_rb[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] | 2.7651ms | 2.7124ms | 368.6762 Ops/s | 348.8341 Ops/s | $\textbf{\color{#35bf28}+5.69\%}$ |
| test_iterate_rb[TensorDictReplayBuffer-ListStorage-RandomSampler-4000] | 4.3124ms | 2.6891ms | 371.8656 Ops/s | 366.9112 Ops/s | $\color{#35bf28}+1.35\%$ |
| test_iterate_rb[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] | 0.1419s | 3.2660ms | 306.1889 Ops/s | 344.3612 Ops/s | $\textbf{\color{#d91a1a}-11.08\%}$ |
| test_iterate_rb[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] | 5.5355ms | 2.9190ms | 342.5849 Ops/s | 350.4169 Ops/s | $\color{#d91a1a}-2.24\%$ |
| test_iterate_rb[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] | 3.0423ms | 2.7146ms | 368.3736 Ops/s | 367.2798 Ops/s | $\color{#35bf28}+0.30\%$ |
| test_iterate_rb[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] | 4.9317ms | 2.8918ms | 345.8062 Ops/s | 353.1993 Ops/s | $\color{#d91a1a}-2.09\%$ |
| test_iterate_rb[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] | 5.0538ms | 2.9036ms | 344.3984 Ops/s | 342.9940 Ops/s | $\color{#35bf28}+0.41\%$ |
| test_iterate_rb[TensorDictPrioritizedReplayBuffer-ListStorage-None-4000] | 3.4841ms | 2.7578ms | 362.6143 Ops/s | 368.7211 Ops/s | $\color{#d91a1a}-1.66\%$ |
| test_iterate_rb[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] | 5.6858ms | 2.8314ms | 353.1797 Ops/s | 347.8962 Ops/s | $\color{#35bf28}+1.52\%$ |
| test_iterate_rb[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] | 5.0106ms | 2.8816ms | 347.0275 Ops/s | 347.4497 Ops/s | $\color{#d91a1a}-0.12\%$ |
| test_populate_rb[TensorDictReplayBuffer-ListStorage-RandomSampler-400] | 0.2689s | 30.1315ms | 33.1879 Ops/s | 33.5429 Ops/s | $\color{#d91a1a}-1.06\%$ |
| test_populate_rb[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-400] | 0.1510s | 30.2634ms | 33.0432 Ops/s | 34.3355 Ops/s | $\color{#d91a1a}-3.76\%$ |
| test_populate_rb[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-400] | 0.1416s | 27.2618ms | 36.6814 Ops/s | 37.3982 Ops/s | $\color{#d91a1a}-1.92\%$ |
| test_populate_rb[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-400] | 0.1440s | 29.7344ms | 33.6310 Ops/s | 34.3011 Ops/s | $\color{#d91a1a}-1.95\%$ |
| test_populate_rb[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-400] | 0.1481s | 27.5855ms | 36.2509 Ops/s | 37.2652 Ops/s | $\color{#d91a1a}-2.72\%$ |
| test_populate_rb[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-400] | 0.1425s | 29.7271ms | 33.6394 Ops/s | 34.2214 Ops/s | $\color{#d91a1a}-1.70\%$ |
| test_populate_rb[TensorDictPrioritizedReplayBuffer-ListStorage-None-400] | 0.1411s | 26.9716ms | 37.0760 Ops/s | 37.1629 Ops/s | $\color{#d91a1a}-0.23\%$ |
| test_populate_rb[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-400] | 0.1501s | 30.2780ms | 33.0273 Ops/s | 34.1139 Ops/s | $\color{#d91a1a}-3.19\%$ |
| test_populate_rb[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-400] | 0.1473s | 30.1439ms | 33.1742 Ops/s | 37.3057 Ops/s | $\textbf{\color{#d91a1a}-11.07\%}$ |