ucx
ucx copied to clipboard
UCT/CUDA: set default max_reg_ratio to 1.0
What
Set default ratio to 1.0 which means that cuda pinned allocations of any size will be registered fully by IB.
Why ?
Pinned device memory cannot be swapped out in any case, so registering whole allocations with IB doesn't add any additional memory pressure. A user would have to free memory before allocating more than what is physically available, at which point the IB registrations also go away (assuming correct interception). For devices with low BAR1 capacity (currently only T4 is detected), the following pieces of code prevent whole-allocation registration in order to avoid BAR1 exhaustion:
66 static size_t
67 uct_cuda_base_get_total_device_mem(CUdevice cuda_device)
68 {
69 static size_t total_bytes[UCT_CUDA_MAX_DEVICES];
70 char dev_name[UCT_CUDA_DEV_NAME_MAX_LEN];
71 CUresult cu_err;
72 const char *cu_err_str;
73
74 ucs_assert(cuda_device < UCT_CUDA_MAX_DEVICES);
75
76 ucs_spin_lock(&uct_cuda_base_lock);
77
78 if (!total_bytes[cuda_device]) {
79 cu_err = cuDeviceTotalMem(&total_bytes[cuda_device], cuda_device);
80 if (cu_err != CUDA_SUCCESS) {
81 cuGetErrorString(cu_err, &cu_err_str);
82 ucs_error("cuDeviceTotalMem error: %s", cu_err_str);
83 goto err;
84 }
85
86 cu_err = cuDeviceGetName(dev_name, sizeof(dev_name), cuda_device);
87 if (cu_err != CUDA_SUCCESS) {
88 cuGetErrorString(cu_err, &cu_err_str);
89 ucs_error("cuDeviceGetName error: %s", cu_err_str);
90 goto err;
91 }
92
93 if (!strncmp(dev_name, "T4", 2)) {
94 total_bytes[cuda_device] = 1; /* should ensure that whole alloc
95 registration is not used for t4 */
96 }
97 }
98
99 ucs_spin_unlock(&uct_cuda_base_lock);
100 return total_bytes[cuda_device];
101
102 err:
103 ucs_spin_unlock(&uct_cuda_base_lock);
104 return 1; /* return 1 byte to avoid division by zero */
105 }
168 if (md->config.alloc_whole_reg == UCS_CONFIG_AUTO) {
169 total_bytes = uct_cuda_base_get_total_device_mem(cuda_device);
170 if (alloc_length > (total_bytes * md->config.max_reg_ratio)) {
171 goto out_default_range;
172 }
173 } else {
174 ucs_assert(md->config.alloc_whole_reg == UCS_CONFIG_ON);
175 }