CUDA: refactor host code, dyn. par. blocks

This commit is contained in:
Johannes Gäßler 2024-04-09 11:39:16 +02:00
parent 5668c79ea0
commit 34f93bbb39
3 changed files with 258 additions and 311 deletions

View file

@@ -141,6 +141,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
info.devices[id].cc = 100*prop.major + 10*prop.minor;
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
info.devices[id].smpb = prop.sharedMemPerBlock;
info.devices[id].nsm = prop.multiProcessorCount;
}
for (int id = 0; id < info.device_count; ++id) {