CUDA: refactor host code, dyn. par. blocks
This commit is contained in:
parent
5668c79ea0
commit
34f93bbb39
3 changed files with 258 additions and 311 deletions
|
@@ -141,6 +141,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
|
||||
info.devices[id].smpb = prop.sharedMemPerBlock;
|
||||
info.devices[id].nsm = prop.multiProcessorCount;
|
||||
}
|
||||
|
||||
for (int id = 0; id < info.device_count; ++id) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue