Vulkan Mixture of Experts (MoE) support (#7628)
* Finish Vulkan mul_mat_id implementation
* Add Vulkan sum_rows and div ops
* Fix MUL_MAT_ID matrix matrix shader
* Fix MUL_MAT_ID matrix vector shader dispatch size
* Fix MUL_MAT_ID matrix vector shader and dispatch code
* Update Vulkan CPU offload for MUL_MAT_ID
* Fix crash when using split mode none and setting a main GPU
This commit is contained in:
parent
a10cda58d3
commit
3d7ebf6312
5 changed files with 73389 additions and 13839 deletions
|
@@ -1002,9 +1002,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
return true;
|
||||
}
|
||||
params.main_gpu = std::stoi(argv[i]);
|
||||
#ifndef GGML_USE_CUDA_SYCL
|
||||
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
|
||||
#endif // GGML_USE_CUDA_SYCL
|
||||
#ifndef GGML_USE_CUDA_SYCL_VULKAN
|
||||
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
|
||||
#endif // GGML_USE_CUDA_SYCL_VULKAN
|
||||
return true;
|
||||
}
|
||||
if (arg == "--split-mode" || arg == "-sm") {
|
||||
|
@@ -1030,9 +1030,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
invalid_param = true;
|
||||
return true;
|
||||
}
|
||||
#ifndef GGML_USE_CUDA_SYCL
|
||||
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the split mode has no effect.\n");
|
||||
#endif // GGML_USE_CUDA_SYCL
|
||||
#ifndef GGML_USE_CUDA_SYCL_VULKAN
|
||||
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
|
||||
#endif // GGML_USE_CUDA_SYCL_VULKAN
|
||||
return true;
|
||||
}
|
||||
if (arg == "--tensor-split" || arg == "-ts") {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue