Vulkan Mixture of Experts (MoE) support (#7628)
* Finish Vulkan mul_mat_id implementation
* Add Vulkan sum_rows and div ops
* Fix MUL_MAT_ID matrix matrix shader
* Fix MUL_MAT_ID matrix vector shader dispatch size
* Fix MUL_MAT_ID matrix vector shader and dispatch code
* Update Vulkan CPU offload for MUL_MAT_ID
* Fix crash when using split mode none and setting a main GPU
This commit is contained in:
parent
a10cda58d3
commit
3d7ebf6312
5 changed files with 73389 additions and 13839 deletions
|
@@ -16372,7 +16372,7 @@ struct llama_context * llama_new_context_with_model(
|
|||
return nullptr;
|
||||
}
|
||||
if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
|
||||
ggml_backend_t backend = ggml_backend_vk_init(0);
|
||||
ggml_backend_t backend = ggml_backend_vk_init(model->main_gpu);
|
||||
if (backend == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
|
||||
llama_free(ctx);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue