cuda : add CUDA_USE_TENSOR_CORES and GGML_CUDA_FORCE_MMQ macros

This commit is contained in:
Georgi Gerganov 2023-10-25 18:48:36 +03:00
parent 4c6744b526
commit a4e15a36e4
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
3 changed files with 104 additions and 22 deletions

View file

@@ -178,7 +178,7 @@ extern "C" {
float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model
// Keep the booleans together to avoid misalignment during copy-by-value.
-bool mul_mat_q; // if true, use experimental mul_mat_q kernels
+bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
bool f16_kv; // use fp16 for KV cache, fp32 otherwise
bool logits_all; // the llama_eval() call computes all logits, not just the last one
bool embedding; // embedding mode only