Merge branch 'master' into gg/flash-attn

2024-01-31 18:49:43 +02:00 · 2024-01-31 18:49:43 +02:00 · 2ddc9bbef1
commit 2ddc9bbef1
parent 3d03bcb7af d3bac7d584
30 changed files with 1758 additions and 1597 deletions
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -88,7 +88,7 @@ int main(int argc, char ** argv) {

    llama_model_params model_params = llama_model_default_params();

-    const std::vector<float> t_split (LLAMA_MAX_DEVICES, 0.0f);
+    const std::vector<float> t_split(llama_max_devices(), 0.0f);

    model_params.n_gpu_layers = n_gpu_layers;
    model_params.tensor_split = t_split.data();