llama : make tensor_split ptr instead of array (#2272)

commit ae178ab46b (parent 54e3bc76fe)
Georgi Gerganov, 2023-07-21 13:10:51 +03:00, committed by GitHub
4 changed files with 8 additions and 4 deletions

llama.h

@@ -88,7 +88,8 @@ extern "C" {
 int32_t n_batch;      // prompt processing batch size
 int32_t n_gpu_layers; // number of layers to store in VRAM
 int32_t main_gpu;     // the GPU that is used for scratch and small tensors
-float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 float rope_freq_base; // RoPE base frequency
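
Note: since tensor_split is now a pointer rather than an inline array, the caller owns the backing storage and must keep it valid while the model loads. A minimal usage sketch follows; load_split_model is a hypothetical helper, the split ratios and layer count are illustrative, and it assumes a multi-GPU build where LLAMA_MAX_DEVICES >= 2 (llama_context_default_params and llama_load_model_from_file are the existing API).

    #include "llama.h"

    // Caller-owned split ratios: must remain valid for the duration of model
    // loading now that llama_context_params stores only a pointer to them.
    // (Assumes LLAMA_MAX_DEVICES >= 2; the 60/40 split is illustrative.)
    static const float g_tensor_split[LLAMA_MAX_DEVICES] = { 0.6f, 0.4f };

    struct llama_model * load_split_model(const char * path) {
        struct llama_context_params params = llama_context_default_params();
        params.n_gpu_layers = 35;             // illustrative layer count
        params.main_gpu     = 0;              // GPU used for scratch and small tensors
        params.tensor_split = g_tensor_split; // previously copied into a fixed array
        return llama_load_model_from_file(path, params);
    }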