Apply suggestions from code review

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
slaren 2024-01-10 13:27:19 +01:00 committed by GitHub
parent 3cd0cbb1b5
commit 74066f8c41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 7 additions and 5 deletions

View file

@@ -1006,7 +1006,7 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
// pass 2: assign backends to ops from current assignments // pass 2: assign backends to ops from current assignments
// start from the end and assign the same backend to previous ops // start from the end and assign the same backend to previous ops
// expand gpu backends (ie non last prio) up and down, ignoring cpu // expand gpu backends (i.e. non last prio) up and down, ignoring cpu
// thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops // thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops
// pass 2.1 expand gpu up // pass 2.1 expand gpu up

View file

@@ -185,9 +185,11 @@ extern "C" {
struct llama_model_params { struct llama_model_params {
int32_t n_gpu_layers; // number of layers to store in VRAM int32_t n_gpu_layers; // number of layers to store in VRAM
enum llama_split_mode split_mode; // how to split the model across multiple GPUs enum llama_split_mode split_mode; // how to split the model across multiple GPUs
- // the GPU that is used for the model (LLAMA_SPLIT_NONE),
- // for small tensors and intermediate results (LLAMA_SPLIT_ROW)
- // ignored for LLAMA_SPLIT_LAYER
+
+ // main_gpu interpretation depends on split_mode:
+ // LLAMA_SPLIT_NONE: the GPU that is used for the entire model
+ // LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results
+ // LLAMA_SPLIT_LAYER: ignored
int32_t main_gpu; int32_t main_gpu;
// proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
const float * tensor_split; const float * tensor_split;

View file

@@ -468,7 +468,7 @@ struct test_case {
GGML_UNUSED(index); GGML_UNUSED(index);
}; };
bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud); const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud);
if (!cmp_ok) { if (!cmp_ok) {
printf("compare failed "); printf("compare failed ");