From fda60ead35f76ff05008e95a4c772772dfba50db Mon Sep 17 00:00:00 2001
From: Branden Butler
Date: Thu, 28 Sep 2023 12:39:34 -0500
Subject: [PATCH] Replace vector with C-style array and length in
 llama_split_layers_weighted

---
 examples/mpi/mpi.cpp | 2 +-
 llama.cpp            | 6 +++---
 llama.h              | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/mpi/mpi.cpp b/examples/mpi/mpi.cpp
index 0bf8f2f80..5dfa70f5c 100644
--- a/examples/mpi/mpi.cpp
+++ b/examples/mpi/mpi.cpp
@@ -243,7 +243,7 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
-    llama_split_layers_weighted(ctx, params.mpi_layer_split);
+    llama_split_layers_weighted(ctx, params.mpi_layer_split.data(), params.mpi_layer_split.size());
 
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;

diff --git a/llama.cpp b/llama.cpp
index 1f674f13f..98ffa1075 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13087,12 +13087,12 @@ struct llama_context * llama_new_context_with_model(
     return ctx;
 }
 
-void llama_split_layers_weighted(struct llama_context * ctx, std::vector<float> device_weights) {
+void llama_split_layers_weighted(struct llama_context * ctx, float device_weights[], size_t num_weights) {
 #ifdef GGML_USE_MPI
-    if (ggml_mpi_rank(ctx->ctx_mpi) == 0 && ggml_mpi_size(ctx->ctx_mpi) != device_weights.size()) {
+    if (ggml_mpi_rank(ctx->ctx_mpi) == 0 && ggml_mpi_size(ctx->ctx_mpi) != num_weights) {
         GGML_ASSERT(false && "Must have same number of split percentages as devices");
     }
-    uint16_t** ranges = ggml_mpi_split_range(ctx->ctx_mpi, 0, ctx->model.hparams.n_layer - 1, device_weights.data());
+    uint16_t** ranges = ggml_mpi_split_range(ctx->ctx_mpi, 0, ctx->model.hparams.n_layer - 1, device_weights);
     ggml_mpi_scatter_layers(ctx->ctx_mpi, ranges);
 #endif
 }

diff --git a/llama.h b/llama.h
index 0a13b037d..7ad4c9257 100644
--- a/llama.h
+++ b/llama.h
@@ -358,7 +358,7 @@ extern "C" {
             const char * path_model,
             struct llama_model_params params);
 
-    LLAMA_API void llama_split_layers_weighted(struct llama_context * ctx, std::vector<float> device_weights);
+    LLAMA_API void llama_split_layers_weighted(struct llama_context * ctx, float device_weights[], size_t num_weights);
 
     LLAMA_API void llama_free_model(struct llama_model * model);
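
For reference, a minimal caller-side sketch of the revised API (not part of the patch). It assumes an MPI build with GGML_USE_MPI defined and an already-created llama_context; the weight values and three-rank setup are illustrative only. Per the llama.cpp change above, the function asserts on rank 0 that the number of weights matches the number of MPI devices.

    // Hypothetical usage sketch; weight values are illustrative, not from the patch.
    #include <vector>
    #include "llama.h"

    void split_example(struct llama_context * ctx) {
        // One fractional weight per MPI rank/device (three ranks assumed here).
        std::vector<float> weights = {0.5f, 0.25f, 0.25f};

        // The std::vector overload is gone: pass a pointer and an explicit length.
        llama_split_layers_weighted(ctx, weights.data(), weights.size());
    }

A pointer-plus-length pair is valid across the extern "C" boundary declared in llama.h, which a std::vector parameter is not; plain C callers can now pass a stack array and its element count directly.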