Merge branch 'ggerganov:master' into master

This commit is contained in:
Steward Garcia 2023-10-15 18:23:47 -04:00 committed by GitHub
commit f47fd17b73
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 75 additions and 30 deletions

View file

@ -529,13 +529,14 @@ static void init_lora(const struct my_llama_model * model, struct my_llama_lora
set_param_lora(lora);
// measure data size
struct ggml_allocr * alloc = NULL;
alloc = ggml_allocr_new_measure(tensor_alignment);
alloc_lora(alloc, lora);
size_t size = 0;
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
size += GGML_PAD(ggml_nbytes(t), tensor_alignment);
}
// allocate data
lora->data.resize(ggml_allocr_max_size(alloc) + tensor_alignment);
ggml_allocr_free(alloc);
struct ggml_allocr * alloc = NULL;
lora->data.resize(size + tensor_alignment);
alloc = ggml_allocr_new(lora->data.data(), lora->data.size(), tensor_alignment);
alloc_lora(alloc, lora);
ggml_allocr_free(alloc);
@ -1714,11 +1715,9 @@ int main(int argc, char ** argv) {
struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx_input, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
// measure required memory for input tensors
alloc = ggml_allocr_new_measure(tensor_alignment);
ggml_allocr_alloc(alloc, tokens_input);
ggml_allocr_alloc(alloc, target_probs);
size_t max_input_size = ggml_allocr_max_size(alloc) + tensor_alignment;
ggml_allocr_free(alloc);
size_t max_input_size = GGML_PAD(ggml_nbytes(tokens_input), tensor_alignment) +
GGML_PAD(ggml_nbytes(target_probs), tensor_alignment) +
tensor_alignment;
printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f));
// allocate input tensors

View file

@ -79,7 +79,13 @@ int main(int argc, char ** argv) {
llama_backend_init(params.numa);
llama_model_params model_params = llama_model_default_params();
llama_model_params model_params = llama_model_default_params();
model_params.n_gpu_layers = params.n_gpu_layers;
model_params.main_gpu = params.main_gpu;
model_params.tensor_split = params.tensor_split;
model_params.use_mmap = params.use_mmap;
model_params.use_mlock = params.use_mlock;
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
if (model == NULL) {
fprintf(stderr , "%s: error: unable to load model\n" , __func__);