the cur parameter is missing
parent a113689571
commit 4d92be8813
1 changed file with 8 additions and 5 deletions
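In short: llama_build_graph previously returned only a ggml_cgraph *, so the cur tensor it computes was invisible to callers. This change makes the builder return std::pair<ggml_cgraph *, ggml_tensor *> and updates both call sites (llama_eval_internal and llama_new_context_with_model) to unpack the pair.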
llama.cpp | 13 ++++++++-----
@@ -1386,7 +1386,7 @@ static bool llama_model_load(
     }
 }

-static struct ggml_cgraph * llama_build_graph(
+static struct std::pair<ggml_cgraph *, ggml_tensor *> llama_build_graph(
          llama_context & lctx,
      const llama_token * tokens,
            const float * embd,
@@ -1755,7 +1755,7 @@ static struct ggml_cgraph * llama_build_graph(

     ggml_free(ctx0);

-    return gf;
+    return std::make_pair(gf, cur);
 }

 // evaluate the transformer
@@ -1799,8 +1799,9 @@ static bool llama_eval_internal(
 #ifdef LLAMA_USE_ALLOCATOR
     ggml_allocr_reset(lctx.alloc);
 #endif
-
-    ggml_cgraph * gf = llama_build_graph(lctx, tokens, embd, n_tokens, n_past);
+    auto result = llama_build_graph(lctx, tokens, embd, n_tokens, n_past);
+    ggml_cgraph * gf = result.first;
+    ggml_tensor * cur = result.second;

 #ifdef LLAMA_USE_ALLOCATOR
     ggml_allocr_alloc_graph(lctx.alloc, gf);
@@ -3279,7 +3280,9 @@ struct llama_context * llama_new_context_with_model(
         int n_tokens = std::min((int)hparams.n_ctx, params.n_batch);
         int n_past = hparams.n_ctx - n_tokens;
         llama_token token = llama_token_bos(); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph
-        ggml_cgraph * gf = llama_build_graph(*ctx, &token, NULL, n_tokens, n_past);
+        auto result = llama_build_graph(*ctx, &token, NULL, n_tokens, n_past);
+        ggml_cgraph * gf = result.first;
+        ggml_tensor * cur = result.second;

         // measure memory requirements for the graph
         size_t alloc_size = ggml_allocr_alloc_graph(ctx->alloc, gf) + tensor_alignment;
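For reference, a minimal self-contained sketch of the new calling convention. The ggml_cgraph / ggml_tensor structs and build_graph_sketch below are hypothetical stand-ins (the real types live in ggml.h, and the real builder takes a llama_context plus token/embedding inputs); only the pair-returning pattern mirrors the diff above.

#include <cstdio>
#include <utility>

// Hypothetical stand-ins for the real ggml types, which are defined in ggml.h.
struct ggml_cgraph { int n_nodes = 0; };
struct ggml_tensor { const char * name = "cur"; };

// Mirrors the new signature: the builder hands back both the graph
// and the last tensor ("cur") instead of the graph alone.
static std::pair<ggml_cgraph *, ggml_tensor *> build_graph_sketch() {
    static ggml_cgraph gf;
    static ggml_tensor cur;
    return std::make_pair(&gf, &cur);
}

int main() {
    // Callers now unpack the pair, as llama_eval_internal and
    // llama_new_context_with_model do after this commit.
    auto result = build_graph_sketch();
    ggml_cgraph * gf  = result.first;   // graph, still fed to the allocator
    ggml_tensor * cur = result.second;  // last tensor, newly visible to the caller
    std::printf("nodes: %d, last tensor: %s\n", gf->n_nodes, cur->name);
    return 0;
}

A std::pair keeps the patch small; with C++17 the call sites could instead use structured bindings (auto [gf, cur] = ...), or a small named struct could make the two members self-describing.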