llama : use n_embd_gqa instead of n_embd to handle llama-2 70B (#2433)

This commit is contained in:
Rand Xie 2023-07-28 01:42:53 -07:00 committed by GitHub
parent edcc7ae7d2
commit 65cdf34bdc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 3 additions and 2 deletions

View file

@ -3663,7 +3663,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
const auto & kv_self = ctx->kv_self;
const auto & hparams = ctx->model.hparams;
const int n_layer = hparams.n_layer;
const int n_embd = hparams.n_embd;
const int n_embd = hparams.n_embd_gqa();
const int n_ctx = hparams.n_ctx;
const size_t kv_size = kv_self.buf.size;
@ -3766,7 +3766,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
const auto & kv_self = ctx->kv_self;
const auto & hparams = ctx->model.hparams;
const int n_layer = hparams.n_layer;
const int n_embd = hparams.n_embd;
const int n_embd = hparams.n_embd_gqa();
const int n_ctx = hparams.n_ctx;
size_t kv_size;