diff --git a/convert.py b/convert.py
index 4748e262b..54dba5979 100644
--- a/convert.py
+++ b/convert.py
@@ -1124,13 +1124,8 @@ class OutputFile:
             params.n_mult,
             params.n_head,
             params.n_layer,
-<<<<<<< HEAD
-            params.n_embd // params.n_head,  # rot (obsolete)
-            file_type.value,
-=======
             params.n_vocab_base | 0xF0000000, # reuse obsolete rot value to store vocab_base
-            params.file_type.value,
->>>>>>> bfccc62 (Use some tricks to eliminate the necessity for a new format)
+            file_type.value,
         ]
         self.fout.write(struct.pack("I" * len(values), *values))

diff --git a/llama.cpp b/llama.cpp
index 8bbe51009..c620d9897 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -555,7 +555,7 @@ struct llama_file_loader {
             // LLaMAv2
             // TODO: read from header
             hparams.n_head_kv = hparams.n_head;
-=======
+        }

     void read_vocab() {
         vocab.id_to_token.resize(hparams.n_vocab);
@@ -1442,7 +1442,7 @@ static struct ggml_cgraph * llama_build_graph(
     const int64_t n_embd_head = hparams.n_embd_head();
     const int64_t n_embd_gqa  = hparams.n_embd_gqa();

-    LLAMA_ASSERT(n_embd_head == hparams.n_rot);
+    LLAMA_ASSERT(n_embd_head == hparams.n_embd/hparams.n_head);

     const float freq_base  = hparams.rope_freq_base;
     const float freq_scale = hparams.rope_freq_scale;