fixing memory bugs

Concedo 2023-06-23 18:41:23 +08:00
parent e6ddb15c3a
commit df9135e3a9
5 changed files with 12 additions and 8 deletions

View file

@@ -308,8 +308,12 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 params.memory_f16 = inputs.f16_kv;
 params.n_ctx = inputs.max_context_length;
-neox_ctx_v2.hparams.n_ctx = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx
-= neox_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
+neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
+= gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
+= gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx = gpt2_ctx_v3.hparams.n_ctx
+= mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
 bool calc_mem_with_scratch = ggml_cpu_has_gpublas();
 printf("System Info: %s\n", llama_print_system_info());
 SetQuantsUnshuffled(false);
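
The old chain assigned gptj_ctx_v3.hparams.n_ctx twice and never touched gpt2_ctx_v3.hparams.n_ctx, so the GPT-2 v3 path kept its default context length. A minimal sketch of the corrected pattern, using hypothetical struct and variable names rather than the project's actual types:

#include <cstdio>

struct hparams_t { int n_ctx = 512; };    // default context length
struct model_ctx { hparams_t hparams; };  // stand-in for one per-architecture context

int main() {
    model_ctx neox_v3, gptj_v3, gpt2_v3, mpt_v3;
    int n_ctx = 2048;  // requested max_context_length

    // One chained assignment propagates the requested context length to every
    // architecture's hparams; listing each context exactly once avoids the bug
    // where one of them is silently left at its default.
    neox_v3.hparams.n_ctx = gptj_v3.hparams.n_ctx
        = gpt2_v3.hparams.n_ctx = mpt_v3.hparams.n_ctx = n_ctx;

    printf("gpt2_v3 n_ctx = %d\n", gpt2_v3.hparams.n_ctx);  // prints 2048
    return 0;
}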

View file

@@ -225,7 +225,7 @@ maxhordectx = 1024
 maxhordelen = 256
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.32"
+KcppVersion = "1.32.1"
 showdebug = True
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):

View file

@@ -80,7 +80,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 { MODEL_3B, 256ull * MB },
 { MODEL_7B, 512ull * MB },
 { MODEL_13B, 512ull * MB },
-{ MODEL_30B, 512ull * MB },
+{ MODEL_30B, 640ull * MB },
 { MODEL_65B, 1024ull * MB },
 };
 return k_sizes;
@@ -92,7 +92,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 { MODEL_3B, 256ull * MB },
 { MODEL_7B, 512ull * MB },
 { MODEL_13B, 512ull * MB },
-{ MODEL_30B, 512ull * MB },
+{ MODEL_30B, 640ull * MB },
 { MODEL_65B, 1024ull * MB },
 };
 return k_sizes;
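
The scratch-size tables above are keyed by model class, and the loader looks up the entry for the detected model type when reserving buffers; bumping MODEL_30B from 512 MB to 640 MB gives the 30B class more headroom. A rough, self-contained sketch of that lookup pattern, with illustrative names rather than the exact functions touched by this commit:

#include <cstdio>
#include <map>

enum e_model { MODEL_3B, MODEL_7B, MODEL_13B, MODEL_30B, MODEL_65B };
static const size_t MB = 1024 * 1024;

// Per-model scratch sizes, mirroring the table in the diff above.
static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0() {
    static std::map<e_model, size_t> k_sizes = {
        { MODEL_3B,   256ull * MB },
        { MODEL_7B,   512ull * MB },
        { MODEL_13B,  512ull * MB },
        { MODEL_30B,  640ull * MB },  // raised from 512 MB in this commit
        { MODEL_65B, 1024ull * MB },
    };
    return k_sizes;
}

int main() {
    e_model type = MODEL_30B;
    // The loader reserves scratch memory for the detected model class;
    // an undersized entry here is one way a large model runs out of scratch space.
    size_t scratch = MEM_REQ_SCRATCH0().at(type);
    printf("scratch0 for MODEL_30B: %zu MB\n", scratch / MB);
    return 0;
}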

View file

@@ -98,7 +98,7 @@ void print_tok_vec(std::vector<float> &embd)
 //we need to read more to determine
 int32_t vocabsiz = 0;
 fin.read((char *) &vocabsiz, sizeof(int32_t));
-if(vocabsiz==4096) //actually the d_model for mpt
+if(vocabsiz==4096 || vocabsiz==7168) //actually the d_model for mpt
 {
 fileformat = FileFormat::MPT_1;
 }
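
The check above identifies MPT files by the d_model value stored in the header slot where other formats keep a vocabulary size: 4096 matches MPT-7B, and the added 7168 covers the larger MPT variant with that hidden size. A minimal sketch of this kind of header-sniffing heuristic; the function name and surrounding layout are assumptions for illustration:

#include <cstdint>
#include <fstream>

enum class FileFormat { UNKNOWN, MPT_1 };

// Sketch only: reads the 32-bit header field that would normally be a vocab
// size. For MPT files this slot actually holds d_model, so known d_model
// values (4096, 7168) mark the file as MPT.
FileFormat sniff_format(std::ifstream & fin) {
    int32_t vocabsiz = 0;
    fin.read(reinterpret_cast<char *>(&vocabsiz), sizeof(int32_t));
    if (vocabsiz == 4096 || vocabsiz == 7168) {
        return FileFormat::MPT_1;
    }
    return FileFormat::UNKNOWN;
}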

View file

@@ -59,7 +59,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH0_2()
 { MODEL_UNKNOWN_2, 512ull * MB_2 },
 { MODEL_7B_2, 512ull * MB_2 },
 { MODEL_13B_2, 512ull * MB_2 },
-{ MODEL_30B_2, 512ull * MB_2 },
+{ MODEL_30B_2, 640ull * MB_2 },
 { MODEL_65B_2, 1024ull * MB_2 },
 };
 return k_sizes;
@@ -71,7 +71,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH1_2()
 { MODEL_UNKNOWN_2, 512ull * MB_2 },
 { MODEL_7B_2, 512ull * MB_2 },
 { MODEL_13B_2, 512ull * MB_2 },
-{ MODEL_30B_2, 512ull * MB_2 },
+{ MODEL_30B_2, 640ull * MB_2 },
 { MODEL_65B_2, 1024ull * MB_2 },
 };
 return k_sizes;