fixing memory bugs

parent e6ddb15c3a
commit df9135e3a9

5 changed files with 12 additions and 8 deletions
@@ -308,8 +308,12 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     params.memory_f16 = inputs.f16_kv;
     params.n_ctx = inputs.max_context_length;
 
-    neox_ctx_v2.hparams.n_ctx = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx
-    = neox_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
+    neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
+    = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
+    = gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx = gpt2_ctx_v3.hparams.n_ctx
+    = mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
+
+    bool calc_mem_with_scratch = ggml_cpu_has_gpublas();
 
     printf("System Info: %s\n", llama_print_system_info());
     SetQuantsUnshuffled(false);
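For reference, a minimal standalone sketch of the pitfall this hunk fixes: the old chained assignment listed gptj_ctx_v3.hparams.n_ctx twice and never set gpt2_ctx_v3.hparams.n_ctx, so that context kept a stale n_ctx. The struct and variable names below are simplified stand-ins (not the project's real types); one way to keep every context covered is to collect the hparams pointers and assign in a loop.

// sketch only: gpt_hparams/gpt_context are hypothetical stand-ins
#include <cstdio>
#include <vector>

struct gpt_hparams { int n_ctx = 0; };
struct gpt_context { gpt_hparams hparams; };

int main() {
    gpt_context neox_v2, neox_v3, gptj_v1, gptj_v2, gptj_v3,
                gpt2_v1, gpt2_v2, gpt2_v3, mpt_v3;
    const int n_ctx = 2048; // e.g. the requested max_context_length

    // every context version appears exactly once; adding a new backend
    // means adding one entry here instead of extending a long chain
    std::vector<gpt_hparams*> all = {
        &neox_v2.hparams, &neox_v3.hparams,
        &gptj_v1.hparams, &gptj_v2.hparams, &gptj_v3.hparams,
        &gpt2_v1.hparams, &gpt2_v2.hparams, &gpt2_v3.hparams,
        &mpt_v3.hparams,
    };
    for (gpt_hparams *hp : all) {
        hp->n_ctx = n_ctx;
    }
    printf("gpt2_v3.n_ctx = %d\n", gpt2_v3.hparams.n_ctx); // prints 2048
    return 0;
}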
@@ -225,7 +225,7 @@ maxhordectx = 1024
 maxhordelen = 256
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.32"
+KcppVersion = "1.32.1"
 showdebug = True
 
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
@@ -80,7 +80,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
         { MODEL_3B, 256ull * MB },
         { MODEL_7B, 512ull * MB },
         { MODEL_13B, 512ull * MB },
-        { MODEL_30B, 512ull * MB },
+        { MODEL_30B, 640ull * MB },
         { MODEL_65B, 1024ull * MB },
     };
     return k_sizes;
@@ -92,7 +92,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
         { MODEL_3B, 256ull * MB },
         { MODEL_7B, 512ull * MB },
         { MODEL_13B, 512ull * MB },
-        { MODEL_30B, 512ull * MB },
+        { MODEL_30B, 640ull * MB },
         { MODEL_65B, 1024ull * MB },
     };
     return k_sizes;
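A minimal sketch of the "static table of scratch sizes" pattern these two hunks adjust (the 30B entry grows from 512 MB to 640 MB). The e_model enum, MB constant, and the main() lookup here are local stand-ins for illustration, not the project's real definitions; only the sizes mirror the diff.

#include <cstdio>
#include <cstddef>
#include <map>

enum e_model { MODEL_3B, MODEL_7B, MODEL_13B, MODEL_30B, MODEL_65B };
static const size_t MB = 1024 * 1024;

// function-local static map: built once, then reused for every lookup
static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
{
    static std::map<e_model, size_t> k_sizes = {
        { MODEL_3B, 256ull * MB },
        { MODEL_7B, 512ull * MB },
        { MODEL_13B, 512ull * MB },
        { MODEL_30B, 640ull * MB }, // value raised in this commit
        { MODEL_65B, 1024ull * MB },
    };
    return k_sizes;
}

int main() {
    // how much scratch space to reserve for a 30B model
    size_t scratch0 = MEM_REQ_SCRATCH0().at(MODEL_30B);
    printf("scratch0 for 30B: %zu MB\n", scratch0 / MB);
    return 0;
}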
@@ -98,7 +98,7 @@ void print_tok_vec(std::vector<float> &embd)
     //we need to read more to determine
     int32_t vocabsiz = 0;
     fin.read((char *) &vocabsiz, sizeof(int32_t));
-    if(vocabsiz==4096) //actually the d_model for mpt
+    if(vocabsiz==4096 || vocabsiz==7168) //actually the d_model for mpt
     {
         fileformat = FileFormat::MPT_1;
     }
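A standalone sketch of the detection idea in the hunk above: the field read at this point is a vocab size for most formats, but for MPT models it holds d_model, and the commit adds 7168 alongside 4096 so larger MPT variants are also recognized. The file layout, FileFormat enum, and sniff_format helper below are simplified assumptions for illustration, not the project's real reader.

#include <cstdint>
#include <fstream>
#include <cstdio>

enum class FileFormat { GGML_GENERIC, MPT_1 };

// read one int32 and decide whether it looks like an MPT d_model
static FileFormat sniff_format(std::ifstream &fin)
{
    int32_t vocabsiz = 0;
    fin.read(reinterpret_cast<char *>(&vocabsiz), sizeof(int32_t));
    if (vocabsiz == 4096 || vocabsiz == 7168) { // actually d_model for MPT
        return FileFormat::MPT_1;
    }
    return FileFormat::GGML_GENERIC;
}

int main(int argc, char **argv) {
    if (argc < 2) { return 1; }
    std::ifstream fin(argv[1], std::ios::binary);
    printf("detected MPT: %s\n",
           sniff_format(fin) == FileFormat::MPT_1 ? "yes" : "no");
    return 0;
}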
@@ -59,7 +59,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH0_2()
         { MODEL_UNKNOWN_2, 512ull * MB_2 },
         { MODEL_7B_2, 512ull * MB_2 },
         { MODEL_13B_2, 512ull * MB_2 },
-        { MODEL_30B_2, 512ull * MB_2 },
+        { MODEL_30B_2, 640ull * MB_2 },
         { MODEL_65B_2, 1024ull * MB_2 },
     };
     return k_sizes;
@@ -71,7 +71,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH1_2()
         { MODEL_UNKNOWN_2, 512ull * MB_2 },
         { MODEL_7B_2, 512ull * MB_2 },
         { MODEL_13B_2, 512ull * MB_2 },
-        { MODEL_30B_2, 512ull * MB_2 },
+        { MODEL_30B_2, 640ull * MB_2 },
         { MODEL_65B_2, 1024ull * MB_2 },
     };
     return k_sizes;