fixing memory bugs
This commit is contained in:
parent
e6ddb15c3a
commit
df9135e3a9
5 changed files with 12 additions and 8 deletions
|
@ -308,8 +308,12 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
params.memory_f16 = inputs.f16_kv;
|
params.memory_f16 = inputs.f16_kv;
|
||||||
params.n_ctx = inputs.max_context_length;
|
params.n_ctx = inputs.max_context_length;
|
||||||
|
|
||||||
neox_ctx_v2.hparams.n_ctx = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx
|
neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
|
||||||
= neox_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
|
= gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
|
||||||
|
= gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx = gpt2_ctx_v3.hparams.n_ctx
|
||||||
|
= mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
|
||||||
|
|
||||||
|
bool calc_mem_with_scratch = ggml_cpu_has_gpublas();
|
||||||
|
|
||||||
printf("System Info: %s\n", llama_print_system_info());
|
printf("System Info: %s\n", llama_print_system_info());
|
||||||
SetQuantsUnshuffled(false);
|
SetQuantsUnshuffled(false);
|
||||||
|
|
|
@ -225,7 +225,7 @@ maxhordectx = 1024
|
||||||
maxhordelen = 256
|
maxhordelen = 256
|
||||||
modelbusy = False
|
modelbusy = False
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.32"
|
KcppVersion = "1.32.1"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
|
|
||||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
|
|
|
@ -80,7 +80,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
|
||||||
{ MODEL_3B, 256ull * MB },
|
{ MODEL_3B, 256ull * MB },
|
||||||
{ MODEL_7B, 512ull * MB },
|
{ MODEL_7B, 512ull * MB },
|
||||||
{ MODEL_13B, 512ull * MB },
|
{ MODEL_13B, 512ull * MB },
|
||||||
{ MODEL_30B, 512ull * MB },
|
{ MODEL_30B, 640ull * MB },
|
||||||
{ MODEL_65B, 1024ull * MB },
|
{ MODEL_65B, 1024ull * MB },
|
||||||
};
|
};
|
||||||
return k_sizes;
|
return k_sizes;
|
||||||
|
@ -92,7 +92,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
|
||||||
{ MODEL_3B, 256ull * MB },
|
{ MODEL_3B, 256ull * MB },
|
||||||
{ MODEL_7B, 512ull * MB },
|
{ MODEL_7B, 512ull * MB },
|
||||||
{ MODEL_13B, 512ull * MB },
|
{ MODEL_13B, 512ull * MB },
|
||||||
{ MODEL_30B, 512ull * MB },
|
{ MODEL_30B, 640ull * MB },
|
||||||
{ MODEL_65B, 1024ull * MB },
|
{ MODEL_65B, 1024ull * MB },
|
||||||
};
|
};
|
||||||
return k_sizes;
|
return k_sizes;
|
||||||
|
|
|
@ -98,7 +98,7 @@ void print_tok_vec(std::vector<float> &embd)
|
||||||
//we need to read more to determine
|
//we need to read more to determine
|
||||||
int32_t vocabsiz = 0;
|
int32_t vocabsiz = 0;
|
||||||
fin.read((char *) &vocabsiz, sizeof(int32_t));
|
fin.read((char *) &vocabsiz, sizeof(int32_t));
|
||||||
if(vocabsiz==4096) //actually the d_model for mpt
|
if(vocabsiz==4096 || vocabsiz==7168) //actually the d_model for mpt
|
||||||
{
|
{
|
||||||
fileformat = FileFormat::MPT_1;
|
fileformat = FileFormat::MPT_1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,7 +59,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH0_2()
|
||||||
{ MODEL_UNKNOWN_2, 512ull * MB_2 },
|
{ MODEL_UNKNOWN_2, 512ull * MB_2 },
|
||||||
{ MODEL_7B_2, 512ull * MB_2 },
|
{ MODEL_7B_2, 512ull * MB_2 },
|
||||||
{ MODEL_13B_2, 512ull * MB_2 },
|
{ MODEL_13B_2, 512ull * MB_2 },
|
||||||
{ MODEL_30B_2, 512ull * MB_2 },
|
{ MODEL_30B_2, 640ull * MB_2 },
|
||||||
{ MODEL_65B_2, 1024ull * MB_2 },
|
{ MODEL_65B_2, 1024ull * MB_2 },
|
||||||
};
|
};
|
||||||
return k_sizes;
|
return k_sizes;
|
||||||
|
@ -71,7 +71,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH1_2()
|
||||||
{ MODEL_UNKNOWN_2, 512ull * MB_2 },
|
{ MODEL_UNKNOWN_2, 512ull * MB_2 },
|
||||||
{ MODEL_7B_2, 512ull * MB_2 },
|
{ MODEL_7B_2, 512ull * MB_2 },
|
||||||
{ MODEL_13B_2, 512ull * MB_2 },
|
{ MODEL_13B_2, 512ull * MB_2 },
|
||||||
{ MODEL_30B_2, 512ull * MB_2 },
|
{ MODEL_30B_2, 640ull * MB_2 },
|
||||||
{ MODEL_65B_2, 1024ull * MB_2 },
|
{ MODEL_65B_2, 1024ull * MB_2 },
|
||||||
};
|
};
|
||||||
return k_sizes;
|
return k_sizes;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue