fixed segfault with CLBlast by reversing the commit referenced in issue https://github.com/ggerganov/llama.cpp/issues/4296

Concedo 2023-12-03 00:56:00 +08:00
parent a8e66ef31c
commit c142c5634a
2 changed files with 7 additions and 14 deletions

gpttype_adapter.cpp

@@ -99,7 +99,7 @@ static std::mutex concat_output_mtx;
 static std::string concat_output = "";
 static std::string concat_output_reader_copy = "";
 
-const size_t extra_context_handle_fragmentation = 80;
+const int extra_context_handle_fragmentation = 80;
 
 inline bool IsNanCheck(float f)
 {
@@ -888,6 +888,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         llama_model_params model_params = llama_model_default_params();
         llama_context_params llama_ctx_params = llama_context_default_params();
         llama_ctx_params.n_ctx = clamped_max_context_length;
+        if(useContextShift)
+        {
+            llama_ctx_params.n_ctx += extra_context_handle_fragmentation;
+        }
+
         //llama_ctx_paran_parts = -1;
         llama_ctx_params.seed = -1;
         llama_ctx_params.f16_kv = inputs.f16_kv;
@@ -1447,18 +1452,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     params.n_threads_batch = n_blasthreads;
     bool stream_sse = inputs.stream_sse;
 
-    if(params.n_ctx >= 256 && useContextShift && (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON))
-    {
-        if(params.n_ctx + extra_context_handle_fragmentation >= max_context_limit_at_load)
-        {
-            params.n_ctx -= extra_context_handle_fragmentation; //add some additional buffer to handle KV fragmentation
-            if(debugmode==1)
-            {
-                printf("\nTrue max context permitted: %d\n",params.n_ctx);
-            }
-        }
-    }
-
     bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
     generation_finished = false; // Set current generation status

koboldcpp.py

@@ -392,7 +392,7 @@ maxhordelen = 256
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.51"
+KcppVersion = "1.51.1"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True
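
Net effect of the gpttype_adapter.cpp change: the 80-token headroom for KV-cache fragmentation is now added to n_ctx once at model load when ContextShift is enabled, instead of being subtracted from params.n_ctx on every generate call (the code path removed in the third hunk). A minimal sketch of the resulting load-time behavior, using a hypothetical effective_ctx_at_load helper that is not part of the KoboldCpp source:

// Sketch of the post-commit behavior; effective_ctx_at_load is a
// hypothetical stand-in for the logic added to gpttype_load_model.
#include <cstdio>

const int extra_context_handle_fragmentation = 80; // same constant as in the diff

// Pad the context once at load so KV-cache fragmentation cannot push
// usage past the allocated n_ctx; no adjustment is needed later at
// generation time.
int effective_ctx_at_load(int clamped_max_context_length, bool useContextShift)
{
    int n_ctx = clamped_max_context_length;
    if (useContextShift)
    {
        n_ctx += extra_context_handle_fragmentation;
    }
    return n_ctx;
}

int main()
{
    printf("ContextShift on:  n_ctx = %d\n", effective_ctx_at_load(4096, true));  // 4176
    printf("ContextShift off: n_ctx = %d\n", effective_ctx_at_load(4096, false)); // 4096
    return 0;
}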