Fixed segfault with CLBlast by reverting the commit identified in issue https://github.com/ggerganov/llama.cpp/issues/4296
This commit is contained in:
parent
a8e66ef31c
commit
c142c5634a
2 changed files with 7 additions and 14 deletions
|
@ -99,7 +99,7 @@ static std::mutex concat_output_mtx;
|
||||||
static std::string concat_output = "";
|
static std::string concat_output = "";
|
||||||
static std::string concat_output_reader_copy = "";
|
static std::string concat_output_reader_copy = "";
|
||||||
|
|
||||||
const size_t extra_context_handle_fragmentation = 80;
|
const int extra_context_handle_fragmentation = 80;
|
||||||
|
|
||||||
inline bool IsNanCheck(float f)
|
inline bool IsNanCheck(float f)
|
||||||
{
|
{
|
||||||
|
@ -888,6 +888,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
llama_model_params model_params = llama_model_default_params();
|
llama_model_params model_params = llama_model_default_params();
|
||||||
llama_context_params llama_ctx_params = llama_context_default_params();
|
llama_context_params llama_ctx_params = llama_context_default_params();
|
||||||
llama_ctx_params.n_ctx = clamped_max_context_length;
|
llama_ctx_params.n_ctx = clamped_max_context_length;
|
||||||
|
if(useContextShift)
|
||||||
|
{
|
||||||
|
llama_ctx_params.n_ctx += extra_context_handle_fragmentation;
|
||||||
|
}
|
||||||
|
|
||||||
//llama_ctx_paran_parts = -1;
|
//llama_ctx_paran_parts = -1;
|
||||||
llama_ctx_params.seed = -1;
|
llama_ctx_params.seed = -1;
|
||||||
llama_ctx_params.f16_kv = inputs.f16_kv;
|
llama_ctx_params.f16_kv = inputs.f16_kv;
|
||||||
|
@ -1447,18 +1452,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
params.n_threads_batch = n_blasthreads;
|
params.n_threads_batch = n_blasthreads;
|
||||||
bool stream_sse = inputs.stream_sse;
|
bool stream_sse = inputs.stream_sse;
|
||||||
|
|
||||||
if(params.n_ctx >= 256 && useContextShift && (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON))
|
|
||||||
{
|
|
||||||
if(params.n_ctx + extra_context_handle_fragmentation >= max_context_limit_at_load)
|
|
||||||
{
|
|
||||||
params.n_ctx -= extra_context_handle_fragmentation; //add some additional buffer to handle KV fragmentation
|
|
||||||
if(debugmode==1)
|
|
||||||
{
|
|
||||||
printf("\nTrue max context permitted: %d\n",params.n_ctx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
|
bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
|
||||||
|
|
||||||
generation_finished = false; // Set current generation status
|
generation_finished = false; // Set current generation status
|
||||||
|
|
|
@ -392,7 +392,7 @@ maxhordelen = 256
|
||||||
modelbusy = threading.Lock()
|
modelbusy = threading.Lock()
|
||||||
requestsinqueue = 0
|
requestsinqueue = 0
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.51"
|
KcppVersion = "1.51.1"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
showsamplerwarning = True
|
showsamplerwarning = True
|
||||||
showmaxctxwarning = True
|
showmaxctxwarning = True
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue