Fix: skip GPU offloading so that Falcon models work correctly
This commit is contained in:
parent
d4c22a8b02
commit
1301bd7e29
2 changed files with 13 additions and 24 deletions
33
expose.cpp
33
expose.cpp
|
@ -126,19 +126,6 @@ extern "C"
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(file_format==FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2)
|
|
||||||
{
|
|
||||||
printf("\n---\nIdentified as RWKV model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
|
||||||
ModelLoadResult lr = gpttype_load_model(inputs, file_format);
|
|
||||||
if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5 || file_format==FileFormat::NEOX_6 || file_format==FileFormat::NEOX_7)
|
else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5 || file_format==FileFormat::NEOX_6 || file_format==FileFormat::NEOX_7)
|
||||||
{
|
{
|
||||||
printf("\n---\nIdentified as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
printf("\n---\nIdentified as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
||||||
|
@ -173,22 +160,24 @@ extern "C"
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(file_format==FileFormat::MPT_1)
|
|
||||||
{
|
|
||||||
printf("\n---\nIdentified as MPT model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
|
||||||
ModelLoadResult lr = gpttype_load_model(inputs, file_format);
|
|
||||||
if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return true;
|
if(file_format==FileFormat::MPT_1)
|
||||||
|
{
|
||||||
|
printf("\n---\nIdentified as MPT model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
||||||
}
|
}
|
||||||
|
else if(file_format==FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2)
|
||||||
|
{
|
||||||
|
printf("\n---\nIdentified as RWKV model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
||||||
|
}
|
||||||
|
else if(file_format==FileFormat::GGUF_FALCON)
|
||||||
|
{
|
||||||
|
printf("\n---\nIdentified as FALCON model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
printf("\n---\nIdentified as LLAMA model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
printf("\n---\nIdentified as LLAMA model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
||||||
|
}
|
||||||
ModelLoadResult lr = gpttype_load_model(inputs, file_format);
|
ModelLoadResult lr = gpttype_load_model(inputs, file_format);
|
||||||
if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
|
if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
|
||||||
{
|
{
|
||||||
|
|
|
@ -6385,7 +6385,7 @@ Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please
|
||||||
//version 1.2.4 and later supports unban tokens
|
//version 1.2.4 and later supports unban tokens
|
||||||
if (kobold_endpoint_version && kobold_endpoint_version != "" && compare_version_str(kobold_endpoint_version, "1.2.3") > 0)
|
if (kobold_endpoint_version && kobold_endpoint_version != "" && compare_version_str(kobold_endpoint_version, "1.2.3") > 0)
|
||||||
{
|
{
|
||||||
submit_payload.use_default_badwordids = (localsettings.unban_tokens?true:false);
|
submit_payload.use_default_badwordids = (localsettings.unban_tokens?false:true);
|
||||||
}
|
}
|
||||||
|
|
||||||
let pseudostreaming = should_use_pseudostreaming();
|
let pseudostreaming = should_use_pseudostreaming();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue