diff --git a/koboldcpp.py b/koboldcpp.py index 6c669b269..2d0e094d5 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -283,7 +283,7 @@ maxhordectx = 1024 maxhordelen = 256 modelbusy = threading.Lock() defaultport = 5001 -KcppVersion = "1.36" +KcppVersion = "1.37" showdebug = True showsamplerwarning = True exitcounter = 0 diff --git a/llama.cpp b/llama.cpp index 30431ac6a..000e65c6b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -106,11 +106,11 @@ static const std::map & MEM_REQ_SCRATCH0(int n_ctx) { static std::map k_sizes = { /* empirical scaling, still a guess */ - { MODEL_3B, ((size_t) n_ctx / 16ull + 180ull) * MB }, - { MODEL_7B, ((size_t) n_ctx / 16ull + 320ull) * MB }, - { MODEL_13B, ((size_t) n_ctx / 12ull + 460ull) * MB }, - { MODEL_30B, ((size_t) n_ctx / 10ull + 620ull) * MB }, - { MODEL_65B, ((size_t) n_ctx / 8ull + 860ull) * MB }, + { MODEL_3B, ((size_t) n_ctx / 11ull + 320ull) * MB }, + { MODEL_7B, ((size_t) n_ctx / 11ull + 440ull) * MB }, + { MODEL_13B, ((size_t) n_ctx / 10ull + 560ull) * MB }, + { MODEL_30B, ((size_t) n_ctx / 9ull + 680ull) * MB }, + { MODEL_65B, ((size_t) n_ctx / 8ull + 1000ull) * MB }, }; return k_sizes; }