explore quiet mode
parent a6eb9b8010
commit bffa78116d
3 changed files with 19 additions and 11 deletions
expose.h (1 change)
@@ -76,6 +76,7 @@ struct generation_inputs
     const bool stream_sse;
     const char * grammar;
     const bool grammar_retain_state;
+    const bool quiet = false;
 };
 struct generation_outputs
 {
gpttype_adapter.cpp (14 changes)

@@ -1442,6 +1442,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     params.n_threads_batch = n_blasthreads;
     bool stream_sse = inputs.stream_sse;

+    bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
+
     generation_finished = false; // Set current generation status
     generated_tokens.clear(); // New Generation, new tokens

@@ -1695,7 +1697,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
         printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
     }

-    if(debugmode!=-1)
+    if(allow_regular_prints)
     {
         printf("\n");
     }
@@ -1716,7 +1718,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
         // predict
         unsigned int embdsize = embd.size();
         //print progress
-        if (!startedsampling && debugmode!=-1)
+        if (!startedsampling && allow_regular_prints)
         {
             printf("\rProcessing Prompt%s (%d / %zu tokens)", (blasmode ? " [BLAS]" : ""), input_consumed, embd_inp.size());
         }
@@ -1835,7 +1837,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
         params.n_threads = original_threads;
         time1 = timer_check();
         timer_start();
-        if(debugmode!=-1)
+        if(allow_regular_prints)
         {
             printf("\n");
         }
@@ -1910,7 +1912,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
             concat_output_mtx.unlock();
         }

-        if (startedsampling && debugmode!=-1)
+        if (startedsampling && allow_regular_prints)
        {
            printf("\rGenerating (%d / %d tokens)", (params.n_predict - remaining_tokens), params.n_predict);
        }
@@ -1935,7 +1937,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
            if(inputs.unban_tokens_rt && id==eosID)
            {
                stopper_unused_tokens = remaining_tokens;
-               if(debugmode!=-1)
+               if(allow_regular_prints)
                {
                    printf("\n(EOS token triggered!)");
                }
@@ -1949,7 +1951,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                {
                    stopper_unused_tokens = remaining_tokens;
                    remaining_tokens = 0;
-                   if(debugmode!=-1)
+                   if(allow_regular_prints)
                    {
                        auto match_clean = matched;
                        replace_all(match_clean, "\n", "\\n");
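The gate behind allow_regular_prints is the only real logic change here: routine progress output stays on in normal mode, the per-request quiet flag can turn it off, and verbose debug mode always forces it back on. A minimal Python sketch of the same predicate, assuming the -1 = silent, 0 = normal, >= 1 = debug convention implied by the checks above:

# Python restatement of the C++ gate added in the hunk at line 1442.
def allow_regular_prints(debugmode: int, quiet: bool) -> bool:
    # Normal mode prints unless the request asked for quiet;
    # debug mode (>= 1) overrides quiet and prints regardless.
    return (debugmode != -1 and not quiet) or debugmode >= 1

assert allow_regular_prints(0, quiet=False)        # normal mode: chatty
assert not allow_regular_prints(0, quiet=True)     # quiet request suppresses
assert allow_regular_prints(1, quiet=True)         # debug wins over quiet
assert not allow_regular_prints(-1, quiet=False)   # silent mode stays silent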
koboldcpp.py (15 changes)
@@ -70,7 +70,8 @@ class generation_inputs(ctypes.Structure):
                 ("stop_sequence", ctypes.c_char_p * stop_token_max),
                 ("stream_sse", ctypes.c_bool),
                 ("grammar", ctypes.c_char_p),
-                ("grammar_retain_state", ctypes.c_bool)]
+                ("grammar_retain_state", ctypes.c_bool),
+                ("quiet", ctypes.c_bool)]

 class generation_outputs(ctypes.Structure):
     _fields_ = [("status", ctypes.c_int),
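A caveat on the _fields_ edit: ctypes lays structures out positionally, so the new quiet member must sit in the same position and with the same type as in generation_inputs in expose.h, or every field after it would be read at the wrong offset. A small self-contained sketch (a hypothetical trimmed struct, not the real generation_inputs):

import ctypes

# Hypothetical mirror of just the tail of the struct: quiet must come
# directly after grammar_retain_state, matching the C++ declaration order.
class demo_inputs(ctypes.Structure):
    _fields_ = [("grammar", ctypes.c_char_p),
                ("grammar_retain_state", ctypes.c_bool),
                ("quiet", ctypes.c_bool)]

d = demo_inputs(grammar=b'root ::= "x"', grammar_retain_state=False, quiet=True)
print(d.quiet)  # True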
@@ -299,7 +300,7 @@ def load_model(model_filename):
     ret = handle.load_model(inputs)
     return ret

-def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False):
+def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False):
     global maxctx, args, currentusergenkey, totalgens
     inputs = generation_inputs()
     outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
@@ -323,6 +324,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
     inputs.rep_pen = rep_pen
     inputs.rep_pen_range = rep_pen_range
     inputs.stream_sse = stream_sse
+    inputs.quiet = quiet
     inputs.grammar = grammar.encode("UTF-8")
     inputs.grammar_retain_state = grammar_retain_state
     inputs.unban_tokens_rt = not use_default_badwordsids
@@ -425,6 +427,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):

     async def generate_text(self, genparams, api_format, stream_flag):
         global friendlymodelname
+        is_quiet = genparams.get('quiet', False)
         def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
             if api_format==1:
                 genparams["prompt"] = genparams.get('text', "")
@@ -503,7 +506,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
             grammar=genparams.get('grammar', ''),
             grammar_retain_state = genparams.get('grammar_retain_state', False),
             genkey=genparams.get('genkey', ''),
-            trimstop=genparams.get('trim_stop', False))
+            trimstop=genparams.get('trim_stop', False),
+            quiet=is_quiet)

         recvtxt = ""
         if stream_flag:
@@ -513,7 +517,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         else:
             recvtxt = run_blocking()

-        if args.debugmode!=-1:
+        if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
             utfprint("\nOutput: " + recvtxt)

         if api_format==1:
@@ -809,7 +813,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                 utfprint("Body Err: " + str(body))
                 return self.send_response(503)

-            if args.debugmode!=-1:
+            is_quiet = genparams.get('quiet', False)
+            if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
                 utfprint("\nInput: " + json.dumps(genparams))

             if args.foreground:
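Taken together, quiet becomes a per-request switch: the server reads it from genparams, threads it through generate() into the ctypes struct, and the C++ side suppresses its console chatter. A hedged usage sketch, assuming a local koboldcpp instance on the default port 5001 and the standard /api/v1/generate endpoint:

import json
import urllib.request

# Ask the server not to echo this request's prompt and output to its console.
payload = {"prompt": "Hello,", "max_length": 32, "quiet": True}
req = urllib.request.Request(
    "http://localhost:5001/api/v1/generate",  # assumed default host/port/endpoint
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read().decode("utf-8")))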