Added stop reason to the perf endpoint
This commit is contained in:
parent
910744e2c0
commit
280abaf029
4 changed files with 18 additions and 1 deletions
|
@ -229,6 +229,9 @@ extern "C"
|
||||||
int get_last_token_count() {
|
int get_last_token_count() {
|
||||||
return last_token_count;
|
return last_token_count;
|
||||||
}
|
}
|
||||||
|
int get_last_stop_reason() {
|
||||||
|
return (int)last_stop_reason;
|
||||||
|
}
|
||||||
|
|
||||||
const char* get_pending_output() {
|
const char* get_pending_output() {
|
||||||
return gpttype_get_pending_output().c_str();
|
return gpttype_get_pending_output().c_str();
|
||||||
|
|
8
expose.h
8
expose.h
|
@ -14,6 +14,13 @@ enum samplers
|
||||||
KCPP_SAMPLER_REP_PEN=6,
|
KCPP_SAMPLER_REP_PEN=6,
|
||||||
KCPP_SAMPLER_MAX
|
KCPP_SAMPLER_MAX
|
||||||
};
|
};
|
||||||
|
enum stop_reason
|
||||||
|
{
|
||||||
|
INVALID=-1,
|
||||||
|
OUT_OF_TOKENS=0,
|
||||||
|
EOS_TOKEN=1,
|
||||||
|
CUSTOM_STOPPER=2,
|
||||||
|
};
|
||||||
struct load_model_inputs
|
struct load_model_inputs
|
||||||
{
|
{
|
||||||
const int threads;
|
const int threads;
|
||||||
|
@ -76,3 +83,4 @@ extern bool generation_finished;
|
||||||
extern float last_eval_time;
|
extern float last_eval_time;
|
||||||
extern float last_process_time;
|
extern float last_process_time;
|
||||||
extern int last_token_count;
|
extern int last_token_count;
|
||||||
|
extern stop_reason last_stop_reason;
|
||||||
|
|
|
@ -36,6 +36,7 @@ bool generation_finished;
|
||||||
float last_process_time = 0;
|
float last_process_time = 0;
|
||||||
float last_eval_time = 0;
|
float last_eval_time = 0;
|
||||||
int last_token_count = 0;
|
int last_token_count = 0;
|
||||||
|
stop_reason last_stop_reason = stop_reason::INVALID;
|
||||||
std::vector<std::string> generated_tokens;
|
std::vector<std::string> generated_tokens;
|
||||||
|
|
||||||
//return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
|
//return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
|
||||||
|
@ -871,6 +872,7 @@ const std::string & gpttype_get_pending_output()
|
||||||
generation_outputs gpttype_generate(const generation_inputs inputs, generation_outputs &output)
|
generation_outputs gpttype_generate(const generation_inputs inputs, generation_outputs &output)
|
||||||
{
|
{
|
||||||
concat_output = "";
|
concat_output = "";
|
||||||
|
last_stop_reason = stop_reason::OUT_OF_TOKENS;
|
||||||
stop_sequence.clear();
|
stop_sequence.clear();
|
||||||
for(int x=0;x<stop_token_max;++x)
|
for(int x=0;x<stop_token_max;++x)
|
||||||
{
|
{
|
||||||
|
@ -1433,6 +1435,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
stopper_unused_tokens = remaining_tokens;
|
stopper_unused_tokens = remaining_tokens;
|
||||||
printf("\n(EOS token triggered!)");
|
printf("\n(EOS token triggered!)");
|
||||||
remaining_tokens = 0;
|
remaining_tokens = 0;
|
||||||
|
last_stop_reason = stop_reason::EOS_TOKEN;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &matched : stop_sequence)
|
for (const auto &matched : stop_sequence)
|
||||||
|
@ -1445,6 +1448,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
{
|
{
|
||||||
printf("\n(Stop sequence triggered: <%s>)", matched.c_str());
|
printf("\n(Stop sequence triggered: <%s>)", matched.c_str());
|
||||||
}
|
}
|
||||||
|
last_stop_reason = stop_reason::CUSTOM_STOPPER;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -165,6 +165,7 @@ def init_library():
|
||||||
handle.get_last_eval_time.restype = ctypes.c_float
|
handle.get_last_eval_time.restype = ctypes.c_float
|
||||||
handle.get_last_process_time.restype = ctypes.c_float
|
handle.get_last_process_time.restype = ctypes.c_float
|
||||||
handle.get_last_token_count.restype = ctypes.c_int
|
handle.get_last_token_count.restype = ctypes.c_int
|
||||||
|
handle.get_last_stop_reason.restype = ctypes.c_int
|
||||||
handle.abort_generate.restype = ctypes.c_bool
|
handle.abort_generate.restype = ctypes.c_bool
|
||||||
handle.get_pending_output.restype = ctypes.c_char_p
|
handle.get_pending_output.restype = ctypes.c_char_p
|
||||||
|
|
||||||
|
@ -470,7 +471,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
lastp = handle.get_last_process_time()
|
lastp = handle.get_last_process_time()
|
||||||
laste = handle.get_last_eval_time()
|
laste = handle.get_last_eval_time()
|
||||||
lastc = handle.get_last_token_count()
|
lastc = handle.get_last_token_count()
|
||||||
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc}).encode())
|
stopreason = handle.get_last_stop_reason()
|
||||||
|
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "stop_reason":stopreason}).encode())
|
||||||
|
|
||||||
if response_body is None:
|
if response_body is None:
|
||||||
self.send_response(404)
|
self.send_response(404)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue