added token count, updated lite
This commit is contained in:
parent
c49a469a79
commit
39dc1a46c4
5 changed files with 10 additions and 3 deletions
|
@ -223,10 +223,12 @@ extern "C"
|
||||||
float get_last_eval_time() {
|
float get_last_eval_time() {
|
||||||
return last_eval_time;
|
return last_eval_time;
|
||||||
}
|
}
|
||||||
|
|
||||||
float get_last_process_time() {
|
float get_last_process_time() {
|
||||||
return last_process_time;
|
return last_process_time;
|
||||||
}
|
}
|
||||||
|
int get_last_token_count() {
|
||||||
|
return last_token_count;
|
||||||
|
}
|
||||||
|
|
||||||
const char* get_pending_output() {
|
const char* get_pending_output() {
|
||||||
return gpttype_get_pending_output().c_str();
|
return gpttype_get_pending_output().c_str();
|
||||||
|
|
1
expose.h
1
expose.h
|
@ -75,3 +75,4 @@ extern std::vector<std::string> generated_tokens;
|
||||||
extern bool generation_finished;
|
extern bool generation_finished;
|
||||||
extern float last_eval_time;
|
extern float last_eval_time;
|
||||||
extern float last_process_time;
|
extern float last_process_time;
|
||||||
|
extern int last_token_count;
|
||||||
|
|
|
@ -35,6 +35,7 @@ std::string lora_base = "";
|
||||||
bool generation_finished;
|
bool generation_finished;
|
||||||
float last_process_time = 0;
|
float last_process_time = 0;
|
||||||
float last_eval_time = 0;
|
float last_eval_time = 0;
|
||||||
|
int last_token_count = 0;
|
||||||
std::vector<std::string> generated_tokens;
|
std::vector<std::string> generated_tokens;
|
||||||
|
|
||||||
//return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
|
//return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
|
||||||
|
@ -1471,6 +1472,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
generation_finished = true;
|
generation_finished = true;
|
||||||
last_eval_time = pt2;
|
last_eval_time = pt2;
|
||||||
last_process_time = pt1;
|
last_process_time = pt1;
|
||||||
|
last_token_count = realnpredict;
|
||||||
snprintf(output.text, sizeof(output.text), "%s", concat_output.c_str());
|
snprintf(output.text, sizeof(output.text), "%s", concat_output.c_str());
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -164,6 +164,7 @@ def init_library():
|
||||||
handle.has_finished.restype = ctypes.c_bool
|
handle.has_finished.restype = ctypes.c_bool
|
||||||
handle.get_last_eval_time.restype = ctypes.c_float
|
handle.get_last_eval_time.restype = ctypes.c_float
|
||||||
handle.get_last_process_time.restype = ctypes.c_float
|
handle.get_last_process_time.restype = ctypes.c_float
|
||||||
|
handle.get_last_token_count.restype = ctypes.c_int
|
||||||
handle.abort_generate.restype = ctypes.c_bool
|
handle.abort_generate.restype = ctypes.c_bool
|
||||||
handle.get_pending_output.restype = ctypes.c_char_p
|
handle.get_pending_output.restype = ctypes.c_char_p
|
||||||
|
|
||||||
|
@ -465,7 +466,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
elif self.path.endswith(('/api/extra/perf')):
|
elif self.path.endswith(('/api/extra/perf')):
|
||||||
lastp = handle.get_last_process_time()
|
lastp = handle.get_last_process_time()
|
||||||
laste = handle.get_last_eval_time()
|
laste = handle.get_last_eval_time()
|
||||||
response_body = (json.dumps({"last_process":lastp,"last_eval":laste}).encode())
|
lastc = handle.get_last_token_count()
|
||||||
|
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc}).encode())
|
||||||
|
|
||||||
if response_body is None:
|
if response_body is None:
|
||||||
self.send_response(404)
|
self.send_response(404)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue