added token count, updated lite

Concedo 2023-07-20 14:41:06 +08:00
parent c49a469a79
commit 39dc1a46c4
5 changed files with 10 additions and 3 deletions

View file

@@ -223,10 +223,12 @@ extern "C"
    float get_last_eval_time() {
        return last_eval_time;
    }
    float get_last_process_time() {
        return last_process_time;
    }
+   int get_last_token_count() {
+       return last_token_count;
+   }
    const char* get_pending_output() {
        return gpttype_get_pending_output().c_str();
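
A note for orientation: these extern "C" exports are consumed from Python through ctypes, and the actual binding for the new accessor appears in the koboldcpp.py hunk further down. A minimal stand-alone sketch of reading the counter, assuming the shared library has already been built and that the filename used here ("./koboldcpp.so") matches your platform, might look like this:

import ctypes

# Load the compiled library; the path/filename is an assumption and differs per
# platform (.so on Linux, .dll on Windows, .dylib on macOS).
handle = ctypes.CDLL("./koboldcpp.so")

# Declare the return type before calling, mirroring the init_library() bindings below.
handle.get_last_token_count.restype = ctypes.c_int

# After a generation has finished, this returns the token count recorded by the
# most recent generation call.
print("last token count:", handle.get_last_token_count())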

View file

@@ -75,3 +75,4 @@ extern std::vector<std::string> generated_tokens;
 extern bool generation_finished;
 extern float last_eval_time;
 extern float last_process_time;
+extern int last_token_count;

View file

@@ -35,6 +35,7 @@ std::string lora_base = "";
 bool generation_finished;
 float last_process_time = 0;
 float last_eval_time = 0;
+int last_token_count = 0;
 std::vector<std::string> generated_tokens;
 //return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
@@ -1471,6 +1472,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
    generation_finished = true;
    last_eval_time = pt2;
    last_process_time = pt1;
+   last_token_count = realnpredict;
    snprintf(output.text, sizeof(output.text), "%s", concat_output.c_str());
    return output;

File diff suppressed because one or more lines are too long

View file

@@ -164,6 +164,7 @@ def init_library():
    handle.has_finished.restype = ctypes.c_bool
    handle.get_last_eval_time.restype = ctypes.c_float
    handle.get_last_process_time.restype = ctypes.c_float
+   handle.get_last_token_count.restype = ctypes.c_int
    handle.abort_generate.restype = ctypes.c_bool
    handle.get_pending_output.restype = ctypes.c_char_p
@@ -465,7 +466,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
        elif self.path.endswith(('/api/extra/perf')):
            lastp = handle.get_last_process_time()
            laste = handle.get_last_eval_time()
-           response_body = (json.dumps({"last_process":lastp,"last_eval":laste}).encode())
+           lastc = handle.get_last_token_count()
+           response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc}).encode())
        if response_body is None:
            self.send_response(404)
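
With these changes applied, /api/extra/perf reports the token count alongside the two timing figures. A rough client-side sketch, assuming the server is running locally and that the host and port used here match your setup (both are assumptions), could be:

import json
import urllib.request

# Query the perf endpoint; host and port are assumptions for a locally running server.
with urllib.request.urlopen("http://localhost:5001/api/extra/perf") as resp:
    perf = json.loads(resp.read())

# The response now carries "last_token_count" in addition to the timing fields.
print(perf["last_process"], perf["last_eval"], perf["last_token_count"])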