diff --git a/expose.cpp b/expose.cpp
index f85f411c4..6b2e36feb 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -220,12 +220,12 @@ extern "C"
         return generation_finished;
     }
 
-    float get_prompt_eval_time() {
-        return prompt_eval_time;
+    float get_last_eval_time() {
+        return last_eval_time;
     }
 
-    float get_prompt_process_time() {
-        return prompt_process_time;
+    float get_last_process_time() {
+        return last_process_time;
     }
 
     const char* get_pending_output() {
diff --git a/expose.h b/expose.h
index fc6949a52..2e88946d7 100644
--- a/expose.h
+++ b/expose.h
@@ -72,5 +72,5 @@ extern std::string lora_filename;
 extern std::string lora_base;
 extern std::vector<std::string> generated_tokens;
 extern bool generation_finished;
-extern float prompt_eval_time;
-extern float prompt_process_time;
+extern float last_eval_time;
+extern float last_process_time;
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index ffc0017a2..a6d65133b 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -33,8 +33,8 @@ std::string executable_path = "";
 std::string lora_filename = "";
 std::string lora_base = "";
 bool generation_finished;
-float prompt_process_time;
-float prompt_eval_time;
+float last_process_time = 0;
+float last_eval_time = 0;
 std::vector<std::string> generated_tokens;
 
 //return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
@@ -869,8 +869,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     bool stream_sse = inputs.stream_sse;
 
     generation_finished = false; // Set current generation status
-    prompt_eval_time = 0;
-    prompt_process_time = 0;
     generated_tokens.clear(); // New Generation, new tokens
 
     if (params.repeat_last_n < 1)
@@ -1449,8 +1447,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     fflush(stdout);
     output.status = 1;
     generation_finished = true;
-    prompt_eval_time = pt2;
-    prompt_process_time = pt1;
+    last_eval_time = pt2;
+    last_process_time = pt1;
     snprintf(output.text, sizeof(output.text), "%s", concat_output.c_str());
 
     return output;
diff --git a/koboldcpp.py b/koboldcpp.py
index fa23e2645..8ca30f9de 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -161,8 +161,8 @@ def init_library():
     handle.new_token.argtypes = [ctypes.c_int]
     handle.get_stream_count.restype = ctypes.c_int
     handle.has_finished.restype = ctypes.c_bool
-    handle.get_prompt_eval_time.restype = ctypes.c_float
-    handle.get_prompt_process_time.restype = ctypes.c_float
+    handle.get_last_eval_time.restype = ctypes.c_float
+    handle.get_last_process_time.restype = ctypes.c_float
     handle.abort_generate.restype = ctypes.c_bool
     handle.get_pending_output.restype = ctypes.c_char_p
 
@@ -455,6 +455,11 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         elif self.path.endswith(('/api/extra/version')):
            response_body = (json.dumps({"result":"KoboldCpp","version":KcppVersion}).encode())
 
+        elif self.path.endswith(('/api/extra/perf')):
+            lastp = handle.get_last_process_time()
+            laste = handle.get_last_eval_time()
+            response_body = (json.dumps({"last_process":lastp,"last_eval":laste}).encode())
+
         if response_body is None:
             self.send_response(404)
             self.end_headers()
@@ -532,8 +537,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
             newprompt = fullprompt
 
         gen = asyncio.run(self.handle_request(genparams, newprompt, basic_api_flag, kai_sse_stream_flag))
-        gen['prompt_process_time'] = handle.get_prompt_process_time()
-        gen['prompt_eval_time'] = handle.get_prompt_eval_time()
         try:
             self.send_response(200)
             self.end_headers()
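
For reference, a minimal client sketch for the new /api/extra/perf endpoint introduced above. This is an illustration only; the host and port below are assumptions, not part of this change. The response fields last_process and last_eval come from the handler added in koboldcpp.py.

import json
import urllib.request

BASE_URL = "http://localhost:5001"  # assumed host/port; adjust for your setup

# Query the new endpoint; it reports the timings recorded by the most
# recent generation call (last_process_time / last_eval_time).
with urllib.request.urlopen(f"{BASE_URL}/api/extra/perf") as resp:
    perf = json.loads(resp.read().decode())

print("last_process:", perf["last_process"])
print("last_eval:", perf["last_eval"])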