From a91487093bfb4c13bf2aaafce8ab600f06c7cf4d Mon Sep 17 00:00:00 2001
From: ningshanwutuobang
Date: Tue, 6 Jun 2023 22:06:51 +0800
Subject: [PATCH] add test example for embd input

---
 .gitignore                              |  1 +
 Makefile                                | 11 +++++-
 examples/embd_input/embd_input.h        |  1 +
 examples/embd_input/embd_input.py       | 47 +++++++++++++++++++++++++
 examples/embd_input/embd_input_lib.cpp  | 15 +++++---
 examples/embd_input/embd_input_test.cpp | 10 +++---
 6 files changed, 75 insertions(+), 10 deletions(-)
 create mode 100644 examples/embd_input/embd_input.py

diff --git a/.gitignore b/.gitignore
index d231f3ff8..88bf142b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.o
 *.a
+*.so
 .DS_Store
 .build/
 .cache/
diff --git a/Makefile b/Makefile
index 8e8d426c5..7685003c2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot libembd_input.so embd_input_test
 
 ifdef LLAMA_BUILD_SERVER
 BUILD_TARGETS += server
@@ -250,6 +250,15 @@ save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.
 server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
 
+libembd_input.so: examples/embd_input/embd_input.h examples/embd_input/embd_input_lib.cpp examples/embd_input/embd_input_test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) --shared $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
+
+
+embd_input_test: libembd_input.so examples/embd_input/embd_input_test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.so,$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -Wl,-rpath=./ -lembd_input
+
+
+
 build-info.h: $(wildcard .git/index) scripts/build-info.sh
 	@sh scripts/build-info.sh > $@.tmp
 	@if ! cmp -s $@.tmp $@; then \
diff --git a/examples/embd_input/embd_input.h b/examples/embd_input/embd_input.h
index f5deb5277..f45cee32d 100644
--- a/examples/embd_input/embd_input.h
+++ b/examples/embd_input/embd_input.h
@@ -11,6 +11,7 @@ extern "C" {
 typedef struct MyModel {
     llama_context* ctx;
     gpt_params params;
+    int n_past = 0;
 } MyModel;
 
 
diff --git a/examples/embd_input/embd_input.py b/examples/embd_input/embd_input.py
new file mode 100644
index 000000000..6d1abf736
--- /dev/null
+++ b/examples/embd_input/embd_input.py
@@ -0,0 +1,47 @@
+import ctypes
+from ctypes import cdll, c_char_p, c_void_p, POINTER, c_float, c_int
+import numpy as np
+
+libc = cdll.LoadLibrary("./libembd_input.so")
+libc.sampling.restype=c_char_p
+libc.create_mymodel.restype=c_void_p
+libc.eval_string.argtypes=[c_void_p, c_char_p]
+libc.sampling.argtypes=[c_void_p]
+libc.eval_float.argtypes=[c_void_p, POINTER(c_float), c_int]
+
+
+class MyModel:
+    def __init__(self, args):
+        argc = len(args)
+        c_str = [c_char_p(i.encode()) for i in args]
+        args_c = (c_char_p * argc)(*c_str)
+        self.model = c_void_p(libc.create_mymodel(argc, args_c))
+        print("self.model", self.model)
+
+    def eval_float(self, x):
+        libc.eval_float(self.model, x.astype(np.float32).ctypes.data_as(POINTER(c_float)), x.shape[0])
+
+    def eval_string(self, x):
+        libc.eval_string(self.model, x.encode()) # c_char_p(x.encode()))
+
+    def eval_token(self, x):
+        libc.eval_id(self.model, x)
+
+    def sampling(self):
+        s = libc.sampling(self.model)
+        return s
+
+
+model = MyModel(["main", "--model", "../llama.cpp/models/ggml-vic13b-q4_1.bin"])
+print(model)
+model.eval_string("""There is a better way to deal with the formula, """)
+# model.eval_token(100)
+x = np.random.random((10,5120))# , dtype=np.float32)
+# print(x[0,0], x[0,1],x[1,0])
+model.eval_float(x)
+print(libc)
+
+for i in range(100):
+    print(model.sampling().decode(), end="")
+
+
diff --git a/examples/embd_input/embd_input_lib.cpp b/examples/embd_input/embd_input_lib.cpp
index a9edc120e..cb7e5d189 100644
--- a/examples/embd_input/embd_input_lib.cpp
+++ b/examples/embd_input/embd_input_lib.cpp
@@ -96,6 +96,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
     struct MyModel* ret= new MyModel();
     ret->ctx = ctx;
     ret->params = params;
+    ret->n_past = 0;
     // printf("ctx: %d\n", ret->ctx);
     return ret;
 }
@@ -106,11 +107,13 @@ bool eval_float(void* model, float* input, int N){
     llama_context* ctx = mymodel->ctx;
     gpt_params params = mymodel->params;
     int n_emb = llama_n_embd(ctx);
-    int n_past = 0;
-    for (int i = 0; i < (int) N; i += params.n_batch) {
+    int n_past = mymodel->n_past;
+    // printf("%f,%f\n", *input, *(input+1));
+    int n_batch = N; // params.n_batch;
+    for (int i = 0; i < (int) N; i += n_batch) {
         int n_eval = (int) N - i;
-        if (n_eval > params.n_batch) {
-            n_eval = params.n_batch;
+        if (n_eval > n_batch) {
+            n_eval = n_batch;
         }
         if (llama_eval_float(ctx, (input+i*n_emb), n_eval, n_past, params.n_threads)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
@@ -118,6 +121,7 @@ bool eval_float(void* model, float* input, int N){
         }
         n_past += n_eval;
     }
+    mymodel->n_past = n_past;
     return true;
 }
 
@@ -135,7 +139,7 @@ bool eval_tokens(void* model, std::vector<llama_token> tokens) {
     // printf("ctx2: %d\n", ctx);
     gpt_params params = mymodel->params;
     // printf("\n%d\n", params);
-    int n_past = 1;
+    int n_past = mymodel->n_past;
     for (int i = 0; i < (int) tokens.size(); i += params.n_batch) {
         int n_eval = (int) tokens.size() - i;
         if (n_eval > params.n_batch) {
@@ -148,6 +152,7 @@ bool eval_tokens(void* model, std::vector<llama_token> tokens) {
         }
         n_past += n_eval;
     }
+    mymodel->n_past = n_past;
     return true;
 }
 
diff --git a/examples/embd_input/embd_input_test.cpp b/examples/embd_input/embd_input_test.cpp
index 96ce130fd..7cd094e35 100644
--- a/examples/embd_input/embd_input_test.cpp
+++ b/examples/embd_input/embd_input_test.cpp
@@ -14,14 +14,16 @@ int main(int argc, char** argv) {
         data[i] = u(e);
     }
 
-    eval_string(mymodel, "111");
-    printf("eval float");
+    eval_string(mymodel, "user: what is the color of the flag of UN?");
+    // printf("eval float");
     eval_float(mymodel, data, N);
-    printf("eval float end\n");
+    eval_string(mymodel, "assistant:");
+    // printf("eval float end\n");
     eval_string(mymodel, mymodel->params.prompt.c_str());
-    for (int i=0;i < 500; i++) {
+    for (int i=0;i < 50; i++) {
         int id = sampling_id(mymodel);
         printf("%s", llama_token_to_str(mymodel->ctx, id));
+        fflush(stdout);
         eval_id(mymodel, id);
     }
     printf("\n");
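
Usage: because the patch makes `n_past` a member of `MyModel` that persists across calls, text prompts (`eval_string`) and raw embeddings (`eval_float`) can be interleaved in a single context, each picking up at the offset where the previous call stopped. A minimal Python sketch of driving the new bindings follows. It assumes `make libembd_input.so` has been run, that the module-level demo statements at the bottom of embd_input.py are removed or guarded by `if __name__ == "__main__":` so the import has no side effects, and that the model path and the 5120-wide embeddings (the n_embd of a 13B model) match your setup; none of that is guaranteed by the patch itself.

    import numpy as np
    from embd_input import MyModel  # the ctypes wrapper added above

    # argv-style args are parsed by create_mymodel(); path is an assumption
    model = MyModel(["main", "--model", "./models/ggml-vic13b-q4_1.bin"])

    model.eval_string("user: describe the following embedding, ")
    emb = np.random.random((10, 5120))  # 10 rows of n_embd floats
    model.eval_float(emb)               # consumed at the persisted n_past offset
    model.eval_string("assistant:")

    for _ in range(50):
        # sampling() returns bytes (c_char_p restype), so decode before printing
        print(model.sampling().decode(), end="", flush=True)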