add test example for embd input
This commit is contained in:
parent 20d5eef816
commit a91487093b

6 changed files with 75 additions and 10 deletions

.gitignore (vendored, +1)

@@ -1,5 +1,6 @@
 *.o
 *.a
+*.so
 .DS_Store
 .build/
 .cache/

Makefile (+10, -1)

@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot libembd_input.so embd_input_test
 
 ifdef LLAMA_BUILD_SERVER
 BUILD_TARGETS += server
@@ -250,6 +250,15 @@ save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.
 server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
 
+libembd_input.so: examples/embd_input/embd_input.h examples/embd_input/embd_input_lib.cpp examples/embd_input/embd_input_test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) --shared $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
+
+
+embd_input_test: libembd_input.so examples/embd_input/embd_input_test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.so,$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -Wl,-rpath=./ -lembd_input
+
+
+
 build-info.h: $(wildcard .git/index) scripts/build-info.sh
 	@sh scripts/build-info.sh > $@.tmp
 	@if ! cmp -s $@.tmp $@; then \
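
A usage note, not part of the diff: make builds libembd_input.so before embd_input_test, and the test binary is linked with -L. -Wl,-rpath=./ -lembd_input, so both artifacts are expected to sit in the repository root when ./embd_input_test runs.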

examples/embd_input/embd_input.h (+1)

@@ -11,6 +11,7 @@ extern "C" {
 typedef struct MyModel {
     llama_context* ctx;
     gpt_params params;
+    int n_past = 0;
 } MyModel;
 
 
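The new n_past field keeps the current sequence position on the MyModel handle itself, so it persists between calls into the library; create_mymodel initializes it, and the eval functions in embd_input_lib.cpp below advance it.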

examples/embd_input/embd_input.py (new file, +47)

@@ -0,0 +1,47 @@
+import ctypes
+from ctypes import cdll, c_char_p, c_void_p, POINTER, c_float, c_int
+import numpy as np
+
+libc = cdll.LoadLibrary("./libembd_input.so")
+libc.sampling.restype=c_char_p
+libc.create_mymodel.restype=c_void_p
+libc.eval_string.argtypes=[c_void_p, c_char_p]
+libc.sampling.argtypes=[c_void_p]
+libc.eval_float.argtypes=[c_void_p, POINTER(c_float), c_int]
+
+
+class MyModel:
+    def __init__(self, args):
+        argc = len(args)
+        c_str = [c_char_p(i.encode()) for i in args]
+        args_c = (c_char_p * argc)(*c_str)
+        self.model = c_void_p(libc.create_mymodel(argc, args_c))
+        print("self.model", self.model)
+
+    def eval_float(self, x):
+        libc.eval_float(self.model, x.astype(np.float32).ctypes.data_as(POINTER(c_float)), x.shape[0])
+
+    def eval_string(self, x):
+        libc.eval_string(self.model, x.encode()) # c_char_p(x.encode()))
+
+    def eval_token(self, x):
+        libc.eval_id(self.model, x)
+
+    def sampling(self):
+        s = libc.sampling(self.model)
+        return s
+
+
+model = MyModel(["main", "--model", "../llama.cpp/models/ggml-vic13b-q4_1.bin"])
+print(model)
+model.eval_string("""There is a better way to deal with the formula, """)
+# model.eval_token(100)
+x = np.random.random((10,5120))# , dtype=np.float32)
+# print(x[0,0], x[0,1],x[1,0])
+model.eval_float(x)
+print(libc)
+
+for i in range(100):
+    print(model.sampling().decode(), end="")
+
+
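
For context, here is a minimal driver in the same spirit. It is a sketch, not part of the commit: the model path and prompt are placeholders, and it assumes libembd_input.so has already been built in the current directory. The 5120-wide rows match n_embd of a 13B LLaMA model (a 7B model would need 4096):

import numpy as np
from ctypes import cdll, c_char_p, c_void_p, POINTER, c_float, c_int

# Bind the entry points exported by libembd_input.so, as the example above does.
lib = cdll.LoadLibrary("./libembd_input.so")
lib.create_mymodel.restype = c_void_p
lib.eval_string.argtypes = [c_void_p, c_char_p]
lib.eval_float.argtypes = [c_void_p, POINTER(c_float), c_int]
lib.sampling.argtypes = [c_void_p]
lib.sampling.restype = c_char_p

args = [b"main", b"--model", b"models/ggml-model.bin"]  # placeholder model path
model = c_void_p(lib.create_mymodel(len(args), (c_char_p * len(args))(*args)))

lib.eval_string(model, b"user: describe the injected data. ")
emb = np.random.random((10, 5120)).astype(np.float32)  # 10 pseudo-token embeddings
lib.eval_float(model, emb.ctypes.data_as(POINTER(c_float)), int(emb.shape[0]))
lib.eval_string(model, b"assistant:")

for _ in range(50):  # sample one token per call and echo it
    print(lib.sampling(model).decode(), end="", flush=True)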

examples/embd_input/embd_input_lib.cpp (+10, -5)

@@ -96,6 +96,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
     struct MyModel* ret= new MyModel();
     ret->ctx = ctx;
     ret->params = params;
+    ret->n_past = 0;
     // printf("ctx: %d\n", ret->ctx);
     return ret;
 }
@@ -106,11 +107,13 @@ bool eval_float(void* model, float* input, int N){
     llama_context* ctx = mymodel->ctx;
     gpt_params params = mymodel->params;
     int n_emb = llama_n_embd(ctx);
-    int n_past = 0;
-    for (int i = 0; i < (int) N; i += params.n_batch) {
+    int n_past = mymodel->n_past;
+    // printf("%f,%f\n", *input, *(input+1));
+    int n_batch = N; // params.n_batch;
+    for (int i = 0; i < (int) N; i += n_batch) {
         int n_eval = (int) N - i;
-        if (n_eval > params.n_batch) {
-            n_eval = params.n_batch;
+        if (n_eval > n_batch) {
+            n_eval = n_batch;
         }
         if (llama_eval_float(ctx, (input+i*n_emb), n_eval, n_past, params.n_threads)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
@@ -118,6 +121,7 @@ bool eval_float(void* model, float* input, int N){
         }
         n_past += n_eval;
     }
+    mymodel->n_past = n_past;
     return true;
 }
 
@@ -135,7 +139,7 @@ bool eval_tokens(void* model, std::vector<llama_token> tokens) {
     // printf("ctx2: %d\n", ctx);
     gpt_params params = mymodel->params;
     // printf("\n%d\n", params);
-    int n_past = 1;
+    int n_past = mymodel->n_past;
     for (int i = 0; i < (int) tokens.size(); i += params.n_batch) {
         int n_eval = (int) tokens.size() - i;
         if (n_eval > params.n_batch) {
@@ -148,6 +152,7 @@ bool eval_tokens(void* model, std::vector<llama_token> tokens) {
         }
         n_past += n_eval;
     }
+    mymodel->n_past = n_past;
     return true;
 }
 
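
In plain terms, this is what makes the library stateful: eval_float previously restarted from position 0 and eval_tokens from a hardcoded 1 on every call, while both now resume from mymodel->n_past and write the advanced position back. That is what lets strings, tokens, and raw embeddings be interleaved into one growing context. Note also that eval_float now evaluates all N rows as a single batch (n_batch = N) rather than chunking by params.n_batch.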

examples/embd_input/embd_input_test.cpp (+6, -4)

@@ -14,14 +14,16 @@ int main(int argc, char** argv) {
         data[i] = u(e);
     }
 
-    eval_string(mymodel, "111");
-    printf("eval float");
+    eval_string(mymodel, "user: what is the color of the flag of UN?");
+    // printf("eval float");
     eval_float(mymodel, data, N);
-    printf("eval float end\n");
+    eval_string(mymodel, "assistant:");
+    // printf("eval float end\n");
+    eval_string(mymodel, mymodel->params.prompt.c_str());
-    for (int i=0;i < 500; i++) {
+    for (int i=0;i < 50; i++) {
         int id = sampling_id(mymodel);
         printf("%s", llama_token_to_str(mymodel->ctx, id));
         fflush(stdout);
         eval_id(mymodel, id);
     }
     printf("\n");
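
With these changes the test sketches a chat-style exchange: it evaluates a "user:" question, injects the random data buffer as stand-in embeddings, closes the turn with "assistant:" plus any --prompt text, and samples 50 tokens instead of 500.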