From a91487093bfb4c13bf2aaafce8ab600f06c7cf4d Mon Sep 17 00:00:00 2001
From: ningshanwutuobang
Date: Tue, 6 Jun 2023 22:06:51 +0800
Subject: [PATCH] add test example for embd input

---
 .gitignore                              |  1 +
 Makefile                                | 11 +++++-
 examples/embd_input/embd_input.h        |  1 +
 examples/embd_input/embd_input.py       | 47 +++++++++++++++++++++++++
 examples/embd_input/embd_input_lib.cpp  | 15 +++++---
 examples/embd_input/embd_input_test.cpp | 10 +++---
 6 files changed, 75 insertions(+), 10 deletions(-)
 create mode 100644 examples/embd_input/embd_input.py

diff --git a/.gitignore b/.gitignore
index d231f3ff8..88bf142b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.o
 *.a
+*.so
 .DS_Store
 .build/
 .cache/
diff --git a/Makefile b/Makefile
index 8e8d426c5..7685003c2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot libembd_input.so embd_input_test
 
 ifdef LLAMA_BUILD_SERVER
 BUILD_TARGETS += server
@@ -250,6 +250,15 @@ save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.
 server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
 
+libembd_input.so: examples/embd_input/embd_input.h examples/embd_input/embd_input_lib.cpp examples/embd_input/embd_input_test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) --shared $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
+
+
+embd_input_test: libembd_input.so examples/embd_input/embd_input_test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.so,$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -Wl,-rpath=./ -lembd_input
+
+
+
 build-info.h: $(wildcard .git/index) scripts/build-info.sh
 	@sh scripts/build-info.sh > $@.tmp
 	@if ! cmp -s $@.tmp $@; then \
diff --git a/examples/embd_input/embd_input.h b/examples/embd_input/embd_input.h
index f5deb5277..f45cee32d 100644
--- a/examples/embd_input/embd_input.h
+++ b/examples/embd_input/embd_input.h
@@ -11,6 +11,7 @@ extern "C" {
 typedef struct MyModel {
     llama_context* ctx;
     gpt_params params;
+    int n_past = 0;
 } MyModel;
 
 
diff --git a/examples/embd_input/embd_input.py b/examples/embd_input/embd_input.py
new file mode 100644
index 000000000..6d1abf736
--- /dev/null
+++ b/examples/embd_input/embd_input.py
@@ -0,0 +1,47 @@
+import ctypes
+from ctypes import cdll, c_char_p, c_void_p, POINTER, c_float, c_int
+import numpy as np
+
+libc = cdll.LoadLibrary("./libembd_input.so")
+libc.sampling.restype=c_char_p
+libc.create_mymodel.restype=c_void_p
+libc.eval_string.argtypes=[c_void_p, c_char_p]
+libc.sampling.argtypes=[c_void_p]
+libc.eval_float.argtypes=[c_void_p, POINTER(c_float), c_int]
+
+
+class MyModel:
+    def __init__(self, args):
+        argc = len(args)
+        c_str = [c_char_p(i.encode()) for i in args]
+        args_c = (c_char_p * argc)(*c_str)
+        self.model = c_void_p(libc.create_mymodel(argc, args_c))
+        print("self.model", self.model)
+
+    def eval_float(self, x):
+        libc.eval_float(self.model, x.astype(np.float32).ctypes.data_as(POINTER(c_float)), x.shape[0])
+
+    def eval_string(self, x):
+        libc.eval_string(self.model, x.encode()) # c_char_p(x.encode()))
+
+    def eval_token(self, x):
+        libc.eval_id(self.model, x)
+
+    def sampling(self):
+        s = libc.sampling(self.model)
+        return s
+
+
+model = MyModel(["main", "--model", "../llama.cpp/models/ggml-vic13b-q4_1.bin"])
+print(model)
+model.eval_string("""There is a better way to deal with the formula, """)
+# model.eval_token(100)
+x = np.random.random((10,5120))# , dtype=np.float32)
+# print(x[0,0], x[0,1],x[1,0])
+model.eval_float(x)
+print(libc)
+
+for i in range(100):
+    print(model.sampling().decode(), end="")
+
+
diff --git a/examples/embd_input/embd_input_lib.cpp b/examples/embd_input/embd_input_lib.cpp
index a9edc120e..cb7e5d189 100644
--- a/examples/embd_input/embd_input_lib.cpp
+++ b/examples/embd_input/embd_input_lib.cpp
@@ -96,6 +96,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
     struct MyModel* ret= new MyModel();
     ret->ctx = ctx;
     ret->params = params;
+    ret->n_past = 0;
     // printf("ctx: %d\n", ret->ctx);
     return ret;
 }
@@ -106,11 +107,13 @@ bool eval_float(void* model, float* input, int N){
     llama_context* ctx = mymodel->ctx;
     gpt_params params = mymodel->params;
     int n_emb = llama_n_embd(ctx);
-    int n_past = 0;
-    for (int i = 0; i < (int) N; i += params.n_batch) {
+    int n_past = mymodel->n_past;
+    // printf("%f,%f\n", *input, *(input+1));
+    int n_batch = N; // params.n_batch;
+    for (int i = 0; i < (int) N; i += n_batch) {
         int n_eval = (int) N - i;
-        if (n_eval > params.n_batch) {
-            n_eval = params.n_batch;
+        if (n_eval > n_batch) {
+            n_eval = n_batch;
         }
         if (llama_eval_float(ctx, (input+i*n_emb), n_eval, n_past, params.n_threads)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
@@ -118,6 +121,7 @@ bool eval_float(void* model, float* input, int N){
         }
         n_past += n_eval;
     }
+    mymodel->n_past = n_past;
     return true;
 }
 
@@ -135,7 +139,7 @@ bool eval_tokens(void* model, std::vector<llama_token> tokens) {
     // printf("ctx2: %d\n", ctx);
     gpt_params params = mymodel->params;
     // printf("\n%d\n", params);
-    int n_past = 1;
+    int n_past = mymodel->n_past;
     for (int i = 0; i < (int) tokens.size(); i += params.n_batch) {
         int n_eval = (int) tokens.size() - i;
         if (n_eval > params.n_batch) {
@@ -148,6 +152,7 @@ bool eval_tokens(void* model, std::vector<llama_token> tokens) {
         }
         n_past += n_eval;
     }
+    mymodel->n_past = n_past;
     return true;
 }
 
diff --git a/examples/embd_input/embd_input_test.cpp b/examples/embd_input/embd_input_test.cpp
index 96ce130fd..7cd094e35 100644
--- a/examples/embd_input/embd_input_test.cpp
+++ b/examples/embd_input/embd_input_test.cpp
@@ -14,14 +14,16 @@ int main(int argc, char** argv) {
         data[i] = u(e);
     }
 
-    eval_string(mymodel, "111");
-    printf("eval float");
+    eval_string(mymodel, "user: what is the color of the flag of UN?");
+    // printf("eval float");
     eval_float(mymodel, data, N);
-    printf("eval float end\n");
+    eval_string(mymodel, "assistant:");
+    // printf("eval float end\n");
     eval_string(mymodel, mymodel->params.prompt.c_str());
-    for (int i=0;i < 500; i++) {
+    for (int i=0;i < 50; i++) {
         int id = sampling_id(mymodel);
         printf("%s", llama_token_to_str(mymodel->ctx, id));
+        fflush(stdout);
         eval_id(mymodel, id);
     }
     printf("\n");
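
Usage: because the patch makes `n_past` a member of `MyModel` that persists across calls, text prompts (`eval_string`) and raw embeddings (`eval_float`) can be interleaved in a single context, each picking up at the offset where the previous call stopped. A minimal Python sketch of driving the new bindings follows. It assumes `make libembd_input.so` has been run, that the module-level demo statements at the bottom of embd_input.py are removed or guarded by `if __name__ == "__main__":` so the import has no side effects, and that the model path and the 5120-wide embeddings (the n_embd of a 13B model) match your setup; none of that is guaranteed by the patch itself.

    import numpy as np
    from embd_input import MyModel  # the ctypes wrapper added above

    # argv-style args are parsed by create_mymodel(); path is an assumption
    model = MyModel(["main", "--model", "./models/ggml-vic13b-q4_1.bin"])

    model.eval_string("user: describe the following embedding, ")
    emb = np.random.random((10, 5120))  # 10 rows of n_embd floats
    model.eval_float(emb)               # consumed at the persisted n_past offset
    model.eval_string("assistant:")

    for _ in range(50):
        # sampling() returns bytes (c_char_p restype), so decode before printing
        print(model.sampling().decode(), end="", flush=True)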