From 428e734bb6e9f337a12067a204a46c38756f629b Mon Sep 17 00:00:00 2001
From: Liu Ming
Date: Mon, 12 Jun 2023 09:57:47 +0800
Subject: [PATCH] 1) change c++14 request from global to grpc-server only 2)
 change proto to llama/v1 dir according to lint suggestion

---
 CMakeLists.txt                              |  6 +++-
 examples/grpc-server/CMakeLists.txt         |  2 +-
 examples/grpc-server/README.md              | 14 +++++++++
 examples/grpc-server/grpc-server.cpp        | 29 +++++++++---------
 examples/grpc-server/llama/v1/message.proto | 33 +++++++++++++++++++++
 examples/grpc-server/message.proto          | 29 ------------------
 6 files changed, 68 insertions(+), 45 deletions(-)
 create mode 100644 examples/grpc-server/README.md
 create mode 100644 examples/grpc-server/llama/v1/message.proto
 delete mode 100644 examples/grpc-server/message.proto

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 89b36c45f..f771b6088 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -116,7 +116,11 @@ endif()
 # Compile flags
 #
 
-set(CMAKE_CXX_STANDARD 14)
+if(LLAMA_BUILD_GRPC_SERVER)
+    set(CMAKE_CXX_STANDARD 14)
+else()
+    set(CMAKE_CXX_STANDARD 11)
+endif()
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED true)
diff --git a/examples/grpc-server/CMakeLists.txt b/examples/grpc-server/CMakeLists.txt
index 2e8e1ce5a..55973a813 100644
--- a/examples/grpc-server/CMakeLists.txt
+++ b/examples/grpc-server/CMakeLists.txt
@@ -15,7 +15,7 @@ message(STATUS "Using protobuf ${Protobuf_VERSION} ${Protobuf_INCLUDE_DIRS} ${CM
 
 # Proto file
 
-get_filename_component(hw_proto "./message.proto" ABSOLUTE)
+get_filename_component(hw_proto "./llama/v1/message.proto" ABSOLUTE)
 get_filename_component(hw_proto_path "${hw_proto}" PATH)
 
 # Generated sources
diff --git a/examples/grpc-server/README.md b/examples/grpc-server/README.md
new file mode 100644
index 000000000..4e297491c
--- /dev/null
+++ b/examples/grpc-server/README.md
@@ -0,0 +1,14 @@
+# llama grpc server
+
+Serves completion and embedding (when the `--embedding` argument is given) over gRPC, based on examples/server.
+
+## running the service
+
+Run the grpc-server command with the same arguments as the llama.cpp main program, plus the following:
+
+* add the `--host` argument to set the listening host
+* add the `--port` argument to set the listening port
+
+### behavioral differences from examples/server
+
+* grpc-server always stops generating when the end-of-stream token is predicted.
\ No newline at end of file
diff --git a/examples/grpc-server/grpc-server.cpp b/examples/grpc-server/grpc-server.cpp
index c7ae88996..3ce343f93 100644
--- a/examples/grpc-server/grpc-server.cpp
+++ b/examples/grpc-server/grpc-server.cpp
@@ -41,9 +41,10 @@ using grpc::ServerContext;
 using grpc::ServerUnaryReactor;
 using grpc::ServerWriteReactor;
 using grpc::Status;
-using llama::Job;
-using llama::LlamaGoService;
-using llama::Output;
+using llama::v1::Request;
+using llama::v1::LlamaService;
+using llama::v1::EmbedResponse;
+using llama::v1::CompletionResponse;
 
 struct server_params
 {
@@ -497,13 +498,13 @@ private:
 };
 
 // Logic and data behind the server's behavior.
-class LlamaServiceImpl final : public LlamaGoService::CallbackService
+class LlamaServiceImpl final : public LlamaService::CallbackService
 {
-    class Reactor : public grpc::ServerWriteReactor<Output>
+    class Reactor : public grpc::ServerWriteReactor<CompletionResponse>
     {
     public:
-        Reactor(CallbackServerContext *ctx, LlamaServerContext *llama, const Job *request)
+        Reactor(CallbackServerContext *ctx, LlamaServerContext *llama, const Request *request)
             : ctx_(ctx), request_(request), llama_(llama)
         {
             if (llama->loadPrompt(request->prompt()))
             {
@@ -534,22 +535,22 @@ class LlamaServiceImpl final : public LlamaGoService::CallbackService
     private:
         CallbackServerContext *const ctx_;
         LlamaServerContext *llama_;
-        const Job *const request_;
+        const Request *const request_;
         int n_remain{0};
         std::mutex finish_mu_;
         bool finished_{false};
-        Output *response;
+        CompletionResponse *response;
 
         void NextWrite()
        {
-            response = new Output();
+            response = new CompletionResponse();
             // loop inference until finish completion
             if (llama_->has_next_token)
             {
                 std::lock_guard<std::mutex> l(finish_mu_);
                 auto result = llama_->doCompletion();
                 fprintf(stderr, "%s", result.c_str());
-                response->set_status(llama::Status::RUNNING);
+                response->set_status(llama::v1::Status::RUNNING);
                 response->set_output(result);
                 StartWrite(response);
             }
@@ -560,7 +561,7 @@ class LlamaServiceImpl final : public LlamaGoService::CallbackService
                 std::lock_guard<std::mutex> l(finish_mu_);
                 if (!finished_)
                 {
-                    response->set_status(llama::Status::FINISHED);
+                    response->set_status(llama::v1::Status::FINISHED);
                     StartWriteLast(response, grpc::WriteOptions());
                 }
             }
@@ -585,8 +586,8 @@ public:
     {
     }
 
-    ServerWriteReactor<Output> *Answer(
-        CallbackServerContext *context, const Job *request)
+    ServerWriteReactor<CompletionResponse> *Complete(
+        CallbackServerContext *context, const Request *request)
     {
         fprintf(stderr, "%s : new answer request: %s\n", __func__, request->prompt().c_str());
         llama->rewind();
@@ -598,7 +599,7 @@ public:
     }
 
     ServerUnaryReactor *Embed(
-        CallbackServerContext *context, const Job *request, Output *response)
+        CallbackServerContext *context, const Request *request, EmbedResponse *response)
     {
         fprintf(stderr, "%s : get embed %s\n", __func__, request->prompt().c_str());
         std::vector<float> embeded = llama->embedding(request->prompt());
diff --git a/examples/grpc-server/llama/v1/message.proto b/examples/grpc-server/llama/v1/message.proto
new file mode 100644
index 000000000..76727821d
--- /dev/null
+++ b/examples/grpc-server/llama/v1/message.proto
@@ -0,0 +1,33 @@
+syntax = "proto3";
+
+package llama.v1;
+
+option go_package = "./pkg/grpc";
+
+service LlamaService {
+  rpc Complete(Request) returns (stream CompletionResponse){}
+  rpc Embed(Request) returns (EmbedResponse){}
+}
+
+message Request {
+  string id = 1;
+  string prompt = 2;
+}
+
+enum Status {
+  PENDING_UNSPECIFIED = 0;
+  RUNNING = 1;
+  FINISHED = 2;
+  FAILED = 3;
+}
+
+message CompletionResponse {
+  string id = 1;
+  Status status = 2;
+  string output = 3;
+}
+
+message EmbedResponse {
+  string id = 1;
+  repeated float embed = 2;
+}
diff --git a/examples/grpc-server/message.proto b/examples/grpc-server/message.proto
deleted file mode 100644
index f4a608949..000000000
--- a/examples/grpc-server/message.proto
+++ /dev/null
@@ -1,29 +0,0 @@
-syntax = "proto3";
-
-package llama;
-
-option go_package = "./pkg/grpc";
-
-service LlamaGoService {
-  rpc Answer(Job) returns (stream Output){}
-  rpc Embed(Job) returns (Output){}
-}
-
-message Job {
-  string id = 1;
-  string prompt = 2;
-}
-
-enum Status {
-  PENDING = 0;
-  RUNNING = 1;
-  FINISHED = 2;
-  FAILED = 3;
-}
-
-message Output {
-  string id = 1;
-  Status status = 2;
-  string output = 3;
-  repeated float embed = 4;
-}
\ No newline at end of file
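
Not part of the patch: a minimal sketch of how a C++ client might call the renamed `llama.v1.LlamaService`, assuming stubs are generated from `examples/grpc-server/llama/v1/message.proto` and the server was started with `--host 127.0.0.1 --port 8080` (the address and the generated header path are illustrative, not defaults defined by this patch).

```cpp
// Hypothetical client sketch for the llama.v1.LlamaService defined above.
// Server address and generated header path are assumptions.
#include <iostream>
#include <memory>

#include <grpcpp/grpcpp.h>
#include "llama/v1/message.grpc.pb.h"  // generated by protoc + grpc_cpp_plugin

int main() {
    auto channel = grpc::CreateChannel("127.0.0.1:8080",
                                       grpc::InsecureChannelCredentials());
    auto stub = llama::v1::LlamaService::NewStub(channel);

    llama::v1::Request req;
    req.set_id("example-1");
    req.set_prompt("Hello, llama!");

    // Complete is server-streaming: read CompletionResponse chunks until the
    // server sends Status::FINISHED and closes the stream.
    grpc::ClientContext ctx;
    std::unique_ptr<grpc::ClientReader<llama::v1::CompletionResponse>> reader(
        stub->Complete(&ctx, req));

    llama::v1::CompletionResponse chunk;
    while (reader->Read(&chunk)) {
        std::cout << chunk.output() << std::flush;
    }
    grpc::Status status = reader->Finish();
    std::cout << "\nstream done, ok=" << status.ok() << std::endl;

    // Embed is unary: one EmbedResponse carrying the embedding vector
    // (the server must have been started with --embedding).
    grpc::ClientContext embed_ctx;
    llama::v1::EmbedResponse embed;
    if (stub->Embed(&embed_ctx, req, &embed).ok()) {
        std::cout << "embedding size: " << embed.embed_size() << std::endl;
    }
    return 0;
}
```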