server : refactored the task processing logic (#5065)
* server: add llama_server_queue struct * server: add llama_server_response_event * server: add comments * server: move all mutexes away from server.cpp * server: correct multitask response * server: only add back deferred tasks when one slot is available * server: fix a race condition cause by "request_completion"
This commit is contained in:
parent
413e7b0559
commit
48c857aa10
5 changed files with 876 additions and 692 deletions
2
Makefile
2
Makefile
|
@ -619,7 +619,7 @@ embedding: examples/embedding/embedding.cpp ggml.o llama.o $(C
|
|||
save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
||||
server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -Wno-cast-qual
|
||||
|
||||
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue