minimize diffs

commit ef61a4c79e (parent dbf841b0d2)

13 changed files with 30 additions and 71 deletions

@@ -41,7 +41,7 @@ indent_style = tab
 trim_trailing_whitespace = unset
 insert_final_newline = unset
 
-[{tests/chat/templates/*.jinja,tests/chat/goldens/*.txt}]
+[tests/chat/templates/*.jinja]
 indent_style = unset
 indent_size = unset
 end_of_line = unset

Makefile (6 changes)

@@ -49,7 +49,6 @@ BUILD_TARGETS = \
 
 # Binaries only useful for tests
 TEST_TARGETS = \
-	tests/test-antiprompts \
 	tests/test-arg-parser \
 	tests/test-autorelease \
 	tests/test-backend-ops \

@@ -1475,11 +1474,6 @@ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
 	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-tests/test-antiprompts: tests/test-antiprompts.cpp \
-	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
 tests/test-tool-call: tests/test-tool-call.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)

@@ -4,12 +4,9 @@
 
 #include "llama-cpp.h"
 
-#include <functional>
-#include <queue>
 #include <string>
 #include <vector>
 #include <sstream>
-#include <unordered_map>
 
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'

@@ -504,13 +504,12 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> embd;
 
     // single-token antiprompts
-    std::vector<llama_token> antiprompt_single_token;
+    std::vector<llama_token> antiprompt_token;
 
-    antiprompt_single_token.reserve(params.antiprompt.size());
     for (const std::string & antiprompt : params.antiprompt) {
         auto ids = ::common_tokenize(ctx, antiprompt, false, true);
         if (ids.size() == 1) {
-            antiprompt_single_token.push_back(ids[0]);
+            antiprompt_token.push_back(ids[0]);
         }
     }
 

@@ -756,7 +755,7 @@ int main(int argc, char ** argv) {
 
             // check for reverse prompt using special tokens
             llama_token last_token = common_sampler_last(smpl);
-            if (std::find(antiprompt_single_token.begin(), antiprompt_single_token.end(), last_token) != antiprompt_single_token.end()) {
+            if (std::find(antiprompt_token.begin(), antiprompt_token.end(), last_token) != antiprompt_token.end()) {
                 if (params.interactive) {
                     is_interacting = true;
                 }

@@ -26,7 +26,6 @@
 #include <deque>
 #include <memory>
 #include <mutex>
-#include <optional>
 #include <signal.h>
 #include <thread>
 #include <unordered_map>

@@ -168,6 +167,7 @@ struct slot_params {
             {"min_keep", sampling.min_keep},
             {"grammar", sampling.grammar},
             {"grammar_trigger_words", sampling.grammar_trigger_words},
+            {"grammar_trigger_tokens", sampling.grammar_trigger_tokens},
             {"samplers", samplers},
             {"speculative.n_max", speculative.n_max},
             {"speculative.n_min", speculative.n_min},

@@ -386,6 +386,14 @@ struct server_task {
             return out;
         };
 
+        {
+            params.antiprompt.clear();
+            const auto stop = data.find("stop");
+            if (stop != data.end()) {
+                params.antiprompt = to_string_vec(*stop);
+            }
+        }
+
         {
             const auto grammar_trigger_words = data.find("grammar_trigger_words");
             if (grammar_trigger_words != data.end()) {

@@ -401,13 +409,6 @@ struct server_task {
                 }
             }
 
-        {
-            const auto stop = data.find("stop");
-            if (stop != data.end()) {
-                params.antiprompt = to_string_vec(*stop);
-            }
-        }
-
         {
             const auto samplers = data.find("samplers");
             if (samplers != data.end()) {
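
(Illustrative only, not part of the commit: the two hunks above move the "stop" handling earlier in request parsing and add a params.antiprompt.clear() before it. A hypothetical completion request body exercising this path, with the array copied straight into params.antiprompt by to_string_vec(*stop), might look like:

{
  "prompt": "User: hello\nAssistant:",
  "stop": ["User:", "</s>"]
}
)
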
@@ -730,7 +731,7 @@ struct server_task_result_cmpl_final : server_task_result {
 
         std::time_t t = std::time(0);
 
-        json res {
+        json res = json {
             {"choices", json::array({choice})},
             {"created", t},
             {"model", oaicompat_model},

@@ -762,13 +763,13 @@ struct server_task_result_cmpl_final : server_task_result {
             finish_reason = "stop";
         }
 
-        json choice {
+        json choice = json {
            {"finish_reason", finish_reason},
            {"index", 0},
            {"delta", json::object()}
        };
 
-        json ret {
+        json ret = json {
            {"choices", json::array({choice})},
            {"created", t},
            {"id", oaicompat_cmpl_id},

@@ -804,12 +805,10 @@ struct server_task_result_cmpl_partial : server_task_result {
     result_timings timings;
 
     // OAI-compat fields
-    bool verbose = false;
-    oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
-    std::string oaicompat_model;
-    std::string oaicompat_cmpl_id;
-    json oaicompat_tools;
-    llama_tool_call_style oaicompat_tool_call_style = llama_tool_call_style::None;
+    bool verbose = false;
+    oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
+    std::string oaicompat_model;
+    std::string oaicompat_cmpl_id;
 
     virtual int get_index() override {
         return index;

@@ -2048,9 +2047,6 @@ struct server_context {
     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
         const std::string token_str = result.text_to_send;
-        // TODO:
-        // const std::string token_str = result.text_to_send;
-        // const std::string token_str = common_token_to_piece(ctx, result.tok, params_base.special || (match.pos != std::string::npos && match.is_grammar_trigger));
         slot.sampled = result.tok;
 
         slot.generated_text += token_str;

@@ -2276,8 +2272,6 @@ struct server_context {
         res->oaicompat = slot.params.oaicompat;
         res->oaicompat_model = slot.params.oaicompat_model;
         res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
-        // res->oaicompat_tools = slot.params.oaicompat_tools;
-        // res->oaicompat_tool_call_style = slot.params.oaicompat_tool_call_style;
 
         // populate res.probs_output
         if (slot.params.sampling.n_probs > 0) {

@@ -1,14 +1,14 @@
 #!/bin/bash
 
 # make sure we are in the right directory
-TESTS_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-cd $TESTS_DIR
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+cd $SCRIPT_DIR
 
 set -eu
 
 if [[ "${SLOW_TESTS:-0}" == 1 ]]; then
     # Slow tests for tool calls need quite a few models ahead of time to avoid timing out.
-    python $TESTS_DIR/../../../scripts/fetch_server_test_models.py
+    python $SCRIPT_DIR/../../../scripts/fetch_server_test_models.py
 fi
 
 if [ $# -lt 1 ]

@@ -361,7 +361,6 @@ inline std::string format_chat(const common_chat_template & tmpl, const std::vec
         std::string role = json_value(curr_msg, "role", std::string(""));
 
         std::string content;
-
         if (curr_msg.contains("content")) {
             if (curr_msg["content"].is_string()) {
                 content = curr_msg["content"].get<std::string>();

@@ -611,29 +610,16 @@ static json oaicompat_completion_params_parse(
         llama_params["stop"] = json_value(body, "stop", json::array());
     }
 
-    // Handle "response_format" field (https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format)
+    // Handle "response_format" field
     auto tool_choice = json_value(body, "tool_choice", std::string("auto"));
     if (body.contains("response_format")) {
         json response_format = json_value(body, "response_format", json::object());
         std::string response_type = json_value(response_format, "type", std::string());
         if (response_type == "json_object") {
-            // Legacy llama.cpp, llama-cpp-python and Together.ai format.
             llama_params["json_schema"] = json_value(response_format, "schema", json::object());
         } else if (response_type == "json_schema") {
-            // OpenAI JSON schema format.
             auto json_schema = json_value(response_format, "json_schema", json::object());
-            json schema = json_value(json_schema, "schema", json::object());
-            std::string description = json_value(json_schema, "description", std::string());
-            if (!description.empty()) {
-                if (schema.contains("description")) {
-                    throw std::runtime_error("Cannot have both a description in the json_schema object and inside its schema.");
-                }
-                schema["description"] = description;
-            }
-            bool strict = json_value(json_schema, "strict", false);
-            if (strict) {
-                llama_params["json_schema"] = schema;
-            }
+            llama_params["json_schema"] = json_value(json_schema, "schema", json::object());
         } else if (!response_type.empty() && response_type != "text") {
             throw std::runtime_error("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
         }
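
(Illustrative only, not part of the commit: after this simplification the "json_schema" branch reads only response_format.json_schema.schema, and the removed lines show the description/strict handling being dropped. A hypothetical request body for that path:

{
  "response_format": {
    "type": "json_schema",
    "json_schema": {
      "schema": { "type": "object", "properties": { "answer": { "type": "string" } }, "required": ["answer"] }
    }
  }
}

The "json_object" type still takes the legacy branch, which reads the schema directly from response_format.schema.)
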
@@ -1266,8 +1266,6 @@ extern "C" {
     // Returns the sampled token
     LLAMA_API llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx);
 
-    LLAMA_API bool llama_sampler_is_grammar_empty(struct llama_sampler * smpl);
-
     // TODO: extend in the future
     //LLAMA_API void llama_decode_with_sampler(struct llama_context * ctx, struct llama_sampler * smpl, struct llama_batch batch, ...);
 

@@ -1,4 +1,3 @@
--r ../examples/agent/requirements.txt
 -r ../examples/llava/requirements.txt
 -r ../examples/server/bench/requirements.txt
 -r ../examples/server/tests/requirements.txt

@@ -1067,7 +1067,6 @@ struct llama_grammar * llama_grammar_init_impl(
     // then the pointers would be invalidated when the local vec_rules goes out of scope.
     return new llama_grammar {
         vocab,
-
         std::move(vec_rules),
         std::move(stacks),
         /* .partial_utf8 = */ {},

@@ -3,7 +3,6 @@
 #include "llama.h"
 
 #include <map>
-#include <set>
 #include <string>
 #include <vector>
 

@@ -116,6 +115,7 @@ struct llama_grammar {
     // buffer for partially generated UTF-8 sequence from accepted tokens
     llama_partial_utf8 partial_utf8;
 
     // lazy grammars wait for trigger words or tokens before constraining the sampling.
     bool awaiting_trigger;
     std::string trigger_buffer;
+    std::vector<llama_token> trigger_tokens;

@@ -1536,10 +1536,10 @@ struct llama_sampler * llama_sampler_init_grammar(
 
     if (grammar_str != nullptr && grammar_str[0] != '\0') {
         *ctx = {
-            /* .vocab = */ vocab,
-            /* .grammar_str = */ grammar_str,
-            /* .grammar_root = */ grammar_root,
-            /* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens),
+            /* .vocab = */ vocab,
+            /* .grammar_str = */ grammar_str,
+            /* .grammar_root = */ grammar_root,
+            /* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens),
         };
     } else {
         *ctx = {

@@ -2423,11 +2423,6 @@ uint32_t llama_sampler_get_seed(const struct llama_sampler * smpl) {
     return LLAMA_DEFAULT_SEED;
 }
 
-bool llama_sampler_is_grammar_empty(struct llama_sampler * smpl) {
-    struct llama_sampler_grammar * ctx = (struct llama_sampler_grammar *) smpl->ctx;
-    return ctx->grammar == nullptr;
-}
-
 // perf
 
 struct llama_perf_sampler_data llama_perf_sampler(const struct llama_sampler * chain) {

tests/.gitignore (2 changes, vendored)

@@ -1,6 +1,4 @@
 *
-!chat/
-!chat/**
 !*.*
 *.o
 ggml-common.h