minimize diffs

Olivier Chafik 2025-01-22 01:46:51 +00:00
parent dbf841b0d2
commit ef61a4c79e
13 changed files with 30 additions and 71 deletions

View file

@@ -41,7 +41,7 @@ indent_style = tab
trim_trailing_whitespace = unset
insert_final_newline = unset
[{tests/chat/templates/*.jinja,tests/chat/goldens/*.txt}]
[tests/chat/templates/*.jinja]
indent_style = unset
indent_size = unset
end_of_line = unset

View file

@@ -49,7 +49,6 @@ BUILD_TARGETS = \
# Binaries only useful for tests
TEST_TARGETS = \
tests/test-antiprompts \
tests/test-arg-parser \
tests/test-autorelease \
tests/test-backend-ops \
@@ -1475,11 +1474,6 @@ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
tests/test-antiprompts: tests/test-antiprompts.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
tests/test-tool-call: tests/test-tool-call.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)

View file

@@ -4,12 +4,9 @@
#include "llama-cpp.h"
#include <functional>
#include <queue>
#include <string>
#include <vector>
#include <sstream>
#include <unordered_map>
#ifdef _WIN32
#define DIRECTORY_SEPARATOR '\\'

View file

@@ -504,13 +504,12 @@ int main(int argc, char ** argv) {
std::vector<llama_token> embd;
// single-token antiprompts
std::vector<llama_token> antiprompt_single_token;
std::vector<llama_token> antiprompt_token;
antiprompt_single_token.reserve(params.antiprompt.size());
for (const std::string & antiprompt : params.antiprompt) {
auto ids = ::common_tokenize(ctx, antiprompt, false, true);
if (ids.size() == 1) {
antiprompt_single_token.push_back(ids[0]);
antiprompt_token.push_back(ids[0]);
}
}
@@ -756,7 +755,7 @@ int main(int argc, char ** argv) {
// check for reverse prompt using special tokens
llama_token last_token = common_sampler_last(smpl);
if (std::find(antiprompt_single_token.begin(), antiprompt_single_token.end(), last_token) != antiprompt_single_token.end()) {
if (std::find(antiprompt_token.begin(), antiprompt_token.end(), last_token) != antiprompt_token.end()) {
if (params.interactive) {
is_interacting = true;
}
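
Note on the hunk above: it only renames antiprompt_single_token to antiprompt_token; the underlying pattern stays the same, namely tokenize each antiprompt, keep the ones that map to exactly one token, and compare the last sampled token against that list. A minimal self-contained sketch of that pattern, assuming llama.cpp's common_tokenize and common_sampler_last helpers; the wrapper function name is hypothetical:

```cpp
// Sketch only (not the actual examples/main/main.cpp code): collect antiprompts
// that tokenize to a single token, then match the last sampled token against them.
// Assumes llama.cpp's common/common.h and common/sampling.h helpers; the wrapper
// function name below is hypothetical.
#include <algorithm>
#include <string>
#include <vector>

#include "common.h"
#include "sampling.h"

static bool hit_single_token_antiprompt(
        llama_context * ctx,
        common_sampler * smpl,
        const std::vector<std::string> & antiprompts) {
    std::vector<llama_token> antiprompt_token;
    antiprompt_token.reserve(antiprompts.size());
    for (const std::string & antiprompt : antiprompts) {
        auto ids = common_tokenize(ctx, antiprompt, false, true);
        if (ids.size() == 1) {
            // only single-token antiprompts can be checked against one sampled token
            antiprompt_token.push_back(ids[0]);
        }
    }
    const llama_token last_token = common_sampler_last(smpl);
    return std::find(antiprompt_token.begin(), antiprompt_token.end(), last_token)
        != antiprompt_token.end();
}
```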

View file

@@ -26,7 +26,6 @@
#include <deque>
#include <memory>
#include <mutex>
#include <optional>
#include <signal.h>
#include <thread>
#include <unordered_map>
@@ -168,6 +167,7 @@ struct slot_params {
{"min_keep", sampling.min_keep},
{"grammar", sampling.grammar},
{"grammar_trigger_words", sampling.grammar_trigger_words},
{"grammar_trigger_tokens", sampling.grammar_trigger_tokens},
{"samplers", samplers},
{"speculative.n_max", speculative.n_max},
{"speculative.n_min", speculative.n_min},
@@ -386,6 +386,14 @@ struct server_task {
return out;
};
{
params.antiprompt.clear();
const auto stop = data.find("stop");
if (stop != data.end()) {
params.antiprompt = to_string_vec(*stop);
}
}
{
const auto grammar_trigger_words = data.find("grammar_trigger_words");
if (grammar_trigger_words != data.end()) {
@@ -401,13 +409,6 @@
}
}
{
const auto stop = data.find("stop");
if (stop != data.end()) {
params.antiprompt = to_string_vec(*stop);
}
}
{
const auto samplers = data.find("samplers");
if (samplers != data.end()) {
@@ -730,7 +731,7 @@ struct server_task_result_cmpl_final : server_task_result {
std::time_t t = std::time(0);
json res {
json res = json {
{"choices", json::array({choice})},
{"created", t},
{"model", oaicompat_model},
@@ -762,13 +763,13 @@ struct server_task_result_cmpl_final : server_task_result {
finish_reason = "stop";
}
json choice {
json choice = json {
{"finish_reason", finish_reason},
{"index", 0},
{"delta", json::object()}
};
json ret {
json ret = json {
{"choices", json::array({choice})},
{"created", t},
{"id", oaicompat_cmpl_id},
@@ -804,12 +805,10 @@ struct server_task_result_cmpl_partial : server_task_result {
result_timings timings;
// OAI-compat fields
bool verbose = false;
oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
std::string oaicompat_model;
std::string oaicompat_cmpl_id;
json oaicompat_tools;
llama_tool_call_style oaicompat_tool_call_style = llama_tool_call_style::None;
bool verbose = false;
oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
std::string oaicompat_model;
std::string oaicompat_cmpl_id;
virtual int get_index() override {
return index;
@@ -2048,9 +2047,6 @@ struct server_context {
bool process_token(completion_token_output & result, server_slot & slot) {
// remember which tokens were sampled - used for repetition penalties during sampling
const std::string token_str = result.text_to_send;
// TODO:
// const std::string token_str = result.text_to_send;
// const std::string token_str = common_token_to_piece(ctx, result.tok, params_base.special || (match.pos != std::string::npos && match.is_grammar_trigger));
slot.sampled = result.tok;
slot.generated_text += token_str;
@@ -2276,8 +2272,6 @@ struct server_context {
res->oaicompat = slot.params.oaicompat;
res->oaicompat_model = slot.params.oaicompat_model;
res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
// res->oaicompat_tools = slot.params.oaicompat_tools;
// res->oaicompat_tool_call_style = slot.params.oaicompat_tool_call_style;
// populate res.probs_output
if (slot.params.sampling.n_probs > 0) {
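
For reference, the reordered block in this file parses the request's "stop" field into params.antiprompt before the grammar trigger words are handled. A minimal sketch of that parsing step with nlohmann::json; the standalone to_string_vec helper here is a hypothetical stand-in for the lambda used in server.cpp, and the example payload is illustrative:

```cpp
// Sketch only: turning a request's "stop" field into an antiprompt list, mirroring
// the data.find("stop") block in the hunk above.
#include <nlohmann/json.hpp>
#include <string>
#include <vector>

using json = nlohmann::json;

// Hypothetical stand-in for the to_string_vec lambda defined earlier in server.cpp:
// accepts either a single string or an array of strings.
static std::vector<std::string> to_string_vec(const json & j) {
    std::vector<std::string> out;
    if (j.is_string()) {
        out.push_back(j.get<std::string>());
    } else if (j.is_array()) {
        for (const auto & e : j) {
            out.push_back(e.get<std::string>());
        }
    }
    return out;
}

int main() {
    json data = json::parse(R"({"stop": ["</s>", "User:"]})");

    std::vector<std::string> antiprompt;
    const auto stop = data.find("stop");
    if (stop != data.end()) {
        antiprompt = to_string_vec(*stop);
    }
    // antiprompt now holds {"</s>", "User:"}
    return 0;
}
```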

View file

@@ -1,14 +1,14 @@
#!/bin/bash
# make sure we are in the right directory
TESTS_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd $TESTS_DIR
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd $SCRIPT_DIR
set -eu
if [[ "${SLOW_TESTS:-0}" == 1 ]]; then
# Slow tests for tool calls need quite a few models ahead of time to avoid timing out.
python $TESTS_DIR/../../../scripts/fetch_server_test_models.py
python $SCRIPT_DIR/../../../scripts/fetch_server_test_models.py
fi
if [ $# -lt 1 ]

View file

@@ -361,7 +361,6 @@ inline std::string format_chat(const common_chat_template & tmpl, const std::vec
std::string role = json_value(curr_msg, "role", std::string(""));
std::string content;
if (curr_msg.contains("content")) {
if (curr_msg["content"].is_string()) {
content = curr_msg["content"].get<std::string>();
@@ -611,29 +610,16 @@ static json oaicompat_completion_params_parse(
llama_params["stop"] = json_value(body, "stop", json::array());
}
// Handle "response_format" field (https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format)
// Handle "response_format" field
auto tool_choice = json_value(body, "tool_choice", std::string("auto"));
if (body.contains("response_format")) {
json response_format = json_value(body, "response_format", json::object());
std::string response_type = json_value(response_format, "type", std::string());
if (response_type == "json_object") {
// Legacy llama.cpp, llama-cpp-python and Together.ai format.
llama_params["json_schema"] = json_value(response_format, "schema", json::object());
} else if (response_type == "json_schema") {
// OpenAI JSON schema format.
auto json_schema = json_value(response_format, "json_schema", json::object());
json schema = json_value(json_schema, "schema", json::object());
std::string description = json_value(json_schema, "description", std::string());
if (!description.empty()) {
if (schema.contains("description")) {
throw std::runtime_error("Cannot have both a description in the json_schema object and inside its schema.");
}
schema["description"] = description;
}
bool strict = json_value(json_schema, "strict", false);
if (strict) {
llama_params["json_schema"] = schema;
}
llama_params["json_schema"] = json_value(json_schema, "schema", json::object());
} else if (!response_type.empty() && response_type != "text") {
throw std::runtime_error("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
}
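
The simplified branch above maps both response_format variants to llama_params["json_schema"]: the legacy "json_object" form reads response_format.schema, while the OpenAI-style "json_schema" form reads response_format.json_schema.schema, with the description/strict handling dropped. A minimal sketch of that mapping with nlohmann::json; the example payload shapes and the helper name are illustrative, not the actual utils.hpp code:

```cpp
// Sketch only: extracting a JSON schema from the two response_format shapes handled
// above. Assumed example payloads:
//   {"response_format": {"type": "json_object", "schema": {...}}}                   // legacy
//   {"response_format": {"type": "json_schema", "json_schema": {"schema": {...}}}}  // OpenAI-style
#include <nlohmann/json.hpp>
#include <stdexcept>
#include <string>

using json = nlohmann::json;

static json extract_json_schema(const json & body) {
    json llama_params = json::object();
    const json response_format = body.value("response_format", json::object());
    const std::string response_type = response_format.value("type", std::string());
    if (response_type == "json_object") {
        llama_params["json_schema"] = response_format.value("schema", json::object());
    } else if (response_type == "json_schema") {
        const json json_schema = response_format.value("json_schema", json::object());
        llama_params["json_schema"] = json_schema.value("schema", json::object());
    } else if (!response_type.empty() && response_type != "text") {
        throw std::runtime_error("unsupported response_format type: " + response_type);
    }
    return llama_params;
}
```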

View file

@@ -1266,8 +1266,6 @@ extern "C" {
// Returns the sampled token
LLAMA_API llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx);
LLAMA_API bool llama_sampler_is_grammar_empty(struct llama_sampler * smpl);
// TODO: extend in the future
//LLAMA_API void llama_decode_with_sampler(struct llama_context * ctx, struct llama_sampler * smpl, struct llama_batch batch, ...);

View file

@@ -1,4 +1,3 @@
-r ../examples/agent/requirements.txt
-r ../examples/llava/requirements.txt
-r ../examples/server/bench/requirements.txt
-r ../examples/server/tests/requirements.txt

View file

@@ -1067,7 +1067,6 @@ struct llama_grammar * llama_grammar_init_impl(
// then the pointers would be invalidated when the local vec_rules goes out of scope.
return new llama_grammar {
vocab,
std::move(vec_rules),
std::move(stacks),
/* .partial_utf8 = */ {},

View file

@@ -3,7 +3,6 @@
#include "llama.h"
#include <map>
#include <set>
#include <string>
#include <vector>
@@ -116,6 +115,7 @@ struct llama_grammar {
// buffer for partially generated UTF-8 sequence from accepted tokens
llama_partial_utf8 partial_utf8;
// lazy grammars wait for trigger words or tokens before constraining the sampling.
bool awaiting_trigger;
std::string trigger_buffer;
std::vector<llama_token> trigger_tokens;
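
The comment added in this hunk describes the lazy-grammar behaviour: the grammar waits for a trigger word or trigger token before it starts constraining sampling. The following is a conceptual sketch of that idea only, not the actual llama-grammar.cpp implementation; the trigger_words field and the accept_piece function are assumptions for illustration:

```cpp
// Conceptual sketch only: while a lazy grammar is awaiting its trigger, sampled text
// is buffered and the grammar is not applied; once a trigger token or trigger word is
// seen, the grammar starts constraining sampling.
#include <algorithm>
#include <string>
#include <vector>

struct lazy_trigger_state {
    bool awaiting_trigger = true;              // mirrors llama_grammar::awaiting_trigger
    std::string trigger_buffer;                // text accumulated while waiting
    std::vector<int> trigger_tokens;           // tokens that activate the grammar
    std::vector<std::string> trigger_words;    // words that activate the grammar (assumed field)
};

// Returns true once the grammar should start constraining sampling.
static bool accept_piece(lazy_trigger_state & st, int token, const std::string & piece) {
    if (!st.awaiting_trigger) {
        return true;  // already triggered; keep applying the grammar
    }
    if (std::find(st.trigger_tokens.begin(), st.trigger_tokens.end(), token) != st.trigger_tokens.end()) {
        st.awaiting_trigger = false;
        return true;
    }
    st.trigger_buffer += piece;
    for (const auto & word : st.trigger_words) {
        if (st.trigger_buffer.find(word) != std::string::npos) {
            st.awaiting_trigger = false;
            return true;
        }
    }
    return false;  // still waiting; do not constrain sampling yet
}
```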

View file

@@ -1536,10 +1536,10 @@ struct llama_sampler * llama_sampler_init_grammar(
if (grammar_str != nullptr && grammar_str[0] != '\0') {
*ctx = {
/* .vocab = */ vocab,
/* .grammar_str = */ grammar_str,
/* .grammar_root = */ grammar_root,
/* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens),
/* .vocab = */ vocab,
/* .grammar_str = */ grammar_str,
/* .grammar_root = */ grammar_root,
/* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens),
};
} else {
*ctx = {
@@ -2423,11 +2423,6 @@ uint32_t llama_sampler_get_seed(const struct llama_sampler * smpl) {
return LLAMA_DEFAULT_SEED;
}
bool llama_sampler_is_grammar_empty(struct llama_sampler * smpl) {
struct llama_sampler_grammar * ctx = (struct llama_sampler_grammar *) smpl->ctx;
return ctx->grammar == nullptr;
}
// perf
struct llama_perf_sampler_data llama_perf_sampler(const struct llama_sampler * chain) {

tests/.gitignore (vendored, 2 lines changed)
View file

@@ -1,6 +1,4 @@
*
!chat/
!chat/**
!*.*
*.o
ggml-common.h