From 6412a598a19de3901a6605d18a0dbf7e59abd909 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 9 Sep 2024 18:22:25 +0300
Subject: [PATCH] common : more explicit includes

---
 common/arg.cpp                                |   6 +-
 common/arg.h                                  |  29 +---
 common/common.h                               | 155 ++++++++++++------
 common/sampling.h                             |  54 +-----
 examples/batched-bench/batched-bench.cpp      |   1 +
 examples/batched/batched.cpp                  |   1 +
 .../cvector-generator/cvector-generator.cpp   |   1 +
 examples/cvector-generator/pca.hpp            |   5 +-
 examples/embedding/embedding.cpp              |   1 +
 examples/eval-callback/eval-callback.cpp      |   1 +
 examples/export-lora/export-lora.cpp          |   1 +
 examples/gen-docs/gen-docs.cpp                |   1 +
 examples/gritlm/gritlm.cpp                    |   1 +
 examples/imatrix/imatrix.cpp                  |   1 +
 examples/infill/infill.cpp                    |   3 +-
 examples/llava/llava-cli.cpp                  |   7 +-
 examples/llava/minicpmv-cli.cpp               |   4 +-
 examples/lookahead/lookahead.cpp              |   2 +
 examples/lookup/lookup-create.cpp             |   7 +-
 examples/lookup/lookup-stats.cpp              |   5 +-
 examples/lookup/lookup.cpp                    |   4 +-
 examples/main/main.cpp                        |   3 +-
 examples/parallel/parallel.cpp                |   2 +
 examples/passkey/passkey.cpp                  |   1 +
 examples/perplexity/perplexity.cpp            |  11 +-
 examples/retrieval/retrieval.cpp              |   1 +
 examples/save-load-state/save-load-state.cpp  |   1 +
 examples/server/server.cpp                    |   2 +
 examples/simple/simple.cpp                    |   1 +
 examples/speculative/speculative.cpp          |   4 +-
 tests/test-arg-parser.cpp                     |   5 +-
 31 files changed, 169 insertions(+), 152 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 99f9f9d5f..87ae8db4e 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1,5 +1,7 @@
 #include "arg.h"
 
+#include "sampling.h"
+
 #include
 #include
 #include
@@ -341,10 +343,6 @@ bool gpt_params_parse(int argc, char ** argv, llama_arg_context & ctx_arg) {
     return true;
 }
 
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex) {
-    return gpt_params_parser_init(params, ex, nullptr);
-}
-
 llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **)) {
     llama_arg_context ctx_arg(params);
     ctx_arg.print_usage = print_usage;
diff --git a/common/arg.h b/common/arg.h
index eb9530dcb..c6b5f0cde 100644
--- a/common/arg.h
+++ b/common/arg.h
@@ -2,37 +2,14 @@
 
 #include "common.h"
 
+#include <set>
 #include <string>
 #include <vector>
-#include <set>
 
 //
 // CLI argument parsing
 //
 
-struct gpt_params;
-
-enum llama_example {
-    LLAMA_EXAMPLE_COMMON,
-    LLAMA_EXAMPLE_SPECULATIVE,
-    LLAMA_EXAMPLE_MAIN,
-    LLAMA_EXAMPLE_INFILL,
-    LLAMA_EXAMPLE_EMBEDDING,
-    LLAMA_EXAMPLE_PERPLEXITY,
-    LLAMA_EXAMPLE_RETRIEVAL,
-    LLAMA_EXAMPLE_PASSKEY,
-    LLAMA_EXAMPLE_IMATRIX,
-    LLAMA_EXAMPLE_BENCH,
-    LLAMA_EXAMPLE_SERVER,
-    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
-    LLAMA_EXAMPLE_EXPORT_LORA,
-    LLAMA_EXAMPLE_LLAVA,
-    LLAMA_EXAMPLE_LOOKUP,
-    LLAMA_EXAMPLE_PARALLEL,
-
-    LLAMA_EXAMPLE_COUNT,
-};
-
 struct llama_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
     std::vector<const char *> args;
@@ -92,10 +69,8 @@ struct llama_arg_context {
     llama_arg_context(gpt_params & params) : params(params) {}
 };
 
-// initialize list of options (arguments) that can be used by the current example
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex);
 // optionally, we can provide "print_usage" to print example usage
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **));
+llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
 
 // parse input arguments from CLI
 // if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
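Note on the `arg.h` change above: the two `gpt_params_parser_init` overloads collapse into a single declaration with a defaulted `print_usage = nullptr`, so existing call sites keep compiling unchanged. A minimal sketch of the calling pattern (the `print_usage` body here is illustrative, not taken from the patch):

```cpp
#include "arg.h"
#include "common.h"

#include <cstdio>

// illustrative usage callback; any void(int, char **) function fits the parameter
static void print_usage(int /*argc*/, char ** argv) {
    printf("usage: %s [options]\n", argv[0]);
}

int main(int argc, char ** argv) {
    gpt_params params;

    // the defaulted argument now covers what the removed two-parameter overload did;
    // passing an explicit callback still works:
    //     auto ctx_arg = gpt_params_parser_init(params, LLAMA_EXAMPLE_MAIN, print_usage);
    auto ctx_arg = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
    (void) print_usage; // unused in this minimal sketch

    if (!gpt_params_parse(argc, argv, ctx_arg)) {
        return 1;
    }
    return 0;
}
```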
diff --git a/common/common.h b/common/common.h
index 0f18e8f14..1039862eb 100644
--- a/common/common.h
+++ b/common/common.h
@@ -4,21 +4,11 @@
 
 #include "llama.h"
 
-#include "sampling.h"
-#include "arg.h"
-
 #define LOG_NO_FILE_LINE_FUNCTION
 #include "log.h"
 
-#include
 #include <string>
 #include <vector>
-#include
-#include
-#include
-#include
-#include
-#include
 
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@@ -57,19 +47,6 @@ struct llama_control_vector_load_info;
 //
 // CPU utils
 //
 
-int32_t cpu_get_num_physical_cores();
-int32_t cpu_get_num_math();
-
-//
-// Common params
-//
-
-// dimensionality reduction methods, used by cvector-generator
-enum dimre_method {
-    DIMRE_METHOD_PCA,
-    DIMRE_METHOD_MEAN,
-};
-
 struct cpu_params {
     int n_threads = -1;
     bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
@@ -79,6 +56,92 @@ struct cpu_params {
     uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
 };
 
+int32_t cpu_get_num_physical_cores();
+int32_t cpu_get_num_math();
+
+//
+// Common params
+//
+
+enum llama_example {
+    LLAMA_EXAMPLE_COMMON,
+    LLAMA_EXAMPLE_SPECULATIVE,
+    LLAMA_EXAMPLE_MAIN,
+    LLAMA_EXAMPLE_INFILL,
+    LLAMA_EXAMPLE_EMBEDDING,
+    LLAMA_EXAMPLE_PERPLEXITY,
+    LLAMA_EXAMPLE_RETRIEVAL,
+    LLAMA_EXAMPLE_PASSKEY,
+    LLAMA_EXAMPLE_IMATRIX,
+    LLAMA_EXAMPLE_BENCH,
+    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
+    LLAMA_EXAMPLE_EXPORT_LORA,
+    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_LOOKUP,
+    LLAMA_EXAMPLE_PARALLEL,
+
+    LLAMA_EXAMPLE_COUNT,
+};
+
+enum gpt_sampler_type {
+    GPT_SAMPLER_TYPE_NONE        = 0,
+    GPT_SAMPLER_TYPE_TOP_K       = 1,
+    GPT_SAMPLER_TYPE_TOP_P       = 2,
+    GPT_SAMPLER_TYPE_MIN_P       = 3,
+    GPT_SAMPLER_TYPE_TFS_Z       = 4,
+    GPT_SAMPLER_TYPE_TYPICAL_P   = 5,
+    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
+};
+
+// dimensionality reduction methods, used by cvector-generator
+enum dimre_method {
+    DIMRE_METHOD_PCA,
+    DIMRE_METHOD_MEAN,
+};
+
+// sampler parameters
+struct gpt_sampler_params {
+    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
+
+    int32_t n_prev = 64; // number of previous tokens to remember
+    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
+    int32_t top_k = 40; // <= 0 to use vocab size
+    float top_p = 0.95f; // 1.0 = disabled
+    float min_p = 0.05f; // 0.0 = disabled
+    float tfs_z = 1.00f; // 1.0 = disabled
+    float typ_p = 1.00f; // typical_p, 1.0 = disabled
+    float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float dynatemp_range = 0.00f; // 0.0 = disabled
+    float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
+    int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float penalty_repeat = 1.00f; // 1.0 = disabled
+    float penalty_freq = 0.00f; // 0.0 = disabled
+    float penalty_present = 0.00f; // 0.0 = disabled
+    int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float mirostat_tau = 5.00f; // target entropy
+    float mirostat_eta = 0.10f; // learning rate
+    bool penalize_nl = false; // consider newlines as a repeatable token
+    bool ignore_eos = false;
+
+    std::vector<enum gpt_sampler_type> samplers = {
+        GPT_SAMPLER_TYPE_TOP_K,
+        GPT_SAMPLER_TYPE_TFS_Z,
+        GPT_SAMPLER_TYPE_TYPICAL_P,
+        GPT_SAMPLER_TYPE_TOP_P,
+        GPT_SAMPLER_TYPE_MIN_P,
+        GPT_SAMPLER_TYPE_TEMPERATURE
+    };
+
+    std::string grammar; // optional BNF-like grammar to constrain sampling
+
+    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
+
+    // print the parameters into a string
+    std::string print() const;
+};
+
 struct gpt_params {
     int32_t n_predict = -1; // new tokens to predict
     int32_t n_ctx = 0; // context size
@@ -123,23 +186,23 @@ struct gpt_params {
     struct gpt_sampler_params sparams;
 
-    std::string model = ""; // model path
-    std::string model_draft = ""; // draft model for speculative decoding
-    std::string model_alias = "unknown"; // model alias
-    std::string model_url = ""; // model url to download
-    std::string hf_token = ""; // HF token
-    std::string hf_repo = ""; // HF repo
-    std::string hf_file = ""; // HF file
-    std::string prompt = "";
-    std::string prompt_file = ""; // store the external prompt file name
-    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
-    std::string input_prefix = ""; // string to prefix user inputs with
-    std::string input_suffix = ""; // string to suffix user inputs with
-    std::string logdir = ""; // directory in which to save YAML log files
-    std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
-    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
-    std::string logits_file = ""; // file for saving *all* logits
-    std::string rpc_servers = ""; // comma separated list of RPC servers
+    std::string model = ""; // model path // NOLINT
+    std::string model_draft = ""; // draft model for speculative decoding // NOLINT
+    std::string model_alias = "unknown"; // model alias // NOLINT
+    std::string model_url = ""; // model url to download // NOLINT
+    std::string hf_token = ""; // HF token // NOLINT
+    std::string hf_repo = ""; // HF repo // NOLINT
+    std::string hf_file = ""; // HF file // NOLINT
+    std::string prompt = ""; // NOLINT
+    std::string prompt_file = ""; // store the external prompt file name // NOLINT
+    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT
+    std::string input_prefix = ""; // string to prefix user inputs with // NOLINT
+    std::string input_suffix = ""; // string to suffix user inputs with // NOLINT
+    std::string logdir = ""; // directory in which to save YAML log files // NOLINT
+    std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
+    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
+    std::string logits_file = ""; // file for saving *all* logits // NOLINT
+    std::string rpc_servers = ""; // comma separated list of RPC servers // NOLINT
 
     std::vector<std::string> in_files; // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
@@ -200,7 +263,7 @@ struct gpt_params {
     std::string cache_type_v = "f16"; // KV cache data type for the V
 
     // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector
+    std::string mmproj = ""; // path to multimodal projector // NOLINT
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
@@ -216,15 +279,15 @@ struct gpt_params {
     int n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
 
     std::string hostname = "127.0.0.1";
-    std::string public_path = "";
-    std::string chat_template = "";
-    std::string system_prompt = "";
+    std::string public_path = ""; // NOLINT
+    std::string chat_template = ""; // NOLINT
+    std::string system_prompt = ""; // NOLINT
     bool enable_chat_template = true;
 
     std::vector<std::string> api_keys;
 
-    std::string ssl_file_key = "";
-    std::string ssl_file_cert = "";
+    std::string ssl_file_key = ""; // NOLINT
+    std::string ssl_file_cert = ""; // NOLINT
 
     bool endpoint_slots = true;
     bool endpoint_metrics = false;
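With `gpt_sampler_params` now living in `common.h` (and embedded in `gpt_params` as `sparams`), code that only tweaks sampling defaults no longer needs `sampling.h`. A rough sketch of what that looks like, assuming the `print()` implementation from `common/sampling.cpp` is linked in:

```cpp
#include "common.h"

#include <cstdio>

int main() {
    gpt_params params;
    gpt_sampler_params & sparams = params.sparams;

    // override a few of the defaults declared in the struct above
    sparams.temp  = 0.7f; // default is 0.80f
    sparams.top_k = 50;   // default is 40

    // trim the default six-stage sampler chain down to two stages
    sparams.samplers = {
        GPT_SAMPLER_TYPE_TOP_K,
        GPT_SAMPLER_TYPE_TEMPERATURE,
    };

    // print() is declared in the struct; its definition lives in common/sampling.cpp
    printf("%s\n", sparams.print().c_str());
    return 0;
}
```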
diff --git a/common/sampling.h b/common/sampling.h
index 654e0c513..0a4461fab 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -2,61 +2,11 @@
 
 #include "llama.h"
 
+#include "common.h"
+
 #include <string>
 #include <vector>
 
-enum gpt_sampler_type {
-    GPT_SAMPLER_TYPE_NONE = 0,
-    GPT_SAMPLER_TYPE_TOP_K = 1,
-    GPT_SAMPLER_TYPE_TOP_P = 2,
-    GPT_SAMPLER_TYPE_MIN_P = 3,
-    GPT_SAMPLER_TYPE_TFS_Z = 4,
-    GPT_SAMPLER_TYPE_TYPICAL_P = 5,
-    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
-};
-
-// sampling parameters
-struct gpt_sampler_params {
-    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
-
-    int32_t n_prev = 64; // number of previous tokens to remember
-    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
-    int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
-    int32_t top_k = 40; // <= 0 to use vocab size
-    float top_p = 0.95f; // 1.0 = disabled
-    float min_p = 0.05f; // 0.0 = disabled
-    float tfs_z = 1.00f; // 1.0 = disabled
-    float typ_p = 1.00f; // typical_p, 1.0 = disabled
-    float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
-    float dynatemp_range = 0.00f; // 0.0 = disabled
-    float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
-    int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
-    float penalty_repeat = 1.00f; // 1.0 = disabled
-    float penalty_freq = 0.00f; // 0.0 = disabled
-    float penalty_present = 0.00f; // 0.0 = disabled
-    int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float mirostat_tau = 5.00f; // target entropy
-    float mirostat_eta = 0.10f; // learning rate
-    bool penalize_nl = false; // consider newlines as a repeatable token
-    bool ignore_eos = false;
-
-    std::vector<enum gpt_sampler_type> samplers = {
-        GPT_SAMPLER_TYPE_TOP_K,
-        GPT_SAMPLER_TYPE_TFS_Z,
-        GPT_SAMPLER_TYPE_TYPICAL_P,
-        GPT_SAMPLER_TYPE_TOP_P,
-        GPT_SAMPLER_TYPE_MIN_P,
-        GPT_SAMPLER_TYPE_TEMPERATURE
-    };
-
-    std::string grammar; // optional BNF-like grammar to constrain sampling
-
-    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
-
-    // print the parameters into a string
-    std::string print() const;
-};
-
 // gpt_sampler extends llama_sampler with additional functionality:
 //
 // - grammar support
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 2a8083051..b6a64e152 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 6dfff77c8..32be23dfc 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 
diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp
index 65883d24d..4707e28a8 100644
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
diff --git a/examples/cvector-generator/pca.hpp b/examples/cvector-generator/pca.hpp
index 6ec3141af..05c66856c 100644
--- a/examples/cvector-generator/pca.hpp
+++ b/examples/cvector-generator/pca.hpp
@@ -12,12 +12,9 @@
 #include
 #include
+#include
 #include
-#include
 #include
-#include
-#include
-#include
 
 #define DEBUG_POS 5
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 64d126e07..20312a3fe 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 
diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp
index ca8e6b030..9c8a5938b 100644
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 47b5226a0..7c8c813ed 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
diff --git a/examples/gen-docs/gen-docs.cpp b/examples/gen-docs/gen-docs.cpp
index 89ebadc53..b6d4725fd 100644
--- a/examples/gen-docs/gen-docs.cpp
+++ b/examples/gen-docs/gen-docs.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 
 #include
diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp
index c6883e335..d3bbf1e68 100644
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index add0b8692..a3771b750 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 806770723..f5d1c239d 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -1,6 +1,7 @@
+#include "arg.h"
 #include "common.h"
-
 #include "console.h"
+#include "sampling.h"
 #include "llama.h"
 
 #include
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index e762107f9..e8974a355 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -1,11 +1,12 @@
-#include "ggml.h"
+#include "arg.h"
+#include "base64.hpp"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
-
-#include "base64.hpp"
+#include "ggml.h"
 
 #include
 #include
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index ac74d13a5..b0a4b810f 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -1,9 +1,11 @@
-#include "ggml.h"
+#include "arg.h"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
+#include "ggml.h"
 
 #include
 #include
diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp
index a63b85bc6..93254cc6d 100644
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@@ -1,4 +1,6 @@
+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "llama.h"
 
 #include
diff --git a/examples/lookup/lookup-create.cpp b/examples/lookup/lookup-create.cpp
index 611a216d7..f632cd3b8 100644
--- a/examples/lookup/lookup-create.cpp
+++ b/examples/lookup/lookup-create.cpp
@@ -1,7 +1,8 @@
-#include "ggml.h"
-#include "llama.h"
+#include "arg.h"
 #include "common.h"
 #include "ngram-cache.h"
+#include "ggml.h"
+#include "llama.h"
 
 #include
 #include
@@ -40,4 +41,6 @@ int main(int argc, char ** argv){
     fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str());
 
     llama_ngram_cache_save(ngram_cache, params.lookup_cache_static);
+
+    return 0;
 }
diff --git a/examples/lookup/lookup-stats.cpp b/examples/lookup/lookup-stats.cpp
index 099a29d24..2bda3a07e 100644
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@@ -1,8 +1,9 @@
-#include "ggml.h"
+#include "arg.h"
 #include "common.h"
-#include "llama.h"
 #include "log.h"
 #include "ngram-cache.h"
+#include "llama.h"
+#include "ggml.h"
 
 #include
 #include
#include "ngram-cache.h" +#include "sampling.h" +#include "llama.h" #include #include diff --git a/examples/main/main.cpp b/examples/main/main.cpp index fd19ecce8..38db1ba56 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -1,6 +1,7 @@ +#include "arg.h" #include "common.h" - #include "console.h" +#include "sampling.h" #include "llama.h" #include diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index ecda353f1..191c2e614 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -1,7 +1,9 @@ // A basic application simulating a server with multiple clients. // The clients submit requests to the server and they are processed in parallel. +#include "arg.h" #include "common.h" +#include "sampling.h" #include "llama.h" #include diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp index 10a8ca3ab..6cb2e403b 100644 --- a/examples/passkey/passkey.cpp +++ b/examples/passkey/passkey.cpp @@ -1,3 +1,4 @@ +#include "arg.h" #include "common.h" #include "llama.h" diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 623972fc1..970c29880 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -1,18 +1,19 @@ +#include "arg.h" #include "common.h" #include "llama.h" +#include +#include #include #include #include #include +#include +#include +#include #include #include -#include -#include #include -#include -#include -#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp index e62c71849..7206f427b 100644 --- a/examples/retrieval/retrieval.cpp +++ b/examples/retrieval/retrieval.cpp @@ -1,3 +1,4 @@ +#include "arg.h" #include "common.h" #include "llama.h" diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index 743081a77..ffb638070 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -1,3 +1,4 @@ +#include "arg.h" #include "common.h" #include "llama.h" diff --git a/examples/server/server.cpp b/examples/server/server.cpp index f3298739e..0586d82fd 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1,6 +1,8 @@ #include "utils.hpp" +#include "arg.h" #include "common.h" +#include "sampling.h" #include "json-schema-to-grammar.h" #include "llama.h" diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp index 4ccf5d7e7..d573888d3 100644 --- a/examples/simple/simple.cpp +++ b/examples/simple/simple.cpp @@ -1,3 +1,4 @@ +#include "arg.h" #include "common.h" #include "llama.h" diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index efd429041..211a717dc 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -1,11 +1,13 @@ +#include "arg.h" #include "common.h" +#include "sampling.h" #include "llama.h" -#include #include #include #include #include +#include #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 100 #define SPEC_VOCAB_CHECK_START_TOKEN_ID 5 diff --git a/tests/test-arg-parser.cpp b/tests/test-arg-parser.cpp index 75b3451a9..83366685c 100644 --- a/tests/test-arg-parser.cpp +++ b/tests/test-arg-parser.cpp @@ -1,3 +1,6 @@ +#include "arg.h" +#include "common.h" + #include #include #include @@ -6,8 +9,6 @@ #undef NDEBUG #include -#include "common.h" - int main(void) { gpt_params params;
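Taken together, the patch makes each translation unit spell out what it uses instead of inheriting everything transitively from `common.h`: `arg.h` for CLI parsing, `sampling.h` only where sampling actually happens. Under the new layout, a fresh example would start roughly like the sketch below (the include comments and the `llama_backend_init`/`llama_backend_free` calls from `llama.h` are the author's conventions; the model setup in between is elided):

```cpp
#include "arg.h"      // gpt_params_parser_init, gpt_params_parse
#include "common.h"   // gpt_params, enum llama_example, gpt_sampler_params
#include "sampling.h" // gpt_sampler_* helpers, only if the example samples
#include "llama.h"

int main(int argc, char ** argv) {
    gpt_params params;

    auto ctx_arg = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
    if (!gpt_params_parse(argc, argv, ctx_arg)) {
        return 1;
    }

    llama_backend_init();

    // ... model loading, inference, sampling, etc. elided ...

    llama_backend_free();
    return 0;
}
```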