common : more explicit includes

2024-09-09 18:22:25 +03:00 · 2024-09-09 18:22:25 +03:00 · 6412a598a1
commit 6412a598a1
parent 3e03807043
31 changed files with 169 additions and 152 deletions
--- a/common/arg.cpp
+++ b/common/arg.cpp
@ -1,5 +1,7 @@
 #include "arg.h"
 #include "sampling.h"
 #include <algorithm>
 #include <string>
 #include <vector>
@ -341,10 +343,6 @@ bool gpt_params_parse(int argc, char ** argv, llama_arg_context & ctx_arg) {
    return true;
 }
 // Convenience overload: forwards to the three-argument gpt_params_parser_init,
 // passing nullptr as the print_usage callback, and returns its result unchanged.
 llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex) {
    return gpt_params_parser_init(params, ex, nullptr);
 }
 llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **)) {
    llama_arg_context ctx_arg(params);
    ctx_arg.print_usage = print_usage;
--- a/common/arg.h
+++ b/common/arg.h
@ -2,37 +2,14 @@
 #include "common.h"
 #include <set>
 #include <string>
 #include <vector>
 #include <set>
 //
 // CLI argument parsing
 //
 struct gpt_params;
 // Identifies an example program. A value of this type is passed to
 // gpt_params_parser_init() as the "current example", and llama_arg keeps a
 // std::set of these values (defaulting to {LLAMA_EXAMPLE_COMMON}).
 // No enumerator has an explicit value, so they are numbered 0, 1, 2, ... in
 // declaration order.
 enum llama_example {
    LLAMA_EXAMPLE_COMMON,
    LLAMA_EXAMPLE_SPECULATIVE,
    LLAMA_EXAMPLE_MAIN,
    LLAMA_EXAMPLE_INFILL,
    LLAMA_EXAMPLE_EMBEDDING,
    LLAMA_EXAMPLE_PERPLEXITY,
    LLAMA_EXAMPLE_RETRIEVAL,
    LLAMA_EXAMPLE_PASSKEY,
    LLAMA_EXAMPLE_IMATRIX,
    LLAMA_EXAMPLE_BENCH,
    LLAMA_EXAMPLE_SERVER,
    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
    LLAMA_EXAMPLE_EXPORT_LORA,
    LLAMA_EXAMPLE_LLAVA,
    LLAMA_EXAMPLE_LOOKUP,
    LLAMA_EXAMPLE_PARALLEL,
    // Declared last, so its value (16) equals the number of enumerators above.
    // NOTE(review): presumably a count sentinel rather than a real example - confirm.
    LLAMA_EXAMPLE_COUNT,
 };
 struct llama_arg {
    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
    std::vector<const char *> args;
@ -92,10 +69,8 @@ struct llama_arg_context {
    llama_arg_context(gpt_params & params) : params(params) {}
 };
 // initialize list of options (arguments) that can be used by the current example
 llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex);
 // optionally, we can provide "print_usage" to print example usage
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **));
+llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
 // parse input arguments from CLI
 // if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
--- a/common/common.h
+++ b/common/common.h
@ -4,21 +4,11 @@
 #include "llama.h"
 #include "sampling.h"
 #include "arg.h"
 #define LOG_NO_FILE_LINE_FUNCTION
 #include "log.h"
 #include <cmath>
 #include <string>
 #include <vector>
 #include <random>
 #include <thread>
 #include <set>
 #include <unordered_map>
 #include <tuple>
 #include <functional>
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@ -57,19 +47,6 @@ struct llama_control_vector_load_info;
 // CPU utils
 //
 int32_t cpu_get_num_physical_cores();
 int32_t cpu_get_num_math();
 //
 // Common params
 //
 // dimensionality reduction methods, used by cvector-generator
 // (implicit values: DIMRE_METHOD_PCA = 0, DIMRE_METHOD_MEAN = 1)
 enum dimre_method {
    DIMRE_METHOD_PCA,  // NOTE(review): presumably principal component analysis - confirm
    DIMRE_METHOD_MEAN, // NOTE(review): presumably a mean-based reduction - confirm
 };
 struct cpu_params {
    int      n_threads                   = -1;
    bool     cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
@ -79,6 +56,92 @@ struct cpu_params {
    uint32_t poll                        = 50;      // Polling (busywait) level (0 - no polling, 100 - mostly polling)
 };
 int32_t cpu_get_num_physical_cores();
 int32_t cpu_get_num_math();
 //
 // Common params
 //
 // Identifies an example program (one enumerator per example, plus a trailing
 // count entry). No enumerator has an explicit value, so they are numbered
 // 0, 1, 2, ... in declaration order.
 enum llama_example {
    LLAMA_EXAMPLE_COMMON,
    LLAMA_EXAMPLE_SPECULATIVE,
    LLAMA_EXAMPLE_MAIN,
    LLAMA_EXAMPLE_INFILL,
    LLAMA_EXAMPLE_EMBEDDING,
    LLAMA_EXAMPLE_PERPLEXITY,
    LLAMA_EXAMPLE_RETRIEVAL,
    LLAMA_EXAMPLE_PASSKEY,
    LLAMA_EXAMPLE_IMATRIX,
    LLAMA_EXAMPLE_BENCH,
    LLAMA_EXAMPLE_SERVER,
    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
    LLAMA_EXAMPLE_EXPORT_LORA,
    LLAMA_EXAMPLE_LLAVA,
    LLAMA_EXAMPLE_LOOKUP,
    LLAMA_EXAMPLE_PARALLEL,
    // Declared last, so its value (16) equals the number of enumerators above.
    // NOTE(review): presumably a count sentinel rather than a real example - confirm.
    LLAMA_EXAMPLE_COUNT,
 };
 // Identifiers for the individual sampler kinds. Values are assigned explicitly
 // (0..6). gpt_sampler_params::samplers holds a vector of these.
 enum gpt_sampler_type {
    GPT_SAMPLER_TYPE_NONE        = 0,
    GPT_SAMPLER_TYPE_TOP_K       = 1,
    GPT_SAMPLER_TYPE_TOP_P       = 2,
    GPT_SAMPLER_TYPE_MIN_P       = 3,
    GPT_SAMPLER_TYPE_TFS_Z       = 4,
    GPT_SAMPLER_TYPE_TYPICAL_P   = 5,
    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
 };
 // dimensionality reduction methods, used by cvector-generator
 // (implicit values: DIMRE_METHOD_PCA = 0, DIMRE_METHOD_MEAN = 1)
 enum dimre_method {
    DIMRE_METHOD_PCA,  // NOTE(review): presumably principal component analysis - confirm
    DIMRE_METHOD_MEAN, // NOTE(review): presumably a mean-based reduction - confirm
 };
 // sampler parameters
 // Plain aggregate of sampling settings; every scalar field carries an in-class
 // default, so a default-constructed instance is fully initialized.
 struct gpt_sampler_params {
    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
    int32_t n_prev            = 64;    // number of previous tokens to remember
    int32_t n_probs           = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
    int32_t min_keep          = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
    int32_t top_k             = 40;    // <= 0 to use vocab size
    float   top_p             = 0.95f; // 1.0 = disabled
    float   min_p             = 0.05f; // 0.0 = disabled
    float   tfs_z             = 1.00f; // 1.0 = disabled
    float   typ_p             = 1.00f; // typical_p, 1.0 = disabled
    float   temp              = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
    float   dynatemp_range    = 0.00f; // 0.0 = disabled
    float   dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
    int32_t penalty_last_n    = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
    float   penalty_repeat    = 1.00f; // 1.0 = disabled
    float   penalty_freq      = 0.00f; // 0.0 = disabled
    float   penalty_present   = 0.00f; // 0.0 = disabled
    int32_t mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
    float   mirostat_tau      = 5.00f; // target entropy
    float   mirostat_eta      = 0.10f; // learning rate
    bool    penalize_nl       = false; // consider newlines as a repeatable token
    bool    ignore_eos        = false; // NOTE(review): presumably ignores the end-of-sequence token when sampling - confirm
    // sampler types to use; default list is top_k, tfs_z, typical_p, top_p, min_p, temperature
    // NOTE(review): presumably applied in this order - confirm against the sampler construction code
    std::vector<enum gpt_sampler_type> samplers = {
        GPT_SAMPLER_TYPE_TOP_K,
        GPT_SAMPLER_TYPE_TFS_Z,
        GPT_SAMPLER_TYPE_TYPICAL_P,
        GPT_SAMPLER_TYPE_TOP_P,
        GPT_SAMPLER_TYPE_MIN_P,
        GPT_SAMPLER_TYPE_TEMPERATURE
    };
    std::string grammar; // optional BNF-like grammar to constrain sampling
    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
    // print the parameters into a string
    // (const member: does not modify the params; only declared here, definition is not in this struct)
    std::string print() const;
 };
 struct gpt_params {
    int32_t n_predict             =    -1; // new tokens to predict
    int32_t n_ctx                 =     0; // context size
@ -123,23 +186,23 @@ struct gpt_params {
    struct gpt_sampler_params sparams;
-    std::string model                = ""; // model path
+    std::string model                = ""; // model path                                                    // NOLINT
-    std::string model_draft          = ""; // draft model for speculative decoding
+    std::string model_draft          = ""; // draft model for speculative decoding                          // NOLINT
-    std::string model_alias          = "unknown"; // model alias
+    std::string model_alias          = "unknown"; // model alias                                            // NOLINT
-    std::string model_url            = ""; // model url to download
+    std::string model_url            = ""; // model url to download                                         // NOLINT
-    std::string hf_token             = ""; // HF token
+    std::string hf_token             = ""; // HF token                                                      // NOLINT
-    std::string hf_repo              = ""; // HF repo
+    std::string hf_repo              = ""; // HF repo                                                       // NOLINT
-    std::string hf_file              = ""; // HF file
+    std::string hf_file              = ""; // HF file                                                       // NOLINT
-    std::string prompt               = "";
+    std::string prompt               = "";                                                                  // NOLINT
-    std::string prompt_file          = ""; // store the external prompt file name
+    std::string prompt_file          = ""; // store the external prompt file name                           // NOLINT
-    std::string path_prompt_cache    = ""; // path to file for saving/loading prompt eval state
+    std::string path_prompt_cache    = ""; // path to file for saving/loading prompt eval state             // NOLINT
-    std::string input_prefix         = ""; // string to prefix user inputs with
+    std::string input_prefix         = ""; // string to prefix user inputs with                             // NOLINT
-    std::string input_suffix         = ""; // string to suffix user inputs with
+    std::string input_suffix         = ""; // string to suffix user inputs with                             // NOLINT
-    std::string logdir               = ""; // directory in which to save YAML log files
+    std::string logdir               = ""; // directory in which to save YAML log files                     // NOLINT
-    std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding
+    std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding           // NOLINT
-    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
+    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding          // NOLINT
-    std::string logits_file          = ""; // file for saving *all* logits
+    std::string logits_file          = ""; // file for saving *all* logits                                  // NOLINT
-    std::string rpc_servers          = ""; // comma separated list of RPC servers
+    std::string rpc_servers          = ""; // comma separated list of RPC servers                           // NOLINT
    std::vector<std::string> in_files;   // all input files
    std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
@ -200,7 +263,7 @@ struct gpt_params {
    std::string cache_type_v = "f16"; // KV cache data type for the V
    // multimodal models (see examples/llava)
-    std::string mmproj = "";        // path to multimodal projector
+    std::string mmproj = "";        // path to multimodal projector                                         // NOLINT
    std::vector<std::string> image; // path to image file(s)
    // embedding
@ -216,15 +279,15 @@ struct gpt_params {
    int     n_threads_http = -1;           // number of threads to process HTTP requests (TODO: support threadpool)
    std::string hostname      = "127.0.0.1";
-    std::string public_path   = "";
+    std::string public_path   = "";                                                                         // NOLINT
-    std::string chat_template = "";
+    std::string chat_template = "";                                                                         // NOLINT
-    std::string system_prompt = "";
+    std::string system_prompt = "";                                                                         // NOLINT
    bool enable_chat_template = true;
    std::vector<std::string> api_keys;
-    std::string ssl_file_key  = "";
+    std::string ssl_file_key  = "";                                                                         // NOLINT
-    std::string ssl_file_cert = "";
+    std::string ssl_file_cert = "";                                                                         // NOLINT
    bool endpoint_slots   = true;
    bool endpoint_metrics = false;
--- a/common/sampling.h
+++ b/common/sampling.h
@ -2,61 +2,11 @@
 #include "llama.h"
 #include "common.h"
 #include <string>
 #include <vector>
 // Identifiers for the individual sampler kinds. Values are assigned explicitly
 // (0..6). gpt_sampler_params::samplers holds a vector of these.
 enum gpt_sampler_type {
    GPT_SAMPLER_TYPE_NONE        = 0,
    GPT_SAMPLER_TYPE_TOP_K       = 1,
    GPT_SAMPLER_TYPE_TOP_P       = 2,
    GPT_SAMPLER_TYPE_MIN_P       = 3,
    GPT_SAMPLER_TYPE_TFS_Z       = 4,
    GPT_SAMPLER_TYPE_TYPICAL_P   = 5,
    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
 };
 // sampling parameters
 // Plain aggregate of sampling settings; every scalar field carries an in-class
 // default, so a default-constructed instance is fully initialized.
 struct gpt_sampler_params {
    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
    int32_t n_prev            = 64;    // number of previous tokens to remember
    int32_t n_probs           = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
    int32_t min_keep          = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
    int32_t top_k             = 40;    // <= 0 to use vocab size
    float   top_p             = 0.95f; // 1.0 = disabled
    float   min_p             = 0.05f; // 0.0 = disabled
    float   tfs_z             = 1.00f; // 1.0 = disabled
    float   typ_p             = 1.00f; // typical_p, 1.0 = disabled
    float   temp              = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
    float   dynatemp_range    = 0.00f; // 0.0 = disabled
    float   dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
    int32_t penalty_last_n    = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
    float   penalty_repeat    = 1.00f; // 1.0 = disabled
    float   penalty_freq      = 0.00f; // 0.0 = disabled
    float   penalty_present   = 0.00f; // 0.0 = disabled
    int32_t mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
    float   mirostat_tau      = 5.00f; // target entropy
    float   mirostat_eta      = 0.10f; // learning rate
    bool    penalize_nl       = false; // consider newlines as a repeatable token
    bool    ignore_eos        = false; // NOTE(review): presumably ignores the end-of-sequence token when sampling - confirm
    // sampler types to use; default list is top_k, tfs_z, typical_p, top_p, min_p, temperature
    // NOTE(review): presumably applied in this order - confirm against the sampler construction code
    std::vector<enum gpt_sampler_type> samplers = {
        GPT_SAMPLER_TYPE_TOP_K,
        GPT_SAMPLER_TYPE_TFS_Z,
        GPT_SAMPLER_TYPE_TYPICAL_P,
        GPT_SAMPLER_TYPE_TOP_P,
        GPT_SAMPLER_TYPE_MIN_P,
        GPT_SAMPLER_TYPE_TEMPERATURE
    };
    std::string grammar; // optional BNF-like grammar to constrain sampling
    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
    // print the parameters into a string
    // (const member: does not modify the params; only declared here, definition is not in this struct)
    std::string print() const;
 };
 // gpt_sampler extends llama_sampler with additional functionality:
 //
 //  - grammar support
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
--- a/examples/cvector-generator/pca.hpp
+++ b/examples/cvector-generator/pca.hpp
@ -12,12 +12,9 @@
 #include <cstdio>
 #include <ctime>
 #include <random>
 #include <string>
 #include <tuple>
 #include <vector>
 #include <algorithm>
 #include <iostream>
 #include <fstream>
 #define DEBUG_POS 5
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
--- a/examples/gen-docs/gen-docs.cpp
+++ b/examples/gen-docs/gen-docs.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include <fstream>
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@ -1,6 +1,7 @@
 #include "arg.h"
 #include "common.h"
 #include "console.h"
 #include "sampling.h"
 #include "llama.h"
 #include <cassert>
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@ -1,11 +1,12 @@
-#include "ggml.h"
+#include "arg.h"
 #include "base64.hpp"
 #include "log.h"
 #include "common.h"
 #include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
-
+#include "ggml.h"
 #include "base64.hpp"
 #include <cstdio>
 #include <cstdlib>
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@ -1,9 +1,11 @@
-#include "ggml.h"
+#include "arg.h"
 #include "log.h"
 #include "common.h"
 #include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
 #include "ggml.h"
 #include <cstdio>
 #include <cstdlib>
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@ -1,4 +1,6 @@
 #include "arg.h"
 #include "common.h"
 #include "sampling.h"
 #include "llama.h"
 #include <cstdio>
--- a/examples/lookup/lookup-create.cpp
+++ b/examples/lookup/lookup-create.cpp
@ -1,7 +1,8 @@
-#include "ggml.h"
+#include "arg.h"
 #include "llama.h"
 #include "common.h"
 #include "ngram-cache.h"
 #include "ggml.h"
 #include "llama.h"
 #include <cstdint>
 #include <fstream>
@ -40,4 +41,6 @@ int main(int argc, char ** argv){
    fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str());
    llama_ngram_cache_save(ngram_cache, params.lookup_cache_static);
    return 0;
 }
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@ -1,8 +1,9 @@
-#include "ggml.h"
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "log.h"
 #include "ngram-cache.h"
 #include "llama.h"
 #include "ggml.h"
 #include <cmath>
 #include <cstdint>
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@ -1,7 +1,9 @@
 #include "arg.h"
 #include "ggml.h"
 #include "llama.h"
 #include "common.h"
 #include "ngram-cache.h"
 #include "sampling.h"
 #include "llama.h"
 #include <cstdint>
 #include <cstdio>
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -1,6 +1,7 @@
 #include "arg.h"
 #include "common.h"
 #include "console.h"
 #include "sampling.h"
 #include "llama.h"
 #include <cassert>
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@ -1,7 +1,9 @@
 // A basic application simulating a server with multiple clients.
 // The clients submit requests to the server and they are processed in parallel.
 #include "arg.h"
 #include "common.h"
 #include "sampling.h"
 #include "llama.h"
 #include <cmath>
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@ -1,18 +1,19 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include <array>
 #include <atomic>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <ctime>
 #include <fstream>
 #include <mutex>
 #include <random>
 #include <sstream>
 #include <thread>
 #include <mutex>
 #include <atomic>
 #include <vector>
 #include <array>
 #include <fstream>
 #include <sstream>
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -1,6 +1,8 @@
 #include "utils.hpp"
 #include "arg.h"
 #include "common.h"
 #include "sampling.h"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@ -1,3 +1,4 @@
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@ -1,11 +1,13 @@
 #include "arg.h"
 #include "common.h"
 #include "sampling.h"
 #include "llama.h"
 #include <cmath>
 #include <cstdio>
 #include <string>
 #include <vector>
 #include <set>
 #include <random>
 #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE  100
 #define SPEC_VOCAB_CHECK_START_TOKEN_ID 5
--- a/tests/test-arg-parser.cpp
+++ b/tests/test-arg-parser.cpp
@ -1,3 +1,6 @@
 #include "arg.h"
 #include "common.h"
 #include <string>
 #include <vector>
 #include <sstream>
@ -6,8 +9,6 @@
 #undef NDEBUG
 #include <cassert>
 #include "common.h"
 int main(void) {
    gpt_params params;