common : more explicit includes

commit 6412a598a1 (parent 3e03807043)
31 changed files with 169 additions and 152 deletions
@@ -1,5 +1,7 @@
+#include "arg.h"
 #include "sampling.h"

+#include <algorithm>
 #include <string>
 #include <vector>
@@ -341,10 +343,6 @@ bool gpt_params_parse(int argc, char ** argv, llama_arg_context & ctx_arg) {
     return true;
 }

-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex) {
-    return gpt_params_parser_init(params, ex, nullptr);
-}
-
 llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **)) {
     llama_arg_context ctx_arg(params);
     ctx_arg.print_usage = print_usage;
common/arg.h (29 changed lines)
@@ -2,37 +2,14 @@

+#include "common.h"
+
+#include <set>
 #include <string>
 #include <vector>
-#include <set>

 //
 // CLI argument parsing
 //

-struct gpt_params;
-
-enum llama_example {
-    LLAMA_EXAMPLE_COMMON,
-    LLAMA_EXAMPLE_SPECULATIVE,
-    LLAMA_EXAMPLE_MAIN,
-    LLAMA_EXAMPLE_INFILL,
-    LLAMA_EXAMPLE_EMBEDDING,
-    LLAMA_EXAMPLE_PERPLEXITY,
-    LLAMA_EXAMPLE_RETRIEVAL,
-    LLAMA_EXAMPLE_PASSKEY,
-    LLAMA_EXAMPLE_IMATRIX,
-    LLAMA_EXAMPLE_BENCH,
-    LLAMA_EXAMPLE_SERVER,
-    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
-    LLAMA_EXAMPLE_EXPORT_LORA,
-    LLAMA_EXAMPLE_LLAVA,
-    LLAMA_EXAMPLE_LOOKUP,
-    LLAMA_EXAMPLE_PARALLEL,
-
-    LLAMA_EXAMPLE_COUNT,
-};
-
 struct llama_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
     std::vector<const char *> args;
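With llama_example now defined in common.h, each option still advertises where it applies through the `examples` set on llama_arg. A rough sketch of how that gating works (illustrative only, not code from this commit):

    llama_arg arg;
    arg.examples = {LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER};

    // an option is shown/accepted only when the running example is in its set
    bool active = arg.examples.count(LLAMA_EXAMPLE_MAIN) > 0;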
@@ -92,10 +69,8 @@ struct llama_arg_context {
     llama_arg_context(gpt_params & params) : params(params) {}
 };

 // initialize list of options (arguments) that can be used by the current example
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex);
 // optionally, we can provide "print_usage" to print example usage
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **));
+llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);

 // parse input arguments from CLI
 // if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
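The `= nullptr` default collapses the two overloads into one declaration, which is why the forwarding overload is deleted from the .cpp above. A minimal caller sketch (assuming the API exactly as declared in this header; the usage printer is illustrative):

    #include "arg.h"
    #include "common.h"

    #include <cstdio>

    static void print_usage(int /*argc*/, char ** argv) {
        printf("usage: %s [options]\n", argv[0]);
    }

    int main(int argc, char ** argv) {
        gpt_params params;

        // thanks to the default argument, gpt_params_parser_init(params, ex) still compiles too
        auto ctx_arg = gpt_params_parser_init(params, LLAMA_EXAMPLE_MAIN, print_usage);
        if (!gpt_params_parse(argc, argv, ctx_arg)) {
            return 1;
        }

        return 0;
    }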
common/common.h (155 changed lines)
@@ -4,21 +4,11 @@

 #include "llama.h"

-#include "sampling.h"
-#include "arg.h"
-
 #define LOG_NO_FILE_LINE_FUNCTION
 #include "log.h"

-#include <cmath>
 #include <string>
 #include <vector>
-#include <random>
-#include <thread>
-#include <set>
-#include <unordered_map>
-#include <tuple>
-#include <functional>

 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@@ -57,19 +47,6 @@ struct llama_control_vector_load_info;
 // CPU utils
 //

-int32_t cpu_get_num_physical_cores();
-int32_t cpu_get_num_math();
-
-//
-// Common params
-//
-
-// dimensionality reduction methods, used by cvector-generator
-enum dimre_method {
-    DIMRE_METHOD_PCA,
-    DIMRE_METHOD_MEAN,
-};
-
 struct cpu_params {
     int n_threads = -1;
     bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
@@ -79,6 +56,92 @@ struct cpu_params {
     uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
 };

+int32_t cpu_get_num_physical_cores();
+int32_t cpu_get_num_math();
+
+//
+// Common params
+//
+
+enum llama_example {
+    LLAMA_EXAMPLE_COMMON,
+    LLAMA_EXAMPLE_SPECULATIVE,
+    LLAMA_EXAMPLE_MAIN,
+    LLAMA_EXAMPLE_INFILL,
+    LLAMA_EXAMPLE_EMBEDDING,
+    LLAMA_EXAMPLE_PERPLEXITY,
+    LLAMA_EXAMPLE_RETRIEVAL,
+    LLAMA_EXAMPLE_PASSKEY,
+    LLAMA_EXAMPLE_IMATRIX,
+    LLAMA_EXAMPLE_BENCH,
+    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
+    LLAMA_EXAMPLE_EXPORT_LORA,
+    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_LOOKUP,
+    LLAMA_EXAMPLE_PARALLEL,
+
+    LLAMA_EXAMPLE_COUNT,
+};
+
+enum gpt_sampler_type {
+    GPT_SAMPLER_TYPE_NONE        = 0,
+    GPT_SAMPLER_TYPE_TOP_K       = 1,
+    GPT_SAMPLER_TYPE_TOP_P       = 2,
+    GPT_SAMPLER_TYPE_MIN_P       = 3,
+    GPT_SAMPLER_TYPE_TFS_Z       = 4,
+    GPT_SAMPLER_TYPE_TYPICAL_P   = 5,
+    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
+};
+
+// dimensionality reduction methods, used by cvector-generator
+enum dimre_method {
+    DIMRE_METHOD_PCA,
+    DIMRE_METHOD_MEAN,
+};
+
+// sampler parameters
+struct gpt_sampler_params {
+    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
+
+    int32_t n_prev            = 64;    // number of previous tokens to remember
+    int32_t n_probs           = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t min_keep          = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
+    int32_t top_k             = 40;    // <= 0 to use vocab size
+    float   top_p             = 0.95f; // 1.0 = disabled
+    float   min_p             = 0.05f; // 0.0 = disabled
+    float   tfs_z             = 1.00f; // 1.0 = disabled
+    float   typ_p             = 1.00f; // typical_p, 1.0 = disabled
+    float   temp              = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float   dynatemp_range    = 0.00f; // 0.0 = disabled
+    float   dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
+    int32_t penalty_last_n    = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float   penalty_repeat    = 1.00f; // 1.0 = disabled
+    float   penalty_freq      = 0.00f; // 0.0 = disabled
+    float   penalty_present   = 0.00f; // 0.0 = disabled
+    int32_t mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float   mirostat_tau      = 5.00f; // target entropy
+    float   mirostat_eta      = 0.10f; // learning rate
+    bool    penalize_nl       = false; // consider newlines as a repeatable token
+    bool    ignore_eos        = false;
+
+    std::vector<enum gpt_sampler_type> samplers = {
+        GPT_SAMPLER_TYPE_TOP_K,
+        GPT_SAMPLER_TYPE_TFS_Z,
+        GPT_SAMPLER_TYPE_TYPICAL_P,
+        GPT_SAMPLER_TYPE_TOP_P,
+        GPT_SAMPLER_TYPE_MIN_P,
+        GPT_SAMPLER_TYPE_TEMPERATURE
+    };
+
+    std::string grammar; // optional BNF-like grammar to constrain sampling
+
+    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
+
+    // print the parameters into a string
+    std::string print() const;
+};
+
 struct gpt_params {
     int32_t n_predict = -1; // new tokens to predict
     int32_t n_ctx     = 0;  // context size
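With gpt_sampler_params declared in common.h, sampler settings can be configured by anything that already includes common.h, without pulling in sampling.h. A small usage sketch (the override values are illustrative):

    #include "common.h"

    #include <cstdio>

    int main() {
        gpt_sampler_params sparams;

        sparams.top_k    = 50;   // illustrative overrides of the defaults above
        sparams.temp     = 0.7f;
        sparams.samplers = {GPT_SAMPLER_TYPE_TOP_K, GPT_SAMPLER_TYPE_TEMPERATURE};

        printf("%s\n", sparams.print().c_str());

        return 0;
    }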
@@ -123,23 +186,23 @@ struct gpt_params {

     struct gpt_sampler_params sparams;

-    std::string model                = ""; // model path
-    std::string model_draft          = ""; // draft model for speculative decoding
-    std::string model_alias          = "unknown"; // model alias
-    std::string model_url            = ""; // model url to download
-    std::string hf_token             = ""; // HF token
-    std::string hf_repo              = ""; // HF repo
-    std::string hf_file              = ""; // HF file
-    std::string prompt               = "";
-    std::string prompt_file          = ""; // store the external prompt file name
-    std::string path_prompt_cache    = ""; // path to file for saving/loading prompt eval state
-    std::string input_prefix         = ""; // string to prefix user inputs with
-    std::string input_suffix         = ""; // string to suffix user inputs with
-    std::string logdir               = ""; // directory in which to save YAML log files
-    std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding
-    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
-    std::string logits_file          = ""; // file for saving *all* logits
-    std::string rpc_servers          = ""; // comma separated list of RPC servers
+    std::string model                = ""; // model path // NOLINT
+    std::string model_draft          = ""; // draft model for speculative decoding // NOLINT
+    std::string model_alias          = "unknown"; // model alias // NOLINT
+    std::string model_url            = ""; // model url to download // NOLINT
+    std::string hf_token             = ""; // HF token // NOLINT
+    std::string hf_repo              = ""; // HF repo // NOLINT
+    std::string hf_file              = ""; // HF file // NOLINT
+    std::string prompt               = ""; // NOLINT
+    std::string prompt_file          = ""; // store the external prompt file name // NOLINT
+    std::string path_prompt_cache    = ""; // path to file for saving/loading prompt eval state // NOLINT
+    std::string input_prefix         = ""; // string to prefix user inputs with // NOLINT
+    std::string input_suffix         = ""; // string to suffix user inputs with // NOLINT
+    std::string logdir               = ""; // directory in which to save YAML log files // NOLINT
+    std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding // NOLINT
+    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
+    std::string logits_file          = ""; // file for saving *all* logits // NOLINT
+    std::string rpc_servers          = ""; // comma separated list of RPC servers // NOLINT

     std::vector<std::string> in_files;   // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
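The trailing `// NOLINT` markers silence clang-tidy on these lines: the explicit `= ""` initializers are redundant for std::string (likely tripping a check such as readability-redundant-string-init) but are kept for uniformity of the member list. Where only one specific check should be silenced, clang-tidy also accepts a scoped form, e.g.:

    std::string model = ""; // NOLINT(readability-redundant-string-init)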
@@ -200,7 +263,7 @@ struct gpt_params {
     std::string cache_type_v = "f16"; // KV cache data type for the V

     // multimodal models (see examples/llava)
-    std::string mmproj = "";        // path to multimodal projector
+    std::string mmproj = "";        // path to multimodal projector // NOLINT
     std::vector<std::string> image; // path to image file(s)

     // embedding
@@ -216,15 +279,15 @@ struct gpt_params {
     int n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)

     std::string hostname      = "127.0.0.1";
-    std::string public_path   = "";
-    std::string chat_template = "";
-    std::string system_prompt = "";
+    std::string public_path   = ""; // NOLINT
+    std::string chat_template = ""; // NOLINT
+    std::string system_prompt = ""; // NOLINT
     bool enable_chat_template = true;

     std::vector<std::string> api_keys;

-    std::string ssl_file_key  = "";
-    std::string ssl_file_cert = "";
+    std::string ssl_file_key  = ""; // NOLINT
+    std::string ssl_file_cert = ""; // NOLINT

     bool endpoint_slots   = true;
     bool endpoint_metrics = false;
common/sampling.h

@@ -2,61 +2,11 @@
 #include "llama.h"

+#include "common.h"
+
 #include <string>
 #include <vector>

-enum gpt_sampler_type {
-    GPT_SAMPLER_TYPE_NONE        = 0,
-    GPT_SAMPLER_TYPE_TOP_K       = 1,
-    GPT_SAMPLER_TYPE_TOP_P       = 2,
-    GPT_SAMPLER_TYPE_MIN_P       = 3,
-    GPT_SAMPLER_TYPE_TFS_Z       = 4,
-    GPT_SAMPLER_TYPE_TYPICAL_P   = 5,
-    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
-};
-
-// sampling parameters
-struct gpt_sampler_params {
-    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
-
-    int32_t n_prev            = 64;    // number of previous tokens to remember
-    int32_t n_probs           = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
-    int32_t min_keep          = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
-    int32_t top_k             = 40;    // <= 0 to use vocab size
-    float   top_p             = 0.95f; // 1.0 = disabled
-    float   min_p             = 0.05f; // 0.0 = disabled
-    float   tfs_z             = 1.00f; // 1.0 = disabled
-    float   typ_p             = 1.00f; // typical_p, 1.0 = disabled
-    float   temp              = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
-    float   dynatemp_range    = 0.00f; // 0.0 = disabled
-    float   dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
-    int32_t penalty_last_n    = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
-    float   penalty_repeat    = 1.00f; // 1.0 = disabled
-    float   penalty_freq      = 0.00f; // 0.0 = disabled
-    float   penalty_present   = 0.00f; // 0.0 = disabled
-    int32_t mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float   mirostat_tau      = 5.00f; // target entropy
-    float   mirostat_eta      = 0.10f; // learning rate
-    bool    penalize_nl       = false; // consider newlines as a repeatable token
-    bool    ignore_eos        = false;
-
-    std::vector<enum gpt_sampler_type> samplers = {
-        GPT_SAMPLER_TYPE_TOP_K,
-        GPT_SAMPLER_TYPE_TFS_Z,
-        GPT_SAMPLER_TYPE_TYPICAL_P,
-        GPT_SAMPLER_TYPE_TOP_P,
-        GPT_SAMPLER_TYPE_MIN_P,
-        GPT_SAMPLER_TYPE_TEMPERATURE
-    };
-
-    std::string grammar; // optional BNF-like grammar to constrain sampling
-
-    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
-
-    // print the parameters into a string
-    std::string print() const;
-};
-
 // gpt_sampler extends llama_sampler with additional functionality:
 //
 // - grammar support
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
@@ -12,12 +12,9 @@
 #include <cstdio>
-#include <ctime>
-#include <random>
 #include <string>
 #include <tuple>
 #include <vector>
 #include <algorithm>
 #include <iostream>
 #include <fstream>

 #define DEBUG_POS 5
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"

 #include <fstream>
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,6 +1,7 @@
+#include "arg.h"
 #include "common.h"

 #include "console.h"
 #include "sampling.h"
 #include "llama.h"

 #include <cassert>
@@ -1,11 +1,12 @@
-#include "ggml.h"
+#include "arg.h"
+#include "base64.hpp"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"

-#include "base64.hpp"
+#include "ggml.h"

 #include <cstdio>
 #include <cstdlib>
@@ -1,9 +1,11 @@
-#include "ggml.h"
+#include "arg.h"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
+#include "ggml.h"

 #include <cstdio>
 #include <cstdlib>
@@ -1,4 +1,6 @@
+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "llama.h"

 #include <cstdio>
@@ -1,7 +1,8 @@
-#include "ggml.h"
-#include "llama.h"
+#include "arg.h"
 #include "common.h"
 #include "ngram-cache.h"
+#include "ggml.h"
+#include "llama.h"

 #include <cstdint>
 #include <fstream>
@@ -40,4 +41,6 @@ int main(int argc, char ** argv){
     fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str());

     llama_ngram_cache_save(ngram_cache, params.lookup_cache_static);
+
+    return 0;
 }
@@ -1,8 +1,9 @@
-#include "ggml.h"
+#include "arg.h"
 #include "common.h"
-#include "llama.h"
 #include "log.h"
 #include "ngram-cache.h"
+#include "llama.h"
+#include "ggml.h"

 #include <cmath>
 #include <cstdint>
@@ -1,7 +1,9 @@
+#include "arg.h"
-#include "ggml.h"
-#include "llama.h"
 #include "common.h"
 #include "ngram-cache.h"
+#include "sampling.h"
+#include "llama.h"

 #include <cstdint>
 #include <cstdio>
@@ -1,6 +1,7 @@
+#include "arg.h"
 #include "common.h"

 #include "console.h"
 #include "sampling.h"
 #include "llama.h"

 #include <cassert>
@@ -1,7 +1,9 @@
 // A basic application simulating a server with multiple clients.
 // The clients submit requests to the server and they are processed in parallel.

+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "llama.h"

 #include <cmath>
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,18 +1,19 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

+#include <array>
+#include <atomic>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <ctime>
+#include <fstream>
+#include <mutex>
 #include <random>
+#include <sstream>
 #include <thread>
-#include <mutex>
-#include <atomic>
 #include <vector>
-#include <array>
-#include <fstream>
-#include <sstream>

 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,6 +1,8 @@
 #include "utils.hpp"

+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
@@ -1,11 +1,13 @@
+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "llama.h"

 #include <cmath>
 #include <cstdio>
 #include <string>
 #include <vector>
 #include <set>
 #include <random>

 #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE  100
 #define SPEC_VOCAB_CHECK_START_TOKEN_ID 5
@@ -1,3 +1,6 @@
+#include "arg.h"
+#include "common.h"
+
 #include <string>
 #include <vector>
 #include <sstream>

@@ -6,8 +9,6 @@
 #undef NDEBUG
 #include <cassert>

-#include "common.h"
-
 int main(void) {
     gpt_params params;
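The test now pulls in arg.h and common.h explicitly at the top instead of mid-file. A sketch of the kind of check such a test can make against the API shown in arg.h (illustrative; the assertion assumes print_usage is an accessible member of llama_arg_context, as the .cpp hunk above suggests):

    #undef NDEBUG
    #include <cassert>

    int main(void) {
        gpt_params params;

        // the default argument from arg.h leaves print_usage unset
        auto ctx_arg = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
        assert(ctx_arg.print_usage == nullptr);

        return 0;
    }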