common : more explicit includes
commit 6412a598a1 (parent 3e03807043)

31 changed files with 169 additions and 152 deletions
@@ -1,5 +1,7 @@
 #include "arg.h"

+#include "sampling.h"
+
 #include <algorithm>
 #include <string>
 #include <vector>

@@ -341,10 +343,6 @@ bool gpt_params_parse(int argc, char ** argv, llama_arg_context & ctx_arg) {
     return true;
 }

-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex) {
-    return gpt_params_parser_init(params, ex, nullptr);
-}
-
 llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **)) {
     llama_arg_context ctx_arg(params);

     ctx_arg.print_usage = print_usage;
common/arg.h  (29 changes)
@@ -2,37 +2,14 @@

 #include "common.h"

+#include <set>
 #include <string>
 #include <vector>
-#include <set>

 //
 // CLI argument parsing
 //

-struct gpt_params;
-
-enum llama_example {
-    LLAMA_EXAMPLE_COMMON,
-    LLAMA_EXAMPLE_SPECULATIVE,
-    LLAMA_EXAMPLE_MAIN,
-    LLAMA_EXAMPLE_INFILL,
-    LLAMA_EXAMPLE_EMBEDDING,
-    LLAMA_EXAMPLE_PERPLEXITY,
-    LLAMA_EXAMPLE_RETRIEVAL,
-    LLAMA_EXAMPLE_PASSKEY,
-    LLAMA_EXAMPLE_IMATRIX,
-    LLAMA_EXAMPLE_BENCH,
-    LLAMA_EXAMPLE_SERVER,
-    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
-    LLAMA_EXAMPLE_EXPORT_LORA,
-    LLAMA_EXAMPLE_LLAVA,
-    LLAMA_EXAMPLE_LOOKUP,
-    LLAMA_EXAMPLE_PARALLEL,
-
-    LLAMA_EXAMPLE_COUNT,
-};
-
 struct llama_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
     std::vector<const char *> args;
@@ -92,10 +69,8 @@ struct llama_arg_context {
     llama_arg_context(gpt_params & params) : params(params) {}
 };

-// initialize list of options (arguments) that can be used by the current example
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex);
 // optionally, we can provide "print_usage" to print example usage
-llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **));
+llama_arg_context gpt_params_parser_init(gpt_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);

 // parse input arguments from CLI
 // if one argument has an invalid value, it will automatically display usage of the specific argument (and not the full usage message)
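Below is a minimal usage sketch of the parser API declared above. The function signatures, llama_arg_context, and LLAMA_EXAMPLE_COMMON are taken from this diff; the main() scaffolding and the printf-based usage callback are illustrative assumptions, not part of the commit.

    #include "arg.h"
    #include "common.h"

    #include <cstdio>

    // illustrative callback; the signature matches the print_usage parameter above
    static void print_usage(int /*argc*/, char ** argv) {
        printf("usage: %s [options]\n", argv[0]);
    }

    int main(int argc, char ** argv) {
        gpt_params params;

        // print_usage is optional and now defaults to nullptr
        auto ctx_arg = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);

        // parsing returns a bool; an invalid value prints usage for that argument only
        if (!gpt_params_parse(argc, argv, ctx_arg)) {
            return 1;
        }

        // ... use params ...
        return 0;
    }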
common/common.h  (155 changes)
@@ -4,21 +4,11 @@

 #include "llama.h"

-#include "sampling.h"
-#include "arg.h"
-
 #define LOG_NO_FILE_LINE_FUNCTION
 #include "log.h"

-#include <cmath>
 #include <string>
 #include <vector>
-#include <random>
-#include <thread>
-#include <set>
-#include <unordered_map>
-#include <tuple>
-#include <functional>

 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@@ -57,19 +47,6 @@ struct llama_control_vector_load_info;
 // CPU utils
 //

-int32_t cpu_get_num_physical_cores();
-int32_t cpu_get_num_math();
-
-//
-// Common params
-//
-
-// dimensionality reduction methods, used by cvector-generator
-enum dimre_method {
-    DIMRE_METHOD_PCA,
-    DIMRE_METHOD_MEAN,
-};
-
 struct cpu_params {
     int n_threads = -1;
     bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
@@ -79,6 +56,92 @@ struct cpu_params {
     uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
 };

+int32_t cpu_get_num_physical_cores();
+int32_t cpu_get_num_math();
+
+//
+// Common params
+//
+
+enum llama_example {
+    LLAMA_EXAMPLE_COMMON,
+    LLAMA_EXAMPLE_SPECULATIVE,
+    LLAMA_EXAMPLE_MAIN,
+    LLAMA_EXAMPLE_INFILL,
+    LLAMA_EXAMPLE_EMBEDDING,
+    LLAMA_EXAMPLE_PERPLEXITY,
+    LLAMA_EXAMPLE_RETRIEVAL,
+    LLAMA_EXAMPLE_PASSKEY,
+    LLAMA_EXAMPLE_IMATRIX,
+    LLAMA_EXAMPLE_BENCH,
+    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
+    LLAMA_EXAMPLE_EXPORT_LORA,
+    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_LOOKUP,
+    LLAMA_EXAMPLE_PARALLEL,
+
+    LLAMA_EXAMPLE_COUNT,
+};
+
+enum gpt_sampler_type {
+    GPT_SAMPLER_TYPE_NONE = 0,
+    GPT_SAMPLER_TYPE_TOP_K = 1,
+    GPT_SAMPLER_TYPE_TOP_P = 2,
+    GPT_SAMPLER_TYPE_MIN_P = 3,
+    GPT_SAMPLER_TYPE_TFS_Z = 4,
+    GPT_SAMPLER_TYPE_TYPICAL_P = 5,
+    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
+};
+
+// dimensionality reduction methods, used by cvector-generator
+enum dimre_method {
+    DIMRE_METHOD_PCA,
+    DIMRE_METHOD_MEAN,
+};
+
+// sampler parameters
+struct gpt_sampler_params {
+    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
+
+    int32_t n_prev = 64; // number of previous tokens to remember
+    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
+    int32_t top_k = 40; // <= 0 to use vocab size
+    float top_p = 0.95f; // 1.0 = disabled
+    float min_p = 0.05f; // 0.0 = disabled
+    float tfs_z = 1.00f; // 1.0 = disabled
+    float typ_p = 1.00f; // typical_p, 1.0 = disabled
+    float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float dynatemp_range = 0.00f; // 0.0 = disabled
+    float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
+    int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float penalty_repeat = 1.00f; // 1.0 = disabled
+    float penalty_freq = 0.00f; // 0.0 = disabled
+    float penalty_present = 0.00f; // 0.0 = disabled
+    int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float mirostat_tau = 5.00f; // target entropy
+    float mirostat_eta = 0.10f; // learning rate
+    bool penalize_nl = false; // consider newlines as a repeatable token
+    bool ignore_eos = false;
+
+    std::vector<enum gpt_sampler_type> samplers = {
+        GPT_SAMPLER_TYPE_TOP_K,
+        GPT_SAMPLER_TYPE_TFS_Z,
+        GPT_SAMPLER_TYPE_TYPICAL_P,
+        GPT_SAMPLER_TYPE_TOP_P,
+        GPT_SAMPLER_TYPE_MIN_P,
+        GPT_SAMPLER_TYPE_TEMPERATURE
+    };
+
+    std::string grammar; // optional BNF-like grammar to constrain sampling
+
+    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
+
+    // print the parameters into a string
+    std::string print() const;
+};
+
 struct gpt_params {
     int32_t n_predict = -1; // new tokens to predict
     int32_t n_ctx = 0; // context size
@@ -123,23 +186,23 @@ struct gpt_params {

     struct gpt_sampler_params sparams;

-    std::string model = ""; // model path
-    std::string model_draft = ""; // draft model for speculative decoding
-    std::string model_alias = "unknown"; // model alias
-    std::string model_url = ""; // model url to download
-    std::string hf_token = ""; // HF token
-    std::string hf_repo = ""; // HF repo
-    std::string hf_file = ""; // HF file
-    std::string prompt = "";
-    std::string prompt_file = ""; // store the external prompt file name
-    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
-    std::string input_prefix = ""; // string to prefix user inputs with
-    std::string input_suffix = ""; // string to suffix user inputs with
-    std::string logdir = ""; // directory in which to save YAML log files
-    std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
-    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
-    std::string logits_file = ""; // file for saving *all* logits
-    std::string rpc_servers = ""; // comma separated list of RPC servers
+    std::string model = ""; // model path // NOLINT
+    std::string model_draft = ""; // draft model for speculative decoding // NOLINT
+    std::string model_alias = "unknown"; // model alias // NOLINT
+    std::string model_url = ""; // model url to download // NOLINT
+    std::string hf_token = ""; // HF token // NOLINT
+    std::string hf_repo = ""; // HF repo // NOLINT
+    std::string hf_file = ""; // HF file // NOLINT
+    std::string prompt = ""; // NOLINT
+    std::string prompt_file = ""; // store the external prompt file name // NOLINT
+    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT
+    std::string input_prefix = ""; // string to prefix user inputs with // NOLINT
+    std::string input_suffix = ""; // string to suffix user inputs with // NOLINT
+    std::string logdir = ""; // directory in which to save YAML log files // NOLINT
+    std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
+    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
+    std::string logits_file = ""; // file for saving *all* logits // NOLINT
+    std::string rpc_servers = ""; // comma separated list of RPC servers // NOLINT

     std::vector<std::string> in_files; // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
@@ -200,7 +263,7 @@ struct gpt_params {
     std::string cache_type_v = "f16"; // KV cache data type for the V

     // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector
+    std::string mmproj = ""; // path to multimodal projector // NOLINT
     std::vector<std::string> image; // path to image file(s)

     // embedding
@@ -216,15 +279,15 @@ struct gpt_params {
     int n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)

     std::string hostname = "127.0.0.1";
-    std::string public_path = "";
-    std::string chat_template = "";
-    std::string system_prompt = "";
+    std::string public_path = ""; // NOLINT
+    std::string chat_template = ""; // NOLINT
+    std::string system_prompt = ""; // NOLINT
     bool enable_chat_template = true;

     std::vector<std::string> api_keys;

-    std::string ssl_file_key = "";
-    std::string ssl_file_cert = "";
+    std::string ssl_file_key = ""; // NOLINT
+    std::string ssl_file_cert = ""; // NOLINT

     bool endpoint_slots = true;
     bool endpoint_metrics = false;
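With the parameter and sampler structs now declared in common.h, a translation unit that only fills in parameters can include common.h alone. A small sketch follows; the field names and enum values come from the hunks above, while the main() wrapper is illustrative.

    #include "common.h"

    int main() {
        gpt_params params;

        // sampler settings live in gpt_sampler_params, reachable through common.h
        params.sparams.top_k = 50;
        params.sparams.temp  = 0.7f;
        params.sparams.samplers = { GPT_SAMPLER_TYPE_TOP_K, GPT_SAMPLER_TYPE_TEMPERATURE };

        return 0;
    }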
@@ -2,61 +2,11 @@

 #include "llama.h"

+#include "common.h"
+
 #include <string>
 #include <vector>

-enum gpt_sampler_type {
-    GPT_SAMPLER_TYPE_NONE = 0,
-    GPT_SAMPLER_TYPE_TOP_K = 1,
-    GPT_SAMPLER_TYPE_TOP_P = 2,
-    GPT_SAMPLER_TYPE_MIN_P = 3,
-    GPT_SAMPLER_TYPE_TFS_Z = 4,
-    GPT_SAMPLER_TYPE_TYPICAL_P = 5,
-    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
-};
-
-// sampling parameters
-struct gpt_sampler_params {
-    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
-
-    int32_t n_prev = 64; // number of previous tokens to remember
-    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
-    int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
-    int32_t top_k = 40; // <= 0 to use vocab size
-    float top_p = 0.95f; // 1.0 = disabled
-    float min_p = 0.05f; // 0.0 = disabled
-    float tfs_z = 1.00f; // 1.0 = disabled
-    float typ_p = 1.00f; // typical_p, 1.0 = disabled
-    float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
-    float dynatemp_range = 0.00f; // 0.0 = disabled
-    float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
-    int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
-    float penalty_repeat = 1.00f; // 1.0 = disabled
-    float penalty_freq = 0.00f; // 0.0 = disabled
-    float penalty_present = 0.00f; // 0.0 = disabled
-    int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float mirostat_tau = 5.00f; // target entropy
-    float mirostat_eta = 0.10f; // learning rate
-    bool penalize_nl = false; // consider newlines as a repeatable token
-    bool ignore_eos = false;
-
-    std::vector<enum gpt_sampler_type> samplers = {
-        GPT_SAMPLER_TYPE_TOP_K,
-        GPT_SAMPLER_TYPE_TFS_Z,
-        GPT_SAMPLER_TYPE_TYPICAL_P,
-        GPT_SAMPLER_TYPE_TOP_P,
-        GPT_SAMPLER_TYPE_MIN_P,
-        GPT_SAMPLER_TYPE_TEMPERATURE
-    };
-
-    std::string grammar; // optional BNF-like grammar to constrain sampling
-
-    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
-
-    // print the parameters into a string
-    std::string print() const;
-};
-
 // gpt_sampler extends llama_sampler with additional functionality:
 //
 // - grammar support
@ -1,3 +1,4 @@
|
||||||
|
#include "arg.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
|
|
|
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
@@ -12,12 +12,9 @@

 #include <cstdio>
 #include <ctime>
+#include <random>
 #include <string>
-#include <tuple>
 #include <vector>
-#include <algorithm>
-#include <iostream>
-#include <fstream>

 #define DEBUG_POS 5

@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 #include "ggml.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"

 #include <fstream>
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,6 +1,7 @@
+#include "arg.h"
 #include "common.h"

 #include "console.h"
+#include "sampling.h"
 #include "llama.h"

 #include <cassert>
@@ -1,11 +1,12 @@
-#include "ggml.h"
+#include "arg.h"
+#include "base64.hpp"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
-#include "base64.hpp"
+#include "ggml.h"

 #include <cstdio>
 #include <cstdlib>
@@ -1,9 +1,11 @@
-#include "ggml.h"
+#include "arg.h"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
+#include "ggml.h"

 #include <cstdio>
 #include <cstdlib>
@@ -1,4 +1,6 @@
+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "llama.h"

 #include <cstdio>
@@ -1,7 +1,8 @@
-#include "ggml.h"
-#include "llama.h"
+#include "arg.h"
 #include "common.h"
 #include "ngram-cache.h"
+#include "ggml.h"
+#include "llama.h"

 #include <cstdint>
 #include <fstream>
@@ -40,4 +41,6 @@ int main(int argc, char ** argv){
     fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str());

     llama_ngram_cache_save(ngram_cache, params.lookup_cache_static);
+
+    return 0;
 }
@@ -1,8 +1,9 @@
-#include "ggml.h"
+#include "arg.h"
 #include "common.h"
-#include "llama.h"
 #include "log.h"
 #include "ngram-cache.h"
+#include "llama.h"
+#include "ggml.h"

 #include <cmath>
 #include <cstdint>
@@ -1,7 +1,9 @@
+#include "arg.h"
 #include "ggml.h"
-#include "llama.h"
 #include "common.h"
 #include "ngram-cache.h"
+#include "sampling.h"
+#include "llama.h"

 #include <cstdint>
 #include <cstdio>
@@ -1,6 +1,7 @@
+#include "arg.h"
 #include "common.h"

 #include "console.h"
+#include "sampling.h"
 #include "llama.h"

 #include <cassert>
@@ -1,7 +1,9 @@
 // A basic application simulating a server with multiple clients.
 // The clients submit requests to the server and they are processed in parallel.

+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "llama.h"

 #include <cmath>
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,18 +1,19 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

+#include <array>
+#include <atomic>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <ctime>
+#include <fstream>
+#include <mutex>
+#include <random>
 #include <sstream>
 #include <thread>
-#include <mutex>
-#include <atomic>
 #include <vector>
-#include <array>
-#include <fstream>
-#include <sstream>

 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,6 +1,8 @@
 #include "utils.hpp"

+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "json-schema-to-grammar.h"
 #include "llama.h"

@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"

@@ -1,11 +1,13 @@
+#include "arg.h"
 #include "common.h"
+#include "sampling.h"
 #include "llama.h"

-#include <cmath>
 #include <cstdio>
 #include <string>
 #include <vector>
 #include <set>
+#include <random>

 #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 100
 #define SPEC_VOCAB_CHECK_START_TOKEN_ID 5
@@ -1,3 +1,6 @@
+#include "arg.h"
+#include "common.h"
+
 #include <string>
 #include <vector>
 #include <sstream>
@@ -6,8 +9,6 @@
 #undef NDEBUG
 #include <cassert>

-#include "common.h"
-
 int main(void) {
     gpt_params params;
