cmake : re-enable GCC -Wshadow

ggml-ci

parent 34889bf810
commit 439e68c1e5
16 changed files with 73 additions and 65 deletions
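In short: GCC's -Wshadow also fires when a constructor parameter shadows the data member it initializes, a pattern this codebase uses heavily, which is why most of the diff below renames such parameters with a trailing underscore. Clang's plain -Wshadow does not flag that case, so Clang builds additionally get -Wshadow-field-in-constructor to keep both compilers equally strict. (The bare "ggml-ci" line in the message is, as far as I can tell, the marker this repository uses to request the extended CI run.)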
@@ -15,9 +15,12 @@ function(llama_add_compile_flags)
         list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)

-        # GCC -Wshadow is way too agressive
-        if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
             list(APPEND CXX_FLAGS -Wshadow)
+
+            if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+                list(APPEND CXX_FLAGS -Wshadow -Wshadow-field-in-constructor)
+            endif()
         endif()

         list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
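For context (not part of the commit): the practical difference between the two compilers here is that GCC's -Wshadow already reports a constructor parameter that shadows a field, while Clang only reports that case when -Wshadow-field-in-constructor is also given. A minimal, hypothetical translation unit, not from the llama.cpp tree, that shows both situations:

    // shadow_demo.cpp (hypothetical)
    //   g++     -Wshadow -c shadow_demo.cpp                                -> warns twice
    //   clang++ -Wshadow -c shadow_demo.cpp                                -> warns once (local only)
    //   clang++ -Wshadow -Wshadow-field-in-constructor -c shadow_demo.cpp  -> warns twice
    struct widget {
        int size;

        // GCC: "declaration of 'size' shadows a member of 'widget'"
        widget(int size) : size(size) {}
    };

    int grow(int amount) {
        int total = amount;
        {
            // both compilers: 'amount' shadows the function parameter
            int amount = total * 2;
            total += amount;
        }
        return total;
    }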
common/arg.h (34 changed lines)

@@ -25,33 +25,33 @@ struct common_arg {
     void (*handler_int) (common_params & params, int) = nullptr;

     common_arg(
-        const std::initializer_list<const char *> & args,
-        const char * value_hint,
-        const std::string & help,
+        const std::initializer_list<const char *> & args_,
+        const char * value_hint_,
+        const std::string & help_,
         void (*handler)(common_params & params, const std::string &)
-    ) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
+    ) : args(args_), value_hint(value_hint_), help(help_), handler_string(handler) {}

     common_arg(
-        const std::initializer_list<const char *> & args,
-        const char * value_hint,
-        const std::string & help,
+        const std::initializer_list<const char *> & args_,
+        const char * value_hint_,
+        const std::string & help_,
         void (*handler)(common_params & params, int)
-    ) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
+    ) : args(args_), value_hint(value_hint_), help(help_), handler_int(handler) {}

     common_arg(
-        const std::initializer_list<const char *> & args,
-        const std::string & help,
+        const std::initializer_list<const char *> & args_,
+        const std::string & help_,
         void (*handler)(common_params & params)
-    ) : args(args), help(help), handler_void(handler) {}
+    ) : args(args_), help(help_), handler_void(handler) {}

     // support 2 values for arg
     common_arg(
-        const std::initializer_list<const char *> & args,
-        const char * value_hint,
-        const char * value_hint_2,
-        const std::string & help,
+        const std::initializer_list<const char *> & args_,
+        const char * value_hint_,
+        const char * value_hint_2_,
+        const std::string & help_,
         void (*handler)(common_params & params, const std::string &, const std::string &)
-    ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
+    ) : args(args_), value_hint(value_hint_), value_hint_2(value_hint_2_), help(help_), handler_str_str(handler) {}

     common_arg & set_examples(std::initializer_list<enum llama_example> vals);
     common_arg & set_excludes(std::initializer_list<enum llama_example> vals);

@@ -69,7 +69,7 @@ struct common_params_context {
     common_params & params;
     std::vector<common_arg> options;
     void(*print_usage)(int, char **) = nullptr;
-    common_params_context(common_params & params) : params(params) {}
+    common_params_context(common_params & params_) : params(params_) {}
 };

 // parse input arguments from CLI
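The renames above are the mechanical fix applied across the rest of this commit: keep the member name, give the constructor parameter a trailing underscore, and the member initializer no longer shadows anything. A reduced sketch of the before/after, using a hypothetical struct:

    // Hypothetical reduction of the pattern used throughout this commit.
    struct point {
        int x;

        // before: the parameter 'x' shadows the member 'x'; the initializer
        // 'x(x)' is well defined, but GCC's -Wshadow reports it anyway:
        //     point(int x) : x(x) {}

        // after: trailing underscore on the parameter; member and call
        // sites are unchanged
        point(int x_) : x(x_) {}
    };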
@@ -66,7 +66,7 @@ struct file_input {
     float alpha;
     float scale;

-    file_input(std::string & fname, float scale): f_in(fname, std::ios::binary), scale(scale) {
+    file_input(std::string & fname, float scale_): f_in(fname, std::ios::binary), scale(scale_) {
         if (!f_in.is_open()) {
             throw std::runtime_error("failed to open input gguf from " + fname);
         }

@@ -131,7 +131,7 @@ struct lora_merge_ctx {
             std::string & base_fname,
             std::vector<common_adapter_lora_info> & lora_files,
             std::string & outfile,
-            int n_threads) : base_model(base_fname, 0), n_threads(n_threads), fout(outfile, std::ios::binary) {
+            int n_threads_) : base_model(base_fname, 0), n_threads(n_threads_), fout(outfile, std::ios::binary) {
         fout.exceptions(std::ofstream::failbit); // fail fast on write errors

         if (gguf_find_key(base_model.ctx_gguf, LLM_KV_SPLIT_COUNT) >= 0) {

@@ -157,7 +157,7 @@ struct lora_merge_ctx {
         allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
     }

-    void check_metadata_lora(file_input * adapter) {
+    void check_metadata_lora(const file_input * adapter) const {
         auto general_type = get_kv_str(adapter->ctx_gguf, "general.type");
         if (general_type != "adapter") {
             throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);

@@ -175,7 +175,7 @@ struct lora_merge_ctx {
         }
     }

-    ggml_type get_out_tensor_type(struct ggml_tensor * t) {
+    static ggml_type get_out_tensor_type(struct ggml_tensor * t) {
         if (t->type == GGML_TYPE_F32) {
             return GGML_TYPE_F32;
         } else {
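Two of the changes above are not renames: check_metadata_lora gains const qualifiers (it only reads through the adapter pointer) and get_out_tensor_type becomes static (it uses no instance state). These look like cleanups flushed out while re-running the stricter warning set; the same hardening, sketched on a hypothetical class:

    // Hypothetical illustration of the two non-rename fixes above.
    struct reader {
        int cached = 0;

        // reads but never mutates: mark it const so it can be
        // called through const pointers and references
        int peek(const int * p) const { return *p + cached; }

        // touches no members at all: mark it static
        static int clamp(int v) { return v < 0 ? 0 : v; }
    };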
@@ -204,14 +204,14 @@ struct split_strategy {
     // temporary buffer for reading in tensor data
     std::vector<uint8_t> read_buf;

-    split_strategy(const split_params & params,
-            std::ifstream & f_input,
-            struct gguf_context * ctx_gguf,
-            struct ggml_context * ctx_meta) :
-        params(params),
-        f_input(f_input),
-        ctx_gguf(ctx_gguf),
-        ctx_meta(ctx_meta),
+    split_strategy(const split_params & params_,
+            std::ifstream & f_input_,
+            struct gguf_context * ctx_gguf_,
+            struct ggml_context * ctx_meta_) :
+        params(params_),
+        f_input(f_input_),
+        ctx_gguf(ctx_gguf_),
+        ctx_meta(ctx_meta_),
         n_tensors(gguf_get_n_tensors(ctx_gguf)) {

         // because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
@@ -4,6 +4,11 @@ install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)

-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    target_compile_options(${TARGET} PRIVATE -Wno-shadow) # TMP
+# TMP
+if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    target_compile_options(${TARGET} PRIVATE -Wno-shadow)
+
+    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        target_compile_options(${TARGET} PRIVATE -Wno-shadow-field-in-constructor)
+    endif()
 endif()
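The server target goes the other way for now: instead of fixing its larger set of shadowing warnings in this commit, it cancels them at target scope with the matching -Wno- flags, and the # TMP comment marks the suppression as temporary. Note that the condition structure mirrors the enabling block in llama_add_compile_flags, so each target-level -Wno- option cancels exactly one globally added -W option.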
@@ -200,7 +200,7 @@ struct server_task {
     // used by SERVER_TASK_TYPE_SET_LORA
     std::vector<common_adapter_lora_info> set_lora;

-    server_task(server_task_type type) : type(type) {}
+    server_task(server_task_type type_) : type(type_) {}

     static slot_params params_from_json_cmpl(
             const llama_context * ctx,
@@ -55,7 +55,7 @@ struct llama_adapter_lora_weight {
     }

     llama_adapter_lora_weight() = default;
-    llama_adapter_lora_weight(struct ggml_tensor * a, struct ggml_tensor * b) : a(a), b(b) {}
+    llama_adapter_lora_weight(struct ggml_tensor * a_, struct ggml_tensor * b_) : a(a_), b(b_) {}
 };

 struct llama_adapter_lora {
@@ -1443,7 +1443,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
 };

-LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
+LLM_KV::LLM_KV(llm_arch arch_) : arch(arch_) {}

 std::string LLM_KV::operator()(llm_kv kv) const {
     return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
@@ -374,7 +374,7 @@ struct LLM_TN_IMPL {
 };

 struct LLM_TN {
-    LLM_TN(llm_arch arch) : arch(arch) {}
+    LLM_TN(llm_arch arch_) : arch(arch_) {}

     llm_arch arch;
@@ -15,8 +15,8 @@
 #include <set>

 struct llama_context {
-    llama_context(const llama_model & model)
-        : model(model)
+    llama_context(const llama_model & model_)
+        : model(model_)
         , t_start_us(model.t_start_us)
         , t_load_us(model.t_load_us) {}
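One detail worth noting in this hunk: the unchanged context lines still initialize t_start_us and t_load_us from the member model rather than from the renamed parameter model_. That is well defined because members are initialized in declaration order, and model is (presumably) declared first in llama_context; a hypothetical reduction:

    // Hypothetical sketch of the initialization-order dependence above.
    struct ctx {
        int model;       // declared first, so initialized first
        int t_start_us;  // safe to read the already-initialized 'model'

        ctx(int model_)
            : model(model_)
            , t_start_us(model + 1) {}  // reads the member, not the parameter
    };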
@@ -17,7 +17,7 @@ struct llama_logger_state {

 static llama_logger_state g_logger_state;

-time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
+time_meas::time_meas(int64_t & t_acc_, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc_) {}

 time_meas::~time_meas() {
     if (t_start_us >= 0) {
@@ -31,7 +31,7 @@ struct llama_model_loader {

         ggml_tensor * tensor;

-        llama_tensor_weight(const llama_file * file, uint16_t idx, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+        llama_tensor_weight(const llama_file * file, uint16_t idx_, const struct gguf_context * gguf_ctx, ggml_tensor * tensor_) : idx(idx_), tensor(tensor_) {
             const int tensor_idx = gguf_find_tensor(gguf_ctx, ggml_get_name(tensor));
             if (tensor_idx < 0) {
                 throw std::runtime_error(format("tensor '%s' not found in the model", ggml_get_name(tensor)));
@@ -369,7 +369,7 @@ struct llama_model::impl {
     std::vector<layer_dev> dev_layer;
 };

-llama_model::llama_model(const struct llama_model_params & params) : params(params), pimpl(std::make_unique<impl>()) {
+llama_model::llama_model(const struct llama_model_params & params_) : params(params_), pimpl(std::make_unique<impl>()) {
 }

 llama_model::~llama_model() {}
@@ -41,9 +41,9 @@ struct quantize_state_impl {
     // used to figure out if a model shares tok_embd with the output weight
     bool has_output = false;

-    quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params)
-        : model(model)
-        , params(params)
+    quantize_state_impl(const llama_model & model_, const llama_model_quantize_params * params_)
+        : model(model_)
+        , params(params_)
         {}
 };
@@ -115,7 +115,7 @@ struct llm_tokenizer_spm : llm_tokenizer {
 };

 struct llm_tokenizer_spm_session {
-    llm_tokenizer_spm_session(const llama_vocab & vocab) : vocab(vocab) {}
+    llm_tokenizer_spm_session(const llama_vocab & vocab_) : vocab(vocab_) {}

     void tokenize(const std::string & text, std::vector<llama_token> & output) {
         // split string into utf8 chars

@@ -415,7 +415,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
 };

 struct llm_tokenizer_bpe_session {
-    llm_tokenizer_bpe_session(const llama_vocab & vocab, const llm_tokenizer_bpe & tokenizer) : vocab(vocab), tokenizer(tokenizer) {}
+    llm_tokenizer_bpe_session(const llama_vocab & vocab_, const llm_tokenizer_bpe & tokenizer_) : vocab(vocab_), tokenizer(tokenizer_) {}

     static void append(const llama_token token_id, std::vector<llama_token> & output) {
         output.push_back(token_id);

@@ -603,7 +603,7 @@ struct llm_tokenizer_wpm : llm_tokenizer {
 };

 struct llm_tokenizer_wpm_session {
-    llm_tokenizer_wpm_session(const llama_vocab & vocab) : vocab(vocab) {}
+    llm_tokenizer_wpm_session(const llama_vocab & vocab_) : vocab(vocab_) {}

     void tokenize(const std::string & text, std::vector<llama_token> & output) {
         // normalize and split by whitespace

@@ -782,7 +782,7 @@ struct llm_tokenizer_ugm : llm_tokenizer {
 };

 struct llm_tokenizer_ugm_session {
-    llm_tokenizer_ugm_session(const llama_vocab & vocab, const llm_tokenizer_ugm & tokenizer) : vocab(vocab), tokenizer(tokenizer) {}
+    llm_tokenizer_ugm_session(const llama_vocab & vocab_, const llm_tokenizer_ugm & tokenizer_) : vocab(vocab_), tokenizer(tokenizer_) {}

     /* This implementation is based on SentencePiece optimized Viterbi algorithm for
      * unigram language models. The general idea is to:

@@ -949,7 +949,7 @@ private:
     */
     struct xcda_array_view {
     public:
-        xcda_array_view(const uint32_t * xcda_array, size_t xcda_array_size) : xcda_array(xcda_array), xcda_array_size(xcda_array_size) {
+        xcda_array_view(const uint32_t * xcda_array_, size_t xcda_array_size_) : xcda_array(xcda_array_), xcda_array_size(xcda_array_size_) {
         }
         uint32_t get_base(size_t index) {
             uint32_t packed_node = get_node(index);

@@ -1135,7 +1135,7 @@ struct llm_tokenizer_rwkv : llm_tokenizer {
 };

 struct llm_tokenizer_rwkv_session {
-    llm_tokenizer_rwkv_session(const llama_vocab & vocab, const llm_tokenizer_rwkv & tokenizer) : vocab(vocab), tokenizer(tokenizer) {}
+    llm_tokenizer_rwkv_session(const llama_vocab & vocab_, const llm_tokenizer_rwkv & tokenizer_) : vocab(vocab_), tokenizer(tokenizer_) {}

     void tokenize(const std::string & text, std::vector<llama_token> & output) {
         uint32_t position = 0;

@@ -1262,7 +1262,7 @@ struct llama_vocab::impl {

     std::vector<char> precompiled_charsmap;

-    impl(const llama_vocab & vocab) : vocab(vocab) {
+    impl(const llama_vocab & vocab_) : vocab(vocab_) {
     }

     ~impl() = default;
@@ -1089,16 +1089,16 @@ struct llm_build_context {

     // TODO: consider making the entire interface noexcept
     llm_build_context(
-        llama_context & lctx,
-        const llama_ubatch & ubatch,
-        const llm_build_cb & cb,
+        llama_context & lctx_,
+        const llama_ubatch & ubatch_,
+        const llm_build_cb & cb_,
         bool worst_case) :
-        model (lctx.model),
-        lctx (lctx),
+        model (lctx_.model),
+        lctx (lctx_),
         hparams (model.hparams),
-        cparams (lctx.cparams),
-        ubatch (ubatch),
-        kv_self (lctx.kv_self),
+        cparams (lctx_.cparams),
+        ubatch (ubatch_),
+        kv_self (lctx_.kv_self),
         n_embd (hparams.n_embd),
         n_layer (hparams.n_layer),
         n_rot (hparams.n_rot),

@@ -1119,17 +1119,17 @@ struct llm_build_context {
         beta_slow (cparams.yarn_beta_slow),
         norm_eps (hparams.f_norm_eps),
         norm_rms_eps (hparams.f_norm_rms_eps),
-        n_tokens (ubatch.n_tokens),
+        n_tokens (ubatch_.n_tokens),
         n_kv (worst_case ? kv_self.size : kv_self.n),
-        n_outputs (worst_case ? n_tokens : lctx.n_outputs),
-        n_outputs_enc (worst_case ? n_tokens : lctx.embd_enc.size() / hparams.n_embd),
+        n_outputs (worst_case ? n_tokens : lctx_.n_outputs),
+        n_outputs_enc (worst_case ? n_tokens : lctx_.embd_enc.size() / hparams.n_embd),
         kv_head (worst_case ? (kv_self.recurrent ? 0 : kv_self.size - n_tokens) : kv_self.head),
         n_ctx_orig (cparams.n_ctx_orig_yarn),
         flash_attn (cparams.flash_attn),
         pooling_type (cparams.pooling_type),
         rope_type (hparams.rope_type),
-        cb (cb),
-        buf_compute_meta (lctx.buf_compute_meta) {
+        cb (cb_),
+        buf_compute_meta (lctx_.buf_compute_meta) {
             // all initializations should be done in init()
         }