Implement --no-byteswap argument to disable byteswapping on big endian platform
This commit is contained in:
parent f4217a81fc
commit a9db9b0048
19 changed files with 88 additions and 46 deletions
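For orientation, here is a minimal sketch (not part of the diff) of how a downstream application could opt out of byteswapping through the new llama_model_params field added below; the model path and the choice of loader entry point are illustrative assumptions, not prescribed by this commit:

// sketch only: load a GGUF that was byteswapped to big endian ahead of time,
// telling the loader not to byteswap it again at load time
llama_model_params mparams = llama_model_default_params();
mparams.no_byteswap = true;   // new field introduced by this commit, defaults to false
llama_model * model = llama_model_load_from_file("model-be.gguf", mparams);  // hypothetical path

The same behavior is exposed on the command line as --no-byteswap and through the LLAMA_NO_BYTESWAP environment variable (see the argument parser hunk below).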
@@ -1438,6 +1438,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.use_mmap = false;
         }
     ).set_env("LLAMA_ARG_NO_MMAP"));
+    add_opt(common_arg(
+        {"--no-byteswap"},
+        "don't byteswap model data on big endian systems (use if model is byteswapped to big endian in advance)",
+        [](common_params & params) {
+            params.no_byteswap = true;
+        }
+    ).set_env("LLAMA_NO_BYTESWAP"));
     add_opt(common_arg(
         {"--numa"}, "TYPE",
         "attempt optimizations that help on some NUMA systems\n"

@@ -987,7 +987,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
         llama_adapter_lora_ptr lora;
-        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
+        lora.reset(llama_adapter_lora_init(model, la.path.c_str(), mparams.no_byteswap));
         if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);

@@ -1092,6 +1092,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     mparams.use_mmap = params.use_mmap;
     mparams.use_mlock = params.use_mlock;
     mparams.check_tensors = params.check_tensors;
+    mparams.no_byteswap = params.no_byteswap;
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {

@@ -1418,8 +1419,9 @@ struct llama_model * common_load_model_from_url(
     int n_split = 0;
     {
         struct gguf_init_params gguf_params = {
             /*.no_alloc = */ true,
             /*.ctx = */ NULL,
+            /*.no_byteswap = */ false,
         };
         auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
         if (!ctx_gguf) {

@@ -2063,8 +2065,9 @@ static common_control_vector_data common_control_vector_load_one(const common_co

     ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
         /* .no_alloc = */ false,
         /* .ctx = */ &ctx,
+        /* .no_byteswap = */ false,
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
     if (!ctx_gguf) {

@@ -307,6 +307,7 @@ struct common_params {
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool no_byteswap = false; // skip byteswapping on big endian systems

     ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
     ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V

@@ -533,8 +533,9 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
     struct ggml_context * ctx_data = NULL;

     struct gguf_init_params params = {
         /*.no_alloc = */ false,
         /*.ctx = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(filename, params);
@@ -48,8 +48,9 @@ static std::string ggml_ne_string(const ggml_tensor * t) {

 static struct gguf_context * load_gguf(std::string & fname, struct ggml_context ** ctx_ggml) {
     struct gguf_init_params params = {
         /*.no_alloc = */ true,
         /*.ctx = */ ctx_ggml,
+        /*.no_byteswap = */ false,
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
     if (!ctx_gguf) {

@@ -288,8 +288,9 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
     struct ggml_context * ctx_data = NULL;

     struct gguf_init_params params = {
         /*.no_alloc = */ false,
         /*.ctx = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };

     // xxh64 init

@@ -361,8 +361,9 @@ static void gguf_split(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;

     struct gguf_init_params params = {
         /*.no_alloc = */ true,
         /*.ctx = */ &ctx_meta,
+        /*.no_byteswap = */ false,
     };

     std::ifstream f_input(split_params.input.c_str(), std::ios::binary);

@@ -426,8 +427,9 @@ static void gguf_merge(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;

     struct gguf_init_params params = {
         /*.no_alloc = */ true,
         /*.ctx = */ &ctx_meta,
+        /*.no_byteswap = */ false,
     };

     if (i_split > 0) {

@@ -85,8 +85,9 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
         /*.no_alloc = */ false,
         /*.ctx = */ NULL,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

@@ -151,8 +152,9 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
     struct ggml_context * ctx_data = NULL;

     struct gguf_init_params params = {
         /*.no_alloc = */ false,
         /*.ctx = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

@@ -1122,8 +1122,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;

     struct gguf_init_params params = {
         /*.no_alloc = */ true,
         /*.ctx = */ &meta,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(fname, params);

@@ -74,6 +74,8 @@ extern "C" {

         // if not NULL, create a ggml_context and allocate the tensor data in it
         struct ggml_context ** ctx;
+
+        bool no_byteswap;
     };

     GGML_API struct gguf_context * gguf_init_empty(void);
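With the extra gguf_init_params field above, a caller that knows its GGUF file is already in big endian byte order can ask gguf_init_from_file to leave the data untouched. A hedged sketch (the file name is hypothetical; the initializer mirrors the call sites updated in this commit):

struct ggml_context * ctx_data = NULL;
struct gguf_init_params params = {
    /*.no_alloc = */ false,
    /*.ctx = */ &ctx_data,
    /*.no_byteswap = */ true,   // skip the little-endian -> host conversion
};
struct gguf_context * ctx = gguf_init_from_file("model-be.gguf", params);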
@@ -218,13 +218,17 @@ struct gguf_context {

 struct gguf_reader {
     FILE * file;
+    bool no_byteswap = false;

     gguf_reader(FILE * file) : file(file) {}
+    gguf_reader(FILE * file, bool v_no_byteswap) : file(file), no_byteswap(v_no_byteswap) {}

     template <typename T>
     bool read(T & dst) const {
         auto res = fread(&dst, 1, sizeof(dst), file);
-        ggml_convert_from_le(&dst);
+        if (!no_byteswap) {
+            ggml_convert_from_le(&dst);
+        }
         return res == sizeof(dst);
     }

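The reader change above simply gates the existing little-endian-to-host conversion behind the new flag. As a standalone illustration of the same pattern (not the library's code; ggml_convert_from_le is replaced here by an explicit swap guarded by the compiler's byte-order macro):

#include <cstdint>
#include <cstdio>

// read one on-disk little-endian uint32_t, optionally skipping the conversion
// when the file is known to already be in host (big endian) order
static bool read_u32_le(FILE * f, uint32_t & dst, bool no_byteswap) {
    if (fread(&dst, 1, sizeof(dst), f) != sizeof(dst)) {
        return false;
    }
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    if (!no_byteswap) {
        dst = (dst >> 24) | ((dst >> 8) & 0x0000FF00u) | ((dst << 8) & 0x00FF0000u) | (dst << 24);
    }
#endif
    return true;
}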
@@ -319,7 +323,7 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
 }

 struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
-    const struct gguf_reader gr(file);
+    const struct gguf_reader gr(file, params.no_byteswap);
     struct gguf_context * ctx = new gguf_context;

     bool ok = true;

@@ -1141,6 +1145,7 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo

 struct gguf_writer {
     std::vector<int8_t> & buf;
+    bool no_byteswap = false;

     gguf_writer(std::vector<int8_t> & buf) : buf(buf) {}

@@ -1150,7 +1155,11 @@ struct gguf_writer {
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
             buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-            buf.push_back(reinterpret_cast<const int8_t *>(&val)[sizeof(val) - i - 1]);
+            if (!no_byteswap) {
+                buf.push_back(reinterpret_cast<const int8_t *>(&val)[sizeof(val) - i - 1]);
+            } else {
+                buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
+            }
 #else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 #error Unexpected or undefined __BYTE_ORDER__
 #endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

@@ -1321,7 +1330,7 @@ struct gguf_writer {

 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
         auto byteswap = ggml_get_type_traits(info.t.type)->byteswap;
-        if (byteswap != nullptr) {
+        if (byteswap != nullptr && !no_byteswap) {
             byteswap(buf.data() + offset, ggml_nelements(&(info.t)) / ggml_blck_size(info.t.type));
         }
 #endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

@@ -304,6 +304,7 @@ extern "C" {
         bool use_mmap; // use mmap if possible
         bool use_mlock; // force system to keep model in RAM
         bool check_tensors; // validate model tensor data
+        bool no_byteswap; // don't do byteswap, load pre-byteswapped big endian model on big endian system
     };

     // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations

@@ -542,7 +543,8 @@ extern "C" {
     // Load a LoRA adapter from file
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
             struct llama_model * model,
-            const char * path_lora);
+            const char * path_lora,
+            bool no_byteswap);

     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
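Callers of the public LoRA API gain a matching third argument; a sketch under the assumption of an already-byteswapped adapter file (the path is hypothetical):

// sketch: load a pre-byteswapped LoRA adapter without byteswapping it again
struct llama_adapter_lora * adapter = llama_adapter_lora_init(model, "lora-be.gguf", /*no_byteswap =*/ true);
if (adapter == NULL) {
    // handle the load failure
}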
@@ -146,13 +146,14 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor *
     return nullptr;
 }

-static void llama_adapter_lora_init_impl(struct llama_model & model, const char * path_lora, struct llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(struct llama_model & model, const char * path_lora, struct llama_adapter_lora & adapter, bool no_byteswap) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);

     ggml_context * ctx_init;
     struct gguf_init_params meta_gguf_params = {
         /* .no_alloc = */ true,
         /* .ctx = */ &ctx_init,
+        /* .no_byteswap = */ no_byteswap,
     };

     gguf_context_ptr ctx_gguf { gguf_init_from_file(path_lora, meta_gguf_params) };

@@ -327,11 +328,11 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
     LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
 }

-struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model, const char * path_lora) {
+struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model, const char * path_lora, bool no_byteswap) {
     struct llama_adapter_lora * adapter = new llama_adapter_lora();

     try {
-        llama_adapter_lora_init_impl(*model, path_lora, *adapter);
+        llama_adapter_lora_init_impl(*model, path_lora, *adapter, no_byteswap);
         return adapter;
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());

@@ -445,7 +445,8 @@ llama_model_loader::llama_model_loader(
         std::vector<std::string> & splits,
         bool use_mmap,
         bool check_tensors,
-        const struct llama_model_kv_override * param_overrides_p) {
+        const struct llama_model_kv_override * param_overrides_p,
+        bool no_byteswap) {
     int trace = 0;
     if (getenv("LLAMA_TRACE")) {
         trace = atoi(getenv("LLAMA_TRACE"));

@@ -460,8 +461,9 @@ llama_model_loader::llama_model_loader(
     // Load the main GGUF
     struct ggml_context * ctx = NULL;
     struct gguf_init_params params = {
         /*.no_alloc = */ true,
         /*.ctx = */ &ctx,
+        /*.no_byteswap = */ no_byteswap,
     };

     meta.reset(gguf_init_from_file(fname.c_str(), params));

@@ -520,8 +522,9 @@ llama_model_loader::llama_model_loader(
         const char * fname_split = splits[idx].c_str();

         struct gguf_init_params split_params = {
             /*.no_alloc = */ true,
             /*.ctx = */ &ctx,
+            /*.no_byteswap = */ no_byteswap,
         };
         gguf_context_ptr ctx_gguf { gguf_init_from_file(fname_split, split_params) };
         if (!ctx_gguf) {

@@ -681,8 +684,9 @@ llama_model_loader::llama_model_loader(
         use_mmap = false;
     }

     this->use_mmap = use_mmap;
     this->check_tensors = check_tensors;
+    this->no_byteswap = no_byteswap;
 }

 std::string llama_model_loader::get_arch_name() const {

@@ -1027,7 +1031,7 @@ bool llama_model_loader::load_all_data(

 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
                 auto byteswap = ggml_get_type_traits(cur->type)->byteswap;
-                if (byteswap != nullptr) {
+                if (byteswap != nullptr && !no_byteswap) {
                     byteswap(cur->data, ggml_nelements(cur) / ggml_blck_size(cur->type));
                 }
 #endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@@ -1063,7 +1067,7 @@ bool llama_model_loader::load_all_data(

 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
                 auto byteswap = ggml_get_type_traits(cur->type)->byteswap;
-                if (byteswap != nullptr) {
+                if (byteswap != nullptr && !no_byteswap) {
                     byteswap(read_buf.data(), read_buf.size() / ggml_blck_size(cur->type));
                 }
 #endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

@@ -70,6 +70,7 @@ struct llama_model_loader {

     bool use_mmap = false;
     bool check_tensors;
+    bool no_byteswap = false;

     llama_files files;
     llama_ftype ftype;

@@ -95,7 +96,8 @@ struct llama_model_loader {
         std::vector<std::string> & splits, // optional, only need if the split does not follow naming scheme
         bool use_mmap,
         bool check_tensors,
-        const struct llama_model_kv_override * param_overrides_p);
+        const struct llama_model_kv_override * param_overrides_p,
+        bool no_byteswap);

     template<typename T>
     typename std::enable_if<std::is_integral<T>::value, bool>::type

@@ -3768,6 +3768,7 @@ struct llama_model_params llama_model_default_params() {
         /*.use_mmap =*/ true,
         /*.use_mlock =*/ false,
         /*.check_tensors =*/ false,
+        /*.no_byteswap =*/ false,
     };

 #ifdef GGML_USE_METAL

@@ -527,7 +527,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     }

     std::vector<std::string> splits = {};
-    llama_model_loader ml(fname_inp, splits, use_mmap, /*check_tensors*/ true, kv_overrides);
+    llama_model_loader ml(fname_inp, splits, use_mmap, /*check_tensors*/ true, kv_overrides, /*no_byteswap*/ false);
     ml.init_mappings(false); // no prefetching

     llama_model model(llama_model_default_params());

@@ -40,7 +40,7 @@ static int llama_model_load(const std::string & fname, std::vector<std::string>
     model.t_start_us = tm.t_start_us;

     try {
-        llama_model_loader ml(fname, splits, params.use_mmap, params.check_tensors, params.kv_overrides);
+        llama_model_loader ml(fname, splits, params.use_mmap, params.check_tensors, params.kv_overrides, params.no_byteswap);

         ml.print_info();

@@ -758,8 +758,9 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {

     struct ggml_context * ctx = nullptr;
     struct gguf_init_params gguf_params = {
         /*no_alloc =*/ false,
         /*ctx =*/ hft >= offset_has_data ? &ctx : nullptr,
+        /*no_byteswap =*/ false,
     };

     struct gguf_context * gguf_ctx = gguf_init_from_file_impl(file, gguf_params);

@@ -1154,8 +1155,9 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned

     struct ggml_context * ctx_1 = nullptr;
     struct gguf_init_params gguf_params = {
         /*no_alloc =*/ false,
         /*ctx =*/ only_meta ? nullptr : &ctx_1,
+        /*no_byteswap =*/ false,
     };
     struct gguf_context * gguf_ctx_1 = gguf_init_from_file_impl(file, gguf_params);