Remove llama_load_tensor_shard class
commit 76752668de
parent e4bb976c25

1 changed file with 19 additions and 43 deletions
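In short, reconstructed from the diff below: the per-shard bookkeeping is folded into llama_load_tensor itself, so each tensor directly carries its shape, type, file offset and size, and the calc_type()/calc_ne() indirection through first_shard disappears. Pieced together from the hunks, the struct after this commit reads:

    struct llama_load_tensor {
        std::string name;
        enum ggml_type type = GGML_TYPE_F32;
        std::vector<uint32_t> ne;
        size_t file_off;   // formerly first_shard.file_off
        size_t size;
        struct ggml_tensor * ggml_tensor = NULL;
        uint8_t * data;

        llama_load_tensor(const std::string & name) : name(name) {}

        void calc_all() {
            size = llama_calc_tensor_size(ne, type);
        }
    };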
llama.cpp | 62 +++++++++++++++++++-------------------------------------------
@@ -364,23 +364,11 @@ static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml
     return size / ggml_blck_size(type);
 }
 
-struct llama_load_tensor_shard {
-    std::vector<uint32_t> ne;
-    size_t size;
-    enum ggml_type type;
-    size_t file_off;
-
-    void calc_size() {
-        size = llama_calc_tensor_size(ne, type);
-    }
-};
-
 struct llama_load_tensor {
-    llama_load_tensor_shard first_shard;
-
     std::string name;
     enum ggml_type type = GGML_TYPE_F32;
     std::vector<uint32_t> ne;
+    size_t file_off;
     size_t size;
     struct ggml_tensor * ggml_tensor = NULL;
     uint8_t * data;
@@ -388,20 +376,6 @@ struct llama_load_tensor {
     llama_load_tensor(const std::string & name) : name(name) {}
 
     void calc_all() {
-        calc_type();
-        calc_ne();
-        calc_size();
-    }
-
-    void calc_type() {
-        type = first_shard.type;
-    }
-
-    void calc_ne() {
-        ne = first_shard.ne;
-    }
-
-    void calc_size() {
         size = llama_calc_tensor_size(ne, type);
     }
 };
@@ -491,17 +465,17 @@ struct llama_file_loader {
     }
     void read_tensor_metadata(llama_load_tensors_map & tensors_map) {
         while (file.tell() < file.size) {
-            llama_load_tensor_shard shard;
             uint32_t n_dims = file.read_u32();
             uint32_t name_len = file.read_u32();
-            shard.type = (enum ggml_type) file.read_u32();
-            shard.ne.resize(n_dims);
-            file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
+            ggml_type type = (enum ggml_type) file.read_u32();
+            std::vector<uint32_t> ne;
+            ne.resize(n_dims);
+            file.read_raw(ne.data(), sizeof(ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
                 throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
-            switch (shard.type) {
+            switch (type) {
                 case GGML_TYPE_F32:
                 case GGML_TYPE_F16:
                 case GGML_TYPE_Q4_0:
@@ -516,7 +490,7 @@ struct llama_file_loader {
                 case GGML_TYPE_Q6_K:
                     break;
                 default: {
-                    throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", type));
                 }
             }
 
@@ -525,11 +499,6 @@ struct llama_file_loader {
                 file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
             }
-
-            shard.file_off = file.tell();
-
-            shard.calc_size();
-            file.seek(shard.size, SEEK_CUR);
 
             auto it = tensors_map.name_to_idx.find(name);
             size_t idx;
             if (it != tensors_map.name_to_idx.end()) {
@@ -539,7 +508,14 @@ struct llama_file_loader {
                 idx = tensors_map.tensors.size() - 1;
                 tensors_map.name_to_idx.emplace(name, idx);
             }
-            tensors_map.tensors.at(idx).first_shard = shard;
+            auto tensor = tensors_map.tensors.at(idx);
+
+            tensor.ne = ne;
+            tensor.type = type;
+            tensor.file_off = file.tell();
+
+            tensor.calc_all();
+            file.seek(tensor.size, SEEK_CUR);
         }
     }
 };
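A note on the hunk above: auto tensor = tensors_map.tensors.at(idx); binds tensor by value, i.e. it copies the map entry, so the assignments to tensor.ne, tensor.type and tensor.file_off update only that local copy; an auto & reference is what makes such writes reach the stored element. A minimal standalone sketch (hypothetical names, not part of the commit) of the difference:

    #include <cassert>
    #include <vector>

    int main() {
        std::vector<int> xs = {1, 2, 3};

        auto   a = xs.at(0);  // copy: later writes stay local
        auto & b = xs.at(1);  // reference: later writes reach the vector

        a = 10;
        b = 20;

        assert(xs.at(0) == 1);   // untouched by the write through the copy
        assert(xs.at(1) == 20);  // updated through the reference
        return 0;
    }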
@@ -633,7 +609,7 @@ struct llama_model_loader {
 
     bool alignment_prevents_mmap() {
         for (const llama_load_tensor & lt : tensors_map.tensors) {
-            if (lt.first_shard.file_off & 3) {
+            if (lt.file_off & 3) {
                 return true;
             }
         }
@@ -646,7 +622,7 @@ struct llama_model_loader {
             throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
-        return file_loader->hparams.n_embd / lt.first_shard.ne.at(0);
+        return file_loader->hparams.n_embd / lt.ne.at(0);
     }
 
     void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
@@ -768,10 +744,10 @@ struct llama_model_loader {
 
     void load_data_for(llama_load_tensor & lt) {
         if (use_mmap) {
-            lt.data = (uint8_t *) mapping->addr + lt.first_shard.file_off;
+            lt.data = (uint8_t *) mapping->addr + lt.file_off;
         } else {
             llama_file & file = file_loader->file;
-            file.seek(lt.first_shard.file_off, SEEK_SET);
+            file.seek(lt.file_off, SEEK_SET);
             file.read_raw(lt.data, lt.size);
         }
 