GGUF: C++ refactor, backend support, misc fixes (#11030)
* GGUF: C++ refactor, backend support, misc fixes remove ggml_tensor.backend update CODEOWNERS [no ci] remove gguf_get_data from API revise GGUF API data types
This commit is contained in:
parent
017cc5f446
commit
53ff6b9b9f
21 changed files with 1795 additions and 1627 deletions
|
@ -1,4 +1,6 @@
|
|||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include "llama.h"
|
||||
#include "common.h"
|
||||
#include "log.h"
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include "arg.h"
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
#include "ggml.h"
|
||||
#include "pca.hpp"
|
||||
#include "mean.hpp"
|
||||
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
#include "arg.h"
|
||||
#include "common.h"
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include "arg.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include <cstdlib> /* abort() */
|
||||
#include <cstddef>
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
#include "llama.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <climits>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <climits>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <stdexcept>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
|
@ -296,7 +298,7 @@ struct split_strategy {
|
|||
total_size += ggml_nbytes(t);
|
||||
}
|
||||
total_size = total_size / 1000 / 1000; // convert to megabytes
|
||||
printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
|
||||
printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
|
||||
i_split++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cinttypes>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#undef MIN
|
||||
|
@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {
|
|||
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name (ctx, i);
|
||||
const size_t size = gguf_get_tensor_size (ctx, i);
|
||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||
|
||||
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
|
|||
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name (ctx, i);
|
||||
const size_t size = gguf_get_tensor_size (ctx, i);
|
||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||
|
||||
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
|
|||
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
|
||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
|
||||
printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
|
||||
__func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
|
||||
|
||||
// print first 10 elements
|
||||
const float * data = (const float *) cur->data;
|
||||
|
@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
|
|||
const float * data = (const float *) cur->data;
|
||||
for (int j = 0; j < ggml_nelements(cur); ++j) {
|
||||
if (data[j] != 100 + i) {
|
||||
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
|
||||
fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
|
||||
gguf_free(ctx);
|
||||
return false;
|
||||
}
|
||||
|
@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
|
|||
check_data = false;
|
||||
}
|
||||
|
||||
srand(123456);
|
||||
|
||||
const std::string fname(argv[1]);
|
||||
const std::string mode (argv[2]);
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "ggml-cpu.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "gguf.h"
|
||||
|
||||
//#ifdef GGML_USE_CUDA
|
||||
//#include "ggml-cuda.h"
|
||||
|
@ -262,7 +263,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
|
|||
{
|
||||
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
|
||||
int arr_n = gguf_get_arr_n(ctx_gguf, i);
|
||||
const void * data = gguf_get_arr_data(ctx_gguf, i);
|
||||
const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
|
||||
std::stringstream ss;
|
||||
ss << "[";
|
||||
for (int j = 0; j < arr_n; j++) {
|
||||
|
@ -2734,7 +2735,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
|||
total_size_org += orig_size;
|
||||
total_size_new += new_size;
|
||||
gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
|
||||
gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
|
||||
GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
|
||||
gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
|
||||
fout.write((const char *)new_data, new_size);
|
||||
size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
|
||||
for (size_t j = 0; j < pad; ++j) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue