Fixed buffer allocation
parent 028d3f7c89
commit 1103bdb574

2 changed files with 25 additions and 51 deletions
@@ -139,52 +139,6 @@ void verify_tensor_allocation(struct ggml_context * ctx, ggml_backend_buffer_t b

int main(int argc, char ** argv) {

    // The library allows the user to define a certain function using the available tensor operations. This function
    // definition is represented internally via a computation graph. Each tensor operation in the function definition
    // corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the
    // function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized
    // using one of the available optimization algorithms.
    //
    // For example, here we define the function: f(x) = a*x^2 + b

    // memory allocation happens here
    // Create context, allocating memory
    struct ggml_init_params _params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = true,
    };
    struct ggml_context * _ctx = ggml_init(_params);

    struct ggml_tensor * x = ggml_new_tensor_1d(_ctx, GGML_TYPE_F32, 1);

    // ggml_set_param(_ctx, x); // x is an input variable

    // struct ggml_tensor * a  = ggml_new_tensor_1d(_ctx, GGML_TYPE_F32, 1);
    // struct ggml_tensor * b  = ggml_new_tensor_1d(_ctx, GGML_TYPE_F32, 1);
    // struct ggml_tensor * x2 = ggml_mul(_ctx, x, x);
    // struct ggml_tensor * f  = ggml_add(_ctx, ggml_mul(_ctx, a, x2), b);

    // struct ggml_cgraph * gf = ggml_new_graph(_ctx);

    // // ggml_backend_alloc_ctx_tensors_from_buft(_ctx, ggml_backend_cpu_buffer_type());
    // // ggml_backend_alloc_ctx_tensors_from_buft(_ctx, ggml_backend_metal_buffer_type());
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(_ctx, ggml_backend_metal_buffer_type());
    if (buf == nullptr) {
        throw std::runtime_error("unable to allocate backend buffer");
    }
    else {
        size_t buffer_size = ggml_backend_buft_get_max_size(ggml_backend_metal_buffer_type());

        // Verify tensor allocations
        verify_tensor_allocation(_ctx, buf, buffer_size);
    }
    ggml_used_mem(_ctx);
    //

    gpt_params params;
    g_params = &params;
llama.cpp (30 changed lines)

@@ -150,6 +150,11 @@ struct lora_data {
    struct lora_info info;
    std::vector<uint8_t> data;
    struct ggml_context * ctx;
    // the backend to perform the computation (CPU, CUDA, METAL)
    ggml_backend_t backend = NULL;

    // the backend buffer to store the tensor data of a and b
    ggml_backend_buffer_t buffer;

    uint32_t lora_r;
    uint32_t lora_alpha;
@@ -253,9 +258,17 @@ static struct lora_data * load_lora(struct lora_info * info) {
    struct lora_data * result = new struct lora_data;
    result->info = *info;
    result->ctx = NULL;
    result->backend = NULL;
    result->buffer = NULL;
    result->lora_r = 1;
    result->lora_alpha = 1;

    fprintf(stderr, "%s: using Metal backend\n", __func__);
    result->backend = ggml_backend_metal_init();
    if (!result->backend) {
        fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
    }

    struct llama_file_lora file(info->filename.c_str(), "rb");
    if (file.fp == NULL) {
        fprintf(stderr, "warning: Could not open lora adapter '%s'. Ignoring this adapter.\n",
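A side note, not from this commit: the hunk above only logs when ggml_backend_metal_init() fails and leaves result->backend NULL. A minimal sketch of a common fallback pattern follows; the helper name init_lora_backend is hypothetical and the CPU fallback is an assumption, not the author's change.

// Hedged sketch (not part of the commit): try Metal first, fall back to CPU
#include "ggml-backend.h"
#include "ggml-metal.h"
#include <cstdio>

static ggml_backend_t init_lora_backend(void) {
    ggml_backend_t backend = ggml_backend_metal_init();
    if (backend == NULL) {
        fprintf(stderr, "%s: ggml_backend_metal_init() failed, falling back to CPU\n", __func__);
        backend = ggml_backend_cpu_init();
    }
    return backend;
}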
@@ -307,9 +320,10 @@ static struct lora_data * load_lora(struct lora_info * info) {
        tensors_offset.push_back(offset);
        file.seek(nbytes, SEEK_CUR);
    }
    result->buffer = ggml_backend_alloc_ctx_tensors(result->ctx, result->backend);

    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx, ggml_backend_metal_buffer_type());
    if (!buf) {
    // ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx, ggml_backend_metal_buffer_type());
    if (!result->buffer) {
        LLAMA_LOG_ERROR("%s: failed to allocate buffer for lora tensors\n", __func__);
    }
    // read tensor data
@@ -321,9 +335,15 @@ static struct lora_data * load_lora(struct lora_info * info) {
        size_t nbytes = ggml_nbytes(tensor);
        size_t nbytes_pad = ggml_nbytes_pad(tensor);
        file.seek(offset, SEEK_SET);
        tensor->data = result->data.data() + data_offset;
        file.read_raw(tensor->data, nbytes);
        data_offset += nbytes_pad;

        std::vector<char> read_buf;
        read_buf.resize(ggml_nbytes(tensor));
        file.read_raw(read_buf.data(), ggml_nbytes(tensor));
        ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor));
        // tensor_tmp->data = result->data.data() + data_offset;
        // file.read_raw(tensor_tmp->data, nbytes);
        // data_offset += nbytes_pad;
        // ggml_backend_tensor_set(tensor, tensor_tmp->data, 0, ggml_nbytes(tensor));
    }
    return result;
}
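Taken together, the change in load_lora() follows the standard ggml pattern for backend-allocated tensors: allocate one buffer for every tensor in the context, stage the file bytes in a host vector, and copy them in with ggml_backend_tensor_set() instead of pointing tensor->data at host memory. Below is a minimal sketch of that pattern, assuming a no_alloc context whose tensors have already been created; load_ctx_tensors is a hypothetical name and the zero-fill stands in for the file.read_raw() calls in the diff.

// Hedged sketch (not from this commit): allocate ctx tensors in a backend
// buffer, then copy staged host bytes into each tensor.
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include <cstdio>
#include <cstring>
#include <vector>

static bool load_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
    // one backend buffer for every tensor created in the (no_alloc) context
    ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend);
    if (buffer == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer for lora tensors\n", __func__);
        return false;
    }

    std::vector<char> read_buf;
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        read_buf.resize(ggml_nbytes(t));
        // in load_lora this is file.read_raw(read_buf.data(), ggml_nbytes(tensor));
        // zero-fill is a stand-in so the sketch stays self-contained
        memset(read_buf.data(), 0, read_buf.size());
        // copy host bytes into the backend buffer (works for CPU, Metal, CUDA alike)
        ggml_backend_tensor_set(t, read_buf.data(), 0, ggml_nbytes(t));
    }
    return true;
}

Allocating after every tensor has been created matters, since ggml_backend_alloc_ctx_tensors() only covers tensors that already exist in the context at the time of the call.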