log time measurements
parent d0304f7656
commit bd9f6b9dcf

2 changed files with 27 additions and 3 deletions
13 common/common.cpp

@@ -2640,6 +2640,8 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 //
 
 static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
+    auto start = ggml_time_ms();
+    printf("control vector load_one...\n");
     int32_t n_tensors;
 
     size_t n_bytes = 0;
@@ -2684,7 +2686,6 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info)
             fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
             gguf_free(meta_ctx_gguf);
             ggml_free(meta_ctx);
             return result;
         }
     }
 
@@ -2751,10 +2752,14 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info)
     gguf_free(ctx_gguf);
     ggml_free(ctx);
 
+    auto end = ggml_time_ms();
+    printf("control vector load_one took %ums\n", end - start);
     return result;
 }
 
 llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
+    auto start = ggml_time_ms();
+    printf("control vector load...\n");
     llama_control_vector_data result = { -1, {} };
 
     for (const auto & info : load_infos) {
@@ -2764,7 +2769,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos)
             return result;
         }
         if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
-            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
+            printf("%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
             return result;
         }
 
@@ -2778,8 +2783,10 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos)
     }
 
     if (result.n_embd == -1) {
-        fprintf(stderr, "%s: no vectors passed\n", __func__);
+        printf("%s: no vectors passed\n", __func__);
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector load time: %ums\n", end-start);
     return result;
 }
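A note on the format strings above: ggml_time_ms() is declared in ggml.h as returning int64_t, so the "%u" conversions are handed an int64_t where an unsigned int is expected, which is undefined behavior and can print garbage values. Below is a minimal standalone sketch of the same start/end pattern with a matching specifier; timed_section is a hypothetical stand-in for the measured work, while ggml_time_init() and ggml_time_ms() are the real ggml calls.

#include <cinttypes> // PRId64
#include <cstdio>

#include "ggml.h"    // ggml_time_init(), ggml_time_ms()

// Hypothetical function standing in for the code being measured.
static void timed_section(void) {
    const int64_t start = ggml_time_ms();

    // ... work to measure goes here ...

    const int64_t end = ggml_time_ms();
    // int64_t requires PRId64; "%u" (as used in the diff) expects unsigned int.
    printf("timed section took %" PRId64 "ms\n", end - start);
}

int main(void) {
    ggml_time_init(); // initialize ggml's timers once at program start
    timed_section();
    return 0;
}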

17 llama.cpp
@@ -13994,6 +13994,8 @@ int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const
 }
 
 static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model) {
+    auto start = ggml_time_ms();
+    fprintf(stderr, "control vector init...\n");
     GGML_ASSERT(cvec.tensors.empty());
     GGML_ASSERT(cvec.ctxs.empty());
     GGML_ASSERT(cvec.bufs.empty());
@@ -14016,6 +14018,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model)
         ggml_context * ctx = ggml_init(params);
         if (!ctx) {
             LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return 1;
         }
         ctx_map[it.first] = ctx;
@@ -14036,6 +14041,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model)
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
         if (!buf) {
             LLAMA_LOG_ERROR("%s: failed to allocate buffer for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return false;
         }
         ggml_backend_buffer_clear(buf, 0);
@@ -14043,10 +14051,14 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model)
         cvec.bufs.push_back(buf);
     }
 
+    auto end = ggml_time_ms();
+    fprintf(stderr, "control vector init took %ums\n", end - start);
     return true;
 }
 
 int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end) {
+    auto start = ggml_time_ms();
+    printf("control vector apply...\n");
     const llama_model & model = lctx->model;
     llama_control_vector & cvec = lctx->cvec;
 
@@ -14054,6 +14066,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end)
         // disable the current control vector (but leave allocated for later)
         cvec.layer_start = -1;
         cvec.layer_end   = -1;
+        auto end = ggml_time_ms();
+        printf("control vector apply took %ums\n", end - start);
         return 0;
     }
 
@@ -14064,6 +14078,7 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end)
 
     if (cvec.tensors.empty()) {
         if (!llama_control_vector_init(cvec, model)) {
+            LLAMA_LOG_ERROR("%s: control vector init failed\n", __func__);
             return 1;
         }
     }
@@ -14080,6 +14095,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end)
         }
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector apply took %ums\n", end - start);
     return 0;
 }
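Two things stand out in the llama.cpp instrumentation. First, the end/printf pair is copied in front of every return. Second, in the two failure branches the newly added "return true;" lands before the original "return 1;" and "return false;"; in the !buf branch that silently turns a buffer-allocation failure into success. A scope-based timer would log on every exit path without touching any return statement. The sketch below is hypothetical and not part of llama.cpp; only ggml_time_ms() is the real ggml call.

#include <cinttypes> // PRId64
#include <cstdio>

#include "ggml.h"

// Hypothetical RAII helper, not part of llama.cpp: logs elapsed time when
// the enclosing scope exits, covering early returns automatically.
struct scoped_timer_ms {
    const char * name;
    int64_t      t0;

    explicit scoped_timer_ms(const char * name_) : name(name_), t0(ggml_time_ms()) {}
    ~scoped_timer_ms() {
        fprintf(stderr, "%s took %" PRId64 "ms\n", name, ggml_time_ms() - t0);
    }
};

// Usage sketch for a function shaped like llama_control_vector_init:
static bool init_like(bool alloc_ok) {
    scoped_timer_ms timer("control vector init");
    if (!alloc_ok) {
        return false; // destructor still logs; the failure result is preserved
    }
    return true;
}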