log time measurements

commit bd9f6b9dcf
parent d0304f7656

2 changed files with 27 additions and 3 deletions
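
All of the changes below apply one pattern to the control-vector code paths: record ggml_time_ms() when a routine is entered, record it again before each return, and print the difference. Below is a minimal sketch of that pattern, assuming ggml_time_init() has already been run (llama_backend_init() calls it); the timed_work name and its label are placeholders for illustration, not part of the diff:

    #include <cstdio>
    #include "ggml.h"

    // Illustrative only: the shape of the instrumentation added in this commit.
    // ggml_time_ms() returns int64_t, so the elapsed value is cast before %u.
    static void timed_work() {
        auto start = ggml_time_ms();            // timestamp on entry

        // ... body of the routine being measured ...

        auto end = ggml_time_ms();              // timestamp before returning
        printf("timed_work took %ums\n", (unsigned) (end - start));
    }
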
@@ -2640,6 +2640,8 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 //
 
 static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
+    auto start = ggml_time_ms();
+    printf("control vector load_one...\n");
     int32_t n_tensors;
 
     size_t n_bytes = 0;
@@ -2684,7 +2686,6 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
             fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
             gguf_free(meta_ctx_gguf);
             ggml_free(meta_ctx);
-            return result;
         }
     }
 
@@ -2751,10 +2752,14 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     gguf_free(ctx_gguf);
     ggml_free(ctx);
 
+    auto end = ggml_time_ms();
+    printf("control vector load_one took %ums\n", end - start);
     return result;
 }
 
 llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
+    auto start = ggml_time_ms();
+    printf("control vector load...\n");
     llama_control_vector_data result = { -1, {} };
 
     for (const auto & info : load_infos) {
@@ -2764,7 +2769,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
             return result;
         }
         if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
-            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
+            printf("%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
             return result;
         }
 
@@ -2778,8 +2783,10 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
     }
 
     if (result.n_embd == -1) {
-        fprintf(stderr, "%s: no vectors passed\n", __func__);
+        printf("%s: no vectors passed\n", __func__);
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector load time: %ums\n", end-start);
     return result;
 }

llama.cpp (17 changed lines)

@@ -13994,6 +13994,8 @@ int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const
 }
 
 static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model) {
+    auto start = ggml_time_ms();
+    fprintf(stderr, "control vector init...\n");
     GGML_ASSERT(cvec.tensors.empty());
     GGML_ASSERT(cvec.ctxs.empty());
     GGML_ASSERT(cvec.bufs.empty());
@@ -14016,6 +14018,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         ggml_context * ctx = ggml_init(params);
         if (!ctx) {
             LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return 1;
         }
         ctx_map[it.first] = ctx;
@@ -14036,6 +14041,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
         if (!buf) {
             LLAMA_LOG_ERROR("%s: failed to allocate buffer for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return false;
         }
         ggml_backend_buffer_clear(buf, 0);
@@ -14043,10 +14051,14 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         cvec.bufs.push_back(buf);
     }
 
+    auto end = ggml_time_ms();
+    fprintf(stderr, "control vector init took %ums\n", end - start);
     return true;
 }
 
 int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end) {
+    auto start = ggml_time_ms();
+    printf("control vector apply...\n");
     const llama_model & model = lctx->model;
     llama_control_vector & cvec = lctx->cvec;
 
@@ -14054,6 +14066,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
         // disable the current control vector (but leave allocated for later)
         cvec.layer_start = -1;
         cvec.layer_end = -1;
+        auto end = ggml_time_ms();
+        printf("control vector apply took %ums\n", end - start);
         return 0;
     }
 
@@ -14064,6 +14078,7 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
 
     if (cvec.tensors.empty()) {
         if (!llama_control_vector_init(cvec, model)) {
+            LLAMA_LOG_ERROR("%s: control vector init failed\n", __func__);
             return 1;
         }
     }
@@ -14080,6 +14095,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
         }
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector apply took %ums\n", end - start);
     return 0;
 }
 
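A note for anyone reusing the pattern above: ggml_time_ms() returns int64_t, so formatting end - start with %u, as the new printf calls do, relies on a width mismatch in the variadic call. A width-correct variant is sketched below; print_elapsed is a hypothetical helper for illustration, not something this commit adds:

    #include <cinttypes>
    #include <cstdio>
    #include "ggml.h"

    // Hypothetical helper: print an elapsed time measured with ggml_time_ms().
    static void print_elapsed(const char * label, int64_t t_start, int64_t t_end) {
        printf("%s took %" PRId64 " ms\n", label, t_end - t_start);
    }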