log time measurements

trollkotze 2024-03-26 18:52:31 +01:00
parent d0304f7656
commit bd9f6b9dcf
2 changed files with 27 additions and 3 deletions

View file

@@ -2640,6 +2640,8 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 //
 
 static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
+    auto start = ggml_time_ms();
+    printf("control vector load_one...\n");
     int32_t n_tensors;
 
     size_t n_bytes = 0;
@@ -2684,7 +2686,6 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
             fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
             gguf_free(meta_ctx_gguf);
             ggml_free(meta_ctx);
-            return result;
         }
     }
@@ -2751,10 +2752,14 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     gguf_free(ctx_gguf);
     ggml_free(ctx);
 
+    auto end = ggml_time_ms();
+    printf("control vector load_one took %ums\n", end - start);
     return result;
 }
 
 llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
+    auto start = ggml_time_ms();
+    printf("control vector load...\n");
     llama_control_vector_data result = { -1, {} };
 
     for (const auto & info : load_infos) {
@@ -2764,7 +2769,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
             return result;
         }
 
         if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
-            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
+            printf("%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
             return result;
         }
@@ -2778,8 +2783,10 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
     }
 
     if (result.n_embd == -1) {
-        fprintf(stderr, "%s: no vectors passed\n", __func__);
+        printf("%s: no vectors passed\n", __func__);
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector load time: %ums\n", end-start);
     return result;
 }
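
A note on the logging calls added above: ggml_time_ms() is declared in ggml.h as returning int64_t, so end - start is a 64-bit value and the %u specifier in the new printf calls does not match it (compilers typically flag this under -Wformat). Below is a minimal, type-correct sketch of the same logging, assuming only the ggml_time_ms() API from ggml.h; the log_elapsed_ms helper name is illustrative and not part of this commit:

    #include <cinttypes> // PRId64
    #include <cstdio>

    #include "ggml.h"    // int64_t ggml_time_ms(void);

    // Print how many milliseconds have passed since start_ms.
    // Assumes ggml_time_init() has already run (llama_backend_init() calls it).
    static void log_elapsed_ms(const char * label, int64_t start_ms) {
        const int64_t elapsed = ggml_time_ms() - start_ms;
        printf("%s took %" PRId64 "ms\n", label, elapsed);
    }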

View file

@@ -13994,6 +13994,8 @@ int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const
 }
 
 static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model) {
+    auto start = ggml_time_ms();
+    fprintf(stderr, "control vector init...\n");
     GGML_ASSERT(cvec.tensors.empty());
     GGML_ASSERT(cvec.ctxs.empty());
     GGML_ASSERT(cvec.bufs.empty());
@@ -14016,6 +14018,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         ggml_context * ctx = ggml_init(params);
         if (!ctx) {
             LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return 1;
         }
         ctx_map[it.first] = ctx;
@@ -14036,6 +14041,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
         if (!buf) {
             LLAMA_LOG_ERROR("%s: failed to allocate buffer for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return false;
         }
         ggml_backend_buffer_clear(buf, 0);
@@ -14043,10 +14051,14 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         cvec.bufs.push_back(buf);
     }
 
+    auto end = ggml_time_ms();
+    fprintf(stderr, "control vector init took %ums\n", end - start);
     return true;
 }
 
 int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end) {
+    auto start = ggml_time_ms();
+    printf("control vector apply...\n");
     const llama_model & model = lctx->model;
     llama_control_vector & cvec = lctx->cvec;
@@ -14054,6 +14066,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
         // disable the current control vector (but leave allocated for later)
         cvec.layer_start = -1;
         cvec.layer_end = -1;
 
+        auto end = ggml_time_ms();
+        printf("control vector apply took %ums\n", end - start);
         return 0;
     }
@@ -14064,6 +14078,7 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
     if (cvec.tensors.empty()) {
         if (!llama_control_vector_init(cvec, model)) {
+            LLAMA_LOG_ERROR("%s: control vector init failed\n", __func__);
             return 1;
         }
     }
@@ -14080,6 +14095,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
         }
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector apply took %ums\n", end - start);
     return 0;
 }
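
The start / end / log pair above is written out by hand before every return, so each early-return branch needs its own copy (and the return true; added before return false; in the buffer-allocation error path changes what that branch reports). A scope-based timer that logs from its destructor would cover every exit point with a single line per function; the sketch below assumes only ggml_time_ms() from ggml.h, and scoped_timer_ms is an illustrative name rather than anything introduced by this commit:

    #include <cstdint>
    #include <cstdio>

    #include "ggml.h" // int64_t ggml_time_ms(void);

    // Prints "<label>..." on entry and the elapsed milliseconds when the
    // object goes out of scope, so every return path is timed automatically.
    struct scoped_timer_ms {
        const char * label;
        int64_t      start;

        explicit scoped_timer_ms(const char * l) : label(l), start(ggml_time_ms()) {
            printf("%s...\n", label);
        }

        ~scoped_timer_ms() {
            printf("%s took %lldms\n", label, (long long) (ggml_time_ms() - start));
        }
    };

    // Usage, e.g. at the top of llama_control_vector_apply:
    //     scoped_timer_ms timer("control vector apply");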