common : use common_ prefix for common library functions (#9805)
* common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
0e9f760eb1
commit
7eee341bee
45 changed files with 1284 additions and 1284 deletions
|
@ -15,11 +15,11 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
|
|||
llama_batch batch = llama_batch_init(llama_n_batch(ctx), 0, 1);
|
||||
|
||||
for (uint64_t i = 0; i < sentences.size(); i++) {
|
||||
llama_batch_clear(batch);
|
||||
common_batch_clear(batch);
|
||||
|
||||
const std::string input_string = instruction + sentences[i];
|
||||
|
||||
std::vector<llama_token> inputs = llama_tokenize(model, input_string, true, false);
|
||||
std::vector<llama_token> inputs = common_tokenize(model, input_string, true, false);
|
||||
|
||||
const int32_t n_toks = inputs.size();
|
||||
|
||||
|
@ -28,7 +28,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
|
|||
// inputs.push_back(llama_token_eos(model));
|
||||
|
||||
// we want to ignore instruction tokens for mean pooling
|
||||
const int32_t n_inst = llama_tokenize(model, instruction, true, false).size();
|
||||
const int32_t n_inst = common_tokenize(model, instruction, true, false).size();
|
||||
|
||||
#ifdef GRIT_DEBUG
|
||||
// debug tokens - should be matching as referenced in the GritLM sample
|
||||
|
@ -40,7 +40,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
|
|||
|
||||
// add input to batch (this increments n_tokens)
|
||||
for (int32_t j = 0; j < n_toks; j++) {
|
||||
llama_batch_add(batch, inputs[j], j, { 0 }, j >= n_inst);
|
||||
common_batch_add(batch, inputs[j], j, { 0 }, j >= n_inst);
|
||||
}
|
||||
|
||||
// clear previous kv_cache values (irrelevant for embeddings)
|
||||
|
@ -75,7 +75,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
|
|||
}
|
||||
|
||||
std::vector<float> emb_norm(emb_unorm.size());
|
||||
llama_embd_normalize(emb_unorm.data(), emb_norm.data(), n_embd);
|
||||
common_embd_normalize(emb_unorm.data(), emb_norm.data(), n_embd);
|
||||
result.push_back(emb_norm);
|
||||
|
||||
#ifdef GRIT_DEBUG
|
||||
|
@ -105,16 +105,16 @@ static std::string generate(llama_context * ctx, llama_sampler * smpl, const std
|
|||
|
||||
llama_batch bat = llama_batch_init(llama_n_batch(ctx), 0, 1);
|
||||
|
||||
std::vector<llama_token> inputs = llama_tokenize(model, prompt, false, true);
|
||||
std::vector<llama_token> inputs = common_tokenize(model, prompt, false, true);
|
||||
int32_t i_current_token = 0;
|
||||
|
||||
while (true) {
|
||||
llama_batch_clear(bat);
|
||||
common_batch_clear(bat);
|
||||
{
|
||||
const int32_t n_inputs = inputs.size();
|
||||
|
||||
for (int32_t i = 0; i < n_inputs; i++) {
|
||||
llama_batch_add(bat, inputs[i], i_current_token++, { 0 }, i == n_inputs - 1);
|
||||
common_batch_add(bat, inputs[i], i_current_token++, { 0 }, i == n_inputs - 1);
|
||||
}
|
||||
}
|
||||
inputs.clear();
|
||||
|
@ -127,7 +127,7 @@ static std::string generate(llama_context * ctx, llama_sampler * smpl, const std
|
|||
break;
|
||||
}
|
||||
|
||||
std::string piece = llama_token_to_piece(ctx, token);
|
||||
std::string piece = common_token_to_piece(ctx, token);
|
||||
if (stream) {
|
||||
std::printf("%s", piece.c_str());
|
||||
std::fflush(stdout);
|
||||
|
@ -152,16 +152,16 @@ static std::string gritlm_instruction(const std::string & instruction) {
|
|||
}
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
gpt_params params;
|
||||
common_params params;
|
||||
|
||||
if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
gpt_init();
|
||||
common_init();
|
||||
|
||||
llama_model_params mparams = llama_model_params_from_gpt_params(params);
|
||||
llama_context_params cparams = llama_context_params_from_gpt_params(params);
|
||||
llama_model_params mparams = common_model_params_to_llama(params);
|
||||
llama_context_params cparams = common_context_params_to_llama(params);
|
||||
|
||||
llama_backend_init();
|
||||
|
||||
|
@ -199,10 +199,10 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
const int n_embd = llama_n_embd(model);
|
||||
|
||||
const float cosine_sim_q0_d0 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
|
||||
const float cosine_sim_q0_d1 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
|
||||
const float cosine_sim_q1_d0 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[0].data(), n_embd);
|
||||
const float cosine_sim_q1_d1 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[1].data(), n_embd);
|
||||
const float cosine_sim_q0_d0 = common_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
|
||||
const float cosine_sim_q0_d1 = common_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
|
||||
const float cosine_sim_q1_d0 = common_embd_similarity_cos(q_rep[1].data(), d_rep[0].data(), n_embd);
|
||||
const float cosine_sim_q1_d1 = common_embd_similarity_cos(q_rep[1].data(), d_rep[1].data(), n_embd);
|
||||
|
||||
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[0].c_str(), cosine_sim_q0_d0);
|
||||
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[1].c_str(), cosine_sim_q0_d1);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue