common : use common_ prefix for common library functions (#9805)
* common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
0e9f760eb1
commit
7eee341bee
45 changed files with 1284 additions and 1284 deletions
|
@ -13,13 +13,13 @@
|
|||
#include <vector>
|
||||
|
||||
int main(int argc, char ** argv){
|
||||
gpt_params params;
|
||||
common_params params;
|
||||
|
||||
if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) {
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
gpt_init();
|
||||
common_init();
|
||||
|
||||
const int n_draft = params.n_draft;
|
||||
|
||||
|
@ -28,18 +28,18 @@ int main(int argc, char ** argv){
|
|||
llama_numa_init(params.numa);
|
||||
|
||||
// load the model
|
||||
llama_init_result llama_init = llama_init_from_gpt_params(params);
|
||||
common_init_result llama_init = common_init_from_params(params);
|
||||
|
||||
llama_model * model = llama_init.model;
|
||||
llama_context * ctx = llama_init.context;
|
||||
|
||||
// tokenize the prompt
|
||||
std::vector<llama_token> inp;
|
||||
inp = ::llama_tokenize(ctx, params.prompt, true, true);
|
||||
inp = common_tokenize(ctx, params.prompt, true, true);
|
||||
|
||||
llama_ngram_cache ngram_cache_context;
|
||||
llama_ngram_cache ngram_cache_dynamic;
|
||||
llama_ngram_cache ngram_cache_static;
|
||||
common_ngram_cache ngram_cache_context;
|
||||
common_ngram_cache ngram_cache_dynamic;
|
||||
common_ngram_cache ngram_cache_static;
|
||||
int64_t t_draft_flat_us = 0;
|
||||
int64_t t_draft_us = 0;
|
||||
|
||||
|
@ -48,7 +48,7 @@ int main(int argc, char ** argv){
|
|||
|
||||
if (!params.lookup_cache_static.empty()) {
|
||||
try {
|
||||
ngram_cache_static = llama_ngram_cache_load(params.lookup_cache_static);
|
||||
ngram_cache_static = common_ngram_cache_load(params.lookup_cache_static);
|
||||
} catch (std::ifstream::failure const &) {
|
||||
LOG_ERR("failed to open static lookup cache: %s", params.lookup_cache_static.c_str());
|
||||
exit(1);
|
||||
|
@ -57,7 +57,7 @@ int main(int argc, char ** argv){
|
|||
|
||||
if (!params.lookup_cache_dynamic.empty()) {
|
||||
try {
|
||||
ngram_cache_dynamic = llama_ngram_cache_load(params.lookup_cache_dynamic);
|
||||
ngram_cache_dynamic = common_ngram_cache_load(params.lookup_cache_dynamic);
|
||||
} catch (std::ifstream::failure const &) {} // if the file does not exist it will simply be created at the end of the program
|
||||
}
|
||||
|
||||
|
@ -86,7 +86,7 @@ int main(int argc, char ** argv){
|
|||
|
||||
{
|
||||
const int64_t t_start_draft_us = ggml_time_us();
|
||||
llama_ngram_cache_draft(pseudo_output, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static);
|
||||
common_ngram_cache_draft(pseudo_output, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static);
|
||||
t_draft_us += ggml_time_us() - t_start_draft_us;
|
||||
}
|
||||
|
||||
|
@ -105,7 +105,7 @@ int main(int argc, char ** argv){
|
|||
|
||||
{
|
||||
const int64_t t_start_draft_us = ggml_time_us();
|
||||
llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false);
|
||||
common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false);
|
||||
t_draft_us += ggml_time_us() - t_start_draft_us;
|
||||
}
|
||||
}
|
||||
|
@ -115,7 +115,7 @@ int main(int argc, char ** argv){
|
|||
pseudo_output.push_back(inp_slice[pseudo_output.size()]);
|
||||
{
|
||||
const int64_t t_start_draft_us = ggml_time_us();
|
||||
llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false);
|
||||
common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false);
|
||||
t_draft_us += ggml_time_us() - t_start_draft_us;
|
||||
}
|
||||
}
|
||||
|
@ -133,7 +133,7 @@ int main(int argc, char ** argv){
|
|||
}
|
||||
|
||||
// After each chunk, update the dynamic ngram cache with the context ngram cache:
|
||||
llama_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context);
|
||||
common_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context);
|
||||
ngram_cache_context.clear();
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue