llama : add llama_vocab
, functions -> methods, naming (#11110)
* llama : functions -> methods (#11110) * llama : add struct llama_vocab to the API (#11156) ggml-ci * hparams : move vocab params to llama_vocab (#11159) ggml-ci * vocab : more pimpl (#11165) ggml-ci * vocab : minor tokenization optimizations (#11160) ggml-ci Co-authored-by: Diego Devesa <slarengh@gmail.com> * lora : update API names (#11167) ggml-ci * llama : update API names to use correct prefix (#11174) * llama : update API names to use correct prefix ggml-ci * cont ggml-ci * cont ggml-ci * minor [no ci] * vocab : llama_vocab_add_[be]os -> llama_vocab_get_add_[be]os (#11174) ggml-ci * vocab : llama_vocab_n_vocab -> llama_vocab_n_tokens (#11174) ggml-ci --------- Co-authored-by: Diego Devesa <slarengh@gmail.com>
This commit is contained in:
parent
c05e8c9934
commit
afa8a9ec9b
68 changed files with 5855 additions and 5400 deletions
|
@ -84,6 +84,7 @@ int main(int argc, char ** argv) {
|
|||
model_params.n_gpu_layers = ngl;
|
||||
|
||||
llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
if (model == NULL) {
|
||||
fprintf(stderr , "%s: error: unable to load model\n" , __func__);
|
||||
|
@ -93,11 +94,11 @@ int main(int argc, char ** argv) {
|
|||
// tokenize the prompt
|
||||
|
||||
// find the number of tokens in the prompt
|
||||
const int n_prompt = -llama_tokenize(model, prompt.c_str(), prompt.size(), NULL, 0, true, true);
|
||||
const int n_prompt = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, true, true);
|
||||
|
||||
// allocate space for the tokens and tokenize the prompt
|
||||
std::vector<llama_token> prompt_tokens(n_prompt);
|
||||
if (llama_tokenize(model, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), true, true) < 0) {
|
||||
if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), true, true) < 0) {
|
||||
fprintf(stderr, "%s: error: failed to tokenize the prompt\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
@ -112,7 +113,7 @@ int main(int argc, char ** argv) {
|
|||
// enable performance counters
|
||||
ctx_params.no_perf = false;
|
||||
|
||||
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
||||
llama_context * ctx = llama_init_from_model(model, ctx_params);
|
||||
|
||||
if (ctx == NULL) {
|
||||
fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
|
||||
|
@ -131,7 +132,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
for (auto id : prompt_tokens) {
|
||||
char buf[128];
|
||||
int n = llama_token_to_piece(model, id, buf, sizeof(buf), 0, true);
|
||||
int n = llama_token_to_piece(vocab, id, buf, sizeof(buf), 0, true);
|
||||
if (n < 0) {
|
||||
fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__);
|
||||
return 1;
|
||||
|
@ -164,12 +165,12 @@ int main(int argc, char ** argv) {
|
|||
new_token_id = llama_sampler_sample(smpl, ctx, -1);
|
||||
|
||||
// is it an end of generation?
|
||||
if (llama_token_is_eog(model, new_token_id)) {
|
||||
if (llama_vocab_is_eog(vocab, new_token_id)) {
|
||||
break;
|
||||
}
|
||||
|
||||
char buf[128];
|
||||
int n = llama_token_to_piece(model, new_token_id, buf, sizeof(buf), 0, true);
|
||||
int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true);
|
||||
if (n < 0) {
|
||||
fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__);
|
||||
return 1;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue