llama : add llama_vocab, functions -> methods, naming (#11110)
* llama : functions -> methods (#11110)
* llama : add struct llama_vocab to the API (#11156) ggml-ci
* hparams : move vocab params to llama_vocab (#11159) ggml-ci
* vocab : more pimpl (#11165) ggml-ci
* vocab : minor tokenization optimizations (#11160) ggml-ci
  Co-authored-by: Diego Devesa <slarengh@gmail.com>
* lora : update API names (#11167) ggml-ci
* llama : update API names to use correct prefix (#11174)
* llama : update API names to use correct prefix ggml-ci
* cont ggml-ci
* cont ggml-ci
* minor [no ci]
* vocab : llama_vocab_add_[be]os -> llama_vocab_get_add_[be]os (#11174) ggml-ci
* vocab : llama_vocab_n_vocab -> llama_vocab_n_tokens (#11174) ggml-ci

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
parent c05e8c9934
commit afa8a9ec9b
68 changed files with 5855 additions and 5400 deletions
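Taken together, these changes route vocabulary queries through a const llama_vocab handle obtained from the model instead of passing the llama_model itself. Below is a minimal sketch of the new calling pattern, based on the renames listed in the commit message; the helper is hypothetical (not part of this commit) and assumes a model that has already been loaded.

#include <cstdio>
#include "llama.h"

// Hypothetical helper showing the renamed vocab API: queries that used to take a
// llama_model, or used llama_token_* / llama_vocab_n_vocab names, now go through
// a const llama_vocab * obtained from the model.
static void print_vocab_info(const llama_model * model) {
    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int32_t n_tokens = llama_vocab_n_tokens(vocab);    // was: llama_vocab_n_vocab
    const llama_token bos  = llama_vocab_bos(vocab);         // was: llama_token_bos(model)
    const bool add_bos     = llama_vocab_get_add_bos(vocab); // was: llama_vocab_add_bos

    printf("n_tokens = %d, bos = %d, add_bos = %d\n", n_tokens, bos, (int) add_bos);
}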
@@ -48,10 +48,12 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
     // tokenize the prompt
 
     std::vector<llama_token> tokens_list;
-    tokens_list = common_tokenize(model, params.prompt, true);
+    tokens_list = common_tokenize(vocab, params.prompt, true);
 
     const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size())*n_parallel;
 
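The hunk above switches the example to the common_tokenize helper taking the vocab. For callers using the raw C API directly, the usual two-pass pattern presumably looks like the sketch below; this assumes llama_tokenize also moved from a model parameter to a vocab parameter, which this diff does not itself show.

#include <string>
#include <vector>
#include "llama.h"

// Sketch only: tokenize a prompt with the vocab handle. The first call with a
// null buffer returns the negated required token count; the second call fills it.
static std::vector<llama_token> tokenize_prompt(const llama_vocab * vocab, const std::string & prompt) {
    const int32_t n_needed = -llama_tokenize(vocab, prompt.c_str(), (int32_t) prompt.size(),
                                             nullptr, 0, /*add_special*/ true, /*parse_special*/ true);

    std::vector<llama_token> tokens(n_needed > 0 ? n_needed : 0);
    const int32_t n = llama_tokenize(vocab, prompt.c_str(), (int32_t) prompt.size(),
                                     tokens.data(), (int32_t) tokens.size(), true, true);
    tokens.resize(n > 0 ? n : 0);
    return tokens;
}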
@@ -62,7 +64,7 @@ int main(int argc, char ** argv) {
     ctx_params.n_ctx   = n_kv_req;
     ctx_params.n_batch = std::max(n_predict, n_parallel);
 
-    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
+    llama_context * ctx = llama_init_from_model(model, ctx_params);
 
     auto sparams = llama_sampler_chain_default_params();
     sparams.no_perf = false;
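Only the name changes here: llama_new_context_with_model becomes llama_init_from_model. A minimal sketch of context creation under the new name, written as a fragment assumed to run inside main after the model has been loaded, with an illustrative n_ctx value and a null check added for completeness:

// Sketch: create a context from an already-loaded model using the renamed API.
llama_context_params cparams = llama_context_default_params();
cparams.n_ctx = 2048; // illustrative value

llama_context * ctx = llama_init_from_model(model, cparams);
if (ctx == nullptr) {
    fprintf(stderr, "failed to create llama_context\n");
    return 1;
}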
@@ -121,7 +123,7 @@ int main(int argc, char ** argv) {
 
         llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
         if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
-            decoder_start_token_id = llama_token_bos(model);
+            decoder_start_token_id = llama_vocab_bos(vocab);
         }
 
         common_batch_clear(batch);
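The fallback in the hunk above reads as follows: encoder-decoder models prime the decoder with a dedicated start token, and when the model does not define one, BOS is used, now queried from the vocab rather than the model. A hypothetical helper capturing that pattern:

// Hypothetical helper (not in the commit): pick the token used to prime the decoder.
static llama_token get_decoder_start(const llama_model * model, const llama_vocab * vocab) {
    llama_token id = llama_model_decoder_start_token(model);
    if (id == LLAMA_TOKEN_NULL) {
        id = llama_vocab_bos(vocab); // was: llama_token_bos(model)
    }
    return id;
}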
@@ -174,7 +176,7 @@ int main(int argc, char ** argv) {
             const llama_token new_token_id = llama_sampler_sample(smpl, ctx, i_batch[i]);
 
             // is it an end of generation? -> mark the stream as finished
-            if (llama_token_is_eog(model, new_token_id) || n_cur == n_predict) {
+            if (llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_predict) {
                 i_batch[i] = -1;
                 LOG("\n");
                 if (n_parallel > 1) {
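The end-of-generation check likewise takes the vocab now. A hypothetical predicate for the per-stream stop condition used above:

// Hypothetical helper (not in the commit): a stream stops when the sampled token is
// end-of-generation (checked against the vocab) or the length budget is used up.
static bool stream_finished(const llama_vocab * vocab, llama_token token, int n_cur, int n_predict) {
    return llama_vocab_is_eog(vocab, token) || n_cur == n_predict;
}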