speculative : refactor and add a simpler example (#10362)
* speculative : refactor and add a simpler example ggml-ci
* speculative : clean-up and add comments and TODOs [no ci]
* speculative : manage context in common_speculative ggml-ci
* speculative : simplify ggml-ci
* speculative : simplify (cont) ggml-ci
* speculative : add --draft-min CLI arg
* speculative : minor fixup
* make : build fixes
* speculative : do not redraft previous drafts ggml-ci
* speculative : fix the draft sampling ggml-ci
* speculative : fix compile warning
* common : refactor args ggml-ci
* common : change defaults [no ci]
* common : final touches ggml-ci
This commit is contained in:
parent
cce5a90075
commit
d9d54e498d
28 changed files with 1028 additions and 326 deletions
|
@@ -21,7 +21,7 @@ int main(int argc, char ** argv){
|
|||
|
||||
common_init();
|
||||
|
||||
-const int n_draft = params.n_draft;
|
||||
+const int n_draft = params.speculative.n_max;
|
||||
|
||||
// init llama.cpp
|
||||
llama_backend_init();
|
||||
|
@@ -40,6 +40,7 @@ int main(int argc, char ** argv){
|
|||
common_ngram_cache ngram_cache_context;
|
||||
common_ngram_cache ngram_cache_dynamic;
|
||||
common_ngram_cache ngram_cache_static;
|
||||
|
||||
int64_t t_draft_flat_us = 0;
|
||||
int64_t t_draft_us = 0;
|
||||
|
||||
|
|
|
@@ -22,7 +22,7 @@ int main(int argc, char ** argv){
|
|||
common_init();
|
||||
|
||||
// max. number of additional tokens to draft if match is found
|
||||
-const int n_draft = params.n_draft;
|
||||
+const int n_draft = params.speculative.n_max;
|
||||
|
||||
const bool dump_kv_cache = params.dump_kv_cache;
|
||||
|
||||
|
@@ -102,7 +102,7 @@ int main(int argc, char ** argv){
|
|||
|
||||
bool has_eos = false;
|
||||
|
||||
-struct common_sampler * smpl = common_sampler_init(model, params.sparams);
|
||||
+struct common_sampler * smpl = common_sampler_init(model, params.sampling);
|
||||
|
||||
std::vector<llama_token> draft;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue