From 8f65eecf20383f716dcc36f558c6924a5f27e21b Mon Sep 17 00:00:00 2001
From: SuperUserNameMan
Date: Wed, 14 Jun 2023 09:33:31 +0200
Subject: [PATCH] typo and comments simple.cpp

---
 examples/simple/simple.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 45ed4b8b9..76f991cdc 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -79,7 +79,7 @@ int main(int argc, char ** argv)
     }
 
     //---------------------------------
-    // Tokenize the prompt
+    // Tokenize the prompt :
     //---------------------------------
 
     std::vector<llama_token> tokens_list;
@@ -103,6 +103,7 @@ int main(int argc, char ** argv)
     {
         printf( "%s" , llama_token_to_str( ctx , id ) );
     }
+    fflush(stdout);
 
 
 
@@ -113,11 +114,10 @@ int main(int argc, char ** argv)
     // The LLM keeps a contextual cache memory of previous token evaluation.
     // Usually, once this cache is full, it is required to recompute a compressed context based on previous
     // tokens (see "infinite text generation via context swapping" in the main example), but in this minimalist
-    // example, we will just going to stop the loop.
+    // example, we will just stop the loop once this cache is full or once an end of stream is detected.
 
     while ( llama_get_kv_cache_token_count( ctx ) < max_context_size )
     {
-
         //---------------------------------
         // Evaluate the tokens :
         //---------------------------------
@@ -137,7 +137,7 @@ int main(int argc, char ** argv)
         llama_token new_token_id = 0;
 
         auto logits  = llama_get_logits( ctx );
-        auto n_vocab = llama_n_vocab( ctx );
+        auto n_vocab = llama_n_vocab( ctx ); // the size of the LLM vocabulary (in tokens)
 
         std::vector<llama_token_data> candidates;
         candidates.reserve( n_vocab );
@@ -150,7 +150,7 @@ int main(int argc, char ** argv)
 
         llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
 
         // Select it using the "Greedy sampling" method :
-        new_token_id = llama_sample_token_greedy(ctx, &candidates_p);
+        new_token_id = llama_sample_token_greedy( ctx , &candidates_p );
 
         // is it an end of stream ?
@@ -162,14 +162,14 @@ int main(int argc, char ** argv)
 
         // Print the new token :
         printf( "%s" , llama_token_to_str( ctx , new_token_id ) );
-        fflush(stdout);
+        fflush( stdout );
 
         // Push this new token for next evaluation :
         tokens_list.push_back( new_token_id );
 
     } // wend of main loop
 
-    llama_free(ctx);
+    llama_free( ctx );
 
     return 0;
 }
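
For reference, below is a minimal sketch of the greedy generation loop that these hunks annotate, condensed into one function. It is not part of the patch; it assumes the llama.cpp C API as of this commit (June 2023), i.e. llama_eval, llama_n_ctx, llama_get_logits, llama_n_vocab, llama_sample_token_greedy, llama_token_eos, llama_token_to_str and llama_get_kv_cache_token_count, and the hypothetical helper name "generate" is introduced only for illustration.

#include "llama.h"

#include <cstdio>
#include <vector>

// Runs the same loop as simple.cpp : "tokens_list" initially holds the
// tokenized prompt, "ctx" is a context obtained from llama_init_from_file().
static void generate( llama_context * ctx , std::vector<llama_token> tokens_list , int n_threads )
{
    const int max_context_size = llama_n_ctx( ctx );

    // Stop once the KV cache is full or an end of stream is produced :
    while ( llama_get_kv_cache_token_count( ctx ) < max_context_size )
    {
        // Evaluate the pending tokens ; the logits of the last one become available :
        if ( llama_eval( ctx , tokens_list.data() , int( tokens_list.size() ) ,
                         llama_get_kv_cache_token_count( ctx ) , n_threads ) )
        {
            fprintf( stderr , "llama_eval() failed\n" );
            return;
        }
        tokens_list.clear();

        // Turn the logits into one candidate entry per vocabulary token :
        auto logits  = llama_get_logits( ctx );
        auto n_vocab = llama_n_vocab( ctx ); // the size of the LLM vocabulary (in tokens)

        std::vector<llama_token_data> candidates;
        candidates.reserve( n_vocab );

        for ( llama_token token_id = 0 ; token_id < n_vocab ; token_id++ )
        {
            candidates.emplace_back( llama_token_data{ token_id , logits[ token_id ] , 0.0f } );
        }

        llama_token_data_array candidates_p = { candidates.data() , candidates.size() , false };

        // "Greedy sampling" : always pick the highest-logit token :
        llama_token new_token_id = llama_sample_token_greedy( ctx , &candidates_p );

        // is it an end of stream ?
        if ( new_token_id == llama_token_eos() )
        {
            break;
        }

        // Print the new token, then feed it back in for the next evaluation :
        printf( "%s" , llama_token_to_str( ctx , new_token_id ) );
        fflush( stdout );

        tokens_list.push_back( new_token_id );
    }
}

Passing llama_get_kv_cache_token_count( ctx ) as the n_past argument of llama_eval is the same design choice the example itself makes: the cache already holds everything evaluated so far, so only the newly pushed token needs to be evaluated on each iteration.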