typo and comments simple.cpp

SuperUserNameMan 2023-06-14 09:33:31 +02:00 committed by GitHub
parent 7a4f712a29
commit 8f65eecf20


@@ -79,7 +79,7 @@ int main(int argc, char ** argv)
     }

     //---------------------------------
-    // Tokenize the prompt
+    // Tokenize the prompt :
     //---------------------------------

     std::vector<llama_token> tokens_list;
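For context, a hedged sketch of how a prompt can be turned into tokens_list with the C API of this llama.cpp revision; the exact llama_tokenize signature is recalled from that era, and tokenize_prompt is a hypothetical helper, not part of this commit:

#include "llama.h"
#include <cstdio>
#include <string>
#include <vector>

// hypothetical helper : tokenize a prompt, prepending the BOS token
static std::vector<llama_token> tokenize_prompt( llama_context * ctx , const std::string & prompt )
{
    // one token per byte is a safe upper bound, plus one slot for BOS
    std::vector<llama_token> tokens( prompt.size() + 1 );

    const int n = llama_tokenize( ctx , prompt.c_str() , tokens.data() , (int) tokens.size() , true );

    if ( n < 0 )
    {
        fprintf( stderr , "tokenization failed\n" );
        tokens.clear();
        return tokens;
    }

    tokens.resize( n );
    return tokens;
}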
@@ -103,6 +103,7 @@ int main(int argc, char ** argv)
     {
         printf( "%s" , llama_token_to_str( ctx , id ) );
     }

     fflush(stdout);
@@ -113,11 +114,10 @@ int main(int argc, char ** argv)
     // The LLM keeps a contextual cache memory of previous token evaluation.
     // Usually, once this cache is full, it is required to recompute a compressed context based on previous
     // tokens (see "infinite text generation via context swapping" in the main example), but in this minimalist
-    // example, we will just going to stop the loop.
+    // example, we will just stop the loop once this cache is full or once an end of stream is detected.
     while ( llama_get_kv_cache_token_count( ctx ) < max_context_size )
     {
         //---------------------------------
         // Evaluate the tokens :
         //---------------------------------
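For orientation, a hedged sketch of the generation loop this comment describes, written against the C API of this llama.cpp revision (llama_eval still takes n_past and n_threads here); generate_until_full is a hypothetical helper and the sampling step is elided:

#include "llama.h"
#include <cstdio>
#include <vector>

// hypothetical helper : keep evaluating until the KV cache is full
static void generate_until_full( llama_context * ctx ,
                                 std::vector<llama_token> tokens_list ,
                                 int max_context_size ,
                                 int n_threads )
{
    while ( llama_get_kv_cache_token_count( ctx ) < max_context_size )
    {
        // n_past = number of tokens already held in the KV cache
        if ( llama_eval( ctx , tokens_list.data() , (int) tokens_list.size() ,
                         llama_get_kv_cache_token_count( ctx ) , n_threads ) )
        {
            fprintf( stderr , "llama_eval failed\n" );
            return;
        }

        tokens_list.clear(); // only newly sampled tokens are fed on the next pass

        // ... sample the next token here ; break on end of stream ,
        //     otherwise push it into tokens_list ...
    }
}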
@@ -137,7 +137,7 @@ int main(int argc, char ** argv)
         llama_token new_token_id = 0;

         auto logits = llama_get_logits( ctx );
-        auto n_vocab = llama_n_vocab( ctx );
+        auto n_vocab = llama_n_vocab( ctx ); // the size of the LLM vocabulary (in tokens)

         std::vector<llama_token_data> candidates;
         candidates.reserve( n_vocab );
@@ -150,7 +150,7 @@ int main(int argc, char ** argv)
         llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };

         // Select it using the "Greedy sampling" method :
-        new_token_id = llama_sample_token_greedy(ctx, &candidates_p);
+        new_token_id = llama_sample_token_greedy( ctx , &candidates_p );

         // is it an end of stream ?
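A hedged, stand-alone view of the selection step shown in these two hunks: the raw logits are wrapped into the llama_token_data_array that the samplers expect, and the single most probable token is picked ("greedy sampling"). sample_greedy is a hypothetical helper, and llama_token_eos() taking no argument is an assumption about the API of this period:

#include "llama.h"
#include <vector>

// hypothetical helper : pick the most probable next token, or -1 on end of stream
static llama_token sample_greedy( llama_context * ctx )
{
    auto logits  = llama_get_logits( ctx );  // one logit per vocabulary entry
    auto n_vocab = llama_n_vocab( ctx );     // the size of the LLM vocabulary (in tokens)

    std::vector<llama_token_data> candidates;
    candidates.reserve( n_vocab );

    for ( llama_token token_id = 0 ; token_id < n_vocab ; token_id++ )
    {
        // { id , logit , p } : p is left at 0.0f, the sampler works from the logit
        candidates.emplace_back( llama_token_data{ token_id , logits[ token_id ] , 0.0f } );
    }

    llama_token_data_array candidates_p = { candidates.data() , candidates.size() , false };

    llama_token new_token_id = llama_sample_token_greedy( ctx , &candidates_p );

    // is it an end of stream ?
    if ( new_token_id == llama_token_eos() )
    {
        return -1;
    }

    return new_token_id;
}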
@@ -162,14 +162,14 @@ int main(int argc, char ** argv)
         // Print the new token :
         printf( "%s" , llama_token_to_str( ctx , new_token_id ) );
-        fflush(stdout);
+        fflush( stdout );

         // Push this new token for next evaluation :
         tokens_list.push_back( new_token_id );

     } // wend of main loop

-    llama_free(ctx);
+    llama_free( ctx );

     return 0;
 }