Merge branch 'master' into compilade/refactor-kv-cache
This commit is contained in:
commit
10c3c419e9
518 changed files with 78202 additions and 66427 deletions
|
@ -1,4 +1,4 @@
|
|||
set(TARGET gritlm)
|
||||
set(TARGET llama-gritlm)
|
||||
add_executable(${TARGET} gritlm.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||
|
|
|
@ -26,7 +26,7 @@ $ scripts/hf.sh --repo cohesionet/GritLM-7B_gguf --file gritlm-7b_q4_1.gguf --ou
|
|||
|
||||
Run the example using the downloaded model:
|
||||
```console
|
||||
$ ./gritlm -m models/gritlm-7b_q4_1.gguf
|
||||
$ ./llama-gritlm -m models/gritlm-7b_q4_1.gguf
|
||||
|
||||
Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "A purely peer-to-peer version of electronic cash w" is: 0.605
|
||||
Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "All text-based language problems can be reduced to" is: 0.103
|
||||
|
|
|
@ -44,6 +44,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
|
|||
|
||||
// clear previous kv_cache values (irrelevant for embeddings)
|
||||
llama_past_clear(ctx);
|
||||
llama_set_embeddings(ctx, true);
|
||||
llama_set_causal_attn(ctx, false);
|
||||
|
||||
// run model
|
||||
|
@ -98,7 +99,9 @@ static std::string generate(llama_context * ctx, const std::string & prompt, boo
|
|||
llama_token eos_token = llama_token_eos(mdl);
|
||||
|
||||
llama_past_clear(ctx);
|
||||
llama_set_embeddings(ctx, false);
|
||||
llama_set_causal_attn(ctx, true);
|
||||
|
||||
llama_batch bat = llama_batch_init(llama_n_batch(ctx), 0, 1);
|
||||
|
||||
std::vector<llama_token> inputs = llama_tokenize(mdl, prompt, false, true);
|
||||
|
@ -166,8 +169,7 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
llama_model * mdl = llama_load_model_from_file(params.model.c_str(), mparams);
|
||||
|
||||
// create new context - set to embedding mode
|
||||
cparams.embeddings = true;
|
||||
// create generation context
|
||||
llama_context * ctx = llama_new_context_with_model(mdl, cparams);
|
||||
|
||||
// ### Embedding/Representation ###
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue