diff --git a/examples/low_level_api_llama_cpp.py b/examples/low_level_api_llama_cpp.py
index 2a639aad5..b048c0ac8 100644
--- a/examples/low_level_api_llama_cpp.py
+++ b/examples/low_level_api_llama_cpp.py
@@ -9,7 +9,7 @@
 N_THREADS = multiprocessing.cpu_count()
 prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n"
 lparams = llama_cpp.llama_context_default_params()
-ctx = llama_cpp.llama_init_from_file(b"models/ggml-alpaca-7b-q4.bin", lparams)
+ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
 
 # determine the required inference memory per token:
 tmp = [0, 1, 2, 3]
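
The change only swaps the hard-coded model path used by the low-level example. As a minimal sketch of the initialization step this diff touches (the path and the error check are assumptions, not part of the diff; point the path at whatever local GGML model file you have):

```python
# Sketch only; mirrors the example's low-level initialization, not the full script.
import multiprocessing

import llama_cpp

N_THREADS = multiprocessing.cpu_count()

# Assumed path: use whatever locally converted GGML model file you have.
MODEL_PATH = b"../models/7B/ggml-model.bin"

lparams = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(MODEL_PATH, lparams)
if not ctx:
    raise RuntimeError(f"failed to load model from {MODEL_PATH!r}")

# ... tokenize the prompt and evaluate it here, as the full example does ...

llama_cpp.llama_free(ctx)  # release the context when done
```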