initial commit, going through initializations
This commit is contained in:
parent 23b5e12eb5
commit cae8f50b1a
3 changed files with 118 additions and 0 deletions
examples/lookup/CMakeLists.txt (Normal file, 5 additions)
@@ -0,0 +1,5 @@
+set(TARGET lookup)
+add_executable(${TARGET} lookup.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/lookup/README.md (Normal file, 0 additions)
examples/lookup/lookup.cpp (Normal file, 113 additions)
@@ -0,0 +1,113 @@
+#include "common.h"
+#include "llama.h"
+
+#include <cmath>
+#include <cstdio>
+#include <string>
+#include <vector>
+
+/*
+def find_candidate_pred_tokens(input_ids, max_ngram_size=3, num_pred_tokens=10):
+    input_length = input_ids.size(1)
+
+    for ngram_size in range(max_ngram_size, 0, -1):
+        # Extract the last n tokens as our search ngram
+        ngram = input_ids[0, -ngram_size:].tolist()
+
+        # Create sliding windows of size ngram_size
+        windows = input_ids.unfold(dimension=1, size=ngram_size, step=1)
+
+        # Convert ngram to a tensor for comparison
+        ngram_tensor = torch.tensor(ngram, device=input_ids.device).unsqueeze(0)
+
+        # Find where the windows match the ngram
+        matches = (windows == ngram_tensor).all(dim=2)
+
+        # Get the indices of matches
+        match_indices = matches.nonzero(as_tuple=True)[1]
+
+        # Iterate through match indices to find a valid continuation
+        for idx in match_indices:
+            start_idx = idx + ngram_size
+            end_idx = start_idx + num_pred_tokens
+            # Ensure we don't go beyond the length of input_ids and avoid self-match
+            if end_idx <= input_length and start_idx < input_length - ngram_size:
+                return input_ids[0, start_idx:end_idx]
+
+    # If no match is found, return an empty tensor
+    return torch.tensor([], dtype=torch.long, device=input_ids.device)
+*/
+
+int main(int argc, char ** argv){
+    gpt_params params;
+
+    if(gpt_params_parse(argc, argv, params) == false){
+        return 1;
+    }
+
+    // maximum n-grams to search for in prompt
+    const int max_ngram_size = 3;
+
+    // length of the candidate sequence, if match is found
+    const int num_pred_tokens = 10;
+
+#ifndef LOG_DISABLE_LOGS
+    log_set_target(log_filename_generator("lookup", "log"));
+    LOG_TEE("Log start\n");
+    log_dump_cmdline(argc, argv);
+#endif // LOG_DISABLE_LOGS
+
+    // init llama.cpp
+    llama_backend_init(params.numa);
+
+    llama_model * model = NULL;
+    llama_context * ctx = NULL;
+
+    // load the model
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+
+    // tokenize the prompt
+    const bool add_bos = llama_should_add_bos_token(model);
+    LOG("add_bos tgt: %d\n", add_bos);
+
+    std::vector<llama_token> inp;
+    inp = ::llama_tokenize(ctx, params.prompt, add_bos, true);
+
+    const int max_context_size = llama_n_ctx(ctx);
+    const int max_tokens_list_size = max_context_size - 4;
+
+    if ((int) inp.size() > max_tokens_list_size) {
+        fprintf(stderr, "%s: error: prompt too long (%d tokens, max %d)\n", __func__, (int) inp.size(), max_tokens_list_size);
+        return 1;
+    }
+
+    fprintf(stderr, "\n\n");
+
+    for (auto id : inp) {
+        fprintf(stderr, "%s", llama_token_to_piece(ctx, id).c_str());
+    }
+
+    fflush(stderr);
+
+    const int n_input = inp.size();
+
+    const auto t_enc_start = ggml_time_us();
+
+    llama_decode(ctx, llama_batch_get_one( inp.data(), n_input - 1, 0, 0));
+    llama_decode(ctx, llama_batch_get_one(&inp.back(), 1, n_input - 1, 0));
+
+    const auto t_enc_end = ggml_time_us();
+
+    int n_accept = 0;
+
+    int n_past = inp.size();
+
+    bool has_eos = false;
+
+    struct llama_sampling_context * ctx_sampling = llama_sampling_init(params.sparams);
+
+    const auto t_dec_start = ggml_time_us();
+
+}
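The Python block quoted in the comment above is the reference for the prompt-lookup idea; the C++ that actually performs the search is not part of the hunk shown here. Below is a minimal, self-contained sketch of that n-gram search as a loose C++ translation, assuming plain int token ids in place of tensors; find_candidate_pred_tokens, max_ngram_size, and num_pred_tokens mirror the pseudocode and are not llama.cpp API.

// Sketch only, not part of the commit: find the continuation of the last
// n-gram of input_ids somewhere earlier in input_ids, largest n-gram first.
#include <algorithm>
#include <cstdio>
#include <vector>

static std::vector<int> find_candidate_pred_tokens(const std::vector<int> & input_ids,
                                                   int max_ngram_size = 3,
                                                   int num_pred_tokens = 10) {
    const int n = (int) input_ids.size();

    for (int ngram_size = max_ngram_size; ngram_size > 0; --ngram_size) {
        if (n < 2*ngram_size) {
            continue; // need room for the pattern plus one earlier occurrence
        }

        // the last ngram_size tokens are the search pattern
        const int * ngram = input_ids.data() + n - ngram_size;

        // slide a window over the earlier tokens, excluding the trailing
        // pattern itself so we never return the self-match
        for (int i = 0; i + ngram_size <= n - ngram_size; ++i) {
            bool match = true;
            for (int j = 0; j < ngram_size; ++j) {
                if (input_ids[i + j] != ngram[j]) { match = false; break; }
            }
            if (!match) {
                continue;
            }

            // return up to num_pred_tokens tokens that followed the match
            const int start = i + ngram_size;
            const int end   = std::min(start + num_pred_tokens, n);
            return std::vector<int>(input_ids.begin() + start, input_ids.begin() + end);
        }
    }

    // no match found: empty candidate list
    return {};
}

int main() {
    const std::vector<int> inp = {1, 2, 3, 4, 5, 1, 2, 3};
    for (int id : find_candidate_pred_tokens(inp)) {
        printf("%d ", id); // prints "4 5 1 2 3": the continuation of the earlier "1 2 3"
    }
    printf("\n");
    return 0;
}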