From cd097710d783292480dfc39e37ebfd5293e02920 Mon Sep 17 00:00:00 2001 From: slaren Date: Wed, 9 Oct 2024 12:50:19 +0200 Subject: [PATCH] update ngram-cache --- common/ngram-cache.cpp | 72 +++++++++++++++---------------- common/ngram-cache.h | 38 ++++++++-------- examples/lookup/lookup-create.cpp | 6 +-- examples/lookup/lookup-merge.cpp | 8 ++-- examples/lookup/lookup-stats.cpp | 18 ++++---- examples/lookup/lookup.cpp | 22 +++++----- 6 files changed, 82 insertions(+), 82 deletions(-) diff --git a/common/ngram-cache.cpp b/common/ngram-cache.cpp index 7953c723e..a9dfb6714 100644 --- a/common/ngram-cache.cpp +++ b/common/ngram-cache.cpp @@ -8,7 +8,7 @@ #include #include -void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max, +void common_ngram_cache_update(common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector & inp, int nnew, bool print_progress) { const int64_t t_start_ms = ggml_time_ms(); const int64_t inp_size = inp.size(); @@ -20,16 +20,16 @@ void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, in const int64_t i_start = std::max(inp_size - nnew, ngram_size); for (int64_t i = i_start; i < inp_size; ++i) { const int64_t ngram_start = i - ngram_size; - llama_ngram ngram(&inp[ngram_start], ngram_size); + common_ngram ngram(&inp[ngram_start], ngram_size); const llama_token token = inp[i]; - llama_ngram_cache::iterator part_it = ngram_cache.find(ngram); + common_ngram_cache::iterator part_it = ngram_cache.find(ngram); if (part_it == ngram_cache.end()) { - llama_ngram_cache_part part; + common_ngram_cache_part part; part.emplace(token, 1); ngram_cache.emplace(ngram, part); } else { - llama_ngram_cache_part::iterator token_count_it = part_it->second.find(token); + common_ngram_cache_part::iterator token_count_it = part_it->second.find(token); if (token_count_it == part_it->second.end()) { part_it->second.emplace(token, 1); } else { @@ -62,12 +62,12 @@ constexpr int draft_min_sample_size_strict[LLAMA_NGRAM_MAX] = { 4, 3, 2, 2}; constexpr int draft_min_percent_strict[LLAMA_NGRAM_MAX] = {75, 66, 66, 66}; // Helper function that tries to draft a token from only the static ngram cache: -static llama_token try_draft(llama_ngram_cache & nc_static, const llama_ngram ngram_static) { - llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static); +static llama_token try_draft(common_ngram_cache & nc_static, const common_ngram ngram_static) { + common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static); if (part_static_it == nc_static.end()) { return -1; } - const llama_ngram_cache_part part_static = part_static_it->second; + const common_ngram_cache_part part_static = part_static_it->second; int max_count_static = 0; int sum_count_static = 0; @@ -95,19 +95,19 @@ static llama_token try_draft(llama_ngram_cache & nc_static, const llama_ngram ng // Try to draft a token from primary cache (context/dynamic), validate with static cache: static llama_token try_draft( - llama_ngram_cache & nc_primary, const std::vector & ngrams_primary, llama_ngram_cache_part & part_static, + common_ngram_cache & nc_primary, const std::vector & ngrams_primary, common_ngram_cache_part & part_static, const int * min_sample_size, const int * min_percent) { llama_token drafted_token = -1; for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == -1; --i) { - const llama_ngram ngram_primary = ngrams_primary[i]; + const common_ngram ngram_primary = ngrams_primary[i]; - llama_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary); + common_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary); if (part_primary_it == nc_primary.end()) { continue; } - const llama_ngram_cache_part part_primary = part_primary_it->second; + const common_ngram_cache_part part_primary = part_primary_it->second; int max_count_primary = 0; int max_count_static = 0; @@ -117,7 +117,7 @@ static llama_token try_draft( for (std::pair token_count_primary : part_primary) { const llama_token token = token_count_primary.first; - llama_ngram_cache_part::iterator token_count_static_it = part_static.find(token); + common_ngram_cache_part::iterator token_count_static_it = part_static.find(token); const int32_t count_primary = token_count_primary.second; const int32_t count_static = token_count_static_it != part_static.end() ? 100*token_count_static_it->second : 1; @@ -142,9 +142,9 @@ static llama_token try_draft( return drafted_token; } -void llama_ngram_cache_draft( +void common_ngram_cache_draft( std::vector & inp, std::vector & draft, int n_draft, int ngram_min, int ngram_max, - llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static + common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static ) { GGML_ASSERT(draft.size() == 1); const int inp_size = inp.size(); @@ -157,21 +157,21 @@ void llama_ngram_cache_draft( llama_token drafted_token = -1; const int ngram_start_static = inp_size-LLAMA_NGRAM_STATIC + draft.size()-1; - llama_ngram ngram_static; + common_ngram ngram_static; for (int j = ngram_start_static; j < ngram_start_static + LLAMA_NGRAM_STATIC; ++j) { ngram_static.tokens[j-ngram_start_static] = get_token(inp, draft, j); } - llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static); - llama_ngram_cache_part part_static; + common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static); + common_ngram_cache_part part_static; if (part_static_it != nc_static.end()) { part_static = part_static_it->second; } // cd = context + dynamic - std::vector ngrams_cd; + std::vector ngrams_cd; for (int ngram_size_cd = ngram_min; ngram_size_cd <= ngram_max; ++ngram_size_cd) { const int ngram_start_cd = inp_size-ngram_size_cd + draft.size()-1; - llama_ngram ngram_cd; + common_ngram ngram_cd; for (int j = ngram_start_cd; j < ngram_start_cd + ngram_size_cd; ++j) { ngram_cd.tokens[j-ngram_start_cd] = get_token(inp, draft, j); } @@ -196,16 +196,16 @@ void llama_ngram_cache_draft( } } -void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename) { +void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename) { std::ofstream file_out(filename, std::ios::binary); - for (std::pair item : ngram_cache) { - const llama_ngram ngram = item.first; - llama_ngram_cache_part token_counts = item.second; + for (std::pair item : ngram_cache) { + const common_ngram ngram = item.first; + common_ngram_cache_part token_counts = item.second; GGML_ASSERT(!token_counts.empty()); const int32_t ntokens = token_counts.size(); GGML_ASSERT(ntokens > 0); - file_out.write(reinterpret_cast(&ngram), sizeof(llama_ngram)); + file_out.write(reinterpret_cast(&ngram), sizeof(common_ngram)); file_out.write(reinterpret_cast(&ntokens), sizeof(int32_t)); for (std::pair item2 : token_counts) { const llama_token token = item2.first; @@ -219,14 +219,14 @@ void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filen } -llama_ngram_cache llama_ngram_cache_load(std::string & filename) { +common_ngram_cache common_ngram_cache_load(std::string & filename) { std::ifstream hashmap_file(filename, std::ios::binary); if (!hashmap_file) { throw std::ifstream::failure("Unable to open file " + filename); } - llama_ngram_cache ngram_cache; + common_ngram_cache ngram_cache; - llama_ngram ngram; + common_ngram ngram; int32_t ntokens; llama_token token; int32_t count; @@ -235,11 +235,11 @@ llama_ngram_cache llama_ngram_cache_load(std::string & filename) { char * ntokensc = reinterpret_cast(&ntokens); char * tokenc = reinterpret_cast(&token); char * countc = reinterpret_cast(&count); - while(hashmap_file.read(ngramc, sizeof(llama_ngram))) { + while(hashmap_file.read(ngramc, sizeof(common_ngram))) { GGML_ASSERT(!hashmap_file.eof()); GGML_ASSERT(hashmap_file.read(ntokensc, sizeof(int32_t))); GGML_ASSERT(ntokens > 0); - llama_ngram_cache_part token_counts; + common_ngram_cache_part token_counts; for (int i = 0; i < ntokens; ++i) { GGML_ASSERT(!hashmap_file.eof()); @@ -257,12 +257,12 @@ llama_ngram_cache llama_ngram_cache_load(std::string & filename) { return ngram_cache; } -void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add) { - for (std::pair ngram_part : ngram_cache_add) { - const llama_ngram ngram = ngram_part.first; - llama_ngram_cache_part part = ngram_part.second; +void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add) { + for (std::pair ngram_part : ngram_cache_add) { + const common_ngram ngram = ngram_part.first; + common_ngram_cache_part part = ngram_part.second; - llama_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram); + common_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram); if (part_merged_it == ngram_cache_target.end()) { ngram_cache_target.emplace(ngram, part); continue; @@ -273,7 +273,7 @@ void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram const int32_t count = token_count.second; GGML_ASSERT(count > 0); - llama_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token); + common_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token); if (token_count_merged_it == part_merged_it->second.end()) { part_merged_it->second.emplace(token, count); continue; diff --git a/common/ngram-cache.h b/common/ngram-cache.h index ab4c9b376..09c2b0319 100644 --- a/common/ngram-cache.h +++ b/common/ngram-cache.h @@ -12,22 +12,22 @@ // Data structures to map n-grams to empirical token probabilities: -struct llama_ngram { +struct common_ngram { llama_token tokens[LLAMA_NGRAM_MAX]; - llama_ngram() { + common_ngram() { for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) { tokens[i] = -1; } } - llama_ngram(const llama_token * input, const int ngram_size) { + common_ngram(const llama_token * input, const int ngram_size) { for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) { tokens[i] = i < ngram_size ? input[i] : -1; } } - bool operator==(const llama_ngram & other) const { + bool operator==(const common_ngram & other) const { for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) { if (tokens[i] != other.tokens[i]) { return false; @@ -37,28 +37,28 @@ struct llama_ngram { } }; -struct llama_token_hash_function { +struct common_token_hash_function { size_t operator()(const llama_token token) const { // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ return token * 11400714819323198485llu; } }; -struct llama_ngram_hash_function { - size_t operator()(const llama_ngram & ngram) const { - size_t hash = llama_token_hash_function{}(ngram.tokens[0]); +struct common_ngram_hash_function { + size_t operator()(const common_ngram & ngram) const { + size_t hash = common_token_hash_function{}(ngram.tokens[0]); for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) { - hash ^= llama_token_hash_function{}(ngram.tokens[i]); + hash ^= common_token_hash_function{}(ngram.tokens[i]); } return hash; } }; // token -> number of times token has been seen -typedef std::unordered_map llama_ngram_cache_part; +typedef std::unordered_map common_ngram_cache_part; // n-gram -> empirical distribution of following tokens -typedef std::unordered_map llama_ngram_cache; +typedef std::unordered_map common_ngram_cache; // Update an ngram cache with tokens. @@ -70,8 +70,8 @@ typedef std::unordered_map & inp_data, int nnew, bool print_progress); +void common_ngram_cache_update( + common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector & inp_data, int nnew, bool print_progress); // Try to draft tokens from ngram caches. // inp: the tokens generated so far. @@ -81,21 +81,21 @@ void llama_ngram_cache_update( // nc_context: ngram cache based on current context. // nc_dynamic: ngram cache based on previous user generations. // nc_static: ngram cache generated from a large text corpus, used for validation. -void llama_ngram_cache_draft( +void common_ngram_cache_draft( std::vector & inp, std::vector & draft, int n_draft, int ngram_min, int ngram_max, - llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static); + common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static); // Save an ngram cache to a file. // ngram_cache: the ngram cache to save. // filename: the path under which to save the ngram cache. -void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename); +void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename); -// Load an ngram cache saved with llama_ngram_cache_save. +// Load an ngram cache saved with common_ngram_cache_save. // filename: the path from which to load the ngram cache. // returns: an ngram cache containing the information saved to filename. -llama_ngram_cache llama_ngram_cache_load(std::string & filename); +common_ngram_cache common_ngram_cache_load(std::string & filename); // Merge two ngram caches. // ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add. // ngram_cache_add: the ngram cache to add to ngram_cache_target. -void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add); +void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add); diff --git a/examples/lookup/lookup-create.cpp b/examples/lookup/lookup-create.cpp index f76b7581f..28cedec61 100644 --- a/examples/lookup/lookup-create.cpp +++ b/examples/lookup/lookup-create.cpp @@ -35,11 +35,11 @@ int main(int argc, char ** argv){ fprintf(stderr, "%s: tokenization done\n", __func__); - llama_ngram_cache ngram_cache; - llama_ngram_cache_update(ngram_cache, LLAMA_NGRAM_STATIC, LLAMA_NGRAM_STATIC, inp, inp.size(), true); + common_ngram_cache ngram_cache; + common_ngram_cache_update(ngram_cache, LLAMA_NGRAM_STATIC, LLAMA_NGRAM_STATIC, inp, inp.size(), true); fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str()); - llama_ngram_cache_save(ngram_cache, params.lookup_cache_static); + common_ngram_cache_save(ngram_cache, params.lookup_cache_static); return 0; } diff --git a/examples/lookup/lookup-merge.cpp b/examples/lookup/lookup-merge.cpp index 81e2b0436..6871c0f5f 100644 --- a/examples/lookup/lookup-merge.cpp +++ b/examples/lookup/lookup-merge.cpp @@ -33,15 +33,15 @@ int main(int argc, char ** argv){ } fprintf(stderr, "lookup-merge: loading file %s\n", args[0].c_str()); - llama_ngram_cache ngram_cache_merged = llama_ngram_cache_load(args[0]); + common_ngram_cache ngram_cache_merged = common_ngram_cache_load(args[0]); for (size_t i = 1; i < args.size()-1; ++i) { fprintf(stderr, "lookup-merge: loading file %s\n", args[i].c_str()); - llama_ngram_cache ngram_cache = llama_ngram_cache_load(args[i]); + common_ngram_cache ngram_cache = common_ngram_cache_load(args[i]); - llama_ngram_cache_merge(ngram_cache_merged, ngram_cache); + common_ngram_cache_merge(ngram_cache_merged, ngram_cache); } fprintf(stderr, "lookup-merge: saving file %s\n", args.back().c_str()); - llama_ngram_cache_save(ngram_cache_merged, args.back()); + common_ngram_cache_save(ngram_cache_merged, args.back()); } diff --git a/examples/lookup/lookup-stats.cpp b/examples/lookup/lookup-stats.cpp index 286a85c46..d73290d66 100644 --- a/examples/lookup/lookup-stats.cpp +++ b/examples/lookup/lookup-stats.cpp @@ -37,9 +37,9 @@ int main(int argc, char ** argv){ std::vector inp; inp = common_tokenize(ctx, params.prompt, true, true); - llama_ngram_cache ngram_cache_context; - llama_ngram_cache ngram_cache_dynamic; - llama_ngram_cache ngram_cache_static; + common_ngram_cache ngram_cache_context; + common_ngram_cache ngram_cache_dynamic; + common_ngram_cache ngram_cache_static; int64_t t_draft_flat_us = 0; int64_t t_draft_us = 0; @@ -48,7 +48,7 @@ int main(int argc, char ** argv){ if (!params.lookup_cache_static.empty()) { try { - ngram_cache_static = llama_ngram_cache_load(params.lookup_cache_static); + ngram_cache_static = common_ngram_cache_load(params.lookup_cache_static); } catch (std::ifstream::failure const &) { LOG_ERR("failed to open static lookup cache: %s", params.lookup_cache_static.c_str()); exit(1); @@ -57,7 +57,7 @@ int main(int argc, char ** argv){ if (!params.lookup_cache_dynamic.empty()) { try { - ngram_cache_dynamic = llama_ngram_cache_load(params.lookup_cache_dynamic); + ngram_cache_dynamic = common_ngram_cache_load(params.lookup_cache_dynamic); } catch (std::ifstream::failure const &) {} // if the file does not exist it will simply be created at the end of the program } @@ -86,7 +86,7 @@ int main(int argc, char ** argv){ { const int64_t t_start_draft_us = ggml_time_us(); - llama_ngram_cache_draft(pseudo_output, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static); + common_ngram_cache_draft(pseudo_output, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static); t_draft_us += ggml_time_us() - t_start_draft_us; } @@ -105,7 +105,7 @@ int main(int argc, char ** argv){ { const int64_t t_start_draft_us = ggml_time_us(); - llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false); + common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false); t_draft_us += ggml_time_us() - t_start_draft_us; } } @@ -115,7 +115,7 @@ int main(int argc, char ** argv){ pseudo_output.push_back(inp_slice[pseudo_output.size()]); { const int64_t t_start_draft_us = ggml_time_us(); - llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false); + common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, pseudo_output, 1, false); t_draft_us += ggml_time_us() - t_start_draft_us; } } @@ -133,7 +133,7 @@ int main(int argc, char ** argv){ } // After each chunk, update the dynamic ngram cache with the context ngram cache: - llama_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context); + common_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context); ngram_cache_context.clear(); } diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp index d77d4754f..32549904e 100644 --- a/examples/lookup/lookup.cpp +++ b/examples/lookup/lookup.cpp @@ -40,20 +40,20 @@ int main(int argc, char ** argv){ std::vector inp; inp = common_tokenize(ctx, params.prompt, true, true); - llama_ngram_cache ngram_cache_context; - llama_ngram_cache ngram_cache_dynamic; - llama_ngram_cache ngram_cache_static; + common_ngram_cache ngram_cache_context; + common_ngram_cache ngram_cache_dynamic; + common_ngram_cache ngram_cache_static; int64_t t_draft_flat_us = 0; int64_t t_draft_us = 0; { // Fill up context ngram cache with tokens from user input: const int64_t t_start_draft_us = ggml_time_us(); - llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, inp.size(), false); + common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, inp.size(), false); if (!params.lookup_cache_static.empty()) { try { - ngram_cache_static = llama_ngram_cache_load(params.lookup_cache_static); + ngram_cache_static = common_ngram_cache_load(params.lookup_cache_static); } catch (std::ifstream::failure const &) { LOG_ERR("failed to open static lookup cache: %s", params.lookup_cache_static.c_str()); exit(1); @@ -62,7 +62,7 @@ int main(int argc, char ** argv){ if (!params.lookup_cache_dynamic.empty()) { try { - ngram_cache_dynamic = llama_ngram_cache_load(params.lookup_cache_dynamic); + ngram_cache_dynamic = common_ngram_cache_load(params.lookup_cache_dynamic); } catch (std::ifstream::failure const &) {} // if the file does not exist it will simply be created at the end of the program } @@ -152,7 +152,7 @@ int main(int argc, char ** argv){ { // Update context ngram cache with the newly accepted token: const int64_t t_start_draft_us = ggml_time_us(); - llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, 1, false); + common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, 1, false); t_draft_us += ggml_time_us() - t_start_draft_us; } @@ -178,7 +178,7 @@ int main(int argc, char ** argv){ { // Update context ngram cache with the newly accepted token: const int64_t t_start_draft_us = ggml_time_us(); - llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, 1, false); + common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, 1, false); t_draft_us += ggml_time_us() - t_start_draft_us; } break; @@ -200,7 +200,7 @@ int main(int argc, char ** argv){ GGML_ASSERT(draft[0] == inp.back()); const int64_t t_start_draft_us = ggml_time_us(); - llama_ngram_cache_draft(inp, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static); + common_ngram_cache_draft(inp, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static); for (size_t i = 1; i < draft.size(); ++i) { common_batch_add(batch_tgt, draft[i], n_past + i, { 0 }, true); @@ -218,8 +218,8 @@ int main(int argc, char ** argv){ auto t_dec_end = ggml_time_us(); // Update dynamic ngram cache with context ngram cache and save it to disk: - llama_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context); - llama_ngram_cache_save(ngram_cache_dynamic, params.lookup_cache_dynamic); + common_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context); + common_ngram_cache_save(ngram_cache_dynamic, params.lookup_cache_dynamic); LOG("\n\n");