From c683f2c76aaacdc3f2cfb7ed72d97c4589c54677 Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Wed, 22 Nov 2023 17:43:16 -0500
Subject: [PATCH] debugging

Add print_fields() debugging instrumentation: declare a print_fields()
template in llama.h, include print.hpp in examples/main/main.cpp, and call
print_fields() on the params, model, context and sampling structures during
generation. A print.o rule is sketched in the Makefile but left commented out.
---
 Makefile               |  5 ++++-
 examples/main/main.cpp | 19 ++++++++++++++++---
 llama.h                | 11 ++++-------
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index 2fed76e36..6b941d622 100644
--- a/Makefile
+++ b/Makefile
@@ -582,7 +582,7 @@ clean:
 # Examples
 #
 
-main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
+main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
@@ -678,6 +678,9 @@ common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
 build-info.o: common/build-info.cpp
 	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
 
+#print.o: print.cpp # print.hpp
+#	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
+
 #
 # Tests
 #
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 98d07bd5f..f2ada3439 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -31,6 +31,8 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+#include "print.hpp"
+
 static llama_context           ** g_ctx;
 static llama_model             ** g_model;
 static gpt_params               * g_params;
@@ -105,7 +107,9 @@ int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
 
-    using Td = type_descriptor<gpt_params>;
+    //using Td = type_descriptor<gpt_params>;
+    print_fields(g_params);
+
     //constexpr auto tbl = descriptor::get_attribute(Td{});
     //constexpr auto tbl_name = REFL_MAKE_CONST_STRING(tbl.name);
 
@@ -180,6 +184,9 @@ int main(int argc, char ** argv) {
     g_model = &model;
     g_ctx = &ctx;
 
+    print_fields(g_model);
+    print_fields(g_ctx);
+
     // load the model and apply lora adapter, if any
     LOG("%s: load the model and apply lora adapter, if any\n", __func__);
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
@@ -239,6 +246,8 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
+
+
     if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
         LOG("tokenize the prompt\n");
         if (params.chatml) {
@@ -258,7 +267,7 @@ int main(int argc, char ** argv) {
         embd_inp.push_back(llama_token_bos(model));
         LOG("embd_inp was considered empty and bos was added: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str());
     }
-
+    //print_fields(embd_inp);
     // Tokenize negative prompt
     std::vector<llama_token> guidance_inp;
     int guidance_offset = 0;
@@ -283,6 +292,7 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    //print_fields(session_tokens);
     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
     if (!session_tokens.empty()) {
@@ -478,7 +488,8 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> embd_guidance;
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
-
+    print_fields(ctx_sampling);
+
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
         if (!embd.empty()) {
@@ -487,6 +498,7 @@ int main(int argc, char ** argv) {
             int max_embd_size = n_ctx - 4;
 
             // Ensure the input doesn't exceed the context size by truncating embd if necessary.
+            //print_fields(embd);
             if ((int) embd.size() > max_embd_size) {
                 const int skipped_tokens = (int) embd.size() - max_embd_size;
                 embd.resize(max_embd_size);
@@ -513,6 +525,7 @@ int main(int argc, char ** argv) {
                 LOG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
                     n_past, n_left, n_ctx, params.n_keep, n_discard);
 
+                print_fields(ctx);
                 llama_kv_cache_seq_rm   (ctx, 0, params.n_keep + 1            , params.n_keep + n_discard + 1);
                 llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
 
diff --git a/llama.h b/llama.h
index ba8656341..b016b48f8 100644
--- a/llama.h
+++ b/llama.h
@@ -115,13 +115,8 @@ extern "C" {
     };
 
     typedef struct llama_token_data : refl::attr::usage::type{
-      llama_token_data( llama_token id,
-                        float logit,
-                        float p):
-        id( id),
-        logit(logit),
-        p(p){
-      }
+      llama_token_data( llama_token id, float logit, float p):
+        id( id),logit(logit),p(p){ }
         llama_token id; // token id
         float logit;    // log-odds of the token
         float p;        // probability of the token
@@ -833,4 +828,6 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
 
 #endif // LLAMA_API_INTERNAL
 
+template <typename T> void print_fields(const T& obj);
+
 #endif // LLAMA_H
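Note: print.hpp itself is not part of this patch; the Makefile rule for print.o stays commented out and llama.h only forward-declares the template. The following is a minimal, self-contained sketch of what such a helper could look like, assuming the refl-cpp library that the patch already references (refl::attr::usage::type, type_descriptor, REFL_MAKE_CONST_STRING) and assuming each printed type carries REFL_AUTO metadata with stream-insertable members. The file name, the demo_params struct and the REFL_AUTO registration below are illustrative only and not part of llama.cpp.

// print_fields_demo.cpp -- hypothetical sketch, not the author's actual print.hpp.
// Assumes refl-cpp (https://github.com/veselink1/refl-cpp) is on the include path.
#include <iostream>
#include "refl.hpp"

// Walk the compile-time member list produced by refl-cpp and dump every
// readable field of obj as "name = value", one per line.
template <typename T>
void print_fields(const T& obj) {
    for_each(refl::reflect<T>().members, [&](auto member) {
        if constexpr (is_readable(member)) {
            std::cout << get_display_name(member) << " = " << member(obj) << '\n';
        }
    });
}

// Illustrative stand-in struct; real llama.cpp types such as gpt_params would
// each need their own REFL_AUTO(...) registration before they could be printed.
struct demo_params {
    int   n_ctx = 512;
    float temp  = 0.8f;
};
REFL_AUTO(type(demo_params), field(n_ctx), field(temp))

int main() {
    demo_params p;
    print_fields(p); // prints "n_ctx = 512" and "temp = 0.8"
    return 0;
}

With a header along these lines, the calls added above that pass pointers (print_fields(g_params), print_fields(g_model), print_fields(g_ctx)) would need to be dereferenced first, since a raw pointer has no reflected members of its own.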