debugging

commit c683f2c76a (parent b598cf84fa)
3 changed files with 24 additions and 11 deletions

Makefile (5 changes)
@@ -582,7 +582,7 @@ clean:
 # Examples
 #
 
 main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
@@ -678,6 +678,9 @@ common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
 build-info.o: common/build-info.cpp
 	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
 
+#print.o: print.cpp # print.hpp
+#	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
+
 #
 # Tests
 #

examples/main/main.cpp (19 changes)

@@ -31,6 +31,8 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+#include "print.hpp"
+
 static llama_context ** g_ctx;
 static llama_model ** g_model;
 static gpt_params * g_params;
@@ -105,7 +107,9 @@ int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
 
-    using Td = type_descriptor<gpt_params>;
+    //using Td = type_descriptor<gpt_params>;
+    print_fields(g_params);
+
     //constexpr auto tbl = descriptor::get_attribute<gpt_params>(Td{});
     //constexpr auto tbl_name = REFL_MAKE_CONST_STRING(tbl.name);
 
@@ -180,6 +184,9 @@ int main(int argc, char ** argv) {
     g_model = &model;
     g_ctx = &ctx;
 
+    print_fields(g_model);
+    print_fields(g_ctx);
+
     // load the model and apply lora adapter, if any
     LOG("%s: load the model and apply lora adapter, if any\n", __func__);
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
@@ -239,6 +246,8 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
+
+
     if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
         LOG("tokenize the prompt\n");
         if (params.chatml) {
@@ -258,7 +267,7 @@ int main(int argc, char ** argv) {
             embd_inp.push_back(llama_token_bos(model));
             LOG("embd_inp was considered empty and bos was added: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str());
         }
-
+        //print_fields(embd_inp);
         // Tokenize negative prompt
         std::vector<llama_token> guidance_inp;
         int guidance_offset = 0;
@@ -283,6 +292,7 @@ int main(int argc, char ** argv) {
            return 1;
        }
 
+       //print_fields(session_tokens);
        // debug message about similarity of saved session, if applicable
        size_t n_matching_session_tokens = 0;
        if (!session_tokens.empty()) {
@@ -478,6 +488,7 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> embd_guidance;
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
+    print_fields(ctx_sampling);
 
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
@@ -487,6 +498,7 @@ int main(int argc, char ** argv) {
            int max_embd_size = n_ctx - 4;
 
            // Ensure the input doesn't exceed the context size by truncating embd if necessary.
+           //print_fields(embd);
            if ((int) embd.size() > max_embd_size) {
                const int skipped_tokens = (int) embd.size() - max_embd_size;
                embd.resize(max_embd_size);
@@ -513,6 +525,7 @@ int main(int argc, char ** argv) {
                LOG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
                    n_past, n_left, n_ctx, params.n_keep, n_discard);
 
+               print_fields(ctx);
                llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1            , params.n_keep + n_discard + 1);
                llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
 

llama.h (11 changes)

@@ -115,13 +115,8 @@ extern "C" {
     };
 
     typedef struct llama_token_data : refl::attr::usage::type{
-        llama_token_data( llama_token id,
-                          float logit,
-                          float p):
-            id( id),
-            logit(logit),
-            p(p){
-        }
+        llama_token_data( llama_token id, float logit, float p):
+            id( id),logit(logit),p(p){ }
         llama_token id; // token id
         float logit;    // log-odds of the token
         float p;        // probability of the token
@@ -833,4 +828,6 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
 
 #endif // LLAMA_API_INTERNAL
 
+template<typename T> void print_fields(const T& obj);
+
 #endif // LLAMA_H
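
Note: the definition of print_fields lives in the new print.hpp, which this diff does not show; llama.h only carries the forward declaration. As a rough sketch of what such a helper could look like with refl-cpp (the library implied by the refl::attr::usage::type annotation on llama_token_data), something like the following would compile; the file contents, the pointer overload, and the field handling are assumptions for illustration, not the commit's actual code:

// print.hpp -- hypothetical sketch, assuming refl-cpp and a REFL_AUTO
// registration for each type passed in (e.g. gpt_params).
#pragma once

#include <iostream>
#include "refl.hpp"

template<typename T>
void print_fields(const T& obj) {
    // Walk the compile-time field list refl-cpp generates for T and
    // stream "name = value"; assumes each field supports operator<<.
    refl::util::for_each(refl::reflect<T>().members, [&](auto member) {
        std::cout << member.name.c_str() << " = " << member(obj) << "\n";
    });
}

// The call sites in main.cpp pass pointers (g_params, g_ctx, ...), so a
// dereferencing overload is needed for those calls to compile.
template<typename T>
void print_fields(T* obj) {
    if (obj) {
        print_fields(*obj);
    }
}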