compiling and running

2023-11-22 16:46:32 -05:00 · 2023-11-22 16:46:32 -05:00 · b598cf84fa
commit b598cf84fa
parent 09a1f053e7
12 changed files with 158 additions and 103 deletions
--- a/4
+++ b/4
@ -734,5 +734,5 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
 tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-c.o: tests/test-c.c llama.h
-	$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
+tests/test-c.o: tests/test-c.cpp llama.h # C++ source now, so build it with the C++ compiler and C++ flags
+	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
--- a/common/common.h
+++ b/common/common.h
@ -42,7 +42,7 @@ extern char const *LLAMA_BUILD_TARGET;
 //
 int32_t get_num_physical_cores();

-struct gpt_params {
+struct gpt_params : refl::attr::usage::type{
    uint32_t seed                           = -1;    // RNG seed

    int32_t n_threads                       = get_num_physical_cores();
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@ -219,7 +219,7 @@ namespace grammar_parser {

                // in original rule, replace previous symbol with reference to generated rule
                out_elements.resize(last_sym_start);
-		llama_grammar_element(LLAMA_GRETYPE_RULE_REF, sub_rule_id) a;
+		llama_grammar_element a(LLAMA_GRETYPE_RULE_REF, sub_rule_id);
                out_elements.push_back(a);

                pos = parse_space(pos + 1, is_nested);
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@ -121,18 +121,18 @@ int main(int argc, char ** argv) {
        for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
            const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));

-            llama_batch batch_view = {
-	      .n_tokens=n_tokens,
-	      .token=batch.token    + i,
-	      .embd=nullptr,
-	      .pos=batch.pos      + i,
-              .n_seq_id=batch.n_seq_id + i,
-	      .seq_id=batch.seq_id   + i,
-	      .logits=batch.logits   + i,
-	      .all_pos_0=0,
-	      .all_pos_1=0,
-	      .all_seq_id=0, // unused
-            };
+            llama_batch batch_view(
+	      /* .n_tokens= */ n_tokens,
+	      /* .token=    */  batch.token    + i,
+	      /* .embd=     */ nullptr,
+	      /* .pos= */      batch.pos      + i,
+              /* .n_seq_id= */ batch.n_seq_id + i,
+	      /* .seq_id= */ batch.seq_id   + i,
+	      /* .logits= */ batch.logits   + i,
+	      /* .all_pos_0= */0,
+	      /* .all_pos_1= */0,
+	      /* .all_seq_id= */0 // unused
+				   );

            const int ret = llama_decode(ctx, batch_view);
            if (ret != 0) {
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@ -169,13 +169,13 @@ int main(int argc, char ** argv) {
            candidates.reserve(n_vocab);

            for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-                candidates.emplace_back(llama_token_data{
-		    .id=token_id,
-		    .logit=logits[token_id],
-		    .p=0.0f });
+	      candidates.emplace_back(llama_token_data(
+						       token_id,
+						       logits[token_id],
+						       0.0f ));
            }

-            llama_token_data_array candidates_p = { .data=candidates.data(), .size=candidates.size(), .sorted=false };
+            llama_token_data_array candidates_p (candidates.data(), candidates.size(), false );

            const int   top_k = 40;
            const float top_p = 0.9f;
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@ -75,18 +75,18 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
        if (n_eval > n_batch) {
            n_eval = n_batch;
        }
-        llama_batch batch = {
-	  .n_tokens=int32_t(n_eval),
-	  .token=nullptr,
-	  .embd=(image_embed->embed+i*n_embd),
-	  .pos=nullptr,
-	  .n_seq_id=nullptr,
-	  .seq_id=nullptr,
-	  .logits=nullptr,
-	  .all_pos_0=*n_past,
-	  .all_pos_1=1,
-	  .all_seq_id=0
-	};
+        llama_batch batch(
+			  /* .n_tokens= */int32_t(n_eval),
+	  /* .token= */nullptr,
+	  /* .embd= */(image_embed->embed+i*n_embd),
+	  /* .pos= */nullptr,
+	  /* .n_seq_id= */nullptr,
+	  /* .seq_id= */nullptr,
+	  /* .logits= */nullptr,
+	  /* .all_pos_0= */*n_past,
+	  /* .all_pos_1= */1,
+	  /* .all_seq_id= */0
+			  );
        if (llama_decode(ctx_llama, batch)) {
            fprintf(stderr, "%s : failed to eval\n", __func__);
            return false;
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -99,11 +99,16 @@ static void sigint_handler(int signo) {
    }
 }
 #endif
+using namespace refl;

 int main(int argc, char ** argv) {
    gpt_params params;
    g_params = &params;

+    using Td = type_descriptor<gpt_params>;
+    //constexpr auto tbl = descriptor::get_attribute<gpt_params>(Td{}); 
+    //constexpr auto tbl_name = REFL_MAKE_CONST_STRING(tbl.name);
+	
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;
    }
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@ -67,10 +67,10 @@ int main(int argc, char ** argv) {
        std::vector<llama_token_data> candidates;
        candidates.reserve(n_vocab);
        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates.emplace_back(llama_token_data{
-		.id=token_id,
-		.logit=logits[token_id],
-		.p=0.0f});
+	  candidates.emplace_back(llama_token_data(
+						   token_id,
+						   logits[token_id],
+						   0.0f));
        }
        llama_token_data_array candidates_p(candidates.data(), candidates.size(), false );
        auto next_token = llama_sample_token(ctx, &candidates_p);
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -31,8 +31,16 @@

 using json = nlohmann::json;

-struct server_params
+struct server_params : refl::attr::usage::type
 {
+  
+  // Default constructor. Members not listed here fall back to their in-class
+  // default initializers (public_path and port have them below). Never name a
+  // member as its own initializer (port(port)): that reads it before it is set.
+  server_params():
+    hostname( "127.0.0.1"),
+    write_timeout( 600) {}
+  
    std::string hostname = "127.0.0.1";
    std::string public_path = "examples/server/public";
    int32_t port = 8080;
@ -522,6 +530,28 @@ struct llama_server_context
    std::vector<task_result> queue_results;
    std::mutex mutex_tasks;
    std::mutex mutex_results;
+  llama_server_context(): // default ctor: null handles, empty batch, default flags
+    model(nullptr),
+    ctx(nullptr),
+    clp_ctx(nullptr),
+    params(), // value-initialize; params(params) would copy the member from itself before it is constructed
+    batch(0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0), // empty batch: no tokens, all arrays null
+    multimodal(false),
+    clean_kv_cache( true),
+    all_slots_are_idle( false),
+    add_bos_token(  true),
+    //int32_t id_gen;
+    //int32_t n_ctx;  // total context for all clients / slots
+    system_need_update(false){}
+    //std::string              system_prompt;
+    //std::vector<llama_token> system_tokens;
+    //std::string name_user;      // this should be the antiprompt
+    //std::string name_assistant;
+    //std::vector<llama_client_slot> slots;
+    //std::vector<task_server> queue_tasks;
+    //std::vector<task_result> queue_results;
+    //std::mutex mutex_tasks;
+    //std::mutex mutex_results;

    ~llama_server_context()
    {
@ -1303,7 +1333,7 @@ struct llama_server_context
            for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
            {
                const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
-                llama_batch batch_view = {
+                llama_batch batch_view(
                    n_tokens,
                    batch.token    + i,
                    nullptr,
@ -1311,8 +1341,8 @@ struct llama_server_context
                    batch.n_seq_id + i,
                    batch.seq_id   + i,
                    batch.logits   + i,
-                    0, 0, 0, // unused
-                };
+                    0, 0, 0 // unused
+		    );
                if (llama_decode(ctx, batch_view))
                {
                    LOG_TEE("%s : failed to eval\n", __func__);
@ -1665,19 +1695,18 @@ struct llama_server_context
        for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
        {
            const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
-            llama_batch batch_view =
-            {
-                .n_tokens=n_tokens,
-                .token=batch.token    + i,
-                .embd=nullptr,
-                .pos=batch.pos      + i,
-                .n_seq_id=batch.n_seq_id + i,
-                .seq_id=batch.seq_id   + i,
-                .logits=batch.logits   + i,
-                .all_pos_0=.0,
-		.all_pos_1=0,
-		.all_seq_id=0, // unused
-            };
+            llama_batch batch_view( // view of n_tokens entries of batch starting at index i
+                /* .n_tokens= */n_tokens,
+                /* .token= */batch.token    + i,
+                /* .embd= */nullptr,
+                /* .pos= */batch.pos      + i,
+                /* .n_seq_id= */batch.n_seq_id + i,
+                /* .seq_id= */batch.seq_id   + i,
+                /* .logits= */batch.logits   + i,
+                /* .all_pos_0= */0, // integer literal: the parameter is llama_pos, not a floating-point value
+                /* .all_pos_1= */0,
+                /* .all_seq_id= */0 // unused
+                );

            const int ret = llama_decode(ctx, batch_view);
            if (ret != 0)
@ -1724,10 +1753,10 @@ struct llama_server_context
                    slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
                }

-                llama_token_data_array cur_p = {
-		  .data=slot.ctx_sampling->cur.data(),
-		  .size=slot.ctx_sampling->cur.size(),
-		  .sorted=false };
+                llama_token_data_array cur_p(
+					     slot.ctx_sampling->cur.data(),
+					     slot.ctx_sampling->cur.size(),
+					     false );
                result.tok = id;

                const int32_t n_probs = slot.sparams.n_probs;
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@ -124,15 +124,15 @@ int main(int argc, char ** argv) {
            candidates.reserve(n_vocab);

            for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-                candidates.emplace_back(llama_token_data{ .id=token_id,
-							  .logit=logits[token_id],
-							  .p=0.0f });
+	      candidates.emplace_back(llama_token_data( token_id,
+							logits[token_id],
+							0.0f ));
            }

-            llama_token_data_array candidates_p = {
-	      .data=candidates.data(),
-	      .size=candidates.size(),
-	      .sorted=false };
+            llama_token_data_array candidates_p(
+						candidates.data(),
+						candidates.size(),
+						false );

            // sample the most likely token
            const llama_token new_token_id = llama_sample_token_greedy(ctx, &candidates_p);
--- a/llama.cpp
+++ b/llama.cpp
@ -9321,18 +9321,18 @@ int llama_eval_embd(
                             int   n_past) {
    llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);

-    llama_batch batch = {
-      .n_tokens=n_tokens,
-      .token=nullptr,
-      .embd=embd,
-      .pos=nullptr,
-      .n_seq_id=nullptr,
-      .seq_id=nullptr,
-      .logits=nullptr,
-      .all_pos_0=n_past,
-      .all_pos_1=1,
-      .all_seq_id=0
-    };
+    llama_batch batch(
+		      n_tokens,
+		      nullptr,
+		      embd,
+		      nullptr,
+		      nullptr,
+		      nullptr,
+		      nullptr,
+		      n_past,
+		      1,
+		      0
+		      );

    const int ret = llama_decode_internal(*ctx, batch);
    if (ret < 0) {
@ -9352,34 +9352,32 @@ struct llama_batch llama_batch_get_one(
                 int32_t   n_tokens,
               llama_pos   pos_0,
            llama_seq_id   seq_id) {
-    llama_batch b ={
-        .n_tokens       = n_tokens,
-        .token          = tokens,
-        .embd           = nullptr,
-        .pos            = nullptr,
-        .n_seq_id       = nullptr,
-        .seq_id         = nullptr,
-        .logits         = nullptr,
-        .all_pos_0      = pos_0,
-        .all_pos_1      = 1,
-        .all_seq_id     = seq_id,
-    };
+  llama_batch b(
+		n_tokens,
+		tokens,
+		nullptr,
+		nullptr,
+		nullptr,
+		nullptr,
+		nullptr,
+		pos_0,
+		1,
+		seq_id);
    return b;
 }

 struct llama_batch llama_batch_init(int32_t n_tokens, int32_t embd, int32_t n_seq_max) {
-    llama_batch batch = {
-      .n_tokens = 0,
-      .embd=nullptr, 
-      .pos=nullptr,
-      .n_seq_id=nullptr,
-      .seq_id=nullptr,
-      .logits=nullptr,
-      .all_pos_0=0,
-      .all_pos_1=0,
-      .all_seq_id=0
-
-    };
+  llama_batch batch(
+		    /* .n_tokens = */ 0,
+		    /* .token */  (llama_token  *)nullptr,
+		    /* .embd= */  (float        *)nullptr,
+		    /* .pos= */  (llama_pos    *)nullptr,
+		    /* .n_seq_id= */ (int32_t      *)nullptr,
+		    /* .seq_id= */  (llama_seq_id **)nullptr,
+		    /* .logits= */ (int8_t       *)nullptr,
+		    /* .all_pos_0= */ 0,
+		    /* .all_pos_1= */ 0 ,
+		    /* .all_seq_id= */ 0);

    if (embd) {
        batch.embd = (float *) malloc(sizeof(float) * n_tokens * embd);
--- a/llama.h
+++ b/llama.h
@ -152,6 +152,29 @@ extern "C" {
    // - logits : if zero, the logits for the respective token will not be output
    //
    typedef struct llama_batch : refl::attr::usage::type{
+
+      llama_batch(int32_t n_tokens, // member-wise constructor: each argument initializes the field of the same name
+		  llama_token  *  token,
+		  float        *  embd,
+		  llama_pos    *  pos,
+		  int32_t      *  n_seq_id,
+		  llama_seq_id ** seq_id,
+		  int8_t       *  logits,
+		  llama_pos    all_pos_0,
+		  llama_pos    all_pos_1,
+		  llama_seq_id all_seq_id
+		  ) : // pointers are stored as given; nothing is allocated or copied here
+	n_tokens(n_tokens),
+	token(token),
+	embd(embd),
+	pos(pos),
+	n_seq_id(n_seq_id),
+	seq_id(seq_id),
+	logits(logits),      
+	all_pos_0(all_pos_0),
+	all_pos_1(all_pos_1),
+	all_seq_id(all_seq_id) {} // empty body: all work is done by the initializer list
+      
        int32_t n_tokens;

        llama_token  *  token;
@ -254,7 +277,7 @@ extern "C" {
      llama_grammar_element(        enum llama_gretype type,
 				    uint32_t           value // Unicode code point or rule ID
 				    ):type(type), value(value){}
-      llama_grammar_element( ):type(0), value(0){}
+      llama_grammar_element( ):type(llama_gretype(0)), value(0){}
        enum llama_gretype type;
        uint32_t           value; // Unicode code point or rule ID
    } llama_grammar_element;