Change argument processing to allow prompt or file args. (#103)
parent 428aa7025a
commit 9116ae9b53
3 changed files with 38 additions and 26 deletions
43 chat.cpp
@@ -318,7 +318,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     fin.close();

     std::vector<uint8_t> tmp;

     for (int i = 0; i < n_parts; ++i) {
         const int part_id = i;
         //const int part_id = n_parts - i - 1;
@@ -797,14 +797,6 @@ int main(int argc, char ** argv) {
     gpt_params params;

-    params.temp = 0.1f;
-    params.top_p = 0.95f;
-    params.n_ctx = 2048;
-    params.interactive = true;
-    params.interactive_start = true;
-    params.use_color = true;
-    params.model = "ggml-alpaca-7b-q4.bin";
-
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
     }
@@ -856,13 +848,26 @@ int main(int argc, char ** argv) {
     // Add a space in front of the first character to match OG llama tokenizer behavior
     // params.prompt.insert(0, 1, ' ');
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp;// = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<gpt_vocab::id> embd_inp;

     // params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());

     // // tokenize the reverse prompt
     // std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);

+    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
+    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
+    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
+    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
+
+    if(!params.prompt.empty()) {
+        std::vector<gpt_vocab::id> param_inp = ::llama_tokenize(vocab, params.prompt, true);
+        embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
+        embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
+        embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
+    }
+
     // fprintf(stderr, "\n");
     // fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     // fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
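For orientation, a minimal sketch (not part of this commit) of the plain text that the token sequence assembled above corresponds to when a prompt is supplied via -p or -f; build_alpaca_context is a hypothetical helper, and tokenizer details such as the leading space and BOS handling are glossed over:

    // Sketch only: approximate text form of the seeded context. With no prompt,
    // only the instruction preamble is inserted and the chat stays interactive.
    #include <string>

    static std::string build_alpaca_context(const std::string & user_prompt) {
        std::string ctx =
            " Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n";
        if (!user_prompt.empty()) {
            ctx += "### Instruction:\n\n";
            ctx += user_prompt;           // the diff concatenates the token blocks with no extra separator here
            ctx += "### Response:\n\n";
        }
        return ctx;
    }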
@@ -871,13 +876,6 @@ int main(int argc, char ** argv) {
     // }
     // fprintf(stderr, "\n");

-    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
-    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
-    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
-
-    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
-
     if (params.interactive) {
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
         struct sigaction sigint_action;
@@ -1076,9 +1074,14 @@ int main(int argc, char ** argv) {

         // end of text token
         if (embd.back() == 2) {
-            // fprintf(stderr, " [end of text]\n");
-            is_interacting = true;
-            continue;
+            if (params.interactive) {
+                is_interacting = true;
+                continue;
+            } else {
+                printf("\n");
+                fprintf(stderr, " [end of text]\n");
+                break;
+            }
         }
     }
8 utils.cpp
@@ -24,9 +24,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
         } else if (arg == "-t" || arg == "--threads") {
             params.n_threads = std::stoi(argv[++i]);
         } else if (arg == "-p" || arg == "--prompt") {
+            params.interactive = false;
+            params.interactive_start = false;
+            params.use_color = false;
+
             params.prompt = argv[++i];
         } else if (arg == "-f" || arg == "--file") {
+            params.interactive = false;
+            params.interactive_start = false;
+            params.use_color = false;
+
             std::ifstream file(argv[++i]);

             std::copy(std::istreambuf_iterator<char>(file),
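The std::copy call above is cut off in this view. A minimal sketch of how the -f branch presumably completes, assuming the file contents are appended to params.prompt (read_prompt_file is a hypothetical helper, not code from the commit):

    // Sketch only: stream the whole file named after -f into the prompt string.
    #include <algorithm>
    #include <fstream>
    #include <iterator>
    #include <string>

    static void read_prompt_file(const char * path, std::string & prompt) {
        std::ifstream file(path);                         // file named after -f
        std::copy(std::istreambuf_iterator<char>(file),   // every character of the file...
                  std::istreambuf_iterator<char>(),       // ...up to end of stream...
                  std::back_inserter(prompt));            // ...appended to the prompt (assumed target)
    }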
13 utils.h
@@ -12,28 +12,29 @@
 // CLI argument parsing
 //

 // The default parameters
 struct gpt_params {
     int32_t seed = -1; // RNG seed
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     int32_t n_predict = 128; // new tokens to predict
     int32_t repeat_last_n = 64; // last n tokens to penalize
-    int32_t n_ctx = 512; //context size
+    int32_t n_ctx = 2048; //context size

     // sampling parameters
     int32_t top_k = 40;
     float top_p = 0.95f;
-    float temp = 0.80f;
+    float temp = 0.10f;
     float repeat_penalty = 1.30f;

     int32_t n_batch = 8; // batch size for prompt processing

-    std::string model = "models/lamma-7B/ggml-model.bin"; // model path
+    std::string model = "ggml-alpaca-7b-q4.bin"; // model path
     std::string prompt;

-    bool use_color = false; // use color to distinguish generations and inputs
+    bool use_color = true; // use color to distinguish generations and inputs

-    bool interactive = false; // interactive mode
-    bool interactive_start = false; // reverse prompt immediately
+    bool interactive = true; // interactive mode
+    bool interactive_start = true; // reverse prompt immediately
     std::string antiprompt = ""; // string upon seeing which more user input is prompted
 };
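Taken together with the new defaults above, a minimal sketch (hypothetical, not part of the commit) of how the parsed parameters now select the mode; it only uses gpt_params and gpt_params_parse as declared in utils.h:

    // Sketch only: running with no arguments keeps the interactive defaults,
    // while -p/--prompt or -f/--file switches to a single templated completion.
    #include "utils.h"
    #include <cstdio>

    int main(int argc, char ** argv) {
        gpt_params params;                            // interactive Alpaca chat by default
        if (gpt_params_parse(argc, argv, params) == false) {
            return 1;
        }
        if (params.interactive) {
            printf("no -p/-f given: start the interactive chat loop\n");
        } else {
            // the -p/-f handlers turned interactive, interactive_start and use_color off
            printf("one-shot completion of: %s\n", params.prompt.c_str());
        }
        return 0;
    }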