Fix editorconfig

Alisamar Husain 2023-04-27 17:14:18 +05:30
parent d2af46e371
commit 846ee2c850
2 changed files with 8923 additions and 8923 deletions

File diff suppressed because it is too large


@@ -34,20 +34,20 @@ static bool is_interacting = false;
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
void sigint_handler(int signo)
{
    set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
    printf("\n"); // this also force-flushes stdout
    if (signo == SIGINT)
    {
        if (!is_interacting)
        {
            is_interacting = true;
        }
        else
        {
            llama_print_timings(*g_ctx);
            _exit(130);
        }
    }
}
#endif
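// Note (added commentary): the first Ctrl+C flips is_interacting so the main
// loop hands control back to the user; a second Ctrl+C prints timings and
// exits with status 130 (128 + SIGINT), the conventional exit code for a
// SIGINT-terminated process.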
@@ -55,487 +55,487 @@ auto const BINDPORT = 8001;
int run_llama(llama_context *ctx, gpt_params params, std::ostream *outfile)
{
    if (!params.lora_adapter.empty())
    {
        int err = llama_apply_lora_from_file(ctx,
                                             params.lora_adapter.c_str(),
                                             params.lora_base.empty() ? NULL : params.lora_base.c_str(),
                                             params.n_threads);
        if (err != 0)
        {
            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
            return 1;
        }
    }

    // print system information
    {
        fprintf(stderr, "\n");
        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
    }

    // determine the maximum memory usage needed to do inference for the given n_batch and n_predict parameters
    // uncomment the "used_mem" line in llama.cpp to see the results
    if (params.mem_test)
    {
        {
            const std::vector<llama_token> tmp(params.n_batch, 0);
            llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
        }

        {
            const std::vector<llama_token> tmp = {
                0,
            };
            llama_eval(ctx, tmp.data(), tmp.size(), params.n_predict - 1, params.n_threads);
        }

        llama_print_timings(ctx);
        llama_free(ctx);

        return 0;
    }

    // Add a space in front of the first character to match OG llama tokenizer behavior
    params.prompt.insert(0, 1, ' ');

    // tokenize the prompt
    auto embd_inp = ::llama_tokenize(ctx, params.prompt, true);

    const int n_ctx = llama_n_ctx(ctx);

    if ((int)embd_inp.size() > n_ctx - 4)
    {
        fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int)embd_inp.size(), n_ctx - 4);
        return 1;
    }

    // number of tokens to keep when resetting context
    if (params.n_keep < 0 || params.n_keep > (int)embd_inp.size() || params.instruct)
    {
        params.n_keep = (int)embd_inp.size();
    }

    // prefix & suffix for instruct mode
    const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true);
    const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false);

    // in instruct mode, we inject a prefix and a suffix to each input by the user
    if (params.instruct)
    {
        params.interactive_first = true;
        params.antiprompt.push_back("### Instruction:\n\n");
    }

    // enable interactive mode if reverse prompt or interactive start is specified
    if (params.antiprompt.size() != 0 || params.interactive_first)
    {
        params.interactive = true;
    }

    // determine newline token
    auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);

    if (params.verbose_prompt)
    {
        fprintf(stderr, "\n");
        fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
        fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
        for (int i = 0; i < (int)embd_inp.size(); i++)
        {
            fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
        }
        if (params.n_keep > 0)
        {
            fprintf(stderr, "%s: static prompt based on n_keep: '", __func__);
            for (int i = 0; i < params.n_keep; i++)
            {
                fprintf(stderr, "%s", llama_token_to_str(ctx, embd_inp[i]));
            }
            fprintf(stderr, "'\n");
        }
        fprintf(stderr, "\n");
    }

    if (params.interactive)
    {
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
        struct sigaction sigint_action;
        sigint_action.sa_handler = sigint_handler;
        sigemptyset(&sigint_action.sa_mask);
        sigint_action.sa_flags = 0;
        sigaction(SIGINT, &sigint_action, NULL);
#elif defined(_WIN32)
        signal(SIGINT, sigint_handler);
#endif

        fprintf(stderr, "%s: interactive mode on.\n", __func__);

        if (params.antiprompt.size())
        {
            for (auto antiprompt : params.antiprompt)
            {
                fprintf(stderr, "Reverse prompt: '%s'\n", antiprompt.c_str());
            }
        }

        if (!params.input_prefix.empty())
        {
            fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str());
        }
    }
    fprintf(stderr, "sampling: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n",
            params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty);
    fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
    fprintf(stderr, "\n\n");

    // TODO: replace with ring-buffer
    std::vector<llama_token> last_n_tokens(n_ctx);
    std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);

    if (params.interactive)
    {
        fprintf(stderr, "== Running in interactive mode. ==\n"
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
                        " - Press Ctrl+C to interject at any time.\n"
#endif
                        " - Press Return to return control to LLaMa.\n"
                        " - If you want to submit another line, end your input in '\\'.\n\n");
        is_interacting = params.interactive_first;
    }

    bool is_antiprompt = false;
    bool input_noecho = false;

    int n_past = 0;
    int n_remain = params.n_predict;
    int n_consumed = 0;

    // the first thing we will do is to output the prompt, so set color accordingly
    set_console_color(con_st, CONSOLE_COLOR_PROMPT);

    std::vector<llama_token> embd;

    while (n_remain != 0 || params.interactive)
    {
        // predict
        if (embd.size() > 0)
        {
            // infinite text generation via context swapping
            // if we run out of context:
            // - take the n_keep first tokens from the original prompt (via n_past)
            // - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
            if (n_past + (int)embd.size() > n_ctx)
            {
                const int n_left = n_past - params.n_keep;

                n_past = params.n_keep;

                // insert n_left/2 tokens at the start of embd from last_n_tokens
                embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left / 2 - embd.size(), last_n_tokens.end() - embd.size());

                // printf("\n---\n");
                // printf("resetting: '");
                // for (int i = 0; i < (int) embd.size(); i++) {
                //     printf("%s", llama_token_to_str(ctx, embd[i]));
                // }
                // printf("'\n");
                // printf("\n---\n");
            }
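            // Worked example (hypothetical numbers, added for illustration):
            // with n_ctx = 2048, n_keep = 48 and n_past = 2040, a new batch
            // of 16 tokens would overflow the window. Then n_left = 2040 - 48
            // = 1992, n_past resets to 48, and the last n_left/2 = 996
            // tokens from last_n_tokens are re-inserted ahead of the new
            // tokens so their logits can be recomputed in batches below.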
            // evaluate tokens in batches
            // embd is typically prepared beforehand to fit within a batch, but not always
            for (int i = 0; i < (int)embd.size(); i += params.n_batch)
            {
                int n_eval = (int)embd.size() - i;
                if (n_eval > params.n_batch)
                {
                    n_eval = params.n_batch;
                }
                if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads))
                {
                    fprintf(stderr, "%s : failed to eval\n", __func__);
                    return 1;
                }
                n_past += n_eval;
            }
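            // Note (added commentary): n_past counts tokens already resident
            // in the model's KV cache, so each llama_eval call only pays for
            // the n_eval new tokens rather than the whole history.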
        }

        embd.clear();

        if ((int)embd_inp.size() <= n_consumed && !is_interacting)
        {
            // out of user input, sample next token
            const int32_t top_k = params.top_k;
            const float top_p = params.top_p;
            const float temp = params.temp;
            const float repeat_penalty = params.repeat_penalty;

            llama_token id = 0;

            {
                auto logits = llama_get_logits(ctx);

                if (params.ignore_eos)
                {
                    logits[llama_token_eos()] = 0;
                }

                id = llama_sample_top_p_top_k(ctx,
                                              last_n_tokens.data() + n_ctx - params.repeat_last_n,
                                              params.repeat_last_n, top_k, top_p, temp, repeat_penalty);

                last_n_tokens.erase(last_n_tokens.begin());
                last_n_tokens.push_back(id);
            }
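            // Note (added commentary): the pointer arithmetic above hands the
            // sampler only the final repeat_last_n entries of last_n_tokens,
            // i.e. the sliding window of recent tokens that repeat_penalty is
            // applied to before top-k / top-p / temperature sampling.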
            // replace end of text token with newline token when in interactive mode
            if (id == llama_token_eos() && params.interactive && !params.instruct)
            {
                id = llama_token_newline.front();
                if (params.antiprompt.size() != 0)
                {
                    // tokenize and inject first reverse prompt
                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
                }
            }

            // add it to the context
            embd.push_back(id);

            // echo this to console
            input_noecho = false;

            // decrement remaining sampling budget
            --n_remain;
        }
        else
        {
            // some user input remains from prompt or interaction, forward it to processing
            while ((int)embd_inp.size() > n_consumed)
            {
                embd.push_back(embd_inp[n_consumed]);
                last_n_tokens.erase(last_n_tokens.begin());
                last_n_tokens.push_back(embd_inp[n_consumed]);
                ++n_consumed;
                if ((int)embd.size() >= params.n_batch)
                {
                    break;
                }
            }
        }

        // display text
        if (!input_noecho)
        {
            for (auto id : embd)
            {
                *outfile << llama_token_to_str(ctx, id) << std::flush;
            }
        }
        // reset color to default if there is no pending user input
        if (!input_noecho && (int)embd_inp.size() == n_consumed)
        {
            set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
        }

        // in interactive mode, and not currently processing queued inputs;
        // check if we should prompt the user for more
        if (params.interactive && (int)embd_inp.size() <= n_consumed)
        {
            // check for reverse prompt
            if (params.antiprompt.size())
            {
                std::string last_output;
                for (auto id : last_n_tokens)
                {
                    last_output += llama_token_to_str(ctx, id);
                }

                is_antiprompt = false;
                // Check if each of the reverse prompts appears at the end of the output.
                for (std::string &antiprompt : params.antiprompt)
                {
                    if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos)
                    {
                        is_interacting = true;
                        is_antiprompt = true;
                        set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
                        fflush(stdout);
                        break;
                    }
                }
            }
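            // Note (added commentary): the three-argument find overload used
            // above searches for the antiprompt starting at position
            // last_output.length() - antiprompt.length(), so it only matches
            // when the reverse prompt sits exactly at the tail of the recent
            // output. If last_output were ever shorter than the antiprompt,
            // that size_t subtraction would wrap around; the code implicitly
            // assumes last_output spans the full n_ctx token window.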
            if (n_past > 0 && is_interacting)
            {
                // potentially set color to indicate we are taking user input
                set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);

#if defined(_WIN32)
                // Windows: must reactivate sigint handler after each signal
                signal(SIGINT, sigint_handler);
#endif

                if (params.instruct)
                {
                    printf("\n> ");
                }

                std::string buffer;
                if (!params.input_prefix.empty())
                {
                    buffer += params.input_prefix;
                    printf("%s", buffer.c_str());
                }

                std::string line;
                bool another_line = true;
                do
                {
#if defined(_WIN32)
                    std::wstring wline;
                    if (!std::getline(std::wcin, wline))
                    {
                        // input stream is bad or EOF received
                        return 0;
                    }
                    win32_utf8_encode(wline, line);
#else
                    if (!std::getline(std::cin, line))
                    {
                        // input stream is bad or EOF received
                        return 0;
                    }
#endif
                    if (line.empty() || line.back() != '\\')
                    {
                        another_line = false;
                    }
                    else
                    {
                        line.pop_back(); // Remove the continue character
                    }
                    buffer += line + '\n'; // Append the line to the result
                } while (another_line);
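                // Worked example (added for illustration): entering
                //   "first line\"  and then  "second line"
                // yields buffer == "first line\nsecond line\n"; the trailing
                // backslash is stripped and each line is joined with '\n'.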
                // done taking input, reset color
                set_console_color(con_st, CONSOLE_COLOR_DEFAULT);

                // Add tokens to embd only if the input buffer is non-empty
                // Entering an empty line lets the user pass control back
                if (buffer.length() > 1)
                {
                    // instruct mode: insert instruction prefix
                    if (params.instruct && !is_antiprompt)
                    {
                        n_consumed = embd_inp.size();
                        embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
                    }

                    auto line_inp = ::llama_tokenize(ctx, buffer, false);
                    embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());

                    // instruct mode: insert response suffix
                    if (params.instruct)
                    {
                        embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
                    }

                    n_remain -= line_inp.size();
                }

                input_noecho = true; // do not echo this again
            }

            if (n_past > 0)
            {
                is_interacting = false;
            }
        }

        // end of text token
        if (!embd.empty() && embd.back() == llama_token_eos())
        {
            if (params.instruct)
            {
                is_interacting = true;
            }
            else
            {
                fprintf(stderr, " [end of text]\n");
                break;
            }
        }

        // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
        if (params.interactive && n_remain <= 0 && params.n_predict != -1)
        {
            n_remain = params.n_predict;
            is_interacting = true;
        }
    }

#if defined(_WIN32)
    signal(SIGINT, SIG_DFL);
#endif

    llama_print_timings(ctx);
    llama_free(ctx);

    set_console_color(con_st, CONSOLE_COLOR_DEFAULT);

    return 0;
}

int main(int argc, char **argv)
{
    gpt_params params;
    params.model = "models/llama-7B/ggml-model.bin";

    if (gpt_params_parse(argc, argv, params) == false)
        return 1;

    if (params.n_ctx > 2048)
        fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
                        " expect poor results\n",
                __func__, params.n_ctx);

    if (params.seed <= 0)
        params.seed = time(NULL);

    llama_context *ctx;

    // load the model
    {
        auto lparams = llama_context_default_params();

        lparams.n_ctx = params.n_ctx;
        lparams.n_parts = params.n_parts;
        lparams.seed = params.seed;
        lparams.f16_kv = params.memory_f16;
        lparams.use_mlock = params.use_mlock;
        lparams.logits_all = params.perplexity;
        lparams.embedding = true;

        ctx = llama_init_from_file(params.model.c_str(), lparams);

        if (ctx == NULL)
        {
            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
            return 1;
        }
    }
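    // Note (added commentary): lparams.embedding = true is presumably set so
    // the same context could also serve the (currently commented-out)
    // /embedding route below; it is not needed for plain text completion.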
    crow::SimpleApp app;
    // app.loglevel(crow::LogLevel::Warning);

    CROW_ROUTE(app, "/completion").methods("POST"_method)([&params, &ctx](const crow::request &req)
    {
        auto body = crow::json::load(req.body);
        if (!body) return crow::response(crow::status::BAD_REQUEST);
@@ -570,27 +570,27 @@ int main(int argc, char **argv)
        return crow::response(crow::status::OK); });
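    // Hypothetical usage (added for illustration; the body of this handler is
    // elided in the diff above, so the field names are assumed from the
    // commented-out /embedding route below):
    //   POST http://localhost:8001/completion
    //   {"prompt": "Hello, world", "tempfile": "/tmp/completion-out.txt"}
    // where the generated text would be written to the given tempfile.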
// CROW_ROUTE(app, "/embedding").methods("POST"_method) // CROW_ROUTE(app, "/embedding").methods("POST"_method)
// ([&params, &ctx](const crow::request& req){ // ([&params, &ctx](const crow::request& req){
// auto body = crow::json::load(req.body); // auto body = crow::json::load(req.body);
// if (!body) return crow::response(crow::status::BAD_REQUEST); // if (!body) return crow::response(crow::status::BAD_REQUEST);
// // Create new params for this request only // // Create new params for this request only
// gpt_params runparams = params; // gpt_params runparams = params;
// // Set run params from body // // Set run params from body
// runparams.prompt = body["prompt"].s(); // runparams.prompt = body["prompt"].s();
// runparams.embedding = true; // runparams.embedding = true;
// // Open the tempfile into a stream. // // Open the tempfile into a stream.
// std::ofstream outfile(body["tempfile"].s(), std::ios::out); // std::ofstream outfile(body["tempfile"].s(), std::ios::out);
// // Write output of LLaMA to file stream. // // Write output of LLaMA to file stream.
// run_llama_embedding(ctx, runparams, &outfile); // run_llama_embedding(ctx, runparams, &outfile);
// return crow::response(crow::status::OK); // return crow::response(crow::status::OK);
// }); // });
    app.port(BINDPORT).multithreaded().run();
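    // Note (added commentary): per the hunk header above, BINDPORT is 8001,
    // so this Crow server listens on http://localhost:8001 and serves
    // requests from a thread pool via multithreaded().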
    return 0;
}