Fix editorconfig

2023-04-27 16:56:43 +05:30 · 2023-04-27 16:56:43 +05:30 · d2af46e371
commit d2af46e371
parent 2b50d21423
3 changed files with 8874 additions and 8969 deletions
--- a/examples/server/crow.h
+++ b/examples/server/crow.h
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -32,13 +32,18 @@ static llama_context ** g_ctx;
 static bool is_interacting = false;

 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
-void sigint_handler(int signo) {
+void sigint_handler(int signo)
+{
 	set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
 	printf("\n"); // this also force flush stdout.
-    if (signo == SIGINT) {
-        if (!is_interacting) {
+	if (signo == SIGINT)
+	{
+		if (!is_interacting)
+		{
 			is_interacting = true;
-        } else {
+		}
+		else
+		{
 			llama_print_timings(*g_ctx);
 			_exit(130);
 		}
@ -46,16 +51,18 @@ void sigint_handler(int signo) {
 }
 #endif

-
 auto const BINDPORT = 8001;

-int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
-    if (!params.lora_adapter.empty()) {
+int run_llama(llama_context *ctx, gpt_params params, std::ostream *outfile)
+{
+	if (!params.lora_adapter.empty())
+	{
 		int err = llama_apply_lora_from_file(ctx,
 											 params.lora_adapter.c_str(),
 											 params.lora_base.empty() ? NULL : params.lora_base.c_str(),
 											 params.n_threads);
-        if (err != 0) {
+		if (err != 0)
+		{
 			fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
 			return 1;
 		}
@ -70,14 +77,17 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 	// determine the maximum memory usage needed to do inference for the given n_batch and n_predict parameters
 	// uncomment the "used_mem" line in llama.cpp to see the results
-    if (params.mem_test) {
+	if (params.mem_test)
+	{
 		{
 			const std::vector<llama_token> tmp(params.n_batch, 0);
 			llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
 		}

 		{
-            const std::vector<llama_token> tmp = { 0, };
+			const std::vector<llama_token> tmp = {
+				0,
+			};
 			llama_eval(ctx, tmp.data(), tmp.size(), params.n_predict - 1, params.n_threads);
 		}

@ -95,13 +105,15 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 	const int n_ctx = llama_n_ctx(ctx);

-    if ((int) embd_inp.size() > n_ctx - 4) {
+	if ((int)embd_inp.size() > n_ctx - 4)
+	{
 		fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int)embd_inp.size(), n_ctx - 4);
 		return 1;
 	}

 	// number of tokens to keep when resetting context
-    if (params.n_keep < 0 || params.n_keep > (int)embd_inp.size() || params.instruct) {
+	if (params.n_keep < 0 || params.n_keep > (int)embd_inp.size() || params.instruct)
+	{
 		params.n_keep = (int)embd_inp.size();
 	}

@ -110,29 +122,35 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 	const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false);

 	// in instruct mode, we inject a prefix and a suffix to each input by the user
-    if (params.instruct) {
+	if (params.instruct)
+	{
 		params.interactive_first = true;
 		params.antiprompt.push_back("### Instruction:\n\n");
 	}

 	// enable interactive mode if reverse prompt or interactive start is specified
-    if (params.antiprompt.size() != 0 || params.interactive_first) {
+	if (params.antiprompt.size() != 0 || params.interactive_first)
+	{
 		params.interactive = true;
 	}

 	// determine newline token
 	auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);

-    if (params.verbose_prompt) {
+	if (params.verbose_prompt)
+	{
 		fprintf(stderr, "\n");
 		fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
 		fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-        for (int i = 0; i < (int) embd_inp.size(); i++) {
+		for (int i = 0; i < (int)embd_inp.size(); i++)
+		{
 			fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
 		}
-        if (params.n_keep > 0) {
+		if (params.n_keep > 0)
+		{
 			fprintf(stderr, "%s: static prompt based on n_keep: '", __func__);
-            for (int i = 0; i < params.n_keep; i++) {
+			for (int i = 0; i < params.n_keep; i++)
+			{
 				fprintf(stderr, "%s", llama_token_to_str(ctx, embd_inp[i]));
 			}
 			fprintf(stderr, "'\n");
@ -140,7 +158,8 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 		fprintf(stderr, "\n");
 	}

-    if (params.interactive) {
+	if (params.interactive)
+	{
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 		struct sigaction sigint_action;
 		sigint_action.sa_handler = sigint_handler;
@ -153,13 +172,16 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 		fprintf(stderr, "%s: interactive mode on.\n", __func__);

-        if (params.antiprompt.size()) {
-            for (auto antiprompt : params.antiprompt) {
+		if (params.antiprompt.size())
+		{
+			for (auto antiprompt : params.antiprompt)
+			{
 				fprintf(stderr, "Reverse prompt: '%s'\n", antiprompt.c_str());
 			}
 		}

-        if (!params.input_prefix.empty()) {
+		if (!params.input_prefix.empty())
+		{
 			fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str());
 		}
 	}
@ -172,7 +194,8 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 	std::vector<llama_token> last_n_tokens(n_ctx);
 	std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);

-    if (params.interactive) {
+	if (params.interactive)
+	{
 		fprintf(stderr, "== Running in interactive mode. ==\n"
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 						" - Press Ctrl+C to interject at any time.\n"
@ -194,14 +217,17 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 	std::vector<llama_token> embd;

-    while (n_remain != 0 || params.interactive) {
+	while (n_remain != 0 || params.interactive)
+	{
 		// predict
-        if (embd.size() > 0) {
+		if (embd.size() > 0)
+		{
 			// infinite text generation via context swapping
 			// if we run out of context:
 			// - take the n_keep first tokens from the original prompt (via n_past)
 			// - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
-            if (n_past + (int) embd.size() > n_ctx) {
+			if (n_past + (int)embd.size() > n_ctx)
+			{
 				const int n_left = n_past - params.n_keep;

 				n_past = params.n_keep;
@ -220,12 +246,15 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 			// evaluate tokens in batches
 			// embd is typically prepared beforehand to fit within a batch, but not always
-            for (int i = 0; i < (int) embd.size(); i += params.n_batch) {
+			for (int i = 0; i < (int)embd.size(); i += params.n_batch)
+			{
 				int n_eval = (int)embd.size() - i;
-                if (n_eval > params.n_batch) {
+				if (n_eval > params.n_batch)
+				{
 					n_eval = params.n_batch;
 				}
-                if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads)) {
+				if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads))
+				{
 					fprintf(stderr, "%s : failed to eval\n", __func__);
 					return 1;
 				}
@ -235,7 +264,8 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 		embd.clear();

-        if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
+		if ((int)embd_inp.size() <= n_consumed && !is_interacting)
+		{
 			// out of user input, sample next token
 			const int32_t top_k = params.top_k;
 			const float top_p = params.top_p;
@ -247,7 +277,8 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 			{
 				auto logits = llama_get_logits(ctx);

-                if (params.ignore_eos) {
+				if (params.ignore_eos)
+				{
 					logits[llama_token_eos()] = 0;
 				}

@ -260,9 +291,11 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 			}

 			// replace end of text token with newline token when in interactive mode
-            if (id == llama_token_eos() && params.interactive && !params.instruct) {
+			if (id == llama_token_eos() && params.interactive && !params.instruct)
+			{
 				id = llama_token_newline.front();
-                if (params.antiprompt.size() != 0) {
+				if (params.antiprompt.size() != 0)
+				{
 					// tokenize and inject first reverse prompt
 					const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
 					embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
@ -277,45 +310,57 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 			// decrement remaining sampling budget
 			--n_remain;
-        } else {
+		}
+		else
+		{
 			// some user input remains from prompt or interaction, forward it to processing
-            while ((int) embd_inp.size() > n_consumed) {
+			while ((int)embd_inp.size() > n_consumed)
+			{
 				embd.push_back(embd_inp[n_consumed]);
 				last_n_tokens.erase(last_n_tokens.begin());
 				last_n_tokens.push_back(embd_inp[n_consumed]);
 				++n_consumed;
-                if ((int) embd.size() >= params.n_batch) {
+				if ((int)embd.size() >= params.n_batch)
+				{
 					break;
 				}
 			}
 		}

 		// display text
-        if (!input_noecho) {
-            for (auto id : embd) {
+		if (!input_noecho)
+		{
+			for (auto id : embd)
+			{
 				*outfile << llama_token_to_str(ctx, id) << std::flush;
 			}
 		}
 		// reset color to default if there is no pending user input
-        if (!input_noecho && (int)embd_inp.size() == n_consumed) {
+		if (!input_noecho && (int)embd_inp.size() == n_consumed)
+		{
 			set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
 		}

 		// in interactive mode, and not currently processing queued inputs;
 		// check if we should prompt the user for more
-        if (params.interactive && (int) embd_inp.size() <= n_consumed) {
+		if (params.interactive && (int)embd_inp.size() <= n_consumed)
+		{

 			// check for reverse prompt
-            if (params.antiprompt.size()) {
+			if (params.antiprompt.size())
+			{
 				std::string last_output;
-                for (auto id : last_n_tokens) {
+				for (auto id : last_n_tokens)
+				{
 					last_output += llama_token_to_str(ctx, id);
 				}

 				is_antiprompt = false;
 				// Check if each of the reverse prompts appears at the end of the output.
-                for (std::string & antiprompt : params.antiprompt) {
-                    if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) {
+				for (std::string &antiprompt : params.antiprompt)
+				{
+					if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos)
+					{
 						is_interacting = true;
 						is_antiprompt = true;
 						set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
@ -325,7 +370,8 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 				}
 			}

-            if (n_past > 0 && is_interacting) {
+			if (n_past > 0 && is_interacting)
+			{
 				// potentially set color to indicate we are taking user input
 				set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);

@ -334,35 +380,43 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 				signal(SIGINT, sigint_handler);
 #endif

-                if (params.instruct) {
+				if (params.instruct)
+				{
 					printf("\n> ");
 				}

 				std::string buffer;
-                if (!params.input_prefix.empty()) {
+				if (!params.input_prefix.empty())
+				{
 					buffer += params.input_prefix;
 					printf("%s", buffer.c_str());
 				}

 				std::string line;
 				bool another_line = true;
-                do {
+				do
+				{
 #if defined(_WIN32)
 					std::wstring wline;
-                    if (!std::getline(std::wcin, wline)) {
+					if (!std::getline(std::wcin, wline))
+					{
 						// input stream is bad or EOF received
 						return 0;
 					}
 					win32_utf8_encode(wline, line);
 #else
-                    if (!std::getline(std::cin, line)) {
+					if (!std::getline(std::cin, line))
+					{
 						// input stream is bad or EOF received
 						return 0;
 					}
 #endif
-                    if (line.empty() || line.back() != '\\') {
+					if (line.empty() || line.back() != '\\')
+					{
 						another_line = false;
-                    } else {
+					}
+					else
+					{
 						line.pop_back(); // Remove the continue character
 					}
 					buffer += line + '\n'; // Append the line to the result
@ -373,10 +427,12 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {

 				// Add tokens to embd only if the input buffer is non-empty
 				// Entering a empty line lets the user pass control back
-                if (buffer.length() > 1) {
+				if (buffer.length() > 1)
+				{

 					// instruct mode: insert instruction prefix
-                    if (params.instruct && !is_antiprompt) {
+					if (params.instruct && !is_antiprompt)
+					{
 						n_consumed = embd_inp.size();
 						embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
 					}
@ -385,7 +441,8 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 					embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());

 					// instruct mode: insert response suffix
-                    if (params.instruct) {
+					if (params.instruct)
+					{
 						embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
 					}

@ -395,23 +452,29 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 				input_noecho = true; // do not echo this again
 			}

-            if (n_past > 0) {
+			if (n_past > 0)
+			{
 				is_interacting = false;
 			}
 		}

 		// end of text token
-        if (!embd.empty() && embd.back() == llama_token_eos()) {
-            if (params.instruct) {
+		if (!embd.empty() && embd.back() == llama_token_eos())
+		{
+			if (params.instruct)
+			{
 				is_interacting = true;
-            } else {
+			}
+			else
+			{
 				fprintf(stderr, " [end of text]\n");
 				break;
 			}
 		}

 		// In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
-        if (params.interactive && n_remain <= 0 && params.n_predict != -1) {
+		if (params.interactive && n_remain <= 0 && params.n_predict != -1)
+		{
 			n_remain = params.n_predict;
 			is_interacting = true;
 		}
@ -429,7 +492,8 @@ int run_llama(llama_context * ctx, gpt_params params, std::ostream * outfile) {
 	return 0;
 }

-int main(int argc, char ** argv) {
+int main(int argc, char **argv)
+{
 	gpt_params params;
 	params.model = "models/llama-7B/ggml-model.bin";

@ -438,7 +502,8 @@ int main(int argc, char ** argv) {

 	if (params.n_ctx > 2048)
 		fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
-                "expect poor results\n", __func__, params.n_ctx);
+						"expect poor results\n",
+				__func__, params.n_ctx);

 	if (params.seed <= 0)
 		params.seed = time(NULL);
@ -459,7 +524,8 @@ int main(int argc, char ** argv) {

 		ctx = llama_init_from_file(params.model.c_str(), lparams);

-        if (ctx == NULL) {
+		if (ctx == NULL)
+		{
 			fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
 			return 1;
 		}
@ -468,8 +534,8 @@ int main(int argc, char ** argv) {
 	crow::SimpleApp app;
 	// app.loglevel(crow::LogLevel::Warning);

-    CROW_ROUTE(app, "/completion").methods("POST"_method)
-    ([&params, &ctx](const crow::request& req){
+	CROW_ROUTE(app, "/completion").methods("POST"_method)([&params, &ctx](const crow::request &req)
+														  {
        auto body = crow::json::load(req.body);
        if (!body) return crow::response(crow::status::BAD_REQUEST);

@ -502,8 +568,7 @@ int main(int argc, char ** argv) {
        // Write output of LLaMA to file stream.
        run_llama(ctx, runparams, &outfile);

-        return crow::response(crow::status::OK);
-    });
+        return crow::response(crow::status::OK); });

 	// CROW_ROUTE(app, "/embedding").methods("POST"_method)
 	// ([&params, &ctx](const crow::request& req){