# Patch summary (annotation only; text before the first "diff --git" header is
# not part of any hunk).
#
# Purpose: record one row of timing data per run of the `main` example in a
# SQLite database file named "llama.sqlite".
#
# Makefile
#   - LDFLAGS changes from empty to `-lsqlite3`.
#
# examples/main/main.cpp
#   - Adds two #include lines (their targets are not visible in this copy).
#   - Hunk at new line 165: opens "llama.sqlite" with sqlite3_open_v2 using
#     SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, prints the return code and
#     sqlite3_errmsg(db) to stderr, then runs
#     `CREATE TABLE IF NOT EXISTS llama_runs(...)` through sqlite3_exec and
#     prints its return code and errmsg to stderr. Columns: id, build_number,
#     build_commit, n_gpu_layers, t_sample_us, t_eval_us, t_p_eval_us,
#     n_sample, n_eval, n_p_eval.
#   - Hunk at new line 834: right after llama_print_timings(ctx), builds an
#     `INSERT INTO llama_runs(...) VALUES (` statement in a std::ostringstream
#     from BUILD_NUMBER, BUILD_COMMIT (wrapped in single quotes) and
#     params.n_gpu_layers, lets llama_sqlite_append_timings() append the
#     remaining six values plus the closing ");", executes it with
#     sqlite3_exec and prints the return code and errmsg to stderr.
#
# llama.cpp
#   - Adds llama_sqlite_append_timings(ctx, sql_insert_values): streams
#     ctx->t_sample_us, t_eval_us, t_p_eval_us, n_sample, n_eval (each followed
#     by ",") and ctx->n_p_eval followed by ");" into the caller's stream.
#     The order matches the column list of the INSERT built in main.cpp.
#
# llama.h
#   - Adds one #include line (target not visible in this copy) and declares
#     llama_sqlite_append_timings with LLAMA_API; the hunk header's context
#     text for that declaration reads `extern "C" {`.
#
# NOTE(review): this copy of the patch has its line breaks collapsed and its
#   #include targets missing, so it cannot be applied as-is; regenerate it from
#   the commit before merging.
# NOTE(review): `char * errmsg;` has no initializer and is passed to "%s"
#   unconditionally after both sqlite3_exec calls. Per the SQLite docs the
#   pointer is set to NULL on success (NULL with "%s" is not valid for fprintf)
#   and a non-NULL message should be released with sqlite3_free -- confirm and
#   guard/free accordingly.
# NOTE(review): every SQLite return code is only printed; execution continues
#   with `db` even if sqlite3_open_v2 failed -- confirm this best-effort
#   behaviour is intended.
# NOTE(review): no sqlite3_close(db) appears in the visible hunks -- confirm the
#   handle is closed somewhere before main() returns.
# NOTE(review): the new declaration takes `std::ostringstream &`, a C++-only
#   type, at a spot the hunk header places inside `extern "C" {`; presumably
#   llama.h is also consumed by C translation units, which would no longer
#   compile -- confirm, and consider an `#ifdef __cplusplus` guard or a
#   plain-C interface (e.g. going through llama_get_timings).
# NOTE(review): the INSERT is assembled by string concatenation and
#   BUILD_COMMIT is quoted without escaping; presumably a build-time constant,
#   but sqlite3_prepare_v2 + sqlite3_bind_* would avoid the question -- verify.
#
diff --git a/Makefile b/Makefile index 897c5cb9a..8772107ea 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ OPT = -O3 endif CFLAGS = -I. $(OPT) -std=c11 -fPIC CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC -LDFLAGS = +LDFLAGS = -lsqlite3 ifdef LLAMA_DEBUG CFLAGS += -O0 -g diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 56ada7e69..fb4c828bc 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -17,8 +17,10 @@ #include #include #include +#include #include #include +#include #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include @@ -163,6 +165,30 @@ int main(int argc, char ** argv) { return 0; } + sqlite3 * db = NULL; + int return_code; + const int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; + return_code = sqlite3_open_v2("llama.sqlite", &db, flags, NULL); + fprintf(stderr, "\nsqlite open: %d %s\n\n", return_code, sqlite3_errmsg(db)); + + const std::string sql_create_table ="CREATE TABLE IF NOT EXISTS llama_runs(" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "build_number INTEGER NOT NULL," + "build_commit TEXT NOT NULL," + + "n_gpu_layers BIGINT NOT NULL," + + "t_sample_us BIGINT NOT NULL," + "t_eval_us BIGINT NOT NULL," + "t_p_eval_us BIGINT NOT NULL," + "n_sample BIGINT NOT NULL," + "n_eval BIGINT NOT NULL," + "n_p_eval BIGINT NOT NULL);"; + + char * errmsg; + return_code = sqlite3_exec(db, sql_create_table.c_str(), NULL, NULL, &errmsg); + fprintf(stderr, "\nsqlite create table: %d %s\n\n", return_code, errmsg); + std::string path_session = params.path_prompt_cache; std::vector session_tokens; @@ -808,6 +834,17 @@ int main(int argc, char ** argv) { } llama_print_timings(ctx); + + std::ostringstream sql_insert_values; + sql_insert_values << "INSERT INTO llama_runs(build_number, build_commit, n_gpu_layers, " + "t_sample_us, t_eval_us, t_p_eval_us, n_sample, n_eval, n_p_eval) VALUES ("; + sql_insert_values << BUILD_NUMBER << ","; + sql_insert_values << "'" << BUILD_COMMIT << "',"; + 
sql_insert_values << params.n_gpu_layers << ","; + llama_sqlite_append_timings(ctx, sql_insert_values); + return_code = sqlite3_exec(db, sql_insert_values.str().c_str(), NULL, NULL, &errmsg); + fprintf(stderr, "\nsqlite insert data: %d %s\n\n", return_code, errmsg); + if (ctx_guidance) { llama_free(ctx_guidance); } llama_free(ctx); llama_free_model(model); diff --git a/llama.cpp b/llama.cpp index 71061aab9..a9256121b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4243,6 +4243,15 @@ void llama_print_timings(struct llama_context * ctx) { fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms)); } +void llama_sqlite_append_timings(struct llama_context * ctx, std::ostringstream & sql_insert_values) { + sql_insert_values << ctx->t_sample_us << ","; + sql_insert_values << ctx->t_eval_us << ","; + sql_insert_values << ctx->t_p_eval_us << ","; + sql_insert_values << ctx->n_sample << ","; + sql_insert_values << ctx->n_eval << ","; + sql_insert_values << ctx->n_p_eval << ");"; +} + void llama_reset_timings(struct llama_context * ctx) { ctx->t_start_us = ggml_time_us(); ctx->t_sample_us = ctx->n_sample = 0; diff --git a/llama.h b/llama.h index fa1977f2d..8585942ef 100644 --- a/llama.h +++ b/llama.h @@ -8,6 +8,7 @@ #else #define LLAMA_MAX_DEVICES 1 #endif // GGML_USE_CUBLAS +#include #include #include #include @@ -446,6 +447,7 @@ extern "C" { // Performance information LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx); LLAMA_API void llama_print_timings(struct llama_context * ctx); + LLAMA_API void llama_sqlite_append_timings(struct llama_context * ctx, std::ostringstream & sql_insert_values); LLAMA_API void llama_reset_timings(struct llama_context * ctx); // Print system information