From cac70312e3922a7901a6d5eac50f889dba1b258a Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 17 Aug 2023 02:50:04 +0200
Subject: [PATCH] add basic cpu and gpu info (linux/cuda only)

---
 examples/llama-bench/llama-bench.cpp | 90 ++++++++++++++++++++--------
 ggml-cuda.cu                         | 12 ++++
 ggml-cuda.h                          | 38 ++++++------
 3 files changed, 95 insertions(+), 45 deletions(-)

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 9c654d281..c8a940c98 100755
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1,21 +1,26 @@
 #include <algorithm>
+#include <array>
 #include <cassert>
 #include <chrono>
-#include
 #include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <ctime>
+#include <iterator>
+#include <map>
 #include <numeric>
+#include <regex>
 #include <sstream>
 #include <string>
 #include <vector>
-#include
-#include
-#include
-#include
-#include
+
 #include "ggml.h"
 #include "llama.h"
 #include "common.h"
 #include "build-info.h"
+#ifdef GGML_USE_CUBLAS
+#include "ggml-cuda.h"
+#endif
 
 // utils
 static uint64_t get_time_ns() {
@@ -50,7 +55,7 @@ static std::vector<std::string> split(const std::string & str, char delim) {
 }
 
 template<typename T>
-T avg(const std::vector<T> & v) {
+static T avg(const std::vector<T> & v) {
     if (v.empty()) {
         return 0;
     }
@@ -59,7 +64,7 @@ T avg(const std::vector<T> & v) {
 }
 
 template<typename T>
-T stdev(const std::vector<T> & v) {
+static T stdev(const std::vector<T> & v) {
     if (v.size() <= 1) {
         return 0;
     }
@@ -77,6 +82,50 @@ static bool ggml_cpu_has_metal() {
 #endif
 }
 
+static std::string get_cpu_info() {
+    std::string id;
+#ifdef __linux__
+    FILE * f = fopen("/proc/cpuinfo", "r");
+    if (f) {
+        char buf[1024];
+        while (fgets(buf, sizeof(buf), f)) {
+            if (strncmp(buf, "model name", 10) == 0) {
+                char * p = strchr(buf, ':');
+                if (p) {
+                    p++;
+                    while (std::isspace(*p)) {
+                        p++;
+                    }
+                    while (std::isspace(p[strlen(p) - 1])) {
+                        p[strlen(p) - 1] = '\0';
+                    }
+                    id = p;
+                    break;
+                }
+            }
+        }
+    }
+#endif
+    // TODO: other platforms
+    return id;
+}
+
+static std::string get_gpu_info(void) {
+    std::string id;
+#ifdef GGML_USE_CUBLAS
+    int count = ggml_cuda_get_device_count();
+    for (int i = 0; i < count; i++) {
+        char buf[128];
+        ggml_cuda_get_device_description(i, buf, sizeof(buf));
+        id += buf;
+        if (i < count - 1) {
+            id += "/";
+        }
+    }
+#endif
+    // TODO: other backends
+    return id;
+}
+
 // command line params
 enum output_formats {CSV, JSON, MARKDOWN, SQL};
 
@@ -392,6 +441,8 @@ struct test {
     static const bool metal;
    static const bool gpu_blas;
     static const bool blas;
+    static const std::string cpu_info;
+    static const std::string gpu_info;
     std::string model_filename;
     std::string model_type;
     int n_batch;
@@ -476,6 +527,7 @@ struct test {
         static const std::vector<std::string> fields = {
             "build_commit", "build_number",
             "cuda", "opencl", "metal", "gpu_blas", "blas",
+            "cpu_info", "gpu_info",
             "model_filename", "model_type",
             "n_batch", "n_threads", "f16_kv",
             "n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
@@ -503,6 +555,7 @@ struct test {
         std::vector<std::string> values = {
             build_commit, std::to_string(build_number),
             std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+            cpu_info, gpu_info,
             model_filename, model_type,
             std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
             std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
@@ -530,7 +583,8 @@ const bool test::opencl = !!ggml_cpu_has_clblast();
 const bool test::metal = !!ggml_cpu_has_metal();
 const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
 const bool test::blas = !!ggml_cpu_has_blas();
-
+const std::string test::cpu_info = get_cpu_info();
+const std::string test::gpu_info = get_gpu_info();
 
 struct printer {
     FILE * fout;
@@ -691,30 +745,18 @@ struct markdown_printer : public printer {
 
 struct sql_printer : public printer {
     static std::string get_field_type(const std::string & field) {
-        if (field == "build_commit") {
-            return "TEXT";
-        }
         if (field == "build_number") {
             return "INTEGER";
         }
         if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas") {
             return "INTEGER";
         }
-        if (field == "model_filename" || field == "model_type") {
-            return "TEXT";
-        }
         if (field == "n_batch" || field == "n_threads" || field == "f16_kv" || field == "n_gpu_layers" || field == "main_gpu" || field == "mul_mat_q" || field == "low_vram") {
             return "INTEGER";
         }
-        if (field == "tensor_split") {
-            return "TEXT";
-        }
         if (field == "n_prompt" || field == "n_gen") {
             return "INTEGER";
         }
-        if (field == "test_time") {
-            return "TEXT";
-        }
         if (field == "avg_ns" || field == "stddev_ns" || field == "avg_ts" || field == "stddev_ts") {
             return "REAL";
         }
@@ -743,7 +785,7 @@ struct sql_printer : public printer {
     }
 };
 
-void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
+static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
     std::vector<llama_token> tokens(n_batch, llama_token_bos());
     int n_processed = 0;
     while (n_processed < n_prompt) {
@@ -753,14 +795,14 @@ void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
     }
 }
 
-void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
+static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
     llama_token token = llama_token_bos();
     for (int i = 0; i < n_gen; i++) {
         llama_eval(ctx, &token, 1, n_past + i, n_threads);
     }
 }
 
-void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
+static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
     (void)level;
     (void)text;
     (void)user_data;
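Note on get_cpu_info() above: on Linux, /proc/cpuinfo lists a "model name : ..." entry for each logical core, so taking the first match and trimming the whitespace around the colon-separated value is enough to identify the CPU on a typical homogeneous machine. The following standalone sketch mirrors that parsing logic; the main() harness and the fclose() call are illustrative additions (the patch itself never closes the FILE handle):

// Minimal standalone sketch of the /proc/cpuinfo parsing in get_cpu_info().
// Linux-only; returns an empty string on other platforms or on error.
#include <cctype>
#include <cstdio>
#include <cstring>
#include <string>

static std::string cpu_model_name() {
    std::string id;
#ifdef __linux__
    FILE * f = fopen("/proc/cpuinfo", "r");
    if (f) {
        char buf[1024];
        while (fgets(buf, sizeof(buf), f)) {
            // entries look like: "model name<TAB>: <human-readable CPU name>"
            if (strncmp(buf, "model name", 10) == 0) {
                char * p = strchr(buf, ':');
                if (p) {
                    p++;
                    while (std::isspace(*p)) {           // trim leading whitespace
                        p++;
                    }
                    size_t n = strlen(p);
                    while (n > 0 && std::isspace(p[n - 1])) {
                        p[--n] = '\0';                   // trim trailing whitespace/newline
                    }
                    id = p;
                    break;                               // take the first match, as the patch does
                }
            }
        }
        fclose(f);
    }
#endif
    return id;
}

int main() {
    std::printf("cpu_info: %s\n", cpu_model_name().c_str());
    return 0;
}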
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index df0cbe18f..5b415c646 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6469,3 +6469,15 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     func(tensor->src[0], tensor->src[1], tensor);
     return true;
 }
+
+int ggml_cuda_get_device_count() {
+    int device_count;
+    CUDA_CHECK(cudaGetDeviceCount(&device_count));
+    return device_count;
+}
+
+void ggml_cuda_get_device_description(int device, char * description, size_t description_size) {
+    cudaDeviceProp prop;
+    CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
+    snprintf(description, description_size, "%s", prop.name);
+}
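Note on the ggml-cuda.cu additions above: both new functions are thin wrappers over two standard CUDA runtime calls, cudaGetDeviceCount() and cudaGetDeviceProperties(), with CUDA_CHECK aborting on failure. A standalone sketch of the same enumeration with explicit error handling instead of the macro (hypothetical test program, built with e.g. nvcc):

// Enumerate CUDA devices the same way the new wrappers do.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess) {
        std::fprintf(stderr, "cudaGetDeviceCount: %s\n", cudaGetErrorString(err));
        return 1;
    }
    for (int i = 0; i < count; i++) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            // prop.name is exactly what ggml_cuda_get_device_description() copies out
            std::printf("device %d: %s (compute %d.%d)\n", i, prop.name, prop.major, prop.minor);
        }
    }
    return 0;
}

llama-bench's get_gpu_info() joins these names with "/", so a two-GPU box would report something like "NVIDIA A100-SXM4-40GB/NVIDIA A100-SXM4-40GB" (illustrative value) in the new gpu_info column.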
diff --git a/ggml-cuda.h b/ggml-cuda.h
index 72d7afa46..cad05f5fa 100644
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -8,29 +8,25 @@ extern "C" {
 
 #define GGML_CUDA_MAX_DEVICES 16
 
-void   ggml_init_cublas(void);
-void   ggml_cuda_set_tensor_split(const float * tensor_split);
+GGML_API void   ggml_init_cublas(void);
+GGML_API void * ggml_cuda_host_malloc(size_t size);
+GGML_API void   ggml_cuda_host_free(void * ptr);
 
-void   ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-void   ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
+GGML_API bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
+GGML_API void   ggml_cuda_set_tensor_split(const float * tensor_split);
+GGML_API void   ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_free_data(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_set_main_device(int main_device);
+GGML_API void   ggml_cuda_set_mul_mat_q(bool mul_mat_q);
+GGML_API void   ggml_cuda_set_scratch_size(size_t scratch_size);
+GGML_API void   ggml_cuda_free_scratch(void);
+GGML_API bool   ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 
-// TODO: export these with GGML_API
-void * ggml_cuda_host_malloc(size_t size);
-void   ggml_cuda_host_free(void * ptr);
-
-void   ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
-
-void   ggml_cuda_free_data(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
-void   ggml_cuda_set_main_device(int main_device);
-void   ggml_cuda_set_mul_mat_q(bool mul_mat_q);
-void   ggml_cuda_set_scratch_size(size_t scratch_size);
-void   ggml_cuda_free_scratch(void);
-bool   ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+GGML_API int    ggml_cuda_get_device_count(void);
+GGML_API void   ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
 #ifdef __cplusplus
 }
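Note on the ggml-cuda.h changes above: the reshuffle resolves the old "// TODO: export these with GGML_API" comment by tagging every remaining declaration, so the functions are exported when ggml is built as a shared library and llama-bench can call the two new ones from GGML_USE_CUBLAS builds. For reference, GGML_API in ggml.h expands along these lines (a sketch from memory of the surrounding ggml sources, not part of this patch):

// Typical definition of GGML_API (see ggml.h for the authoritative version).
#ifdef GGML_SHARED
#    if defined(_WIN32) && !defined(__MINGW32__)
#        ifdef GGML_BUILD
#            define GGML_API __declspec(dllexport)   // building the ggml DLL itself
#        else
#            define GGML_API __declspec(dllimport)   // linking against the DLL
#        endif
#    else
#        define GGML_API __attribute__ ((visibility ("default")))
#    endif
#else
#    define GGML_API                                 // static build: expands to nothing
#endif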