From cac70312e3922a7901a6d5eac50f889dba1b258a Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 17 Aug 2023 02:50:04 +0200
Subject: [PATCH] add basic cpu and gpu info (linux/cuda only)

---
 examples/llama-bench/llama-bench.cpp | 90 ++++++++++++++++++++--------
 ggml-cuda.cu                         | 12 ++++
 ggml-cuda.h                          | 38 ++++++------
 3 files changed, 95 insertions(+), 45 deletions(-)

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 9c654d281..c8a940c98 100755
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1,21 +1,26 @@
 #include <algorithm>
+#include <array>
 #include <cassert>
 #include <chrono>
-#include
 #include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <ctime>
+#include <iterator>
+#include <map>
 #include <numeric>
+#include <regex>
 #include <sstream>
 #include <string>
 #include <vector>
-#include
-#include
-#include
-#include
-#include
+
 #include "ggml.h"
 #include "llama.h"
 #include "common.h"
 #include "build-info.h"
+#ifdef GGML_USE_CUBLAS
+#include "ggml-cuda.h"
+#endif
 
 // utils
 static uint64_t get_time_ns() {
@@ -50,7 +55,7 @@ static std::vector<std::string> split(const std::string & str, char delim) {
 }
 
 template<typename T>
-T avg(const std::vector<T> & v) {
+static T avg(const std::vector<T> & v) {
     if (v.empty()) {
         return 0;
     }
@@ -59,7 +64,7 @@ T avg(const std::vector<T> & v) {
 }
 
 template<typename T>
-T stdev(const std::vector<T> & v) {
+static T stdev(const std::vector<T> & v) {
     if (v.size() <= 1) {
         return 0;
     }
@@ -77,6 +82,50 @@ static bool ggml_cpu_has_metal() {
 #endif
 }
 
+static std::string get_cpu_info() {
+    std::string id;
+#ifdef __linux__
+    FILE * f = fopen("/proc/cpuinfo", "r");
+    if (f) {
+        char buf[1024];
+        while (fgets(buf, sizeof(buf), f)) {
+            if (strncmp(buf, "model name", 10) == 0) {
+                char * p = strchr(buf, ':');
+                if (p) {
+                    p++;
+                    while (std::isspace(*p)) {
+                        p++;
+                    }
+                    while (std::isspace(p[strlen(p) - 1])) {
+                        p[strlen(p) - 1] = '\0';
+                    }
+                    id = p;
+                    break;
+                }
+            }
+        }
+    }
+#endif
+    // TODO: other platforms
+    return id;
+}
+
+static std::string get_gpu_info(void) {
+    std::string id;
+#ifdef GGML_USE_CUBLAS
+    int count = ggml_cuda_get_device_count();
+    for (int i = 0; i < count; i++) {
+        char buf[128];
+        ggml_cuda_get_device_description(i, buf, sizeof(buf));
+        id += buf;
+        if (i < count - 1) {
+            id += "/";
+        }
+    }
+#endif
+    // TODO: other backends
+    return id;
+}
+
 // command line params
 enum output_formats {CSV, JSON, MARKDOWN, SQL};
 
@@ -392,6 +441,8 @@ struct test {
     static const bool metal;
    static const bool gpu_blas;
     static const bool blas;
+    static const std::string cpu_info;
+    static const std::string gpu_info;
     std::string model_filename;
     std::string model_type;
     int n_batch;
@@ -476,6 +527,7 @@ struct test {
         static const std::vector<std::string> fields = {
             "build_commit", "build_number",
             "cuda", "opencl", "metal", "gpu_blas", "blas",
+            "cpu_info", "gpu_info",
             "model_filename", "model_type",
             "n_batch", "n_threads", "f16_kv",
             "n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
@@ -503,6 +555,7 @@ struct test {
         std::vector<std::string> values = {
             build_commit, std::to_string(build_number),
             std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+            cpu_info, gpu_info,
             model_filename, model_type,
             std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
             std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
@@ -530,7 +583,8 @@ const bool test::opencl = !!ggml_cpu_has_clblast();
 const bool test::metal = !!ggml_cpu_has_metal();
 const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
 const bool test::blas = !!ggml_cpu_has_blas();
-
+const std::string test::cpu_info = get_cpu_info();
+const std::string test::gpu_info = get_gpu_info();
 
 struct printer {
     FILE * fout;
@@ -691,30 +745,18 @@ struct markdown_printer : public printer {
 
 struct sql_printer : public printer {
     static std::string get_field_type(const std::string & field) {
-        if (field == "build_commit") {
-            return "TEXT";
-        }
         if (field == "build_number") {
             return "INTEGER";
         }
         if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas") {
             return "INTEGER";
         }
-        if (field == "model_filename" || field == "model_type") {
-            return "TEXT";
-        }
         if (field == "n_batch" || field == "n_threads" || field == "f16_kv" || field == "n_gpu_layers" || field == "main_gpu" || field == "mul_mat_q" || field == "low_vram") {
             return "INTEGER";
         }
-        if (field == "tensor_split") {
-            return "TEXT";
-        }
         if (field == "n_prompt" || field == "n_gen") {
             return "INTEGER";
         }
-        if (field == "test_time") {
-            return "TEXT";
-        }
         if (field == "avg_ns" || field == "stddev_ns" || field == "avg_ts" || field == "stddev_ts") {
             return "REAL";
         }
@@ -743,7 +785,7 @@ struct sql_printer : public printer {
     }
 };
 
-void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
+static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
     std::vector<llama_token> tokens(n_batch, llama_token_bos());
     int n_processed = 0;
     while (n_processed < n_prompt) {
@@ -753,14 +795,14 @@ void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
     }
 }
 
-void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
+static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
     llama_token token = llama_token_bos();
     for (int i = 0; i < n_gen; i++) {
         llama_eval(ctx, &token, 1, n_past + i, n_threads);
     }
 }
 
-void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
+static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
     (void)level;
     (void)text;
     (void)user_data;
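Note on get_cpu_info() above: on Linux, /proc/cpuinfo lists a "model name : ..." entry for each logical core, so taking the first match and trimming the whitespace around the colon-separated value is enough to identify the CPU on a typical homogeneous machine. The following standalone sketch mirrors that parsing logic; the main() harness and the fclose() call are illustrative additions (the patch itself never closes the FILE handle):

// Minimal standalone sketch of the /proc/cpuinfo parsing in get_cpu_info().
// Linux-only; returns an empty string on other platforms or on error.
#include <cctype>
#include <cstdio>
#include <cstring>
#include <string>

static std::string cpu_model_name() {
    std::string id;
#ifdef __linux__
    FILE * f = fopen("/proc/cpuinfo", "r");
    if (f) {
        char buf[1024];
        while (fgets(buf, sizeof(buf), f)) {
            // entries look like: "model name<TAB>: <human-readable CPU name>"
            if (strncmp(buf, "model name", 10) == 0) {
                char * p = strchr(buf, ':');
                if (p) {
                    p++;
                    while (std::isspace(*p)) {           // trim leading whitespace
                        p++;
                    }
                    size_t n = strlen(p);
                    while (n > 0 && std::isspace(p[n - 1])) {
                        p[--n] = '\0';                   // trim trailing whitespace/newline
                    }
                    id = p;
                    break;                               // take the first match, as the patch does
                }
            }
        }
        fclose(f);
    }
#endif
    return id;
}

int main() {
    std::printf("cpu_info: %s\n", cpu_model_name().c_str());
    return 0;
}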
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index df0cbe18f..5b415c646 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6469,3 +6469,15 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     func(tensor->src[0], tensor->src[1], tensor);
     return true;
 }
+
+int ggml_cuda_get_device_count() {
+    int device_count;
+    CUDA_CHECK(cudaGetDeviceCount(&device_count));
+    return device_count;
+}
+
+void ggml_cuda_get_device_description(int device, char * description, size_t description_size) {
+    cudaDeviceProp prop;
+    CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
+    snprintf(description, description_size, "%s", prop.name);
+}
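Note on the ggml-cuda.cu additions above: both new functions are thin wrappers over two standard CUDA runtime calls, cudaGetDeviceCount() and cudaGetDeviceProperties(), with CUDA_CHECK aborting on failure. A standalone sketch of the same enumeration with explicit error handling instead of the macro (hypothetical test program, built with e.g. nvcc):

// Enumerate CUDA devices the same way the new wrappers do.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess) {
        std::fprintf(stderr, "cudaGetDeviceCount: %s\n", cudaGetErrorString(err));
        return 1;
    }
    for (int i = 0; i < count; i++) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            // prop.name is exactly what ggml_cuda_get_device_description() copies out
            std::printf("device %d: %s (compute %d.%d)\n", i, prop.name, prop.major, prop.minor);
        }
    }
    return 0;
}

llama-bench's get_gpu_info() joins these names with "/", so a two-GPU box would report something like "NVIDIA A100-SXM4-40GB/NVIDIA A100-SXM4-40GB" (illustrative value) in the new gpu_info column.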
diff --git a/ggml-cuda.h b/ggml-cuda.h
index 72d7afa46..cad05f5fa 100644
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -8,29 +8,25 @@ extern "C" {
 
 #define GGML_CUDA_MAX_DEVICES 16
 
-void   ggml_init_cublas(void);
-void   ggml_cuda_set_tensor_split(const float * tensor_split);
+GGML_API void   ggml_init_cublas(void);
+GGML_API void * ggml_cuda_host_malloc(size_t size);
+GGML_API void   ggml_cuda_host_free(void * ptr);
 
-void   ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-void   ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
+GGML_API bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
+GGML_API void   ggml_cuda_set_tensor_split(const float * tensor_split);
+GGML_API void   ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_free_data(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_set_main_device(int main_device);
+GGML_API void   ggml_cuda_set_mul_mat_q(bool mul_mat_q);
+GGML_API void   ggml_cuda_set_scratch_size(size_t scratch_size);
+GGML_API void   ggml_cuda_free_scratch(void);
+GGML_API bool   ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 
-// TODO: export these with GGML_API
-void * ggml_cuda_host_malloc(size_t size);
-void   ggml_cuda_host_free(void * ptr);
-
-void   ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
-
-void   ggml_cuda_free_data(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
-void   ggml_cuda_set_main_device(int main_device);
-void   ggml_cuda_set_mul_mat_q(bool mul_mat_q);
-void   ggml_cuda_set_scratch_size(size_t scratch_size);
-void   ggml_cuda_free_scratch(void);
-bool   ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+GGML_API int    ggml_cuda_get_device_count(void);
+GGML_API void   ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
 #ifdef __cplusplus
 }
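Note on the ggml-cuda.h changes above: the reshuffle resolves the old "// TODO: export these with GGML_API" comment by tagging every remaining declaration, so the functions are exported when ggml is built as a shared library and llama-bench can call the two new ones from GGML_USE_CUBLAS builds. For reference, GGML_API in ggml.h expands along these lines (a sketch from memory of the surrounding ggml sources, not part of this patch):

// Typical definition of GGML_API (see ggml.h for the authoritative version).
#ifdef GGML_SHARED
#    if defined(_WIN32) && !defined(__MINGW32__)
#        ifdef GGML_BUILD
#            define GGML_API __declspec(dllexport)   // building the ggml DLL itself
#        else
#            define GGML_API __declspec(dllimport)   // linking against the DLL
#        endif
#    else
#        define GGML_API __attribute__ ((visibility ("default")))
#    endif
#else
#    define GGML_API                                 // static build: expands to nothing
#endif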