add basic cpu and gpu info (linux/cuda only)

slaren 2023-08-17 02:50:04 +02:00
parent 67362d9db0
commit cac70312e3
3 changed files with 95 additions and 45 deletions

examples/llama-bench/llama-bench.cpp

@@ -1,21 +1,26 @@
 #include <algorithm>
+#include <array>
 #include <cassert>
 #include <chrono>
-#include <array>
 #include <cinttypes>
+#include <cstring>
+#include <ctime>
+#include <iterator>
+#include <map>
+#include <numeric>
 #include <regex>
+#include <sstream>
 #include <stdio.h>
 #include <string>
 #include <vector>
-#include <sstream>
-#include <iterator>
-#include <numeric>
-#include <map>
-#include <ctime>
+
 #include "ggml.h"
 #include "llama.h"
 #include "common.h"
 #include "build-info.h"
+#ifdef GGML_USE_CUBLAS
+#include "ggml-cuda.h"
+#endif
 
 // utils
 static uint64_t get_time_ns() {
@@ -50,7 +55,7 @@ static std::vector<T> split(const std::string & str, char delim) {
 }
 
 template<typename T>
-T avg(const std::vector<T> & v) {
+static T avg(const std::vector<T> & v) {
     if (v.empty()) {
         return 0;
     }
@@ -59,7 +64,7 @@ T avg(const std::vector<T> & v) {
 }
 
 template<typename T>
-T stdev(const std::vector<T> & v) {
+static T stdev(const std::vector<T> & v) {
     if (v.size() <= 1) {
         return 0;
     }
@@ -77,6 +82,50 @@ static bool ggml_cpu_has_metal() {
 #endif
 }
 
+static std::string get_cpu_info() {
+    std::string id;
+#ifdef __linux__
+    FILE * f = fopen("/proc/cpuinfo", "r");
+    if (f) {
+        char buf[1024];
+        while (fgets(buf, sizeof(buf), f)) {
+            if (strncmp(buf, "model name", 10) == 0) {
+                char * p = strchr(buf, ':');
+                if (p) {
+                    p++;
+                    while (std::isspace(*p)) {
+                        p++;
+                    }
+                    while (std::isspace(p[strlen(p) - 1])) {
+                        p[strlen(p) - 1] = '\0';
+                    }
+                    id = p;
+                    break;
+                }
+            }
+        }
+    }
+#endif
+    // TODO: other platforms
+    return id;
+}
+
+static std::string get_gpu_info(void) {
+    std::string id;
+#ifdef GGML_USE_CUBLAS
+    int count = ggml_cuda_get_device_count();
+    for (int i = 0; i < count; i++) {
+        char buf[128];
+        ggml_cuda_get_device_description(i, buf, sizeof(buf));
+        id += buf;
+        if (i < count - 1) {
+            id += "/";
+        }
+    }
+#endif
+    // TODO: other backends
+    return id;
+}
+
 // command line params
 enum output_formats {CSV, JSON, MARKDOWN, SQL};
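
For reference, here is the "model name" split-and-trim from the new get_cpu_info() as a standalone sketch; the sample line is invented and the sketch is not part of the commit:

// Standalone sketch (not part of the commit): the same ':'-split and
// whitespace trim that get_cpu_info() applies to the "model name" line.
#include <cctype>
#include <cstdio>
#include <cstring>

int main() {
    char buf[] = "model name\t: AMD Ryzen 9 5950X 16-Core Processor\n";  // invented sample line
    if (strncmp(buf, "model name", 10) == 0) {
        char * p = strchr(buf, ':');
        if (p) {
            p++;
            while (isspace((unsigned char) *p)) {               // skip the leading space after ':'
                p++;
            }
            while (isspace((unsigned char) p[strlen(p) - 1])) { // trim the trailing '\n'
                p[strlen(p) - 1] = '\0';
            }
            printf("cpu_info = '%s'\n", p);  // prints: cpu_info = 'AMD Ryzen 9 5950X 16-Core Processor'
        }
    }
    return 0;
}

Note the unsigned char cast before isspace(): passing a plain char with a negative value is undefined behavior, so the uncast std::isspace(*p) in the diff implicitly relies on /proc/cpuinfo content being ASCII.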
@@ -392,6 +441,8 @@ struct test {
     static const bool metal;
     static const bool gpu_blas;
     static const bool blas;
+    static const std::string cpu_info;
+    static const std::string gpu_info;
     std::string model_filename;
     std::string model_type;
     int n_batch;
@@ -476,6 +527,7 @@ struct test {
        static const std::vector<std::string> fields = {
            "build_commit", "build_number",
            "cuda", "opencl", "metal", "gpu_blas", "blas",
+           "cpu_info", "gpu_info",
            "model_filename", "model_type",
            "n_batch", "n_threads", "f16_kv",
            "n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
@@ -503,6 +555,7 @@ struct test {
        std::vector<std::string> values = {
            build_commit, std::to_string(build_number),
            std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+           cpu_info, gpu_info,
            model_filename, model_type,
            std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
            std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
@@ -530,7 +583,8 @@ const bool test::opencl = !!ggml_cpu_has_clblast();
 const bool test::metal = !!ggml_cpu_has_metal();
 const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
 const bool test::blas = !!ggml_cpu_has_blas();
+const std::string test::cpu_info = get_cpu_info();
+const std::string test::gpu_info = get_gpu_info();
 
 struct printer {
     FILE * fout;
@@ -691,30 +745,18 @@ struct markdown_printer : public printer {
 
 struct sql_printer : public printer {
     static std::string get_field_type(const std::string & field) {
-        if (field == "build_commit") {
-            return "TEXT";
-        }
         if (field == "build_number") {
             return "INTEGER";
         }
         if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas") {
             return "INTEGER";
         }
-        if (field == "model_filename" || field == "model_type") {
-            return "TEXT";
-        }
         if (field == "n_batch" || field == "n_threads" || field == "f16_kv" || field == "n_gpu_layers" || field == "main_gpu" || field == "mul_mat_q" || field == "low_vram") {
             return "INTEGER";
         }
-        if (field == "tensor_split") {
-            return "TEXT";
-        }
         if (field == "n_prompt" || field == "n_gen") {
             return "INTEGER";
         }
-        if (field == "test_time") {
-            return "TEXT";
-        }
         if (field == "avg_ns" || field == "stddev_ns" || field == "avg_ts" || field == "stddev_ts") {
             return "REAL";
         }
@@ -743,7 +785,7 @@ struct sql_printer : public printer {
     }
 };
 
-void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
+static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
     std::vector<llama_token> tokens(n_batch, llama_token_bos());
     int n_processed = 0;
     while (n_processed < n_prompt) {
@@ -753,14 +795,14 @@ void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
     }
 }
 
-void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
+static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
     llama_token token = llama_token_bos();
     for (int i = 0; i < n_gen; i++) {
         llama_eval(ctx, &token, 1, n_past + i, n_threads);
     }
 }
 
-void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
+static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
     (void)level;
     (void)text;
     (void)user_data;
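
On the sql_printer hunk above: the removed TEXT branches are redundant rather than relocated. Assuming get_field_type() ends with a default return of "TEXT" (the tail of the function lies outside this hunk), any field without an explicit INTEGER or REAL match, including the new cpu_info and gpu_info strings, maps to a TEXT column. A hedged sketch of the simplified shape:

#include <string>

// Sketch only: the field names match the diff, but the default return at the
// end is assumed from context, since the hunk does not show it.
static std::string get_field_type_sketch(const std::string & field) {
    if (field == "build_number") {
        return "INTEGER";
    }
    if (field == "avg_ns" || field == "stddev_ns") {
        return "REAL";
    }
    return "TEXT";  // cpu_info, gpu_info, model_filename, ... need no explicit case
}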

ggml-cuda.cu

@@ -6469,3 +6469,15 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     func(tensor->src[0], tensor->src[1], tensor);
     return true;
 }
+
+int ggml_cuda_get_device_count() {
+    int device_count;
+    CUDA_CHECK(cudaGetDeviceCount(&device_count));
+    return device_count;
+}
+
+void ggml_cuda_get_device_description(int device, char * description, size_t description_size) {
+    cudaDeviceProp prop;
+    CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
+    snprintf(description, description_size, "%s", prop.name);
+}
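
A minimal usage sketch of the two new functions, mirroring how get_gpu_info() in llama-bench consumes them; the main() wrapper is illustrative, not part of the commit, and assumes a build with GGML_USE_CUBLAS defined:

#include <cstdio>
#include "ggml-cuda.h"

int main() {
    int count = ggml_cuda_get_device_count();
    for (int i = 0; i < count; i++) {
        char desc[128];  // 128 bytes matches the buffer size get_gpu_info() uses
        ggml_cuda_get_device_description(i, desc, sizeof(desc));
        printf("device %d: %s\n", i, desc);
    }
    return 0;
}

Because the implementation formats with snprintf(), the description is always NUL-terminated and long device names are truncated to description_size - 1 bytes, so a fixed caller-side buffer is safe.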

ggml-cuda.h

@@ -8,29 +8,25 @@ extern "C" {
 
 #define GGML_CUDA_MAX_DEVICES 16
 
-void ggml_init_cublas(void);
-void ggml_cuda_set_tensor_split(const float * tensor_split);
-
-void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
-
-// TODO: export these with GGML_API
-void * ggml_cuda_host_malloc(size_t size);
-void ggml_cuda_host_free(void * ptr);
-void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
-void ggml_cuda_free_data(struct ggml_tensor * tensor);
-void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
-void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
-void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
-void ggml_cuda_set_main_device(int main_device);
-void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
-void ggml_cuda_set_scratch_size(size_t scratch_size);
-void ggml_cuda_free_scratch(void);
-bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+GGML_API void ggml_init_cublas(void);
+GGML_API void * ggml_cuda_host_malloc(size_t size);
+GGML_API void ggml_cuda_host_free(void * ptr);
+
+GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
+GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
+GGML_API void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
+GGML_API void ggml_cuda_free_data(struct ggml_tensor * tensor);
+GGML_API void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
+GGML_API void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
+GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
+GGML_API void ggml_cuda_set_main_device(int main_device);
+GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
+GGML_API void ggml_cuda_set_scratch_size(size_t scratch_size);
+GGML_API void ggml_cuda_free_scratch(void);
+GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+
+GGML_API int ggml_cuda_get_device_count(void);
+GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
 #ifdef __cplusplus
 }
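
These GGML_API annotations resolve the old "// TODO: export these with GGML_API" comment. In ggml.h of this era the macro expands approximately as below; this is a hedged reconstruction from memory, not part of the diff. It marks symbols for export or import on Windows shared builds, requests default visibility on other shared builds, and expands to nothing for static builds:

// Approximate shape of GGML_API as defined in ggml.h (assumption, for context):
#ifdef GGML_SHARED
#    if defined(_WIN32) && !defined(__MINGW32__)
#        ifdef GGML_BUILD
#            define GGML_API __declspec(dllexport)
#        else
#            define GGML_API __declspec(dllimport)
#        endif
#    else
#        define GGML_API __attribute__ ((visibility ("default")))
#    endif
#else
#    define GGML_API
#endif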