add basic cpu and gpu info (linux/cuda only)
This commit is contained in:
parent
67362d9db0
commit
cac70312e3
3 changed files with 95 additions and 45 deletions
|
@ -1,21 +1,26 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <array>
|
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
#include <iterator>
|
||||||
|
#include <map>
|
||||||
|
#include <numeric>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
|
#include <sstream>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sstream>
|
|
||||||
#include <iterator>
|
|
||||||
#include <numeric>
|
|
||||||
#include <map>
|
|
||||||
#include <ctime>
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "build-info.h"
|
#include "build-info.h"
|
||||||
|
#ifdef GGML_USE_CUBLAS
|
||||||
|
#include "ggml-cuda.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
// utils
|
// utils
|
||||||
static uint64_t get_time_ns() {
|
static uint64_t get_time_ns() {
|
||||||
|
@ -50,7 +55,7 @@ static std::vector<T> split(const std::string & str, char delim) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
T avg(const std::vector<T> & v) {
|
static T avg(const std::vector<T> & v) {
|
||||||
if (v.empty()) {
|
if (v.empty()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -59,7 +64,7 @@ T avg(const std::vector<T> & v) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
T stdev(const std::vector<T> & v) {
|
static T stdev(const std::vector<T> & v) {
|
||||||
if (v.size() <= 1) {
|
if (v.size() <= 1) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -77,6 +82,50 @@ static bool ggml_cpu_has_metal() {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::string get_cpu_info() {
    // Returns the CPU model name (parsed from /proc/cpuinfo on Linux),
    // or an empty string when it cannot be determined.
    std::string id;
#ifdef __linux__
    FILE * f = fopen("/proc/cpuinfo", "r");
    if (f) {
        char buf[1024];
        while (fgets(buf, sizeof(buf), f)) {
            // the first "model name" line identifies the CPU model
            if (strncmp(buf, "model name", 10) == 0) {
                char * p = strchr(buf, ':');
                if (p) {
                    p++;
                    // skip leading whitespace after the ':'
                    // (cast to unsigned char: passing a negative char to isspace is UB)
                    while (*p && std::isspace((unsigned char) *p)) {
                        p++;
                    }
                    // trim trailing whitespace, incl. the '\n' left by fgets;
                    // the length guard avoids indexing p[-1] on an all-space value
                    while (strlen(p) > 0 && std::isspace((unsigned char) p[strlen(p) - 1])) {
                        p[strlen(p) - 1] = '\0';
                    }
                    id = p;
                    break;
                }
            }
        }
        fclose(f); // was leaked before: always close the stream
    }
#endif
    // TODO: other platforms
    return id;
}
|
||||||
|
|
||||||
|
static std::string get_gpu_info(void) {
    // Returns a "/"-separated list of GPU device names (CUDA builds only);
    // empty string on builds without a supported GPU backend.
    std::string id;
#ifdef GGML_USE_CUBLAS
    const int n_devices = ggml_cuda_get_device_count();
    for (int dev = 0; dev < n_devices; dev++) {
        char description[128];
        ggml_cuda_get_device_description(dev, description, sizeof(description));
        id += description;
        if (dev + 1 < n_devices) {
            id += "/";
        }
    }
#endif
    // TODO: other backends
    return id;
}
|
||||||
|
|
||||||
// command line params
|
// command line params
|
||||||
enum output_formats {CSV, JSON, MARKDOWN, SQL};
|
enum output_formats {CSV, JSON, MARKDOWN, SQL};
|
||||||
|
@ -392,6 +441,8 @@ struct test {
|
||||||
static const bool metal;
|
static const bool metal;
|
||||||
static const bool gpu_blas;
|
static const bool gpu_blas;
|
||||||
static const bool blas;
|
static const bool blas;
|
||||||
|
static const std::string cpu_info;
|
||||||
|
static const std::string gpu_info;
|
||||||
std::string model_filename;
|
std::string model_filename;
|
||||||
std::string model_type;
|
std::string model_type;
|
||||||
int n_batch;
|
int n_batch;
|
||||||
|
@ -476,6 +527,7 @@ struct test {
|
||||||
static const std::vector<std::string> fields = {
|
static const std::vector<std::string> fields = {
|
||||||
"build_commit", "build_number",
|
"build_commit", "build_number",
|
||||||
"cuda", "opencl", "metal", "gpu_blas", "blas",
|
"cuda", "opencl", "metal", "gpu_blas", "blas",
|
||||||
|
"cpu_info", "gpu_info",
|
||||||
"model_filename", "model_type",
|
"model_filename", "model_type",
|
||||||
"n_batch", "n_threads", "f16_kv",
|
"n_batch", "n_threads", "f16_kv",
|
||||||
"n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
|
"n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
|
||||||
|
@ -503,6 +555,7 @@ struct test {
|
||||||
std::vector<std::string> values = {
|
std::vector<std::string> values = {
|
||||||
build_commit, std::to_string(build_number),
|
build_commit, std::to_string(build_number),
|
||||||
std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
|
std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
|
||||||
|
cpu_info, gpu_info,
|
||||||
model_filename, model_type,
|
model_filename, model_type,
|
||||||
std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
|
std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
|
||||||
std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
|
std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
|
||||||
|
@ -530,7 +583,8 @@ const bool test::opencl = !!ggml_cpu_has_clblast();
|
||||||
const bool test::metal = !!ggml_cpu_has_metal();
|
const bool test::metal = !!ggml_cpu_has_metal();
|
||||||
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
|
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
|
||||||
const bool test::blas = !!ggml_cpu_has_blas();
|
const bool test::blas = !!ggml_cpu_has_blas();
|
||||||
|
const std::string test::cpu_info = get_cpu_info();
|
||||||
|
const std::string test::gpu_info = get_gpu_info();
|
||||||
|
|
||||||
struct printer {
|
struct printer {
|
||||||
FILE * fout;
|
FILE * fout;
|
||||||
|
@ -691,30 +745,18 @@ struct markdown_printer : public printer {
|
||||||
|
|
||||||
struct sql_printer : public printer {
|
struct sql_printer : public printer {
|
||||||
static std::string get_field_type(const std::string & field) {
|
static std::string get_field_type(const std::string & field) {
|
||||||
if (field == "build_commit") {
|
|
||||||
return "TEXT";
|
|
||||||
}
|
|
||||||
if (field == "build_number") {
|
if (field == "build_number") {
|
||||||
return "INTEGER";
|
return "INTEGER";
|
||||||
}
|
}
|
||||||
if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas") {
|
if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas") {
|
||||||
return "INTEGER";
|
return "INTEGER";
|
||||||
}
|
}
|
||||||
if (field == "model_filename" || field == "model_type") {
|
|
||||||
return "TEXT";
|
|
||||||
}
|
|
||||||
if (field == "n_batch" || field == "n_threads" || field == "f16_kv" || field == "n_gpu_layers" || field == "main_gpu" || field == "mul_mat_q" || field == "low_vram") {
|
if (field == "n_batch" || field == "n_threads" || field == "f16_kv" || field == "n_gpu_layers" || field == "main_gpu" || field == "mul_mat_q" || field == "low_vram") {
|
||||||
return "INTEGER";
|
return "INTEGER";
|
||||||
}
|
}
|
||||||
if (field == "tensor_split") {
|
|
||||||
return "TEXT";
|
|
||||||
}
|
|
||||||
if (field == "n_prompt" || field == "n_gen") {
|
if (field == "n_prompt" || field == "n_gen") {
|
||||||
return "INTEGER";
|
return "INTEGER";
|
||||||
}
|
}
|
||||||
if (field == "test_time") {
|
|
||||||
return "TEXT";
|
|
||||||
}
|
|
||||||
if (field == "avg_ns" || field == "stddev_ns" || field == "avg_ts" || field == "stddev_ts") {
|
if (field == "avg_ns" || field == "stddev_ns" || field == "avg_ts" || field == "stddev_ts") {
|
||||||
return "REAL";
|
return "REAL";
|
||||||
}
|
}
|
||||||
|
@ -743,7 +785,7 @@ struct sql_printer : public printer {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
|
static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
|
||||||
std::vector<llama_token> tokens(n_batch, llama_token_bos());
|
std::vector<llama_token> tokens(n_batch, llama_token_bos());
|
||||||
int n_processed = 0;
|
int n_processed = 0;
|
||||||
while (n_processed < n_prompt) {
|
while (n_processed < n_prompt) {
|
||||||
|
@ -753,14 +795,14 @@ void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
|
static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
    // Benchmark helper: evaluate n_gen single tokens sequentially (the BOS
    // token each step), advancing the KV position from n_past.
    llama_token tok = llama_token_bos();
    for (int step = 0; step < n_gen; step++) {
        llama_eval(ctx, &tok, 1, n_past + step, n_threads);
    }
}
|
||||||
|
|
||||||
void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
|
static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
|
||||||
(void)level;
|
(void)level;
|
||||||
(void)text;
|
(void)text;
|
||||||
(void)user_data;
|
(void)user_data;
|
||||||
|
|
12
ggml-cuda.cu
12
ggml-cuda.cu
|
@ -6469,3 +6469,15 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
|
||||||
func(tensor->src[0], tensor->src[1], tensor);
|
func(tensor->src[0], tensor->src[1], tensor);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Number of CUDA devices visible to the runtime.
// CUDA_CHECK handles a failing cudaGetDeviceCount call.
int ggml_cuda_get_device_count() {
    int n_devices;
    CUDA_CHECK(cudaGetDeviceCount(&n_devices));
    return n_devices;
}
|
||||||
|
|
||||||
|
void ggml_cuda_get_device_description(int device, char * description, size_t description_size) {
|
||||||
|
cudaDeviceProp prop;
|
||||||
|
CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
|
||||||
|
snprintf(description, description_size, "%s", prop.name);
|
||||||
|
}
|
||||||
|
|
38
ggml-cuda.h
38
ggml-cuda.h
|
@ -8,29 +8,25 @@ extern "C" {
|
||||||
|
|
||||||
#define GGML_CUDA_MAX_DEVICES 16
|
#define GGML_CUDA_MAX_DEVICES 16
|
||||||
|
|
||||||
void ggml_init_cublas(void);
|
GGML_API void ggml_init_cublas(void);
|
||||||
void ggml_cuda_set_tensor_split(const float * tensor_split);
|
GGML_API void * ggml_cuda_host_malloc(size_t size);
|
||||||
|
GGML_API void ggml_cuda_host_free(void * ptr);
|
||||||
|
|
||||||
void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||||
bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
|
||||||
size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
GGML_API void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
|
||||||
void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
GGML_API void ggml_cuda_free_data(struct ggml_tensor * tensor);
|
||||||
|
GGML_API void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
|
||||||
|
GGML_API void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
|
||||||
|
GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
|
||||||
|
GGML_API void ggml_cuda_set_main_device(int main_device);
|
||||||
|
GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
|
||||||
|
GGML_API void ggml_cuda_set_scratch_size(size_t scratch_size);
|
||||||
|
GGML_API void ggml_cuda_free_scratch(void);
|
||||||
|
GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
||||||
|
|
||||||
// TODO: export these with GGML_API
|
GGML_API int ggml_cuda_get_device_count(void);
|
||||||
void * ggml_cuda_host_malloc(size_t size);
|
GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
|
||||||
void ggml_cuda_host_free(void * ptr);
|
|
||||||
|
|
||||||
void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
|
|
||||||
|
|
||||||
void ggml_cuda_free_data(struct ggml_tensor * tensor);
|
|
||||||
void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
|
|
||||||
void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
|
|
||||||
void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
|
|
||||||
void ggml_cuda_set_main_device(int main_device);
|
|
||||||
void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
|
|
||||||
void ggml_cuda_set_scratch_size(size_t scratch_size);
|
|
||||||
void ggml_cuda_free_scratch(void);
|
|
||||||
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue