remove excessive code and prints

This commit is contained in:
liang 2023-05-28 08:45:51 +08:00
parent e09c67d141
commit 0d308e2ef2

View file

@ -197,7 +197,6 @@ static __global__ void dequantize_block(const void * vx, float * y, const int k)
// dequantize
float & v0 = y[iybs + iqs + 0];
float & v1 = y[iybs + iqs + y_offset];
dequantize_kernel(vx, ib, iqs, v0, v1);
}
@ -415,42 +414,6 @@ static cudaStream_t g_cudaStreams2[GGML_CUDA_MAX_STREAMS] = { nullptr };
static cudaEvent_t g_cudaEvents[GGML_CUDA_MAX_EVENTS] = { nullptr };
void ggml_init_cublas() {
int device_id = 0;
cudaSetDevice(device_id);
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, device_id);
fprintf(stderr, "Device id: %d\n",
device_id);
fprintf(stderr, "Device name: %s\n",
prop.name);
fprintf(stderr, "Compute capability: %d.%d\n",
prop.major, prop.minor);
fprintf(stderr, "Amount of global memory: %g GB\n",
prop.totalGlobalMem / (1024.0 * 1024 * 1024));
fprintf(stderr, "Amount of constant memory: %g KB\n",
prop.totalConstMem / 1024.0);
fprintf(stderr, "Maximum grid size: %d %d %d\n",
prop.maxGridSize[0],
prop.maxGridSize[1], prop.maxGridSize[2]);
fprintf(stderr, "Maximum block size: %d %d %d\n",
prop.maxThreadsDim[0], prop.maxThreadsDim[1],
prop.maxThreadsDim[2]);
fprintf(stderr, "Number of SMs: %d\n",
prop.multiProcessorCount);
fprintf(stderr, "Maximum amount of shared memory per block: %g KB\n",
prop.sharedMemPerBlock / 1024.0);
fprintf(stderr, "Maximum amount of shared memory per SM: %g KB\n",
prop.sharedMemPerMultiprocessor / 1024.0);
fprintf(stderr, "Maximum number of registers per block: %d K\n",
prop.regsPerBlock / 1024);
fprintf(stderr, "Maximum number of registers per SM: %d K\n",
prop.regsPerMultiprocessor / 1024);
fprintf(stderr, "Maximum number of threads per block: %d\n",
prop.maxThreadsPerBlock);
fprintf(stderr, "Maximum number of threads per SM: %d\n",
prop.maxThreadsPerMultiProcessor);
if (g_cublasH == nullptr) {
// create streams
for (int i = 0; i < GGML_CUDA_MAX_STREAMS; ++i) {
@ -486,10 +449,6 @@ void * ggml_cuda_host_malloc(size_t size) {
size/1024.0/1024.0, cudaGetErrorString(err));
return nullptr;
}
else{
fprintf(stderr, "INFO: succeed to allocate %.2f MB of pinned memory\n",
size/1024.0/1024.0);
}
return ptr;
}