Remove excess code and print statements
This commit is contained in:
parent
e09c67d141
commit
0d308e2ef2
1 changed file with 0 additions and 41 deletions
41
ggml-cuda.cu
41
ggml-cuda.cu
|
@ -197,7 +197,6 @@ static __global__ void dequantize_block(const void * vx, float * y, const int k)
|
||||||
// dequantize
|
// dequantize
|
||||||
float & v0 = y[iybs + iqs + 0];
|
float & v0 = y[iybs + iqs + 0];
|
||||||
float & v1 = y[iybs + iqs + y_offset];
|
float & v1 = y[iybs + iqs + y_offset];
|
||||||
|
|
||||||
dequantize_kernel(vx, ib, iqs, v0, v1);
|
dequantize_kernel(vx, ib, iqs, v0, v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -415,42 +414,6 @@ static cudaStream_t g_cudaStreams2[GGML_CUDA_MAX_STREAMS] = { nullptr };
|
||||||
static cudaEvent_t g_cudaEvents[GGML_CUDA_MAX_EVENTS] = { nullptr };
|
static cudaEvent_t g_cudaEvents[GGML_CUDA_MAX_EVENTS] = { nullptr };
|
||||||
|
|
||||||
void ggml_init_cublas() {
|
void ggml_init_cublas() {
|
||||||
int device_id = 0;
|
|
||||||
cudaSetDevice(device_id);
|
|
||||||
|
|
||||||
cudaDeviceProp prop;
|
|
||||||
cudaGetDeviceProperties(&prop, device_id);
|
|
||||||
|
|
||||||
fprintf(stderr, "Device id: %d\n",
|
|
||||||
device_id);
|
|
||||||
fprintf(stderr, "Device name: %s\n",
|
|
||||||
prop.name);
|
|
||||||
fprintf(stderr, "Compute capability: %d.%d\n",
|
|
||||||
prop.major, prop.minor);
|
|
||||||
fprintf(stderr, "Amount of global memory: %g GB\n",
|
|
||||||
prop.totalGlobalMem / (1024.0 * 1024 * 1024));
|
|
||||||
fprintf(stderr, "Amount of constant memory: %g KB\n",
|
|
||||||
prop.totalConstMem / 1024.0);
|
|
||||||
fprintf(stderr, "Maximum grid size: %d %d %d\n",
|
|
||||||
prop.maxGridSize[0],
|
|
||||||
prop.maxGridSize[1], prop.maxGridSize[2]);
|
|
||||||
fprintf(stderr, "Maximum block size: %d %d %d\n",
|
|
||||||
prop.maxThreadsDim[0], prop.maxThreadsDim[1],
|
|
||||||
prop.maxThreadsDim[2]);
|
|
||||||
fprintf(stderr, "Number of SMs: %d\n",
|
|
||||||
prop.multiProcessorCount);
|
|
||||||
fprintf(stderr, "Maximum amount of shared memory per block: %g KB\n",
|
|
||||||
prop.sharedMemPerBlock / 1024.0);
|
|
||||||
fprintf(stderr, "Maximum amount of shared memory per SM: %g KB\n",
|
|
||||||
prop.sharedMemPerMultiprocessor / 1024.0);
|
|
||||||
fprintf(stderr, "Maximum number of registers per block: %d K\n",
|
|
||||||
prop.regsPerBlock / 1024);
|
|
||||||
fprintf(stderr, "Maximum number of registers per SM: %d K\n",
|
|
||||||
prop.regsPerMultiprocessor / 1024);
|
|
||||||
fprintf(stderr, "Maximum number of threads per block: %d\n",
|
|
||||||
prop.maxThreadsPerBlock);
|
|
||||||
fprintf(stderr, "Maximum number of threads per SM: %d\n",
|
|
||||||
prop.maxThreadsPerMultiProcessor);
|
|
||||||
if (g_cublasH == nullptr) {
|
if (g_cublasH == nullptr) {
|
||||||
// create streams
|
// create streams
|
||||||
for (int i = 0; i < GGML_CUDA_MAX_STREAMS; ++i) {
|
for (int i = 0; i < GGML_CUDA_MAX_STREAMS; ++i) {
|
||||||
|
@ -486,10 +449,6 @@ void * ggml_cuda_host_malloc(size_t size) {
|
||||||
size/1024.0/1024.0, cudaGetErrorString(err));
|
size/1024.0/1024.0, cudaGetErrorString(err));
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
else{
|
|
||||||
fprintf(stderr, "INFO: succeed to allocate %.2f MB of pinned memory\n",
|
|
||||||
size/1024.0/1024.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue