Reinstated the reusable buffers, giving an approximately 10% speedup for prompt processing
This commit is contained in:
parent
811989c2ad
commit
cd6c121357
2 changed files with 73 additions and 26 deletions
|
@ -23,6 +23,9 @@ cl_program program;
|
||||||
cl_kernel kernel_q4_0, kernel_q4_1;
|
cl_kernel kernel_q4_0, kernel_q4_1;
|
||||||
bool cl_initialized = false;
|
bool cl_initialized = false;
|
||||||
|
|
||||||
|
size_t cl_size_a = 0, cl_size_b = 0, cl_size_qb = 0, cl_size_c = 0;
|
||||||
|
cl_mem cl_buffer_a, cl_buffer_b, cl_buffer_qb, cl_buffer_c;
|
||||||
|
|
||||||
// Function taken from https://github.com/rsnemmen/OpenCL-examples/blob/master/add_numbers/add_numbers.c
|
// Function taken from https://github.com/rsnemmen/OpenCL-examples/blob/master/add_numbers/add_numbers.c
|
||||||
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {
|
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {
|
||||||
|
|
||||||
|
@ -163,6 +166,33 @@ static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
size_t defaultBufSize = 8*1024*1024;
|
||||||
|
cl_size_a = defaultBufSize * sizeof(float);
|
||||||
|
cl_size_b = defaultBufSize * sizeof(float);
|
||||||
|
cl_size_qb = defaultBufSize * sizeof(float);
|
||||||
|
cl_size_c = defaultBufSize * sizeof(float);
|
||||||
|
// Prepare buffers
|
||||||
|
cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_ONLY, cl_size_a, NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("Error creating OpenCL Buffer A: %d\n", err);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, cl_size_b, NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("Error creating OpenCL Buffer B: %d\n", err);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
cl_buffer_qb = clCreateBuffer(context, CL_MEM_READ_WRITE, cl_size_qb, NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("Error creating OpenCL Buffer B: %d\n", err);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, cl_size_c, NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("Error creating OpenCL Buffer C: %d\n", err);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
cl_initialized = true;
|
cl_initialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,31 +200,54 @@ static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS
|
||||||
cl_kernel kernel = btype == 2 ? kernel_q4_0 : kernel_q4_1;
|
cl_kernel kernel = btype == 2 ? kernel_q4_0 : kernel_q4_1;
|
||||||
|
|
||||||
size_t global = n * k, local = 16, qb_size;
|
size_t global = n * k, local = 16, qb_size;
|
||||||
cl_mem cl_buffer_a, cl_buffer_qb, cl_buffer_b, cl_buffer_c;
|
|
||||||
|
|
||||||
// Prepare buffers
|
// Prepare buffers
|
||||||
cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_ONLY, m*k*sizeof(float), NULL, &err);
|
if(m*k*sizeof(float) > cl_size_a)
|
||||||
if (err != CL_SUCCESS) {
|
{
|
||||||
printf("Error creating OpenCL Buffer A: %d\n", err);
|
cl_size_a = m*k*sizeof(float);
|
||||||
fflush(stdout);
|
clReleaseMemObject(cl_buffer_a);
|
||||||
}
|
cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_ONLY, cl_size_a, NULL, &err);
|
||||||
if (dequant) {
|
|
||||||
qb_size = global * (sizeof(float) * (btype == 2 ? 1 : 2) + 16) / 32;
|
|
||||||
cl_buffer_qb = clCreateBuffer(context, CL_MEM_READ_ONLY, qb_size, NULL, &err);
|
|
||||||
if (err != CL_SUCCESS) {
|
if (err != CL_SUCCESS) {
|
||||||
printf("Error creating OpenCL Buffer QB: %d\n", err);
|
printf("Error creating OpenCL Buffer A: %d\n", err);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
//printf("\nRealloc A: %d",cl_size_a);
|
||||||
}
|
}
|
||||||
cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, &err);
|
if (dequant) {
|
||||||
if (err != CL_SUCCESS) {
|
qb_size = global * (sizeof(float) * (btype == 2 ? 1 : 2) + 16) / 32;
|
||||||
printf("Error creating OpenCL Buffer B: %d\n", err);
|
if(qb_size > cl_size_qb)
|
||||||
fflush(stdout);
|
{
|
||||||
|
cl_size_qb = qb_size;
|
||||||
|
clReleaseMemObject(cl_buffer_qb);
|
||||||
|
cl_buffer_qb = clCreateBuffer(context, CL_MEM_READ_ONLY, qb_size, NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("Error creating OpenCL Buffer QB: %d\n", err);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
//printf("\nRealloc qB: %d",cl_size_qb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, &err);
|
if(n*k*sizeof(float) > cl_size_b)
|
||||||
if (err != CL_SUCCESS) {
|
{
|
||||||
printf("Error creating OpenCL Buffer C: %d\n", err);
|
cl_size_b = n*k*sizeof(float);
|
||||||
fflush(stdout);
|
clReleaseMemObject(cl_buffer_b);
|
||||||
|
cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, cl_size_b, NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("Error creating OpenCL Buffer B: %d\n", err);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
//printf("\nRealloc B: %d",cl_size_b);
|
||||||
|
}
|
||||||
|
if(m*n*sizeof(float) > cl_size_c)
|
||||||
|
{
|
||||||
|
cl_size_c = m*n*sizeof(float);
|
||||||
|
clReleaseMemObject(cl_buffer_c);
|
||||||
|
cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, cl_size_c, NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("Error creating OpenCL Buffer C: %d\n", err);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
//printf("\nRealloc C: %d",cl_size_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dequant) {
|
if (dequant) {
|
||||||
|
@ -245,13 +298,7 @@ static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS
|
||||||
clReleaseEvent(events[0]);
|
clReleaseEvent(events[0]);
|
||||||
clReleaseEvent(events[1]);
|
clReleaseEvent(events[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
clReleaseMemObject(cl_buffer_a);
|
|
||||||
if (dequant) {
|
|
||||||
clReleaseMemObject(cl_buffer_qb);
|
|
||||||
}
|
|
||||||
clReleaseMemObject(cl_buffer_b);
|
|
||||||
clReleaseMemObject(cl_buffer_c);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -139,7 +139,7 @@ maxctx = 2048
|
||||||
maxlen = 128
|
maxlen = 128
|
||||||
modelbusy = False
|
modelbusy = False
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.11"
|
KcppVersion = "1.12"
|
||||||
|
|
||||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
sys_version = ""
|
sys_version = ""
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue