Upload generated file ggml-vulkan-shaders.hpp, remove redundant shaders
This commit is contained in:
parent
ff93769cb1
commit
0c708c1dca
3 changed files with 58811 additions and 17 deletions
58808
ggml-vulkan-shaders.hpp
Normal file
58808
ggml-vulkan-shaders.hpp
Normal file
File diff suppressed because it is too large
Load diff
|
@@ -206,7 +206,6 @@ vk_pipeline vk_pipeline_matmul_f16_f32_l, vk_pipeline_matmul_f16_f32_m, vk_pipel
|
|||
vk_pipeline vk_pipeline_matmul_f16_f32_aligned_l, vk_pipeline_matmul_f16_f32_aligned_m, vk_pipeline_matmul_f16_f32_aligned_s;
|
||||
vk_pipeline vk_pipeline_matmul_split_k_reduce;
|
||||
vk_pipeline vk_pipeline_dequant[VK_NUM_TYPES];
|
||||
vk_pipeline vk_pipeline_dequant_mul_mat_vec[VK_NUM_TYPES];
|
||||
vk_pipeline vk_pipeline_dequant_mul_mat_vec_f32[VK_NUM_TYPES];
|
||||
vk_pipeline vk_pipeline_mul_mat_vec_p021_f16_f32;
|
||||
vk_pipeline vk_pipeline_mul_mat_vec_nc_f16_f32;
|
||||
|
@@ -862,18 +861,6 @@ static void ggml_vk_load_shaders() {
|
|||
vk_pipeline_get_rows_f32[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0_f32", get_rows_q8_0_f32_fp32_len, get_rows_q8_0_f32_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
|
||||
}
|
||||
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_F16] = ggml_vk_create_pipeline("mul_mat_vec_f16", mul_mat_vec_f16_len, mul_mat_vec_f16_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("mul_mat_vec_q4_0", mul_mat_vec_q4_0_len, mul_mat_vec_q4_0_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("mul_mat_vec_q4_1", mul_mat_vec_q4_1_len, mul_mat_vec_q4_1_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("mul_mat_vec_q5_0", mul_mat_vec_q5_0_len, mul_mat_vec_q5_0_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("mul_mat_vec_q5_1", mul_mat_vec_q5_1_len, mul_mat_vec_q5_1_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("mul_mat_vec_q8_0", mul_mat_vec_q8_0_len, mul_mat_vec_q8_0_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q2_K] = ggml_vk_create_pipeline("mul_mat_vec_q2_K", mul_mat_vec_q2_K_len, mul_mat_vec_q2_K_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q3_K] = ggml_vk_create_pipeline("mul_mat_vec_q3_K", mul_mat_vec_q3_K_len, mul_mat_vec_q3_K_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q4_K] = ggml_vk_create_pipeline("mul_mat_vec_q4_K", mul_mat_vec_q4_K_len, mul_mat_vec_q4_K_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q5_K] = ggml_vk_create_pipeline("mul_mat_vec_q5_K", mul_mat_vec_q5_K_len, mul_mat_vec_q5_K_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec[GGML_TYPE_Q6_K] = ggml_vk_create_pipeline("mul_mat_vec_q6_K", mul_mat_vec_q6_K_len, mul_mat_vec_q6_K_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
|
||||
vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_F16] = ggml_vk_create_pipeline("mul_mat_vec_f16_f32", mul_mat_vec_f16_f32_len, mul_mat_vec_f16_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("mul_mat_vec_q4_0_f32", mul_mat_vec_q4_0_f32_len, mul_mat_vec_q4_0_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("mul_mat_vec_q4_1_f32", mul_mat_vec_q4_1_f32_len, mul_mat_vec_q4_1_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1);
|
||||
|
@@ -1165,7 +1152,7 @@ static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) {
|
|||
return &vk_pipeline_dequant[type];
|
||||
}
|
||||
|
||||
static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type, bool f16_y) {
|
||||
static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) {
|
||||
#ifdef VK_DEBUG
|
||||
std::cerr << "ggml_vk_get_dequantize_mul_mat_vec()" << std::endl;
|
||||
#endif
|
||||
|
@@ -1186,7 +1173,7 @@ static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type, bool f16_
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
return f16_y ? &vk_pipeline_dequant_mul_mat_vec[type] : &vk_pipeline_dequant_mul_mat_vec_f32[type];
|
||||
return &vk_pipeline_dequant_mul_mat_vec_f32[type];
|
||||
}
|
||||
|
||||
// buffer pool for vulkan
|
||||
|
@@ -2222,7 +2209,7 @@ static void ggml_vk_mul_mat_vec_q_f16(const ggml_tensor * src0, const ggml_tenso
|
|||
} else {
|
||||
to_fp16_vk_1 = ggml_vk_get_to_fp16(src1->type);
|
||||
}
|
||||
vk_pipeline* dmmv = ggml_vk_get_dequantize_mul_mat_vec(src0->type, !f16_f32_kernel);
|
||||
vk_pipeline* dmmv = ggml_vk_get_dequantize_mul_mat_vec(src0->type);
|
||||
GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr); // NOLINT
|
||||
GGML_ASSERT(!qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
GGML_ASSERT(dmmv != nullptr);
|
||||
|
|
|
@@ -2191,7 +2191,6 @@ async def main():
|
|||
else:
|
||||
continue
|
||||
|
||||
tasks.append(string_to_spv(f"mul_mat_vec_{type_names[i]}", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float16_t", "K_QUANTS_PER_ITERATION": K_QUANTS_PER_ITERATION}, fp16))
|
||||
tasks.append(string_to_spv(f"mul_mat_vec_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float", "K_QUANTS_PER_ITERATION": K_QUANTS_PER_ITERATION}, fp16))
|
||||
|
||||
tasks.append(string_to_spv(f"mul_mat_vec_p021_f16_f32", mul_mat_p021_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}, True))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue