diff --git a/ggml-cuda/template-instances/generate_cu_files.py b/ggml-cuda/template-instances/generate_cu_files.py
index 50d2be7c8..ee5b460e0 100644
--- a/ggml-cuda/template-instances/generate_cu_files.py
+++ b/ggml-cuda/template-instances/generate_cu_files.py
@@ -20,9 +20,11 @@ SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_
 
 SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n"
 
+
 def get_short_name(long_quant_name):
     return long_quant_name.replace("GGML_TYPE_", "").lower()
 
+
 def get_head_sizes(type_k, type_v):
     if type_k == "GGML_TYPE_F16" and type_v == "GGML_TYPE_F16":
         return [64, 128, 256]
@@ -30,6 +32,7 @@ def get_head_sizes(type_k, type_v):
         return [64, 128]
     return [128]
 
+
 for filename in glob("*.cu"):
     os.remove(filename)
 