ggml-cuda : add TQ2_0 support

This commit is contained in:
Francis Couture-Harpin 2024-12-27 20:21:28 -05:00
parent 5cd85b5e00
commit 970b5ab7ca
11 changed files with 241 additions and 2 deletions

View file

@ -3375,7 +3375,8 @@ static const ggml_type all_types[] = {
GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
GGML_TYPE_Q4_K, GGML_TYPE_Q5_K,
GGML_TYPE_Q6_K,
// GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
// GGML_TYPE_TQ1_0,
GGML_TYPE_TQ2_0,
GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
@ -3387,6 +3388,7 @@ static const ggml_type base_types[] = {
GGML_TYPE_Q4_0,
GGML_TYPE_Q4_1, // for I8MM tests
GGML_TYPE_Q4_K,
GGML_TYPE_TQ2_0,
GGML_TYPE_IQ2_XXS
};
@ -3397,7 +3399,8 @@ static const ggml_type other_types[] = {
GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
GGML_TYPE_Q5_K,
GGML_TYPE_Q6_K,
// GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
// GGML_TYPE_TQ1_0,
GGML_TYPE_TQ2_0,
GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,