iq1s_blocks16: going to blocks of 32

with 2048 lattice points, so same bpw.
This is even better than blocks of 16.
Should I try blocks of 64? But to keep the same
bpw, when I go to 4096 lattice points, I need to
remove blocks altogether and just have superblocks of
256 weights.
This commit is contained in:
Iwan Kawrakow 2024-03-08 14:42:55 +02:00
parent cd83a7d362
commit 4c4404ace5
3 changed files with 150 additions and 90 deletions

View file

@ -565,9 +565,8 @@ static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N
#define QI1_S (QK_K / (4*QR1_S)) #define QI1_S (QK_K / (4*QR1_S))
typedef struct { typedef struct {
half d; half d;
uint8_t qs[QK_K/8]; uint8_t qs[QK_K/8];
uint8_t qh[QK_K/32]; uint16_t qh[QK_K/32];
uint8_t scales[QK_K/32];
} block_iq1_s; } block_iq1_s;
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
@ -1723,9 +1722,8 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_
const int il = tid/8; // 0...3 const int il = tid/8; // 0...3
const int ib = tid%8; // 0...7 const int ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il; dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const int i8 = 4*ib+il; const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)));
const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[i8] | (((x[i].qh[i8/4] >> 2*(i8%4)) & 3) << 8))); const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1);
const float d = (float)x[i].d * (2*((x[i].scales[ib] >> 4*(il/2)) & 0xf) + 1);
for (int j = 0; j < 8; ++j) y[j] = d * grid[j]; for (int j = 0; j < 8; ++j) y[j] = d * grid[j];
#else #else
assert(false); assert(false);
@ -4546,8 +4544,8 @@ static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
const int ib32 = iqs; const int ib32 = iqs;
int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0; int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0;
const uint8_t h1 = bq1->scales[2*ib32+0]; const uint8_t h1 = bq1->qh[2*ib32+0]; //bq1->scales[2*ib32+0];
const uint8_t h2 = bq1->scales[2*ib32+1]; const uint8_t h2 = bq1->qh[2*ib32+1]; //bq1->scales[2*ib32+1];
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
const int * q8 = (const int *)bq8_1[ib32].qs; const int * q8 = (const int *)bq8_1[ib32].qs;
const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5)));

View file

@ -3456,7 +3456,7 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
for (int i = 0; i < nb; i++) { for (int i = 0; i < nb; i++) {
const float d = GGML_FP16_TO_FP32(x[i].d); const float d = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * sc = x[i].scales; const uint8_t * sc = (const uint8_t *)x[i].qh; //x[i].scales;
const uint8_t * qs = x[i].qs; const uint8_t * qs = x[i].qs;
for (int i8 = 0; i8 < QK_K/8; i8 += 4) { for (int i8 = 0; i8 < QK_K/8; i8 += 4) {
@ -9673,7 +9673,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
const int8_t * q8 = y[i].qs; const int8_t * q8 = y[i].qs;
const uint8_t * qs = x[i].qs; const uint8_t * qs = x[i].qs;
const uint8_t * sc = x[i].scales; const uint8_t * sc = (const uint8_t *)x[i].qh; //x[i].scales;
__m256i sumi = _mm256_setzero_si256(); __m256i sumi = _mm256_setzero_si256();
for (int i128 = 0; i128 < QK_K/128; ++i128) { for (int i128 = 0; i128 < QK_K/128; ++i128) {
@ -10064,70 +10064,134 @@ void iq2xs_init_impl(enum ggml_type type) {
42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690, 42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690,
}; };
static const uint16_t kgrid_1bit_512[NGRID_IQ1S] = { static const uint16_t kgrid_1bit_512[NGRID_IQ1S] = {
5, 32, 40, 89, 101, 128, 138, 149, 160, 162, 170, 273, 281, 294, 329, 336, 0, 2, 5, 8, 10, 17, 21, 32, 34, 40, 42, 69, 81, 84, 86, 101,
338, 341, 344, 346, 353, 356, 389, 401, 404, 409, 421, 517, 552, 584, 586, 593, 128, 130, 136, 138, 149, 160, 162, 168, 170, 260, 261, 273, 276, 278, 281, 282,
640, 642, 661, 672, 674, 1108, 1110, 1160, 1169, 1192, 1286, 1301, 1306, 1313, 1349, 1361, 293, 321, 326, 329, 338, 341, 346, 353, 356, 358, 360, 389, 401, 404, 406, 421,
1365, 1369, 1381, 1429, 1441, 1449, 1536, 1561, 1620, 1622, 1669, 1704, 1706, 2048, 2080, 2082, 512, 514, 520, 522, 533, 544, 546, 552, 554, 581, 593, 601, 612, 617, 640, 642,
2122, 2129, 2176, 2178, 2197, 2208, 2210, 2326, 2329, 2341, 2369, 2384, 2389, 2401, 2404, 2409, 648, 650, 657, 661, 665, 672, 674, 680, 682, 1041, 1044, 1046, 1061, 1089, 1097, 1109,
2469, 2562, 2570, 2581, 2592, 2594, 2600, 2602, 2629, 2649, 2661, 2696, 2698, 2705, 4113, 4133, 1114, 1124, 1125, 1169, 1177, 1189, 1281, 1284, 1285, 1286, 1301, 1304, 1306, 1321, 1344, 1349,
4181, 4186, 4193, 4226, 4249, 4261, 4353, 4356, 4358, 4361, 4370, 4373, 4378, 4385, 4393, 4416, 1354, 1360, 1361, 1364, 1365, 1366, 1369, 1376, 1378, 1381, 1384, 1386, 1409, 1425, 1429, 1432,
4421, 4433, 4437, 4441, 4448, 4450, 4453, 4484, 4489, 4502, 4516, 4625, 4645, 4692, 4694, 4705, 1434, 1441, 1444, 1445, 1446, 1449, 1556, 1561, 1601, 1604, 1616, 1618, 1621, 1624, 1632, 1633,
4753, 4773, 5141, 5153, 5161, 5190, 5193, 5201, 5205, 5208, 5216, 5221, 5253, 5268, 5393, 5397, 1638, 1641, 1669, 1681, 1684, 1689, 2048, 2050, 2056, 2058, 2069, 2080, 2082, 2088, 2090, 2117,
5398, 5401, 5410, 5412, 5442, 5444, 5445, 5450, 5457, 5460, 5461, 5462, 5465, 5473, 5477, 5480, 2129, 2134, 2149, 2176, 2178, 2184, 2186, 2197, 2208, 2210, 2216, 2218, 2309, 2321, 2324, 2329,
5482, 5504, 5510, 5521, 5525, 5528, 5541, 5653, 5697, 5702, 5705, 5712, 5714, 5717, 5720, 5722, 2340, 2341, 2369, 2384, 2385, 2389, 2401, 2404, 2409, 2449, 2452, 2454, 2457, 2469, 2560, 2562,
5732, 5734, 5737, 5781, 6146, 6152, 6181, 6186, 6213, 6228, 6230, 6233, 6241, 6289, 6309, 6314, 2568, 2570, 2581, 2592, 2594, 2600, 2602, 2629, 2641, 2649, 2657, 2661, 2688, 2690, 2693, 2696,
6405, 6420, 6470, 6473, 6481, 6485, 6488, 6490, 6496, 6501, 6533, 6548, 6550, 6553, 6561, 6664, 2698, 2709, 2720, 2722, 2728, 2730, 4112, 4113, 4116, 4121, 4132, 4133, 4161, 4164, 4176, 4181,
6678, 6741, 6753, 6786, 8194, 8213, 8261, 8281, 8294, 8328, 8330, 8337, 8360, 8464, 8472, 8485, 4184, 4193, 4196, 4197, 4201, 4241, 4244, 4246, 4257, 4261, 4353, 4356, 4358, 4361, 4368, 4370,
8533, 8545, 8548, 8581, 8596, 8613, 8725, 8738, 8776, 8785, 8793, 8805, 8832, 8834, 8858, 8864, 4373, 4376, 4385, 4388, 4393, 4421, 4426, 4432, 4433, 4434, 4436, 4437, 4438, 4441, 4448, 4453,
8866, 8872, 9226, 9236, 9253, 9301, 9321, 9381, 9477, 9505, 9542, 9545, 9553, 9556, 9557, 9562, 4484, 4498, 4501, 4513, 4516, 4625, 4628, 4630, 4645, 4672, 4678, 4681, 4690, 4693, 4696, 4698,
9573, 9622, 9633, 9641, 9728, 9730, 9738, 9770, 9813, 10261, 10272, 10274, 10304, 10321, 10344, 10370, 4708, 4710, 4741, 4753, 4756, 4758, 4773, 5121, 5126, 5129, 5140, 5141, 5144, 5145, 5153, 5158,
10376, 10378, 10400, 10402, 10521, 10533, 10576, 10578, 10581, 10598, 10661, 10769, 10856, 10888, 10890, 10897, 5185, 5189, 5190, 5192, 5194, 5201, 5204, 5205, 5206, 5209, 5218, 5221, 5224, 5252, 5257, 5264,
16384, 16457, 16469, 16472, 16484, 16529, 16646, 16649, 16661, 16664, 16666, 16681, 16709, 16721, 16724, 16725, 5268, 5269, 5272, 5273, 5274, 5281, 5284, 5285, 5289, 5378, 5381, 5386, 5393, 5396, 5397, 5398,
16726, 16729, 16741, 16746, 16769, 16784, 16789, 16804, 16809, 16918, 16928, 16961, 17001, 17033, 17041, 17425, 5401, 5408, 5410, 5413, 5416, 5418, 5441, 5444, 5445, 5446, 5457, 5458, 5460, 5461, 5462, 5465,
17428, 17430, 17445, 17473, 17476, 17478, 17490, 17493, 17510, 17513, 17541, 17556, 17558, 17573, 17665, 17668, 5466, 5473, 5476, 5477, 5478, 5481, 5504, 5506, 5508, 5509, 5512, 5514, 5520, 5521, 5524, 5525,
17680, 17682, 17685, 17689, 17728, 17730, 17733, 17736, 17738, 17745, 17748, 17749, 17750, 17753, 17762, 17765, 5526, 5529, 5530, 5536, 5538, 5541, 5633, 5636, 5637, 5638, 5653, 5654, 5656, 5658, 5665, 5670,
17768, 17796, 17798, 17809, 17813, 17817, 17830, 17929, 17937, 17940, 17957, 17989, 18000, 18002, 18005, 18008, 5696, 5698, 5700, 5701, 5704, 5706, 5713, 5717, 5718, 5720, 5721, 5729, 5732, 5733, 5736, 5737,
18010, 18017, 18020, 18022, 18049, 18068, 18070, 18085, 18472, 18512, 18517, 18577, 18694, 18709, 18721, 18757, 5738, 5766, 5770, 5778, 5781, 5796, 5801, 6161, 6166, 6181, 6209, 6212, 6214, 6217, 6224, 6229,
18772, 18773, 18778, 18784, 18789, 18817, 18825, 18837, 18849, 18949, 18966, 19017, 19029, 20501, 20513, 20521, 6232, 6234, 6240, 6241, 6244, 6246, 6249, 6277, 6289, 6292, 6309, 6416, 6418, 6421, 6426, 6433,
20545, 20550, 20564, 20565, 20569, 20581, 20629, 20741, 20753, 20757, 20758, 20773, 20776, 20805, 20808, 20816, 6437, 6466, 6468, 6469, 6472, 6481, 6484, 6485, 6486, 6489, 6490, 6496, 6501, 6506, 6537, 6545,
20817, 20818, 20820, 20821, 20822, 20825, 20833, 20836, 20837, 20838, 20841, 20870, 20881, 20884, 20885, 20889, 6546, 6549, 6552, 6561, 6566, 6569, 6665, 6678, 6692, 6694, 6724, 6726, 6729, 6736, 6738, 6741,
20901, 21001, 21012, 21060, 21062, 21073, 21077, 21080, 21082, 21141, 21509, 21520, 21522, 21525, 21528, 21530, 6744, 6753, 6758, 6761, 6789, 6801, 6806, 6810, 8192, 8194, 8200, 8202, 8213, 8224, 8226, 8229,
21540, 21542, 21568, 21570, 21573, 21576, 21578, 21585, 21588, 21589, 21590, 21593, 21602, 21605, 21608, 21610, 8232, 8234, 8261, 8273, 8281, 8289, 8293, 8320, 8322, 8328, 8330, 8341, 8352, 8354, 8357, 8360,
21636, 21638, 21641, 21648, 21650, 21653, 21656, 21665, 21668, 21670, 21673, 21760, 21762, 21765, 21768, 21770, 8362, 8453, 8465, 8468, 8473, 8485, 8514, 8516, 8521, 8533, 8536, 8538, 8545, 8548, 8549, 8550,
21777, 21780, 21781, 21782, 21785, 21793, 21797, 21802, 21825, 21828, 21829, 21830, 21833, 21840, 21841, 21842, 8581, 8592, 8598, 8601, 8613, 8705, 8712, 8714, 8721, 8725, 8736, 8738, 8744, 8746, 8773, 8785,
21844, 21845, 21846, 21848, 21849, 21850, 21857, 21860, 21861, 21862, 21865, 21889, 21893, 21896, 21898, 21905, 8790, 8793, 8805, 8833, 8840, 8842, 8849, 8853, 8864, 8866, 8872, 8874, 9221, 9236, 9238, 9241,
21908, 21909, 21910, 21913, 21920, 21922, 21925, 21928, 21930, 22017, 22020, 22032, 22034, 22037, 22042, 22052, 9253, 9284, 9285, 9286, 9289, 9298, 9301, 9304, 9306, 9318, 9349, 9361, 9364, 9369, 9377, 9381,
22054, 22057, 22080, 22082, 22085, 22088, 22090, 22097, 22100, 22101, 22102, 22105, 22112, 22114, 22117, 22120, 9481, 9493, 9505, 9513, 9536, 9541, 9544, 9553, 9556, 9557, 9561, 9570, 9573, 9576, 9609, 9616,
22122, 22148, 22150, 22160, 22162, 22165, 22168, 22170, 22177, 22180, 22182, 22185, 22548, 22550, 22561, 22598, 9620, 9621, 9624, 9626, 9633, 9636, 9638, 9641, 9733, 9744, 9746, 9753, 9765, 9793, 9801, 9813,
22601, 22609, 22613, 22616, 22618, 22624, 22630, 22633, 22677, 22793, 22801, 22805, 22808, 22810, 22825, 22849, 9824, 9825, 9833, 9860, 9862, 9872, 9882, 10240, 10242, 10248, 10250, 10261, 10272, 10274, 10280, 10282,
22852, 22853, 22858, 22865, 22866, 22868, 22869, 22870, 22873, 22884, 22885, 22890, 22912, 22918, 22929, 22933, 10309, 10321, 10324, 10341, 10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410, 10505, 10513, 10516, 10521,
22936, 22938, 22950, 22953, 23060, 23065, 23077, 23110, 23121, 23125, 23130, 23142, 23145, 23169, 23188, 23190, 10533, 10566, 10569, 10578, 10581, 10593, 10596, 10598, 10601, 10629, 10640, 10646, 10649, 10660, 10661, 10752,
23205, 24581, 24593, 24596, 24601, 24661, 24664, 24709, 24726, 24729, 24833, 24853, 24865, 24868, 24870, 24873, 10754, 10760, 10762, 10784, 10786, 10792, 10794, 10821, 10833, 10838, 10841, 10853, 10880, 10882, 10888, 10890,
24900, 24902, 24913, 24917, 24921, 24933, 24938, 24981, 24993, 24996, 25001, 25105, 25173, 25188, 25221, 25233, 10901, 10912, 10914, 10920, 10922, 16389, 16401, 16406, 16421, 16457, 16466, 16469, 16472, 16474, 16481, 16484,
25253, 25621, 25633, 25641, 25669, 25680, 25682, 25685, 25689, 25701, 25749, 25860, 25862, 25865, 25873, 25877, 16486, 16532, 16537, 16545, 16550, 16640, 16641, 16644, 16646, 16649, 16658, 16661, 16662, 16664, 16666, 16673,
25882, 25896, 25920, 25922, 25925, 25928, 25930, 25937, 25940, 25941, 25942, 25945, 25952, 25957, 25958, 25988, 16678, 16681, 16709, 16712, 16714, 16721, 16724, 16725, 16726, 16729, 16730, 16741, 16744, 16746, 16769, 16772,
25990, 25993, 26001, 26005, 26021, 26117, 26132, 26134, 26137, 26149, 26177, 26180, 26182, 26185, 26192, 26194, 16774, 16784, 16786, 16789, 16800, 16801, 16802, 16901, 16913, 16916, 16918, 16933, 16961, 16978, 16981, 16986,
26197, 26200, 26202, 26209, 26217, 26260, 26262, 26265, 26625, 26649, 26709, 26757, 26769, 26789, 26896, 26901, 16996, 17001, 17033, 17044, 17061, 17409, 17429, 17433, 17449, 17477, 17480, 17482, 17489, 17492, 17493, 17494,
26918, 26950, 26953, 26965, 26968, 26970, 26977, 27024, 27026, 27029, 27044, 27205, 27220, 27222, 27225, 27237, 17505, 17506, 17509, 17512, 17514, 17537, 17542, 17545, 17552, 17554, 17557, 17568, 17569, 17577, 17665, 17666,
27306, 32770, 32776, 32778, 32789, 32800, 32802, 32808, 32810, 32837, 32849, 32854, 32857, 32869, 32896, 32898, 17669, 17674, 17681, 17684, 17685, 17686, 17689, 17696, 17701, 17706, 17729, 17732, 17733, 17734, 17737, 17744,
32904, 32906, 32917, 32928, 32936, 32938, 33029, 33046, 33089, 33106, 33109, 33121, 33124, 33126, 33169, 33172, 17745, 17748, 17749, 17750, 17752, 17753, 17761, 17764, 17765, 17766, 17769, 17794, 17796, 17797, 17800, 17809,
33174, 33189, 33282, 33314, 33322, 33352, 33354, 33408, 33429, 33448, 33450, 33817, 33872, 33877, 33945, 33954, 17812, 17813, 17814, 17817, 17818, 17829, 17832, 17834, 17921, 17925, 17929, 17940, 17941, 17944, 17946, 17953,
34054, 34081, 34086, 34116, 34121, 34129, 34133, 34136, 34138, 34153, 34177, 34194, 34212, 34304, 34325, 34344, 17956, 17961, 17984, 17986, 17989, 17992, 18000, 18001, 18002, 18005, 18006, 18009, 18018, 18021, 18024, 18049,
34388, 34390, 34393, 34405, 34437, 34821, 34848, 34850, 34888, 34890, 34922, 34944, 34946, 34965, 34976, 35089, 18053, 18058, 18068, 18069, 18081, 18084, 18086, 18437, 18449, 18453, 18458, 18469, 18498, 18505, 18512, 18517,
35092, 35109, 35152, 35157, 35172, 35336, 35338, 35360, 35394, 35409, 35426, 35456, 35464, 35466, 35496, 35498, 18520, 18529, 18532, 18534, 18537, 18565, 18577, 18580, 18582, 18585, 18597, 18689, 18693, 18694, 18698, 18704,
36881, 36889, 36901, 36949, 36997, 37009, 37012, 37014, 37029, 37121, 37124, 37153, 37161, 37189, 37204, 37205, 18708, 18709, 18712, 18721, 18724, 18726, 18752, 18757, 18762, 18769, 18770, 18772, 18773, 18774, 18777, 18784,
37209, 37221, 37269, 37274, 37284, 37397, 37462, 37481, 37506, 37536, 37538, 37889, 37909, 37956, 37958, 37961, 18786, 18789, 18790, 18794, 18822, 18825, 18834, 18837, 18838, 18840, 18849, 18852, 18854, 18857, 18966, 19012,
37973, 37976, 37978, 37988, 37990, 37993, 38037, 38161, 38165, 38170, 38180, 38208, 38213, 38216, 38218, 38225, 19014, 19017, 19029, 19032, 19034, 19044, 19049, 19092, 19109, 20481, 20484, 20485, 20486, 20489, 20498, 20501,
38226, 38228, 38229, 38230, 38233, 38241, 38245, 38248, 38250, 38277, 38288, 38293, 38310, 38313, 38405, 38420, 20506, 20513, 20516, 20521, 20544, 20549, 20552, 20561, 20564, 20565, 20566, 20569, 20581, 20584, 20614, 20617,
38422, 38425, 38437, 38465, 38468, 38470, 38473, 38480, 38485, 38490, 38500, 38502, 38545, 38548, 38550, 38553, 20629, 20632, 20640, 20641, 20646, 20649, 20741, 20744, 20745, 20746, 20753, 20756, 20757, 20758, 20760, 20761,
38565, 38929, 38937, 38977, 38994, 38996, 39013, 39045, 39057, 39080, 39169, 39172, 39184, 39186, 39189, 39201, 20768, 20773, 20774, 20776, 20778, 20801, 20804, 20805, 20806, 20809, 20816, 20817, 20818, 20820, 20821, 20822,
39238, 39241, 39253, 39258, 39264, 39270, 39316, 39318, 39321, 39333, 39466, 39493, 39510, 39512, 39525, 39573, 20824, 20825, 20826, 20833, 20836, 20837, 20838, 20841, 20866, 20869, 20881, 20884, 20885, 20886, 20889, 20896,
39584, 40960, 40962, 40968, 40970, 40981, 40992, 41000, 41002, 41029, 41049, 41061, 41096, 41109, 41128, 41221, 20901, 20906, 20993, 20998, 21010, 21013, 21018, 21025, 21028, 21058, 21061, 21066, 21073, 21076, 21077, 21078,
41236, 41238, 41241, 41253, 41298, 41301, 41304, 41306, 41313, 41321, 41361, 41364, 41369, 41480, 41482, 41512, 21081, 21090, 21093, 21125, 21136, 21138, 21141, 21145, 21146, 21156, 21508, 21509, 21521, 21524, 21525, 21526,
41514, 41608, 41610, 41640, 41642, 42021, 42065, 42068, 42070, 42112, 42114, 42122, 42137, 42144, 42146, 42154, 21528, 21529, 21537, 21541, 21544, 21546, 21569, 21572, 21573, 21574, 21577, 21578, 21584, 21585, 21588, 21589,
42261, 42264, 42274, 42281, 42305, 42308, 42310, 42313, 42320, 42325, 42390, 42392, 42496, 42498, 42528, 42565, 21590, 21592, 21593, 21594, 21601, 21602, 21604, 21605, 21606, 21609, 21632, 21640, 21642, 21649, 21652, 21653,
42577, 42580, 42582, 42585, 42597, 42624, 42645, 43014, 43016, 43029, 43040, 43048, 43050, 43097, 43144, 43157, 21654, 21657, 21665, 21668, 21669, 21674, 21761, 21762, 21764, 21765, 21766, 21769, 21776, 21777, 21778, 21780,
43284, 43289, 43301, 43333, 43345, 43350, 43369, 43528, 43530, 43560, 43605, 43650, 43656, 43658, 43682, 43688, 21781, 21782, 21785, 21786, 21793, 21796, 21797, 21798, 21801, 21824, 21825, 21826, 21828, 21829, 21830, 21832,
21833, 21840, 21841, 21842, 21844, 21845, 21846, 21848, 21849, 21850, 21856, 21857, 21860, 21861, 21862, 21864,
21865, 21866, 21889, 21892, 21893, 21897, 21898, 21904, 21905, 21908, 21909, 21910, 21912, 21913, 21921, 21924,
21925, 21926, 21929, 22016, 22017, 22018, 22020, 22022, 22024, 22025, 22033, 22036, 22037, 22040, 22041, 22048,
22049, 22050, 22052, 22053, 22054, 22056, 22057, 22081, 22085, 22086, 22088, 22089, 22090, 22096, 22097, 22098,
22100, 22101, 22102, 22104, 22105, 22106, 22113, 22116, 22117, 22121, 22146, 22149, 22150, 22152, 22153, 22154,
22161, 22165, 22170, 22178, 22181, 22182, 22184, 22185, 22532, 22533, 22534, 22537, 22544, 22549, 22552, 22561,
22570, 22597, 22600, 22602, 22609, 22612, 22613, 22614, 22616, 22617, 22624, 22626, 22628, 22629, 22658, 22665,
22672, 22674, 22677, 22680, 22689, 22697, 22785, 22786, 22789, 22794, 22801, 22804, 22805, 22806, 22809, 22821,
22849, 22852, 22853, 22854, 22857, 22864, 22865, 22866, 22868, 22869, 22870, 22872, 22873, 22874, 22881, 22884,
22885, 22886, 22889, 22913, 22917, 22921, 22929, 22932, 22933, 22934, 22936, 22937, 22949, 23044, 23048, 23061,
23066, 23072, 23077, 23078, 23081, 23109, 23112, 23113, 23121, 23125, 23126, 23128, 23129, 23138, 23141, 23144,
23146, 23169, 23178, 23186, 23189, 23190, 23192, 23194, 23201, 24581, 24596, 24598, 24601, 24613, 24644, 24656,
24661, 24662, 24664, 24666, 24673, 24676, 24678, 24681, 24705, 24726, 24741, 24833, 24836, 24838, 24841, 24850,
24853, 24865, 24866, 24870, 24873, 24901, 24905, 24913, 24917, 24918, 24921, 24933, 24934, 24938, 24964, 24970,
24978, 24981, 24993, 24998, 25001, 25105, 25110, 25113, 25152, 25153, 25158, 25173, 25174, 25176, 25184, 25221,
25233, 25238, 25253, 25617, 25618, 25621, 25622, 25626, 25633, 25638, 25641, 25664, 25666, 25669, 25672, 25674,
25681, 25684, 25685, 25686, 25689, 25690, 25696, 25698, 25701, 25732, 25733, 25737, 25744, 25746, 25748, 25749,
25750, 25752, 25754, 25761, 25764, 25769, 25861, 25864, 25866, 25873, 25877, 25878, 25881, 25924, 25925, 25926,
25929, 25936, 25937, 25940, 25941, 25942, 25945, 25953, 25956, 25957, 25958, 25961, 25990, 25993, 25994, 26001,
26005, 26006, 26009, 26010, 26018, 26021, 26022, 26024, 26114, 26121, 26133, 26144, 26150, 26152, 26153, 26176,
26181, 26184, 26186, 26193, 26196, 26197, 26198, 26200, 26202, 26208, 26213, 26216, 26240, 26242, 26245, 26250,
26260, 26262, 26264, 26265, 26272, 26276, 26278, 26282, 26646, 26649, 26661, 26689, 26706, 26709, 26714, 26721,
26729, 26757, 26769, 26776, 26790, 26881, 26884, 26896, 26901, 26913, 26916, 26918, 26921, 26944, 26945, 26949,
26950, 26952, 26961, 26964, 26965, 26966, 26969, 26976, 26981, 26986, 27010, 27012, 27018, 27029, 27041, 27044,
27045, 27049, 27153, 27158, 27160, 27201, 27204, 27209, 27216, 27221, 27224, 27226, 27236, 27237, 27241, 27270,
27284, 27288, 27290, 27302, 32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810, 32837, 32848, 32849, 32852,
32854, 32857, 32869, 32896, 32898, 32904, 32906, 32917, 32928, 32930, 32936, 32938, 33029, 33041, 33044, 33046,
33049, 33061, 33089, 33092, 33097, 33104, 33106, 33109, 33110, 33112, 33113, 33124, 33126, 33129, 33157, 33161,
33172, 33174, 33177, 33189, 33280, 33282, 33288, 33290, 33301, 33312, 33314, 33320, 33322, 33361, 33364, 33369,
33381, 33408, 33410, 33416, 33418, 33429, 33440, 33442, 33448, 33450, 33812, 33817, 33857, 33860, 33873, 33877,
33882, 33889, 33892, 33897, 33940, 33945, 34049, 34057, 34066, 34069, 34074, 34086, 34089, 34112, 34113, 34117,
34120, 34129, 34132, 34133, 34134, 34137, 34138, 34149, 34150, 34152, 34154, 34177, 34180, 34182, 34185, 34192,
34194, 34197, 34200, 34214, 34321, 34326, 34329, 34341, 34369, 34372, 34377, 34378, 34384, 34389, 34393, 34394,
34401, 34406, 34410, 34437, 34449, 34458, 34468, 34816, 34818, 34824, 34826, 34837, 34848, 34850, 34856, 34858,
34881, 34885, 34897, 34900, 34905, 34917, 34921, 34944, 34946, 34952, 34954, 34965, 34976, 34978, 34984, 34986,
35077, 35078, 35089, 35092, 35094, 35109, 35137, 35140, 35142, 35145, 35152, 35154, 35157, 35162, 35169, 35172,
35205, 35222, 35225, 35237, 35328, 35330, 35336, 35338, 35349, 35360, 35362, 35368, 35370, 35397, 35409, 35412,
35414, 35456, 35458, 35464, 35466, 35477, 35488, 35490, 35496, 35498, 36869, 36881, 36886, 36888, 36889, 36901,
36929, 36934, 36937, 36949, 36952, 36954, 36969, 36970, 36997, 37009, 37012, 37014, 37017, 37029, 37121, 37124,
37126, 37129, 37136, 37141, 37144, 37146, 37153, 37156, 37158, 37161, 37184, 37189, 37200, 37201, 37204, 37205,
37206, 37209, 37218, 37221, 37252, 37254, 37266, 37269, 37272, 37281, 37284, 37286, 37289, 37381, 37393, 37396,
37401, 37413, 37444, 37446, 37449, 37456, 37458, 37461, 37464, 37478, 37481, 37509, 37524, 37526, 37545, 37889,
37892, 37894, 37904, 37909, 37912, 37926, 37952, 37962, 37969, 37972, 37973, 37974, 37976, 37977, 37984, 37985,
37986, 37989, 38020, 38022, 38034, 38036, 38037, 38040, 38049, 38057, 38144, 38149, 38152, 38154, 38160, 38161,
38164, 38165, 38166, 38169, 38177, 38181, 38185, 38186, 38209, 38212, 38213, 38214, 38217, 38224, 38225, 38226,
38228, 38229, 38230, 38232, 38233, 38234, 38241, 38244, 38245, 38246, 38249, 38273, 38277, 38280, 38289, 38290,
38292, 38293, 38294, 38297, 38298, 38304, 38306, 38309, 38312, 38314, 38401, 38404, 38416, 38421, 38425, 38432,
38438, 38441, 38469, 38472, 38473, 38481, 38482, 38485, 38486, 38489, 38501, 38504, 38530, 38532, 38537, 38538,
38546, 38548, 38549, 38564, 38566, 38569, 38917, 38934, 38937, 38949, 38977, 38982, 38992, 38994, 38997, 38998,
39002, 39012, 39013, 39045, 39057, 39062, 39065, 39077, 39172, 39174, 39177, 39184, 39186, 39189, 39192, 39194,
39200, 39201, 39204, 39206, 39232, 39234, 39237, 39240, 39242, 39249, 39252, 39253, 39254, 39257, 39266, 39269,
39270, 39274, 39297, 39300, 39312, 39314, 39317, 39322, 39329, 39334, 39429, 39445, 39461, 39492, 39494, 39497,
39504, 39509, 39512, 39521, 39557, 39569, 39572, 39573, 39574, 40960, 40962, 40968, 40970, 40981, 40992, 40994,
41000, 41002, 41029, 41041, 41044, 41046, 41049, 41088, 41090, 41096, 41098, 41109, 41120, 41122, 41128, 41130,
41221, 41225, 41233, 41236, 41238, 41241, 41242, 41286, 41289, 41297, 41301, 41304, 41306, 41313, 41316, 41349,
41360, 41362, 41366, 41369, 41474, 41480, 41482, 41488, 41497, 41506, 41512, 41514, 41541, 41553, 41558, 41561,
41573, 41600, 41602, 41608, 41610, 41621, 41632, 41634, 41640, 41642, 42009, 42021, 42049, 42052, 42064, 42068,
42069, 42072, 42074, 42081, 42085, 42086, 42088, 42089, 42117, 42246, 42249, 42256, 42258, 42261, 42264, 42278,
42281, 42306, 42309, 42321, 42324, 42325, 42326, 42329, 42341, 42346, 42369, 42372, 42373, 42374, 42377, 42386,
42389, 42392, 42501, 42513, 42518, 42522, 42529, 42533, 42564, 42566, 42570, 42578, 42581, 42582, 42584, 42592,
42594, 42630, 42640, 42645, 42646, 42649, 42657, 42660, 42662, 43008, 43010, 43016, 43018, 43040, 43042, 43048,
43050, 43089, 43092, 43094, 43097, 43136, 43138, 43144, 43146, 43157, 43168, 43170, 43176, 43178, 43269, 43284,
43289, 43297, 43301, 43329, 43344, 43349, 43354, 43361, 43366, 43369, 43408, 43414, 43520, 43522, 43528, 43530,
43552, 43554, 43560, 43562, 43601, 43604, 43606, 43648, 43650, 43656, 43658, 43669, 43680, 43682, 43688, 43690,
}; };
static const uint16_t kgrid_2bit_1024[1024] = { static const uint16_t kgrid_2bit_1024[1024] = {
0, 2, 5, 8, 10, 17, 20, 22, 25, 32, 34, 37, 40, 65, 68, 70, 0, 2, 5, 8, 10, 17, 20, 22, 25, 32, 34, 37, 40, 65, 68, 70,
@ -11503,7 +11567,7 @@ static int iq1_sort_helper(const void * left, const void * right) {
return *l < *r ? -1 : *l > *r ? 1 : 0; return *l < *r ? -1 : *l > *r ? 1 : 0;
} }
#define IQ1S_BLOCK_SIZE 16 #define IQ1S_BLOCK_SIZE 32
static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) { static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) {
const int gindex = iq2_data_index(GGML_TYPE_IQ1_S); const int gindex = iq2_data_index(GGML_TYPE_IQ1_S);
@ -11535,8 +11599,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
y[ibl].d = GGML_FP32_TO_FP16(0.f); y[ibl].d = GGML_FP32_TO_FP16(0.f);
memset(y[ibl].qs, 0, QK_K/8); memset(y[ibl].qs, 0, QK_K/8);
memset(y[ibl].qh, 0, QK_K/32); memset(y[ibl].qh, 0, QK_K/16);
memset(y[ibl].scales, 0, QK_K/32);
float max_scale = 0; float max_scale = 0;
@ -11620,10 +11683,12 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
} }
if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2; if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
} }
y[ibl].qs[2*ib+0] = index[0] & 255; uint16_t h = 0;
y[ibl].qs[2*ib+1] = index[1] & 255; for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) {
if (ib%2 == 0) y[ibl].qh[ib/2] = (index[0] >> 8) | ((index[1] >> 8) << 2); y[ibl].qs[(IQ1S_BLOCK_SIZE/8)*ib + k] = index[k] & 255;
else y[ibl].qh[ib/2] |= ((index[0] >> 8) | ((index[1] >> 8) << 2)) << 4; h |= (index[k] >> 8) << 3*k;
}
y[ibl].qh[ib] = h;
GGML_ASSERT(scale >= 0); GGML_ASSERT(scale >= 0);
scales[ib] = scale; scales[ib] = scale;
max_scale = MAX(max_scale, scale); max_scale = MAX(max_scale, scale);
@ -11637,12 +11702,10 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
float d = max_scale/31; float d = max_scale/31;
y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.085f is another fudge factor. Don't ask me why it is needed. y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.085f is another fudge factor. Don't ask me why it is needed.
float id = 1/d; float id = 1/d;
for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ib += 2) { for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) {
int l1 = nearest_int(0.5f*(id*scales[ib+0]-1)); int l = nearest_int(0.5f*(id*scales[ib]-1));
l1 = MAX(0, MIN(15, l1)); l = MAX(0, MIN(15, l));
int l2 = nearest_int(0.5f*(id*scales[ib+1]-1)); y[ibl].qh[ib] |= (l << 12);
l2 = MAX(0, MIN(15, l2));
y[ibl].scales[ib/2] = l1 | (l2 << 4);
} }
} }
} }

View file

@ -217,9 +217,8 @@ static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N
typedef struct { typedef struct {
ggml_fp16_t d; ggml_fp16_t d;
uint8_t qs[QK_K/8]; uint8_t qs[QK_K/8];
uint8_t qh[QK_K/32]; uint16_t qh[QK_K/32];
uint8_t scales[QK_K/32];
} block_iq1_s; } block_iq1_s;
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");