sync : ggml (conv 1d + 2d updates, UB fixes) (#3468)
* sync : ggml (conv 1d + 2d updates)

  ggml-ci

* ggml : fix UB in q5_0 and q5_1 quantize code

  ggml.c:1033:39: runtime error: left shift of 1 by 31 places cannot be represented in type 'int'
  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior

  ggml.c:1081:39: runtime error: left shift of 1 by 31 places cannot be represented in type 'int'
  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior

  ggml-ci

* tests : fix UB in test-quantize-perf
This commit is contained in:
parent
f72f8f22c9
commit
f93af02488
6 changed files with 725 additions and 379 deletions
|
@@ -69,7 +69,6 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
|
|||
// 2-6 bit quantization in super-blocks
|
||||
//
|
||||
|
||||
|
||||
//
|
||||
// ===================== Helper functions
|
||||
//
|
||||
|
@@ -348,7 +347,6 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
|
|||
const float q4scale = 15.f;
|
||||
|
||||
for (int i = 0; i < nb; i++) {
|
||||
|
||||
float max_scale = 0; // as we are deducting the min, scales are always positive
|
||||
float max_min = 0;
|
||||
for (int j = 0; j < QK_K/16; ++j) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue