ggml : reduce hash table reset cost (#8698)

* ggml : reduce hash table reset cost
* fix unreachable code warnings after GGML_ASSERT(false)
* GGML_ASSERT(false) -> GGML_ABORT("fatal error")
* GGML_ABORT use format string
2024-07-27 04:41:55 +02:00 · 2024-07-27 04:41:55 +02:00 · 2b1f616b20
commit 2b1f616b20
parent 01245f5b16
46 changed files with 851 additions and 754 deletions
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -166,12 +166,12 @@ static void test_sampler_queue(
    for (auto s : samplers_sequence) {
        switch (s){
            case 'k': llama_sample_top_k    (nullptr, &candidates_p, top_k, 1); break;
-            case 'f': GGML_ASSERT(false && "tail_free test not implemented");   break;
-            case 'y': GGML_ASSERT(false && "typical test not implemented");     break;
+            case 'f': GGML_ABORT("tail_free test not implemented");   break;
+            case 'y': GGML_ABORT("typical test not implemented");     break;
            case 'p': llama_sample_top_p    (nullptr, &candidates_p, top_p, 1); break;
            case 'm': llama_sample_min_p    (nullptr, &candidates_p, min_p, 1); break;
-            case 't': GGML_ASSERT(false && "temperature test not implemented"); break;
-            default : GGML_ASSERT(false && "Unknown sampler");                  break;
+            case 't': GGML_ABORT("temperature test not implemented"); break;
+            default : GGML_ABORT("Unknown sampler");                  break;
        }

        llama_sample_softmax(nullptr, &candidates_p); // make sure tokens are sorted for tests
@@ -222,7 +222,7 @@ static void test_sampler_queue(
            GGML_ASSERT(candidates_p.data[0].id == max_token_id);
            GGML_ASSERT(candidates_p.data[expected_size-1].id == min_token_id);
        } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
        }
    }