From 3ed1bc09f518ac205279ef06921764430bf82daa Mon Sep 17 00:00:00 2001 From: slaren Date: Fri, 26 Jul 2024 16:39:10 +0200 Subject: [PATCH] GGML_ASSERT(false) -> GGML_ABORT("fatal error") --- examples/eval-callback/eval-callback.cpp | 2 +- examples/imatrix/imatrix.cpp | 4 +- examples/llama-bench/llama-bench.cpp | 6 +- examples/llava/clip.cpp | 2 +- examples/tokenize/tokenize.cpp | 2 +- ggml/include/ggml.h | 5 +- ggml/src/ggml-alloc.c | 9 +- ggml/src/ggml-backend.c | 2 +- ggml/src/ggml-blas.cpp | 2 +- ggml/src/ggml-cann.cpp | 6 +- ggml/src/ggml-cann/aclnn_ops.cpp | 26 +- ggml/src/ggml-cuda.cu | 6 +- ggml/src/ggml-cuda/argsort.cu | 2 +- ggml/src/ggml-cuda/binbcast.cu | 2 +- ggml/src/ggml-cuda/common.cuh | 2 +- ggml/src/ggml-cuda/cpy.cu | 4 +- ggml/src/ggml-cuda/dmmv.cu | 2 +- ggml/src/ggml-cuda/fattn-common.cuh | 6 +- ggml/src/ggml-cuda/fattn-tile-f16.cu | 2 +- ggml/src/ggml-cuda/fattn-tile-f32.cu | 2 +- ggml/src/ggml-cuda/fattn.cu | 10 +- ggml/src/ggml-cuda/getrows.cu | 2 +- ggml/src/ggml-cuda/mmq.cu | 2 +- ggml/src/ggml-cuda/mmq.cuh | 4 +- ggml/src/ggml-cuda/mmvq.cu | 6 +- ggml/src/ggml-cuda/quantize.cu | 2 +- ggml/src/ggml-cuda/rope.cu | 4 +- ggml/src/ggml-impl.h | 4 +- ggml/src/ggml-kompute.cpp | 8 +- ggml/src/ggml-metal.m | 42 +-- ggml/src/ggml-quants.c | 10 +- ggml/src/ggml-sycl.cpp | 16 +- ggml/src/ggml-sycl/common.hpp | 2 +- ggml/src/ggml-sycl/dmmv.cpp | 2 +- ggml/src/ggml-sycl/dpct/helper.hpp | 2 +- ggml/src/ggml-sycl/mmq.cpp | 22 +- ggml/src/ggml-sycl/mmvq.cpp | 2 +- ggml/src/ggml-sycl/rope.cpp | 4 +- ggml/src/ggml-vulkan.cpp | 58 ++-- ggml/src/ggml.c | 358 +++++++++++------------ src/llama-grammar.cpp | 4 +- src/llama-vocab.cpp | 12 +- src/llama.cpp | 28 +- tests/test-backend-ops.cpp | 8 +- tests/test-sampling.cpp | 10 +- 45 files changed, 358 insertions(+), 358 deletions(-) diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp index c8a3016a4..37d30ab8c 100644 --- a/examples/eval-callback/eval-callback.cpp +++ b/examples/eval-callback/eval-callback.cpp @@ -62,7 +62,7 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne } else if (type == GGML_TYPE_I8) { v = (float) *(int8_t *) &data[i]; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } printf("%12.4f", v); sum += v; diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 574f5ed9c..6ce1863cf 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -127,7 +127,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * } else if (e.values.size() != (size_t)src1->ne[0]*n_as) { fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as); - exit(1); //GGML_ASSERT(false); + exit(1); //GGML_ABORT("fatal error"); } if (m_params.verbosity > 1) { printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type); @@ -176,7 +176,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * } else if (e.values.size() != (size_t)src1->ne[0]) { fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]); - exit(1); //GGML_ASSERT(false); + exit(1); //GGML_ABORT("fatal error"); } ++e.ncall; if (m_params.verbosity > 1) { diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index a6497b6e0..521fa8880 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -150,7 +150,7 @@ static const char * output_format_str(output_formats format) { case JSON: return "json"; case MARKDOWN: return "md"; case SQL: return "sql"; - default: GGML_ASSERT(!"invalid output format"); + default: GGML_ABORT("invalid output format"); } } @@ -176,7 +176,7 @@ static const char * split_mode_str(llama_split_mode mode) { case LLAMA_SPLIT_MODE_NONE: return "none"; case LLAMA_SPLIT_MODE_LAYER: return "layer"; case LLAMA_SPLIT_MODE_ROW: return "row"; - default: GGML_ASSERT(!"invalid split mode"); + default: GGML_ABORT("invalid split mode"); } } @@ -1326,7 +1326,7 @@ static std::unique_ptr create_printer(output_formats format) { case SQL: return std::unique_ptr(new sql_printer()); } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } int main(int argc, char ** argv) { diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index d23e282fb..7cda5f10c 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -869,7 +869,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 embeddings = peg_0; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp index 2afb6024c..17f5e4961 100644 --- a/examples/tokenize/tokenize.cpp +++ b/examples/tokenize/tokenize.cpp @@ -163,7 +163,7 @@ static void write_utf8_cstr_to_stdout(const char * str, bool & invalid_utf8) { printf(">"); return; } - GGML_ASSERT(false && "MultiByteToWideChar() failed in an unexpected way."); + GGML_ABORT("MultiByteToWideChar() failed in an unexpected way."); } LPWSTR wstr = (LPWSTR) calloc(length_needed+1, sizeof(*wstr)); diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index d0311f6ca..263f590eb 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -272,7 +272,8 @@ #define GGML_NORETURN _Noreturn #endif -#define GGML_ASSERT(x) if (!(x)) ggml_abort(__FILE__, __LINE__, #x) +#define GGML_ABORT(x) ggml_abort(__FILE__, __LINE__, x) +#define GGML_ASSERT(x) if (!(x)) GGML_ABORT(#x) // used to copy the number of elements and stride in bytes of tensors into local variables. // main purpose is to reduce code duplication and improve readability. @@ -322,7 +323,7 @@ extern "C" { #endif - GGML_API GGML_NORETURN void ggml_abort(const char * file, int line, const char * expr); + GGML_NORETURN GGML_API void ggml_abort(const char * file, int line, const char * expr); enum ggml_status { GGML_STATUS_ALLOC_FAILED = -2, diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index 108b04257..f11af0a6f 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -91,7 +91,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) { fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n", __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset); - GGML_ASSERT(!"not enough space in the buffer"); + GGML_ABORT("not enough space in the buffer"); } void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset; @@ -132,7 +132,7 @@ static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, return; } } - GGML_ASSERT(!"out of allocated_tensors"); + GGML_ABORT("out of allocated_tensors"); } static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) { for (int i = 0; i < 1024; i++) { @@ -142,7 +142,7 @@ static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offs } } fprintf(stderr, "tried to free tensor %s not found\n", tensor->name); - GGML_ASSERT(!"tensor not found"); + GGML_ABORT("tensor not found"); } #endif @@ -175,8 +175,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz // this should never happen fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n", __func__, size, max_avail); - GGML_ASSERT(!"not enough space in the buffer"); - GGML_UNREACHABLE(); + GGML_ABORT("not enough space in the buffer"); } } diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c index 240136a00..8e9ba5b73 100644 --- a/ggml/src/ggml-backend.c +++ b/ggml/src/ggml-backend.c @@ -1280,7 +1280,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg sched->ctx = ggml_init(params); if (sched->ctx == NULL) { fprintf(stderr, "%s: failed to initialize context\n", __func__); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } // pass 1: assign backends to ops with pre-allocated inputs diff --git a/ggml/src/ggml-blas.cpp b/ggml/src/ggml-blas.cpp index a37aa4072..f620a9f93 100644 --- a/ggml/src/ggml-blas.cpp +++ b/ggml/src/ggml-blas.cpp @@ -276,7 +276,7 @@ GGML_CALL static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t default: fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-cann.cpp b/ggml/src/ggml-cann.cpp index 9bf7e332a..ad5feea05 100644 --- a/ggml/src/ggml-cann.cpp +++ b/ggml/src/ggml-cann.cpp @@ -120,7 +120,7 @@ static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) { file, line); GGML_CANN_LOG_ERROR(" %s\n", stmt); // abort with GGML_ASSERT to get a stack trace - GGML_ASSERT(!"CANN error"); + GGML_ABORT("CANN error"); } /** @@ -342,7 +342,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { // memory should always buffered. these memory may still needed by // tasks in stream. // TODO, fix me. - GGML_ASSERT(!"Cann buffer pool full, increase MAX_CANN_BUFFERS\n"); + GGML_ABORT("Cann buffer pool full, increase MAX_CANN_BUFFERS\n"); } }; @@ -1874,7 +1874,7 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend, ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(), (aclrtEvent)event->context)); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index a02efc828..f27666970 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -844,7 +844,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ggml_cann_max_pool2d(ctx, dst); break; case GGML_OP_POOL_COUNT: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } @@ -931,9 +931,9 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ((ggml_tensor*)dst->extra)->nb); return; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (dst->type == GGML_TYPE_F32) { if (ggml_are_same_shape(src, dst)) { @@ -955,12 +955,12 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ((ggml_tensor*)dst->extra)->nb); return; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } // TODO - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } else if (src->type == GGML_TYPE_F32) { // TODO: if (src0->type == dst->type && ne00 == ne0 && nb00 == type_size // && nb0 == type_size) @@ -991,10 +991,10 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ((ggml_tensor*)dst->extra)->nb); return; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } else { // TODO: dst not contiguous - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } if (dst->type == GGML_TYPE_F16) { @@ -1017,11 +1017,11 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ((ggml_tensor*)dst->extra)->nb); return; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } // TODO - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } else { if (ggml_are_same_shape(src, dst)) { cann_copy(ctx, acl_src, acl_dst); @@ -1029,7 +1029,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ACL_CHECK(aclDestroyTensor(acl_dst)); return; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -2219,7 +2219,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ((ggml_tensor*)dst->extra)->nb); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } @@ -2492,7 +2492,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ggml_cann_mul_mat_q8_0(ctx, dst); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index e48269e46..54ccf6bb1 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -98,7 +98,7 @@ void ggml_cuda_error(const char * stmt, const char * func, const char * file, in GGML_CUDA_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line); GGML_CUDA_LOG_ERROR(" %s\n", stmt); // abort with GGML_ASSERT to get a stack trace - GGML_ASSERT(!"CUDA error"); + GGML_ABORT("CUDA error"); } // this is faster on Windows @@ -1596,7 +1596,7 @@ static void ggml_cuda_op_mul_mat( CUDA_CHECK(ggml_cuda_cpy_tensor_2d( src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream)); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (quantize_src1 && !src1_is_contiguous) { @@ -2945,7 +2945,7 @@ static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_ev CUDA_CHECK(cudaLaunchHostFunc(cuda_ctx->stream(), wait_fn, event)); #endif - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu index 15757ca18..607ded855 100644 --- a/ggml/src/ggml-cuda/argsort.cu +++ b/ggml/src/ggml-cuda/argsort.cu @@ -81,7 +81,7 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co } else if (order == GGML_SORT_ORDER_DESC) { k_argsort_f32_i32<<>>(x, dst, ncols, ncols_pad); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-cuda/binbcast.cu b/ggml/src/ggml-cuda/binbcast.cu index 19b08b74f..34bc67acd 100644 --- a/ggml/src/ggml-cuda/binbcast.cu +++ b/ggml/src/ggml-cuda/binbcast.cu @@ -259,7 +259,7 @@ static void ggml_cuda_op_bin_bcast( } else { fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 1c2e00c1e..eac026f47 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -348,7 +348,7 @@ static __device__ void no_device_code( #ifdef __CUDA_ARCH__ #define NO_DEVICE_CODE no_device_code(__FILE__, __LINE__, __FUNCTION__, __CUDA_ARCH__, STRINGIZE(__CUDA_ARCH_LIST__)) #else -#define NO_DEVICE_CODE //GGML_ASSERT(false && "NO_DEVICE_CODE not valid in host code.") +#define NO_DEVICE_CODE //GGML_ABORT("NO_DEVICE_CODE not valid in host code.") #endif // __CUDA_ARCH__ static __device__ __forceinline__ float warp_reduce_sum(float x) { diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu index 3db57034b..aad34bfe5 100644 --- a/ggml/src/ggml-cuda/cpy.cu +++ b/ggml/src/ggml-cuda/cpy.cu @@ -451,7 +451,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg } else { fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__, ggml_type_name(src0->type), ggml_type_name(src1->type)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -484,6 +484,6 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) { } else { fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__, ggml_type_name(src0->type), ggml_type_name(src1->type)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-cuda/dmmv.cu b/ggml/src/ggml-cuda/dmmv.cu index 174489e06..d7a2a2513 100644 --- a/ggml/src/ggml-cuda/dmmv.cu +++ b/ggml/src/ggml-cuda/dmmv.cu @@ -662,7 +662,7 @@ void ggml_cuda_op_dequantize_mul_mat_vec( convert_mul_mat_vec_f16_cuda(src0_dd_i, src1_dfloat, dst_dd_i, ne00, row_diff, stream); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index f24312dd0..950fd93df 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -564,7 +564,7 @@ static void on_no_fattn_vec_case(const int D) { fprintf(stderr, "Unsupported KV type combination for head_size 64.\n"); fprintf(stderr, "By default only f16 KV cache is supported.\n"); fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } else if (D == 128) { fprintf(stderr, "Unsupported KV type combination for head_size 128.\n"); fprintf(stderr, "Supported combinations:\n"); @@ -572,11 +572,11 @@ static void on_no_fattn_vec_case(const int D) { fprintf(stderr, " - K == q8_0, V == q8_0, 8.50 BPV\n"); fprintf(stderr, " - K == f16, V == f16, 16.00 BPV\n"); fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } else { fprintf(stderr, "Unsupported KV type combination for head_size 256.\n"); fprintf(stderr, "Only f16 is supported.\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cu b/ggml/src/ggml-cuda/fattn-tile-f16.cu index c6c35134d..1b2fd500b 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f16.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f16.cu @@ -287,7 +287,7 @@ void launch_fattn_tile_f16_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); } break; default: { - GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128."); + GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128."); } break; } } diff --git a/ggml/src/ggml-cuda/fattn-tile-f32.cu b/ggml/src/ggml-cuda/fattn-tile-f32.cu index 15e22f495..f3e68dbfa 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f32.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f32.cu @@ -284,7 +284,7 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); } break; default: { - GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128."); + GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128."); } break; } } diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index 38d30b210..29f608b0f 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -38,7 +38,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } else { @@ -63,7 +63,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g // ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); // break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } @@ -86,7 +86,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } return; @@ -114,7 +114,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } return; @@ -141,7 +141,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } diff --git a/ggml/src/ggml-cuda/getrows.cu b/ggml/src/ggml-cuda/getrows.cu index 55af195fd..5b0598ecf 100644 --- a/ggml/src/ggml-cuda/getrows.cu +++ b/ggml/src/ggml-cuda/getrows.cu @@ -172,7 +172,7 @@ void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { default: // TODO: k-quants fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu index 84f6387e2..78d70cd7a 100644 --- a/ggml/src/ggml-cuda/mmq.cu +++ b/ggml/src/ggml-cuda/mmq.cu @@ -84,7 +84,7 @@ void ggml_cuda_op_mul_mat_q( mul_mat_q_case(ctx, args, stream); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index f08a4758d..e8a957447 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -75,7 +75,7 @@ static mmq_q8_1_ds_layout mmq_get_q8_1_ds_layout(const ggml_type type_x) { case GGML_TYPE_IQ4_NL: return MMQ_Q8_1_DS_LAYOUT_D4; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } @@ -2898,7 +2898,7 @@ void mul_mat_q_case(ggml_backend_cuda_context & ctx, const mmq_args & args, cuda break; default: fprintf(stderr, "mmq_x_best=%d\n", mmq_x_best); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } diff --git a/ggml/src/ggml-cuda/mmvq.cu b/ggml/src/ggml-cuda/mmvq.cu index e22faf69b..7dbbc9939 100644 --- a/ggml/src/ggml-cuda/mmvq.cu +++ b/ggml/src/ggml-cuda/mmvq.cu @@ -162,7 +162,7 @@ static void mul_mat_vec_q_cuda( rows_per_cuda_block = 2; break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } @@ -196,7 +196,7 @@ static void mul_mat_vec_q_cuda( mul_mat_vec_q<<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } @@ -413,7 +413,7 @@ void ggml_cuda_op_mul_mat_vec_q( mul_mat_vec_iq3_s_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } diff --git a/ggml/src/ggml-cuda/quantize.cu b/ggml/src/ggml-cuda/quantize.cu index aa7f1eff0..45408ce86 100644 --- a/ggml/src/ggml-cuda/quantize.cu +++ b/ggml/src/ggml-cuda/quantize.cu @@ -163,7 +163,7 @@ void quantize_mmq_q8_1_cuda( <<>>(x, vy, kx0, kx1, kx0_padded); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu index 596fb7c13..99ec1dd98 100644 --- a/ggml/src/ggml-cuda/rope.cu +++ b/ggml/src/ggml-cuda/rope.cu @@ -251,7 +251,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { attn_factor, corr_dims, freq_factors, stream ); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } else { if (src0->type == GGML_TYPE_F32) { @@ -265,7 +265,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { attn_factor, corr_dims, freq_factors, stream ); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 7245c1f66..7f7afdbfc 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -726,7 +726,7 @@ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tens } while (i != h); // visited all hash table entries -> not found - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) { @@ -747,7 +747,7 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g } while (i != h); // visited all hash table entries -> not found - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } #ifdef __cplusplus diff --git a/ggml/src/ggml-kompute.cpp b/ggml/src/ggml-kompute.cpp index ed5f2e349..41ac63fa4 100644 --- a/ggml/src/ggml-kompute.cpp +++ b/ggml/src/ggml-kompute.cpp @@ -566,7 +566,7 @@ uint32_t safe_divide(uint32_t a, uint32_t b) { } if ((a % b) != 0) { fprintf(stderr, "((%u %% %u) == %u) != 0\n", a, b, a % b); - GGML_ASSERT(!"safe_divide result would've had remainder"); + GGML_ABORT("safe_divide result would've had remainder"); } return a / b; } @@ -1460,7 +1460,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml if (!ggml_vk_supports_op(dst)) { fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst)); - GGML_ASSERT(!"unsupported op"); + GGML_ABORT("unsupported op"); } const int32_t ne00 = src0 ? src0->ne[0] : 0; @@ -1562,7 +1562,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml default: { fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } break; @@ -1745,7 +1745,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml continue; not_implemented: {} fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - //GGML_ASSERT(false); + //GGML_ABORT("fatal error"); } // Evaluate sequence diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m index a7619bcca..48b813131 100644 --- a/ggml/src/ggml-metal.m +++ b/ggml/src/ggml-metal.m @@ -869,7 +869,7 @@ static enum ggml_status ggml_metal_graph_compute( NSError * error = nil; if (![[MTLCaptureManager sharedCaptureManager] startCaptureWithDescriptor:descriptor error:&error]) { GGML_METAL_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]); - GGML_ASSERT(!"capture failed"); + GGML_ABORT("capture failed"); } } @@ -931,7 +931,7 @@ static enum ggml_status ggml_metal_graph_compute( if (!ggml_metal_supports_op(ctx, dst)) { GGML_METAL_LOG_ERROR("%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst)); - GGML_ASSERT(!"unsupported op"); + GGML_ABORT("unsupported op"); } if (should_capture) { @@ -1068,7 +1068,7 @@ static enum ggml_status ggml_metal_graph_compute( case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD_ROW].pipeline; break; case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_ROW].pipeline; break; case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV_ROW].pipeline; break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); } bcast_row = true; @@ -1077,7 +1077,7 @@ static enum ggml_status ggml_metal_graph_compute( case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD].pipeline; break; case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL].pipeline; break; case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV].pipeline; break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); } } @@ -1131,7 +1131,7 @@ static enum ggml_status ggml_metal_graph_compute( case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_F16].pipeline; break; case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I32].pipeline; break; case GGML_TYPE_I16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I16].pipeline; break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); } [encoder setComputePipelineState:pipeline]; @@ -1387,7 +1387,7 @@ static enum ggml_status ggml_metal_graph_compute( default: { GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_OP_SQR: @@ -1609,7 +1609,7 @@ static enum ggml_status ggml_metal_graph_compute( case GGML_TYPE_IQ1_M: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_M_F32 ].pipeline; break; case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32 ].pipeline; break; case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_XS_F32 ].pipeline; break; - default: GGML_ASSERT(false && "MUL MAT-MAT not implemented"); + default: GGML_ABORT("MUL MAT-MAT not implemented"); } [encoder setComputePipelineState:pipeline]; @@ -1782,7 +1782,7 @@ static enum ggml_status ggml_metal_graph_compute( default: { GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t); - GGML_ASSERT(false && "not implemented"); + GGML_ABORT("not implemented"); } }; @@ -1911,7 +1911,7 @@ static enum ggml_status ggml_metal_graph_compute( case GGML_TYPE_IQ1_M: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_M_F32 ].pipeline; break; case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32 ].pipeline; break; case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_XS_F32 ].pipeline; break; - default: GGML_ASSERT(false && "MUL_MAT_ID not implemented"); + default: GGML_ABORT("MUL_MAT_ID not implemented"); } [encoder setComputePipelineState:pipeline]; @@ -2078,7 +2078,7 @@ static enum ggml_status ggml_metal_graph_compute( default: { GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src2t); - GGML_ASSERT(false && "not implemented"); + GGML_ABORT("not implemented"); } }; @@ -2178,7 +2178,7 @@ static enum ggml_status ggml_metal_graph_compute( case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL ].pipeline; break; case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_XS ].pipeline; break; case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_I32 ].pipeline; break; - default: GGML_ASSERT(false && "not implemented"); + default: GGML_ABORT("not implemented"); } [encoder setComputePipelineState:pipeline]; @@ -2316,13 +2316,13 @@ static enum ggml_status ggml_metal_graph_compute( switch (src0->type) { case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F32].pipeline; break; case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F16].pipeline; break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); }; } else { switch (src0->type) { case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F32].pipeline; break; case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F16].pipeline; break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); }; } @@ -2399,7 +2399,7 @@ static enum ggml_status ggml_metal_graph_compute( switch (dst->type) { case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F32].pipeline; break; case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F16].pipeline; break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); }; [encoder setComputePipelineState:pipeline]; @@ -2556,7 +2556,7 @@ static enum ggml_status ggml_metal_graph_compute( switch (order) { case GGML_SORT_ORDER_ASC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC].pipeline; break; case GGML_SORT_ORDER_DESC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC].pipeline; break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); }; [encoder setComputePipelineState:pipeline]; @@ -2645,7 +2645,7 @@ static enum ggml_status ggml_metal_graph_compute( { GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00); GGML_METAL_LOG_ERROR("add template specialization for this size\n"); - GGML_ASSERT(false && "add template specialization for this size"); + GGML_ABORT("add template specialization for this size"); } } } else { @@ -2658,7 +2658,7 @@ static enum ggml_status ggml_metal_graph_compute( { GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00); GGML_METAL_LOG_ERROR("add template specialization for this size\n"); - GGML_ASSERT(false && "add template specialization for this size"); + GGML_ABORT("add template specialization for this size"); } } } @@ -2779,7 +2779,7 @@ static enum ggml_status ggml_metal_graph_compute( case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0].pipeline; break; case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1].pipeline; break; case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL].pipeline; break; - default: GGML_ASSERT(false && "not implemented"); + default: GGML_ABORT("not implemented"); }; } break; case GGML_TYPE_F16: @@ -2787,10 +2787,10 @@ static enum ggml_status ggml_metal_graph_compute( switch (dstt) { case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F32].pipeline; break; case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F16].pipeline; break; - default: GGML_ASSERT(false && "not implemented"); + default: GGML_ABORT("not implemented"); }; } break; - default: GGML_ASSERT(false && "not implemented"); + default: GGML_ABORT("not implemented"); } [encoder setComputePipelineState:pipeline]; @@ -2818,7 +2818,7 @@ static enum ggml_status ggml_metal_graph_compute( default: { GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index 091a55e2c..1c6c85aac 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -12692,7 +12692,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict printf("Oops: found point %u not on grid:", u); for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]); printf("\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } q2[2*ib+0] |= ((uint32_t) grid_index << 8*k); q2[2*ib+1] |= (block_signs[k] << 7*k); @@ -12871,7 +12871,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v printf("Oops: found point %u not on grid:", u); for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]); printf("\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } q2[2*ib+k] = grid_index | (block_signs[k] << 9); } @@ -13314,7 +13314,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v printf("Oops: found point %u not on grid:", u); for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]); printf("\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (grid_size == 256) { q3[8*ib+k] = grid_index; @@ -13527,7 +13527,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo printf("Oops: found point %u not on grid:", u); for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]); printf("\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } qs[k] = grid_index & 255; qh[(ib*bs4+k)/8] |= ((grid_index >> 8) << ((ib*bs4+k)%8)); @@ -14503,7 +14503,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy printf("Oops: found point %u not on grid:", u); for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]); printf("\n"); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int i8 = 2*ib + k; y[ibl].qs[i8] = grid_index & 255; diff --git a/ggml/src/ggml-sycl.cpp b/ggml/src/ggml-sycl.cpp index 36518ff93..7cb07d0dc 100644 --- a/ggml/src/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl.cpp @@ -1723,7 +1723,7 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols, }); }); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -2075,8 +2075,8 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, // GGML_SYCL_DEBUG("current device index %d\n", id); src_ptr = (char *) extra->data_device[id]; } else { - // GGML_SYCL_DEBUG("GGML_ASSERT(false)\n"); - GGML_ASSERT(false); + // GGML_SYCL_DEBUG("GGML_ABORT("fatal error")\n"); + GGML_ABORT("fatal error"); } char * dst_ptr = (char *) dst; @@ -2163,7 +2163,7 @@ static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_te default: // TODO: k-quants fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } @@ -2192,7 +2192,7 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t } else { fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -2476,7 +2476,7 @@ static int64_t get_row_rounding(ggml_type type, const std::arraytype), ggml_type_name(src1->type)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } (void) dst; diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 397bd98dd..86d8b40e8 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -100,7 +100,7 @@ static void crash() { const char* msg) { fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg); fprintf(stderr, " in function %s at %s:%d\n", func, file, line); - GGML_ASSERT(!"SYCL error"); + GGML_ABORT("SYCL error"); } #define SYCL_CHECK(err) \ diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp index 70a94fc16..ae45630e1 100644 --- a/ggml/src/ggml-sycl/dmmv.cpp +++ b/ggml/src/ggml-sycl/dmmv.cpp @@ -1011,7 +1011,7 @@ void ggml_sycl_op_dequantize_mul_mat_vec( break; default: printf("ggml_sycl_op_dequantize_mul_mat_vec unsupported GGML_TYPE %d\n", src0->type); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index 4aaa76bfb..ef4609e32 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -975,7 +975,7 @@ namespace dpct if (backend == "opencl:cpu") return 4; if (backend == "opencl:acc") return 5; printf("convert_backend_index: can't handle backend=%s\n", backend.c_str()); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } static bool compare_backend(std::string &backend1, std::string &backend2) { return convert_backend_index(backend1) < convert_backend_index(backend2); diff --git a/ggml/src/ggml-sycl/mmq.cpp b/ggml/src/ggml-sycl/mmq.cpp index 3107ba919..e952533d3 100644 --- a/ggml/src/ggml-sycl/mmq.cpp +++ b/ggml/src/ggml-sycl/mmq.cpp @@ -1799,7 +1799,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q4_0_PASCAL; nwarps = NWARPS_Q4_0_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -1914,7 +1914,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q4_1_PASCAL; nwarps = NWARPS_Q4_1_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2029,7 +2029,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q5_0_PASCAL; nwarps = NWARPS_Q5_0_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2144,7 +2144,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q5_1_PASCAL; nwarps = NWARPS_Q5_1_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2259,7 +2259,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q8_0_PASCAL; nwarps = NWARPS_Q8_0_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2374,7 +2374,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q2_K_PASCAL; nwarps = NWARPS_Q2_K_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2497,7 +2497,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q3_K_PASCAL; nwarps = NWARPS_Q3_K_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2625,7 +2625,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q4_K_PASCAL; nwarps = NWARPS_Q4_K_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2746,7 +2746,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q5_K_PASCAL; nwarps = NWARPS_Q5_K_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -2867,7 +2867,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy, mmq_y = MMQ_Y_Q6_K_PASCAL; nwarps = NWARPS_Q6_K_PASCAL; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y; @@ -3016,7 +3016,7 @@ void ggml_sycl_op_mul_mat_q( ggml_mul_mat_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_ncols, src1_padded_row_size, nrows_dst, stream); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } diff --git a/ggml/src/ggml-sycl/mmvq.cpp b/ggml/src/ggml-sycl/mmvq.cpp index 3fbc4dd60..23232357e 100644 --- a/ggml/src/ggml-sycl/mmvq.cpp +++ b/ggml/src/ggml-sycl/mmvq.cpp @@ -1017,7 +1017,7 @@ void ggml_sycl_op_mul_mat_vec_q( mul_mat_vec_iq4_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); break; } } diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp index 6f507941a..c7545bcc1 100644 --- a/ggml/src/ggml-sycl/rope.cpp +++ b/ggml/src/ggml-sycl/rope.cpp @@ -251,7 +251,7 @@ void ggml_sycl_op_rope( attn_factor, corr_dims, freq_factors, main_stream ); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } else { if (src0->type == GGML_TYPE_F32) { @@ -265,7 +265,7 @@ void ggml_sycl_op_rope( attn_factor, corr_dims, freq_factors, main_stream ); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 6bcd81a7b..74991f6d1 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -1961,7 +1961,7 @@ void ggml_vk_instance_init() { // Make sure at least one device exists if (devices.empty()) { std::cerr << "ggml_vulkan: Error: No devices found." << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } // Default to using all dedicated GPUs @@ -2459,7 +2459,7 @@ static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_cont // Buffer is already mapped if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) { std::cerr << "ggml_vulkan: buffer_write_nc_async dst buffer is host_visible. Use synchronous write." << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } // Check if src is pinned memory vk_buffer buf; @@ -2527,7 +2527,7 @@ static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_cont staging = ctx->device->sync_staging; staging_offset = 0; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -2563,7 +2563,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * subctx, vk_buffer& dst, s // Buffer is already mapped if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) { std::cerr << "ggml_vulkan: buffer_write_async dst buffer is host_visible. Use synchronous write." << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } // Check if src is pinned memory vk_buffer buf = nullptr; @@ -2602,7 +2602,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * subctx, vk_buffer& dst, s staging_buffer = dst->device->sync_staging; staging_offset = 0; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -2704,7 +2704,7 @@ static void ggml_vk_buffer_read_2d_async(vk_context * subctx, vk_buffer& src, si staging_buffer = src->device->sync_staging; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -2913,7 +2913,7 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, ggml_ } std::cerr << "Missing CPY op for types: " << ggml_type_name(from) << " " << ggml_type_name(to) << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out) { @@ -3499,7 +3499,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context * const bool qy_needs_dequant = (src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig; if (mmp == nullptr) { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } // Not implemented @@ -4078,7 +4078,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c std::cerr << " and " << ggml_type_name(src1->type); } std::cerr << " to " << ggml_type_name(dst->type) << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } op_func(ctx, subctx, src0, src1, dst); @@ -4521,7 +4521,7 @@ static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0 } else if (type == GGML_TYPE_F16) { val = ggml_fp16_to_fp32(*((const ggml_fp16_t *) data + i2*ne1*ne0 + idx1*ne0 + idx0)); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } fprintf(stderr, "% 7.2f ", val); } else { @@ -4555,7 +4555,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t p = ctx->device->pipeline_matmul_f16->a_s; shname = "F16_ALIGNED_S"; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } else if (shader_size == 1) { if (std::is_same() && std::is_same()) { @@ -4571,7 +4571,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t p = ctx->device->pipeline_matmul_f16->a_m; shname = "F16_ALIGNED_M"; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } else if (shader_size == 2) { if (std::is_same() && std::is_same()) { @@ -4587,7 +4587,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t p = ctx->device->pipeline_matmul_f16->a_l; shname = "F16_ALIGNED_L"; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } else { GGML_ASSERT(0); @@ -4668,7 +4668,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t } else if (std::is_same()) { x[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } for (size_t i = 0; i < y_ne; i++) { @@ -4679,7 +4679,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t // y[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f); y[i] = ggml_fp32_to_fp16((i % k == i / k) ? 1.0f : 0.0f); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -4727,14 +4727,14 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t } else if (std::is_same()) { src0_type = GGML_TYPE_F16; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (std::is_same()) { src1_type = GGML_TYPE_F32; } else if (std::is_same()) { src1_type = GGML_TYPE_F16; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } ggml_tensor * src0_ggml = ggml_new_tensor_3d(ggml_ctx, src0_type, k, m, batch); @@ -4841,7 +4841,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, int i0, int i1 } else if (tensor->type == GGML_TYPE_F16) { val = ggml_fp16_to_fp32(*(ggml_fp16_t *) ((char *) tensor->data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0])); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } fprintf(stderr, "% 7.2f ", val); } else { @@ -5391,7 +5391,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { std::cerr << std::endl; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); #endif if (ctx->prealloc_x == nullptr || (ctx->prealloc_size_x > 0 && ctx->prealloc_x->size < ctx->prealloc_size_x)) { @@ -5486,7 +5486,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod break; default: std::cerr << "ggml_vulkan: Error: Missing op: " << ggml_op_name(node->op) << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); return; } @@ -6498,7 +6498,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, const void * d } else if (tensor->type == GGML_TYPE_I32) { val = *(const int32_t *) ((const char *) data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } fprintf(stderr, "% 7.2f ", val); } else { @@ -6620,7 +6620,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor * memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS); } } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (vk_output_tensor > 0 && vk_output_tensor == check_counter) { @@ -6662,7 +6662,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor * memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS); } } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (vk_output_tensor > 0 && vk_output_tensor == check_counter) { @@ -6720,7 +6720,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor * memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS); } } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (vk_output_tensor > 0 && vk_output_tensor == check_counter) { @@ -6797,7 +6797,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor * break; default: std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } else if (tensor->op == GGML_OP_CPY || tensor->op == GGML_OP_DUP) { if (src1 == nullptr) { @@ -6825,7 +6825,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor * tensor_clone = ggml_sum_rows(ggml_ctx, src0_clone); } else { std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx); @@ -6912,7 +6912,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor * } } else { std::cerr << "Missing debug code for type " << ggml_type_name(tensor->type) << std::endl; - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if ((std::isnan(correct) != std::isnan(result)) || (std::isinf(correct) != std::isinf(result)) || !buffer_size_fit) { @@ -6935,7 +6935,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor * std::cerr << std::endl; std::vector done; ggml_vk_print_graph_origin(tensor, done); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } if (first_error[0] == -1 && std::fabs(correct - result) > 0.1f) { first_error[0] = i0; @@ -7006,7 +7006,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor * std::cerr << std::endl; std::vector done; ggml_vk_print_graph_origin(tensor, done); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } else { std::cerr << check_counter << " " << tensor->name << " op=" << ggml_op_name(tensor->op) << " avg_err=" << avg_err << std::endl; } diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 3fd9c4fe2..8c346a502 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -269,7 +269,7 @@ inline static void * ggml_aligned_malloc(size_t size) { break; } GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); return NULL; } return aligned_memory; @@ -290,7 +290,7 @@ inline static void * ggml_malloc(size_t size) { void * result = malloc(size); if (result == NULL) { GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } return result; } @@ -304,7 +304,7 @@ inline static void * ggml_calloc(size_t num, size_t size) { void * result = calloc(num, size); if (result == NULL) { GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0)); - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } return result; } @@ -3902,7 +3902,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) { } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -3961,7 +3961,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) { } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -4031,7 +4031,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) { } default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -4076,7 +4076,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) { } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -4097,7 +4097,7 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i case GGML_TYPE_F32: return ((float *) data)[0]; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -4130,7 +4130,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -4168,7 +4168,7 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) { } default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -4207,7 +4207,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) { } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -4228,7 +4228,7 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, case GGML_TYPE_F32: return ((float *) data)[0]; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -4261,7 +4261,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -4860,7 +4860,7 @@ struct ggml_tensor * ggml_mean( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement is_node = true; } @@ -4883,7 +4883,7 @@ struct ggml_tensor * ggml_argmax( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); is_node = true; } @@ -5206,7 +5206,7 @@ static struct ggml_tensor * ggml_norm_impl( bool is_node = false; if (!inplace && (a->grad)) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -5309,7 +5309,7 @@ static struct ggml_tensor * ggml_group_norm_impl( bool is_node = false; if (!inplace && (a->grad)) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -5723,7 +5723,7 @@ struct ggml_tensor * ggml_reshape( if (b->grad) { // gradient propagation is not supported - //GGML_ASSERT(false); + //GGML_ABORT("fatal error"); } struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0); @@ -6506,7 +6506,7 @@ struct ggml_tensor * ggml_clamp( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6582,7 +6582,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d( bool is_node = false; if (a->grad || b->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6654,7 +6654,7 @@ struct ggml_tensor * ggml_im2col( bool is_node = false; if (a->grad || b->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6740,7 +6740,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0( bool is_node = false; if (a->grad || b->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6781,7 +6781,7 @@ struct ggml_tensor * ggml_pool_1d( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6819,7 +6819,7 @@ struct ggml_tensor * ggml_pool_2d( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6852,7 +6852,7 @@ static struct ggml_tensor * ggml_upscale_impl( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6902,7 +6902,7 @@ struct ggml_tensor * ggml_pad( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -6951,7 +6951,7 @@ struct ggml_tensor * ggml_timestep_embedding( bool is_node = false; if (timesteps->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -7077,7 +7077,7 @@ struct ggml_tensor * ggml_flash_attn_back( struct ggml_tensor * v, struct ggml_tensor * d, bool masked) { - GGML_ASSERT(false && "TODO: adapt to ggml_flash_attn_ext() changes"); + GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes"); GGML_ASSERT(ggml_can_mul_mat(k, q)); // TODO: check if vT can be multiplied by (k*qT) @@ -7176,7 +7176,7 @@ struct ggml_tensor * ggml_ssm_conv( bool is_node = false; if (s->grad || x->grad || c->grad || sq->grad) { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement is_node = true; } @@ -7230,7 +7230,7 @@ struct ggml_tensor * ggml_ssm_scan( bool is_node = false; if (s->grad || x->grad || dt->grad || A->grad || B->grad || C->grad || sq->grad) { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement is_node = true; } @@ -7262,7 +7262,7 @@ struct ggml_tensor * ggml_win_part( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -7300,7 +7300,7 @@ struct ggml_tensor * ggml_win_unpart( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -7330,7 +7330,7 @@ struct ggml_tensor * ggml_get_rel_pos( bool is_node = false; if (a->grad) { - GGML_ASSERT(false); // TODO: implement backward + GGML_ABORT("fatal error"); // TODO: implement backward is_node = true; } @@ -8020,7 +8020,7 @@ static void ggml_compute_forward_dup_f16( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } else { //printf("%s: this is not optimal - fix me\n", __func__); @@ -8062,7 +8062,7 @@ static void ggml_compute_forward_dup_f16( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } return; @@ -8179,7 +8179,7 @@ static void ggml_compute_forward_dup_f16( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } @@ -8306,7 +8306,7 @@ static void ggml_compute_forward_dup_bf16( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } else { //printf("%s: this is not optimal - fix me\n", __func__); @@ -8366,7 +8366,7 @@ static void ggml_compute_forward_dup_bf16( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } return; @@ -8535,7 +8535,7 @@ static void ggml_compute_forward_dup_bf16( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } @@ -8621,7 +8621,7 @@ static void ggml_compute_forward_dup_f32( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } else { //printf("%s: this is not optimal - fix me\n", __func__); @@ -8681,7 +8681,7 @@ static void ggml_compute_forward_dup_f32( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } @@ -8852,7 +8852,7 @@ static void ggml_compute_forward_dup_f32( } } } else { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } } @@ -9030,7 +9030,7 @@ static void ggml_compute_forward_dup( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -9183,7 +9183,7 @@ static void ggml_compute_forward_add_f16_f32( } else { // src1 is not contiguous - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -9258,7 +9258,7 @@ static void ggml_compute_forward_add_bf16_f32( } else { // src1 is not contiguous - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -9310,7 +9310,7 @@ static void ggml_compute_forward_add_f16_f16( } else { // src1 is not contiguous - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -9362,7 +9362,7 @@ static void ggml_compute_forward_add_bf16_bf16( } else { // src1 is not contiguous - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -9456,7 +9456,7 @@ static void ggml_compute_forward_add( ggml_compute_forward_add_f32(params, dst); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_TYPE_F16: @@ -9468,7 +9468,7 @@ static void ggml_compute_forward_add( ggml_compute_forward_add_f16_f32(params, dst); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_TYPE_BF16: @@ -9480,7 +9480,7 @@ static void ggml_compute_forward_add( ggml_compute_forward_add_bf16_f32(params, dst); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_TYPE_Q4_0: @@ -9510,7 +9510,7 @@ static void ggml_compute_forward_add( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -9845,7 +9845,7 @@ static void ggml_compute_forward_add1( ggml_compute_forward_add1_f16_f32(params, dst); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_TYPE_BF16: @@ -9857,7 +9857,7 @@ static void ggml_compute_forward_add1( ggml_compute_forward_add1_bf16_f32(params, dst); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_TYPE_Q4_0: @@ -9888,7 +9888,7 @@ static void ggml_compute_forward_add1( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10013,7 +10013,7 @@ static void ggml_compute_forward_acc( case GGML_TYPE_Q4_0_8_8: default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10094,7 +10094,7 @@ static void ggml_compute_forward_sub( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10188,7 +10188,7 @@ static void ggml_compute_forward_mul( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10279,7 +10279,7 @@ static void ggml_compute_forward_div( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10324,7 +10324,7 @@ static void ggml_compute_forward_sqr( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10369,7 +10369,7 @@ static void ggml_compute_forward_sqrt( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10414,7 +10414,7 @@ static void ggml_compute_forward_log( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10543,7 +10543,7 @@ static void ggml_compute_forward_sum( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10596,7 +10596,7 @@ static void ggml_compute_forward_sum_rows( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10653,7 +10653,7 @@ static void ggml_compute_forward_mean( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10701,7 +10701,7 @@ static void ggml_compute_forward_argmax( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10819,7 +10819,7 @@ static void ggml_compute_forward_repeat( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10897,7 +10897,7 @@ static void ggml_compute_forward_repeat_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -10966,7 +10966,7 @@ static void ggml_compute_forward_concat( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11010,7 +11010,7 @@ static void ggml_compute_forward_abs( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11054,7 +11054,7 @@ static void ggml_compute_forward_sgn( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11098,7 +11098,7 @@ static void ggml_compute_forward_neg( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11142,7 +11142,7 @@ static void ggml_compute_forward_step( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11186,7 +11186,7 @@ static void ggml_compute_forward_tanh( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11230,7 +11230,7 @@ static void ggml_compute_forward_elu( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11274,7 +11274,7 @@ static void ggml_compute_forward_relu( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11318,7 +11318,7 @@ static void ggml_compute_forward_sigmoid( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11377,7 +11377,7 @@ static void ggml_compute_forward_gelu( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11436,7 +11436,7 @@ static void ggml_compute_forward_gelu_quick( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11495,7 +11495,7 @@ static void ggml_compute_forward_silu( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11544,7 +11544,7 @@ static void ggml_compute_forward_leaky_relu( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11607,7 +11607,7 @@ static void ggml_compute_forward_silu_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11649,7 +11649,7 @@ static void ggml_compute_forward_hardswish( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11691,7 +11691,7 @@ static void ggml_compute_forward_hardsigmoid( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11763,7 +11763,7 @@ static void ggml_compute_forward_norm( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -11831,7 +11831,7 @@ static void ggml_compute_forward_rms_norm( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -12004,7 +12004,7 @@ static void ggml_compute_forward_rms_norm_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -12098,7 +12098,7 @@ static void ggml_compute_forward_group_norm( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -12857,7 +12857,7 @@ static void ggml_compute_forward_out_prod( } break; case GGML_TYPE_F16: { - GGML_ASSERT(false); // todo + GGML_ABORT("fatal error"); // todo // ggml_compute_forward_out_prod_f16_f32(params, dst); } case GGML_TYPE_F32: @@ -12866,7 +12866,7 @@ static void ggml_compute_forward_out_prod( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -12926,7 +12926,7 @@ static void ggml_compute_forward_scale( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -13042,7 +13042,7 @@ static void ggml_compute_forward_set( case GGML_TYPE_Q4_0_8_8: default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -13320,7 +13320,7 @@ static void ggml_compute_forward_get_rows( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -13428,7 +13428,7 @@ static void ggml_compute_forward_get_rows_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -13506,7 +13506,7 @@ static void ggml_compute_forward_diag( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -13576,7 +13576,7 @@ static void ggml_compute_forward_diag_mask_inf( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -13594,7 +13594,7 @@ static void ggml_compute_forward_diag_mask_zero( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -13712,7 +13712,7 @@ static void ggml_compute_forward_soft_max( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -13808,7 +13808,7 @@ static void ggml_compute_forward_soft_max_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -13899,7 +13899,7 @@ static void ggml_compute_forward_clamp( case GGML_TYPE_F64: case GGML_TYPE_COUNT: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -14229,7 +14229,7 @@ static void ggml_compute_forward_rope( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -14253,7 +14253,7 @@ static void ggml_compute_forward_rope_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -14453,7 +14453,7 @@ static void ggml_compute_forward_conv_transpose_1d( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -14625,7 +14625,7 @@ static void ggml_compute_forward_im2col( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -14758,20 +14758,20 @@ static void ggml_compute_forward_pool_1d_sk_p0( switch (op) { case GGML_OP_POOL_AVG: drow[i] = 0; break; case GGML_OP_POOL_MAX: drow[i] = -FLT_MAX; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } for (int ki = 0; ki < k; ++ki) { switch (op) { case GGML_OP_POOL_AVG: drow[i] += srow[j]; break; case GGML_OP_POOL_MAX: if (srow[j] > drow[i]) drow[i] = srow[j]; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } ++j; } switch (op) { case GGML_OP_POOL_AVG: drow[i] /= k; break; case GGML_OP_POOL_MAX: break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } } @@ -14840,7 +14840,7 @@ static void ggml_compute_forward_pool_2d( switch (op) { case GGML_OP_POOL_AVG: *out = 0; break; case GGML_OP_POOL_MAX: *out = -FLT_MAX; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } const int ix = offset0 + ox * s0; @@ -14855,14 +14855,14 @@ static void ggml_compute_forward_pool_2d( switch (op) { case GGML_OP_POOL_AVG: *out += srow[j]; break; case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } } } switch (op) { case GGML_OP_POOL_AVG: *out /= ka; break; case GGML_OP_POOL_MAX: break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } } } @@ -14926,7 +14926,7 @@ static void ggml_compute_forward_upscale( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -14984,7 +14984,7 @@ static void ggml_compute_forward_pad( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -15025,7 +15025,7 @@ static void ggml_compute_forward_arange( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -15076,7 +15076,7 @@ static void ggml_compute_forward_timestep_embedding( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -15135,7 +15135,7 @@ static void ggml_compute_forward_argsort( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -15358,7 +15358,7 @@ static void ggml_compute_forward_flash_attn_ext( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -15694,7 +15694,7 @@ static void ggml_compute_forward_flash_attn_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -15816,7 +15816,7 @@ static void ggml_compute_forward_ssm_conv( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -15937,7 +15937,7 @@ static void ggml_compute_forward_ssm_scan( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16000,7 +16000,7 @@ static void ggml_compute_forward_win_part( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16061,7 +16061,7 @@ static void ggml_compute_forward_win_unpart( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16129,7 +16129,7 @@ static void ggml_compute_forward_unary( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16176,7 +16176,7 @@ static void ggml_compute_forward_get_rel_pos( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16257,7 +16257,7 @@ static void ggml_compute_forward_add_rel_pos( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16303,7 +16303,7 @@ static void ggml_compute_forward_map_unary( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16352,7 +16352,7 @@ static void ggml_compute_forward_map_binary( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16551,7 +16551,7 @@ static void ggml_compute_forward_cross_entropy_loss( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16638,7 +16638,7 @@ static void ggml_compute_forward_cross_entropy_loss_back( } break; default: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -16974,7 +16974,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm } break; case GGML_OP_COUNT: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -17364,7 +17364,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor case GGML_OP_MEAN: case GGML_OP_ARGMAX: { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } case GGML_OP_REPEAT: { @@ -17388,15 +17388,15 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_CONCAT: { - GGML_ASSERT(false); // TODO: implement + GGML_ABORT("fatal error"); // TODO: implement } case GGML_OP_SILU_BACK: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_NORM: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_RMS_NORM: { @@ -17413,11 +17413,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_RMS_NORM_BACK: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_GROUP_NORM: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_MUL_MAT: { @@ -17479,11 +17479,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_MUL_MAT_ID: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_OUT_PROD: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_SCALE: { @@ -17660,11 +17660,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_GET_ROWS_BACK: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_DIAG: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_DIAG_MASK_INF: { @@ -17703,7 +17703,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_SOFT_MAX_BACK: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_ROPE: { @@ -17779,51 +17779,51 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_CLAMP: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_CONV_TRANSPOSE_1D: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_IM2COL: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_CONV_TRANSPOSE_2D: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_POOL_1D: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_POOL_2D: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_UPSCALE: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_PAD: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_ARANGE: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_TIMESTEP_EMBEDDING: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_ARGSORT: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_LEAKY_RELU: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_FLASH_ATTN_EXT: { @@ -17880,12 +17880,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_FLASH_ATTN_BACK: { - GGML_ASSERT(false); // not supported + GGML_ABORT("fatal error"); // not supported } case GGML_OP_SSM_CONV: case GGML_OP_SSM_SCAN: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_OP_WIN_PART: case GGML_OP_WIN_UNPART: @@ -17924,11 +17924,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_UNARY_OP_TANH: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_UNARY_OP_ELU: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_UNARY_OP_RELU: { @@ -17943,15 +17943,15 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_UNARY_OP_SIGMOID: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_UNARY_OP_GELU: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_UNARY_OP_GELU_QUICK: { - GGML_ASSERT(false); // TODO: not implemented + GGML_ABORT("fatal error"); // TODO: not implemented } case GGML_UNARY_OP_SILU: { @@ -17964,7 +17964,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } } break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_OP_GET_REL_POS: @@ -17978,7 +17978,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM3: { - GGML_ASSERT(false); // not supported + GGML_ABORT("fatal error"); // not supported } case GGML_OP_CROSS_ENTROPY_LOSS: { @@ -17994,7 +17994,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_CROSS_ENTROPY_LOSS_BACK: { - GGML_ASSERT(false); // not supported + GGML_ABORT("fatal error"); // not supported } case GGML_OP_NONE: { @@ -18002,7 +18002,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_COUNT: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -18460,7 +18460,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { n_tasks = n_threads; } break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } break; case GGML_OP_SILU_BACK: @@ -18587,7 +18587,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { } break; case GGML_OP_COUNT: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } default: { @@ -18597,7 +18597,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { } else { fprintf(stderr, "%d\n", node->op); } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -18708,7 +18708,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa cur += sizeof(float)*ne00*ne01*ne02; cur += sizeof(float)*ne10*ne11; } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } break; case GGML_OP_CONV_TRANSPOSE_2D: @@ -18754,7 +18754,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa } break; case GGML_OP_COUNT: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } default: break; @@ -19989,7 +19989,7 @@ static enum ggml_opt_result linesearch_backtracking( (*step) *= width; } - GGML_ASSERT(false && "line search failed"); + GGML_ABORT("line search failed"); //return GGML_LINESEARCH_FAIL; } @@ -20259,7 +20259,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( step[0] = 1.0; } - GGML_ASSERT(false && "lbfgs failed"); + GGML_ABORT("lbfgs failed"); //return GGML_OPT_RESULT_DID_NOT_CONVERGE; } @@ -20956,10 +20956,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p } } break; case GGUF_TYPE_ARRAY: - default: GGML_ASSERT(false && "invalid type"); + default: GGML_ABORT("invalid type"); } } break; - default: GGML_ASSERT(false && "invalid type"); + default: GGML_ABORT("invalid type"); } if (!ok) { @@ -21540,12 +21540,12 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); GGML_FREE((void *)data); } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) { - GGML_ASSERT(false && "nested arrays not supported"); + GGML_ABORT("nested arrays not supported"); } else { gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); } } break; - default: GGML_ASSERT(false && "invalid type"); + default: GGML_ABORT("invalid type"); } } } @@ -21554,7 +21554,7 @@ void gguf_add_tensor( struct gguf_context * ctx, const struct ggml_tensor * tensor) { if (gguf_find_tensor(ctx, tensor->name) != -1) { - GGML_ASSERT(false && "duplicated tensor name"); + GGML_ABORT("duplicated tensor name"); } const int idx = ctx->header.n_tensors; @@ -21587,7 +21587,7 @@ void gguf_add_tensor( void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) { const int idx = gguf_find_tensor(ctx, name); if (idx < 0) { - GGML_ASSERT(false && "tensor not found"); + GGML_ABORT("tensor not found"); } ctx->infos[idx].type = type; @@ -21596,7 +21596,7 @@ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggm void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) { const int idx = gguf_find_tensor(ctx, name); if (idx < 0) { - GGML_ASSERT(false && "tensor not found"); + GGML_ABORT("tensor not found"); } ctx->infos[idx].data = data; @@ -21725,10 +21725,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * } } break; case GGUF_TYPE_ARRAY: - default: GGML_ASSERT(false && "invalid type"); + default: GGML_ABORT("invalid type"); } } break; - default: GGML_ASSERT(false && "invalid type"); + default: GGML_ABORT("invalid type"); } } @@ -21789,7 +21789,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { FILE * file = ggml_fopen(fname, "wb"); if (!file) { - GGML_ASSERT(false && "failed to open file for writing"); + GGML_ABORT("failed to open file for writing"); } struct gguf_buf buf = gguf_buf_init(16*1024); diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp index bd9322e2f..b123d7331 100644 --- a/src/llama-grammar.cpp +++ b/src/llama-grammar.cpp @@ -221,7 +221,7 @@ static void llama_grammar_advance_stack( // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on // those - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -517,7 +517,7 @@ void llama_grammar_accept_token_impl(struct llama_grammar * grammar, const struc return; } } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } const std::string & piece = vocab->cache_token_to_piece.at(token); diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 66f517036..133094904 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -152,14 +152,14 @@ static uint8_t llama_token_to_byte(const llama_vocab & vocab, llama_token id) { return strtol(buf.c_str(), NULL, 16); } case LLAMA_VOCAB_TYPE_BPE: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); //return unicode_utf8_to_byte(token_data.text); // TODO: why is this here after GGML_ASSERT? } case LLAMA_VOCAB_TYPE_WPM: { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -1396,7 +1396,7 @@ std::vector llama_tokenize_internal(const llama_vocab & vocab, } } break; case LLAMA_VOCAB_TYPE_NONE: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } return output; @@ -1422,7 +1422,7 @@ llama_token llama_byte_to_token_impl(const llama_vocab & vocab, uint8_t ch) { return vocab.token_to_id.at(unicode_byte_to_utf8(ch)); } default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -1606,7 +1606,7 @@ int32_t llama_token_to_piece_impl(const struct llama_vocab & vocab, llama_token break; } default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } diff --git a/src/llama.cpp b/src/llama.cpp index 67a61e805..bc830c0ef 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2259,7 +2259,7 @@ struct llama_hparams { return n_head_arr[il]; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } uint32_t n_head_kv(uint32_t il = 0) const { @@ -2267,7 +2267,7 @@ struct llama_hparams { return n_head_kv_arr[il]; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } uint32_t n_ff(uint32_t il = 0) const { @@ -2275,7 +2275,7 @@ struct llama_hparams { return n_ff_arr[il]; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } uint32_t n_gqa(uint32_t il = 0) const { @@ -8069,7 +8069,7 @@ static struct ggml_tensor * llm_build_moe_ffn( cb(gate, "ffn_moe_gelu", il); } break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } ggml_tensor * par = ggml_mul(ctx, up, gate); // [n_ff, n_expert_used, n_tokens] @@ -8632,7 +8632,7 @@ struct llm_build_context { } break; default: { - GGML_ASSERT(false && "unknown pooling type"); + GGML_ABORT("unknown pooling type"); } } @@ -8888,7 +8888,7 @@ struct llm_build_context { Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd/n_head, n_head, n_tokens); break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } cb(Qcur, "Qcur", il); cb(Kcur, "Kcur", il); @@ -11720,7 +11720,7 @@ struct llm_build_context { switch (model.type) { case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break; case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break; - default: GGML_ASSERT(false); + default: GGML_ABORT("fatal error"); }; cb(Qcur, "Qcur_scaled", il); @@ -13885,7 +13885,7 @@ static struct ggml_cgraph * llama_build_graph( result = llm.build_jais(); } break; default: - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } // add on pooling layer @@ -14684,7 +14684,7 @@ static int llama_decode_internal( } break; case LLAMA_POOLING_TYPE_UNSPECIFIED: { - GGML_ASSERT(false && "unknown pooling type"); + GGML_ABORT("unknown pooling type"); } } } @@ -15076,7 +15076,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) { // apply K-shift if needed if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA - GGML_ASSERT(false && "Deepseek2 does not support K-shift"); + GGML_ABORT("Deepseek2 does not support K-shift"); } { @@ -15215,7 +15215,7 @@ static void llama_tensor_dequantize_internal( } else if (ggml_is_quantized(tensor->type)) { qtype.to_float(tensor->data, f32_output, nelements); } else { - GGML_ASSERT(false); // unreachable + GGML_ABORT("fatal error"); // unreachable } return; } @@ -16901,7 +16901,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { // all model arches should be listed explicitly here case LLM_ARCH_UNKNOWN: - GGML_ASSERT(false && "unknown architecture"); + GGML_ABORT("unknown architecture"); } return LLAMA_ROPE_TYPE_NONE; @@ -18465,7 +18465,7 @@ float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) { } catch (const std::exception & err) { LLAMA_LOG_ERROR("%s: invalid logits id %d, reason: %s\n", __func__, i, err.what()); #ifndef NDEBUG - GGML_ASSERT(false); + GGML_ABORT("fatal error"); #endif return nullptr; } @@ -18510,7 +18510,7 @@ float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i) { } catch (const std::exception & err) { LLAMA_LOG_ERROR("%s: invalid embeddings id %d, reason: %s\n", __func__, i, err.what()); #ifndef NDEBUG - GGML_ASSERT(false); + GGML_ABORT("fatal error"); #endif return nullptr; } diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 2c03c60d4..2fa59fd0a 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -94,7 +94,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m // This is going to create some weird integers though. ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor)); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } @@ -132,7 +132,7 @@ static std::vector tensor_to_float(const ggml_tensor * t) { tt.to_float(&buf[i], vq.data(), bs); tv.insert(tv.end(), vq.begin(), vq.end()); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -1435,7 +1435,7 @@ struct test_argsort : public test_case { ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float)); } } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } } } @@ -2462,7 +2462,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op return true; } - GGML_ASSERT(false); + GGML_ABORT("fatal error"); return false; } diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index 6374958fe..de858bd3b 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -166,12 +166,12 @@ static void test_sampler_queue( for (auto s : samplers_sequence) { switch (s){ case 'k': llama_sample_top_k (nullptr, &candidates_p, top_k, 1); break; - case 'f': GGML_ASSERT(false && "tail_free test not implemented"); break; - case 'y': GGML_ASSERT(false && "typical test not implemented"); break; + case 'f': GGML_ABORT("tail_free test not implemented"); break; + case 'y': GGML_ABORT("typical test not implemented"); break; case 'p': llama_sample_top_p (nullptr, &candidates_p, top_p, 1); break; case 'm': llama_sample_min_p (nullptr, &candidates_p, min_p, 1); break; - case 't': GGML_ASSERT(false && "temperature test not implemented"); break; - default : GGML_ASSERT(false && "Unknown sampler"); break; + case 't': GGML_ABORT("temperature test not implemented"); break; + default : GGML_ABORT("Unknown sampler"); break; } llama_sample_softmax(nullptr, &candidates_p); // make sure tokens are sorted for tests @@ -222,7 +222,7 @@ static void test_sampler_queue( GGML_ASSERT(candidates_p.data[0].id == max_token_id); GGML_ASSERT(candidates_p.data[expected_size-1].id == min_token_id); } else { - GGML_ASSERT(false); + GGML_ABORT("fatal error"); } }