GGML_ASSERT(false) -> GGML_ABORT("fatal error")
This commit is contained in:
parent
dd84a88a58
commit
3ed1bc09f5
45 changed files with 358 additions and 358 deletions
|
@ -62,7 +62,7 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne
|
|||
} else if (type == GGML_TYPE_I8) {
|
||||
v = (float) *(int8_t *) &data[i];
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
printf("%12.4f", v);
|
||||
sum += v;
|
||||
|
|
|
@ -127,7 +127,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|||
}
|
||||
else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
|
||||
fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
|
||||
exit(1); //GGML_ASSERT(false);
|
||||
exit(1); //GGML_ABORT("fatal error");
|
||||
}
|
||||
if (m_params.verbosity > 1) {
|
||||
printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
|
||||
|
@ -176,7 +176,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|||
}
|
||||
else if (e.values.size() != (size_t)src1->ne[0]) {
|
||||
fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]);
|
||||
exit(1); //GGML_ASSERT(false);
|
||||
exit(1); //GGML_ABORT("fatal error");
|
||||
}
|
||||
++e.ncall;
|
||||
if (m_params.verbosity > 1) {
|
||||
|
|
|
@ -150,7 +150,7 @@ static const char * output_format_str(output_formats format) {
|
|||
case JSON: return "json";
|
||||
case MARKDOWN: return "md";
|
||||
case SQL: return "sql";
|
||||
default: GGML_ASSERT(!"invalid output format");
|
||||
default: GGML_ABORT("invalid output format");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,7 +176,7 @@ static const char * split_mode_str(llama_split_mode mode) {
|
|||
case LLAMA_SPLIT_MODE_NONE: return "none";
|
||||
case LLAMA_SPLIT_MODE_LAYER: return "layer";
|
||||
case LLAMA_SPLIT_MODE_ROW: return "row";
|
||||
default: GGML_ASSERT(!"invalid split mode");
|
||||
default: GGML_ABORT("invalid split mode");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1326,7 +1326,7 @@ static std::unique_ptr<printer> create_printer(output_formats format) {
|
|||
case SQL:
|
||||
return std::unique_ptr<printer>(new sql_printer());
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
|
|
|
@ -869,7 +869,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|||
embeddings = peg_0;
|
||||
}
|
||||
else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -163,7 +163,7 @@ static void write_utf8_cstr_to_stdout(const char * str, bool & invalid_utf8) {
|
|||
printf(">");
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(false && "MultiByteToWideChar() failed in an unexpected way.");
|
||||
GGML_ABORT("MultiByteToWideChar() failed in an unexpected way.");
|
||||
}
|
||||
|
||||
LPWSTR wstr = (LPWSTR) calloc(length_needed+1, sizeof(*wstr));
|
||||
|
|
|
@ -272,7 +272,8 @@
|
|||
#define GGML_NORETURN _Noreturn
|
||||
#endif
|
||||
|
||||
#define GGML_ASSERT(x) if (!(x)) ggml_abort(__FILE__, __LINE__, #x)
|
||||
// unconditional abort: forwards the call site (__FILE__, __LINE__) and the message x to ggml_abort()
#define GGML_ABORT(x) ggml_abort(__FILE__, __LINE__, x)
// aborts with the stringified condition as the message when x evaluates to false.
// the do { } while (0) wrapper makes the expansion a single statement, so an `else`
// that follows GGML_ASSERT(...); cannot bind to the macro's internal `if`
#define GGML_ASSERT(x) do { if (!(x)) GGML_ABORT(#x); } while (0)
|
||||
|
||||
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
||||
// main purpose is to reduce code duplication and improve readability.
|
||||
|
@ -322,7 +323,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
GGML_API GGML_NORETURN void ggml_abort(const char * file, int line, const char * expr);
|
||||
// fatal-error entry point that GGML_ABORT expands to; declared GGML_NORETURN, i.e. it never returns to the caller.
// file / line: call site (GGML_ABORT passes __FILE__ and __LINE__)
// expr:        message text, or the stringified condition when reached through GGML_ASSERT
GGML_NORETURN GGML_API void ggml_abort(const char * file, int line, const char * expr);
|
||||
|
||||
enum ggml_status {
|
||||
GGML_STATUS_ALLOC_FAILED = -2,
|
||||
|
|
|
@ -91,7 +91,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
|
|||
if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
|
||||
fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
|
||||
__func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
|
||||
GGML_ASSERT(!"not enough space in the buffer");
|
||||
GGML_ABORT("not enough space in the buffer");
|
||||
}
|
||||
|
||||
void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset;
|
||||
|
@ -132,7 +132,7 @@ static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset,
|
|||
return;
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(!"out of allocated_tensors");
|
||||
GGML_ABORT("out of allocated_tensors");
|
||||
}
|
||||
static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) {
|
||||
for (int i = 0; i < 1024; i++) {
|
||||
|
@ -142,7 +142,7 @@ static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offs
|
|||
}
|
||||
}
|
||||
fprintf(stderr, "tried to free tensor %s not found\n", tensor->name);
|
||||
GGML_ASSERT(!"tensor not found");
|
||||
GGML_ABORT("tensor not found");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -175,8 +175,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
|
|||
// this should never happen
|
||||
fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
|
||||
__func__, size, max_avail);
|
||||
GGML_ASSERT(!"not enough space in the buffer");
|
||||
GGML_UNREACHABLE();
|
||||
GGML_ABORT("not enough space in the buffer");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1280,7 +1280,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|||
sched->ctx = ggml_init(params);
|
||||
if (sched->ctx == NULL) {
|
||||
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
// pass 1: assign backends to ops with pre-allocated inputs
|
||||
|
|
|
@ -276,7 +276,7 @@ GGML_CALL static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t
|
|||
|
||||
default:
|
||||
fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -120,7 +120,7 @@ static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) {
|
|||
file, line);
|
||||
GGML_CANN_LOG_ERROR(" %s\n", stmt);
|
||||
// abort with GGML_ABORT to get a stack trace
|
||||
GGML_ASSERT(!"CANN error");
|
||||
GGML_ABORT("CANN error");
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -342,7 +342,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|||
// memory should always be buffered. this memory may still be needed by
|
||||
// tasks in stream.
|
||||
// TODO, fix me.
|
||||
GGML_ASSERT(!"Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
|
||||
GGML_ABORT("Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1874,7 +1874,7 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
|
|||
ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(),
|
||||
(aclrtEvent)event->context));
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -844,7 +844,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
ggml_cann_max_pool2d(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_POOL_COUNT:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -931,9 +931,9 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
((ggml_tensor*)dst->extra)->nb);
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
if (dst->type == GGML_TYPE_F32) {
|
||||
if (ggml_are_same_shape(src, dst)) {
|
||||
|
@ -955,12 +955,12 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
((ggml_tensor*)dst->extra)->nb);
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
// TODO
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
} else if (src->type == GGML_TYPE_F32) {
|
||||
// TODO: if (src0->type == dst->type && ne00 == ne0 && nb00 == type_size
|
||||
// && nb0 == type_size)
|
||||
|
@ -991,10 +991,10 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
((ggml_tensor*)dst->extra)->nb);
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
} else {
|
||||
// TODO: dst not contiguous
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
if (dst->type == GGML_TYPE_F16) {
|
||||
|
@ -1017,11 +1017,11 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
((ggml_tensor*)dst->extra)->nb);
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
// TODO
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
} else {
|
||||
if (ggml_are_same_shape(src, dst)) {
|
||||
cann_copy(ctx, acl_src, acl_dst);
|
||||
|
@ -1029,7 +1029,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2219,7 +2219,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
((ggml_tensor*)dst->extra)->nb);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2492,7 +2492,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
ggml_cann_mul_mat_q8_0(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,7 +98,7 @@ void ggml_cuda_error(const char * stmt, const char * func, const char * file, in
|
|||
GGML_CUDA_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
|
||||
GGML_CUDA_LOG_ERROR(" %s\n", stmt);
|
||||
// abort with GGML_ABORT to get a stack trace
|
||||
GGML_ASSERT(!"CUDA error");
|
||||
GGML_ABORT("CUDA error");
|
||||
}
|
||||
|
||||
// this is faster on Windows
|
||||
|
@ -1596,7 +1596,7 @@ static void ggml_cuda_op_mul_mat(
|
|||
CUDA_CHECK(ggml_cuda_cpy_tensor_2d(
|
||||
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
if (quantize_src1 && !src1_is_contiguous) {
|
||||
|
@ -2945,7 +2945,7 @@ static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_ev
|
|||
|
||||
CUDA_CHECK(cudaLaunchHostFunc(cuda_ctx->stream(), wait_fn, event));
|
||||
#endif
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
|
|||
} else if (order == GGML_SORT_ORDER_DESC) {
|
||||
k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -259,7 +259,7 @@ static void ggml_cuda_op_bin_bcast(
|
|||
} else {
|
||||
fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
|
||||
ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -348,7 +348,7 @@ static __device__ void no_device_code(
|
|||
#ifdef __CUDA_ARCH__
|
||||
#define NO_DEVICE_CODE no_device_code(__FILE__, __LINE__, __FUNCTION__, __CUDA_ARCH__, STRINGIZE(__CUDA_ARCH_LIST__))
|
||||
#else
|
||||
#define NO_DEVICE_CODE //GGML_ASSERT(false && "NO_DEVICE_CODE not valid in host code.")
|
||||
#define NO_DEVICE_CODE //GGML_ABORT("NO_DEVICE_CODE not valid in host code.")
|
||||
#endif // __CUDA_ARCH__
|
||||
|
||||
static __device__ __forceinline__ float warp_reduce_sum(float x) {
|
||||
|
|
|
@ -451,7 +451,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
|
|||
} else {
|
||||
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
|
||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -484,6 +484,6 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
|
|||
} else {
|
||||
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
|
||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -662,7 +662,7 @@ void ggml_cuda_op_dequantize_mul_mat_vec(
|
|||
convert_mul_mat_vec_f16_cuda(src0_dd_i, src1_dfloat, dst_dd_i, ne00, row_diff, stream);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -564,7 +564,7 @@ static void on_no_fattn_vec_case(const int D) {
|
|||
fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
|
||||
fprintf(stderr, "By default only f16 KV cache is supported.\n");
|
||||
fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
} else if (D == 128) {
|
||||
fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");
|
||||
fprintf(stderr, "Supported combinations:\n");
|
||||
|
@ -572,11 +572,11 @@ static void on_no_fattn_vec_case(const int D) {
|
|||
fprintf(stderr, " - K == q8_0, V == q8_0, 8.50 BPV\n");
|
||||
fprintf(stderr, " - K == f16, V == f16, 16.00 BPV\n");
|
||||
fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
} else {
|
||||
fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
|
||||
fprintf(stderr, "Only f16 is supported.\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -287,7 +287,7 @@ void launch_fattn_tile_f16_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
|
|||
launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
|
||||
} break;
|
||||
default: {
|
||||
GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
|
||||
GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -284,7 +284,7 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
|
|||
launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
|
||||
} break;
|
||||
default: {
|
||||
GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
|
||||
GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
|
|||
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -63,7 +63,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
|
|||
// ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst);
|
||||
// break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
|
|||
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
return;
|
||||
|
@ -114,7 +114,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
|
|||
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
return;
|
||||
|
@ -141,7 +141,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
|
|||
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -172,7 +172,7 @@ void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|||
default:
|
||||
// TODO: k-quants
|
||||
fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ void ggml_cuda_op_mul_mat_q(
|
|||
mul_mat_q_case<GGML_TYPE_IQ4_NL>(ctx, args, stream);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ static mmq_q8_1_ds_layout mmq_get_q8_1_ds_layout(const ggml_type type_x) {
|
|||
case GGML_TYPE_IQ4_NL:
|
||||
return MMQ_Q8_1_DS_LAYOUT_D4;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2898,7 +2898,7 @@ void mul_mat_q_case(ggml_backend_cuda_context & ctx, const mmq_args & args, cuda
|
|||
break;
|
||||
default:
|
||||
fprintf(stderr, "mmq_x_best=%d\n", mmq_x_best);
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -162,7 +162,7 @@ static void mul_mat_vec_q_cuda(
|
|||
rows_per_cuda_block = 2;
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -196,7 +196,7 @@ static void mul_mat_vec_q_cuda(
|
|||
mul_mat_vec_q<type, 8><<<block_nums, block_dims, 0, stream>>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -413,7 +413,7 @@ void ggml_cuda_op_mul_mat_vec_q(
|
|||
mul_mat_vec_iq3_s_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -163,7 +163,7 @@ void quantize_mmq_q8_1_cuda(
|
|||
<<<num_blocks, block_size, 0, stream>>>(x, vy, kx0, kx1, kx0_padded);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -251,7 +251,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|||
attn_factor, corr_dims, freq_factors, stream
|
||||
);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} else {
|
||||
if (src0->type == GGML_TYPE_F32) {
|
||||
|
@ -265,7 +265,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|||
attn_factor, corr_dims, freq_factors, stream
|
||||
);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -726,7 +726,7 @@ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tens
|
|||
} while (i != h);
|
||||
|
||||
// visited all hash table entries -> not found
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
|
||||
|
@ -747,7 +747,7 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
|
|||
} while (i != h);
|
||||
|
||||
// visited all hash table entries -> not found
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -566,7 +566,7 @@ uint32_t safe_divide(uint32_t a, uint32_t b) {
|
|||
}
|
||||
if ((a % b) != 0) {
|
||||
fprintf(stderr, "((%u %% %u) == %u) != 0\n", a, b, a % b);
|
||||
GGML_ASSERT(!"safe_divide result would've had remainder");
|
||||
GGML_ABORT("safe_divide result would've had remainder");
|
||||
}
|
||||
return a / b;
|
||||
}
|
||||
|
@ -1460,7 +1460,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
|
|||
|
||||
if (!ggml_vk_supports_op(dst)) {
|
||||
fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
|
||||
GGML_ASSERT(!"unsupported op");
|
||||
GGML_ABORT("unsupported op");
|
||||
}
|
||||
|
||||
const int32_t ne00 = src0 ? src0->ne[0] : 0;
|
||||
|
@ -1562,7 +1562,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
|
|||
default:
|
||||
{
|
||||
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
@ -1745,7 +1745,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
|
|||
continue;
|
||||
not_implemented: {}
|
||||
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
|
||||
//GGML_ASSERT(false);
|
||||
//GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
// Evaluate sequence
|
||||
|
|
|
@ -869,7 +869,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
NSError * error = nil;
|
||||
if (![[MTLCaptureManager sharedCaptureManager] startCaptureWithDescriptor:descriptor error:&error]) {
|
||||
GGML_METAL_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]);
|
||||
GGML_ASSERT(!"capture failed");
|
||||
GGML_ABORT("capture failed");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -931,7 +931,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
|
||||
if (!ggml_metal_supports_op(ctx, dst)) {
|
||||
GGML_METAL_LOG_ERROR("%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
|
||||
GGML_ASSERT(!"unsupported op");
|
||||
GGML_ABORT("unsupported op");
|
||||
}
|
||||
|
||||
if (should_capture) {
|
||||
|
@ -1068,7 +1068,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD_ROW].pipeline; break;
|
||||
case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_ROW].pipeline; break;
|
||||
case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV_ROW].pipeline; break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
bcast_row = true;
|
||||
|
@ -1077,7 +1077,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD].pipeline; break;
|
||||
case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL].pipeline; break;
|
||||
case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV].pipeline; break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1131,7 +1131,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_F16].pipeline; break;
|
||||
case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I32].pipeline; break;
|
||||
case GGML_TYPE_I16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I16].pipeline; break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
|
@ -1387,7 +1387,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
default:
|
||||
{
|
||||
GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_SQR:
|
||||
|
@ -1609,7 +1609,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
case GGML_TYPE_IQ1_M: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_M_F32 ].pipeline; break;
|
||||
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32 ].pipeline; break;
|
||||
case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_XS_F32 ].pipeline; break;
|
||||
default: GGML_ASSERT(false && "MUL MAT-MAT not implemented");
|
||||
default: GGML_ABORT("MUL MAT-MAT not implemented");
|
||||
}
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
|
@ -1782,7 +1782,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
default:
|
||||
{
|
||||
GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
|
||||
GGML_ASSERT(false && "not implemented");
|
||||
GGML_ABORT("not implemented");
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1911,7 +1911,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
case GGML_TYPE_IQ1_M: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_M_F32 ].pipeline; break;
|
||||
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32 ].pipeline; break;
|
||||
case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_XS_F32 ].pipeline; break;
|
||||
default: GGML_ASSERT(false && "MUL_MAT_ID not implemented");
|
||||
default: GGML_ABORT("MUL_MAT_ID not implemented");
|
||||
}
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
|
@ -2078,7 +2078,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
default:
|
||||
{
|
||||
GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src2t);
|
||||
GGML_ASSERT(false && "not implemented");
|
||||
GGML_ABORT("not implemented");
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -2178,7 +2178,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL ].pipeline; break;
|
||||
case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_XS ].pipeline; break;
|
||||
case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_I32 ].pipeline; break;
|
||||
default: GGML_ASSERT(false && "not implemented");
|
||||
default: GGML_ABORT("not implemented");
|
||||
}
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
|
@ -2316,13 +2316,13 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
switch (src0->type) {
|
||||
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F32].pipeline; break;
|
||||
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F16].pipeline; break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
};
|
||||
} else {
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F32].pipeline; break;
|
||||
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F16].pipeline; break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -2399,7 +2399,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
switch (dst->type) {
|
||||
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F32].pipeline; break;
|
||||
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F16].pipeline; break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
};
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
|
@ -2556,7 +2556,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
switch (order) {
|
||||
case GGML_SORT_ORDER_ASC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC].pipeline; break;
|
||||
case GGML_SORT_ORDER_DESC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC].pipeline; break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
};
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
|
@ -2645,7 +2645,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
{
|
||||
GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
|
||||
GGML_METAL_LOG_ERROR("add template specialization for this size\n");
|
||||
GGML_ASSERT(false && "add template specialization for this size");
|
||||
GGML_ABORT("add template specialization for this size");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -2658,7 +2658,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
{
|
||||
GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
|
||||
GGML_METAL_LOG_ERROR("add template specialization for this size\n");
|
||||
GGML_ASSERT(false && "add template specialization for this size");
|
||||
GGML_ABORT("add template specialization for this size");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2779,7 +2779,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0].pipeline; break;
|
||||
case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1].pipeline; break;
|
||||
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL].pipeline; break;
|
||||
default: GGML_ASSERT(false && "not implemented");
|
||||
default: GGML_ABORT("not implemented");
|
||||
};
|
||||
} break;
|
||||
case GGML_TYPE_F16:
|
||||
|
@ -2787,10 +2787,10 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
switch (dstt) {
|
||||
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F32].pipeline; break;
|
||||
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F16].pipeline; break;
|
||||
default: GGML_ASSERT(false && "not implemented");
|
||||
default: GGML_ABORT("not implemented");
|
||||
};
|
||||
} break;
|
||||
default: GGML_ASSERT(false && "not implemented");
|
||||
default: GGML_ABORT("not implemented");
|
||||
}
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
|
@ -2818,7 +2818,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
default:
|
||||
{
|
||||
GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12692,7 +12692,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
|
|||
printf("Oops: found point %u not on grid:", u);
|
||||
for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
|
||||
printf("\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
q2[2*ib+0] |= ((uint32_t) grid_index << 8*k);
|
||||
q2[2*ib+1] |= (block_signs[k] << 7*k);
|
||||
|
@ -12871,7 +12871,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
|
|||
printf("Oops: found point %u not on grid:", u);
|
||||
for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
|
||||
printf("\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
q2[2*ib+k] = grid_index | (block_signs[k] << 9);
|
||||
}
|
||||
|
@ -13314,7 +13314,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
|
|||
printf("Oops: found point %u not on grid:", u);
|
||||
for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
|
||||
printf("\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
if (grid_size == 256) {
|
||||
q3[8*ib+k] = grid_index;
|
||||
|
@ -13527,7 +13527,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
|
|||
printf("Oops: found point %u not on grid:", u);
|
||||
for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
|
||||
printf("\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
qs[k] = grid_index & 255;
|
||||
qh[(ib*bs4+k)/8] |= ((grid_index >> 8) << ((ib*bs4+k)%8));
|
||||
|
@ -14503,7 +14503,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
|
|||
printf("Oops: found point %u not on grid:", u);
|
||||
for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
|
||||
printf("\n");
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
const int i8 = 2*ib + k;
|
||||
y[ibl].qs[i8] = grid_index & 255;
|
||||
|
|
|
@ -1723,7 +1723,7 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|||
});
|
||||
});
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2075,8 +2075,8 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
|
|||
// GGML_SYCL_DEBUG("current device index %d\n", id);
|
||||
src_ptr = (char *) extra->data_device[id];
|
||||
} else {
|
||||
// GGML_SYCL_DEBUG("GGML_ASSERT(false)\n");
|
||||
GGML_ASSERT(false);
|
||||
// GGML_SYCL_DEBUG("GGML_ABORT(\"fatal error\")\n");
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
char * dst_ptr = (char *) dst;
|
||||
|
||||
|
@ -2163,7 +2163,7 @@ static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_te
|
|||
default:
|
||||
// TODO: k-quants
|
||||
fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2192,7 +2192,7 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
|
|||
} else {
|
||||
fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
|
||||
ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2476,7 +2476,7 @@ static int64_t get_row_rounding(ggml_type type, const std::array<float, GGML_SYC
|
|||
case GGML_TYPE_Q6_K:
|
||||
return 64;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3101,7 +3101,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
|
|||
SYCL_CHECK(ggml_sycl_cpy_tensor_2d(
|
||||
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
if (convert_src1_to_q8_1 && !src1_is_contiguous) {
|
||||
|
@ -3896,7 +3896,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
|
|||
} else {
|
||||
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
|
||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
(void) dst;
|
||||
|
|
|
@ -100,7 +100,7 @@ static void crash() {
|
|||
const char* msg) {
|
||||
fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg);
|
||||
fprintf(stderr, " in function %s at %s:%d\n", func, file, line);
|
||||
GGML_ASSERT(!"SYCL error");
|
||||
GGML_ABORT("SYCL error");
|
||||
}
|
||||
|
||||
#define SYCL_CHECK(err) \
|
||||
|
|
|
@ -1011,7 +1011,7 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
|
|||
break;
|
||||
default:
|
||||
printf("ggml_sycl_op_dequantize_mul_mat_vec unsupported GGML_TYPE %d\n", src0->type);
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -975,7 +975,7 @@ namespace dpct
|
|||
if (backend == "opencl:cpu") return 4;
|
||||
if (backend == "opencl:acc") return 5;
|
||||
printf("convert_backend_index: can't handle backend=%s\n", backend.c_str());
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
static bool compare_backend(std::string &backend1, std::string &backend2) {
|
||||
return convert_backend_index(backend1) < convert_backend_index(backend2);
|
||||
|
|
|
@ -1799,7 +1799,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q4_0_PASCAL;
|
||||
nwarps = NWARPS_Q4_0_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -1914,7 +1914,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q4_1_PASCAL;
|
||||
nwarps = NWARPS_Q4_1_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2029,7 +2029,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q5_0_PASCAL;
|
||||
nwarps = NWARPS_Q5_0_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2144,7 +2144,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q5_1_PASCAL;
|
||||
nwarps = NWARPS_Q5_1_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2259,7 +2259,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q8_0_PASCAL;
|
||||
nwarps = NWARPS_Q8_0_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2374,7 +2374,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q2_K_PASCAL;
|
||||
nwarps = NWARPS_Q2_K_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2497,7 +2497,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q3_K_PASCAL;
|
||||
nwarps = NWARPS_Q3_K_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2625,7 +2625,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q4_K_PASCAL;
|
||||
nwarps = NWARPS_Q4_K_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2746,7 +2746,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q5_K_PASCAL;
|
||||
nwarps = NWARPS_Q5_K_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -2867,7 +2867,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|||
mmq_y = MMQ_Y_Q6_K_PASCAL;
|
||||
nwarps = NWARPS_Q6_K_PASCAL;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
|
||||
|
@ -3016,7 +3016,7 @@ void ggml_sycl_op_mul_mat_q(
|
|||
ggml_mul_mat_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_ncols, src1_padded_row_size, nrows_dst, stream);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -1017,7 +1017,7 @@ void ggml_sycl_op_mul_mat_vec_q(
|
|||
mul_mat_vec_iq4_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -251,7 +251,7 @@ void ggml_sycl_op_rope(
|
|||
attn_factor, corr_dims, freq_factors, main_stream
|
||||
);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} else {
|
||||
if (src0->type == GGML_TYPE_F32) {
|
||||
|
@ -265,7 +265,7 @@ void ggml_sycl_op_rope(
|
|||
attn_factor, corr_dims, freq_factors, main_stream
|
||||
);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1961,7 +1961,7 @@ void ggml_vk_instance_init() {
|
|||
// Make sure at least one device exists
|
||||
if (devices.empty()) {
|
||||
std::cerr << "ggml_vulkan: Error: No devices found." << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
// Default to using all dedicated GPUs
|
||||
|
@ -2459,7 +2459,7 @@ static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_cont
|
|||
// Buffer is already mapped
|
||||
if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
|
||||
std::cerr << "ggml_vulkan: buffer_write_nc_async dst buffer is host_visible. Use synchronous write." << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
// Check if src is pinned memory
|
||||
vk_buffer buf;
|
||||
|
@ -2527,7 +2527,7 @@ static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_cont
|
|||
staging = ctx->device->sync_staging;
|
||||
staging_offset = 0;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2563,7 +2563,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * subctx, vk_buffer& dst, s
|
|||
// Buffer is already mapped
|
||||
if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
|
||||
std::cerr << "ggml_vulkan: buffer_write_async dst buffer is host_visible. Use synchronous write." << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
// Check if src is pinned memory
|
||||
vk_buffer buf = nullptr;
|
||||
|
@ -2602,7 +2602,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * subctx, vk_buffer& dst, s
|
|||
staging_buffer = dst->device->sync_staging;
|
||||
staging_offset = 0;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2704,7 +2704,7 @@ static void ggml_vk_buffer_read_2d_async(vk_context * subctx, vk_buffer& src, si
|
|||
|
||||
staging_buffer = src->device->sync_staging;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2913,7 +2913,7 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, ggml_
|
|||
}
|
||||
|
||||
std::cerr << "Missing CPY op for types: " << ggml_type_name(from) << " " << ggml_type_name(to) << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out) {
|
||||
|
@ -3499,7 +3499,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context *
|
|||
const bool qy_needs_dequant = (src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig;
|
||||
|
||||
if (mmp == nullptr) {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
// Not implemented
|
||||
|
@ -4078,7 +4078,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|||
std::cerr << " and " << ggml_type_name(src1->type);
|
||||
}
|
||||
std::cerr << " to " << ggml_type_name(dst->type) << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
op_func(ctx, subctx, src0, src1, dst);
|
||||
|
@ -4521,7 +4521,7 @@ static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0
|
|||
} else if (type == GGML_TYPE_F16) {
|
||||
val = ggml_fp16_to_fp32(*((const ggml_fp16_t *) data + i2*ne1*ne0 + idx1*ne0 + idx0));
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
fprintf(stderr, "% 7.2f ", val);
|
||||
} else {
|
||||
|
@ -4555,7 +4555,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|||
p = ctx->device->pipeline_matmul_f16->a_s;
|
||||
shname = "F16_ALIGNED_S";
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} else if (shader_size == 1) {
|
||||
if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
|
||||
|
@ -4571,7 +4571,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|||
p = ctx->device->pipeline_matmul_f16->a_m;
|
||||
shname = "F16_ALIGNED_M";
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} else if (shader_size == 2) {
|
||||
if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
|
||||
|
@ -4587,7 +4587,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|||
p = ctx->device->pipeline_matmul_f16->a_l;
|
||||
shname = "F16_ALIGNED_L";
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} else {
|
||||
GGML_ASSERT(0);
|
||||
|
@ -4668,7 +4668,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|||
} else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
|
||||
x[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < y_ne; i++) {
|
||||
|
@ -4679,7 +4679,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|||
// y[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
|
||||
y[i] = ggml_fp32_to_fp16((i % k == i / k) ? 1.0f : 0.0f);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4727,14 +4727,14 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|||
} else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
|
||||
src0_type = GGML_TYPE_F16;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
if (std::is_same<float, Y_TYPE>()) {
|
||||
src1_type = GGML_TYPE_F32;
|
||||
} else if (std::is_same<ggml_fp16_t, Y_TYPE>()) {
|
||||
src1_type = GGML_TYPE_F16;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
ggml_tensor * src0_ggml = ggml_new_tensor_3d(ggml_ctx, src0_type, k, m, batch);
|
||||
|
@ -4841,7 +4841,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, int i0, int i1
|
|||
} else if (tensor->type == GGML_TYPE_F16) {
|
||||
val = ggml_fp16_to_fp32(*(ggml_fp16_t *) ((char *) tensor->data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]));
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
fprintf(stderr, "% 7.2f ", val);
|
||||
} else {
|
||||
|
@ -5391,7 +5391,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
|||
std::cerr << std::endl;
|
||||
}
|
||||
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
#endif
|
||||
|
||||
if (ctx->prealloc_x == nullptr || (ctx->prealloc_size_x > 0 && ctx->prealloc_x->size < ctx->prealloc_size_x)) {
|
||||
|
@ -5486,7 +5486,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
|
|||
break;
|
||||
default:
|
||||
std::cerr << "ggml_vulkan: Error: Missing op: " << ggml_op_name(node->op) << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -6498,7 +6498,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, const void * d
|
|||
} else if (tensor->type == GGML_TYPE_I32) {
|
||||
val = *(const int32_t *) ((const char *) data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
fprintf(stderr, "% 7.2f ", val);
|
||||
} else {
|
||||
|
@ -6620,7 +6620,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||
}
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
||||
|
@ -6662,7 +6662,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||
}
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
||||
|
@ -6720,7 +6720,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||
}
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
||||
|
@ -6797,7 +6797,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
break;
|
||||
default:
|
||||
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} else if (tensor->op == GGML_OP_CPY || tensor->op == GGML_OP_DUP) {
|
||||
if (src1 == nullptr) {
|
||||
|
@ -6825,7 +6825,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
tensor_clone = ggml_sum_rows(ggml_ctx, src0_clone);
|
||||
} else {
|
||||
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx);
|
||||
|
@ -6912,7 +6912,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
}
|
||||
} else {
|
||||
std::cerr << "Missing debug code for type " << ggml_type_name(tensor->type) << std::endl;
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
if ((std::isnan(correct) != std::isnan(result)) || (std::isinf(correct) != std::isinf(result)) || !buffer_size_fit) {
|
||||
|
@ -6935,7 +6935,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
std::cerr << std::endl;
|
||||
std::vector<const ggml_tensor *> done;
|
||||
ggml_vk_print_graph_origin(tensor, done);
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
if (first_error[0] == -1 && std::fabs(correct - result) > 0.1f) {
|
||||
first_error[0] = i0;
|
||||
|
@ -7006,7 +7006,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
|
|||
std::cerr << std::endl;
|
||||
std::vector<const ggml_tensor *> done;
|
||||
ggml_vk_print_graph_origin(tensor, done);
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
} else {
|
||||
std::cerr << check_counter << " " << tensor->name << " op=" << ggml_op_name(tensor->op) << " avg_err=" << avg_err << std::endl;
|
||||
}
|
||||
|
|
358
ggml/src/ggml.c
358
ggml/src/ggml.c
File diff suppressed because it is too large
Load diff
|
@ -221,7 +221,7 @@ static void llama_grammar_advance_stack(
|
|||
// end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
|
||||
// (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
|
||||
// those
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -517,7 +517,7 @@ void llama_grammar_accept_token_impl(struct llama_grammar * grammar, const struc
|
|||
return;
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
const std::string & piece = vocab->cache_token_to_piece.at(token);
|
||||
|
|
|
@ -152,14 +152,14 @@ static uint8_t llama_token_to_byte(const llama_vocab & vocab, llama_token id) {
|
|||
return strtol(buf.c_str(), NULL, 16);
|
||||
}
|
||||
case LLAMA_VOCAB_TYPE_BPE: {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
//return unicode_utf8_to_byte(token_data.text); // TODO: why is this here after GGML_ABORT?
|
||||
}
|
||||
case LLAMA_VOCAB_TYPE_WPM: {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1396,7 +1396,7 @@ std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab,
|
|||
}
|
||||
} break;
|
||||
case LLAMA_VOCAB_TYPE_NONE:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
return output;
|
||||
|
@ -1422,7 +1422,7 @@ llama_token llama_byte_to_token_impl(const llama_vocab & vocab, uint8_t ch) {
|
|||
return vocab.token_to_id.at(unicode_byte_to_utf8(ch));
|
||||
}
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1606,7 +1606,7 @@ int32_t llama_token_to_piece_impl(const struct llama_vocab & vocab, llama_token
|
|||
break;
|
||||
}
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2259,7 +2259,7 @@ struct llama_hparams {
|
|||
return n_head_arr[il];
|
||||
}
|
||||
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
uint32_t n_head_kv(uint32_t il = 0) const {
|
||||
|
@ -2267,7 +2267,7 @@ struct llama_hparams {
|
|||
return n_head_kv_arr[il];
|
||||
}
|
||||
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
uint32_t n_ff(uint32_t il = 0) const {
|
||||
|
@ -2275,7 +2275,7 @@ struct llama_hparams {
|
|||
return n_ff_arr[il];
|
||||
}
|
||||
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
uint32_t n_gqa(uint32_t il = 0) const {
|
||||
|
@ -8069,7 +8069,7 @@ static struct ggml_tensor * llm_build_moe_ffn(
|
|||
cb(gate, "ffn_moe_gelu", il);
|
||||
} break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
ggml_tensor * par = ggml_mul(ctx, up, gate); // [n_ff, n_expert_used, n_tokens]
|
||||
|
@ -8632,7 +8632,7 @@ struct llm_build_context {
|
|||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false && "unknown pooling type");
|
||||
GGML_ABORT("unknown pooling type");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8888,7 +8888,7 @@ struct llm_build_context {
|
|||
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd/n_head, n_head, n_tokens);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
cb(Qcur, "Qcur", il);
|
||||
cb(Kcur, "Kcur", il);
|
||||
|
@ -11720,7 +11720,7 @@ struct llm_build_context {
|
|||
switch (model.type) {
|
||||
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
||||
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
|
||||
default: GGML_ASSERT(false);
|
||||
default: GGML_ABORT("fatal error");
|
||||
};
|
||||
cb(Qcur, "Qcur_scaled", il);
|
||||
|
||||
|
@ -13885,7 +13885,7 @@ static struct ggml_cgraph * llama_build_graph(
|
|||
result = llm.build_jais();
|
||||
} break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
// add on pooling layer
|
||||
|
@ -14684,7 +14684,7 @@ static int llama_decode_internal(
|
|||
} break;
|
||||
case LLAMA_POOLING_TYPE_UNSPECIFIED:
|
||||
{
|
||||
GGML_ASSERT(false && "unknown pooling type");
|
||||
GGML_ABORT("unknown pooling type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15076,7 +15076,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) {
|
|||
// apply K-shift if needed
|
||||
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
|
||||
if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA
|
||||
GGML_ASSERT(false && "Deepseek2 does not support K-shift");
|
||||
GGML_ABORT("Deepseek2 does not support K-shift");
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -15215,7 +15215,7 @@ static void llama_tensor_dequantize_internal(
|
|||
} else if (ggml_is_quantized(tensor->type)) {
|
||||
qtype.to_float(tensor->data, f32_output, nelements);
|
||||
} else {
|
||||
GGML_ASSERT(false); // unreachable
|
||||
GGML_ABORT("fatal error"); // unreachable
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -16901,7 +16901,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
|||
|
||||
// all model arches should be listed explicitly here
|
||||
case LLM_ARCH_UNKNOWN:
|
||||
GGML_ASSERT(false && "unknown architecture");
|
||||
GGML_ABORT("unknown architecture");
|
||||
}
|
||||
|
||||
return LLAMA_ROPE_TYPE_NONE;
|
||||
|
@ -18465,7 +18465,7 @@ float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
|
|||
} catch (const std::exception & err) {
|
||||
LLAMA_LOG_ERROR("%s: invalid logits id %d, reason: %s\n", __func__, i, err.what());
|
||||
#ifndef NDEBUG
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
#endif
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -18510,7 +18510,7 @@ float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i) {
|
|||
} catch (const std::exception & err) {
|
||||
LLAMA_LOG_ERROR("%s: invalid embeddings id %d, reason: %s\n", __func__, i, err.what());
|
||||
#ifndef NDEBUG
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
#endif
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|||
// This is going to create some weird integers though.
|
||||
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -132,7 +132,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||
tt.to_float(&buf[i], vq.data(), bs);
|
||||
tv.insert(tv.end(), vq.begin(), vq.end());
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1435,7 +1435,7 @@ struct test_argsort : public test_case {
|
|||
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
|
||||
}
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2462,7 +2462,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|||
return true;
|
||||
}
|
||||
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -166,12 +166,12 @@ static void test_sampler_queue(
|
|||
for (auto s : samplers_sequence) {
|
||||
switch (s){
|
||||
case 'k': llama_sample_top_k (nullptr, &candidates_p, top_k, 1); break;
|
||||
case 'f': GGML_ASSERT(false && "tail_free test not implemented"); break;
|
||||
case 'y': GGML_ASSERT(false && "typical test not implemented"); break;
|
||||
case 'f': GGML_ABORT("tail_free test not implemented"); break;
|
||||
case 'y': GGML_ABORT("typical test not implemented"); break;
|
||||
case 'p': llama_sample_top_p (nullptr, &candidates_p, top_p, 1); break;
|
||||
case 'm': llama_sample_min_p (nullptr, &candidates_p, min_p, 1); break;
|
||||
case 't': GGML_ASSERT(false && "temperature test not implemented"); break;
|
||||
default : GGML_ASSERT(false && "Unknown sampler"); break;
|
||||
case 't': GGML_ABORT("temperature test not implemented"); break;
|
||||
default : GGML_ABORT("Unknown sampler"); break;
|
||||
}
|
||||
|
||||
llama_sample_softmax(nullptr, &candidates_p); // make sure tokens are sorted for tests
|
||||
|
@ -222,7 +222,7 @@ static void test_sampler_queue(
|
|||
GGML_ASSERT(candidates_p.data[0].id == max_token_id);
|
||||
GGML_ASSERT(candidates_p.data[expected_size-1].id == min_token_id);
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue