vulkan: less assertions
This commit is contained in:
parent
d283dbcd37
commit
281d629380
1 changed files with 18 additions and 40 deletions
|
@ -654,8 +654,7 @@ static std::condition_variable compile_count_cond;
|
|||
|
||||
static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipeline, const std::string name, size_t spv_size, const void* spv_data, const std::string entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, std::vector<uint32_t> specialization_constants, uint32_t align) {
|
||||
VK_LOG_DEBUG("ggml_vk_create_pipeline(" << device->name << ", " << name << ", " << entrypoint << ", " << parameter_count << ", " << push_constant_size << ", (" << wg_denoms[0] << "," << wg_denoms[1] << "," << wg_denoms[2] << "), specialization_constants, " << align << ")");
|
||||
GGML_ASSERT(parameter_count > 0);
|
||||
GGML_ASSERT(wg_denoms[0] > 0 && wg_denoms[1] > 0 && wg_denoms[2] > 0); // NOLINT
|
||||
GGML_ASSERT(parameter_count > 0 && wg_denoms[0] > 0 && wg_denoms[1] > 0 && wg_denoms[2] > 0); // NOLINT
|
||||
|
||||
pipeline = std::make_shared<vk_pipeline_struct>();
|
||||
pipeline->name = name;
|
||||
|
@ -2265,8 +2264,7 @@ static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context&
|
|||
std::cerr << "(" << buffer.buffer << ", " << buffer.offset << ", " << buffer.range << "), ";
|
||||
}
|
||||
std::cerr << "}, (" << wg0 << "," << wg1 << "," << wg2 << "))");
|
||||
GGML_ASSERT(pipeline->descriptor_set_idx < pipeline->descriptor_sets.size());
|
||||
GGML_ASSERT(descriptor_buffer_infos.size() == pipeline->parameter_count);
|
||||
GGML_ASSERT(pipeline->descriptor_set_idx < pipeline->descriptor_sets.size() && descriptor_buffer_infos.size() == pipeline->parameter_count);
|
||||
|
||||
vk::DescriptorSet& descriptor_set = pipeline->descriptor_sets[pipeline->descriptor_set_idx++];
|
||||
vk::WriteDescriptorSet write_descriptor_set{ descriptor_set, 0, 0, pipeline->parameter_count, vk::DescriptorType::eStorageBuffer, nullptr, descriptor_buffer_infos.begin() };
|
||||
|
@ -2528,9 +2526,7 @@ static void ggml_vk_buffer_write(vk_buffer& dst, size_t offset, const void * src
|
|||
|
||||
static void ggml_vk_buffer_read_2d_async(vk_context subctx, vk_buffer& src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) {
|
||||
VK_LOG_DEBUG("ggml_vk_buffer_read_2d_async(offset=" << offset << ", width=" << width << ", height=" << height << ")");
|
||||
GGML_ASSERT(width > 0);
|
||||
GGML_ASSERT(height > 0);
|
||||
GGML_ASSERT(src != nullptr);
|
||||
GGML_ASSERT(width > 0 && height > 0 && src != nullptr);
|
||||
|
||||
// TODO: staging_offset is not used
|
||||
|
||||
|
@ -2827,8 +2823,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
|
|||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << ")");
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
const uint64_t ne01 = src0->ne[1];
|
||||
|
@ -2913,8 +2908,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
|
|||
} else {
|
||||
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
|
||||
}
|
||||
GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr); // NOLINT
|
||||
GGML_ASSERT(!qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr || !qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
|
||||
if (dryrun) {
|
||||
const uint64_t x_sz_upd = x_sz * ne02 * ne03;
|
||||
|
@ -2952,8 +2946,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
|
|||
|
||||
vk_buffer d_D = dst_buf_ctx->dev_buffer;
|
||||
const uint64_t d_buf_offset = vk_tensor_offset(dst) + dst->view_offs;
|
||||
GGML_ASSERT(d_D != nullptr);
|
||||
GGML_ASSERT(d_D->size >= d_buf_offset + d_sz * ne02 * ne03);
|
||||
GGML_ASSERT(d_D != nullptr && d_D->size >= d_buf_offset + d_sz * ne02 * ne03);
|
||||
vk_buffer d_X;
|
||||
uint64_t x_buf_offset = 0;
|
||||
vk_buffer d_Y;
|
||||
|
@ -3023,8 +3016,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
|
|||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << "),)");
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
const uint64_t ne01 = src0->ne[1];
|
||||
|
@ -3097,9 +3089,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
|
|||
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
|
||||
}
|
||||
vk_pipeline dmmv = ggml_vk_get_dequantize_mul_mat_vec(ctx, src0->type, src1->type);
|
||||
GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr); // NOLINT
|
||||
GGML_ASSERT(!qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
GGML_ASSERT(dmmv != nullptr);
|
||||
GGML_ASSERT(dmmv != nullptr && !qx_needs_dequant || to_fp16_vk_0 != nullptr || !qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
|
||||
if (dryrun) {
|
||||
const uint64_t x_sz_upd = x_sz * ne02 * ne03;
|
||||
|
@ -3206,11 +3196,9 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
|
|||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << ")");
|
||||
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
|
||||
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // NOLINT
|
||||
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // NOLINT
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1) &&
|
||||
src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3] && src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3] && // NOLINT
|
||||
src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32);
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
const uint64_t ne01 = src0->ne[1];
|
||||
|
@ -3281,11 +3269,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
|
|||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << ")");
|
||||
GGML_ASSERT(!ggml_is_transposed(src0));
|
||||
GGML_ASSERT(!ggml_is_transposed(src1));
|
||||
GGML_ASSERT(!ggml_is_permuted(src0));
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(!ggml_is_transposed(src0) && !ggml_is_transposed(src1) && !ggml_is_permuted(src0) && src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32);
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
const uint64_t ne01 = src0->ne[1];
|
||||
|
@ -3384,8 +3368,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
|
|||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||
std::cerr << "), (" << ids << ", name=" << ids->name << ", type=" << ids->type << ", ne0=" << ids->ne[0] << ", ne1=" << ids->ne[1] << ", ne2=" << ids->ne[2] << ", ne3=" << ids->ne[3] << ", nb0=" << ids->nb[0] << ", nb1=" << ids->nb[1] << ", nb2=" << ids->nb[2] << ", nb3=" << ids->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)");
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32);
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32 && (ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16)); // NOLINT
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
const uint64_t ne01 = src0->ne[1];
|
||||
|
@ -3482,8 +3465,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
|
|||
} else {
|
||||
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
|
||||
}
|
||||
GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr); // NOLINT
|
||||
GGML_ASSERT(!qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr || !qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
|
||||
if (dryrun) {
|
||||
const uint64_t x_sz_upd = x_sz * ne02 * ne03;
|
||||
|
@ -3590,9 +3572,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
|
|||
std::cerr << "), (" << ids << ", name=" << ids->name << ", type=" << ids->type << ", ne0=" << ids->ne[0] << ", ne1=" << ids->ne[1] << ", ne2=" << ids->ne[2] << ", ne3=" << ids->ne[3] << ", nb0=" << ids->nb[0] << ", nb1=" << ids->nb[1] << ", nb2=" << ids->nb[2] << ", nb3=" << ids->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << ")");
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32);
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32 && (ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16)); // NOLINT
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
const uint64_t ne01 = src0->ne[1];
|
||||
|
@ -3674,9 +3654,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
|
|||
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
|
||||
}
|
||||
vk_pipeline dmmv = ggml_vk_get_dequantize_mul_mat_vec_id(ctx, src0->type, src1->type);
|
||||
GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr); // NOLINT
|
||||
GGML_ASSERT(!qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT
|
||||
GGML_ASSERT(dmmv != nullptr);
|
||||
GGML_ASSERT(dmmv != nullptr && (!qx_needs_dequant || to_fp16_vk_0 != nullptr || !qy_needs_dequant || to_fp16_vk_1 != nullptr)); // NOLINT
|
||||
|
||||
if (dryrun) {
|
||||
const uint64_t x_sz_upd = x_sz * ne02 * ne03;
|
||||
|
@ -4034,8 +4012,8 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
|
|||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << ggml_op_name(op) << ", " << (dryrun ? "dryrun" : "") << ")");
|
||||
GGML_ASSERT(op == GGML_OP_GET_ROWS || (!ggml_is_quantized(src0->type) && (src1 == nullptr || !ggml_is_quantized(src1->type)))); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_op_supports_incontiguous(op) || ggml_vk_dim01_contiguous(src0)); // NOLINT
|
||||
GGML_ASSERT(dst->buffer != nullptr);
|
||||
GGML_ASSERT((ggml_vk_op_supports_incontiguous(op) || ggml_vk_dim01_contiguous(src0)) && dst->buffer != nullptr); // NOLINT
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
const uint64_t ne01 = src0->ne[1];
|
||||
const uint64_t ne02 = src0->ne[2];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue