restore some modifications
This commit is contained in:
parent
cf6b987be3
commit
e05a398fb3
2 changed files with 11 additions and 10 deletions
|
@@ -2427,6 +2427,7 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
|
|||
aclTensor* acl_weight, aclTensor* acl_dst) {
|
||||
int8_t cube_math_type = 1; // ALLOW_FP32_DOWN_PRECISION, when input is
|
||||
// fp32, atlas a2 will convert it to HFLOAT32.
|
||||
|
||||
uint64_t workspaceSize = 0;
|
||||
aclOpExecutor* executor;
|
||||
void* workspaceAddr = nullptr;
|
||||
|
@@ -2617,8 +2618,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|||
size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
|
||||
size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];
|
||||
|
||||
// scale stored at the end of weight.
|
||||
// scale need transpose.
|
||||
// scale is stored at the end of weight. It also needs to be transposed.
|
||||
size_t scale_elem_size = sizeof(uint16_t);
|
||||
size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
|
||||
size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
|
||||
|
@@ -2677,8 +2677,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|||
int64_t batch0 = (n0 * src0->ne[2]) + c0;
|
||||
|
||||
aclTensor* acl_input_tensor = ggml_cann_create_tensor(
|
||||
(char*)input_buffer + batch1 * input_stride,
|
||||
ACL_FLOAT16,
|
||||
(char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
|
||||
input_elem_size, input_ne, input_nb, 2);
|
||||
|
||||
// first split
|
||||
|
|
|
@@ -301,7 +301,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|||
*/
|
||||
struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
||||
/**
|
||||
* @brief The maximum size of the virtual memory pool.
|
||||
* @brief The maximum size of the virtual memory pool (32 GB).
|
||||
*/
|
||||
size_t max_size;
|
||||
|
||||
|
@@ -483,12 +483,14 @@ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
|||
*/
|
||||
struct ggml_backend_cann_buffer_context {
|
||||
int32_t device; ///< The device ID associated with this buffer context.
|
||||
void* dev_ptr = nullptr;
|
||||
void* dev_ptr =
|
||||
nullptr; ///< Pointer to the device memory allocated for the buffer.
|
||||
|
||||
/**
|
||||
* @brief Constructor to initialize the CANN buffer context.
|
||||
*
|
||||
* @param device The device ID associated with this buffer context.
|
||||
* @param dev_ptr Pointer to the device memory allocated for the buffer.
|
||||
*/
|
||||
ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
|
||||
: device(device),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue