restore some modifications
This commit is contained in:
parent
cf6b987be3
commit
e05a398fb3
2 changed files with 11 additions and 10 deletions
|
@ -2427,6 +2427,7 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
|
||||||
aclTensor* acl_weight, aclTensor* acl_dst) {
|
aclTensor* acl_weight, aclTensor* acl_dst) {
|
||||||
int8_t cube_math_type = 1; // ALLOW_FP32_DOWN_PRECISION, when input is
|
int8_t cube_math_type = 1; // ALLOW_FP32_DOWN_PRECISION, when input is
|
||||||
// fp32, atlas a2 will transpose it to HFLOAT32.
|
// fp32, atlas a2 will transpose it to HFLOAT32.
|
||||||
|
|
||||||
uint64_t workspaceSize = 0;
|
uint64_t workspaceSize = 0;
|
||||||
aclOpExecutor* executor;
|
aclOpExecutor* executor;
|
||||||
void* workspaceAddr = nullptr;
|
void* workspaceAddr = nullptr;
|
||||||
|
@ -2617,8 +2618,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
||||||
size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
|
size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
|
||||||
size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];
|
size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];
|
||||||
|
|
||||||
// scale stored at the end of weight.
|
// scale stored at the end of weight. Also need transpose.
|
||||||
// scale need transpose.
|
|
||||||
size_t scale_elem_size = sizeof(uint16_t);
|
size_t scale_elem_size = sizeof(uint16_t);
|
||||||
size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
|
size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
|
||||||
size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
|
size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
|
||||||
|
@ -2677,8 +2677,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
||||||
int64_t batch0 = (n0 * src0->ne[2]) + c0;
|
int64_t batch0 = (n0 * src0->ne[2]) + c0;
|
||||||
|
|
||||||
aclTensor* acl_input_tensor = ggml_cann_create_tensor(
|
aclTensor* acl_input_tensor = ggml_cann_create_tensor(
|
||||||
(char*)input_buffer + batch1 * input_stride,
|
(char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
|
||||||
ACL_FLOAT16,
|
|
||||||
input_elem_size, input_ne, input_nb, 2);
|
input_elem_size, input_ne, input_nb, 2);
|
||||||
|
|
||||||
// first split
|
// first split
|
||||||
|
|
|
@ -301,7 +301,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
||||||
*/
|
*/
|
||||||
struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
||||||
/**
|
/**
|
||||||
* @brief The maximum size of the virtual memory pool.
|
* @brief The maximum size of the virtual memory pool (32 GB).
|
||||||
*/
|
*/
|
||||||
size_t max_size;
|
size_t max_size;
|
||||||
|
|
||||||
|
@ -483,12 +483,14 @@ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
||||||
*/
|
*/
|
||||||
struct ggml_backend_cann_buffer_context {
|
struct ggml_backend_cann_buffer_context {
|
||||||
int32_t device; ///< The device ID associated with this buffer context.
|
int32_t device; ///< The device ID associated with this buffer context.
|
||||||
void* dev_ptr = nullptr;
|
void* dev_ptr =
|
||||||
|
nullptr; ///< Pointer to the device memory allocated for the buffer.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Constructor to initialize the CANN buffer context.
|
* @brief Constructor to initialize the CANN buffer context.
|
||||||
*
|
*
|
||||||
* @param device The device ID associated with this buffer context.
|
* @param device The device ID associated with this buffer context.
|
||||||
|
* @param dev_ptr Pointer to the device memory allocated for the buffer.
|
||||||
*/
|
*/
|
||||||
ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
|
ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
|
||||||
: device(device),
|
: device(device),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue