Apply suggestions from code review
Co-authored-by: Diego Devesa <slarengh@gmail.com>
This commit is contained in:
parent
c111e8a5b2
commit
4973a298b6
1 changed file with 3 additions and 3 deletions
|
@ -479,7 +479,7 @@ static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, gg
|
||||||
|
|
||||||
// CUDA backend on the server pads everything to 512 due to CUDA limitations.
|
// CUDA backend on the server pads everything to 512 due to CUDA limitations.
|
||||||
// Due to bandwidth constraints, we only call the server init tensor functions if necessary.
|
// Due to bandwidth constraints, we only call the server init tensor functions if necessary.
|
||||||
// In particular, this is tensors with padding that needs to be cleared, so base tensors only and only misaligned.
|
// In particular, only quantized base tensors (not views) whose first dimension (ne[0]) is not a multiple of 512 need padding.
|
||||||
if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0) && (tensor->view_src == nullptr)) {
|
if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0) && (tensor->view_src == nullptr)) {
|
||||||
rpc_msg_init_tensor_req request;
|
rpc_msg_init_tensor_req request;
|
||||||
|
|
||||||
|
@ -600,7 +600,7 @@ static size_t ggml_backend_rpc_get_max_size(ggml_backend_buffer_type_t buft) {
|
||||||
|
|
||||||
static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
|
static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
|
||||||
// See comments in init_tensor.
|
// See comments in init_tensor.
|
||||||
if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0)) {
|
if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0) && (tensor->view_src == nullptr)) {
|
||||||
ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context;
|
ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context;
|
||||||
auto sock = get_socket(buft_ctx->endpoint);
|
auto sock = get_socket(buft_ctx->endpoint);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue