Merge remote-tracking branch 'origin/master' into tool-call

This commit is contained in:
Olivier Chafik 2025-01-21 13:44:58 +00:00
commit fec0260366
6 changed files with 190 additions and 165 deletions

View file

@ -345,8 +345,18 @@ struct lora_merge_ctx {
gf = ggml_new_graph(ctx0);
struct ggml_tensor * cur = inp_base;
for (size_t i = 0; i < adapters.size(); ++i) {
struct ggml_tensor * a_T = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32)));
struct ggml_tensor * delta = ggml_mul_mat(ctx0, a_T, ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
struct ggml_tensor * delta;
bool is_tok_embd = string_starts_with(name_base, "token_embd");
if (is_tok_embd) {
printf("%s : detected token embeddings tensor\n", __func__);
delta = ggml_mul_mat(ctx0,
ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32),
ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32));
} else {
delta = ggml_mul_mat(ctx0,
ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32))),
ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
}
// scale
const float alpha = adapters[i]->alpha;
const float rank = (float) inp_b[i]->ne[0];

View file

@ -103,24 +103,26 @@
*
*/
#include <termios.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
# include "linenoise.h"
# include <ctype.h>
# include <errno.h>
# include <stdio.h>
# include <string.h>
# include <sys/file.h>
# include <sys/ioctl.h>
# include <sys/stat.h>
# include <sys/types.h>
#include <sys/ioctl.h>
# include <termios.h>
# include <unistd.h>
# include <memory>
# include <string>
# include <vector>
#include "linenoise.h"
# define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100
# define LINENOISE_MAX_LINE 4096
static std::vector<const char*> unsupported_term = {"dumb","cons25","emacs",nullptr};
static std::vector<const char *> unsupported_term = { "dumb", "cons25", "emacs" };
static linenoiseCompletionCallback *completionCallback = NULL;
static linenoiseHintsCallback *hintsCallback = NULL;
static linenoiseFreeHintsCallback *freeHintsCallback = NULL;
@ -166,21 +168,58 @@ int linenoiseHistoryAdd(const char *line);
#define REFRESH_ALL (REFRESH_CLEAN|REFRESH_WRITE) // Do both.
static void refreshLine(struct linenoiseState *l);
/* Small owning wrapper around a stdio stream with an optional advisory
 * flock() lock. The destructor releases the lock (if one was taken) and
 * closes the stream, so users of this class never call fclose() themselves. */
class File {
public:
    FILE * file = nullptr; /* Underlying stream, or nullptr when nothing is open. */

    File() = default;

    /* The wrapper owns the stream; a copy would fclose() the same FILE twice. */
    File(const File &) = delete;
    File & operator=(const File &) = delete;

    /* Open `filename` using the fopen() `mode` string. A stream that is
     * already held is released first, so calling open() twice does not leak
     * the earlier handle or its lock. Returns the new stream, or nullptr when
     * fopen() fails. */
    FILE * open(const std::string & filename, const char * mode) {
        close();
        file = fopen(filename.c_str(), mode);
        return file;
    }

    /* Try to take an exclusive, non-blocking flock() on the open stream.
     * Returns 1 when the lock could not be obtained and 0 otherwise. Note
     * that 0 is also returned when no stream is open, in which case nothing
     * has been locked. */
    int lock() {
        if (file) {
            fd = fileno(file);
            if (flock(fd, LOCK_EX | LOCK_NB) != 0) {
                fd = -1;
                return 1;
            }
        }
        return 0;
    }

    ~File() {
        close();
    }

private:
    int fd = -1; /* Descriptor a lock is held on, or -1 when no lock is held. */

    /* Drop the lock (if any) and close the stream (if any), leaving the
     * object in its initial empty state. */
    void close() {
        if (fd >= 0) {
            flock(fd, LOCK_UN);
            fd = -1;
        }
        if (file) {
            fclose(file);
            file = nullptr;
        }
    }
};
__attribute__((format(printf, 1, 2)))
/* Debugging function. */
#if 0
static void lndebug(const char *fmt, ...) {
static FILE *lndebug_fp = NULL;
if (lndebug_fp == NULL) {
lndebug_fp = fopen("/tmp/lndebug.txt", "a");
static File file;
if (file.file == nullptr) {
file.open("/tmp/lndebug.txt", "a");
}
if (lndebug_fp != NULL) {
if (file.file != nullptr) {
va_list args;
va_start(args, fmt);
vfprintf(lndebug_fp, fmt, args);
vfprintf(file.file, fmt, args);
va_end(args);
fflush(lndebug_fp);
fflush(file.file);
}
}
#else
@ -213,8 +252,11 @@ void linenoiseSetMultiLine(int ml) {
static int isUnsupportedTerm(void) {
char *term = getenv("TERM");
if (term == NULL) return 0;
for (int j = 0; unsupported_term[j]; ++j)
if (!strcasecmp(term, unsupported_term[j])) return 1;
for (size_t j = 0; j < unsupported_term.size(); ++j) {
if (!strcasecmp(term, unsupported_term[j])) {
return 1;
}
}
return 0;
}
@ -334,17 +376,6 @@ static void linenoiseBeep(void) {
fflush(stderr);
}
/* ============================== Completion ================================ */
/* Free a list of completion option populated by linenoiseAddCompletion(). */
static void freeCompletions(linenoiseCompletions *lc) {
size_t i;
for (i = 0; i < lc->len; i++)
free(lc->cvec[i]);
if (lc->cvec != NULL)
free(lc->cvec);
}
/* Called by completeLine() and linenoiseShow() to render the current
* edited line with the proposed completion. If the current completion table
* is already available, it is passed as second argument, otherwise the
@ -353,7 +384,7 @@ static void freeCompletions(linenoiseCompletions *lc) {
* Flags are the same as refreshLine*(), that is REFRESH_* macros. */
static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseCompletions *lc, int flags) {
/* Obtain the table of completions if the caller didn't provide one. */
linenoiseCompletions ctable = { 0, NULL };
linenoiseCompletions ctable;
if (lc == NULL) {
completionCallback(ls->buf, &ctable);
lc = &ctable;
@ -372,8 +403,9 @@ static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseComple
refreshLineWithFlags(ls, flags);
}
/* Free the completions table if needed. */
if (lc != &ctable) freeCompletions(&ctable);
if (lc == &ctable) {
ctable.to_free = false;
}
}
/* This is a helper function for linenoiseEdit*() and is called when the
@ -391,7 +423,7 @@ static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseComple
* possible completions, and the caller should read for the next characters
* from stdin. */
static int completeLine(struct linenoiseState *ls, int keypressed) {
linenoiseCompletions lc = { 0, NULL };
linenoiseCompletions lc;
int nwritten;
char c = keypressed;
@ -420,8 +452,7 @@ static int completeLine(struct linenoiseState *ls, int keypressed) {
default:
/* Update buffer and return */
if (ls->completion_idx < lc.len) {
nwritten = snprintf(ls->buf,ls->buflen,"%s",
lc.cvec[ls->completion_idx]);
nwritten = snprintf(ls->buf, ls->buflen, "%s", lc.cvec[ls->completion_idx]);
ls->len = ls->pos = nwritten;
}
ls->in_completion = 0;
@ -436,7 +467,6 @@ static int completeLine(struct linenoiseState *ls, int keypressed) {
}
}
freeCompletions(&lc);
return c; /* Return last read character */
}
@ -462,53 +492,25 @@ void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *fn) {
* user typed <tab>. See the example.c source code for a very easy to
* understand example. */
void linenoiseAddCompletion(linenoiseCompletions *lc, const char *str) {
size_t len = strlen(str);
char *copy, **cvec;
copy = (char*) malloc(len + 1);
if (copy == NULL) return;
memcpy(copy,str,len+1);
cvec = (char**) realloc(lc->cvec,sizeof(char*)*(lc->len+1));
if (cvec == NULL) {
free(copy);
const size_t len = strlen(str);
auto copy = std::make_unique<char[]>(len + 1);
if (!copy) {
return;
}
memcpy(copy.get(), str, len + 1);
char ** cvec = static_cast<char **>(std::realloc(lc->cvec, sizeof(char *) * (lc->len + 1)));
if (cvec == nullptr) {
return;
}
lc->cvec = cvec;
lc->cvec[lc->len++] = copy;
}
/* =========================== Line editing ================================= */
/* We define a very simple "append buffer" structure, that is an heap
* allocated string where we can append to. This is useful in order to
* write all the escape sequences in a buffer and flush them to the standard
* output in a single call, to avoid flickering effects. */
struct abuf {
char *b;
int len;
};
static void abInit(struct abuf *ab) {
ab->b = NULL;
ab->len = 0;
}
static void abAppend(struct abuf *ab, const char *s, int len) {
char *new_ptr = (char*) realloc(ab->b,ab->len+len);
if (new_ptr == NULL) return;
memcpy(new_ptr+ab->len,s,len);
ab->b = new_ptr;
ab->len += len;
}
static void abFree(struct abuf *ab) {
free(ab->b);
lc->cvec[lc->len++] = copy.release();
}
/* Helper of refreshSingleLine() and refreshMultiLine() to show hints
* to the right of the prompt. */
static void refreshShowHints(struct abuf * ab, struct linenoiseState * l, int plen) {
static void refreshShowHints(std::string & ab, struct linenoiseState * l, int plen) {
char seq[64];
if (hintsCallback && plen+l->len < l->cols) {
int color = -1, bold = 0;
@ -522,10 +524,11 @@ static void refreshShowHints(struct abuf * ab, struct linenoiseState * l, int pl
snprintf(seq,64,"\033[%d;%d;49m",bold,color);
else
seq[0] = '\0';
abAppend(ab,seq,strlen(seq));
abAppend(ab,hint,hintlen);
ab.append(seq);
ab.append(hint, hintlen);
if (color != -1 || bold != 0)
abAppend(ab,"\033[0m",4);
ab.append("\033[0m");
/* Call the function to free the hint returned. */
if (freeHintsCallback) freeHintsCallback(hint);
}
@ -546,8 +549,7 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) {
char *buf = l->buf;
size_t len = l->len;
size_t pos = l->pos;
struct abuf ab;
std::string ab;
while((plen+pos) >= l->cols) {
buf++;
len--;
@ -557,35 +559,34 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) {
len--;
}
abInit(&ab);
/* Cursor to left edge */
snprintf(seq,sizeof(seq),"\r");
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
if (flags & REFRESH_WRITE) {
/* Write the prompt and the current buffer content */
abAppend(&ab,l->prompt,strlen(l->prompt));
ab.append(l->prompt);
if (maskmode == 1) {
while (len--) abAppend(&ab,"*",1);
while (len--) {
ab.append("*");
}
} else {
abAppend(&ab,buf,len);
ab.append(buf, len);
}
/* Show hints if any. */
refreshShowHints(&ab,l,plen);
refreshShowHints(ab, l, plen);
}
/* Erase to right */
snprintf(seq,sizeof(seq),"\x1b[0K");
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
if (flags & REFRESH_WRITE) {
/* Move cursor to original position. */
snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(pos+plen));
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
}
if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */
abFree(&ab);
(void) !write(fd, ab.c_str(), ab.size()); /* Can't recover from write error. */
}
/* Multi line low level line refresh.
@ -604,26 +605,23 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
int col; /* colum position, zero-based. */
int old_rows = l->oldrows;
int fd = l->ofd, j;
struct abuf ab;
std::string ab;
l->oldrows = rows;
/* First step: clear all the lines used before. To do so start by
* going to the last row. */
abInit(&ab);
if (flags & REFRESH_CLEAN) {
if (old_rows-rpos > 0) {
lndebug("go down %d", old_rows-rpos);
snprintf(seq,64,"\x1b[%dB", old_rows-rpos);
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
}
/* Now for every row clear it, go up. */
for (j = 0; j < old_rows-1; j++) {
lndebug("clear+up");
snprintf(seq,64,"\r\x1b[0K\x1b[1A");
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
}
}
@ -631,21 +629,22 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
/* Clean the top line. */
lndebug("clear");
snprintf(seq,64,"\r\x1b[0K");
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
}
if (flags & REFRESH_WRITE) {
/* Write the prompt and the current buffer content */
abAppend(&ab,l->prompt,strlen(l->prompt));
ab.append(l->prompt);
if (maskmode == 1) {
unsigned int i;
for (i = 0; i < l->len; i++) abAppend(&ab,"*",1);
for (unsigned int i = 0; i < l->len; ++i) {
ab.append("*");
}
} else {
abAppend(&ab,l->buf,l->len);
ab.append(l->buf, l->len);
}
/* Show hints if any. */
refreshShowHints(&ab,l,plen);
refreshShowHints(ab, l, plen);
/* If we are at the very end of the screen with our prompt, we need to
* emit a newline and move the prompt to the first column. */
@ -654,9 +653,9 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
(l->pos+plen) % l->cols == 0)
{
lndebug("<newline>");
abAppend(&ab,"\n",1);
ab.append("\n");
snprintf(seq,64,"\r");
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
rows++;
if (rows > (int)l->oldrows) l->oldrows = rows;
}
@ -669,7 +668,7 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
if (rows-rpos2 > 0) {
lndebug("go-up %d", rows-rpos2);
snprintf(seq,64,"\x1b[%dA", rows-rpos2);
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
}
/* Set column. */
@ -679,14 +678,12 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
snprintf(seq,64,"\r\x1b[%dC", col);
else
snprintf(seq,64,"\r");
abAppend(&ab,seq,strlen(seq));
ab.append(seq);
}
lndebug("\n");
l->oldpos = l->pos;
if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */
abFree(&ab);
(void) !write(fd, ab.c_str(), ab.size()); /* Can't recover from write error. */
}
/* Calls the two low level functions refreshSingleLine() or
@ -1313,16 +1310,17 @@ int linenoiseHistorySetMaxLen(int len) {
* otherwise -1 is returned. */
int linenoiseHistorySave(const char *filename) {
mode_t old_umask = umask(S_IXUSR|S_IRWXG|S_IRWXO);
FILE *fp;
int j;
fp = fopen(filename,"w");
File file;
file.open(filename, "w");
umask(old_umask);
if (fp == NULL) return -1;
if (file.file == NULL) {
return -1;
}
chmod(filename,S_IRUSR|S_IWUSR);
for (j = 0; j < history_len; j++)
fprintf(fp,"%s\n",history[j]);
fclose(fp);
for (int j = 0; j < history_len; ++j) {
fprintf(file.file, "%s\n", history[j]);
}
return 0;
}
@ -1332,12 +1330,14 @@ int linenoiseHistorySave(const char *filename) {
* If the file exists and the operation succeeded 0 is returned, otherwise
* on error -1 is returned. */
int linenoiseHistoryLoad(const char *filename) {
FILE *fp = fopen(filename,"r");
File file;
file.open(filename, "r");
char buf[LINENOISE_MAX_LINE];
if (file.file == NULL) {
return -1;
}
if (fp == NULL) return -1;
while (fgets(buf,LINENOISE_MAX_LINE,fp) != NULL) {
while (fgets(buf, LINENOISE_MAX_LINE, file.file) != NULL) {
char *p;
p = strchr(buf,'\r');
@ -1345,7 +1345,6 @@ int linenoiseHistoryLoad(const char *filename) {
if (p) *p = '\0';
linenoiseHistoryAdd(buf);
}
fclose(fp);
return 0;
}
#endif

View file

@ -45,6 +45,7 @@ extern "C" {
#endif
#include <stddef.h> /* For size_t. */
#include <stdlib.h>
extern const char *linenoiseEditMore;
@ -69,10 +70,23 @@ struct linenoiseState {
int history_index; /* The history index we are currently editing. */
};
typedef struct linenoiseCompletions {
size_t len;
char **cvec;
} linenoiseCompletions;
/* Table of candidate completions filled in by the completion callback via
 * linenoiseAddCompletion(). Unless `to_free` has been cleared, the destructor
 * releases every stored string and then the vector itself. The struct holds
 * raw owning pointers, so it should be passed by pointer and not copied. */
struct linenoiseCompletions {
    size_t  len     = 0;       /* Number of entries stored in cvec. */
    char ** cvec    = nullptr; /* Array of NUL-terminated candidate strings. */
    bool    to_free = true;    /* When false the destructor releases nothing. */

    ~linenoiseCompletions() {
        if (!to_free) {
            return;
        }
        for (size_t i = 0; i < len; ++i) {
            /* linenoiseAddCompletion() allocates each string with
             * std::make_unique<char[]> (operator new[]) and hands it over
             * with release(), so it has to be returned with delete[];
             * passing it to free() is an allocator mismatch. */
            delete[] cvec[i];
        }
        /* The vector itself is grown with realloc(), so free() is correct. */
        free(cvec);
    }
};
/* Non blocking API. */
int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt);

View file

@ -3779,9 +3779,9 @@ int main(int argc, char ** argv) {
{ "default_generation_settings", ctx_server.default_generation_settings_for_props },
{ "total_slots", ctx_server.params_base.n_parallel },
{ "model_path", ctx_server.params_base.model },
{ "chat_template", ctx_server.chat_templates.template_default->source() },
{ "bos_token", ctx_server.chat_templates.template_default->bos_token() },
{ "eos_token", ctx_server.chat_templates.template_default->eos_token() },
{ "chat_template", ctx_server.chat_templates.template_default->source() },
{ "build_info", build_info },
};
if (ctx_server.params_base.use_jinja && ctx_server.chat_templates.template_tool_use) {

View file

@ -4416,7 +4416,6 @@ void kernel_mul_mv_q2_K_f32_impl(
device const half * dh = &x[ib].d;
for (int row = 0; row < N_DST; row++) {
float4 acc1 = {0.f, 0.f, 0.f, 0.f};
float4 acc2 = {0.f, 0.f, 0.f, 0.f};
for (int i = 0; i < 8; i += 2) {
@ -4447,7 +4446,7 @@ void kernel_mul_mv_q2_K_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum;
@ -4613,7 +4612,7 @@ void kernel_mul_mv_q3_K_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
if (tiisg == 0) {
for (int row = 0; row < 2; ++row) {
for (int row = 0; row < 2 && first_row + row < args.ne0; ++row) {
dst_f32[first_row + row] = sumf1[row];
}
}
@ -4729,7 +4728,7 @@ void kernel_mul_mv_q4_K_f32_impl(
device float * dst_f32 = (device float *) dst + (int64_t)im*args.ne0*args.ne1 + (int64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum;
@ -4861,7 +4860,7 @@ void kernel_mul_mv_q5_K_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < 2; ++row) {
for (int row = 0; row < 2 && first_row + row < args.ne0; ++row) {
const float tot = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = tot;
@ -4906,6 +4905,10 @@ void kernel_mul_mv_q6_K_f32_impl(
const int row = 2*r0 + sgitg;
if (row >= args.ne0) {
return;
}
const uint i12 = im%args.ne12;
const uint i13 = im/args.ne12;
@ -5061,7 +5064,7 @@ void kernel_mul_mv_iq2_xxs_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum * 0.25f;
@ -5179,7 +5182,7 @@ void kernel_mul_mv_iq2_xs_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum * 0.25f;
@ -5289,7 +5292,7 @@ void kernel_mul_mv_iq3_xxs_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum * 0.5f;
@ -5401,7 +5404,7 @@ void kernel_mul_mv_iq3_s_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum;
@ -5514,7 +5517,7 @@ void kernel_mul_mv_iq2_s_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum * 0.25f;
@ -5614,7 +5617,7 @@ void kernel_mul_mv_iq1_s_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum;
@ -5709,7 +5712,7 @@ void kernel_mul_mv_iq1_m_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < N_DST; ++row) {
for (int row = 0; row < N_DST && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum;
@ -5799,7 +5802,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < 2 && first_row + row < args.ne01; ++row) {
for (int row = 0; row < 2 && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum;
@ -5888,7 +5891,7 @@ void kernel_mul_mv_iq4_xs_f32_impl(
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
for (int row = 0; row < 2; ++row) {
for (int row = 0; row < 2 && first_row + row < args.ne0; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
dst_f32[first_row + row] = all_sum;

View file

@ -181,7 +181,7 @@ struct ggml_backend_rpc_context {
struct ggml_backend_rpc_buffer_context {
std::shared_ptr<socket_t> sock;
std::unordered_map<ggml_backend_buffer_t, void *> base_cache;
void * base_ptr;
uint64_t remote_ptr;
};
@ -423,16 +423,15 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
if (ctx->base_cache.find(buffer) != ctx->base_cache.end()) {
return ctx->base_cache[buffer];
if (ctx->base_ptr != nullptr) {
return ctx->base_ptr;
}
rpc_msg_buffer_get_base_req request = {ctx->remote_ptr};
rpc_msg_buffer_get_base_rsp response;
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_BUFFER_GET_BASE, &request, sizeof(request), &response, sizeof(response));
GGML_ASSERT(status);
void * base_ptr = reinterpret_cast<void *>(response.base_ptr);
ctx->base_cache[buffer] = base_ptr;
return base_ptr;
ctx->base_ptr = reinterpret_cast<void *>(response.base_ptr);
return ctx->base_ptr;
}
static rpc_tensor serialize_tensor(const ggml_tensor * tensor) {
@ -557,7 +556,7 @@ static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer(ggml_back
if (response.remote_ptr != 0) {
ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
ggml_backend_rpc_buffer_interface,
new ggml_backend_rpc_buffer_context{sock, {}, response.remote_ptr},
new ggml_backend_rpc_buffer_context{sock, nullptr, response.remote_ptr},
response.remote_size);
return buffer;
} else {