Merge branch 'master' into finetune-lora

xaedes 2023-08-30 13:28:29 +02:00
commit b1709f2d25
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1
6 changed files with 137 additions and 190 deletions

README.md

@@ -729,8 +729,6 @@ python3 convert.py pygmalion-7b/ --outtype q4_1
 - [LLaMA 2 7B chat](https://huggingface.co/TheBloke/Llama-2-7B-chat-GGML)
 - [LLaMA 2 13B chat](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML)
 - [LLaMA 2 70B chat](https://huggingface.co/TheBloke/Llama-2-70B-chat-GGML)
-- Specify `-eps 1e-5` for best generation quality
-- Specify `-gqa 8` for 70B models to work
 
 ### Verifying the model files

examples/perplexity/perplexity.cpp

@@ -142,6 +142,14 @@ results_perplexity perplexity_v2(llama_context * ctx, const gpt_params & params)
     fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
 
     std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
+
+    if (int(tokens.size()) < 2*params.n_ctx) {
+        fprintf(stderr, "%s: you need at least %d tokens to evaluate perplexity with a context of %d\n",__func__,2*params.n_ctx,
+                params.n_ctx);
+        fprintf(stderr, "%s: the data file you provided tokenizes to only %zu tokens\n",__func__,tokens.size());
+        return {std::move(tokens), 0., {}, {}};
+    }
+
     std::vector<float> logit_history;
     std::vector<float> prob_history;
@@ -274,6 +282,13 @@ results_perplexity perplexity(llama_context * ctx, const gpt_params & params) {
     auto tim2 = std::chrono::high_resolution_clock::now();
     fprintf(stderr, "%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
 
+    if (int(tokens.size()) < 2*params.n_ctx) {
+        fprintf(stderr, "%s: you need at least %d tokens to evaluate perplexity with a context of %d\n",__func__,2*params.n_ctx,
+                params.n_ctx);
+        fprintf(stderr, "%s: the data file you provided tokenizes to only %zu tokens\n",__func__,tokens.size());
+        return {std::move(tokens), 0., {}, {}};
+    }
+
     std::vector<float> logit_history;
     logit_history.resize(tokens.size());
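
Note (not part of the diff): both perplexity paths now refuse inputs that tokenize to fewer than 2*n_ctx tokens and return an empty result early, rather than proceeding with too little data for even the first evaluation windows. A minimal standalone sketch of the same guard; has_enough_tokens is a hypothetical helper, not a llama.cpp function:

#include <cstdio>
#include <vector>

static bool has_enough_tokens(size_t n_tokens, int n_ctx) {
    // the perplexity loops want at least two full contexts worth of data
    return (int) n_tokens >= 2*n_ctx;
}

int main() {
    const int n_ctx = 512;
    std::vector<int> tokens(700);  // pretend tokenization result
    if (!has_enough_tokens(tokens.size(), n_ctx)) {
        std::fprintf(stderr, "need at least %d tokens, got %zu\n", 2*n_ctx, tokens.size());
        return 1;  // mirrors the early `return {std::move(tokens), 0., {}, {}}` above
    }
    std::puts("enough data to evaluate perplexity");
}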

ggml-alloc.c

@@ -321,8 +321,7 @@ bool ggml_allocr_is_measure(struct ggml_allocr * alloc) {
 //////////// compute graph allocator
 
 static bool ggml_is_view(struct ggml_tensor * t) {
-    return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
-           t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
+    return t->view_src != NULL;
 }
 
 static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
@@ -340,28 +339,6 @@ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml
     return true;
 }
 
-static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
-    switch (t->op) {
-        case GGML_OP_PERMUTE:
-        case GGML_OP_RESHAPE:
-        case GGML_OP_TRANSPOSE:
-        case GGML_OP_VIEW:
-            return t->src[0];
-        case GGML_OP_CPY:
-            return t->src[1];
-        default:
-            return NULL;
-    }
-}
-
-static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
-    struct ggml_tensor * parent = t;
-    do {
-        parent = get_view_parent(parent);
-    } while (ggml_is_view(parent));
-    return parent;
-}
-
 static bool ggml_op_can_inplace(enum ggml_op op) {
     switch (op) {
         case GGML_OP_SCALE:
@@ -369,7 +346,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_ADD:
         case GGML_OP_ADD1:
-        case GGML_OP_ACC:
         case GGML_OP_SUB:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
@@ -379,7 +355,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
         case GGML_OP_UNARY:
        case GGML_OP_ROPE:
         case GGML_OP_RMS_NORM:
-        case GGML_OP_SET:
         case GGML_OP_SOFT_MAX:
         case GGML_OP_CONT:
             return true;
@@ -393,24 +368,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
     struct hash_node * ht = alloc->hash_table;
     if (node->data == NULL) {
         if (ggml_is_view(node)) {
-            size_t offset;
-            switch(node->op) {
-                case GGML_OP_VIEW:
-                    memcpy(&offset, node->op_params, sizeof(size_t));
-                    node->data = (char *) node->src[0]->data + offset;
-                    break;
-                case GGML_OP_PERMUTE:
-                case GGML_OP_RESHAPE:
-                case GGML_OP_TRANSPOSE:
-                    node->data = node->src[0]->data;
-                    break;
-                case GGML_OP_CPY:
-                    node->data = node->src[1]->data;
-                    break;
-                default:
-                    GGML_ASSERT(!"unknown view op");
-                    break;
-            }
+            assert(node->view_src->data != NULL);
+            node->data = (char *)node->view_src->data + node->view_offs;
         } else {
             // see if we can reuse a parent's buffer (inplace)
             if (ggml_op_can_inplace(node->op)) {
@@ -430,7 +389,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
                 struct hash_node * p_hn = hash_get(ht, parent);
                 if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
                     if (ggml_is_view(parent)) {
-                        struct ggml_tensor * view_src = get_view_source(parent);
+                        struct ggml_tensor * view_src = parent->view_src;
                         struct hash_node * view_src_hn = hash_get(ht, view_src);
                         if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
                             // TODO: the offset of the view parent must be kept to ensure that the op doesn't overwrite
@@ -472,7 +431,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
             struct ggml_tensor * node = gf->nodes[i];
 
             if (ggml_is_view(node)) {
-                struct ggml_tensor * view_src = get_view_source(node);
+                struct ggml_tensor * view_src = node->view_src;
                 hash_get(ht, view_src)->n_views += 1;
             }
@@ -557,7 +516,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                 if (p_hn->n_children == 0 && p_hn->n_views == 0) {
                     if (ggml_is_view(parent)) {
-                        struct ggml_tensor * view_src = get_view_source(parent);
+                        struct ggml_tensor * view_src = parent->view_src;
                         struct hash_node * view_src_hn = hash_get(ht, view_src);
                         view_src_hn->n_views -= 1;
                         AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);
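
For orientation (a sketch, not part of the diff): the hunks above work because every view now records its ultimate base tensor in view_src and its absolute byte offset in view_offs, so the allocator resolves a view's data pointer in one step instead of walking get_view_parent chains. Simplified illustration with a stand-in struct; the real fields are added to struct ggml_tensor in ggml.h later in this commit:

#include <cassert>
#include <cstddef>

struct toy_tensor {
    toy_tensor * view_src;  // ultimate base tensor, or nullptr if not a view
    size_t       view_offs; // absolute byte offset into the base buffer
    void *       data;
};

// what allocate_node now does for views, in one step:
static void resolve_view(toy_tensor * node) {
    assert(node->view_src != nullptr && node->view_src->data != nullptr);
    node->data = (char *) node->view_src->data + node->view_offs;
}

int main() {
    float buf[16] = {0};
    toy_tensor base = { nullptr, 0, buf };
    toy_tensor view = { &base, 4*sizeof(float), nullptr };
    resolve_view(&view);  // view.data now points 4 floats into buf
}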

ggml.c (201 changed lines)

@@ -4104,16 +4104,11 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
 }
 
 size_t ggml_nbytes(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    // this should handle cases where the tensor is not contiguous in memory
-    // probaby just:
-    //
-    //     return tensor->ne[3]*tensor->nb[3]
-    //
-    // is enough, but just in case, adding the second part
-
-    return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type));
+    size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
+    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
+        nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
+    }
+    return nbytes;
 }
 
 size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
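
A worked example of the new ggml_nbytes formula (the numbers are illustrative, not from the diff): the per-dimension sum measures a tensor by the byte span it actually occupies, which is what makes it correct for non-contiguous views as well. For a contiguous 4x3 f32 tensor, ne = {4,3,1,1} and nb = {4,16,48,48}:

#include <cstddef>
#include <cstdio>

int main() {
    // contiguous 4x3 f32 tensor: block size 1, so ne[0]*nb[0] is one row in bytes
    const size_t ne[4] = {4, 3, 1, 1};
    const size_t nb[4] = {4, 16, 48, 48};

    size_t nbytes = ne[0]*nb[0];            // bytes of the first dimension
    for (int i = 1; i < 4; ++i) {
        nbytes += (ne[i] - 1)*nb[i];        // span needed to reach the last element
    }
    std::printf("nbytes = %zu\n", nbytes);  // 48 == 4*3*sizeof(float)
}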
@@ -4567,20 +4562,33 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         enum ggml_type type,
         int n_dims,
         const int64_t * ne,
-        void * data) {
+        struct ggml_tensor * view_src,
+        size_t view_offs) {
     assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
 
-    size_t data_size = 0;
-
-    if (data == NULL && !ctx->no_alloc) {
-        data_size += ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
-        for (int i = 1; i < n_dims; i++) {
-            data_size *= ne[i];
-        }
-    }
-
-    if (ctx->scratch.data != NULL && data == NULL) {
+    // find the base tensor and absolute offset
+    if (view_src != NULL && view_src->view_src != NULL) {
+        view_offs += view_src->view_offs;
+        view_src   = view_src->view_src;
+    }
+
+    size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
+    for (int i = 1; i < n_dims; i++) {
+        data_size *= ne[i];
+    }
+
+    GGML_ASSERT(view_src == NULL || data_size + view_offs <= ggml_nbytes(view_src));
+
+    void * data = view_src != NULL ? view_src->data : NULL;
+    if (data != NULL) {
+        data = (char *) data + view_offs;
+    }
+
+    size_t obj_alloc_size = 0;
+
+    if (view_src == NULL && ctx->no_alloc == false) {
+        if (ctx->scratch.data != NULL) {
             // allocate tensor data in the scratch buffer
             if (ctx->scratch.offs + data_size > ctx->scratch.size) {
                 GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
@@ -4592,11 +4600,13 @@ static struct ggml_tensor * ggml_new_tensor_impl(
             data = (char * const) ctx->scratch.data + ctx->scratch.offs;
 
             ctx->scratch.offs += data_size;
-
-        data_size = 0;
+        } else {
+            // allocate tensor data in the context's memory pool
+            obj_alloc_size = data_size;
+        }
     }
 
-    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
 
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
@@ -4616,7 +4626,9 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.perf_runs =*/ 0,
         /*.perf_cycles =*/ 0,
         /*.perf_time_us =*/ 0,
-        /*.data =*/ (data == NULL && !ctx->no_alloc) ? (void *)(result + 1) : data,
+        /*.view_src =*/ view_src,
+        /*.view_offs =*/ view_offs,
+        /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name =*/ { 0 },
         /*.extra =*/ NULL,
         /*.padding =*/ { 0 },
@@ -4640,28 +4652,12 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     return result;
 }
 
-static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
-    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
-    assert(params_size <= GGML_MAX_OP_PARAMS);
-    memcpy(tensor->op_params, params, params_size);
-}
-
-static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    return ((const int32_t *)(tensor->op_params))[i];
-}
-
-static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    ((int32_t *)(tensor->op_params))[i] = value;
-}
-
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
         int n_dims,
         const int64_t * ne) {
-    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
+    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
 }
 
 struct ggml_tensor * ggml_new_tensor_1d(
@@ -4726,7 +4722,23 @@ struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
 }
 
 struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
-    return ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, NULL);
+    return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne);
+}
+
+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
+    assert(params_size <= GGML_MAX_OP_PARAMS);
+    memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    ((int32_t *)(tensor->op_params))[i] = value;
 }
 
 struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
@@ -5183,14 +5195,13 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char *
 struct ggml_tensor * ggml_view_tensor(
         struct ggml_context * ctx,
-        const struct ggml_tensor * src) {
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src->data);
+        struct ggml_tensor * src) {
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0);
     ggml_format_name(result, "%s (view)", src->name);
 
-    result->nb[0] = src->nb[0];
-    result->nb[1] = src->nb[1];
-    result->nb[2] = src->nb[2];
-    result->nb[3] = src->nb[3];
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        result->nb[i] = src->nb[i];
+    }
 
     return result;
 }
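
Usage sketch (assumes the API exactly as changed above; sizes are illustrative): a view created through the new ggml_new_tensor_impl path carries its lineage explicitly instead of a copied data pointer, and chains of views collapse to the base tensor at creation time:

#include "ggml.h"

void demo(struct ggml_context * ctx) {
    struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * v  = ggml_view_tensor(ctx, a);
    struct ggml_tensor * vv = ggml_view_tensor(ctx, v);

    // after this commit:
    //   v->view_src  == a and v->view_offs == 0
    //   vv->view_src == a as well: the chain is collapsed, it does not point at v
    // and both data pointers alias a's buffer once a is allocated
}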
@@ -5799,7 +5810,7 @@ struct ggml_tensor * ggml_repeat_back(
 
 // ggml_concat
 
-struct ggml_tensor* ggml_concat(
+struct ggml_tensor * ggml_concat(
     struct ggml_context* ctx,
     struct ggml_tensor* a,
    struct ggml_tensor* b) {
@@ -6408,7 +6419,7 @@ struct ggml_tensor * ggml_reshape(
         //GGML_ASSERT(false);
     }
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op = GGML_OP_RESHAPE;
@@ -6432,7 +6443,7 @@ struct ggml_tensor * ggml_reshape_1d(
     }
 
     const int64_t ne[1] = { ne0 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op = GGML_OP_RESHAPE;
@@ -6457,7 +6468,7 @@ struct ggml_tensor * ggml_reshape_2d(
     }
 
     const int64_t ne[2] = { ne0, ne1 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op = GGML_OP_RESHAPE;
@@ -6483,7 +6494,7 @@ struct ggml_tensor * ggml_reshape_3d(
     }
 
     const int64_t ne[3] = { ne0, ne1, ne2 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op = GGML_OP_RESHAPE;
@@ -6493,7 +6504,6 @@ struct ggml_tensor * ggml_reshape_3d(
     return result;
 }
 
-
 struct ggml_tensor * ggml_reshape_4d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -6511,7 +6521,7 @@ struct ggml_tensor * ggml_reshape_4d(
     }
 
     const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op = GGML_OP_RESHAPE;
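
One practical consequence (my reading of the hunks above, not stated in the diff): ggml_reshape_* now passes the source tensor itself rather than a->data, so a reshape records its base even when a has no data yet, as in no_alloc/measure mode, and the allocator later fills result->data from view_src and view_offs. A small sketch; flatten is a hypothetical helper, not a ggml API:

#include "ggml.h"

struct ggml_tensor * flatten(struct ggml_context * ctx, struct ggml_tensor * a) {
    // result->view_src == a even if a->data is still NULL at this point
    return ggml_reshape_1d(ctx, a, ggml_nelements(a));
}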
@@ -6521,34 +6531,12 @@ struct ggml_tensor * ggml_reshape_4d(
     return result;
 }
 
-// ggml_view_1d
-
-static struct ggml_tensor * ggml_view_tensor_offset(
+static struct ggml_tensor * ggml_view_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_dims,
         const int64_t * ne,
         size_t offset) {
-    // don't calculate an offset from an unallocated tensor
-    void * data = NULL;
-    if (a->data != NULL) {
-        data = (char *) a->data + offset;
-    }
-
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
-    ggml_format_name(result, "%s (view)", a->name);
-    ggml_set_op_params(result, &offset, sizeof(offset));
-
-    return result;
-}
-
-struct ggml_tensor * ggml_view_1d(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int64_t ne0,
-        size_t offset) {
 
     bool is_node = false;
 
@@ -6556,7 +6544,10 @@ struct ggml_tensor * ggml_view_1d(
         is_node = true;
     }
 
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
+    ggml_format_name(result, "%s (view)", a->name);
+
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6565,6 +6556,19 @@ struct ggml_tensor * ggml_view_1d(
     return result;
 }
 
+// ggml_view_1d
+
+struct ggml_tensor * ggml_view_1d(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int64_t ne0,
+        size_t offset) {
+
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
+
+    return result;
+}
+
 // ggml_view_2d
 
 struct ggml_tensor * ggml_view_2d(
@@ -6575,24 +6579,14 @@ struct ggml_tensor * ggml_view_2d(
         size_t nb1,
         size_t offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
-
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
+    const int64_t ne[2] = { ne0, ne1 };
+
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
     result->nb[3] = result->nb[2];
 
-    result->op = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
@@ -6608,24 +6602,14 @@ struct ggml_tensor * ggml_view_3d(
         size_t nb2,
         size_t offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
-
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
+    const int64_t ne[3] = { ne0, ne1, ne2 };
+
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = result->nb[2]*ne2;
 
-    result->op = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
@@ -6643,24 +6627,14 @@ struct ggml_tensor * ggml_view_4d(
         size_t nb3,
         size_t offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
-
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
+    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
+
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = nb3;
 
-    result->op = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
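
With all four ggml_view_*d wrappers funneling into ggml_view_impl, a call looks unchanged from the outside. For example, a 2D view selecting row 1 of a matrix (second_row is a hypothetical helper; it relies on the ggml convention that nb[1] is the row stride in bytes and that offset is a byte offset):

#include "ggml.h"

struct ggml_tensor * second_row(struct ggml_context * ctx, struct ggml_tensor * m) {
    // one row: ne0 = m->ne[0] columns, ne1 = 1, row stride nb[1], offset of row 1
    return ggml_view_2d(ctx, m, m->ne[0], 1, m->nb[1], 1*m->nb[1]);
}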
@@ -6846,7 +6820,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_INF;
@@ -6863,7 +6837,6 @@ struct ggml_tensor * ggml_diag_mask_inf(
     return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
 }
 
-
 struct ggml_tensor * ggml_diag_mask_inf_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -6886,7 +6859,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_ZERO;
@@ -12306,7 +12279,7 @@ static void ggml_compute_forward_diag_mask_f32(
     const int nth = params->nth;
 
     const int n_past = ((int32_t *) dst->op_params)[0];
-    const bool inplace = (bool)((int32_t *) dst->op_params)[1];
+    const bool inplace = src0->data == dst->data;
 
     GGML_ASSERT(n_past >= 0);
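
The inplace flag no longer travels through op_params: an inplace result is built with ggml_view_tensor, so after allocation it aliases its source buffer and the kernel can infer inplace from the data pointers alone. A sketch of that check in isolation (op_is_inplace is a hypothetical name, not a ggml function):

#include "ggml.h"

static bool op_is_inplace(const struct ggml_tensor * src0, const struct ggml_tensor * dst) {
    return src0->data == dst->data;  // replaces the old op_params[1] flag
}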

ggml.h (5 changed lines)

@@ -479,6 +479,9 @@ extern "C" {
         int64_t perf_cycles;
         int64_t perf_time_us;
 
+        struct ggml_tensor * view_src;
+        size_t view_offs;
+
         void * data;
 
         char name[GGML_MAX_NAME];
@@ -663,7 +666,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
 
     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
 
     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);

llama.cpp

@@ -3211,7 +3211,7 @@ private:
 
 struct llm_bigram_bpe {
     struct comparator {
-        bool operator()(llm_bigram_bpe & l, llm_bigram_bpe & r) {
+        bool operator()(const llm_bigram_bpe & l, const llm_bigram_bpe & r) const {
             return l.rank > r.rank || (l.rank == r.rank && l.left > r.left);
         }
     };
@@ -3359,23 +3359,22 @@
     }
 
     // probably not 100% correct
-    // TODO: this is quite slow - how to make it more efficient?
-    static std::vector<std::string> bpe_gpt2_preprocess(std::string text) {
+    static std::vector<std::string> bpe_gpt2_preprocess(const std::string & text) {
         std::vector<std::string> words;
 
         // ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
        const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
         const std::regex re(pattern);
-        std::smatch m;
 
-        while (std::regex_search(text, m, re)) {
-            for (auto x : m) {
-                words.push_back(x);
-            }
-            text = m.suffix();
-        }
+        auto words_begin = std::sregex_iterator(text.begin(), text.end(), re);
+        auto words_end   = std::sregex_iterator();
+        auto n_words     = std::distance(words_begin, words_end);
+        words.reserve(n_words);
+        for (auto it = words_begin; it != words_end; ++it) {
+            words.push_back(it->str());
+        }
 
         return words;
     }
 
     const llama_vocab & vocab;
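
Standalone sketch of the rewritten pre-tokenizer loop (the example text is mine): std::sregex_iterator walks every match of the GPT-2 pattern in a single pass over the string, where the old loop copied the remaining suffix on every iteration. std::regex in its default ECMAScript mode accepts the (?!\S) lookahead used by the pattern:

#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {
    const std::string text = "Hello there, it's 2023!";
    // same pattern as in bpe_gpt2_preprocess above
    const std::regex re(R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)");

    std::vector<std::string> words;
    for (auto it = std::sregex_iterator(text.begin(), text.end(), re);
         it != std::sregex_iterator(); ++it) {
        words.push_back(it->str());  // whole match only, no capture groups in this pattern
    }

    for (const auto & w : words) {
        std::cout << '[' << w << ']';
    }
    std::cout << '\n';  // [Hello][ there][,][ it]['s][ 2023][!]
}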