fix: trailing whitespace
This commit is contained in:
parent
204d08be3d
commit
95da79e740
2 changed files with 18 additions and 24 deletions
|
@ -233,7 +233,6 @@ struct clip_ctx {
|
||||||
struct gguf_context * ctx_gguf;
|
struct gguf_context * ctx_gguf;
|
||||||
//struct clip_buffer buf_compute;
|
//struct clip_buffer buf_compute;
|
||||||
|
|
||||||
|
|
||||||
// reusable buffer for `struct ggml_graph_plan.work_data`
|
// reusable buffer for `struct ggml_graph_plan.work_data`
|
||||||
std::vector<uint8_t> work_buffer;
|
std::vector<uint8_t> work_buffer;
|
||||||
|
|
||||||
|
@ -285,7 +284,6 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
|
||||||
struct ggml_context * ctx0 = ggml_init(params);
|
struct ggml_context * ctx0 = ggml_init(params);
|
||||||
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
|
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
|
||||||
|
|
||||||
|
|
||||||
struct ggml_tensor * inp_raw = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, image_size, image_size, 3, batch_size);
|
struct ggml_tensor * inp_raw = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, image_size, image_size, 3, batch_size);
|
||||||
ggml_allocr_alloc(ctx->alloc, inp_raw);
|
ggml_allocr_alloc(ctx->alloc, inp_raw);
|
||||||
|
|
||||||
|
@ -440,7 +438,7 @@ if (!ggml_allocr_is_measure(ctx->alloc)) {
|
||||||
|
|
||||||
if (ctx->has_llava_projector) {
|
if (ctx->has_llava_projector) {
|
||||||
embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]);
|
embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]);
|
||||||
|
|
||||||
struct ggml_tensor * patches = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_patches);
|
struct ggml_tensor * patches = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_patches);
|
||||||
ggml_allocr_alloc(ctx->alloc, patches);
|
ggml_allocr_alloc(ctx->alloc, patches);
|
||||||
if (!ggml_allocr_is_measure(ctx->alloc)) {
|
if (!ggml_allocr_is_measure(ctx->alloc)) {
|
||||||
|
@ -448,20 +446,20 @@ if (!ggml_allocr_is_measure(ctx->alloc)) {
|
||||||
ggml_set_i32_1d(patches, i, i+1);
|
ggml_set_i32_1d(patches, i, i+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
embeddings = ggml_get_rows(ctx0, embeddings, patches);
|
embeddings = ggml_get_rows(ctx0, embeddings, patches);
|
||||||
|
|
||||||
// mm projection 0
|
// mm projection 0
|
||||||
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
|
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
|
||||||
embeddings = ggml_add(ctx0, ggml_repeat(ctx0, model.mm_0_b, embeddings), embeddings);
|
embeddings = ggml_add(ctx0, ggml_repeat(ctx0, model.mm_0_b, embeddings), embeddings);
|
||||||
|
|
||||||
embeddings = ggml_gelu(ctx0, embeddings);
|
embeddings = ggml_gelu(ctx0, embeddings);
|
||||||
|
|
||||||
embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
|
embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
|
||||||
embeddings = ggml_add(ctx0, ggml_repeat(ctx0, model.mm_2_b, embeddings), embeddings);
|
embeddings = ggml_add(ctx0, ggml_repeat(ctx0, model.mm_2_b, embeddings), embeddings);
|
||||||
|
|
||||||
ggml_set_name(embeddings, "check");
|
ggml_set_name(embeddings, "check");
|
||||||
}
|
}
|
||||||
|
|
||||||
// build the graph
|
// build the graph
|
||||||
ggml_build_forward_expand(gf, embeddings);
|
ggml_build_forward_expand(gf, embeddings);
|
||||||
|
@ -680,7 +678,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||||
ggml_free(meta);
|
ggml_free(meta);
|
||||||
|
|
||||||
new_clip->ctx_gguf = ctx;
|
new_clip->ctx_gguf = ctx;
|
||||||
|
|
||||||
// measure mem requirement and allocate
|
// measure mem requirement and allocate
|
||||||
{
|
{
|
||||||
static const size_t tensor_alignment = 32;
|
static const size_t tensor_alignment = 32;
|
||||||
|
@ -919,7 +917,7 @@ struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];
|
||||||
if (plan.work_size > 0) {
|
if (plan.work_size > 0) {
|
||||||
free(plan.work_data);
|
free(plan.work_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
static bool eval_image_embd(llama_context * ctx_llama, float * embd, int N, int n_batch, int * n_past) {
|
static bool eval_image_embd(llama_context * ctx_llama, float * embd, int N, int n_batch, int * n_past) {
|
||||||
int n_embd = llama_n_embd(llama_get_model(ctx_llama));
|
int n_embd = llama_n_embd(llama_get_model(ctx_llama));
|
||||||
|
|
||||||
for (int i = 0; i < N; i += n_batch) {
|
for (int i = 0; i < N; i += n_batch) {
|
||||||
int n_eval = N - i;
|
int n_eval = N - i;
|
||||||
if (n_eval > n_batch) {
|
if (n_eval > n_batch) {
|
||||||
|
@ -144,16 +144,13 @@ const char * sample(struct llama_context * ctx_llama, gpt_params & params, int *
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
|
|
||||||
if (argc < 3) {
|
if (argc < 4) {
|
||||||
printf("usage: %s <path/to/llava-rlhf-qe_k.gguf> <path/to/llava-encoder-f16.gguf> [path/to/an/image.jpg] [a text prompt]\n", argv[0]);
|
printf("usage: %s <path/to/llava-v1.5/ggml-model-f16.gguf> <path/to/llava-v1.5/llava-encoder-f16.gguf> <path/to/an/image.jpg> [a text prompt]\n", argv[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
params.model = argv[1];
|
params.model = argv[1];
|
||||||
const char * clip_path = argv[2];
|
const char * clip_path = argv[2];
|
||||||
const char * img_path;
|
const char * img_path = argv[3];
|
||||||
if (argc >= 4) {
|
|
||||||
img_path = argv[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (argc >= 5) {
|
if (argc >= 5) {
|
||||||
params.prompt = argv[4];
|
params.prompt = argv[4];
|
||||||
|
@ -162,9 +159,8 @@ int main(int argc, char ** argv) {
|
||||||
if (params.prompt.empty()) {
|
if (params.prompt.empty()) {
|
||||||
params.prompt = "describe the image in detail.";
|
params.prompt = "describe the image in detail.";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto ctx_clip = clip_model_load(clip_path, 1);
|
||||||
auto ctx_clip = clip_model_load(clip_path, 3);
|
|
||||||
clip_image_u8 img;
|
clip_image_u8 img;
|
||||||
clip_image_f32 img_res;
|
clip_image_f32 img_res;
|
||||||
clip_image_load_from_file(img_path, &img);
|
clip_image_load_from_file(img_path, &img);
|
||||||
|
@ -172,7 +168,7 @@ int main(int argc, char ** argv) {
|
||||||
float * vec = (float *)malloc(4096 * 576 * sizeof(float));
|
float * vec = (float *)malloc(4096 * 576 * sizeof(float));
|
||||||
clip_image_encode(ctx_clip, params.n_threads, &img_res, vec, false);
|
clip_image_encode(ctx_clip, params.n_threads, &img_res, vec, false);
|
||||||
clip_free(ctx_clip);
|
clip_free(ctx_clip);
|
||||||
|
|
||||||
llama_backend_init(params.numa);
|
llama_backend_init(params.numa);
|
||||||
|
|
||||||
llama_model_params model_params = llama_model_default_params();
|
llama_model_params model_params = llama_model_default_params();
|
||||||
|
@ -182,19 +178,19 @@ int main(int argc, char ** argv) {
|
||||||
fprintf(stderr , "%s: error: unable to load model\n" , __func__);
|
fprintf(stderr , "%s: error: unable to load model\n" , __func__);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_context_params ctx_params = llama_context_default_params();
|
llama_context_params ctx_params = llama_context_default_params();
|
||||||
ctx_params.seed = 1234;
|
ctx_params.seed = 1234;
|
||||||
ctx_params.n_ctx = 2048;
|
ctx_params.n_ctx = 2048;
|
||||||
ctx_params.n_threads = params.n_threads;
|
ctx_params.n_threads = params.n_threads;
|
||||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||||
llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
|
llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
|
||||||
|
|
||||||
if (ctx_llama == NULL) {
|
if (ctx_llama == NULL) {
|
||||||
fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
|
fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int n_past = 0;
|
int n_past = 0;
|
||||||
int max_tgt_len = 256;
|
int max_tgt_len = 256;
|
||||||
eval_string(ctx_llama, "user: ", params.n_batch, &n_past);
|
eval_string(ctx_llama, "user: ", params.n_batch, &n_past);
|
||||||
|
@ -202,7 +198,7 @@ int main(int argc, char ** argv) {
|
||||||
eval_string(ctx_llama, params.prompt.c_str(), params.n_batch, &n_past);
|
eval_string(ctx_llama, params.prompt.c_str(), params.n_batch, &n_past);
|
||||||
eval_string(ctx_llama, "\nassistant:", params.n_batch, &n_past);
|
eval_string(ctx_llama, "\nassistant:", params.n_batch, &n_past);
|
||||||
printf("n_past = %d\n", n_past);
|
printf("n_past = %d\n", n_past);
|
||||||
|
|
||||||
const char* tmp;
|
const char* tmp;
|
||||||
for (int i=0; i<max_tgt_len; i++) {
|
for (int i=0; i<max_tgt_len; i++) {
|
||||||
tmp = sample(ctx_llama, params, &n_past);
|
tmp = sample(ctx_llama, params, &n_past);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue