another push
This commit is contained in:
parent
f70fdf5a86
commit
6c1f137ba5
2 changed files with 14 additions and 9 deletions
|
@ -629,7 +629,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|||
|
||||
if (ctx->has_xgenmm_projector) {
|
||||
// TODO: implement something here, for example, image masks
|
||||
printf("use has_xgenmm_projector\n");
|
||||
printf(" use has_xgenmm_projector\n");
|
||||
}
|
||||
const int patch_size = hparams.patch_size;
|
||||
const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size));
|
||||
|
@ -667,18 +667,14 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|||
struct ggml_tensor * embeddings = inp;
|
||||
struct ggml_tensor * pos_embed = nullptr;
|
||||
if (ctx->has_llava_projector) {
|
||||
printf(" use has_llava_projector\n");
|
||||
// concat class_embeddings and patch_embeddings
|
||||
if (ctx->has_class_embedding) {
|
||||
embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
|
||||
ggml_set_name(embeddings, "embeddings");
|
||||
ggml_set_input(embeddings);
|
||||
embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
|
||||
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
|
||||
printf(" first acc worked\n");
|
||||
embeddings = ggml_acc(ctx0, embeddings, inp,
|
||||
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
|
||||
printf(" second acc worked\n");
|
||||
}
|
||||
}
|
||||
// printf(" after ctx->has_llava_projector\n");
|
||||
|
@ -2499,7 +2495,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
|
||||
// build the inference graph
|
||||
ggml_cgraph * gf = clip_image_build_graph(ctx, imgs, ctx->load_image_size, true);
|
||||
printf(" build graph done\n");
|
||||
ggml_gallocr_alloc_graph(ctx->compute_alloc, gf);
|
||||
// set inputs
|
||||
const auto & model = ctx->vision_model;
|
||||
|
@ -2546,7 +2541,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
ggml_backend_tensor_set(inp_raw, data, 0, ggml_nbytes(inp_raw));
|
||||
free(data);
|
||||
}
|
||||
printf(" before ctx->has_minicpmv_projector\n");
|
||||
if (ctx->has_minicpmv_projector) {
|
||||
{
|
||||
// inspired from siglip:
|
||||
|
@ -2617,6 +2611,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions));
|
||||
free(positions_data);
|
||||
}
|
||||
// FIXME: this is a hack;
|
||||
// {
|
||||
// std::cout << __LINE__ << std::endl;
|
||||
// struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
||||
|
@ -2639,9 +2634,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
ggml_backend_metal_set_n_cb(ctx->backend, n_threads);
|
||||
}
|
||||
#endif
|
||||
printf(" before ggml_backend_graph_compute\n");
|
||||
ggml_backend_graph_compute(ctx->backend, gf);
|
||||
printf(" after ggml_backend_graph_compute\n");
|
||||
// the last node is the embedding tensor
|
||||
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];
|
||||
// copy the embeddings to the location passed by the user
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#ifndef _MSC_VER
|
||||
#include <cxxabi.h>
|
||||
#endif
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
@ -620,6 +621,11 @@ int main(){
|
|||
for (size_t i = 0; i < img_res_v.size; i++)
|
||||
{
|
||||
printf("encode patch %d\n", i);
|
||||
const int nx = img_res_v.data[i].nx;
|
||||
const int ny = img_res_v.data[i].ny;
|
||||
const int vec_len = img_res_v.data[i].buf.size();
|
||||
printf(" i:%d | nx:%d | ny:%d | vec len:%d\n", i, nx, ny, vec_len); // 384^2 * 3(channel) = 442368
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
image_embd_v[i] =
|
||||
(float*)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184
|
||||
const bool encoded = clip_image_encode(
|
||||
|
@ -630,8 +636,14 @@ int main(){
|
|||
LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int)i + 1, (int)img_res_v.size);
|
||||
return false;
|
||||
}
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> duration = end - start;
|
||||
std::cout << " Wall time: " << duration.count() << " seconds" << std::endl;
|
||||
}
|
||||
|
||||
// patch handling goes here
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue