From fd2c58286aaeb4ed51d6b963344a6d2584e25ab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Thu, 21 Nov 2024 20:10:27 +0800 Subject: [PATCH 1/5] remove reference interface from extern C in qwen2audio examples --- examples/qwen2-audio/qwen2-cli.cpp | 12 ++++++--- examples/qwen2-audio/qwen2.cpp | 40 ++++++++++++++++++++++-------- examples/qwen2-audio/qwen2.h | 29 +++++----------------- 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/examples/qwen2-audio/qwen2-cli.cpp b/examples/qwen2-audio/qwen2-cli.cpp index 9221780a9..1d139c5a5 100644 --- a/examples/qwen2-audio/qwen2-cli.cpp +++ b/examples/qwen2-audio/qwen2-cli.cpp @@ -1,9 +1,13 @@ #include "qwen2.h" +#include <iostream> + +using std::cout; +using std::endl; int main(int argc, char **argv) { - omni_context_params ctx_params = omni_context_default_params(); + omni_context_params * ctx_params = omni_context_default_params(); if (!omni_context_params_parse(argc, argv, ctx_params)) { return 1; @@ -11,9 +15,11 @@ int main(int argc, char **argv) omni_context *ctx_omni = omni_init_context(ctx_params); - omni_process_full(ctx_omni, ctx_params); + auto* ret_str = omni_process_full(ctx_omni, ctx_params); + cout << "RET: " << ret_str << endl; + omni_free(ctx_omni); return 0; -} \ No newline at end of file +} diff --git a/examples/qwen2-audio/qwen2.cpp b/examples/qwen2-audio/qwen2.cpp index 8a08a7ac6..02d3c94d8 100644 --- a/examples/qwen2-audio/qwen2.cpp +++ b/examples/qwen2-audio/qwen2.cpp @@ -27,10 +27,26 @@ void* internal_chars = nullptr; static const char *AUDIO_TOKEN = "<|AUDIO|>"; +struct omni_context_params +{ + const char *model; + const char *mmproj; + const char *file; + const char *prompt; + int32_t n_gpu_layers; +}; + +struct omni_context +{ + struct whisper_context *ctx_whisper; + struct audio_projector *projector; + struct llama_context *ctx_llama; + struct llama_model *model; +}; + // // Whisper // - struct whisper_params { int32_t n_threads = std::min(4, 
(int32_t)std::thread::hardware_concurrency()); @@ -476,8 +492,9 @@ static void omni_print_usage(int, char **argv) LOG("\n note: a lower temperature value like 0.1 is recommended for better quality.\n"); } -bool omni_context_params_parse(int argc, char **argv, omni_context_params &params) +bool omni_context_params_parse(int argc, char **argv, omni_context_params * in_params) { + auto& params = *in_params; for (int i = 1; i < argc; i++) { std::string arg = argv[i]; @@ -523,15 +540,15 @@ bool omni_context_params_parse(int argc, char **argv, omni_context_params &param return true; } -omni_context_params omni_context_default_params() +omni_context_params * omni_context_default_params() { - omni_context_params params; + static omni_context_params params; params.model = ""; params.mmproj = ""; params.file = ""; params.prompt = "this conversation talks about"; params.n_gpu_layers = -1; - return params; + return &params; } struct omni_params @@ -540,8 +557,9 @@ struct omni_params whisper_params whisper; }; -bool omni_params_parse(int argc, char **argv, omni_params &params) +bool omni_params_parse(int argc, char **argv, omni_params * in_params) { + auto& params = *in_params; if (!gpt_params_parse(argc, argv, params.gpt)) { return false; @@ -564,8 +582,9 @@ bool omni_params_parse(int argc, char **argv, omni_params &params) return true; } -static omni_params get_omni_params_from_context_params(omni_context_params &params) +static omni_params get_omni_params_from_context_params(omni_context_params * in_params) { + auto& params = *in_params; omni_params all_params; // Initialize gpt params @@ -639,10 +658,9 @@ static size_t find_audio_token(const std::string &prompt) return prompt.find(AUDIO_TOKEN); } -struct omni_context *omni_init_context(omni_context_params &params) +struct omni_context *omni_init_context(omni_context_params * in_params) { - - omni_params all_params = get_omni_params_from_context_params(params); + omni_params all_params = get_omni_params_from_context_params(in_params); // llama 
LLAMA_LOG_INFO("------- llama --------\n"); @@ -877,7 +895,7 @@ const char* omni_process_prompt(struct omni_context *ctx_omni, ggml_tensor *audi return (const char*)(internal_chars); } -const char* omni_process_full(struct omni_context *ctx_omni, omni_context_params &params) +const char* omni_process_full(struct omni_context *ctx_omni, omni_context_params * params) { omni_params all_params = get_omni_params_from_context_params(params); diff --git a/examples/qwen2-audio/qwen2.h b/examples/qwen2-audio/qwen2.h index dcadb4288..61538b837 100644 --- a/examples/qwen2-audio/qwen2.h +++ b/examples/qwen2-audio/qwen2.h @@ -29,34 +29,17 @@ extern "C" { #endif -struct omni_context_params -{ - const char *model; - const char *mmproj; - const char *file; - const char *prompt; - int32_t n_gpu_layers; -}; +OMNI_AUDIO_API bool omni_context_params_parse(int argc, char **argv, struct omni_context_params * params); -struct omni_context -{ - struct whisper_context *ctx_whisper; - struct audio_projector *projector; - struct llama_context *ctx_llama; - struct llama_model *model; -}; +OMNI_AUDIO_API struct omni_context_params * omni_context_default_params(); -OMNI_AUDIO_API bool omni_context_params_parse(int argc, char **argv, omni_context_params &params); +OMNI_AUDIO_API struct omni_context * omni_init_context(struct omni_context_params * params); -OMNI_AUDIO_API omni_context_params omni_context_default_params(); - -OMNI_AUDIO_API struct omni_context *omni_init_context(omni_context_params &params); - -OMNI_AUDIO_API void omni_free(struct omni_context *ctx_omni); +OMNI_AUDIO_API void omni_free(struct omni_context * ctx_omni); OMNI_AUDIO_API const char* omni_process_full( - struct omni_context *ctx_omni, - omni_context_params &params + struct omni_context * ctx_omni, + struct omni_context_params * params ); #ifdef __cplusplus From 7589158595091a88b7844c83569f68c780469d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Thu, 21 Nov 2024 20:44:49 +0800 Subject: [PATCH 2/5] expose 
omni_context_params struct --- examples/qwen2-audio/qwen2.cpp | 8 -------- examples/qwen2-audio/qwen2.h | 9 +++++++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/qwen2-audio/qwen2.cpp b/examples/qwen2-audio/qwen2.cpp index 02d3c94d8..d172ce087 100644 --- a/examples/qwen2-audio/qwen2.cpp +++ b/examples/qwen2-audio/qwen2.cpp @@ -27,14 +27,6 @@ void* internal_chars = nullptr; static const char *AUDIO_TOKEN = "<|AUDIO|>"; -struct omni_context_params -{ - const char *model; - const char *mmproj; - const char *file; - const char *prompt; - int32_t n_gpu_layers; -}; struct omni_context { diff --git a/examples/qwen2-audio/qwen2.h b/examples/qwen2-audio/qwen2.h index 61538b837..c0894cb1a 100644 --- a/examples/qwen2-audio/qwen2.h +++ b/examples/qwen2-audio/qwen2.h @@ -29,6 +29,15 @@ extern "C" { #endif +struct omni_context_params +{ + const char *model; + const char *mmproj; + const char *file; + const char *prompt; + int32_t n_gpu_layers; +}; + OMNI_AUDIO_API bool omni_context_params_parse(int argc, char **argv, struct omni_context_params * params); OMNI_AUDIO_API struct omni_context_params * omni_context_default_params(); From be54cb02ff14354ac78dd8ec8a9efa170475b00d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Tue, 3 Dec 2024 11:47:28 +0800 Subject: [PATCH 3/5] bug fix --- common/common-nexa.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/common/common-nexa.cpp b/common/common-nexa.cpp index cda4706b7..e774fc505 100644 --- a/common/common-nexa.cpp +++ b/common/common-nexa.cpp @@ -152,7 +152,6 @@ bool load_hparams_and_tensors_from_gguf(const std::string &fname, NexaBaseModel ggml_free(meta); gguf_free(ctx_gguf); - gguf_free(ctx_gguf); return true; } From ca7e8ef19e1e3ca1558d64e184218e83294ebb5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Tue, 3 Dec 2024 14:54:52 +0800 Subject: [PATCH 4/5] fix clip_n_patch() allocation size error for 81-series omni-vlm models --- examples/omni-vlm/clip.cpp | 28 
++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/examples/omni-vlm/clip.cpp b/examples/omni-vlm/clip.cpp index 73abe85a8..ad2d8c102 100644 --- a/examples/omni-vlm/clip.cpp +++ b/examples/omni-vlm/clip.cpp @@ -39,12 +39,12 @@ #include #include #include -// #include <iostream> +#include <iostream> // #include // #include -// using std::cout; -// using std::endl; +using std::cout; +using std::endl; #define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) #define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) @@ -1927,16 +1927,20 @@ int clip_n_patches(const struct clip_ctx * ctx) { int n_patches = (params.image_size / params.patch_size) * (params.image_size / params.patch_size); - if (ctx->proj_type == PROJECTOR_TYPE_LDP || ctx->proj_type == PROJECTOR_TYPE_LDPV2) { - n_patches /= 4; - } else if (ctx->proj_type == PROJECTOR_TYPE_RESAMPLER) { - if (ctx->minicpmv_version == 2) { - n_patches = 96; - } - else if (ctx->minicpmv_version == 3) { - n_patches = 64; - } + if(ctx->omni_vlm_ver_type == omni_vlm_version_type::VLM_81_OCR + || ctx->omni_vlm_ver_type == omni_vlm_version_type::VLM_81_INSTRUCT) { + n_patches /= 9; } + // if (ctx->proj_type == PROJECTOR_TYPE_LDP || ctx->proj_type == PROJECTOR_TYPE_LDPV2) { + // n_patches /= 4; + // } else if (ctx->proj_type == PROJECTOR_TYPE_RESAMPLER) { + // if (ctx->minicpmv_version == 2) { + // n_patches = 96; + // } + // else if (ctx->minicpmv_version == 3) { + // n_patches = 64; + // } + // } return n_patches; } From b86cdedb7e5d0b9b2fe61404c39010a149da99be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Tue, 3 Dec 2024 15:03:55 +0800 Subject: [PATCH 5/5] remove iostream header --- examples/omni-vlm/clip.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/omni-vlm/clip.cpp b/examples/omni-vlm/clip.cpp index ad2d8c102..2a4f37cf4 100644 --- a/examples/omni-vlm/clip.cpp +++ b/examples/omni-vlm/clip.cpp @@ -39,12 +39,12 @@ #include #include 
#include -#include <iostream> +// #include <iostream> // #include // #include -using std::cout; -using std::endl; +// using std::cout; +// using std::endl; #define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) #define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)