Merge pull request #32 from HanClinto/ffmpeg_flag

ffmpeg compiler flag for video understanding
This commit is contained in:
tc-mb 2024-10-09 15:25:08 +08:00 committed by GitHub
commit 4bd0c618d1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 33 additions and 4 deletions

View file

@ -69,6 +69,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
# 3rd party libs
# Both options below default to OFF, so each external dependency is opt-in.
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
option(LLAMA_FFMPEG "llama: use ffmpeg to load video files" OFF)
# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)

View file

@ -968,6 +968,11 @@ override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
override LDFLAGS := $(LDFLAGS) -lcurl
endif
# Optional ffmpeg support for loading video files; enabled when LLAMA_FFMPEG is
# defined (e.g. `make LLAMA_FFMPEG=1`).
# libswscale is resolved through pkg-config together with the other ffmpeg
# libraries so that its include and library search paths are picked up as well,
# instead of relying on a bare -lswscale being found on the default linker path.
ifdef LLAMA_FFMPEG
FFMPEG_PKGS := libavformat libavcodec libavutil libswscale
override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_FFMPEG $(shell pkg-config --cflags $(FFMPEG_PKGS))
override LDFLAGS := $(LDFLAGS) $(shell pkg-config --libs $(FFMPEG_PKGS))
endif
#
# Print build information
#
@ -1465,16 +1470,13 @@ llama-llava-cli: examples/llava/llava-cli.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
# Build the MiniCPM-V command line example.
# No ffmpeg-specific flags are passed here: when LLAMA_FFMPEG is defined, the
# `ifdef LLAMA_FFMPEG` section of this Makefile already folds the ffmpeg compile
# and link flags into CXXFLAGS / LDFLAGS. Keeping this rule free of unconditional
# pkg-config calls lets the target build on machines without ffmpeg installed,
# and the binary is produced by a single compile/link command.
llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
	examples/llava/llava.cpp \
	examples/llava/llava.h \
	examples/llava/clip.cpp \
	examples/llava/clip.h \
	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
ifeq ($(UNAME_S),Darwin)
swift: examples/batched.swift

View file

@ -83,6 +83,19 @@ if (LLAMA_CURL)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
endif ()
# Use ffmpeg to load video files
#
# The ffmpeg components are located through pkg-config. libswscale is requested
# together with libavformat/libavcodec/libavutil because the video loading code
# includes <libswscale/swscale.h>; without it the swscale library would be
# missing from FFMPEG_LIBRARIES and from the link line.
if (LLAMA_FFMPEG)
    find_package(PkgConfig REQUIRED)
    pkg_check_modules(FFMPEG REQUIRED
        libavformat
        libavcodec
        libavutil
        libswscale
    )

    # NOTE(review): add_definitions()/include_directories() are directory-scoped;
    # confirm that every target that tests LLAMA_USE_FFMPEG is created in this
    # directory or below, otherwise it will not see the definition.
    add_definitions(-DLLAMA_USE_FFMPEG)
    include_directories(${FFMPEG_INCLUDE_DIRS})
    list(APPEND LLAMA_COMMON_EXTRA_LIBS ${FFMPEG_LIBRARIES})
endif ()
# Usage requirements of ${TARGET}:
#  - the current directory is a PUBLIC include path, so consumers see it too
#  - C++11 is required, and that requirement is propagated to consumers
#  - the optional extra libraries collected in LLAMA_COMMON_EXTRA_LIBS are linked
#    PRIVATE, while llama and Threads::Threads are linked PUBLIC
target_include_directories(${TARGET} PUBLIC .)
target_compile_features (${TARGET} PUBLIC cxx_std_11)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

View file

@ -9,12 +9,14 @@
#include <cstdlib>
#include <vector>
#if defined(LLAMA_USE_FFMPEG)
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#endif // LLAMA_USE_FFMPEG
struct llava_context {
struct clip_ctx * ctx_clip = NULL;
@ -28,6 +30,8 @@ struct clip_image_u8 {
std::vector<uint8_t> buf;
};
#if defined(LLAMA_USE_FFMPEG)
static std::vector<clip_image_u8 *> extract_frames(const std::string& video_path, const int frame_num) {
AVFormatContext* format_ctx = nullptr;
if (avformat_open_input(&format_ctx, video_path.c_str(), nullptr, nullptr) < 0) {
@ -156,6 +160,15 @@ static std::vector<clip_image_u8 *> extract_frames(const std::string& video_path
return frames;
}
#else
// Fallback used when LLAMA_USE_FFMPEG is not defined: video decoding is not
// available, so a hint on how to enable it is logged and no frames are returned.
//
// The parameter names are commented out because this stub does not use them;
// this mirrors the `int /*argc*/` style used elsewhere in this file and avoids
// unused-parameter warnings. The signature itself matches the ffmpeg-enabled
// implementation, so callers compile unchanged in both configurations.
//
// Returns: an empty vector.
static std::vector<clip_image_u8 *> extract_frames(const std::string& /*video_path*/, const int /*frame_num*/) {
    LOG_TEE("%s: llama.cpp built without ffmpeg, processing video files is not supported. Please recompile with LLAMA_FFMPEG=1 to add video support.\n", __func__);
    return {};
}
#endif // LLAMA_USE_FFMPEG
static void show_additional_info(int /*argc*/, char ** argv) {
LOG_TEE("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> [--video <path/to/an/video.mp4>] [--image <path/to/an/image.jpg>] [--image <path/to/another/image.jpg>] [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
LOG_TEE(" note: a lower temperature value like 0.1 is recommended for better quality.\n");