diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bc295d52d..22be233e6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -498,6 +498,17 @@ jobs:
           path: |
             cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
 
+  ios-xcode-build:
+    runs-on: macos-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Build Xcode project
+        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
+
+
 # freeBSD-latest:
 #  runs-on: macos-12
 #  steps:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f32df5fe5..3e0009415 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,6 +43,7 @@ else()
 endif()
 
 # general
+option(BUILD_SHARED_LIBS "build shared libraries" OFF)
 option(LLAMA_STATIC "llama: static link libraries" OFF)
 option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)
@@ -100,6 +101,9 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALO
 option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER "llama: build server example" ON)
 
+# Required for relocatable CMake package
+include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
+
 #
 # Compile flags
 #
diff --git a/README.md b/README.md
index 2892132c4..0118eb03e 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,53 @@
+
+
+# This fork is ONLY a modified server UI.
+
+I will maintain it and keep it in sync with the upstream repo, so almost everything here mirrors the original, except for the server UI (which lives in `./examples/server/public`).
+
+![Server UI-1](https://github.com/mounta11n/plusplus-caMalL/blob/master/media/ppcamall-ui-min_p.jpeg?raw=true)
+
+---
+
+- Restyled the UI with CSS to make it look nicer and tidier overall.
+- Moved the CSS out into a separate file.
+- Added a dropdown menu with prompt-style templates.
+- Added a dropdown menu with system prompts.
+- Prompt styles and system prompts are separate files, so they are easy to edit.
+- Created a script that uses `dialog` to compose the command for the server (see the example below).
+- The script can save and load configurations.
+
+
+
+Planned or already in progress:
+
+- WIP multilingual support: you will be able to select the language from a dropdown menu; so far there are language files only for English and German (covering UI elements and system prompts).
+- Dark mode.
+- Templates for the values of the UI options (samplers etc.), e.g. a deterministic template, a creative template, a balanced template, and so on.
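+
+For illustration, the launcher script simply composes and runs a plain `./server` command from whatever you select in the menus. A session might boil down to something like this (the model path and values below are placeholders, not defaults):
+
+```bash
+# start the interactive launcher from the repo root
+./start-server-dialog.sh
+
+# example of a command it may compose from the selected options
+./server -m ./models/my-model.Q4_K_M.gguf -t 8 -c 2048 -b 512 -ngl 35 -cb --host 127.0.0.1 --port 8080
+```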
+
+
+---
+
+
+![Server UI-2](https://github.com/mounta11n/plusplus-caMalL/blob/master/media/ppCamall-ui02.jpeg?raw=true)
+
+
+---
+
+
+![Server TUI-1](https://github.com/mounta11n/plusplus-caMalL/blob/master/media/ppCamall-tui01.jpeg?raw=true)
+
+
+---
+
+
 # llama.cpp
+
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
+
+
+
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 
 [Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
@@ -116,6 +162,7 @@ as the main playground for developing new features for the [ggml](https://github.com/ggerganov/ggml) library
 - [nat/openplayground](https://github.com/nat/openplayground)
 - [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui)
 - [withcatai/catai](https://github.com/withcatai/catai)
+- [semperai/amica](https://github.com/semperai/amica)
 
 ---
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 4f930bdc5..71891edc3 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -26,7 +26,7 @@ add_custom_command(
     COMMENT "Generating build details from Git"
     COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
             -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/build-info.cmake"
+            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake"
     WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
     DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
     VERBATIM
diff --git a/examples/llama.swiftui/.gitignore b/examples/llama.swiftui/.gitignore
new file mode 100644
index 000000000..9bce6af39
--- /dev/null
+++ b/examples/llama.swiftui/.gitignore
@@ -0,0 +1 @@
+xcuserdata
diff --git a/examples/llama.swiftui/README.md b/examples/llama.swiftui/README.md
new file mode 100644
index 000000000..fa68e6ed8
--- /dev/null
+++ b/examples/llama.swiftui/README.md
@@ -0,0 +1,7 @@
+# llama.swiftui
+
+Local inference of llama.cpp on an iPhone.
+So far I have only tested it with the StarCoder 1B model, but it can most likely handle 7B models as well.
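+
+To build it from the command line without code signing, the same `xcodebuild` invocation as the new CI job added in this PR should work (run from the repository root):
+
+```sh
+xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj \
+    -scheme llama.swiftui -sdk iphoneos \
+    CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= \
+    -destination 'generic/platform=iOS' build
+```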
+
+https://github.com/bachittle/llama.cpp/assets/39804642/e290827a-4edb-4093-9642-2a5e399ec545
+
diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
new file mode 100644
index 000000000..aaef09611
--- /dev/null
+++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
@@ -0,0 +1,176 @@
+import Foundation
+
+// import llama
+
+enum LlamaError: Error {
+    case couldNotInitializeContext
+}
+
+actor LlamaContext {
+    private var model: OpaquePointer
+    private var context: OpaquePointer
+    private var batch: llama_batch
+    private var tokens_list: [llama_token]
+
+    var n_len: Int32 = 512
+    var n_cur: Int32 = 0
+    var n_decode: Int32 = 0
+
+    init(model: OpaquePointer, context: OpaquePointer) {
+        self.model = model
+        self.context = context
+        self.tokens_list = []
+        self.batch = llama_batch_init(512, 0, 1)
+    }
+
+    deinit {
+        llama_free(context)
+        llama_free_model(model)
+        llama_backend_free()
+    }
+
+    static func createContext(path: String) throws -> LlamaContext {
+        llama_backend_init(false)
+        let model_params = llama_model_default_params()
+
+        let model = llama_load_model_from_file(path, model_params)
+        guard let model else {
+            print("Could not load model at \(path)")
+            throw LlamaError.couldNotInitializeContext
+        }
+        var ctx_params = llama_context_default_params()
+        ctx_params.seed = 1234
+        ctx_params.n_ctx = 2048
+        ctx_params.n_threads = 8
+        ctx_params.n_threads_batch = 8
+
+        let context = llama_new_context_with_model(model, ctx_params)
+        guard let context else {
+            print("Could not load context!")
+            throw LlamaError.couldNotInitializeContext
+        }
+
+        return LlamaContext(model: model, context: context)
+    }
+
+    func get_n_tokens() -> Int32 {
+        return batch.n_tokens;
+    }
+
+    func completion_init(text: String) {
+        print("attempting to complete \"\(text)\"")
+
+        tokens_list = tokenize(text: text, add_bos: true)
+
+        let n_ctx = llama_n_ctx(context)
+        let n_kv_req = tokens_list.count + (Int(n_len) - tokens_list.count)
+
+        print("\n n_len = \(n_len), n_ctx = \(n_ctx), n_kv_req = \(n_kv_req)")
+
+        if n_kv_req > n_ctx {
+            print("error: n_kv_req > n_ctx, the required KV cache size is not big enough")
+        }
+
+        for id in tokens_list {
+            print(token_to_piece(token: id))
+        }
+
+        // batch = llama_batch_init(512, 0) // done in init()
+        batch.n_tokens = Int32(tokens_list.count)
+
+        // fill the batch with the prompt tokens; only the last one needs logits
+        for i1 in 0..<batch.n_tokens {
+            let i = Int(i1)
+            batch.token[i] = tokens_list[i]
+            batch.pos[i] = i1
+            batch.n_seq_id[i] = 1
+            batch.seq_id[i]![0] = 0
+            batch.logits[i] = 0
+        }
+        batch.logits[Int(batch.n_tokens) - 1] = 1 // true
+
+        if llama_decode(context, batch) != 0 {
+            print("llama_decode() failed")
+        }
+
+        n_cur = batch.n_tokens
+    }
+
+    func completion_loop() -> String {
+        var new_token_id: llama_token = 0
+
+        let n_vocab = llama_n_vocab(model)
+        let logits = llama_get_logits_ith(context, batch.n_tokens - 1)
+
+        var candidates = Array<llama_token_data>()
+        candidates.reserveCapacity(Int(n_vocab))
+
+        for token_id in 0..<n_vocab {
+            candidates.append(llama_token_data(id: token_id, logit: logits![Int(token_id)], p: 0.0))
+        }
+        candidates.withUnsafeMutableBufferPointer() { buffer in
+            var candidates_p = llama_token_data_array(data: buffer.baseAddress, size: buffer.count, sorted: false)
+
+            new_token_id = llama_sample_token_greedy(context, &candidates_p)
+        }
+
+        if new_token_id == llama_token_eos(model) || n_cur == n_len {
+            print("\n")
+            return ""
+        }
+
+        let new_token_str = token_to_piece(token: new_token_id)
+        print(new_token_str)
+
+        // prepare the next batch with the sampled token
+        batch.n_tokens = 0
+
+        batch.token[Int(batch.n_tokens)] = new_token_id
+        batch.pos[Int(batch.n_tokens)] = n_cur
+        batch.n_seq_id[Int(batch.n_tokens)] = 1
+        batch.seq_id[Int(batch.n_tokens)]![0] = 0
+        batch.logits[Int(batch.n_tokens)] = 1 // true
+        batch.n_tokens += 1
+
+        n_decode += 1
+        n_cur += 1
+
+        if llama_decode(context, batch) != 0 {
+            print("failed to evaluate llama!")
+        }
+
+        return new_token_str
+    }
+
+    func clear() {
+        tokens_list.removeAll()
+    }
+
+    private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
+        let n_tokens = text.count + (add_bos ? 1 : 0)
+        let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
+        let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos, false)
+
+        var swiftTokens: [llama_token] = []
+        for i in 0..<tokenCount {
+            swiftTokens.append(tokens[Int(i)])
+        }
+
+        tokens.deallocate()
+
+        return swiftTokens
+    }
+
+    private func token_to_piece(token: llama_token) -> String {
+        let result = UnsafeMutablePointer<Int8>.allocate(capacity: 8)
+        result.initialize(repeating: Int8(0), count: 8)
+
+        let _ = llama_token_to_piece(model, token, result, 8)
+
+        let resultStr = String(cString: result)
+
+        result.deallocate()
+
+        return resultStr
+    }
+}
diff --git a/examples/llama.swiftui/llama.cpp.swift/bridging-header.h b/examples/llama.swiftui/llama.cpp.swift/bridging-header.h
new file mode 100644
index 000000000..6cd72c979
--- /dev/null
+++ b/examples/llama.swiftui/llama.cpp.swift/bridging-header.h
@@ -0,0 +1,5 @@
+//
+// Use this file to import your target's public headers that you would like to expose to Swift.
+//
+
+#import "llama.h"
diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..bc1fd15ce
--- /dev/null
+++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
@@ -0,0 +1,481 @@
+// !$*UTF8*$!
+{
+    archiveVersion = 1;
+    classes = {
+    };
+    objectVersion = 56;
+    objects = {
+
+/* Begin PBXBuildFile section */
+        542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; };
+        5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; };
+        542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
+        542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
+        542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; };
+        542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
+        549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
+        549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
+        8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
+        8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
+        8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
+        8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */; };
+        8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A39BE092AC7601000BFEB40 /* Accelerate.framework */; };
+        8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
+        8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
+        8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+        542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = "<group>"; };
+        542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = "<group>"; };
+        542376092B0D9C40008E6A1C /* ggml-backend.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../ggml-backend.h"; sourceTree = "<group>"; };
+        5423760A2B0D9C4B008E6A1C /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../ggml-backend.c"; sourceTree = "<group>"; };
+        542EA09B2AC8723900A8AEE9 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../ggml.c; sourceTree = "<group>"; };
+        542EA09C2AC8723900A8AEE9 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../ggml.h; sourceTree = "<group>"; };
+        542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../ggml-alloc.h"; sourceTree = "<group>"; };
+        542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../ggml-alloc.c"; sourceTree = "<group>"; };
+        542EA0A12AC8729100A8AEE9 /* llama.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = llama.cpp; path = ../../llama.cpp; sourceTree = "<group>"; };
+        542EA0A22AC8729100A8AEE9 /* llama.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = llama.h; path = ../../llama.h; sourceTree = "<group>"; };
+        549479C52AC9E0F200E0F78B /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../ggml-metal.m"; sourceTree = "<group>"; };
+        549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
+        549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
+        549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
+        8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
+        8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
+        8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
+        8A1C83782AC328BD0096AF73 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
+        8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+        8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
+        8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
+        8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = "llama-2-7b-chat.Q2_K.gguf"; sourceTree = "<group>"; };
+        8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
+        8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
+        8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+        8A1C83702AC328BD0096AF73 /* Frameworks */ = {
+            isa = PBXFrameworksBuildPhase;
+            buildActionMask = 2147483647;
+            files = (
+                549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */,
+                8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */,
+            );
+            runOnlyForDeploymentPostprocessing = 0;
+        };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+        8A08D1F62AC7383900FE6CD4 /* llama.cpp */ = {
+            isa = PBXGroup;
+            children = (
+                5423760A2B0D9C4B008E6A1C /* ggml-backend.c */,
+                542376092B0D9C40008E6A1C /* ggml-backend.h */,
+                542376062B0D9BEA008E6A1C /* ggml-quants.h */,
+                542376072B0D9BFB008E6A1C /* ggml-quants.c */,
+                549479C82AC9E10B00E0F78B /* ggml-metal.metal */,
+                549479C62AC9E0F200E0F78B /* ggml-metal.h */,
+                549479C52AC9E0F200E0F78B /* ggml-metal.m */,
+                542EA09B2AC8723900A8AEE9 /* ggml.c */,
+                542EA09C2AC8723900A8AEE9 /* ggml.h */,
+                542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */,
+                542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */,
+                542EA0A12AC8729100A8AEE9 /* llama.cpp */,
+                542EA0A22AC8729100A8AEE9 /* llama.h */,
+            );
+            name = llama.cpp;
+            sourceTree = "<group>";
+        };
+        8A1C836A2AC328BD0096AF73 = {
+            isa = PBXGroup;
+            children = (
+                8A08D1F62AC7383900FE6CD4 /* llama.cpp */,
+                8A907F312AC7134E006146EA /* llama.cpp.swift */,
+                8A3F84232AC4C891005E2EE8 /* models */,
+                8A1C83752AC328BD0096AF73 /* llama.swiftui */,
+                8A1C83742AC328BD0096AF73 /* Products */,
+                8A39BE082AC7601000BFEB40 /* Frameworks */,
+            );
+            sourceTree = "<group>";
+        };
+        8A1C83742AC328BD0096AF73 /* Products */ = {
+            isa = PBXGroup;
+            children = (
+                8A1C83732AC328BD0096AF73 /* llama.swiftui.app */,
+            );
+            name = Products;
+            sourceTree = "<group>";
+        };
+        8A1C83752AC328BD0096AF73 /* llama.swiftui */ = {
+            isa = PBXGroup;
+            children = (
+                8A3F84102AC4BD85005E2EE8 /* Resources */,
+                8A9F7C4B2AC332DC008AE1EA /* Models */,
+                8A9F7C4A2AC332BF008AE1EA /* UI */,
+                8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */,
+                8A1C837A2AC328BE0096AF73 /* Assets.xcassets */,
+                8A1C837C2AC328BE0096AF73 /* Preview Content */,
+            );
+            path = llama.swiftui;
+            sourceTree = "<group>";
+        };
+        8A1C837C2AC328BE0096AF73 /* Preview Content */ = {
+            isa = PBXGroup;
+            children = (
+                8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */,
+            );
+            path = "Preview Content";
+            sourceTree = "<group>";
+        };
+        8A39BE082AC7601000BFEB40 /* Frameworks */ = {
+            isa = PBXGroup;
+            children = (
+                549479CA2AC9E16000E0F78B /* Metal.framework */,
+                8A39BE092AC7601000BFEB40 /* Accelerate.framework */,
+            );
+            name = Frameworks;
+            sourceTree = "<group>";
+        };
+        8A3F84102AC4BD85005E2EE8 /* Resources */ = {
+            isa = PBXGroup;
+            children = (
+                8A3F84112AC4BD8C005E2EE8 /* models */,
+            );
+            path = Resources;
+            sourceTree = "<group>";
+        };
+        8A3F84112AC4BD8C005E2EE8 /* models */ = {
+            isa = PBXGroup;
+            children = (
+                8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */,
+            );
+            path = models;
+            sourceTree = "<group>";
+        };
+        8A907F312AC7134E006146EA /* llama.cpp.swift */ = {
+            isa = PBXGroup;
+            children = (
+                8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */,
+                8A907F322AC7134E006146EA /* LibLlama.swift */,
+            );
+            path = llama.cpp.swift;
+            sourceTree = "<group>";
+        };
+        8A9F7C4A2AC332BF008AE1EA /* UI */ = {
+            isa = PBXGroup;
+            children = (
+                8A1C83782AC328BD0096AF73 /* ContentView.swift */,
+            );
+            path = UI;
+            sourceTree = "<group>";
+        };
+        8A9F7C4B2AC332DC008AE1EA /* Models */ = {
+            isa = PBXGroup;
+            children = (
+                8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */,
+            );
+            path = Models;
+            sourceTree = "<group>";
+        };
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+        8A1C83722AC328BD0096AF73 /* llama.swiftui */ = {
+            isa = PBXNativeTarget;
+            buildConfigurationList = 8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */;
+            buildPhases = (
+                8A1C836F2AC328BD0096AF73 /* Sources */,
+                8A1C83702AC328BD0096AF73 /* Frameworks */,
+                8A1C83712AC328BD0096AF73 /* Resources */,
+            );
+            buildRules = (
+            );
+            dependencies = (
+            );
+            name = llama.swiftui;
+            packageProductDependencies = (
+            );
+            productName = llama.swiftui;
+            productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */;
+            productType = "com.apple.product-type.application";
+        };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+        8A1C836B2AC328BD0096AF73 /* Project object */ = {
+            isa = PBXProject;
+            attributes = {
+                BuildIndependentTargetsInParallel = 1;
+                LastSwiftUpdateCheck = 1500;
+                LastUpgradeCheck = 1500;
+                TargetAttributes = {
+                    8A1C83722AC328BD0096AF73 = {
+                        CreatedOnToolsVersion = 15.0;
+                        LastSwiftMigration = 1500;
+                    };
+                };
+            };
+            buildConfigurationList = 8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */;
+            compatibilityVersion = "Xcode 14.0";
+            developmentRegion = en;
+            hasScannedForEncodings = 0;
+            knownRegions = (
+                en,
+                Base,
+            );
+            mainGroup = 8A1C836A2AC328BD0096AF73;
+            packageReferences = (
+            );
+            productRefGroup = 8A1C83742AC328BD0096AF73 /* Products */;
+            projectDirPath = "";
+            projectRoot = "";
+            targets = (
+                8A1C83722AC328BD0096AF73 /* llama.swiftui */,
+            );
+        };
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+        8A1C83712AC328BD0096AF73 /* Resources */ = {
+            isa = PBXResourcesBuildPhase;
+            buildActionMask = 2147483647;
+            files = (
+                542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
+                8A3F84242AC4C891005E2EE8 /* models in Resources */,
+                8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
+                8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
+            );
+            runOnlyForDeploymentPostprocessing = 0;
+        };
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+        8A1C836F2AC328BD0096AF73 /* Sources */ = {
+            isa = PBXSourcesBuildPhase;
+            buildActionMask = 2147483647;
+            files
= ( + 542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */, + 549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */, + 542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */, + 8A907F332AC7138A006146EA /* LibLlama.swift in Sources */, + 542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */, + 8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */, + 8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */, + 8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */, + 542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */, + 5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 8A1C837F2AC328BE0096AF73 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 17.0; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)"; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 8A1C83802AC328BE0096AF73 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + 
CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 17.0; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 8A1C83822AC328BE0096AF73 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\""; + DEVELOPMENT_TEAM = STLSG3FG8Q; + ENABLE_PREVIEWS = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; + INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; + INFOPLIST_KEY_UILaunchScreen_Generation = YES; + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; + IPHONEOS_DEPLOYMENT_TARGET = 16.0; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h"; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 8A1C83832AC328BE0096AF73 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\""; + DEVELOPMENT_TEAM = STLSG3FG8Q; + ENABLE_PREVIEWS = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; + 
+                INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
+                INFOPLIST_KEY_UILaunchScreen_Generation = YES;
+                INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+                INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+                IPHONEOS_DEPLOYMENT_TARGET = 16.0;
+                LD_RUNPATH_SEARCH_PATHS = (
+                    "$(inherited)",
+                    "@executable_path/Frameworks",
+                );
+                MARKETING_VERSION = 1.0;
+                PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
+                PRODUCT_NAME = "$(TARGET_NAME)";
+                SWIFT_EMIT_LOC_STRINGS = YES;
+                SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
+                SWIFT_VERSION = 5.0;
+                TARGETED_DEVICE_FAMILY = "1,2";
+            };
+            name = Release;
+        };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+        8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */ = {
+            isa = XCConfigurationList;
+            buildConfigurations = (
+                8A1C837F2AC328BE0096AF73 /* Debug */,
+                8A1C83802AC328BE0096AF73 /* Release */,
+            );
+            defaultConfigurationIsVisible = 0;
+            defaultConfigurationName = Release;
+        };
+        8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */ = {
+            isa = XCConfigurationList;
+            buildConfigurations = (
+                8A1C83822AC328BE0096AF73 /* Debug */,
+                8A1C83832AC328BE0096AF73 /* Release */,
+            );
+            defaultConfigurationIsVisible = 0;
+            defaultConfigurationName = Release;
+        };
+/* End XCConfigurationList section */
+    };
+    rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */;
+}
diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata
new file mode 100644
index 000000000..919434a62
--- /dev/null
+++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:">
+   </FileRef>
+</Workspace>
diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
new file mode 100644
index 000000000..3d4c1e552
--- /dev/null
+++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>IDEDidComputeMac32BitWarning</key>
+    <true/>
+</dict>
+</plist>
diff --git a/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json b/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json
new file mode 100644
index 000000000..eb8789700
--- /dev/null
+++ b/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json
@@ -0,0 +1,11 @@
+{
+  "colors" : [
+    {
+      "idiom" : "universal"
+    }
+  ],
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
diff --git a/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json b/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json
new file mode 100644
index 000000000..13613e3ee
--- /dev/null
+++ b/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json
@@ -0,0 +1,13 @@
+{
+  "images" : [
+    {
+      "idiom" : "universal",
+      "platform" : "ios",
+      "size" : "1024x1024"
+    }
+  ],
+  "info" : {
+    "author" : "xcode",
"version" : 1 + } +} diff --git a/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json b/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json new file mode 100644 index 000000000..73c00596a --- /dev/null +++ b/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift b/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift new file mode 100644 index 000000000..babc60cdc --- /dev/null +++ b/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift @@ -0,0 +1,45 @@ +import Foundation + +@MainActor +class LlamaState: ObservableObject { + @Published var messageLog = "" + + private var llamaContext: LlamaContext? + private var modelUrl: URL? { + Bundle.main.url(forResource: "q8_0", withExtension: "gguf", subdirectory: "models") + // Bundle.main.url(forResource: "llama-2-7b-chat", withExtension: "Q2_K.gguf", subdirectory: "models") + } + init() { + do { + try loadModel() + } catch { + messageLog += "Error!\n" + } + } + + private func loadModel() throws { + messageLog += "Loading model...\n" + if let modelUrl { + llamaContext = try LlamaContext.createContext(path: modelUrl.path()) + messageLog += "Loaded model \(modelUrl.lastPathComponent)\n" + } else { + messageLog += "Could not locate model\n" + } + } + + func complete(text: String) async { + guard let llamaContext else { + return + } + messageLog += "Attempting to complete text...\n" + await llamaContext.completion_init(text: text) + messageLog += "\(text)" + + while await llamaContext.n_cur <= llamaContext.n_len { + let result = await llamaContext.completion_loop() + messageLog += "\(result)" + } + await llamaContext.clear() + messageLog += "\n\ndone\n" + } +} diff --git a/examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json b/examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json new file mode 100644 index 000000000..73c00596a --- /dev/null +++ b/examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/llama.swiftui/llama.swiftui/Resources/models/.gitignore b/examples/llama.swiftui/llama.swiftui/Resources/models/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift new file mode 100644 index 000000000..0bd16a806 --- /dev/null +++ b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift @@ -0,0 +1,42 @@ +import SwiftUI + +struct ContentView: View { + @StateObject var llamaState = LlamaState() + + @State private var multiLineText = "" + + var body: some View { + VStack { + ScrollView(.vertical) { + Text(llamaState.messageLog) + } + + TextEditor(text: $multiLineText) + .frame(height: 200) + .padding() + .border(Color.gray, width: 0.5) + Button(action: { + sendText() + }) { + Text("Send") + .padding() + .background(Color.blue) + .foregroundColor(.white) + .cornerRadius(8) + } + } + .padding() + } + + func sendText() { + Task { + await llamaState.complete(text: multiLineText) + multiLineText = "" + } + } +} +/* +#Preview { + ContentView() +} +*/ diff --git a/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift b/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift new file mode 100644 index 
000000000..cccda8a97 --- /dev/null +++ b/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift @@ -0,0 +1,10 @@ +import SwiftUI + +@main +struct llama_swiftuiApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} diff --git a/ggml.c b/ggml.c index 0c7264a36..c522a101f 100644 --- a/ggml.c +++ b/ggml.c @@ -9373,7 +9373,7 @@ static bool ggml_compute_forward_mul_mat_use_blas( // TODO: find the optimal values for these if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && - src0->type == GGML_TYPE_F32 && + //src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) { diff --git a/icon.png b/icon.png new file mode 100755 index 000000000..c071a5b3c Binary files /dev/null and b/icon.png differ diff --git a/llama.cpp b/llama.cpp index f2b5967d7..cb544228b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -5550,18 +5550,8 @@ static int llama_decode_internal( n_threads = std::min(4, n_threads); } - // If all tensors can be run on the GPU then using more than 1 thread is detrimental. - const bool full_offload_supported = - model.arch == LLM_ARCH_LLAMA || - model.arch == LLM_ARCH_BAICHUAN || - model.arch == LLM_ARCH_FALCON || - model.arch == LLM_ARCH_REFACT || - model.arch == LLM_ARCH_MPT || - model.arch == LLM_ARCH_STARCODER || - model.arch == LLM_ARCH_STABLELM; - const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 3; - if (ggml_cpu_has_cublas() && full_offload_supported && fully_offloaded) { + if (ggml_cpu_has_cublas() && fully_offloaded) { n_threads = 1; } diff --git a/media/llama-server-icon.png b/media/llama-server-icon.png new file mode 100755 index 000000000..c071a5b3c Binary files /dev/null and b/media/llama-server-icon.png differ diff --git a/media/ppcamall-ui-min_p.jpeg b/media/ppcamall-ui-min_p.jpeg new file mode 100644 index 000000000..e21cd041d Binary files /dev/null and b/media/ppcamall-ui-min_p.jpeg differ diff --git a/scripts/build-info.cmake b/scripts/build-info.cmake index 73853dfa4..ea3dc55c8 100644 --- a/scripts/build-info.cmake +++ b/scripts/build-info.cmake @@ -1,5 +1,3 @@ -set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in") -set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp") set(BUILD_NUMBER 0) set(BUILD_COMMIT "unknown") set(BUILD_COMPILER "unknown") @@ -58,23 +56,3 @@ else() ) set(BUILD_TARGET ${OUT}) endif() - -# Only write the build info if it changed -if(EXISTS ${OUTPUT_FILE}) - file(READ ${OUTPUT_FILE} CONTENTS) - string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS}) - set(OLD_COMMIT ${CMAKE_MATCH_1}) - string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS}) - set(OLD_COMPILER ${CMAKE_MATCH_1}) - string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS}) - set(OLD_TARGET ${CMAKE_MATCH_1}) - if ( - NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR - NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR - NOT OLD_TARGET STREQUAL BUILD_TARGET - ) - configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) - endif() -else() - configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) -endif() diff --git a/scripts/gen-build-info-cpp.cmake b/scripts/gen-build-info-cpp.cmake new file mode 100644 index 000000000..d89338920 --- /dev/null +++ b/scripts/gen-build-info-cpp.cmake @@ -0,0 +1,24 @@ +include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) + +set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in") +set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp") + +# Only write the build info if it changed +if(EXISTS 
${OUTPUT_FILE}) + file(READ ${OUTPUT_FILE} CONTENTS) + string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS}) + set(OLD_COMMIT ${CMAKE_MATCH_1}) + string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS}) + set(OLD_COMPILER ${CMAKE_MATCH_1}) + string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS}) + set(OLD_TARGET ${CMAKE_MATCH_1}) + if ( + NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR + NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR + NOT OLD_TARGET STREQUAL BUILD_TARGET + ) + configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) + endif() +else() + configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) +endif() diff --git a/start-server-dialog.sh b/start-server-dialog.sh new file mode 100755 index 000000000..aeb6535e8 --- /dev/null +++ b/start-server-dialog.sh @@ -0,0 +1,343 @@ +#!/bin/bash + +# Set default values +model_path="./models/" +mmproj_path="" +threads=4 +ctx_size=512 +batch_size=512 +n_gpu_layers=0 +cont_batching="off" +mlock="off" +no_mmap="off" +host="127.0.0.1" +port="8080" +advanced_options="" + + + +model_selection() { + # User selects a file or folder + exec 3>&1 + model_path=$(dialog --backtitle "Model Selection" \ + --title "Select Model File or Folder" \ + --fselect "$HOME/" 14 60 \ + 2>&1 1>&3) + exit_status=$? + exec 3>&- + + # Check whether user has selected 'Cancel' + if [ $exit_status = 1 ]; then + return + fi + + # If a folder has been selected, search for *.gguf files + if [ -d "$model_path" ]; then + model_files=($(find "$model_path" -name "*.gguf" 2>/dev/null)) + elif [ -f "$model_path" ]; then + model_files=("$model_path") + else + dialog --backtitle "Model Selection" \ + --title "Invalid Selection" \ + --msgbox "The selected path is not valid." 7 50 + return + fi + +# Selection menu for models found +exec 3>&1 +model_choice=$(dialog --backtitle "Model Selection" \ + --title "Select a Model File" \ + --menu "Choose one of the found models:" 15 60 4 \ + $(for i in "${!model_files[@]}"; do echo "$((i+1))" "$(basename "${model_files[$i]}")"; done) \ + 2>&1 1>&3) +exit_status=$? +exec 3>&- + +# Check whether user has selected 'Cancel' +if [ $exit_status = 1 ]; then + return +fi + +# Set path to the selected model +model_path=${model_files[$((model_choice-1))]} +} + + + +multimodal_model_selection() { + # User selects a file or folder + exec 3>&1 + mmproj_path=$(dialog --backtitle "Multimodal Model" \ + --title "Select Model File or Folder" \ + --fselect "$HOME/" 14 60 \ + 2>&1 1>&3) + exit_status=$? + exec 3>&- + + # Check whether user has selected 'Cancel' + if [ $exit_status = 1 ]; then + return + fi + + # If a folder has been selected, search for *.bin files + if [ -d "$mmproj_path" ]; then + multi_modal_files=($(find "$mmproj_path" -name "*.bin" 2>/dev/null)) + elif [ -f "$mmproj_path" ]; then + multi_modal_files=("$mmproj_path") + else + dialog --backtitle "Multimodal Model" \ + --title "Invalid Selection" \ + --msgbox "The selected path is not valid." 7 50 + return + fi + +# Selection menu for models found +exec 3>&1 +multi_modal_choice=$(dialog --backtitle "Multimodal Model" \ + --title "Select a Model File" \ + --menu "Choose one of the found models:" 15 60 4 \ + $(for i in "${!multi_modal_files[@]}"; do echo "$((i+1))" "$(basename "${multi_modal_files[$i]}")"; done) \ + 2>&1 1>&3) +exit_status=$? 
+exec 3>&-
+
+# Check whether user has selected 'Cancel'
+if [ $exit_status = 1 ]; then
+    return
+fi
+
+# Set path to the selected model
+mmproj_path=${multi_modal_files[$((multi_modal_choice-1))]}
+}
+
+
+
+options() {
+    # Show form for entering the options
+    exec 3>&1
+    form_values=$(dialog --backtitle "Options Configuration" \
+        --title "Set Options" \
+        --form "Enter the values for the following options:" \
+        15 50 0 \
+        "Number of Threads (-t):" 1 1 "$threads" 1 25 25 5 \
+        "Context Size (-c):" 2 1 "$ctx_size" 2 25 25 5 \
+        "Batch Size (-b):" 3 1 "$batch_size" 3 25 25 5 \
+        "GPU Layers (-ngl):" 4 1 "$n_gpu_layers" 4 25 25 5 \
+        2>&1 1>&3)
+    exit_status=$?
+    exec 3>&-
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Save the entered values in the corresponding variables.
+    # dialog --form prints one value per line, and a plain `read` stops at the
+    # first newline, so read until EOF and split the fields on newlines.
+    IFS=$'\n' read -r -d '' threads ctx_size batch_size n_gpu_layers <<< "$form_values"
+}
+
+
+
+further_options() {
+    # Initial values for the checkboxes based on current settings
+    cb_value=$([ "$cont_batching" = "on" ] && echo "on" || echo "off")
+    mlock_value=$([ "$mlock" = "on" ] && echo "on" || echo "off")
+    no_mmap_value=$([ "$no_mmap" = "on" ] && echo "on" || echo "off")
+
+    # Show dialog for setting options
+    exec 3>&1
+    choices=$(dialog --backtitle "Further Options" \
+        --title "Boolean Options" \
+        --checklist "Select options:" 15 60 3 \
+        "1" "Continuous Batching (-cb)" $cb_value \
+        "2" "Memory Lock (--mlock)" $mlock_value \
+        "3" "No Memory Map (--no-mmap)" $no_mmap_value \
+        2>&1 1>&3)
+    exit_status=$?
+    exec 3>&-
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Set options based on user selection
+    cont_batching="off"
+    mlock="off"
+    no_mmap="off"
+    for choice in $choices; do
+        case $choice in
+            1) cont_batching="on" ;;
+            2) mlock="on" ;;
+            3) no_mmap="on" ;;
+        esac
+    done
+}
+
+
+
+advanced_options() {
+    # Input fields for Advanced Options
+    exec 3>&1
+    advanced_values=$(dialog --backtitle "Advanced Options" \
+        --title "Advanced Server Configuration" \
+        --form "Enter the advanced configuration options:" \
+        15 60 0 \
+        "Host IP:" 1 1 "$host" 1 15 15 0 \
+        "Port:" 2 1 "$port" 2 15 5 0 \
+        "Additional Options:" 3 1 "$advanced_options" 3 15 30 0 \
+        2>&1 1>&3)
+    exit_status=$?
+    exec 3>&-
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Read the entries and save them in the corresponding variables
+    # (one value per line, same as in options() above)
+    IFS=$'\n' read -r -d '' host port advanced_options <<< "$advanced_values"
+}
+
+
+
+start_server() {
+    # Compose the command from the selected options
+    cmd="./server"
+    [ -n "$model_path" ] && cmd+=" -m $model_path"
+    [ -n "$mmproj_path" ] && cmd+=" --mmproj $mmproj_path"
+    [ "$threads" -ne 4 ] && cmd+=" -t $threads"
+    [ "$ctx_size" -ne 512 ] && cmd+=" -c $ctx_size"
+    [ "$batch_size" -ne 512 ] && cmd+=" -b $batch_size"
+    [ "$n_gpu_layers" -ne 0 ] && cmd+=" -ngl $n_gpu_layers"
+    [ "$cont_batching" = "on" ] && cmd+=" -cb"
+    [ "$mlock" = "on" ] && cmd+=" --mlock"
+    [ "$no_mmap" = "on" ] && cmd+=" --no-mmap"
+    [ -n "$host" ] && cmd+=" --host $host"
+    [ -n "$port" ] && cmd+=" --port $port"
+    [ -n "$advanced_options" ] && cmd+=" $advanced_options"
+
+    eval "$cmd"
+    read -p 'Do not forget to quit the server later with Ctrl+C as soon as you are finished. Press Enter to continue...'
+}
+
+
+
+# Function to save the current configuration
+save_config() {
+    exec 3>&1
+    config_file=$(dialog --backtitle "Save Configuration" \
+        --title "Save Configuration File" \
+        --fselect "$HOME/" 14 60 \
+        2>&1 1>&3)
+    exit_status=$?
+    exec 3>&-
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Save the configuration to the file
+    cat > "$config_file" << EOF
+model_path=$model_path
+mmproj_path=$mmproj_path
+threads=$threads
+ctx_size=$ctx_size
+batch_size=$batch_size
+n_gpu_layers=$n_gpu_layers
+cont_batching=$cont_batching
+mlock=$mlock
+no_mmap=$no_mmap
+host=$host
+port=$port
+advanced_options=$advanced_options
+EOF
+
+    dialog --backtitle "Save Configuration" \
+        --title "Configuration Saved" \
+        --msgbox "Configuration has been saved to $config_file" 7 50
+}
+
+
+
+# Function for loading the configuration from a file
+load_config() {
+    exec 3>&1
+    config_file=$(dialog --backtitle "Load Configuration" \
+        --title "Load Configuration File" \
+        --fselect "$HOME/" 14 60 \
+        2>&1 1>&3)
+    exit_status=$?
+    exec 3>&-
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Check whether the configuration file exists
+    if [ ! -f "$config_file" ]; then
+        dialog --backtitle "Load Configuration" \
+            --title "File Not Found" \
+            --msgbox "The file $config_file was not found." 7 50
+        return
+    fi
+
+    # Load configuration from the file
+    source "$config_file"
+
+    dialog --backtitle "Load Configuration" \
+        --title "Configuration Loaded" \
+        --msgbox "Configuration has been loaded from $config_file" 7 50
+}
+
+
+
+# Function to show the main menu
+show_main_menu() {
+    while true; do
+        exec 3>&1
+        selection=$(dialog \
+            --backtitle "Server Configuration" \
+            --title "Main Menu" \
+            --clear \
+            --cancel-label "Exit" \
+            --menu "Please select:" 18 50 8 \
+            "1" "Model Selection" \
+            "2" "Multimodal Model Selection" \
+            "3" "Options" \
+            "4" "Further Options" \
+            "5" "Advanced Options" \
+            "6" "Save Config" \
+            "7" "Load Config" \
+            "8" "Start Server" \
+            2>&1 1>&3)
+        exit_status=$?
+        exec 3>&-
+
+        # Check whether user has selected 'Exit'
+        if [ $exit_status = 1 ]; then
+            clear
+            exit
+        fi
+
+        # Invoke the corresponding function based on the selection
+        case $selection in
+            1) model_selection ;;
+            2) multimodal_model_selection ;;
+            3) options ;;
+            4) further_options ;;
+            5) advanced_options ;;
+            6) save_config ;;
+            7) load_config ;;
+            8) start_server ;;
+            *) clear ;;
+        esac
+    done
+}
+
+
+
+# Show main menu
+show_main_menu
diff --git a/start-server-zenity.sh b/start-server-zenity.sh
new file mode 100755
index 000000000..5415dcfda
--- /dev/null
+++ b/start-server-zenity.sh
@@ -0,0 +1,294 @@
+#!/bin/bash
+
+# Set default values
+model_path="./models/"
+mmproj_path=""
+threads=4
+ctx_size=512
+batch_size=512
+n_gpu_layers=0
+cont_batching="off"
+mlock="off"
+no_mmap="off"
+host="127.0.0.1"
+port="8080"
+advanced_options=""
+
+
+
+# Function to install Zenity
+install_zenity() {
+    echo "Trying to install Zenity with $1..."
+    # brew and pacman use a different install syntax than apt/apt-get/yum
+    case "$1" in
+        brew)   brew install zenity ;;
+        pacman) pacman -S --noconfirm zenity ;;
+        *)      "$1" install -y zenity ;;
+    esac
+    if ! command -v zenity &> /dev/null; then
+        echo "Error: Zenity could not be installed."
+        exit 1
+    fi
+    echo "Zenity was successfully installed."
+}
+
+# Check whether Zenity is already installed
+if ! \
command -v zenity &> /dev/null; then + # Zenity is not installed, try to find the package manager + PACKAGE_MANAGERS=(brew apt apt-get yum pacman) + for manager in "${PACKAGE_MANAGERS[@]}"; do + if command -v $manager &> /dev/null; then + # Package manager found, ask the user for permission + read -p "Zenity is not installed. Would you like to install Zenity with $manager? (y/N) " response + if [[ "$response" =~ ^[Yy]$ ]]; then + # User has agreed, install Zenity + install_zenity $manager + break + else + echo "Installation canceled." + exit 1 + fi + fi + done + if ! command -v zenity &> /dev/null; then + echo "No supported package manager found or Zenity could not be installed. Please install Zenity manually." + exit 1 + fi +fi + + + +model_selection() { + # User selects a file or folder + model_path=$(zenity --file-selection --title="Select Model File or Folder" --filename="$HOME/" --file-filter="*.gguf" --file-filter="*" --width=300 --height=400) + exit_status=$? + + # Check whether user has selected 'Cancel' + if [ $exit_status = 1 ]; then + return + fi + + # If a folder has been selected, search for *.gguf files + if [ -d "$model_path" ]; then + model_files=($(find "$model_path" -name "*.gguf" 2>/dev/null)) + elif [ -f "$model_path" ]; then + model_files=("$model_path") + else + zenity --error --title="Invalid Selection" --text="The selected path is not valid." + return + fi + + # Selection menu for models found + model_choice=$(zenity --list --title="Select a Model File" --column="Index" --column="Model File" $(for i in "${!model_files[@]}"; do echo "$((i+1))" "$(basename "${model_files[$i]}")"; done) --width=300 --height=400) + exit_status=$? + + # Check whether user has selected 'Cancel' + if [ $exit_status = 1 ]; then + return + fi + + # Set path to the selected model + model_path=${model_files[$((model_choice-1))]} +} + + + +multimodal_model_selection() { + # User selects a file or folder + mmproj_path=$(zenity --file-selection --title="Select Multimodal Model File or Folder" --filename="$HOME/" --file-filter="*.bin" --file-filter="*" --width=300 --height=400) + exit_status=$? + + # Check whether user has selected 'Cancel' + if [ $exit_status = 1 ]; then + return + fi + + # If a folder has been selected, search for *.bin files + if [ -d "$mmproj_path" ]; then + multi_modal_files=($(find "$mmproj_path" -name "*.bin" 2>/dev/null)) + elif [ -f "$mmproj_path" ]; then + multi_modal_files=("$mmproj_path") + else + zenity --error --title="Invalid Selection" --text="The selected path is not valid." + return + fi + + # Selection menu for models found + multi_modal_choice=$(zenity --list --title="Select a Multimodal Model File" --column="Index" --column="Model File" $(for i in "${!multi_modal_files[@]}"; do echo "$((i+1))" "$(basename "${multi_modal_files[$i]}")"; done) --width=300 --height=400) + exit_status=$? + + # Check whether user has selected 'Cancel' + if [ $exit_status = 1 ]; then + return + fi + + # Set path to the selected model + mmproj_path=${multi_modal_files[$((multi_modal_choice-1))]} +} + + + +options() { + # Show form for entering the options + form_values=$(zenity --forms --title="Set Options" --text="Enter the values for the following options:" --add-entry="Number of Threads (-t):" --add-entry="Context Size (-c):" --add-entry="Batch Size (-b):" --add-entry="GPU Layers (-ngl):" --separator="|" --width=300 --height=400) + exit_status=$? 
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Save the entered values in the corresponding variables
+    IFS="|" read -r threads ctx_size batch_size n_gpu_layers <<< "$form_values"
+}
+
+
+
+further_options() {
+    # Initial values for the checkboxes based on current settings
+    cb_value=$([ "$cont_batching" = "on" ] && echo "TRUE" || echo "FALSE")
+    mlock_value=$([ "$mlock" = "on" ] && echo "TRUE" || echo "FALSE")
+    no_mmap_value=$([ "$no_mmap" = "on" ] && echo "TRUE" || echo "FALSE")
+
+    # Show dialog for setting options (seed the checkboxes with the current values)
+    choices=$(zenity --list --title="Boolean Options" --text="Select options:" --checklist --column="Select" --column="Option" "$cb_value" "Continuous Batching (-cb)" "$mlock_value" "Memory Lock (--mlock)" "$no_mmap_value" "No Memory Map (--no-mmap)" --width=300 --height=400)
+    exit_status=$?
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Set options based on user selection.
+    # zenity separates the checked rows with '|' and the labels contain spaces,
+    # so split on '|' instead of relying on word splitting.
+    cont_batching="off"
+    mlock="off"
+    no_mmap="off"
+    IFS="|" read -ra selected_options <<< "$choices"
+    for choice in "${selected_options[@]}"; do
+        case $choice in
+            "Continuous Batching (-cb)") cont_batching="on" ;;
+            "Memory Lock (--mlock)") mlock="on" ;;
+            "No Memory Map (--no-mmap)") no_mmap="on" ;;
+        esac
+    done
+}
+
+
+
+advanced_options() {
+    # Input fields for Advanced Options
+    advanced_values=$(zenity --forms --title="Advanced Server Configuration" --text="Enter the advanced configuration options:" --add-entry="Host IP:" --add-entry="Port:" --add-entry="Additional Options:" --separator="|" --width=300 --height=400)
+    exit_status=$?
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Read the entries and save them in the corresponding variables
+    IFS="|" read -r host port advanced_options <<< "$advanced_values"
+}
+
+
+
+start_server() {
+    # Compose the command from the selected options
+    cmd="./server"
+    [ -n "$model_path" ] && cmd+=" -m $model_path"
+    [ -n "$mmproj_path" ] && cmd+=" --mmproj $mmproj_path"
+    [ "$threads" -ne 4 ] && cmd+=" -t $threads"
+    [ "$ctx_size" -ne 512 ] && cmd+=" -c $ctx_size"
+    [ "$batch_size" -ne 512 ] && cmd+=" -b $batch_size"
+    [ "$n_gpu_layers" -ne 0 ] && cmd+=" -ngl $n_gpu_layers"
+    [ "$cont_batching" = "on" ] && cmd+=" -cb"
+    [ "$mlock" = "on" ] && cmd+=" --mlock"
+    [ "$no_mmap" = "on" ] && cmd+=" --no-mmap"
+    [ -n "$host" ] && cmd+=" --host $host"
+    [ -n "$port" ] && cmd+=" --port $port"
+    [ -n "$advanced_options" ] && cmd+=" $advanced_options"
+
+    eval "$cmd"
+    read -p 'Press Enter to continue...'
+}
+
+
+
+# Function to save the current configuration
+save_config() {
+    config_file=$(zenity --file-selection --title="Save Configuration File" --filename="$HOME/" --width=300 --height=400)
+    exit_status=$?
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Save the configuration to the file
+    cat > "$config_file" << EOF
+model_path=$model_path
+mmproj_path=$mmproj_path
+threads=$threads
+ctx_size=$ctx_size
+batch_size=$batch_size
+n_gpu_layers=$n_gpu_layers
+cont_batching=$cont_batching
+mlock=$mlock
+no_mmap=$no_mmap
+host=$host
+port=$port
+advanced_options=$advanced_options
+EOF
+
+    zenity --info --title="Configuration Saved" --text="Configuration has been saved to $config_file" --width=300 --height=400
+}
+
+
+
+# Function for loading the configuration from a file
+load_config() {
+    config_file=$(zenity --file-selection --title="Load Configuration File" --filename="$HOME/" --width=300 --height=400)
+    exit_status=$?
+
+    # Check whether user has selected 'Cancel'
+    if [ $exit_status = 1 ]; then
+        return
+    fi
+
+    # Check whether the configuration file exists
+    if [ ! -f "$config_file" ]; then
+        zenity --error --title="File Not Found" --text="The file $config_file was not found." --width=300 --height=400
+        return
+    fi
+
+    # Load configuration from the file
+    source "$config_file"
+
+    zenity --info --title="Configuration Loaded" --text="Configuration has been loaded from $config_file" --width=300 --height=400
+}
+
+
+
+# Function to show the main menu
+show_main_menu() {
+    while true; do
+        selection=$(zenity --list --title="Main Menu" --text="Please select:" --cancel-label="Exit" --column="Index" --column="Option" 1 "Model Selection" 2 "Multimodal Model Selection" 3 "Options" 4 "Further Options" 5 "Advanced Options" 6 "Save Config" 7 "Load Config" 8 "Start Server" --width=300 --height=400)
+        exit_status=$?
+
+        # Check whether user has selected 'Exit'
+        if [ $exit_status = 1 ]; then
+            clear
+            exit
+        fi
+
+        # Invoke the corresponding function based on the selection
+        case $selection in
+            1) model_selection ;;
+            2) multimodal_model_selection ;;
+            3) options ;;
+            4) further_options ;;
+            5) advanced_options ;;
+            6) save_config ;;
+            7) load_config ;;
+            8) start_server ;;
+            *) clear ;;
+        esac
+    done
+}
+
+
+
+# Show main menu
+show_main_menu
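+
+# For reference, save_config writes a plain, sourceable shell config file.
+# A saved configuration looks like this (values are illustrative, not defaults):
+#
+#   model_path=/home/user/models/my-model.Q4_K_M.gguf
+#   mmproj_path=
+#   threads=8
+#   ctx_size=2048
+#   batch_size=512
+#   n_gpu_layers=35
+#   cont_batching=on
+#   mlock=off
+#   no_mmap=off
+#   host=127.0.0.1
+#   port=8080
+#   advanced_options=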