From b97f21851cdef3d9e95c4ec1c9383079d0405a30 Mon Sep 17 00:00:00 2001
From: Jason Flax
Date: Sun, 10 Nov 2024 18:51:20 -0500
Subject: [PATCH] Add corrections

---
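Notes for reviewers (kept below the "---" fold, so git am drops them):

The manifest now vends a single "llama" library product in place of the old
LlamaKit/LlamaKitMain product pair. A downstream manifest would pick it up
roughly as follows. This is a sketch only: the repository URL, branch, and
app/target names are placeholders, not anything this patch establishes.

    // swift-tools-version:5.9
    // Hypothetical consumer Package.swift
    import PackageDescription

    let package = Package(
        name: "MyApp",
        platforms: [.macOS(.v12), .iOS(.v14)],
        dependencies: [
            // Placeholder URL; point at wherever this fork is hosted.
            .package(url: "https://github.com/example/llama.cpp.git", branch: "main")
        ],
        targets: [
            .executableTarget(
                name: "MyApp",
                // "llama" is the product name declared below.
                dependencies: [.product(name: "llama", package: "llama.cpp")]
            )
        ]
    )

LlamaKitMain now bundles Llama-3.2-3B-Instruct-Q4_0.gguf through .process(...)
instead of taking cSettings. SwiftPM synthesizes a Bundle.module accessor for
any target that declares resources, so code inside LlamaKitMain can resolve
the model at runtime like this (sketch, assuming the resource keeps that
filename):

    // Resolve the processed .gguf from the target's resource bundle.
    guard let modelURL = Bundle.module.url(
        forResource: "Llama-3.2-3B-Instruct-Q4_0",
        withExtension: "gguf"
    ) else {
        fatalError("bundled model not found")
    }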
 Package.swift                  | 104 +++++++++------------------
 objc/LlamaSession.mm           |   3 -
 swift/main/main.swift          |   2 +-
 swift/test/LlamaKitTests.swift |   3 +-
 4 files changed, 31 insertions(+), 81 deletions(-)

diff --git a/Package.swift b/Package.swift
index c49ab6b90..4db2c8d93 100644
--- a/Package.swift
+++ b/Package.swift
@@ -1,40 +1,8 @@
 // swift-tools-version:5.9
-import CompilerPluginSupport
+
 import PackageDescription
+import CompilerPluginSupport

-var cppSources = [
-    "src/llama.cpp",
-    "src/llama-vocab.cpp",
-    "src/llama-grammar.cpp",
-    "src/llama-sampling.cpp",
-    "src/unicode.cpp",
-    "src/unicode-data.cpp",
-    "common/sampling.cpp",
-    "common/common.cpp",
-    "common/json-schema-to-grammar.cpp",
-    "common/log.cpp",
-    "common/console.cpp"
-]
-
-var ggmlSources = [
-    "src/ggml.c",
-    "src/ggml-alloc.c",
-    "src/ggml-backend.cpp",
-    "src/ggml-cpu.c",
-    "src/ggml-quants.c",
-    "src/ggml-aarch64.c"
-]
-var resources: [Resource] = []
-var linkerSettings: [LinkerSetting] = []
-var cSettings: [CSetting] = [
-    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-    .unsafeFlags(["-fno-objc-arc"]),
-    // NOTE: NEW_LAPACK will required iOS version 16.4+
-    // We should consider add this in the future when we drop support for iOS 14
-    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-    .define("ACCELERATE_NEW_LAPACK"),
-    .define("ACCELERATE_LAPACK_ILP64"),
-]
 var sources = [
     "src/llama.cpp",
     "src/llama-vocab.cpp",
     "src/llama-grammar.cpp",
     "src/llama-sampling.cpp",
     "src/unicode.cpp",
     "src/unicode-data.cpp",
     "src/ggml.c",
     "src/ggml-alloc.c",
     "src/ggml-backend.cpp",
     "src/ggml-cpu.c",
     "src/ggml-quants.c",
     "src/ggml-aarch64.c",
     "common/sampling.cpp",
@@ -52,13 +20,24 @@ var sources = [
     "common/common.cpp",
     "common/json-schema-to-grammar.cpp",
     "common/log.cpp",
+    "common/console.cpp"
 ]
+
+var resources: [Resource] = []
+var linkerSettings: [LinkerSetting] = []
+var cSettings: [CSetting] = [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    // NOTE: NEW_LAPACK requires iOS version 16.4+
+    // We should consider adding this in the future when we drop support for iOS 14
+    // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+]
+
 #if canImport(Darwin)
 sources.append("ggml/src/ggml-metal.m")
-ggmlSources.append("src/ggml-metal.m")
-//resources.append(.process("src/ggml-metal.metal"))
 resources.append(.process("ggml/src/ggml-metal.metal"))
-
 linkerSettings.append(.linkedFramework("Accelerate"))
 cSettings.append(
     contentsOf: [
@@ -69,7 +48,7 @@ cSettings.append(
 #endif

 #if os(Linux)
-    cSettings.append(.define("_GNU_SOURCE"))
+cSettings.append(.define("_GNU_SOURCE"))
 #endif

 let package = Package(
     name: "llama",
     platforms: [
         .macOS(.v12),
         .iOS(.v14),
         .watchOS(.v4),
@@ -81,8 +60,7 @@ let package = Package(
         .tvOS(.v14)
     ],
     products: [
-        .library(name: "LlamaKit", targets: ["LlamaKit"]),
-        .executable(name: "LlamaKitMain", targets: ["LlamaKitMain"])
+        .library(name: "llama", targets: ["llama"]),
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-syntax.git", branch: "main")
     ],
     targets: [
         .target(
@@ -92,15 +70,15 @@ let package = Package(
             name: "llama",
             path: ".",
             exclude: [
-                    "build",
-                    "cmake",
-                    "examples",
-                    "scripts",
-                    "models",
-                    "tests",
-                    "CMakeLists.txt",
-                    "Makefile",
-                    "ggml/src/ggml-metal-embed.metal"
+                "build",
+                "cmake",
+                "examples",
+                "scripts",
+                "models",
+                "tests",
+                "CMakeLists.txt",
+                "Makefile",
+                "ggml/src/ggml-metal-embed.metal"
             ],
             sources: sources,
             resources: resources,
@@ -108,30 +86,6 @@ let package = Package(
             cSettings: cSettings,
             linkerSettings: linkerSettings
         ),
-//        .target(name: "llama_cpp",
-//                path: ".",
-//                exclude: [
-//                    "cmake",
-//                    "examples",
-//                    "scripts",
-//                    "models",
-//                    "tests",
-//                    "CMakeLists.txt",
-//                    "Makefile",
-//                    "ggml"
-//                ],
-//                sources: cppSources,
-//                publicHeadersPath: "spm-headers",
-//                cSettings: cSettings),
-//        .target(
-//            name: "llama",
-//            dependencies: ["llama_cpp"],
-//            path: "ggml",
-//            sources: ggmlSources,
-//            resources: resources,
-//            publicHeadersPath: "include",
-//            cSettings: cSettings,
-//            linkerSettings: linkerSettings),
         .target(name: "LlamaObjC",
                 dependencies: ["llama"],
                 path: "objc",
@@ -155,7 +109,7 @@ let package = Package(
                 .product(name: "SwiftSyntax", package: "swift-syntax"),
                 .product(name: "SwiftSyntaxMacros", package: "swift-syntax"),
                 .product(name: "SwiftCompilerPlugin", package: "swift-syntax"),
-           ],
+            ],
             path: "swift/JSONSchemaMacros"
         ),
         .macro(
@@ -186,7 +140,7 @@ let package = Package(
         .executableTarget(name: "LlamaKitMain",
                           dependencies: ["LlamaKit"],
                           path: "swift/main",
-                          cSettings: cSettings),
+                          resources: [.process("Llama-3.2-3B-Instruct-Q4_0.gguf")]),
     ],
     cxxLanguageStandard: .cxx17
 )
diff --git a/objc/LlamaSession.mm b/objc/LlamaSession.mm
index db9318481..83694fec9 100644
--- a/objc/LlamaSession.mm
+++ b/objc/LlamaSession.mm
@@ -424,8 +424,6 @@ static BOOL file_is_empty(NSString *path) {
     }

     os_log_info(os_log_inst, "sampler seed: %u\n", [_smpl seed]);
-    // LOG_INF("sampler params: \n%s\n", sparams.print().c_str());
-    // LOG_INF("sampler chain: %s\n", gpt_sampler_print(smpl).c_str());
     os_log_info(os_log_inst, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.nBatch, params.nPredict, params.nKeep);

     // group-attention state
@@ -679,7 +677,6 @@ static BOOL file_is_empty(NSString *path) {
                 output_tokens.push_back(idToken);
                 output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
                 last_output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
-                NSLog(@"Generated %s", last_output_ss.str().c_str());
                 [self willChangeValueForKey:@"lastOutput"];
                 [_mutableLastOutput appendString:token_str];
                 [self didChangeValueForKey:@"lastOutput"];
diff --git a/swift/main/main.swift b/swift/main/main.swift
index 1d6600e7b..7b11c2257 100644
--- a/swift/main/main.swift
+++ b/swift/main/main.swift
@@ -5,7 +5,7 @@ import CoreLocation
 func downloadFile() async throws -> String {
     let fm = FileManager.default
     let tmpDir = fm.temporaryDirectory
-    let destinationURL = tmpDir.appending(path: "llama_groq_gguf.gguf")
+    let destinationURL = tmpDir.appending(path: "llama_tools.gguf")

     guard !fm.fileExists(atPath: destinationURL.path()) else {
         return destinationURL.path()
diff --git a/swift/test/LlamaKitTests.swift b/swift/test/LlamaKitTests.swift
index 80ca60780..0092b9aff 100644
--- a/swift/test/LlamaKitTests.swift
+++ b/swift/test/LlamaKitTests.swift
@@ -146,6 +146,7 @@ struct LlamaSessionSuite {
     }

     // MARK: Session dealloc Test
+    // Note: this test will fail if run in parallel
     @Test func llamaToolSessionDealloc() async throws {
         let params = try await baseParams(url: "https://huggingface.co/bartowski/Llama-3-Groq-8B-Tool-Use-GGUF/resolve/main/Llama-3-Groq-8B-Tool-Use-Q8_0.gguf?download=true", to: "llama_tools.gguf")
         func reportMemoryUsage() -> UInt64? {
@@ -175,7 +176,6 @@ struct LlamaSessionSuite {
             output = try await llama.infer("What question did i just ask you?")
             print(output)
         }.value
-        sleep(1)
         var memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
         try await Task {
@@ -184,7 +184,6 @@ struct LlamaSessionSuite {
             #expect(memPostAlloc > 500)
             _ = try await llama.infer("What was the first question I asked you?")
         }.value
-        sleep(1)
         memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
     }
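Appended note, not part of the diff: the dealloc test reads memory through a
reportMemoryUsage() helper whose body lies outside the hunk context above, so
the patch never shows it. On Apple platforms the usual way to obtain the byte
count that the test divides down to MB is Mach's task_info; a sketch of such
a helper follows, not necessarily the file's actual implementation:

    import Darwin

    // Resident set size of the current process in bytes, or nil on failure.
    func reportMemoryUsage() -> UInt64? {
        var info = mach_task_basic_info()
        var count = mach_msg_type_number_t(
            MemoryLayout<mach_task_basic_info>.size / MemoryLayout<natural_t>.size)
        let kr = withUnsafeMutablePointer(to: &info) { infoPtr in
            infoPtr.withMemoryRebound(to: integer_t.self, capacity: Int(count)) {
                task_info(mach_task_self_,
                          task_flavor_t(MACH_TASK_BASIC_INFO),
                          $0,
                          &count)
            }
        }
        return kr == KERN_SUCCESS ? info.resident_size : nil
    }

Removing the sleep(1) calls assumes the session has fully deallocated by the
time the awaited Task returns. Because the resident size is process-wide,
concurrent tests would skew the thresholds, which is presumably what the new
"will fail if run in parallel" comment is warning about.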