From b97f21851cdef3d9e95c4ec1c9383079d0405a30 Mon Sep 17 00:00:00 2001
From: Jason Flax
Date: Sun, 10 Nov 2024 18:51:20 -0500
Subject: [PATCH] Add corrections

---
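Notes for reviewers (kept below the "---" fold, so git am drops them):

The manifest now vends a single "llama" library product in place of the old
LlamaKit/LlamaKitMain product pair. A downstream manifest would pick it up
roughly as follows. This is a sketch only: the repository URL, branch, and
app/target names are placeholders, not anything this patch establishes.

    // swift-tools-version:5.9
    // Hypothetical consumer Package.swift
    import PackageDescription

    let package = Package(
        name: "MyApp",
        platforms: [.macOS(.v12), .iOS(.v14)],
        dependencies: [
            // Placeholder URL; point at wherever this fork is hosted.
            .package(url: "https://github.com/example/llama.cpp.git", branch: "main")
        ],
        targets: [
            .executableTarget(
                name: "MyApp",
                // "llama" is the product name declared below.
                dependencies: [.product(name: "llama", package: "llama.cpp")]
            )
        ]
    )

LlamaKitMain now bundles Llama-3.2-3B-Instruct-Q4_0.gguf through .process(...)
instead of taking cSettings. SwiftPM synthesizes a Bundle.module accessor for
any target that declares resources, so code inside LlamaKitMain can resolve
the model at runtime like this (sketch, assuming the resource keeps that
filename):

    // Resolve the processed .gguf from the target's resource bundle.
    guard let modelURL = Bundle.module.url(
        forResource: "Llama-3.2-3B-Instruct-Q4_0",
        withExtension: "gguf"
    ) else {
        fatalError("bundled model not found")
    }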
 Package.swift                  | 104 +++++++++------------------
 objc/LlamaSession.mm           |   3 -
 swift/main/main.swift          |   2 +-
 swift/test/LlamaKitTests.swift |   3 +-
 4 files changed, 31 insertions(+), 81 deletions(-)

diff --git a/Package.swift b/Package.swift
index c49ab6b90..4db2c8d93 100644
--- a/Package.swift
+++ b/Package.swift
@@ -1,40 +1,8 @@
 // swift-tools-version:5.9
-import CompilerPluginSupport
+
 import PackageDescription
+import CompilerPluginSupport

-var cppSources = [
-    "src/llama.cpp",
-    "src/llama-vocab.cpp",
-    "src/llama-grammar.cpp",
-    "src/llama-sampling.cpp",
-    "src/unicode.cpp",
-    "src/unicode-data.cpp",
-    "common/sampling.cpp",
-    "common/common.cpp",
-    "common/json-schema-to-grammar.cpp",
-    "common/log.cpp",
-    "common/console.cpp"
-]
-
-var ggmlSources = [
-    "src/ggml.c",
-    "src/ggml-alloc.c",
-    "src/ggml-backend.cpp",
-    "src/ggml-cpu.c",
-    "src/ggml-quants.c",
-    "src/ggml-aarch64.c"
-]
-var resources: [Resource] = []
-var linkerSettings: [LinkerSetting] = []
-var cSettings: [CSetting] = [
-    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-    .unsafeFlags(["-fno-objc-arc"]),
-    // NOTE: NEW_LAPACK will required iOS version 16.4+
-    // We should consider add this in the future when we drop support for iOS 14
-    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-    .define("ACCELERATE_NEW_LAPACK"),
-    .define("ACCELERATE_LAPACK_ILP64"),
-]
 var sources = [
     "src/llama.cpp",
     "src/llama-vocab.cpp",
     "src/llama-grammar.cpp",
     "src/llama-sampling.cpp",
     "src/unicode.cpp",
     "src/unicode-data.cpp",
     "src/ggml.c",
     "src/ggml-alloc.c",
     "src/ggml-backend.cpp",
     "src/ggml-cpu.c",
     "src/ggml-quants.c",
     "src/ggml-aarch64.c",
     "common/sampling.cpp",
@@ -52,13 +20,24 @@ var sources = [
     "common/common.cpp",
     "common/json-schema-to-grammar.cpp",
     "common/log.cpp",
+    "common/console.cpp"
 ]
+
+var resources: [Resource] = []
+var linkerSettings: [LinkerSetting] = []
+var cSettings: [CSetting] = [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    // NOTE: NEW_LAPACK requires iOS version 16.4+
+    // We should consider adding this in the future when we drop support for iOS 14
+    // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+]
+
 #if canImport(Darwin)
 sources.append("ggml/src/ggml-metal.m")
-ggmlSources.append("src/ggml-metal.m")
-//resources.append(.process("src/ggml-metal.metal"))
 resources.append(.process("ggml/src/ggml-metal.metal"))
-
 linkerSettings.append(.linkedFramework("Accelerate"))
 cSettings.append(
     contentsOf: [
@@ -69,7 +48,7 @@ cSettings.append(
 #endif

 #if os(Linux)
-    cSettings.append(.define("_GNU_SOURCE"))
+cSettings.append(.define("_GNU_SOURCE"))
 #endif

 let package = Package(
     name: "llama",
     platforms: [
         .macOS(.v12),
         .iOS(.v14),
         .watchOS(.v4),
@@ -81,8 +60,7 @@ let package = Package(
         .tvOS(.v14)
     ],
     products: [
-        .library(name: "LlamaKit", targets: ["LlamaKit"]),
-        .executable(name: "LlamaKitMain", targets: ["LlamaKitMain"])
+        .library(name: "llama", targets: ["llama"]),
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-syntax.git", branch: "main")
     ],
     targets: [
         .target(
@@ -92,15 +70,15 @@ let package = Package(
             name: "llama",
             path: ".",
             exclude: [
-                    "build",
-                    "cmake",
-                    "examples",
-                    "scripts",
-                    "models",
-                    "tests",
-                    "CMakeLists.txt",
-                    "Makefile",
-                    "ggml/src/ggml-metal-embed.metal"
+                "build",
+                "cmake",
+                "examples",
+                "scripts",
+                "models",
+                "tests",
+                "CMakeLists.txt",
+                "Makefile",
+                "ggml/src/ggml-metal-embed.metal"
             ],
             sources: sources,
             resources: resources,
@@ -108,30 +86,6 @@ let package = Package(
             cSettings: cSettings,
             linkerSettings: linkerSettings
         ),
-//        .target(name: "llama_cpp",
-//                path: ".",
-//                exclude: [
-//                    "cmake",
-//                    "examples",
-//                    "scripts",
-//                    "models",
-//                    "tests",
-//                    "CMakeLists.txt",
-//                    "Makefile",
-//                    "ggml"
-//                ],
-//                sources: cppSources,
-//                publicHeadersPath: "spm-headers",
-//                cSettings: cSettings),
-//        .target(
-//            name: "llama",
-//            dependencies: ["llama_cpp"],
-//            path: "ggml",
-//            sources: ggmlSources,
-//            resources: resources,
-//            publicHeadersPath: "include",
-//            cSettings: cSettings,
-//            linkerSettings: linkerSettings),
         .target(name: "LlamaObjC",
                 dependencies: ["llama"],
                 path: "objc",
@@ -155,7 +109,7 @@ let package = Package(
                 .product(name: "SwiftSyntax", package: "swift-syntax"),
                 .product(name: "SwiftSyntaxMacros", package: "swift-syntax"),
                 .product(name: "SwiftCompilerPlugin", package: "swift-syntax"),
-           ],
+            ],
             path: "swift/JSONSchemaMacros"
         ),
         .macro(
@@ -186,7 +140,7 @@ let package = Package(
         .executableTarget(name: "LlamaKitMain",
                           dependencies: ["LlamaKit"],
                           path: "swift/main",
-                          cSettings: cSettings),
+                          resources: [.process("Llama-3.2-3B-Instruct-Q4_0.gguf")]),
     ],
     cxxLanguageStandard: .cxx17
 )
diff --git a/objc/LlamaSession.mm b/objc/LlamaSession.mm
index db9318481..83694fec9 100644
--- a/objc/LlamaSession.mm
+++ b/objc/LlamaSession.mm
@@ -424,8 +424,6 @@ static BOOL file_is_empty(NSString *path) {
     }

     os_log_info(os_log_inst, "sampler seed: %u\n", [_smpl seed]);
-    // LOG_INF("sampler params: \n%s\n", sparams.print().c_str());
-    // LOG_INF("sampler chain: %s\n", gpt_sampler_print(smpl).c_str());
     os_log_info(os_log_inst, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.nBatch, params.nPredict, params.nKeep);

     // group-attention state
@@ -679,7 +677,6 @@ static BOOL file_is_empty(NSString *path) {
                 output_tokens.push_back(idToken);
                 output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
                 last_output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
-                NSLog(@"Generated %s", last_output_ss.str().c_str());
                 [self willChangeValueForKey:@"lastOutput"];
                 [_mutableLastOutput appendString:token_str];
                 [self didChangeValueForKey:@"lastOutput"];
diff --git a/swift/main/main.swift b/swift/main/main.swift
index 1d6600e7b..7b11c2257 100644
--- a/swift/main/main.swift
+++ b/swift/main/main.swift
@@ -5,7 +5,7 @@ import CoreLocation
 func downloadFile() async throws -> String {
     let fm = FileManager.default
     let tmpDir = fm.temporaryDirectory
-    let destinationURL = tmpDir.appending(path: "llama_groq_gguf.gguf")
+    let destinationURL = tmpDir.appending(path: "llama_tools.gguf")

     guard !fm.fileExists(atPath: destinationURL.path()) else {
         return destinationURL.path()
diff --git a/swift/test/LlamaKitTests.swift b/swift/test/LlamaKitTests.swift
index 80ca60780..0092b9aff 100644
--- a/swift/test/LlamaKitTests.swift
+++ b/swift/test/LlamaKitTests.swift
@@ -146,6 +146,7 @@ struct LlamaSessionSuite {
     }

     // MARK: Session dealloc Test
+    // Note: this test will fail if run in parallel
     @Test func llamaToolSessionDealloc() async throws {
         let params = try await baseParams(url: "https://huggingface.co/bartowski/Llama-3-Groq-8B-Tool-Use-GGUF/resolve/main/Llama-3-Groq-8B-Tool-Use-Q8_0.gguf?download=true", to: "llama_tools.gguf")
         func reportMemoryUsage() -> UInt64? {
@@ -175,7 +176,6 @@ struct LlamaSessionSuite {
             output = try await llama.infer("What question did i just ask you?")
             print(output)
         }.value
-        sleep(1)
         var memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
         try await Task {
@@ -184,7 +184,6 @@ struct LlamaSessionSuite {
             #expect(memPostAlloc > 500)
             _ = try await llama.infer("What was the first question I asked you?")
         }.value
-        sleep(1)
         memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
     }
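Appended note, not part of the diff: the dealloc test reads memory through a
reportMemoryUsage() helper whose body lies outside the hunk context above, so
the patch never shows it. On Apple platforms the usual way to obtain the byte
count that the test divides down to MB is Mach's task_info; a sketch of such
a helper follows, not necessarily the file's actual implementation:

    import Darwin

    // Resident set size of the current process in bytes, or nil on failure.
    func reportMemoryUsage() -> UInt64? {
        var info = mach_task_basic_info()
        var count = mach_msg_type_number_t(
            MemoryLayout<mach_task_basic_info>.size / MemoryLayout<natural_t>.size)
        let kr = withUnsafeMutablePointer(to: &info) { infoPtr in
            infoPtr.withMemoryRebound(to: integer_t.self, capacity: Int(count)) {
                task_info(mach_task_self_,
                          task_flavor_t(MACH_TASK_BASIC_INFO),
                          $0,
                          &count)
            }
        }
        return kr == KERN_SUCCESS ? info.resident_size : nil
    }

Removing the sleep(1) calls assumes the session has fully deallocated by the
time the awaited Task returns. Because the resident size is process-wide,
concurrent tests would skew the thresholds, which is presumably what the new
"will fail if run in parallel" comment is warning about.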