Add corrections

Jason Flax 2024-11-10 18:51:20 -05:00
parent 7752c97f3e
commit b97f21851c
4 changed files with 31 additions and 81 deletions

View file

@@ -1,40 +1,8 @@
 // swift-tools-version:5.9
+import CompilerPluginSupport
 import PackageDescription
-import CompilerPluginSupport
-var cppSources = [
-    "src/llama.cpp",
-    "src/llama-vocab.cpp",
-    "src/llama-grammar.cpp",
-    "src/llama-sampling.cpp",
-    "src/unicode.cpp",
-    "src/unicode-data.cpp",
-    "common/sampling.cpp",
-    "common/common.cpp",
-    "common/json-schema-to-grammar.cpp",
-    "common/log.cpp",
-    "common/console.cpp"
-]
-var ggmlSources = [
-    "src/ggml.c",
-    "src/ggml-alloc.c",
-    "src/ggml-backend.cpp",
-    "src/ggml-cpu.c",
-    "src/ggml-quants.c",
-    "src/ggml-aarch64.c"
-]
-var resources: [Resource] = []
-var linkerSettings: [LinkerSetting] = []
-var cSettings: [CSetting] = [
-    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-    .unsafeFlags(["-fno-objc-arc"]),
-    // NOTE: NEW_LAPACK will required iOS version 16.4+
-    // We should consider add this in the future when we drop support for iOS 14
-    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-    .define("ACCELERATE_NEW_LAPACK"),
-    .define("ACCELERATE_LAPACK_ILP64"),
-]
 var sources = [
     "src/llama.cpp",
     "src/llama-vocab.cpp",
@@ -52,13 +20,24 @@ var sources = [
     "common/common.cpp",
     "common/json-schema-to-grammar.cpp",
     "common/log.cpp",
-    "common/console.cpp"
 ]
+var resources: [Resource] = []
+var linkerSettings: [LinkerSetting] = []
+var cSettings: [CSetting] = [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    // NOTE: NEW_LAPACK will required iOS version 16.4+
+    // We should consider add this in the future when we drop support for iOS 14
+    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+]
 #if canImport(Darwin)
 sources.append("ggml/src/ggml-metal.m")
-ggmlSources.append("src/ggml-metal.m")
-//resources.append(.process("src/ggml-metal.metal"))
 resources.append(.process("ggml/src/ggml-metal.metal"))
 linkerSettings.append(.linkedFramework("Accelerate"))
 cSettings.append(
     contentsOf: [
@@ -69,7 +48,7 @@ cSettings.append(
 #endif
 #if os(Linux)
 cSettings.append(.define("_GNU_SOURCE"))
 #endif
 let package = Package(
@@ -81,8 +60,7 @@ let package = Package(
         .tvOS(.v14)
     ],
     products: [
-        .library(name: "LlamaKit", targets: ["LlamaKit"]),
-        .executable(name: "LlamaKitMain", targets: ["LlamaKitMain"])
+        .library(name: "llama", targets: ["llama"]),
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-syntax.git", branch: "main")
@@ -92,15 +70,15 @@ let package = Package(
             name: "llama",
             path: ".",
             exclude: [
                 "build",
                 "cmake",
                 "examples",
                 "scripts",
                 "models",
                 "tests",
                 "CMakeLists.txt",
                 "Makefile",
                 "ggml/src/ggml-metal-embed.metal"
             ],
             sources: sources,
             resources: resources,
@@ -108,30 +86,6 @@ let package = Package(
             cSettings: cSettings,
             linkerSettings: linkerSettings
         ),
-        // .target(name: "llama_cpp",
-        //         path: ".",
-        //         exclude: [
-        //             "cmake",
-        //             "examples",
-        //             "scripts",
-        //             "models",
-        //             "tests",
-        //             "CMakeLists.txt",
-        //             "Makefile",
-        //             "ggml"
-        //         ],
-        //         sources: cppSources,
-        //         publicHeadersPath: "spm-headers",
-        //         cSettings: cSettings),
-        // .target(
-        //     name: "llama",
-        //     dependencies: ["llama_cpp"],
-        //     path: "ggml",
-        //     sources: ggmlSources,
-        //     resources: resources,
-        //     publicHeadersPath: "include",
-        //     cSettings: cSettings,
-        //     linkerSettings: linkerSettings),
         .target(name: "LlamaObjC",
                 dependencies: ["llama"],
                 path: "objc",
@@ -155,7 +109,7 @@ let package = Package(
                 .product(name: "SwiftSyntax", package: "swift-syntax"),
                 .product(name: "SwiftSyntaxMacros", package: "swift-syntax"),
                 .product(name: "SwiftCompilerPlugin", package: "swift-syntax"),
             ],
             path: "swift/JSONSchemaMacros"
         ),
         .macro(
@@ -186,7 +140,7 @@ let package = Package(
         .executableTarget(name: "LlamaKitMain",
                           dependencies: ["LlamaKit"],
                           path: "swift/main",
-                          cSettings: cSettings),
+                          resources: [.process("Llama-3.2-3B-Instruct-Q4_0.gguf")]),
     ],
     cxxLanguageStandard: .cxx17
 )
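
Since the LlamaKitMain target now bundles its model with .process(...), a resource of this kind is normally resolved at runtime through SwiftPM's generated Bundle.module accessor. A minimal sketch, not part of this commit (the helper name below is illustrative):

    import Foundation

    // Sketch: look up the GGUF file declared above with
    // .process("Llama-3.2-3B-Instruct-Q4_0.gguf"); SwiftPM copies processed
    // resources into the target bundle under the same name and extension.
    func bundledModelURL() -> URL? {
        Bundle.module.url(
            forResource: "Llama-3.2-3B-Instruct-Q4_0",
            withExtension: "gguf"
        )
    }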

View file

@@ -424,8 +424,6 @@ static BOOL file_is_empty(NSString *path) {
     }
     os_log_info(os_log_inst, "sampler seed: %u\n", [_smpl seed]);
-    // LOG_INF("sampler params: \n%s\n", sparams.print().c_str());
-    // LOG_INF("sampler chain: %s\n", gpt_sampler_print(smpl).c_str());
     os_log_info(os_log_inst, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.nBatch, params.nPredict, params.nKeep);
     // group-attention state
@@ -679,7 +677,6 @@ static BOOL file_is_empty(NSString *path) {
     output_tokens.push_back(idToken);
     output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
     last_output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
-    NSLog(@"Generated %s", last_output_ss.str().c_str());
     [self willChangeValueForKey:@"lastOutput"];
     [_mutableLastOutput appendString:token_str];
     [self didChangeValueForKey:@"lastOutput"];
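
The manual willChangeValueForKey:/didChangeValueForKey: calls retained here are what publish KVO notifications for "lastOutput" as tokens stream in. As a hedged illustration only (the session object and its Swift-visible surface are assumptions about the surrounding interface, not shown in this diff), a Swift caller could listen with plain Foundation KVO:

    import Foundation

    // Sketch: observe the "lastOutput" key the ObjC code above notifies on.
    final class OutputObserver: NSObject {
        override func observeValue(forKeyPath keyPath: String?,
                                   of object: Any?,
                                   change: [NSKeyValueChangeKey: Any]?,
                                   context: UnsafeMutableRawPointer?) {
            if keyPath == "lastOutput", let text = change?[.newKey] as? String {
                print("lastOutput:", text)
            }
        }
    }

    // Hypothetical usage; `session` stands in for the ObjC object being edited:
    // session.addObserver(observer, forKeyPath: "lastOutput", options: [.new], context: nil)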

View file

@@ -5,7 +5,7 @@ import CoreLocation
 func downloadFile() async throws -> String {
     let fm = FileManager.default
     let tmpDir = fm.temporaryDirectory
-    let destinationURL = tmpDir.appending(path: "llama_groq_gguf.gguf")
+    let destinationURL = tmpDir.appending(path: "llama_tools.gguf")
     guard !fm.fileExists(atPath: destinationURL.path()) else {
         return destinationURL.path()
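
Only the cached file name changes in this hunk; the rest of downloadFile is not shown. For orientation, a rough sketch of the usual shape of such a helper follows — the source URL, the URLSession call, and the move are assumptions, not code from this repository:

    import Foundation

    // Sketch: reuse a cached copy in the temporary directory, otherwise
    // download the model once and move it into place.
    func downloadFile() async throws -> String {
        let fm = FileManager.default
        let destinationURL = fm.temporaryDirectory.appending(path: "llama_tools.gguf")
        guard !fm.fileExists(atPath: destinationURL.path()) else {
            return destinationURL.path()
        }
        // Hypothetical source URL; the real one is not visible in this hunk.
        let source = URL(string: "https://example.com/llama_tools.gguf")!
        let (tempURL, _) = try await URLSession.shared.download(from: source)
        try fm.moveItem(at: tempURL, to: destinationURL)
        return destinationURL.path()
    }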

View file

@@ -146,6 +146,7 @@ struct LlamaSessionSuite {
     }
     // MARK: Session dealloc Test
+    // Note this test will fail if run in parallel
     @Test func llamaToolSessionDealloc() async throws {
         let params = try await baseParams(url: "https://huggingface.co/bartowski/Llama-3-Groq-8B-Tool-Use-GGUF/resolve/main/Llama-3-Groq-8B-Tool-Use-Q8_0.gguf?download=true", to: "llama_tools.gguf")
         func reportMemoryUsage() -> UInt64? {
@@ -175,7 +176,6 @@ struct LlamaSessionSuite {
             output = try await llama.infer("What question did i just ask you?")
             print(output)
         }.value
-        sleep(1)
         var memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
         try await Task {
@@ -184,7 +184,6 @@ struct LlamaSessionSuite {
             #expect(memPostAlloc > 500)
             _ = try await llama.infer("What was the first question I asked you?")
         }.value
-        sleep(1)
         memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
     }
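
The new comment flags that llamaToolSessionDealloc relies on being the only test exercising memory at that moment. If the swift-testing version in use supports it, one option (a suggestion, not something this commit applies) is to serialize the suite instead of depending on run order:

    import Testing

    // Sketch: the .serialized trait runs the suite's tests one at a time, so the
    // reportMemoryUsage() expectations are not skewed by concurrent tests.
    @Suite(.serialized)
    struct LlamaSessionSuite {
        @Test func llamaToolSessionDealloc() async throws {
            // ... body as in the diff above ...
        }
    }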