Add corrections
parent 7752c97f3e · commit b97f21851c
4 changed files with 31 additions and 81 deletions

Package.swift (104 changed lines)
@@ -1,40 +1,8 @@
 // swift-tools-version:5.9
-import CompilerPluginSupport
 import PackageDescription
+import CompilerPluginSupport
 
-var cppSources = [
-    "src/llama.cpp",
-    "src/llama-vocab.cpp",
-    "src/llama-grammar.cpp",
-    "src/llama-sampling.cpp",
-    "src/unicode.cpp",
-    "src/unicode-data.cpp",
-    "common/sampling.cpp",
-    "common/common.cpp",
-    "common/json-schema-to-grammar.cpp",
-    "common/log.cpp",
-    "common/console.cpp"
-]
-
-var ggmlSources = [
-    "src/ggml.c",
-    "src/ggml-alloc.c",
-    "src/ggml-backend.cpp",
-    "src/ggml-cpu.c",
-    "src/ggml-quants.c",
-    "src/ggml-aarch64.c"
-]
-
-var resources: [Resource] = []
-var linkerSettings: [LinkerSetting] = []
-var cSettings: [CSetting] = [
-    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-    .unsafeFlags(["-fno-objc-arc"]),
-    // NOTE: NEW_LAPACK will required iOS version 16.4+
-    // We should consider add this in the future when we drop support for iOS 14
-    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-    .define("ACCELERATE_NEW_LAPACK"),
-    .define("ACCELERATE_LAPACK_ILP64"),
-]
 var sources = [
     "src/llama.cpp",
     "src/llama-vocab.cpp",
@@ -52,13 +20,24 @@ var sources = [
     "common/common.cpp",
     "common/json-schema-to-grammar.cpp",
     "common/log.cpp",
+    "common/console.cpp"
 ]
+
+var resources: [Resource] = []
+var linkerSettings: [LinkerSetting] = []
+var cSettings: [CSetting] = [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    // NOTE: NEW_LAPACK will required iOS version 16.4+
+    // We should consider add this in the future when we drop support for iOS 14
+    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+]
 
 #if canImport(Darwin)
 sources.append("ggml/src/ggml-metal.m")
-ggmlSources.append("src/ggml-metal.m")
-//resources.append(.process("src/ggml-metal.metal"))
 resources.append(.process("ggml/src/ggml-metal.metal"))
 
 linkerSettings.append(.linkedFramework("Accelerate"))
 cSettings.append(
     contentsOf: [
@@ -69,7 +48,7 @@ cSettings.append(
 #endif
 
 #if os(Linux)
 cSettings.append(.define("_GNU_SOURCE"))
 #endif
 
 let package = Package(
@@ -81,8 +60,7 @@ let package = Package(
         .tvOS(.v14)
     ],
     products: [
-        .library(name: "LlamaKit", targets: ["LlamaKit"]),
-        .executable(name: "LlamaKitMain", targets: ["LlamaKitMain"])
+        .library(name: "llama", targets: ["llama"]),
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-syntax.git", branch: "main")
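The library product here is renamed from LlamaKit to llama, and the separate executable product is dropped. Downstream packages have to pick up the new product name; a minimal consumer-manifest sketch (the package name and URL are assumptions, not part of this diff — only the product name "llama" is confirmed):

// swift-tools-version:5.9
import PackageDescription

// Hypothetical downstream package depending on the renamed product.
let package = Package(
    name: "MyApp",
    dependencies: [
        // Placeholder URL for wherever this package is hosted.
        .package(url: "https://github.com/example/LlamaKit.git", branch: "main")
    ],
    targets: [
        .executableTarget(
            name: "MyApp",
            // Dependents now ask for "llama", not "LlamaKit".
            dependencies: [.product(name: "llama", package: "LlamaKit")]
        )
    ]
)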
@@ -92,15 +70,15 @@ let package = Package(
             name: "llama",
             path: ".",
             exclude: [
                 "build",
                 "cmake",
                 "examples",
                 "scripts",
                 "models",
                 "tests",
                 "CMakeLists.txt",
                 "Makefile",
                 "ggml/src/ggml-metal-embed.metal"
             ],
             sources: sources,
             resources: resources,
@@ -108,30 +86,6 @@ let package = Package(
             cSettings: cSettings,
             linkerSettings: linkerSettings
         ),
-        // .target(name: "llama_cpp",
-        //         path: ".",
-        //         exclude: [
-        //             "cmake",
-        //             "examples",
-        //             "scripts",
-        //             "models",
-        //             "tests",
-        //             "CMakeLists.txt",
-        //             "Makefile",
-        //             "ggml"
-        //         ],
-        //         sources: cppSources,
-        //         publicHeadersPath: "spm-headers",
-        //         cSettings: cSettings),
-        // .target(
-        //     name: "llama",
-        //     dependencies: ["llama_cpp"],
-        //     path: "ggml",
-        //     sources: ggmlSources,
-        //     resources: resources,
-        //     publicHeadersPath: "include",
-        //     cSettings: cSettings,
-        //     linkerSettings: linkerSettings),
         .target(name: "LlamaObjC",
                 dependencies: ["llama"],
                 path: "objc",
@@ -155,7 +109,7 @@ let package = Package(
                 .product(name: "SwiftSyntax", package: "swift-syntax"),
                 .product(name: "SwiftSyntaxMacros", package: "swift-syntax"),
                 .product(name: "SwiftCompilerPlugin", package: "swift-syntax"),
             ],
             path: "swift/JSONSchemaMacros"
         ),
         .macro(
@@ -186,7 +140,7 @@ let package = Package(
         .executableTarget(name: "LlamaKitMain",
                           dependencies: ["LlamaKit"],
                           path: "swift/main",
-                          cSettings: cSettings),
+                          resources: [.process("Llama-3.2-3B-Instruct-Q4_0.gguf")]),
     ],
     cxxLanguageStandard: .cxx17
 )
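The last Package.swift hunk swaps the cSettings argument on the LlamaKitMain target for a bundled GGUF resource. SwiftPM copies .process(...) resources into the target's bundle and synthesizes a Bundle.module accessor for targets that declare resources; a sketch of how the executable could locate the model at runtime (the helper name is illustrative, not shown in this diff):

import Foundation

// Illustrative helper: resolve the GGUF that .process(...) copies into the bundle.
// Bundle.module is generated by SwiftPM for targets with declared resources.
func bundledModelPath() -> String? {
    Bundle.module.path(forResource: "Llama-3.2-3B-Instruct-Q4_0", ofType: "gguf")
}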
(Second changed file: Objective-C++ source; filename not captured in this extract.)

@@ -424,8 +424,6 @@ static BOOL file_is_empty(NSString *path) {
 }
 
 os_log_info(os_log_inst, "sampler seed: %u\n", [_smpl seed]);
-// LOG_INF("sampler params: \n%s\n", sparams.print().c_str());
-// LOG_INF("sampler chain: %s\n", gpt_sampler_print(smpl).c_str());
 os_log_info(os_log_inst, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.nBatch, params.nPredict, params.nKeep);
 
 // group-attention state
@@ -679,7 +677,6 @@ static BOOL file_is_empty(NSString *path) {
 output_tokens.push_back(idToken);
 output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
 last_output_ss << [token_str cStringUsingEncoding:NSUTF8StringEncoding];
-NSLog(@"Generated %s", last_output_ss.str().c_str());
 [self willChangeValueForKey:@"lastOutput"];
 [_mutableLastOutput appendString:token_str];
 [self didChangeValueForKey:@"lastOutput"];
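This hunk drops a per-token NSLog while keeping the manual willChangeValueForKey:/didChangeValueForKey: pair, so lastOutput stays observable through KVO rather than the console. A sketch of a Swift-side observer, assuming the session object is an NSObject subclass whose lastOutput is KVO-visible (class and property exposure are assumptions; only the "lastOutput" key appears in the diff):

import Foundation

// Classic KVO client; pairs with the manual willChange/didChange calls above.
final class OutputWatcher: NSObject {
    private let session: NSObject

    init(session: NSObject) {
        self.session = session
        super.init()
        session.addObserver(self, forKeyPath: "lastOutput", options: [.new], context: nil)
    }

    override func observeValue(forKeyPath keyPath: String?, of object: Any?,
                               change: [NSKeyValueChangeKey: Any]?,
                               context: UnsafeMutableRawPointer?) {
        guard keyPath == "lastOutput", let text = change?[.newKey] as? String else { return }
        print("lastOutput: \(text)")
    }

    deinit {
        session.removeObserver(self, forKeyPath: "lastOutput")
    }
}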
(Third changed file: Swift test suite; filename not captured in this extract.)

@@ -5,7 +5,7 @@ import CoreLocation
 func downloadFile() async throws -> String {
     let fm = FileManager.default
     let tmpDir = fm.temporaryDirectory
-    let destinationURL = tmpDir.appending(path: "llama_groq_gguf.gguf")
+    let destinationURL = tmpDir.appending(path: "llama_tools.gguf")
 
     guard !fm.fileExists(atPath: destinationURL.path()) else {
         return destinationURL.path()
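Only the cached filename changes here, aligning the download destination with the llama_tools.gguf name the dealloc test below uses. For context, a sketch of the helper's likely overall shape (the body past the guard is not in this diff; the URLSession usage is an assumption):

import Foundation

// Illustrative download-and-cache helper; only the guard and path lines are confirmed.
func downloadFile(from url: URL, as name: String) async throws -> String {
    let fm = FileManager.default
    let destinationURL = fm.temporaryDirectory.appending(path: name)

    // Reuse a previously downloaded model if present.
    guard !fm.fileExists(atPath: destinationURL.path()) else {
        return destinationURL.path()
    }

    // URLSession downloads to a temporary file; move it into the cached location.
    let (tmpURL, _) = try await URLSession.shared.download(from: url)
    try fm.moveItem(at: tmpURL, to: destinationURL)
    return destinationURL.path()
}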
@@ -146,6 +146,7 @@ struct LlamaSessionSuite {
     }
 
     // MARK: Session dealloc Test
+    // Note this test will fail if run in parallel
     @Test func llamaToolSessionDealloc() async throws {
         let params = try await baseParams(url: "https://huggingface.co/bartowski/Llama-3-Groq-8B-Tool-Use-GGUF/resolve/main/Llama-3-Groq-8B-Tool-Use-Q8_0.gguf?download=true", to: "llama_tools.gguf")
         func reportMemoryUsage() -> UInt64? {
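reportMemoryUsage() backs the memory #expect checks in the hunks below; its body is not shown in this diff. On Apple platforms the usual implementation reads the task's resident size via Mach task_info, roughly as follows (a sketch assuming that approach):

import Darwin

// Resident memory in bytes via Mach task_info; returns nil on failure.
func reportMemoryUsage() -> UInt64? {
    var info = mach_task_basic_info()
    var count = mach_msg_type_number_t(
        MemoryLayout<mach_task_basic_info>.size / MemoryLayout<natural_t>.size)
    let kr = withUnsafeMutablePointer(to: &info) { infoPtr in
        infoPtr.withMemoryRebound(to: integer_t.self, capacity: Int(count)) {
            task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), $0, &count)
        }
    }
    return kr == KERN_SUCCESS ? info.resident_size : nil
}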
@@ -175,7 +176,6 @@ struct LlamaSessionSuite {
             output = try await llama.infer("What question did i just ask you?")
             print(output)
         }.value
-        sleep(1)
         var memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
         try await Task {
@@ -184,7 +184,6 @@ struct LlamaSessionSuite {
             #expect(memPostAlloc > 500)
             _ = try await llama.infer("What was the first question I asked you?")
         }.value
-        sleep(1)
         memDealloc = reportMemoryUsage()! / 1024 / 1024
         #expect(memDealloc < 200)
     }