llama.swiftui : add bench functionality (#4483)

* llama.swiftui : add bench button

* llama.swiftui : initial bench functionality

* force the use of n_gpu_layers on the simulator (see the sketch after this list)

* add download buttons & expose llamaState.loadModel

* update project.pbxproj

* comment #Preview & fix editorconfig check

* gitignore : xcode stuff

* llama.swiftui : UX improvements

* llama.swiftui : avoid data copy via "downloadTask" (see the sketch after this list)

* llama.swiftui : remove model from project

* llama : remove "mostly" from model infos

* llama.swiftui : improve bench
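
For illustration, a minimal sketch of the simulator guard the "n_gpu_layers" bullet refers to. The concrete values are assumptions, not taken from this diff: Metal is unavailable in the iOS simulator, so GPU offload is plausibly forced off there.

```swift
// Hypothetical sketch (not from this diff): disable GPU offload when
// running in the iOS simulator, where Metal is not available.
#if targetEnvironment(simulator)
let n_gpu_layers: Int32 = 0   // assumed: CPU-only in the simulator
#else
let n_gpu_layers: Int32 = 99  // assumed: offload all layers on device
#endif
```

And a sketch of the "downloadTask" approach: URLSession streams the file straight to a temporary location on disk, so the model bytes never have to pass through an in-memory Data buffer. The URL and file names below are placeholders.

```swift
import Foundation

// Hypothetical sketch (not from this diff): fetch a model file without
// buffering it in memory by using URLSession's downloadTask.
let url = URL(string: "https://example.com/models/ggml-model.gguf")! // placeholder URL
let task = URLSession.shared.downloadTask(with: url) { tempUrl, _, error in
    guard let tempUrl, error == nil else { return }
    let dest = FileManager.default
        .urls(for: .documentDirectory, in: .userDomainMask)[0]
        .appendingPathComponent("ggml-model.gguf")
    // File-to-file move on disk; no intermediate Data copy.
    try? FileManager.default.moveItem(at: tempUrl, to: dest)
}
task.resume()
```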

---------

Co-authored-by: jhen <developer@jhen.me>
Georgi Gerganov, 2023-12-17 19:38:41 +02:00, committed by GitHub
commit 800a489e4a, parent f7f468a97d
8 changed files with 895 additions and 510 deletions

@@ -3,24 +3,26 @@ import Foundation
 @MainActor
 class LlamaState: ObservableObject {
     @Published var messageLog = ""
     @Published var cacheCleared = false

     private var llamaContext: LlamaContext?
-    private var modelUrl: URL? {
-        Bundle.main.url(forResource: "q8_0", withExtension: "gguf", subdirectory: "models")
+    private var defaultModelUrl: URL? {
+        Bundle.main.url(forResource: "ggml-model", withExtension: "gguf", subdirectory: "models")
+        // Bundle.main.url(forResource: "llama-2-7b-chat", withExtension: "Q2_K.gguf", subdirectory: "models")
     }
+
     init() {
         do {
-            try loadModel()
+            try loadModel(modelUrl: defaultModelUrl)
         } catch {
             messageLog += "Error!\n"
         }
     }

-    private func loadModel() throws {
+    func loadModel(modelUrl: URL?) throws {
         messageLog += "Loading model...\n"
         if let modelUrl {
-            llamaContext = try LlamaContext.createContext(path: modelUrl.path())
+            llamaContext = try LlamaContext.create_context(path: modelUrl.path())
             messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
         } else {
             messageLog += "Could not locate model\n"
@@ -31,7 +33,7 @@ class LlamaState: ObservableObject {
         guard let llamaContext else {
             return
         }

         messageLog += "Attempting to complete text...\n"
         await llamaContext.completion_init(text: text)
         messageLog += "\(text)"
@@ -42,4 +44,42 @@ class LlamaState: ObservableObject {
         await llamaContext.clear()
         messageLog += "\n\ndone\n"
     }
+
+    func bench() async {
+        guard let llamaContext else {
+            return
+        }
+
+        messageLog += "\n"
+        messageLog += "Running benchmark...\n"
+        messageLog += "Model info: "
+        messageLog += await llamaContext.model_info() + "\n"
+
+        let t_start = DispatchTime.now().uptimeNanoseconds
+        await llamaContext.bench(pp: 8, tg: 4, pl: 1) // heat up
+        let t_end = DispatchTime.now().uptimeNanoseconds
+
+        let t_heat = Double(t_end - t_start) / 1_000_000_000.0
+        messageLog += "Heat up time: \(t_heat) seconds, please wait...\n"
+
+        // if more than 5 seconds, then we're probably running on a slow device
+        if t_heat > 5.0 {
+            messageLog += "Heat up time is too long, aborting benchmark\n"
+            return
+        }
+
+        let result = await llamaContext.bench(pp: 512, tg: 128, pl: 1, nr: 3)
+
+        messageLog += "\(result)"
+        messageLog += "\n"
+    }
+
+    func clear() async {
+        guard let llamaContext else {
+            return
+        }
+
+        await llamaContext.clear()
+        messageLog = ""
+    }
 }
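
For reference, the `bench(pp:tg:pl:nr:)` arguments follow llama.cpp's batched-bench conventions: `pp` is the number of prompt-processing tokens, `tg` the number of text-generation tokens, `pl` the number of parallel sequences, and `nr` the number of repetitions; the short `pp: 8, tg: 4` pass warms up the backend before the measured run. Below is a minimal sketch of how the bench button might drive this from SwiftUI. The view name and the environment-object injection are assumptions for illustration, not part of this diff.

```swift
import SwiftUI

// Hypothetical sketch (not from this diff): a SwiftUI button that runs
// the benchmark on the shared LlamaState; results land in messageLog.
struct BenchButton: View {
    @EnvironmentObject var llamaState: LlamaState // assumed injection point

    var body: some View {
        Button("Bench") {
            Task {
                await llamaState.bench() // appends model info and timings to messageLog
            }
        }
    }
}
```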