Merge branch 'ggerganov:master' into master
commit 746d5fb3c9
12 changed files with 148 additions and 70 deletions
.github/workflows/bench.yml (vendored)
@@ -79,12 +79,18 @@ jobs:
      sleep 0.1
    done

- name: Install k6
- name: Set up Go
  uses: actions/setup-go@v5
  with:
    go-version: '1.21'

- name: Install k6 and xk6-sse
  id: k6_installation
  run: |
    cd examples/server/bench
    wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
    tar xzf k6*.tar.gz --strip-components=1
    go install go.k6.io/xk6/cmd/xk6@latest
    xk6 build master \
      --with github.com/phymbert/xk6-sse

- name: Build
  id: cmake_build
@@ -118,7 +124,7 @@ jobs:

    cd examples/server/bench
    source venv/bin/activate
    BENCH_K6_BIN_PATH=./k6 python bench.py \
    python bench.py \
      --runner-label ${{ env.RUNNER_LABEL }} \
      --name ${{ github.job }} \
      --branch ${{ github.head_ref || github.ref_name }} \
@@ -228,9 +234,9 @@ jobs:
    <summary>Expand details for performance related PR only</summary>

    - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
    - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
    - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
    - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
    - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
    - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
    - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
    - ${{ env.BENCH_GRAPH_XLABEL }}
README.md

@@ -181,6 +181,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
- [LLMFarm](https://github.com/guinmoon/LLMFarm?tab=readme-ov-file) (MIT)
- [KanTV](https://github.com/zhouwg/kantv?tab=readme-ov-file)(Apachev2.0 or later)
- [Dot](https://github.com/alexpinel/Dot) (GPL)
- [MindMac](https://mindmac.app) (proprietary)

*(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*
examples/batched-bench/README.md

@@ -10,16 +10,16 @@ There are 2 modes of operation:
- `prompt is shared` - there is a common prompt of size `PP` used by all batches (i.e. `N_KV = PP + B*TG`)

```bash
./batched-bench MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] [MMQ] <PP> <TG> <PL>
./batched-bench MODEL_PATH [N_KV_MAX] [N_BATCH] [N_UBATCH] [IS_PP_SHARED] [NGL] [MMQ] <PP> <TG> <PL>

# LLaMA 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared
./batched-bench ./models/llama-7b/ggml-model-f16.gguf 16384 0 99
./batched-bench ./models/llama-7b/ggml-model-f16.gguf 16384 2048 512 0 99

# LLaMA 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 16384 1 99
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 16384 2048 512 1 99

# custom set of batches
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 2048 0 999 0 128,256,512 128,256 1,2,4,8,16,32
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 2048 512 512 0 999 0 128,256,512 128,256 1,2,4,8,16,32
```
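Not part of the diff, but a quick sketch of the KV-cache bookkeeping behind these numbers: with a shared prompt the cache holds `PP + B*TG` tokens (the formula quoted above), and with per-batch prompts it needs roughly `B*(PP + TG)` (an assumption about the other mode), which is what `N_KV_MAX` has to cover:

```python
def n_kv_required(pp: int, tg: int, batch: int, is_pp_shared: bool) -> int:
    # shared prompt: one copy of the prompt plus B generated continuations
    # non-shared: every batch keeps its own prompt and continuation
    return pp + batch * tg if is_pp_shared else batch * (pp + tg)

N_KV_MAX = 16384
for b in (1, 8, 32):
    shared = n_kv_required(512, 128, b, True)
    private = n_kv_required(512, 128, b, False)
    print(b, shared, private, shared <= N_KV_MAX, private <= N_KV_MAX)
# 1   640   640   True True
# 8  1536  5120   True True
# 32 4608 20480   True False
```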
## Sample results
examples/batched-bench/batched-bench.cpp

@@ -32,13 +32,15 @@ int main(int argc, char ** argv) {
    gpt_params params;

    if (argc == 1 || argv[1][0] == '-') {
        printf("usage: %s MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] <PP> <TG> <PL>\n" , argv[0]);
        printf("usage: %s MODEL_PATH [N_KV_MAX] [N_BATCH] [N_UBATCH] [IS_PP_SHARED] [NGL] <PP> <TG> <PL>\n" , argv[0]);
        printf(" <PP>, <TG> and PL are comma-separated lists of numbers without spaces\n\n");
        printf(" example: %s ggml-model-f16.gguf 2048 0 999 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]);
        printf(" example: %s ggml-model-f16.gguf 2048 2048 512 0 999 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]);
        return 1 ;
    }

    int n_kv_max = 2048;
    int n_batch = 2048;
    int n_ubatch = 512;
    int is_pp_shared = 0;
    int n_gpu_layers = 0;
@@ -56,23 +58,31 @@ int main(int argc, char ** argv) {
    }

    if (argc >= 4) {
        is_pp_shared = std::atoi(argv[3]);
        n_batch = std::atoi(argv[3]);
    }

    if (argc >= 5) {
        n_gpu_layers = std::atoi(argv[4]);
        n_ubatch = std::atoi(argv[4]);
    }

    if (argc >= 6) {
        n_pp = parse_list(argv[5]);
        is_pp_shared = std::atoi(argv[5]);
    }

    if (argc >= 7) {
        n_tg = parse_list(argv[6]);
        n_gpu_layers = std::atoi(argv[6]);
    }

    if (argc >= 8) {
        n_pl = parse_list(argv[7]);
        n_pp = parse_list(argv[7]);
    }

    if (argc >= 9) {
        n_tg = parse_list(argv[8]);
    }

    if (argc >= 10) {
        n_pl = parse_list(argv[9]);
    }

    // init LLM
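As a reading aid (not part of the diff): the hunk above shifts every positional argument after `N_KV_MAX` by two to make room for `N_BATCH` and `N_UBATCH`. A sketch in Python of the resulting layout; the `n_kv_max` position is inferred from the usage string, and `parse_list` is a stand-in for the helper of the same name:

```python
import sys

def parse_list(arg: str) -> list[int]:
    # comma-separated list of numbers without spaces, e.g. "128,256,512"
    return [int(x) for x in arg.split(",")]

# defaults as declared in the hunk above
cfg = {"n_kv_max": 2048, "n_batch": 2048, "n_ubatch": 512,
       "is_pp_shared": 0, "n_gpu_layers": 0,
       "n_pp": [], "n_tg": [], "n_pl": []}

argv = sys.argv                                                # argv[1] is MODEL_PATH
if len(argv) >= 3:  cfg["n_kv_max"]     = int(argv[2])
if len(argv) >= 4:  cfg["n_batch"]      = int(argv[3])         # new position
if len(argv) >= 5:  cfg["n_ubatch"]     = int(argv[4])         # new position
if len(argv) >= 6:  cfg["is_pp_shared"] = int(argv[5])         # was argv[3]
if len(argv) >= 7:  cfg["n_gpu_layers"] = int(argv[6])         # was argv[4]
if len(argv) >= 8:  cfg["n_pp"]         = parse_list(argv[7])  # was argv[5]
if len(argv) >= 9:  cfg["n_tg"]         = parse_list(argv[8])  # was argv[6]
if len(argv) >= 10: cfg["n_pl"]         = parse_list(argv[9])  # was argv[7]
```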
@@ -100,7 +110,8 @@ int main(int argc, char ** argv) {

    ctx_params.seed = 1234;
    ctx_params.n_ctx = n_kv_max;
    ctx_params.n_batch = 512;
    ctx_params.n_batch = n_batch;
    ctx_params.n_ubatch = n_ubatch;

    ctx_params.n_threads = params.n_threads;
    ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;

@@ -158,7 +169,7 @@ int main(int argc, char ** argv) {
    }

    LOG_TEE("\n");
    LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
    LOG_TEE("%s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, n_batch, n_ubatch, is_pp_shared, n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
    LOG_TEE("\n");

    LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
examples/server/bench/README.md

@@ -2,13 +2,15 @@

Benchmark is using [k6](https://k6.io/).

##### Install k6
##### Install k6 and sse extension

Follow instruction from: https://k6.io/docs/get-started/installation/
SSE is not supported by default in k6, you have to build k6 with the [xk6-sse](https://github.com/phymbert/xk6-sse) extension.

Example for ubuntu:
Example:
```shell
snap install k6
go install go.k6.io/xk6/cmd/xk6@latest
xk6 build master \
  --with github.com/phymbert/xk6-sse
```

#### Download a dataset

@@ -46,7 +48,7 @@ server --host localhost --port 8080 \

For 500 chat completions request with 8 concurrent users during maximum 10 minutes, run:
```shell
k6 run script.js --duration 10m --iterations 500 --vus 8
./k6 run script.js --duration 10m --iterations 500 --vus 8
```

The benchmark values can be overridden with:

@@ -86,3 +88,33 @@ K6 metrics might be compared against [server metrics](../README.md), with:
```shell
curl http://localhost:8080/metrics
```

### Using the CI python script
The `bench.py` script does several steps:
- start the server
- define good variable for k6
- run k6 script
- extract metrics from prometheus

It aims to be used in the CI, but you can run it manually:

```shell
LLAMA_SERVER_BIN_PATH=../../../cmake-build-release/bin/server python bench.py \
  --runner-label local \
  --name local \
  --branch `git rev-parse --abbrev-ref HEAD` \
  --commit `git rev-parse HEAD` \
  --scenario script.js \
  --duration 5m \
  --hf-repo ggml-org/models \
  --hf-file phi-2/ggml-model-q4_0.gguf \
  --model-path-prefix models \
  --parallel 4 \
  -ngl 33 \
  --batch-size 2048 \
  --ubatch-size 256 \
  --ctx-size 4096 \
  --n-prompts 200 \
  --max-prompt-tokens 256 \
  --max-tokens 256
```
examples/server/bench/bench.py

@@ -76,7 +76,6 @@ def main(args_in: list[str] | None = None) -> None:
                data['metrics'][metric_name][metric_metric]=value
                github_env.write(
                    f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={value}\n")
        token_seconds = data['metrics']['llamacpp_tokens_second']['avg']
        iterations = data['root_group']['checks']['success completion']['passes']

    except Exception:
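The `GITHUB_ENV` entries written here are what the PR-comment template in `bench.yml` reads back as `${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}`, `${{ env.HTTP_REQ_DURATION_AVG }}`, and so on. `escape_metric_name` itself is not shown in this diff; a plausible equivalent, consistent with those variable names, would be:

```python
import re

def escape_metric_name(name: str) -> str:
    # hypothetical reconstruction: uppercase and replace anything that is not
    # A-Z/0-9 with '_', so "p(95)" becomes "P_95_" as used in bench.yml
    return re.sub(r"[^A-Z0-9]", "_", name.upper())

print(escape_metric_name("llamacpp_prompt_processing_second"), escape_metric_name("p(95)"))
# LLAMACPP_PROMPT_PROCESSING_SECOND P_95_
```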
@@ -181,16 +180,16 @@ xychart-beta
    bench_results = {
        "i": iterations,
        "req": {
            "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
            "p95": round(data['metrics']["http_req_duration"]["p(95)"], 2),
            "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
        },
        "pp": {
            "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
            "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
            "p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
            "avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
            "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
        },
        "tg": {
            "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
            "p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
            "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
            "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
        },
@@ -206,7 +205,7 @@ xychart-beta


def start_benchmark(args):
    k6_path = 'k6'
    k6_path = './k6'
    if 'BENCH_K6_BIN_PATH' in os.environ:
        k6_path = os.environ['BENCH_K6_BIN_PATH']
    k6_args = [
examples/server/bench/script.js

@@ -1,4 +1,4 @@
import http from 'k6/http'
import sse from 'k6/x/sse'
import {check, sleep} from 'k6'
import {SharedArray} from 'k6/data'
import {Counter, Rate, Trend} from 'k6/metrics'

@@ -53,7 +53,9 @@ const data = new SharedArray('conversations', function () {

const llamacpp_prompt_tokens = new Trend('llamacpp_prompt_tokens')
const llamacpp_completion_tokens = new Trend('llamacpp_completion_tokens')

const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')
const llamacpp_prompt_processing_second = new Trend('llamacpp_prompt_processing_second')

const llamacpp_prompt_tokens_total_counter = new Counter('llamacpp_prompt_tokens_total_counter')
const llamacpp_completion_tokens_total_counter = new Counter('llamacpp_completion_tokens_total_counter')
@@ -86,36 +88,62 @@ export default function () {
            }
        ],
        "model": model,
        "stream": false,
        "stream": true,
        "seed": 42,
        "max_tokens": max_tokens
    }

    const body = JSON.stringify(payload)
    const params = {method: 'POST', body: JSON.stringify(payload)};

    let res = http.post(`${server_url}/chat/completions`, body, {
        headers: {'Content-Type': 'application/json'},
        timeout: '300s'
    const startTime = new Date()
    let promptEvalEndTime = null
    let prompt_tokens = 0
    let completions_tokens = 0
    let finish_reason = null
    const res = sse.open(`${server_url}/chat/completions`, params, function (client) {
        client.on('event', function (event) {
            if (promptEvalEndTime == null) {
                promptEvalEndTime = new Date()
            }

            let chunk = JSON.parse(event.data)
            let choice = chunk.choices[0]
            if (choice.finish_reason) {
                finish_reason = choice.finish_reason
            }

            if (chunk.usage) {
                prompt_tokens = chunk.usage.prompt_tokens
                llamacpp_prompt_tokens.add(prompt_tokens)
                llamacpp_prompt_tokens_total_counter.add(prompt_tokens)

                completions_tokens = chunk.usage.completion_tokens
                llamacpp_completion_tokens.add(completions_tokens)
                llamacpp_completion_tokens_total_counter.add(completions_tokens)
            }
        })

        client.on('error', function (e) {
            console.log('An unexpected error occurred: ', e.error());
            throw e;
        })
    })

    check(res, {'success completion': (r) => r.status === 200})

    if (res.status === 200) {
        const completions = res.json()
    const endTime = new Date()

        llamacpp_prompt_tokens.add(completions.usage.prompt_tokens)
        llamacpp_prompt_tokens_total_counter.add(completions.usage.prompt_tokens)

        llamacpp_completion_tokens.add(completions.usage.completion_tokens)
        llamacpp_completion_tokens_total_counter.add(completions.usage.completion_tokens)

        llamacpp_completions_truncated_rate.add(completions.choices[0].finish_reason === 'length')
        llamacpp_completions_stop_rate.add(completions.choices[0].finish_reason === 'stop')

        llamacpp_tokens_second.add(completions.usage.total_tokens / res.timings.duration * 1.e3)
    } else {
        console.error(`response: ${res.body} request=${payload}`)
    const promptEvalTime = promptEvalEndTime - startTime
    if (promptEvalTime > 0) {
        llamacpp_prompt_processing_second.add(prompt_tokens / (promptEvalEndTime - startTime) * 1.e3)
    }

    const completion_time = endTime - promptEvalEndTime
    if (completions_tokens > 0 && completion_time > 0) {
        llamacpp_tokens_second.add(completions_tokens / completion_time * 1.e3)
    }
    llamacpp_completions_truncated_rate.add(finish_reason === 'length')
    llamacpp_completions_stop_rate.add(finish_reason === 'stop')

    sleep(0.3)
}
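An aside, not part of the diff: the new streaming variant above splits each request at the first SSE event, counting time before it as prompt processing and time after it as token generation (the script works in milliseconds, hence the `* 1.e3`). The same arithmetic in a small Python sketch, using seconds and made-up numbers:

```python
from dataclasses import dataclass

@dataclass
class StreamTiming:
    start: float             # request sent
    first_event: float       # first SSE event received (prompt eval finished)
    end: float               # stream closed
    prompt_tokens: int
    completion_tokens: int

def throughput(t: StreamTiming) -> tuple[float, float]:
    """Return (prompt processing tok/s, token generation tok/s)."""
    pp_time = t.first_event - t.start
    tg_time = t.end - t.first_event
    pp = t.prompt_tokens / pp_time if pp_time > 0 else float("nan")
    tg = t.completion_tokens / tg_time if tg_time > 0 else float("nan")
    return pp, tg

# example: 512-token prompt processed in 0.8 s, 128 tokens generated over 3.2 s
print(throughput(StreamTiming(0.0, 0.8, 4.0, 512, 128)))  # (640.0, 40.0)
```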
@@ -567,6 +567,15 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
        {"model", modelname},
        {"object", "chat.completion.chunk"}
    };
    if (!finish_reason.empty()) {
        int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
        int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
        ret.push_back({"usage", json {
            {"completion_tokens", num_tokens_predicted},
            {"prompt_tokens", num_prompt_tokens},
            {"total_tokens", num_tokens_predicted + num_prompt_tokens}
        }});
    }

    return std::vector<json>({ret});
}
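To illustrate what this change means for clients (not part of the diff): once `finish_reason` is set, the final streamed chunk also carries a `usage` object, which is exactly what the k6 script above reads from `chunk.usage`. A minimal sketch of consuming such a stream; the chunk payloads are made up and reduced to the relevant fields:

```python
import json

# hypothetical SSE "data:" payloads, shaped like the chunks built above:
# delta chunks first, then a final chunk with finish_reason and usage
chunks = [
    '{"choices": [{"delta": {"content": "Hel"}, "finish_reason": null}]}',
    '{"choices": [{"delta": {"content": "lo"}, "finish_reason": null}]}',
    '{"choices": [{"delta": {}, "finish_reason": "stop"}],'
    ' "usage": {"completion_tokens": 2, "prompt_tokens": 10, "total_tokens": 12}}',
]

usage = None
for raw in chunks:
    chunk = json.loads(raw)
    if chunk["choices"][0]["finish_reason"] is not None and "usage" in chunk:
        usage = chunk["usage"]

print(usage)  # {'completion_tokens': 2, 'prompt_tokens': 10, 'total_tokens': 12}
```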
@@ -1664,24 +1664,6 @@ namespace dpct
                      const void *alpha, const void *a, int lda, const void *b,
                      int ldb, const void *beta, void *c, int ldc)
    {
#ifndef __INTEL_MKL__
        GGML_UNUSED(q);
        GGML_UNUSED(a_trans);
        GGML_UNUSED(b_trans);
        GGML_UNUSED(m);
        GGML_UNUSED(n);
        GGML_UNUSED(k);
        GGML_UNUSED(alpha);
        GGML_UNUSED(a);
        GGML_UNUSED(lda);
        GGML_UNUSED(b);
        GGML_UNUSED(ldb);
        GGML_UNUSED(beta);
        GGML_UNUSED(c);
        GGML_UNUSED(ldc);
        throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces "
                                 "Project does not support this API.");
#else
        Ts alpha_value = dpct::get_value(reinterpret_cast<const Ts *>(alpha), q);
        Ts beta_value = dpct::get_value(reinterpret_cast<const Ts *>(beta), q);
        auto data_a = get_memory<const Ta>(a);

@@ -1690,7 +1672,6 @@ namespace dpct
        oneapi::mkl::blas::column_major::gemm(
            q, a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
            data_b, ldb, beta_value, data_c, ldc);
#endif
    }

    template <typename VecT, class BinaryOperation, class = void>

@@ -2330,6 +2311,7 @@ namespace dpct
                lda, b, ldb, beta, c, ldc);
            break;
        }
#ifdef __INTEL_MKL__
        case detail::get_type_combination_id(
            library_data_t::real_bfloat16, library_data_t::real_bfloat16,
            library_data_t::real_float, library_data_t::real_float):

@@ -2391,6 +2373,7 @@ namespace dpct
            q, a_trans, b_trans, m, n, k, &alpha_float, a, lda, b, ldb, &beta_float, c, ldc);
            break;
        }
#endif // __INTEL_MKL__
        default:
            throw std::runtime_error("the combination of data type is unsupported");
        }
gguf-py/gguf/constants.py

@@ -24,6 +24,7 @@ class Keys:
        ALIGNMENT = "general.alignment"
        NAME = "general.name"
        AUTHOR = "general.author"
        VERSION = "general.version"
        URL = "general.url"
        DESCRIPTION = "general.description"
        LICENSE = "general.license"
gguf-py/gguf/gguf_writer.py

@@ -296,6 +296,9 @@ class GGUFWriter:
    def add_author(self, author: str) -> None:
        self.add_string(Keys.General.AUTHOR, author)

    def add_version(self, version: str) -> None:
        self.add_string(Keys.General.VERSION, version)

    def add_tensor_data_layout(self, layout: str) -> None:
        self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

@@ -305,6 +308,9 @@ class GGUFWriter:
    def add_description(self, description: str) -> None:
        self.add_string(Keys.General.DESCRIPTION, description)

    def add_licence(self, licence: str) -> None:
        self.add_string(Keys.General.LICENSE, licence)

    def add_source_url(self, url: str) -> None:
        self.add_string(Keys.General.SOURCE_URL, url)
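A minimal usage sketch (not from this commit) of the metadata setters through the `gguf` Python package; the file name, metadata values and dummy tensor are made up, and the call sequence assumes the `GGUFWriter` API as it exists in `gguf-py` at this point:

```python
import numpy as np
from gguf import GGUFWriter

writer = GGUFWriter("example.gguf", arch="llama")   # hypothetical output file
writer.add_name("example-model")
writer.add_author("Jane Doe")    # general.author
writer.add_version("v0.1")       # general.version (new in this change)
writer.add_licence("MIT")        # general.license
writer.add_tensor("dummy.weight", np.zeros((4, 4), dtype=np.float32))

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```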
llama.cpp

@@ -261,6 +261,7 @@ enum llm_kv {
    LLM_KV_GENERAL_ALIGNMENT,
    LLM_KV_GENERAL_NAME,
    LLM_KV_GENERAL_AUTHOR,
    LLM_KV_GENERAL_VERSION,
    LLM_KV_GENERAL_URL,
    LLM_KV_GENERAL_DESCRIPTION,
    LLM_KV_GENERAL_LICENSE,

@@ -330,6 +331,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
    { LLM_KV_GENERAL_NAME, "general.name" },
    { LLM_KV_GENERAL_AUTHOR, "general.author" },
    { LLM_KV_GENERAL_VERSION, "general.version" },
    { LLM_KV_GENERAL_URL, "general.url" },
    { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
    { LLM_KV_GENERAL_LICENSE, "general.license" },