From 80912f07414a19f79022e9795fec77524eeaf966 Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Sun, 20 Aug 2023 13:15:01 -0600
Subject: [PATCH] Improve help text, expand warning

---
 convert-llama-ggmlv3-to-gguf.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/convert-llama-ggmlv3-to-gguf.py b/convert-llama-ggmlv3-to-gguf.py
index 336767fb6..137c222cb 100644
--- a/convert-llama-ggmlv3-to-gguf.py
+++ b/convert-llama-ggmlv3-to-gguf.py
@@ -227,19 +227,19 @@ def handle_args():
     parser.add_argument('--input', '-i', help = 'Input GGMLv3 filename')
     parser.add_argument('--output', '-o', help ='Output GGUF filename')
     parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
-    parser.add_argument('--eps', default = '5.0e-06', help = 'RMS norm eps (use 1e-5 for LLaMA2)')
-    parser.add_argument('--context-length', '-c', type=int, default = 2048, help = 'Default max context length')
+    parser.add_argument('--eps', default = '5.0e-06', help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
+    parser.add_argument('--context-length', '-c', type=int, default = 2048, help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
     return parser.parse_args()
 
 def main():
     cfg = handle_args()
     print(f'* Using config: {cfg}')
-    print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
+    print('\n=== WARNING === Be aware that this conversion script is best-effort. Special tokens may not be converted correctly. Use a native GGUF model if possible. === WARNING ===\n')
     data = np.memmap(cfg.input, mode = 'r')
     model = GGMLV3Model()
     print('* Scanning GGML input file')
     offset = model.load(data, 0)
-    print(model.hyperparameters)
+    print(f'* GGML model hyperparameters: {model.hyperparameters}')
     converter = GGMLToGGUF(model, data, cfg)
     converter.save()
     print(f'* Successful completion. Output saved to: {cfg.output}')