gguf : deduplicate (#2629)

* gguf : better type names * dedup : CPU + Metal is working * ggml : fix warnings about unused results * llama.cpp : fix line feed and compiler warning * llama : fix strncpy warning + note token_to_str does not write null * llama : restore the original load/save session implementation Will migrate this to GGUF in the future * convert-llama-h5-to-gguf.py : support alt ctx param name * ggml : assert when using ggml_mul with non-F32 src1 * examples : dedup simple --------- Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
2023-08-16 19:25:29 +03:00 · 2023-08-16 19:25:29 +03:00 · 88b5769487
commit 88b5769487
parent 758ff1bbb5
21 changed files with 1630 additions and 7398 deletions
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@ -68,10 +68,10 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std:
 }

 // usage:
-//  ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
+//  ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
 //
 void usage(const char * executable) {
-    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n\n", executable);
+    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
    fprintf(stderr, "  --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
    fprintf(stderr, "  --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
    fprintf(stderr, "\nAllowed quantization types:\n");
@ -118,8 +118,8 @@ int main(int argc, char ** argv) {
        if (pos != std::string::npos) {
            fpath = fname_inp.substr(0, pos + 1);
        }
-        // export as [inp path]/ggml-model-[ftype].bin
-        fname_out = fpath + "ggml-model-" + ftype_str + ".bin";
+        // export as [inp path]/ggml-model-[ftype].gguf
+        fname_out = fpath + "ggml-model-" + ftype_str + ".gguf";
        arg_idx++;
    }
    else {