quantize: options for output and token embedding tensors qtype (#6239)

* quantize: be able to specify the output tensor type
* quantize: be able to specify the token embedding tensor type

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2024-03-22 19:47:14 +01:00 · 2024-03-22 19:47:14 +01:00 · 1d0331c12a
commit 1d0331c12a
parent dba1af6129
3 changed files with 61 additions and 26 deletions
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@ -189,6 +189,18 @@ static void prepare_imatrix(const std::string& imatrix_file,
    }
 }

+// Returns the type whose ggml_type_name() matches arg, or GGML_TYPE_COUNT if none does.
+static ggml_type parse_ggml_type(const char * arg) {
+    for (int idx = 0; idx < GGML_TYPE_COUNT; ++idx) {
+        const auto * label = ggml_type_name(ggml_type(idx));
+        if (label == nullptr || strcmp(arg, label) != 0) {
+            continue; // id without a name, or a different name
+        }
+        return ggml_type(idx);
+    }
+    return GGML_TYPE_COUNT;
+}
+
 int main(int argc, char ** argv) {
    if (argc < 3) {
        usage(argv[0]);
@ -203,6 +215,18 @@ int main(int argc, char ** argv) {
    for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
        if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) {
            params.quantize_output_tensor = false;
+        } else if (strcmp(argv[arg_idx], "--output-tensor-type") == 0) {
+            if (arg_idx < argc-1) {
+                params.output_tensor_type = parse_ggml_type(argv[++arg_idx]);
+            } else {
+                usage(argv[0]);
+            }
+        } else if (strcmp(argv[arg_idx], "--token-embedding-type") == 0) {
+            if (arg_idx < argc-1) {
+                params.token_embedding_type = parse_ggml_type(argv[++arg_idx]);
+            } else {
+                usage(argv[0]);
+            }
        } else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) {
            params.allow_requantize = true;
        } else if (strcmp(argv[arg_idx], "--pure") == 0) {