From e2bfaeb9c14ed9e4ba235946c2044504d2c860d5 Mon Sep 17 00:00:00 2001
From: Gerardo Romero <suaj9@protonmail.com>
Date: Sun, 19 Mar 2023 10:26:38 -0600
Subject: [PATCH] Add Windows support to quantize.py and use it in the README

quantize.py now picks the name of the quantize binary from the platform: "quantize.exe" on Windows and Cygwin, "quantize" on Linux and macOS, and "quantize" with a warning on any other platform. README.md has been updated to call quantize.py instead of the Bash script, and the now-unused quantize.sh has been removed.
---
 README.md   |  2 +-
 quantize.py | 43 ++++++++++++++++++++++++++++---------------
 quantize.sh | 15 ---------------
 3 files changed, 29 insertions(+), 31 deletions(-)
 delete mode 100755 quantize.sh

diff --git a/README.md b/README.md
index 15e1b9a2d..877fae53b 100644
--- a/README.md
+++ b/README.md
@@ -146,7 +146,7 @@ python3 -m pip install torch numpy sentencepiece
 python3 convert-pth-to-ggml.py models/7B/ 1
 
 # quantize the model to 4-bits
-./quantize.sh 7B
+python3 quantize.py 7B
 
 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128
diff --git a/quantize.py b/quantize.py
index 2466047fa..97d841777 100644
--- a/quantize.py
+++ b/quantize.py
@@ -10,15 +10,27 @@ import os
 
 
 def main():
-    """Parse the command line arguments and execute the script."""
+    """Update the quantize binary name depending on the platform and parse
+    the command line arguments and execute the script.
+    """
+
+    if "linux" in sys.platform or "darwin" in sys.platform:
+        quantize_script_binary = "quantize"
+
+    elif "win32" in sys.platform or "cygwin" in sys.platform:
+        quantize_script_binary = "quantize.exe"
+
+    else:
+        print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n")
+        quantize_script_binary = "quantize"
 
     parser = argparse.ArgumentParser(
         prog='Quantization Script',
         description='This script quantizes the given models by applying the '
-        '"quantize" script on them.'
+        f'"{quantize_script_binary}" script on them.'
     )
     parser.add_argument(
-        "models", nargs='+', choices=('7B', '13B', '30B', '65B'),
+        'models', nargs='+', choices=('7B', '13B', '30B', '65B'),
         help='The models to quantize.'
     )
     parser.add_argument(
@@ -32,7 +44,7 @@ def main():
     )
     parser.add_argument(
         '-q', '--quantize-script-path', dest='quantize_script_path',
-        default=os.path.join(os.getcwd(), "quantize"),
+        default=os.path.join(os.getcwd(), quantize_script_binary),
         help='Specify the path to the "quantize" script.'
     )
 
@@ -48,20 +60,21 @@ def main():
 
     if not os.path.isfile(args.quantize_script_path):
         print(
-            'The "quantize" script was not found in the current location.\n'
-            "If you want to use it from another location, set the "
-            "--quantize-script-path argument from the command line."
+            f'The "{quantize_script_binary}" script was not found in the '
+            "current location.\nIf you want to use it from another location, "
+            "set the --quantize-script-path argument from the command line."
         )
         sys.exit(1)
 
     for model in args.models:
-        # The model is separated in various parts (ggml-model-f16.bin.0...)
+        # The model is separated in various parts
+        # (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...)
         f16_model_path_base = os.path.join(
             args.models_path, model, "ggml-model-f16.bin"
         )
 
         f16_model_parts_paths = map(
-            lambda x: os.path.join(f16_model_path_base, x),
+            lambda filename: os.path.join(f16_model_path_base, filename),
             glob.glob(f"{f16_model_path_base}*")
         )
 
@@ -69,9 +82,9 @@ def main():
             if not os.path.isfile(f16_model_part_path):
                 print(
                     f"The f16 model {os.path.basename(f16_model_part_path)} "
-                    f"was not found in models/{model}. If you want to use it "
-                    "from another location, set the --models-path argument "
-                    "from the command line."
+                    f"was not found in {args.models_path}{os.path.sep}{model}"
+                    ". If you want to use it from another location, set the "
+                    "--models-path argument from the command line."
                 )
                 sys.exit(1)
 
@@ -86,14 +99,22 @@ def main():
 # This was extracted to a top-level function for parallelization, if
 # implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
 
-def __run_quantize_script(script_path, f16_model_path):
+def __run_quantize_script(script_path, f16_model_part_path):
     """Run the quantize script specifying the path to it and the path to the
     f16 model to quantize.
     """
 
-    new_quantized_model_path = f16_model_path.replace("16", "q4_0")
+    # Swap the "f16" tag for "q4_0" in the file name only. Replacing a bare
+    # "16" would leave the "f" behind (ggml-model-fq4_0.bin) and could also
+    # rewrite a "16" that happens to appear in a directory name.
+    model_dir, f16_file_name = os.path.split(f16_model_part_path)
+    new_quantized_model_path = os.path.join(
+        model_dir, f16_file_name.replace("f16", "q4_0")
+    )
     subprocess.run(
-        [script_path, f16_model_path, new_quantized_model_path, "2"],
-        shell=True, check=True
+        [script_path, f16_model_part_path, new_quantized_model_path, "2"],
+        # No shell=True: on POSIX a list combined with shell=True hands the
+        # model paths to the shell itself instead of to the quantize binary.
+        check=True
     )
 
diff --git a/quantize.sh b/quantize.sh
deleted file mode 100755
index 6194649b3..000000000
--- a/quantize.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-
-if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
-    echo
-    echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
-    echo
-    exit 1
-fi
-
-for i in `ls models/$1/ggml-model-f16.bin*`; do
-    ./quantize "$i" "${i/f16/q4_0}" 2
-    if [[ "$2" == "--remove-f16" ]]; then
-        rm "$i"
-    fi
-done