Added support for Windows and updated README to use this script

New code to set the name of the quantize script binary depending on the platform has been added (quantize.exe if working on Windows) and the README.md file has been updated to use this script instead of the Bash one.
This commit is contained in:
Gerardo Romero 2023-03-19 10:26:38 -06:00
parent c028226704
commit e2bfaeb9c1
3 changed files with 29 additions and 31 deletions

View file

@@ -146,7 +146,7 @@ python3 -m pip install torch numpy sentencepiece
python3 convert-pth-to-ggml.py models/7B/ 1 python3 convert-pth-to-ggml.py models/7B/ 1
# quantize the model to 4-bits # quantize the model to 4-bits
./quantize.sh 7B python3 quantize.py 7B
# run the inference # run the inference
./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128

View file

@@ -10,15 +10,27 @@ import os
def main(): def main():
"""Parse the command line arguments and execute the script.""" """Update the quantize binary name depending on the platform and parse
the command line arguments and execute the script.
"""
if "linux" in sys.platform or "darwin" in sys.platform:
quantize_script_binary = "quantize"
elif "win32" in sys.platform or "cygwin" in sys.platform:
quantize_script_binary = "quantize.exe"
else:
print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n")
quantize_script_binary = "quantize"
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog='Quantization Script', prog='Quantization Script',
description='This script quantizes the given models by applying the ' description='This script quantizes the given models by applying the '
'"quantize" script on them.' f'"{quantize_script_binary}" script on them.'
) )
parser.add_argument( parser.add_argument(
"models", nargs='+', choices=('7B', '13B', '30B', '65B'), 'models', nargs='+', choices=('7B', '13B', '30B', '65B'),
help='The models to quantize.' help='The models to quantize.'
) )
parser.add_argument( parser.add_argument(
@@ -32,7 +44,7 @@ def main():
) )
parser.add_argument( parser.add_argument(
'-q', '--quantize-script-path', dest='quantize_script_path', '-q', '--quantize-script-path', dest='quantize_script_path',
default=os.path.join(os.getcwd(), "quantize"), default=os.path.join(os.getcwd(), quantize_script_binary),
help='Specify the path to the "quantize" script.' help='Specify the path to the "quantize" script.'
) )
@@ -48,20 +60,21 @@ def main():
if not os.path.isfile(args.quantize_script_path): if not os.path.isfile(args.quantize_script_path):
print( print(
'The "quantize" script was not found in the current location.\n' f'The "{quantize_script_binary}" script was not found in the '
"If you want to use it from another location, set the " "current location.\nIf you want to use it from another location, "
"--quantize-script-path argument from the command line." "set the --quantize-script-path argument from the command line."
) )
sys.exit(1) sys.exit(1)
for model in args.models: for model in args.models:
# The model is separated in various parts (ggml-model-f16.bin.0...) # The model is separated in various parts
# (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...)
f16_model_path_base = os.path.join( f16_model_path_base = os.path.join(
args.models_path, model, "ggml-model-f16.bin" args.models_path, model, "ggml-model-f16.bin"
) )
f16_model_parts_paths = map( f16_model_parts_paths = map(
lambda x: os.path.join(f16_model_path_base, x), lambda filename: os.path.join(f16_model_path_base, filename),
glob.glob(f"{f16_model_path_base}*") glob.glob(f"{f16_model_path_base}*")
) )
@@ -69,9 +82,9 @@ def main():
if not os.path.isfile(f16_model_part_path): if not os.path.isfile(f16_model_part_path):
print( print(
f"The f16 model {os.path.basename(f16_model_part_path)} " f"The f16 model {os.path.basename(f16_model_part_path)} "
f"was not found in models/{model}. If you want to use it " f"was not found in {args.models_path}{os.path.sep}{model}"
"from another location, set the --models-path argument " ". If you want to use it from another location, set the "
"from the command line." "--models-path argument from the command line."
) )
sys.exit(1) sys.exit(1)
@@ -86,14 +99,14 @@ def main():
# This was extracted to a top-level function for parallelization, if # This was extracted to a top-level function for parallelization, if
# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406 # implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
def __run_quantize_script(script_path, f16_model_path): def __run_quantize_script(script_path, f16_model_part_path):
"""Run the quantize script specifying the path to it and the path to the """Run the quantize script specifying the path to it and the path to the
f16 model to quantize. f16 model to quantize.
""" """
new_quantized_model_path = f16_model_path.replace("16", "q4_0") new_quantized_model_path = f16_model_part_path.replace("16", "q4_0")
subprocess.run( subprocess.run(
[script_path, f16_model_path, new_quantized_model_path, "2"], [script_path, f16_model_part_path, new_quantized_model_path, "2"],
shell=True, check=True shell=True, check=True
) )

View file

@ -1,15 +0,0 @@
#!/usr/bin/env bash
# Quantize the f16 GGML model parts of one model size down to 4-bit (q4_0).
# Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]
# Validate the model-size argument: one or two digits followed by "B"
# (e.g. "7B", "13B"); anything else prints usage and exits non-zero.
if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
echo
echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
echo
exit 1
fi
# Quantize every f16 part of the model (ggml-model-f16.bin and any
# numbered split parts ggml-model-f16.bin.0, .bin.1, ...).
for i in `ls models/$1/ggml-model-f16.bin*`; do
# "${i/f16/q4_0}" derives the output filename by substituting f16 -> q4_0.
# NOTE(review): the trailing "2" is presumably the quantization type code
# expected by the ./quantize binary — confirm against its usage text.
./quantize "$i" "${i/f16/q4_0}" 2
# With --remove-f16, delete each original f16 part after quantizing
# it, to reclaim disk space.
if [[ "$2" == "--remove-f16" ]]; then
rm "$i"
fi
done