From e2bfaeb9c14ed9e4ba235946c2044504d2c860d5 Mon Sep 17 00:00:00 2001 From: Gerardo Romero Date: Sun, 19 Mar 2023 10:26:38 -0600 Subject: [PATCH] Added support for Windows and updated README to use this script New code to set the name of the quantize script binary depending on the platform has been added (quantize.exe if working on Windows) and the README.md file has been updated to use this script instead of the Bash one. --- README.md | 2 +- quantize.py | 43 ++++++++++++++++++++++++++++--------------- quantize.sh | 15 --------------- 3 files changed, 29 insertions(+), 31 deletions(-) delete mode 100755 quantize.sh diff --git a/README.md b/README.md index 15e1b9a2d..877fae53b 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ python3 -m pip install torch numpy sentencepiece python3 convert-pth-to-ggml.py models/7B/ 1 # quantize the model to 4-bits -./quantize.sh 7B +python3 quantize.py 7B # run the inference ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128 diff --git a/quantize.py b/quantize.py index 2466047fa..97d841777 100644 --- a/quantize.py +++ b/quantize.py @@ -10,15 +10,27 @@ import os def main(): - """Parse the command line arguments and execute the script.""" + """Update the quantize binary name depending on the platform and parse + the command line arguments and execute the script. + """ + + if "linux" in sys.platform or "darwin" in sys.platform: + quantize_script_binary = "quantize" + + elif "win32" in sys.platform or "cygwin" in sys.platform: + quantize_script_binary = "quantize.exe" + + else: + print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n") + quantize_script_binary = "quantize" parser = argparse.ArgumentParser( prog='Quantization Script', description='This script quantizes the given models by applying the ' - '"quantize" script on them.' + f'"{quantize_script_binary}" script on them.' ) parser.add_argument( - "models", nargs='+', choices=('7B', '13B', '30B', '65B'), + 'models', nargs='+', choices=('7B', '13B', '30B', '65B'), help='The models to quantize.' ) parser.add_argument( @@ -32,7 +44,7 @@ def main(): ) parser.add_argument( '-q', '--quantize-script-path', dest='quantize_script_path', - default=os.path.join(os.getcwd(), "quantize"), + default=os.path.join(os.getcwd(), quantize_script_binary), help='Specify the path to the "quantize" script.' ) @@ -48,20 +60,21 @@ def main(): if not os.path.isfile(args.quantize_script_path): print( - 'The "quantize" script was not found in the current location.\n' - "If you want to use it from another location, set the " - "--quantize-script-path argument from the command line." + f'The "{quantize_script_binary}" script was not found in the ' + "current location.\nIf you want to use it from another location, " + "set the --quantize-script-path argument from the command line." ) sys.exit(1) for model in args.models: - # The model is separated in various parts (ggml-model-f16.bin.0...) + # The model is separated in various parts + # (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...) f16_model_path_base = os.path.join( args.models_path, model, "ggml-model-f16.bin" ) f16_model_parts_paths = map( - lambda x: os.path.join(f16_model_path_base, x), + lambda filename: os.path.join(f16_model_path_base, filename), glob.glob(f"{f16_model_path_base}*") ) @@ -69,9 +82,9 @@ def main(): if not os.path.isfile(f16_model_part_path): print( f"The f16 model {os.path.basename(f16_model_part_path)} " - f"was not found in models/{model}. If you want to use it " - "from another location, set the --models-path argument " - "from the command line." + f"was not found in {args.models_path}{os.path.sep}{model}" + ". If you want to use it from another location, set the " + "--models-path argument from the command line." ) sys.exit(1) @@ -86,14 +99,14 @@ def main(): # This was extracted to a top-level function for parallelization, if # implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406 -def __run_quantize_script(script_path, f16_model_path): +def __run_quantize_script(script_path, f16_model_part_path): """Run the quantize script specifying the path to it and the path to the f16 model to quantize. """ - new_quantized_model_path = f16_model_path.replace("16", "q4_0") + new_quantized_model_path = f16_model_part_path.replace("16", "q4_0") subprocess.run( - [script_path, f16_model_path, new_quantized_model_path, "2"], + [script_path, f16_model_part_path, new_quantized_model_path, "2"], shell=True, check=True ) diff --git a/quantize.sh b/quantize.sh deleted file mode 100755 index 6194649b3..000000000 --- a/quantize.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then - echo - echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]" - echo - exit 1 -fi - -for i in `ls models/$1/ggml-model-f16.bin*`; do - ./quantize "$i" "${i/f16/q4_0}" 2 - if [[ "$2" == "--remove-f16" ]]; then - rm "$i" - fi -done