diff --git a/README.md b/README.md index 15e1b9a2d..877fae53b 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ python3 -m pip install torch numpy sentencepiece python3 convert-pth-to-ggml.py models/7B/ 1 # quantize the model to 4-bits -./quantize.sh 7B +python3 quantize.py 7B # run the inference ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128 diff --git a/quantize.py b/quantize.py index 2466047fa..97d841777 100644 --- a/quantize.py +++ b/quantize.py @@ -10,15 +10,27 @@ import os def main(): - """Parse the command line arguments and execute the script.""" + """Update the quantize binary name depending on the platform and parse + the command line arguments and execute the script. + """ + + if "linux" in sys.platform or "darwin" in sys.platform: + quantize_script_binary = "quantize" + + elif "win32" in sys.platform or "cygwin" in sys.platform: + quantize_script_binary = "quantize.exe" + + else: + print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n") + quantize_script_binary = "quantize" parser = argparse.ArgumentParser( prog='Quantization Script', description='This script quantizes the given models by applying the ' - '"quantize" script on them.' + f'"{quantize_script_binary}" script on them.' ) parser.add_argument( - "models", nargs='+', choices=('7B', '13B', '30B', '65B'), + 'models', nargs='+', choices=('7B', '13B', '30B', '65B'), help='The models to quantize.' ) parser.add_argument( @@ -32,7 +44,7 @@ def main(): ) parser.add_argument( '-q', '--quantize-script-path', dest='quantize_script_path', - default=os.path.join(os.getcwd(), "quantize"), + default=os.path.join(os.getcwd(), quantize_script_binary), help='Specify the path to the "quantize" script.' ) @@ -48,20 +60,21 @@ def main(): if not os.path.isfile(args.quantize_script_path): print( - 'The "quantize" script was not found in the current location.\n' - "If you want to use it from another location, set the " - "--quantize-script-path argument from the command line." + f'The "{quantize_script_binary}" script was not found in the ' + "current location.\nIf you want to use it from another location, " + "set the --quantize-script-path argument from the command line." ) sys.exit(1) for model in args.models: - # The model is separated in various parts (ggml-model-f16.bin.0...) + # The model is separated in various parts + # (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...) f16_model_path_base = os.path.join( args.models_path, model, "ggml-model-f16.bin" ) f16_model_parts_paths = map( - lambda x: os.path.join(f16_model_path_base, x), + lambda filename: os.path.join(f16_model_path_base, filename), glob.glob(f"{f16_model_path_base}*") ) @@ -69,9 +82,9 @@ def main(): if not os.path.isfile(f16_model_part_path): print( f"The f16 model {os.path.basename(f16_model_part_path)} " - f"was not found in models/{model}. If you want to use it " - "from another location, set the --models-path argument " - "from the command line." + f"was not found in {args.models_path}{os.path.sep}{model}" + ". If you want to use it from another location, set the " + "--models-path argument from the command line." ) sys.exit(1) @@ -86,14 +99,14 @@ def main(): # This was extracted to a top-level function for parallelization, if # implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406 -def __run_quantize_script(script_path, f16_model_path): +def __run_quantize_script(script_path, f16_model_part_path): """Run the quantize script specifying the path to it and the path to the f16 model to quantize. """ - new_quantized_model_path = f16_model_path.replace("16", "q4_0") + new_quantized_model_path = f16_model_part_path.replace("16", "q4_0") subprocess.run( - [script_path, f16_model_path, new_quantized_model_path, "2"], + [script_path, f16_model_part_path, new_quantized_model_path, "2"], shell=True, check=True ) diff --git a/quantize.sh b/quantize.sh deleted file mode 100755 index 6194649b3..000000000 --- a/quantize.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then - echo - echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]" - echo - exit 1 -fi - -for i in `ls models/$1/ggml-model-f16.bin*`; do - ./quantize "$i" "${i/f16/q4_0}" 2 - if [[ "$2" == "--remove-f16" ]]; then - rm "$i" - fi -done