Added support for Windows and updated README to use this script

New code to set the name of the quantize script binary depending on the platform has been added (quantize.exe if working on Windows) and the README.md file has been updated to use this script instead of the Bash one.
This commit is contained in:
Gerardo Romero 2023-03-19 10:26:38 -06:00
parent c028226704
commit e2bfaeb9c1
3 changed files with 29 additions and 31 deletions

View file

@@ -146,7 +146,7 @@ python3 -m pip install torch numpy sentencepiece
python3 convert-pth-to-ggml.py models/7B/ 1 python3 convert-pth-to-ggml.py models/7B/ 1
# quantize the model to 4-bits # quantize the model to 4-bits
./quantize.sh 7B python3 quantize.py 7B
# run the inference # run the inference
./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128

View file

@@ -10,15 +10,27 @@ import os
def main(): def main():
"""Parse the command line arguments and execute the script.""" """Update the quantize binary name depending on the platform and parse
the command line arguments and execute the script.
"""
if "linux" in sys.platform or "darwin" in sys.platform:
quantize_script_binary = "quantize"
elif "win32" in sys.platform or "cygwin" in sys.platform:
quantize_script_binary = "quantize.exe"
else:
print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n")
quantize_script_binary = "quantize"
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog='Quantization Script', prog='Quantization Script',
description='This script quantizes the given models by applying the ' description='This script quantizes the given models by applying the '
'"quantize" script on them.' f'"{quantize_script_binary}" script on them.'
) )
parser.add_argument( parser.add_argument(
"models", nargs='+', choices=('7B', '13B', '30B', '65B'), 'models', nargs='+', choices=('7B', '13B', '30B', '65B'),
help='The models to quantize.' help='The models to quantize.'
) )
parser.add_argument( parser.add_argument(
@@ -32,7 +44,7 @@ def main():
) )
parser.add_argument( parser.add_argument(
'-q', '--quantize-script-path', dest='quantize_script_path', '-q', '--quantize-script-path', dest='quantize_script_path',
default=os.path.join(os.getcwd(), "quantize"), default=os.path.join(os.getcwd(), quantize_script_binary),
help='Specify the path to the "quantize" script.' help='Specify the path to the "quantize" script.'
) )
@@ -48,20 +60,21 @@ def main():
if not os.path.isfile(args.quantize_script_path): if not os.path.isfile(args.quantize_script_path):
print( print(
'The "quantize" script was not found in the current location.\n' f'The "{quantize_script_binary}" script was not found in the '
"If you want to use it from another location, set the " "current location.\nIf you want to use it from another location, "
"--quantize-script-path argument from the command line." "set the --quantize-script-path argument from the command line."
) )
sys.exit(1) sys.exit(1)
for model in args.models: for model in args.models:
# The model is separated in various parts (ggml-model-f16.bin.0...) # The model is separated in various parts
# (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...)
f16_model_path_base = os.path.join( f16_model_path_base = os.path.join(
args.models_path, model, "ggml-model-f16.bin" args.models_path, model, "ggml-model-f16.bin"
) )
f16_model_parts_paths = map( f16_model_parts_paths = map(
lambda x: os.path.join(f16_model_path_base, x), lambda filename: os.path.join(f16_model_path_base, filename),
glob.glob(f"{f16_model_path_base}*") glob.glob(f"{f16_model_path_base}*")
) )
@@ -69,9 +82,9 @@ def main():
if not os.path.isfile(f16_model_part_path): if not os.path.isfile(f16_model_part_path):
print( print(
f"The f16 model {os.path.basename(f16_model_part_path)} " f"The f16 model {os.path.basename(f16_model_part_path)} "
f"was not found in models/{model}. If you want to use it " f"was not found in {args.models_path}{os.path.sep}{model}"
"from another location, set the --models-path argument " ". If you want to use it from another location, set the "
"from the command line." "--models-path argument from the command line."
) )
sys.exit(1) sys.exit(1)
@@ -86,14 +99,14 @@ def main():
# This was extracted to a top-level function for parallelization, if # This was extracted to a top-level function for parallelization, if
# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406 # implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
def __run_quantize_script(script_path, f16_model_path): def __run_quantize_script(script_path, f16_model_part_path):
"""Run the quantize script specifying the path to it and the path to the """Run the quantize script specifying the path to it and the path to the
f16 model to quantize. f16 model to quantize.
""" """
new_quantized_model_path = f16_model_path.replace("16", "q4_0") new_quantized_model_path = f16_model_part_path.replace("16", "q4_0")
subprocess.run( subprocess.run(
[script_path, f16_model_path, new_quantized_model_path, "2"], [script_path, f16_model_part_path, new_quantized_model_path, "2"],
shell=True, check=True shell=True, check=True
) )

View file

@ -1,15 +0,0 @@
#!/usr/bin/env bash
# Quantize the f16 GGML model parts of one model size down to 4-bit (q4_0).
# Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]
# Validate the model-size argument: one or two digits followed by "B"
# (e.g. "7B", "13B"); anything else prints usage and exits non-zero.
if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
echo
echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
echo
exit 1
fi
# Quantize every f16 part of the model (ggml-model-f16.bin and any
# numbered split parts ggml-model-f16.bin.0, .bin.1, ...).
for i in `ls models/$1/ggml-model-f16.bin*`; do
# "${i/f16/q4_0}" derives the output filename by substituting f16 -> q4_0.
# NOTE(review): the trailing "2" is presumably the quantization type code
# expected by the ./quantize binary — confirm against its usage text.
./quantize "$i" "${i/f16/q4_0}" 2
# With --remove-f16, delete each original f16 part after quantizing
# it, to reclaim disk space.
if [[ "$2" == "--remove-f16" ]]; then
rm "$i"
fi
done