Added support for Windows and updated README to use this script

New code to set the name of the quantize script binary depending on the platform has been added (quantize.exe if working on Windows) and the README.md file has been updated to use this script instead of the Bash one.
This commit is contained in:
Gerardo Romero 2023-03-19 10:26:38 -06:00
parent c028226704
commit e2bfaeb9c1
3 changed files with 29 additions and 31 deletions

View file

@@ -146,7 +146,7 @@ python3 -m pip install torch numpy sentencepiece
python3 convert-pth-to-ggml.py models/7B/ 1
# quantize the model to 4-bits
./quantize.sh 7B
python3 quantize.py 7B
# run the inference
./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128

View file

@@ -10,15 +10,27 @@ import os
def main():
"""Parse the command line arguments and execute the script."""
"""Update the quantize binary name depending on the platform and parse
the command line arguments and execute the script.
"""
if "linux" in sys.platform or "darwin" in sys.platform:
quantize_script_binary = "quantize"
elif "win32" in sys.platform or "cygwin" in sys.platform:
quantize_script_binary = "quantize.exe"
else:
print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n")
quantize_script_binary = "quantize"
parser = argparse.ArgumentParser(
prog='Quantization Script',
description='This script quantizes the given models by applying the '
'"quantize" script on them.'
f'"{quantize_script_binary}" script on them.'
)
parser.add_argument(
"models", nargs='+', choices=('7B', '13B', '30B', '65B'),
'models', nargs='+', choices=('7B', '13B', '30B', '65B'),
help='The models to quantize.'
)
parser.add_argument(
@@ -32,7 +44,7 @@ def main():
)
parser.add_argument(
'-q', '--quantize-script-path', dest='quantize_script_path',
default=os.path.join(os.getcwd(), "quantize"),
default=os.path.join(os.getcwd(), quantize_script_binary),
help='Specify the path to the "quantize" script.'
)
@@ -48,20 +60,21 @@ def main():
if not os.path.isfile(args.quantize_script_path):
print(
'The "quantize" script was not found in the current location.\n'
"If you want to use it from another location, set the "
"--quantize-script-path argument from the command line."
f'The "{quantize_script_binary}" script was not found in the '
"current location.\nIf you want to use it from another location, "
"set the --quantize-script-path argument from the command line."
)
sys.exit(1)
for model in args.models:
# The model is separated in various parts (ggml-model-f16.bin.0...)
# The model is separated in various parts
# (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...)
f16_model_path_base = os.path.join(
args.models_path, model, "ggml-model-f16.bin"
)
f16_model_parts_paths = map(
lambda x: os.path.join(f16_model_path_base, x),
lambda filename: os.path.join(f16_model_path_base, filename),
glob.glob(f"{f16_model_path_base}*")
)
@@ -69,9 +82,9 @@ def main():
if not os.path.isfile(f16_model_part_path):
print(
f"The f16 model {os.path.basename(f16_model_part_path)} "
f"was not found in models/{model}. If you want to use it "
"from another location, set the --models-path argument "
"from the command line."
f"was not found in {args.models_path}{os.path.sep}{model}"
". If you want to use it from another location, set the "
"--models-path argument from the command line."
)
sys.exit(1)
@@ -86,14 +99,14 @@ def main():
# This was extracted to a top-level function for parallelization, if
# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
def __run_quantize_script(script_path, f16_model_path):
def __run_quantize_script(script_path, f16_model_part_path):
"""Run the quantize script specifying the path to it and the path to the
f16 model to quantize.
"""
new_quantized_model_path = f16_model_path.replace("16", "q4_0")
new_quantized_model_path = f16_model_part_path.replace("16", "q4_0")
subprocess.run(
[script_path, f16_model_path, new_quantized_model_path, "2"],
[script_path, f16_model_part_path, new_quantized_model_path, "2"],
shell=True, check=True
)

View file

@@ -1,15 +0,0 @@
#!/usr/bin/env bash
# Quantize all f16 GGML model parts for the given model size to 4-bit (q4_0).
#
# Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]
#   --remove-f16  delete each f16 source file after it has been quantized.

# Validate the model-size argument: one or two digits followed by "B".
if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
  echo
  echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
  echo
  exit 1
fi

# Iterate over the model parts with a glob instead of parsing `ls` output,
# which word-splits on whitespace and breaks on special characters.
for i in models/"$1"/ggml-model-f16.bin*; do
  # If the glob matched nothing, bash passes the literal pattern through;
  # skip it so we never invoke ./quantize on a non-existent path.
  [[ -e "$i" ]] || continue
  ./quantize "$i" "${i/f16/q4_0}" 2
  if [[ "$2" == "--remove-f16" ]]; then
    rm "$i"
  fi
done