diff --git a/README.md b/README.md
index 15e1b9a2d..877fae53b 100644
--- a/README.md
+++ b/README.md
@@ -146,7 +146,7 @@ python3 -m pip install torch numpy sentencepiece
 python3 convert-pth-to-ggml.py models/7B/ 1
 
 # quantize the model to 4-bits
-./quantize.sh 7B
+python3 quantize.py 7B
 
 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128
diff --git a/quantize.py b/quantize.py
index 2466047fa..97d841777 100644
--- a/quantize.py
+++ b/quantize.py
@@ -10,15 +10,27 @@ import os
 
 
 def main():
-    """Parse the command line arguments and execute the script."""
+    """Update the quantize binary name depending on the platform and parse
+    the command line arguments and execute the script.
+    """
+
+    if "linux" in sys.platform or "darwin" in sys.platform:
+        quantize_script_binary = "quantize"
+
+    elif "win32" in sys.platform or "cygwin" in sys.platform:
+        quantize_script_binary = "quantize.exe"
+
+    else:
+        print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n")
+        quantize_script_binary = "quantize"
 
     parser = argparse.ArgumentParser(
         prog='Quantization Script',
         description='This script quantizes the given models by applying the '
-        '"quantize" script on them.'
+        f'"{quantize_script_binary}" script on them.'
     )
     parser.add_argument(
-        "models", nargs='+', choices=('7B', '13B', '30B', '65B'),
+        'models', nargs='+', choices=('7B', '13B', '30B', '65B'),
         help='The models to quantize.'
     )
     parser.add_argument(
@@ -32,7 +44,7 @@ def main():
     )
     parser.add_argument(
         '-q', '--quantize-script-path', dest='quantize_script_path',
-        default=os.path.join(os.getcwd(), "quantize"),
+        default=os.path.join(os.getcwd(), quantize_script_binary),
         help='Specify the path to the "quantize" script.'
     )
 
@@ -48,20 +60,21 @@ def main():
 
     if not os.path.isfile(args.quantize_script_path):
         print(
-            'The "quantize" script was not found in the current location.\n'
-            "If you want to use it from another location, set the "
-            "--quantize-script-path argument from the command line."
+            f'The "{quantize_script_binary}" script was not found in the '
+            "current location.\nIf you want to use it from another location, "
+            "set the --quantize-script-path argument from the command line."
         )
         sys.exit(1)
 
     for model in args.models:
-        # The model is separated in various parts (ggml-model-f16.bin.0...)
+        # The model is separated in various parts
+        # (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...)
         f16_model_path_base = os.path.join(
             args.models_path, model, "ggml-model-f16.bin"
         )
 
         f16_model_parts_paths = map(
-            lambda x: os.path.join(f16_model_path_base, x),
+            lambda filename: os.path.join(f16_model_path_base, filename),
             glob.glob(f"{f16_model_path_base}*")
         )
 
@@ -69,9 +82,9 @@ def main():
             if not os.path.isfile(f16_model_part_path):
                 print(
                     f"The f16 model {os.path.basename(f16_model_part_path)} "
-                    f"was not found in models/{model}. If you want to use it "
-                    "from another location, set the --models-path argument "
-                    "from the command line."
+                    f"was not found in {args.models_path}{os.path.sep}{model}"
+                    ". If you want to use it from another location, set the "
+                    "--models-path argument from the command line."
                 )
                 sys.exit(1)
 
@@ -86,14 +99,14 @@ def main():
 # This was extracted to a top-level function for parallelization, if
 # implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
 
-def __run_quantize_script(script_path, f16_model_path):
+def __run_quantize_script(script_path, f16_model_part_path):
-    """Run the quantize script specifying the path to it and the path to the
-    f16 model to quantize.
+    """Run the quantize binary (no shell) on one f16 model part; the output
+    path is the input path with "f16" replaced by "q4_0".
     """
 
-    new_quantized_model_path = f16_model_path.replace("16", "q4_0")
+    new_quantized_model_path = f16_model_part_path.replace("f16", "q4_0")
     subprocess.run(
-        [script_path, f16_model_path, new_quantized_model_path, "2"],
-        shell=True, check=True
+        [script_path, f16_model_part_path, new_quantized_model_path, "2"],
+        check=True
     )
 
diff --git a/quantize.sh b/quantize.sh
deleted file mode 100755
index 6194649b3..000000000
--- a/quantize.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-
-if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
-    echo
-    echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
-    echo
-    exit 1
-fi
-
-for i in `ls models/$1/ggml-model-f16.bin*`; do
-    ./quantize "$i" "${i/f16/q4_0}" 2
-    if [[ "$2" == "--remove-f16" ]]; then
-        rm "$i"
-    fi
-done