diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 42f537769..108eb1fcc 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -17,6 +17,7 @@
 # and vocabulary.
 #
 import argparse
+import os
 import sys
 import json
 import struct
@@ -44,8 +45,14 @@ def get_n_parts(dim):
 
 def load_hparams_and_tokenizer(dir_model):
 
+    # `dir_model` is something like `models/7B` or `models/7B/`, and
+    # "tokenizer.model" is expected in its parent dir. Take that parent lexically:
+    # if `dir_model` is a symlink, `..` resolves against the link target instead.
+    # abspath keeps a bare `7B` or `.` from yielding an empty parent.
+    model_parent_dir = os.path.dirname(os.path.abspath(dir_model))
+
     fname_hparams = f"{dir_model}/params.json"
-    fname_tokenizer = f"{dir_model}/../tokenizer.model"
+    fname_tokenizer = os.path.join(model_parent_dir, "tokenizer.model")
 
     with open(fname_hparams, "r") as f:
         hparams = json.load(f)
diff --git a/llamacpp.dll b/llamacpp.dll
index f888c6e28..8463cc428 100644
Binary files a/llamacpp.dll and b/llamacpp.dll differ
diff --git a/main.cpp b/main.cpp
index 1f6e7dabe..5d3b4ec14 100644
--- a/main.cpp
+++ b/main.cpp
@@ -107,14 +107,14 @@ int llama_model_load(const std::string & fname, llama_model & model, gpt_vocab &
     {
         uint32_t magic;
         fin.read((char *) &magic, sizeof(magic));
-        if (magic == 0x67676d6c) {
+        if (magic == FILE_MAGIC_UNVERSIONED) {
             fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n",
                     __func__, fname.c_str());
             legacy_file_format = true;
         }
         else
         {
-        if (magic != 0x67676d66) {
+        if (magic != FILE_MAGIC) {
             fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
             return false;
         }
@@ -122,9 +122,9 @@ int llama_model_load(const std::string & fname, llama_model & model, gpt_vocab &
         uint32_t format_version;
         fin.read((char *) &format_version, sizeof(format_version));
 
-        if (format_version != 1) {
-            fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n",
-                    __func__, fname.c_str(), format_version);
+        if (format_version != FILE_VERSION) {
+            fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n",
+                    __func__, fname.c_str(), format_version, FILE_VERSION);
             return false;
         }
         }
diff --git a/main.exe b/main.exe
index e3d94527d..504f4ecb1 100644
Binary files a/main.exe and b/main.exe differ
diff --git a/quantize.cpp b/quantize.cpp
index 166e9163a..07db33a3c 100644
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -64,12 +64,12 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
     {
         uint32_t magic;
         finp.read((char *) &magic, sizeof(magic));
-        if (magic == 0x67676d6c) {
+        if (magic == FILE_MAGIC_UNVERSIONED) {
             fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n",
                     __func__, fname_inp.c_str());
             return false;
         }
-        if (magic != 0x67676d66) {
+        if (magic != FILE_MAGIC) {
             fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str());
             return false;
         }
@@ -79,9 +79,9 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
         uint32_t format_version;
         finp.read((char *) &format_version, sizeof(format_version));
 
-        if (format_version != 1) {
-            fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n",
-                    __func__, fname_inp.c_str(), format_version);
+        if (format_version != FILE_VERSION) {
+            fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n",
+                    __func__, fname_inp.c_str(), format_version, FILE_VERSION);
             return false;
         }
 
diff --git a/quantize.exe b/quantize.exe
index 41c0cc9c8..2480fa92d 100644
Binary files a/quantize.exe and b/quantize.exe differ
diff --git a/utils.h b/utils.h
index b3a0f4724..65fe02ba1 100644
--- a/utils.h
+++ b/utils.h
@@ -48,6 +48,14 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
 
 std::string gpt_random_prompt(std::mt19937 & rng);
 
+//
+// Model file parsing
+//
+
+#define FILE_MAGIC_UNVERSIONED 0x67676d6c // 'ggml' in hex; pre-versioned files
+#define FILE_MAGIC 0x67676d66 // 'ggmf' in hex
+#define FILE_VERSION 1
+
 //
 // Vocab utils
 //