diff --git a/common/common.cpp b/common/common.cpp
index e624fc7f3..b5aaad439 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1354,7 +1354,11 @@ void gpt_params_handle_model_default(gpt_params & params) {
             }
             params.hf_file = params.model;
         } else if (params.model.empty()) {
-            params.model = "models/" + string_split(params.hf_file, '/').back();
+            // Resolve the cache directory from the LLAMA_CACHE env var, falling back to
+            // the compiled-in default, and make sure it ends with a path separator so the
+            // file name is not glued onto the directory name.
+            std::string cache_dir = getenv("LLAMA_CACHE") ? getenv("LLAMA_CACHE") : DEFAULT_LLAMA_CACHE;
+            if (!cache_dir.empty() && cache_dir.back() != '/') { cache_dir += '/'; }
+            params.model = cache_dir + string_split(params.hf_file, '/').back();
         }
     } else if (!params.model_url.empty()) {
         if (params.model.empty()) {
@@ -2076,6 +2077,16 @@ static bool llama_download_file(const std::string & url, const std::string & path) {
             }
         }
 
+    // Create parent directories if not exist
+    const std::vector<std::string> path_parts = string_split(path_temporary, DIRECTORY_SEPARATOR);
+    std::string parent_dir = "";
+    struct stat st;
+    for (unsigned i = 0; i < path_parts.size() - 1; i++) {
+        parent_dir += path_parts[i] + DIRECTORY_SEPARATOR;
+        if (stat(parent_dir.c_str(), &st) != 0) {
+            mkdir(parent_dir.c_str(), S_IRWXU);
+        }
+    }
+
     // Set the output file
     std::unique_ptr<FILE, decltype(&fclose)> outfile(fopen(path_temporary.c_str(), "wb"), fclose);
     if (!outfile) {
diff --git a/common/common.h b/common/common.h
index 566490e2f..40feb1ad4 100644
--- a/common/common.h
+++ b/common/common.h
@@ -32,6 +32,13 @@
 } while(0)
 
 #define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
+#define DEFAULT_LLAMA_CACHE ".cache/"
+
+#include <sys/stat.h> // stat(), mkdir(), S_IRWXU (POSIX)
+#ifdef _WIN32
+#include <direct.h>   // _mkdir()
+#define mkdir(path, mode) _mkdir(path) // On Windows, _mkdir does not take mode
+#endif
 
 // build info
 extern int LLAMA_BUILD_NUMBER;