diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index b651f9e61..2c0ae4e2a 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -24,7 +24,7 @@
     useOpenCL
     useRocm
     useVulkan
-  ],
+  ] && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
   useMpi ? false, # Increases the runtime closure size by ~700M
@@ -67,10 +67,15 @@ let
     strings.optionalString (suffices != [ ])
       ", accelerated with ${strings.concatStringsSep ", " suffices}";
 
+  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
+
   # TODO: package the Python in this repository in a Nix-like way.
   # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
   # is PEP 517-compatible, and ensure the correct .dist-info is generated.
   # https://peps.python.org/pep-0517/
+  #
+  # TODO: Package up each Python script or service appropriately, by making
+  # them into "entrypoints"
   llama-python = python3.withPackages (
     ps: [
       ps.numpy
@@ -159,11 +164,6 @@ effectiveStdenv.mkDerivation (
         --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
       substituteInPlace ./ggml-metal.m \
         --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
-
-      # TODO: Package up each Python script or service appropriately.
-      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
-      # we could make those *.py into setuptools' entrypoints
-      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
     '';
 
     # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
@@ -244,8 +244,8 @@ effectiveStdenv.mkDerivation (
     # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
     # if they haven't been added yet.
     postInstall = ''
-      mv $out/bin/main $out/bin/llama
-      mv $out/bin/server $out/bin/llama-server
+      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
+      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
       mkdir -p $out/include
       cp $src/llama.h $out/include/
     '';
diff --git a/examples/llava/MobileVLM-README.md b/examples/llava/MobileVLM-README.md
index b3b66331f..1fc83247a 100644
--- a/examples/llava/MobileVLM-README.md
+++ b/examples/llava/MobileVLM-README.md
@@ -6,7 +6,7 @@ for more information, please go to [Meituan-AutoML/MobileVLM](https://github.com
 
 The implementation is based on llava, and is compatible with llava and mobileVLM. The usage is basically same as llava.
 
-Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobiVLM as an example, the different conversion step will be shown.
+Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobileVLM as an example, the different conversion step will be shown.
 
 ## Usage
 Build with cmake or run `make llava-cli` to build it.
diff --git a/examples/main/README.md b/examples/main/README.md
index 9c83fd3bf..bb696b562 100644
--- a/examples/main/README.md
+++ b/examples/main/README.md
@@ -296,7 +296,9 @@ These options help improve the performance and memory usage of the LLaMA models.
 
 ### Batch Size
 
-- `-b N, --batch-size N`: Set the batch size for prompt processing (default: 512). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations.
+- `-b N, --batch-size N`: Set the batch size for prompt processing (default: `2048`). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations.
+
+- `-ub N`, `--ubatch-size N`: physical maximum batch size. This is for pipeline parallelization. Default: `512`.
 
 ### Prompt Caching
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 53ad9239e..92090b920 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3566,6 +3566,7 @@ int main(int argc, char ** argv) {
     sigemptyset (&sigint_action.sa_mask);
     sigint_action.sa_flags = 0;
     sigaction(SIGINT, &sigint_action, NULL);
+    sigaction(SIGTERM, &sigint_action, NULL);
 #elif defined (_WIN32)
     auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
         return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
diff --git a/flake.nix b/flake.nix
index 9a528a66b..9cd3756e5 100644
--- a/flake.nix
+++ b/flake.nix
@@ -145,6 +145,7 @@
           # the same path you would with an overlay.
           legacyPackages = {
             llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
+            llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
             llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
             llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
           };
@@ -155,6 +156,7 @@
             {
               default = config.legacyPackages.llamaPackages.llama-cpp;
               vulkan = config.packages.default.override { useVulkan = true; };
+              windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
             }
             // lib.optionalAttrs pkgs.stdenv.isLinux {
               opencl = config.packages.default.override { useOpenCL = true; };
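The `server.cpp` hunk above registers the existing `SIGINT` sigaction for `SIGTERM` as well, so a plain `kill <pid>` or a container stop goes through the same graceful shutdown path as Ctrl+C. Below is a minimal standalone sketch of that pattern, not the server's actual code: the handler name `handle_shutdown` and the flag `g_should_exit` are illustrative stand-ins for server.cpp's own `signal_handler` and cleanup logic.

```cpp
// Sketch: one sigaction struct reused for both SIGINT and SIGTERM (POSIX only).
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t g_should_exit = 0;

static void handle_shutdown(int /*sig*/) {
    g_should_exit = 1; // only set a flag; do the real cleanup outside the handler
}

int main() {
    struct sigaction shutdown_action = {};
    shutdown_action.sa_handler = handle_shutdown;
    sigemptyset(&shutdown_action.sa_mask);
    shutdown_action.sa_flags = 0;

    // The same action handles Ctrl+C and `kill` / `docker stop`.
    sigaction(SIGINT,  &shutdown_action, NULL);
    sigaction(SIGTERM, &shutdown_action, NULL);

    while (!g_should_exit) {
        pause(); // sleep until a signal arrives
    }
    fprintf(stderr, "shutting down cleanly\n");
    return 0;
}
```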