diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index 270b91010..fa7baf7e8 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -118,9 +118,9 @@ static void enable_sam() {
 }
 #endif
 
-static bool ggml_vk_checkPhysicalDeviceFeatures(vk::PhysicalDevice physicalDevice) {
+static bool ggml_vk_checkPhysicalDeviceFeatures(vk::PhysicalDevice physical_device) {
     vk::PhysicalDeviceFeatures availableFeatures;
-    physicalDevice.getFeatures(&availableFeatures);
+    physical_device.getFeatures(&availableFeatures);
 
     if (!availableFeatures.shaderInt16)
         return false;
@@ -134,7 +134,7 @@ static bool ggml_vk_checkPhysicalDeviceFeatures(vk::PhysicalDevice physicalDevic
     vk::PhysicalDeviceFeatures2 features2;
     features2.pNext = &availableFeatures11;
 
-    physicalDevice.getFeatures2(&features2);
+    physical_device.getFeatures2(&features2);
 
     if (!availableFeatures11.uniformAndStorageBuffer16BitAccess ||
         !availableFeatures11.storageBuffer16BitAccess) {
@@ -169,29 +169,31 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
     if (!komputeManager()->hasVulkan() || !komputeManager()->hasInstance())
         return results;
 
-    std::vector<vk::PhysicalDevice> physicalDevices;
+    std::vector<vk::PhysicalDevice> physical_devices;
     try {
-        physicalDevices = komputeManager()->listDevices();
+        physical_devices = komputeManager()->listDevices();
     } catch (vk::SystemError & err) {
         std::cerr << __func__ << ": ignoring Vulkan exception: " << err.what() << "\n";
         return results;
     }
 
-    uint32_t deviceCount = physicalDevices.size();
+    uint32_t deviceCount = physical_devices.size();
     if (deviceCount == 0)
         return results;
 
     std::unordered_map<std::string, size_t> count_by_name;
 
     for (uint32_t i = 0; i < deviceCount; i++) {
-        VkPhysicalDeviceProperties properties = physicalDevices.at(i).getProperties();
-        VkPhysicalDeviceMemoryProperties memoryProperties = physicalDevices.at(i).getMemoryProperties();
-        const uint32_t major = VK_VERSION_MAJOR(properties.apiVersion);
-        const uint32_t minor = VK_VERSION_MINOR(properties.apiVersion);
+        const auto & physical_device = physical_devices[i];
+
+        VkPhysicalDeviceProperties dev_props = physical_device.getProperties();
+        VkPhysicalDeviceMemoryProperties memoryProperties = physical_device.getMemoryProperties();
+        const uint32_t major = VK_VERSION_MAJOR(dev_props.apiVersion);
+        const uint32_t minor = VK_VERSION_MINOR(dev_props.apiVersion);
         if (major < 1 || minor < 2)
             continue;
 
-        if (!ggml_vk_checkPhysicalDeviceFeatures(physicalDevices.at(i)))
+        if (!ggml_vk_checkPhysicalDeviceFeatures(physical_device))
             continue;
 
         size_t heapSize = 0;
@@ -206,23 +208,45 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
         if (heapSize < memoryRequired)
             continue;
 
-        vk::PhysicalDeviceSubgroupProperties subgroupProperties;
-        vk::PhysicalDeviceProperties2 deviceProperties2;
-        deviceProperties2.pNext = &subgroupProperties;
-        physicalDevices.at(i).getProperties2(&deviceProperties2);
+        auto ext_props = physical_device.enumerateDeviceExtensionProperties();
+        bool has_maintenance4 = false;
 
-        if (subgroupProperties.subgroupSize < 32)
+        // Check if maintenance4 is supported
+        for (const auto & properties : ext_props) {
+            if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) {
+                has_maintenance4 = true;
+            }
+        }
+
+        vk::PhysicalDeviceSubgroupProperties subgroup_props;
+        vk::PhysicalDeviceProperties2 dev_props2;
+        vk::PhysicalDeviceMaintenance3Properties dev_props3;
+        vk::PhysicalDeviceMaintenance4Properties dev_props4;
+        dev_props2.pNext = &dev_props3;
+        dev_props3.pNext = &subgroup_props;
+        if (has_maintenance4) {
+            subgroup_props.pNext = &dev_props4;
+        }
+        physical_device.getProperties2(&dev_props2);
+
+        if (subgroup_props.subgroupSize < 32)
             continue;
 
         ggml_vk_device d;
         d.index = i;
-        d.type = properties.deviceType;
+        d.type = dev_props.deviceType;
         d.heapSize = heapSize;
-        d.vendor = strdup(ggml_vk_getVendorName(properties.vendorID));
-        d.subgroupSize = subgroupProperties.subgroupSize;
-        d.bufferAlignment = properties.limits.minStorageBufferOffsetAlignment;
+        d.vendor = strdup(ggml_vk_getVendorName(dev_props.vendorID));
+        d.subgroupSize = subgroup_props.subgroupSize;
+        d.bufferAlignment = dev_props.limits.minStorageBufferOffsetAlignment;
 
-        std::string name(properties.deviceName);
+        if (has_maintenance4) {
+            d.maxAlloc = std::min(dev_props3.maxMemoryAllocationSize, dev_props4.maxBufferSize);
+        } else {
+            d.maxAlloc = dev_props3.maxMemoryAllocationSize;
+        }
+
+        std::string name(dev_props.deviceName);
         size_t n_idx = ++count_by_name[name];
         if (n_idx > 1) {
             name += " (" + std::to_string(n_idx) + ")";
@@ -413,12 +437,6 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v
 
 static size_t ggml_vk_aligned_offset(ggml_backend_buffer_t buffer, size_t offset) {
     size_t minStorageBufferOffsetAlignment = ggml_backend_buffer_get_alignment(buffer);
-    if (minStorageBufferOffsetAlignment == 0) {
-        vk::PhysicalDeviceProperties deviceProperties;
-        deviceProperties = komputeManager()->physicalDevice()->getProperties();
-        vk::PhysicalDeviceLimits deviceLimits = deviceProperties.limits;
-        minStorageBufferOffsetAlignment = deviceLimits.minStorageBufferOffsetAlignment;
-    }
 
     // If offset is already aligned, return it directly
     if (offset % minStorageBufferOffsetAlignment == 0) {
@@ -1731,10 +1749,11 @@ struct ggml_backend_kompute_buffer_type_context {
     int         device;
     int         device_ref = 0;
     uint64_t    buffer_alignment;
+    uint64_t    max_alloc;
     std::string name;
 
-    ggml_backend_kompute_buffer_type_context(int device, uint64_t buffer_alignment)
-        : device(device), buffer_alignment(buffer_alignment), name(ggml_kompute_format_name(device)) {}
+    ggml_backend_kompute_buffer_type_context(int device, uint64_t buffer_alignment, uint64_t max_alloc)
+        : device(device), buffer_alignment(buffer_alignment), max_alloc(max_alloc), name(ggml_kompute_format_name(device)) {}
 };
 
 static void ggml_backend_kompute_device_ref(ggml_backend_buffer_type_t buft) {
@@ -1842,6 +1861,11 @@ static size_t ggml_backend_kompute_buffer_type_get_alignment(ggml_backend_buffer
     return ctx->buffer_alignment;
 }
 
+static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { // get_max_size callback for the kompute buffer type; NOTE(review): `vk` infix differs from the `kompute` infix used by the sibling buffer-type callbacks
+    auto * ctx = static_cast<ggml_backend_kompute_buffer_type_context *>(buft->context); // buft->context holds the ggml_backend_kompute_buffer_type_context created for this device
+    return ctx->max_alloc; // max_alloc is stored as uint64_t and implicitly converted to size_t here; NOTE(review): would truncate if size_t is narrower than 64 bits - confirm target platforms
+}
+
 static bool ggml_backend_kompute_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
     GGML_UNUSED(buft);
     return ggml_backend_is_kompute(backend);
@@ -1851,6 +1875,7 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
     /* .get_name         = */ ggml_backend_kompute_buffer_type_get_name,
     /* .alloc_buffer     = */ ggml_backend_kompute_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_kompute_buffer_type_get_alignment,
+    /* .get_max_size     = */ ggml_backend_vk_buffer_type_get_max_size,
     /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
     /* .supports_backend = */ ggml_backend_kompute_buffer_type_supports_backend,
     /* .is_host          = */ NULL,
@@ -1865,7 +1890,7 @@ ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
         for (const auto & dev : devices) {
             vec.push_back({
                 /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
-                /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment)
+                /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc)
             });
         }
         return vec;
diff --git a/ggml-kompute.h b/ggml-kompute.h
index c56e42f8e..171465456 100644
--- a/ggml-kompute.h
+++ b/ggml-kompute.h
@@ -19,6 +19,7 @@ struct ggml_vk_device {
     const char * vendor;
     int subgroupSize;
     uint64_t bufferAlignment;
+    uint64_t maxAlloc;
 };
 
 struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);