sync: minja (a72057e519) (#11774 )

Update README.md [no ci] (#11781 )
typo: `\` -> `/` Change the UNIX path separator to` \`.
2025-02-10 09:34:09 +00:00 · 2025-02-10 09:05:57 +01:00 · 2025-02-10 07:17:21 +01:00 · 2025-02-10 07:08:22 +01:00
5 changed files with 63 additions and 26 deletions
--- a/common/chat-template.hpp
+++ b/common/chat-template.hpp
@ -249,16 +249,30 @@ class chat_template {
                    inputs.add_generation_prompt = false;
                    full = apply(inputs);
                }
-
-                if (full.find(prefix) != 0) {
-                    if (prefix.rfind(eos_token_) == prefix.size() - eos_token_.size()) {
-                        prefix = prefix.substr(0, prefix.size() - eos_token_.size());
+                auto eos_pos_last = full.rfind(eos_token_);
+                if (eos_pos_last == prefix.size() - eos_token_.size() ||
+                      (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
+                    full = full.substr(0, eos_pos_last);
+                }
+                size_t common_prefix_length = 0;
+                for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
+                    if (prefix[i] != full[i]) {
+                        break;
                    }
+                    if (prefix[i] == '<') {
+                        // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
+                        // but it removes thinking tags for past messages.
+                        // The prefix and full strings diverge at <think> vs. <｜tool▁calls▁begin｜>, we avoid consuming the leading <.
+                        continue;
+                    }
+                    common_prefix_length = i + 1;
                }
-                if (full.find(prefix) != 0) {
+                auto example = full.substr(common_prefix_length);
+                if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
                    fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
+                } else {
+                    tool_call_example_ = example;
                }
-                tool_call_example_ = full.substr(prefix.size());
            }
        } catch (const std::exception & e) {
            fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
@ -363,7 +377,7 @@ class chat_template {
            if (polyfill_tools) {
                adjusted_messages = add_system(inputs.messages,
                    "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
-                    (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_));
+                    (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
            } else {
                adjusted_messages = inputs.messages;
            }
--- a/common/minja.hpp
+++ b/common/minja.hpp
@ -1385,6 +1385,13 @@ static std::string strip(const std::string & s) {
  return s.substr(start, end - start + 1);
 }

+static std::string capitalize(const std::string & s) {
+  if (s.empty()) return s;
+  auto result = s;
+  result[0] = std::toupper(result[0]);
+  return result;
+}
+
 static std::string html_escape(const std::string & s) {
  std::string result;
  result.reserve(s.size());
@ -1462,6 +1469,9 @@ public:
          if (method->get_name() == "strip") {
            vargs.expectArgs("strip method", {0, 0}, {0, 0});
            return Value(strip(str));
+          } else if (method->get_name() == "capitalize") {
+            vargs.expectArgs("capitalize method", {0, 0}, {0, 0});
+            return Value(capitalize(str));
          } else if (method->get_name() == "endswith") {
            vargs.expectArgs("endswith method", {1, 1}, {0, 0});
            auto suffix = vargs.args[0].get<std::string>();
@ -1792,7 +1802,7 @@ private:
        auto left = parseStringConcat();
        if (!left) throw std::runtime_error("Expected left side of 'logical compare' expression");

-        static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)");
+        static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not\s+in\b)");
        static std::regex not_tok(R"(not\b)");
        std::string op_str;
        while (!(op_str = consumeToken(compare_tok)).empty()) {
@ -2171,7 +2181,7 @@ private:
    using TemplateTokenIterator = TemplateTokenVector::const_iterator;

    std::vector<std::string> parseVarNames() {
-      static std::regex varnames_regex(R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)");
+      static std::regex varnames_regex(R"(((?:\w+)(?:\s*,\s*(?:\w+))*)\s*)");

      std::vector<std::string> group;
      if ((group = consumeTokenGroups(varnames_regex)).empty()) throw std::runtime_error("Expected variable names");
@ -2194,13 +2204,13 @@ private:
    }

    TemplateTokenVector tokenize() {
-      static std::regex comment_tok(R"(\{#([-~]?)([\s\S\r\n]*?)([-~]?)#\})");
+      static std::regex comment_tok(R"(\{#([-~]?)([\s\S]*?)([-~]?)#\})");
      static std::regex expr_open_regex(R"(\{\{([-~])?)");
-      static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)");
+      static std::regex block_open_regex(R"(^\{%([-~])?\s*)");
      static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)");
      static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)");
-      static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})");
-      static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})");
+      static std::regex expr_close_regex(R"(\s*([-~])?\}\})");
+      static std::regex block_close_regex(R"(\s*([-~])?%\})");

      TemplateTokenVector tokens;
      std::vector<std::string> group;
@ -2284,7 +2294,7 @@ private:
              auto post_space = parseBlockClose();
              tokens.push_back(std::make_unique<EndGenerationTemplateToken>(location, pre_space, post_space));
            } else if (keyword == "set") {
-              static std::regex namespaced_var_regex(R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))");
+              static std::regex namespaced_var_regex(R"((\w+)\s*\.\s*(\w+))");

              std::string ns;
              std::vector<std::string> var_names;
@ -2336,6 +2346,11 @@ private:
              throw std::runtime_error("Unexpected block: " + keyword);
            }
          } else if (std::regex_search(it, end, match, non_text_open_regex)) {
+            if (!match.position()) {
+                if (match[0] != "{#")
+                    throw std::runtime_error("Internal error: Expected a comment");
+                throw std::runtime_error("Missing end of comment tag");
+            }
            auto text_end = it + match.position();
            text = std::string(it, text_end);
            it = text_end;
@ -2400,7 +2415,7 @@ private:

              auto text = text_token->text;
              if (post_space == SpaceHandling::Strip) {
-                static std::regex trailing_space_regex(R"((\s|\r|\n)+$)");
+                static std::regex trailing_space_regex(R"(\s+$)");
                text = std::regex_replace(text, trailing_space_regex, "");
              } else if (options.lstrip_blocks && it != end) {
                auto i = text.size();
@ -2410,7 +2425,7 @@ private:
                }
              }
              if (pre_space == SpaceHandling::Strip) {
-                static std::regex leading_space_regex(R"(^(\s|\r|\n)+)");
+                static std::regex leading_space_regex(R"(^\s+)");
                text = std::regex_replace(text, leading_space_regex, "");
              } else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast<ExpressionTemplateToken*>((*(it - 2)).get())) {
                if (text.length() > 0 && text[0] == '\n') {
--- a/examples/main/README.md
+++ b/examples/main/README.md
@ -37,7 +37,7 @@ Once downloaded, place your model in the models folder in llama.cpp.

 ##### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it):
 ```bash
-./llama-cli -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1
+./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1
 ```

 ### Windows:
--- a/ggml/include/ggml-vulkan.h
+++ b/ggml/include/ggml-vulkan.h
@ -10,8 +10,6 @@ extern "C" {
 #define GGML_VK_NAME "Vulkan"
 #define GGML_VK_MAX_DEVICES 16

-GGML_BACKEND_API void ggml_vk_instance_init(void);
-
 // backend API
 GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);

--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@ -167,6 +167,7 @@ struct vk_device_struct {
    uint32_t subgroup_size;
    uint32_t shader_core_count;
    bool uma;
+    bool prefer_host_memory;
    bool float_controls_rte_fp16;

    bool subgroup_size_control;
@ -1294,7 +1295,9 @@ static vk_buffer ggml_vk_create_buffer_check(vk_device& device, size_t size, vk:
 static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
    vk_buffer buf;
    try {
-        if (device->uma) {
+        if (device->prefer_host_memory) {
+            buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
+        } else if (device->uma) {
            // Fall back to host memory type
            buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
        } else {
@ -2199,6 +2202,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
        device->physical_device = physical_devices[dev_num];
        const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();

+        const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
+        device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
+
        bool fp16_storage = false;
        bool fp16_compute = false;
        bool maintenance4_support = false;
@ -2787,14 +2793,12 @@ static void ggml_vk_print_gpu_info(size_t idx) {
 static bool ggml_vk_instance_validation_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
 static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);

-void ggml_vk_instance_init() {
+static void ggml_vk_instance_init() {
    if (vk_instance_initialized) {
        return;
    }
    VK_LOG_DEBUG("ggml_vk_instance_init()");

-    vk_instance_initialized = true;
-
    uint32_t api_version = vk::enumerateInstanceVersion();

    if (api_version < VK_API_VERSION_1_2) {
@ -2845,6 +2849,7 @@ void ggml_vk_instance_init() {
        GGML_LOG_DEBUG("ggml_vulkan: Validation layers enabled\n");
    }
    vk_instance.instance = vk::createInstance(instance_create_info);
+    vk_instance_initialized = true;

    size_t num_available_devices = vk_instance.instance.enumeratePhysicalDevices().size();

@ -2869,7 +2874,7 @@ void ggml_vk_instance_init() {
        // Make sure at least one device exists
        if (devices.empty()) {
            std::cerr << "ggml_vulkan: Error: No devices found." << std::endl;
-            GGML_ABORT("fatal error");
+            return;
        }

        // Default to using all dedicated GPUs
@ -8344,8 +8349,13 @@ ggml_backend_reg_t ggml_backend_vk_reg() {
        /* .iface       = */ ggml_backend_vk_reg_i,
        /* .context     = */ nullptr,
    };
-
-    return &reg;
+    try {
+        ggml_vk_instance_init();
+        return &reg;
+    } catch (const vk::SystemError& e) {
+        VK_LOG_DEBUG("ggml_backend_vk_reg() -> Error: System error: " << e.what());
+        return nullptr;
+    }
 }

 // Extension availability
Author	SHA1	Message	Date
Olivier Chafik	d7b31a9d84	sync: minja (`a72057e519`) (#11774 )	2025-02-10 09:34:09 +00:00
pascal-lc	9ac3457b39	Update README.md [no ci] (#11781 ) typo: `\` -> `/` Change the UNIX path separator to` \`.	2025-02-10 09:05:57 +01:00
Danny Milosavljevic	c2a67efe38	vulkan: Make Vulkan optional at runtime (#11493 ). (#11494 ) Co-authored-by: Jeff Bolz <jbolz@nvidia.com>	2025-02-10 07:17:21 +01:00
Wagner Bruna	b044a0fe3c	vulkan: add environment variable GGML_VK_PREFER_HOST_MEMORY to avoid VRAM allocation (#11592 )	2025-02-10 07:08:22 +01:00