Compare commits
4 commits
compilade/
...
master
Author | SHA1 | Date | |
---|---|---|---|
|
d7b31a9d84 | ||
|
9ac3457b39 | ||
|
c2a67efe38 | ||
|
b044a0fe3c |
5 changed files with 63 additions and 26 deletions
|
@ -249,16 +249,30 @@ class chat_template {
|
||||||
inputs.add_generation_prompt = false;
|
inputs.add_generation_prompt = false;
|
||||||
full = apply(inputs);
|
full = apply(inputs);
|
||||||
}
|
}
|
||||||
|
auto eos_pos_last = full.rfind(eos_token_);
|
||||||
if (full.find(prefix) != 0) {
|
if (eos_pos_last == prefix.size() - eos_token_.size() ||
|
||||||
if (prefix.rfind(eos_token_) == prefix.size() - eos_token_.size()) {
|
(full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
|
||||||
prefix = prefix.substr(0, prefix.size() - eos_token_.size());
|
full = full.substr(0, eos_pos_last);
|
||||||
|
}
|
||||||
|
size_t common_prefix_length = 0;
|
||||||
|
for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
|
||||||
|
if (prefix[i] != full[i]) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
if (prefix[i] == '<') {
|
||||||
|
// DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
|
||||||
|
// but it removes thinking tags for past messages.
|
||||||
|
// The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
common_prefix_length = i + 1;
|
||||||
}
|
}
|
||||||
if (full.find(prefix) != 0) {
|
auto example = full.substr(common_prefix_length);
|
||||||
|
if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
|
||||||
fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
|
fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
|
||||||
|
} else {
|
||||||
|
tool_call_example_ = example;
|
||||||
}
|
}
|
||||||
tool_call_example_ = full.substr(prefix.size());
|
|
||||||
}
|
}
|
||||||
} catch (const std::exception & e) {
|
} catch (const std::exception & e) {
|
||||||
fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
|
fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
|
||||||
|
@ -363,7 +377,7 @@ class chat_template {
|
||||||
if (polyfill_tools) {
|
if (polyfill_tools) {
|
||||||
adjusted_messages = add_system(inputs.messages,
|
adjusted_messages = add_system(inputs.messages,
|
||||||
"You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
|
"You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
|
||||||
(!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_));
|
(!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
|
||||||
} else {
|
} else {
|
||||||
adjusted_messages = inputs.messages;
|
adjusted_messages = inputs.messages;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1385,6 +1385,13 @@ static std::string strip(const std::string & s) {
|
||||||
return s.substr(start, end - start + 1);
|
return s.substr(start, end - start + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::string capitalize(const std::string & s) {
|
||||||
|
if (s.empty()) return s;
|
||||||
|
auto result = s;
|
||||||
|
result[0] = std::toupper(result[0]);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static std::string html_escape(const std::string & s) {
|
static std::string html_escape(const std::string & s) {
|
||||||
std::string result;
|
std::string result;
|
||||||
result.reserve(s.size());
|
result.reserve(s.size());
|
||||||
|
@ -1462,6 +1469,9 @@ public:
|
||||||
if (method->get_name() == "strip") {
|
if (method->get_name() == "strip") {
|
||||||
vargs.expectArgs("strip method", {0, 0}, {0, 0});
|
vargs.expectArgs("strip method", {0, 0}, {0, 0});
|
||||||
return Value(strip(str));
|
return Value(strip(str));
|
||||||
|
} else if (method->get_name() == "capitalize") {
|
||||||
|
vargs.expectArgs("capitalize method", {0, 0}, {0, 0});
|
||||||
|
return Value(capitalize(str));
|
||||||
} else if (method->get_name() == "endswith") {
|
} else if (method->get_name() == "endswith") {
|
||||||
vargs.expectArgs("endswith method", {1, 1}, {0, 0});
|
vargs.expectArgs("endswith method", {1, 1}, {0, 0});
|
||||||
auto suffix = vargs.args[0].get<std::string>();
|
auto suffix = vargs.args[0].get<std::string>();
|
||||||
|
@ -1792,7 +1802,7 @@ private:
|
||||||
auto left = parseStringConcat();
|
auto left = parseStringConcat();
|
||||||
if (!left) throw std::runtime_error("Expected left side of 'logical compare' expression");
|
if (!left) throw std::runtime_error("Expected left side of 'logical compare' expression");
|
||||||
|
|
||||||
static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)");
|
static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not\s+in\b)");
|
||||||
static std::regex not_tok(R"(not\b)");
|
static std::regex not_tok(R"(not\b)");
|
||||||
std::string op_str;
|
std::string op_str;
|
||||||
while (!(op_str = consumeToken(compare_tok)).empty()) {
|
while (!(op_str = consumeToken(compare_tok)).empty()) {
|
||||||
|
@ -2171,7 +2181,7 @@ private:
|
||||||
using TemplateTokenIterator = TemplateTokenVector::const_iterator;
|
using TemplateTokenIterator = TemplateTokenVector::const_iterator;
|
||||||
|
|
||||||
std::vector<std::string> parseVarNames() {
|
std::vector<std::string> parseVarNames() {
|
||||||
static std::regex varnames_regex(R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)");
|
static std::regex varnames_regex(R"(((?:\w+)(?:\s*,\s*(?:\w+))*)\s*)");
|
||||||
|
|
||||||
std::vector<std::string> group;
|
std::vector<std::string> group;
|
||||||
if ((group = consumeTokenGroups(varnames_regex)).empty()) throw std::runtime_error("Expected variable names");
|
if ((group = consumeTokenGroups(varnames_regex)).empty()) throw std::runtime_error("Expected variable names");
|
||||||
|
@ -2194,13 +2204,13 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
TemplateTokenVector tokenize() {
|
TemplateTokenVector tokenize() {
|
||||||
static std::regex comment_tok(R"(\{#([-~]?)([\s\S\r\n]*?)([-~]?)#\})");
|
static std::regex comment_tok(R"(\{#([-~]?)([\s\S]*?)([-~]?)#\})");
|
||||||
static std::regex expr_open_regex(R"(\{\{([-~])?)");
|
static std::regex expr_open_regex(R"(\{\{([-~])?)");
|
||||||
static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)");
|
static std::regex block_open_regex(R"(^\{%([-~])?\s*)");
|
||||||
static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)");
|
static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)");
|
||||||
static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)");
|
static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)");
|
||||||
static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})");
|
static std::regex expr_close_regex(R"(\s*([-~])?\}\})");
|
||||||
static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})");
|
static std::regex block_close_regex(R"(\s*([-~])?%\})");
|
||||||
|
|
||||||
TemplateTokenVector tokens;
|
TemplateTokenVector tokens;
|
||||||
std::vector<std::string> group;
|
std::vector<std::string> group;
|
||||||
|
@ -2284,7 +2294,7 @@ private:
|
||||||
auto post_space = parseBlockClose();
|
auto post_space = parseBlockClose();
|
||||||
tokens.push_back(std::make_unique<EndGenerationTemplateToken>(location, pre_space, post_space));
|
tokens.push_back(std::make_unique<EndGenerationTemplateToken>(location, pre_space, post_space));
|
||||||
} else if (keyword == "set") {
|
} else if (keyword == "set") {
|
||||||
static std::regex namespaced_var_regex(R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))");
|
static std::regex namespaced_var_regex(R"((\w+)\s*\.\s*(\w+))");
|
||||||
|
|
||||||
std::string ns;
|
std::string ns;
|
||||||
std::vector<std::string> var_names;
|
std::vector<std::string> var_names;
|
||||||
|
@ -2336,6 +2346,11 @@ private:
|
||||||
throw std::runtime_error("Unexpected block: " + keyword);
|
throw std::runtime_error("Unexpected block: " + keyword);
|
||||||
}
|
}
|
||||||
} else if (std::regex_search(it, end, match, non_text_open_regex)) {
|
} else if (std::regex_search(it, end, match, non_text_open_regex)) {
|
||||||
|
if (!match.position()) {
|
||||||
|
if (match[0] != "{#")
|
||||||
|
throw std::runtime_error("Internal error: Expected a comment");
|
||||||
|
throw std::runtime_error("Missing end of comment tag");
|
||||||
|
}
|
||||||
auto text_end = it + match.position();
|
auto text_end = it + match.position();
|
||||||
text = std::string(it, text_end);
|
text = std::string(it, text_end);
|
||||||
it = text_end;
|
it = text_end;
|
||||||
|
@ -2400,7 +2415,7 @@ private:
|
||||||
|
|
||||||
auto text = text_token->text;
|
auto text = text_token->text;
|
||||||
if (post_space == SpaceHandling::Strip) {
|
if (post_space == SpaceHandling::Strip) {
|
||||||
static std::regex trailing_space_regex(R"((\s|\r|\n)+$)");
|
static std::regex trailing_space_regex(R"(\s+$)");
|
||||||
text = std::regex_replace(text, trailing_space_regex, "");
|
text = std::regex_replace(text, trailing_space_regex, "");
|
||||||
} else if (options.lstrip_blocks && it != end) {
|
} else if (options.lstrip_blocks && it != end) {
|
||||||
auto i = text.size();
|
auto i = text.size();
|
||||||
|
@ -2410,7 +2425,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pre_space == SpaceHandling::Strip) {
|
if (pre_space == SpaceHandling::Strip) {
|
||||||
static std::regex leading_space_regex(R"(^(\s|\r|\n)+)");
|
static std::regex leading_space_regex(R"(^\s+)");
|
||||||
text = std::regex_replace(text, leading_space_regex, "");
|
text = std::regex_replace(text, leading_space_regex, "");
|
||||||
} else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast<ExpressionTemplateToken*>((*(it - 2)).get())) {
|
} else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast<ExpressionTemplateToken*>((*(it - 2)).get())) {
|
||||||
if (text.length() > 0 && text[0] == '\n') {
|
if (text.length() > 0 && text[0] == '\n') {
|
||||||
|
|
|
@ -37,7 +37,7 @@ Once downloaded, place your model in the models folder in llama.cpp.
|
||||||
|
|
||||||
##### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it):
|
##### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it):
|
||||||
```bash
|
```bash
|
||||||
./llama-cli -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1
|
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1
|
||||||
```
|
```
|
||||||
|
|
||||||
### Windows:
|
### Windows:
|
||||||
|
|
|
@ -10,8 +10,6 @@ extern "C" {
|
||||||
#define GGML_VK_NAME "Vulkan"
|
#define GGML_VK_NAME "Vulkan"
|
||||||
#define GGML_VK_MAX_DEVICES 16
|
#define GGML_VK_MAX_DEVICES 16
|
||||||
|
|
||||||
GGML_BACKEND_API void ggml_vk_instance_init(void);
|
|
||||||
|
|
||||||
// backend API
|
// backend API
|
||||||
GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
||||||
|
|
||||||
|
|
|
@ -167,6 +167,7 @@ struct vk_device_struct {
|
||||||
uint32_t subgroup_size;
|
uint32_t subgroup_size;
|
||||||
uint32_t shader_core_count;
|
uint32_t shader_core_count;
|
||||||
bool uma;
|
bool uma;
|
||||||
|
bool prefer_host_memory;
|
||||||
bool float_controls_rte_fp16;
|
bool float_controls_rte_fp16;
|
||||||
|
|
||||||
bool subgroup_size_control;
|
bool subgroup_size_control;
|
||||||
|
@ -1294,7 +1295,9 @@ static vk_buffer ggml_vk_create_buffer_check(vk_device& device, size_t size, vk:
|
||||||
static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
|
static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
|
||||||
vk_buffer buf;
|
vk_buffer buf;
|
||||||
try {
|
try {
|
||||||
if (device->uma) {
|
if (device->prefer_host_memory) {
|
||||||
|
buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
||||||
|
} else if (device->uma) {
|
||||||
// Fall back to host memory type
|
// Fall back to host memory type
|
||||||
buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
||||||
} else {
|
} else {
|
||||||
|
@ -2199,6 +2202,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
||||||
device->physical_device = physical_devices[dev_num];
|
device->physical_device = physical_devices[dev_num];
|
||||||
const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
|
const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
|
||||||
|
|
||||||
|
const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
|
||||||
|
device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
|
||||||
|
|
||||||
bool fp16_storage = false;
|
bool fp16_storage = false;
|
||||||
bool fp16_compute = false;
|
bool fp16_compute = false;
|
||||||
bool maintenance4_support = false;
|
bool maintenance4_support = false;
|
||||||
|
@ -2787,14 +2793,12 @@ static void ggml_vk_print_gpu_info(size_t idx) {
|
||||||
static bool ggml_vk_instance_validation_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
|
static bool ggml_vk_instance_validation_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
|
||||||
static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
|
static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
|
||||||
|
|
||||||
void ggml_vk_instance_init() {
|
static void ggml_vk_instance_init() {
|
||||||
if (vk_instance_initialized) {
|
if (vk_instance_initialized) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
VK_LOG_DEBUG("ggml_vk_instance_init()");
|
VK_LOG_DEBUG("ggml_vk_instance_init()");
|
||||||
|
|
||||||
vk_instance_initialized = true;
|
|
||||||
|
|
||||||
uint32_t api_version = vk::enumerateInstanceVersion();
|
uint32_t api_version = vk::enumerateInstanceVersion();
|
||||||
|
|
||||||
if (api_version < VK_API_VERSION_1_2) {
|
if (api_version < VK_API_VERSION_1_2) {
|
||||||
|
@ -2845,6 +2849,7 @@ void ggml_vk_instance_init() {
|
||||||
GGML_LOG_DEBUG("ggml_vulkan: Validation layers enabled\n");
|
GGML_LOG_DEBUG("ggml_vulkan: Validation layers enabled\n");
|
||||||
}
|
}
|
||||||
vk_instance.instance = vk::createInstance(instance_create_info);
|
vk_instance.instance = vk::createInstance(instance_create_info);
|
||||||
|
vk_instance_initialized = true;
|
||||||
|
|
||||||
size_t num_available_devices = vk_instance.instance.enumeratePhysicalDevices().size();
|
size_t num_available_devices = vk_instance.instance.enumeratePhysicalDevices().size();
|
||||||
|
|
||||||
|
@ -2869,7 +2874,7 @@ void ggml_vk_instance_init() {
|
||||||
// Make sure at least one device exists
|
// Make sure at least one device exists
|
||||||
if (devices.empty()) {
|
if (devices.empty()) {
|
||||||
std::cerr << "ggml_vulkan: Error: No devices found." << std::endl;
|
std::cerr << "ggml_vulkan: Error: No devices found." << std::endl;
|
||||||
GGML_ABORT("fatal error");
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default to using all dedicated GPUs
|
// Default to using all dedicated GPUs
|
||||||
|
@ -8344,8 +8349,13 @@ ggml_backend_reg_t ggml_backend_vk_reg() {
|
||||||
/* .iface = */ ggml_backend_vk_reg_i,
|
/* .iface = */ ggml_backend_vk_reg_i,
|
||||||
/* .context = */ nullptr,
|
/* .context = */ nullptr,
|
||||||
};
|
};
|
||||||
|
try {
|
||||||
return ®
|
ggml_vk_instance_init();
|
||||||
|
return ®
|
||||||
|
} catch (const vk::SystemError& e) {
|
||||||
|
VK_LOG_DEBUG("ggml_backend_vk_reg() -> Error: System error: " << e.what());
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extension availability
|
// Extension availability
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue