From fc49c3230aa0c224488f376e819b2532b4a646cb Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 3 Jan 2025 12:05:16 +0200
Subject: [PATCH] tokenize : escape the prompt

---
 examples/tokenize/tokenize.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp
index c97e22724..76624c8b0 100644
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@@ -198,6 +198,7 @@ int main(int raw_argc, char ** raw_argv) {
     // variables where to put any arguments we see.
     bool printing_ids = false;
     bool no_bos = false;
+    bool no_escape = false;
     bool no_parse_special = false;
     bool disable_logging = false;
     bool show_token_count = false;
@@ -233,6 +234,9 @@ int main(int raw_argc, char ** raw_argv) {
         else if (arg == "--no-bos") {
             no_bos = true;
         }
+        else if (arg == "--no-escape") {
+            no_escape = true;
+        }
         else if (arg == "--no-parse-special") {
             no_parse_special = true;
         }
@@ -363,6 +367,11 @@ int main(int raw_argc, char ** raw_argv) {
     const bool model_wants_add_bos = llama_add_bos_token(model);
     const bool add_bos = model_wants_add_bos && !no_bos;
     const bool parse_special = !no_parse_special;
+    const bool escape = !no_escape;
+
+    if (escape) {
+        string_process_escapes(prompt);
+    }
 
     std::vector<llama_token> tokens;
     tokens = common_tokenize(model, prompt, add_bos, parse_special);
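
Note (not part of the patch): string_process_escapes() is the helper from llama.cpp's common code that converts backslash escape sequences typed on the command line, such as \n or \t, into their literal characters before the prompt is tokenized; with this patch that processing runs by default and the new --no-escape flag disables it. The standalone program below is only a sketch of that kind of escape processing, under the assumption that this is roughly what the helper does; it is not the actual llama.cpp implementation, and process_escapes_sketch() is a hypothetical name.

#include <cstdio>
#include <string>
#include <utility>

// Hypothetical sketch of backslash-escape processing; the real
// string_process_escapes() in llama.cpp may handle more sequences.
static void process_escapes_sketch(std::string & s) {
    std::string out;
    out.reserve(s.size());
    for (size_t i = 0; i < s.size(); ++i) {
        if (s[i] == '\\' && i + 1 < s.size()) {
            switch (s[++i]) {
                case 'n':  out += '\n'; break;
                case 't':  out += '\t'; break;
                case '\\': out += '\\'; break;
                case '"':  out += '"';  break;
                default:   out += '\\'; out += s[i]; break; // keep unknown sequences as-is
            }
        } else {
            out += s[i];
        }
    }
    s = std::move(out);
}

int main() {
    std::string prompt = "Hello\\nworld";   // what the shell passes for: -p "Hello\nworld"
    process_escapes_sketch(prompt);         // now contains a real newline between the words
    printf("%s\n", prompt.c_str());
    return 0;
}

In terms of observable behavior, a prompt given to the tokenize example as "Hello\nworld" is tokenized with a real newline by default once this patch is applied, while passing --no-escape keeps the literal backslash and 'n' characters in the text that gets tokenized.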