diff --git a/examples/common.h b/examples/common.h
index 69170dfc0..a9dd6933c 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "llama.h"
+#include "llama.cpp.h"
 
 #include <string>
 #include <vector>
diff --git a/llama.cpp b/llama.cpp
index e566465fa..7dea8c9c7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -520,7 +520,7 @@ struct llama_file_loader {
             vocab.token_to_id[word] = i;
 
             auto & tok_score = vocab.id_to_token[i];
-            tok_score.tok = word;
+            tok_score.tok = std::move(word);
             tok_score.score = score;
         }
     }
@@ -3725,24 +3725,32 @@ float * llama_get_embeddings(struct llama_context * ctx) {
     return ctx->embedding.data();
 }
 
-std::string llama_token_to_str_with_model(const struct llama_model * model, llama_token token) {
-    if (token >= llama_n_vocab_from_model(model)) {
-        return nullptr;
+int llama_token_to_str_with_model(const struct llama_model * model, llama_token token, char * str, int length) { // copies the whitespace-unescaped text of `token` into `str`, which holds `length` bytes
+    if (0 <= token && token < llama_n_vocab_from_model(model)) {
+        const std::string result = llama_unescape_whitespace(model->vocab.id_to_token[token].tok);
+        if (length < (int) result.length()) { // signed compare, so a negative `length` is rejected too
+            return -(int) result.length(); // does not fit: report the required size, negated
+        }
+        memcpy(str, result.c_str(), result.length() + ((int) result.length() < length ? 1 : 0)); // copy by size (embedded NULs survive); the terminator is added only when there is room for it
+        return (int) result.length(); // bytes of text written, terminator not counted
     }
-
-    return llama_unescape_whitespace(model->vocab.id_to_token[token].tok);
+    return 0; // token id out of range: nothing is written to `str`
 }
 
-std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
-    return llama_token_to_str_with_model(&ctx->model, token);
+int llama_token_to_str(const struct llama_context * ctx, llama_token token, char * str, int length) {
+    return llama_token_to_str_with_model(&ctx->model, token, str, length); // context variant: looks the token up in ctx's own model and returns that call's result unchanged
 }
 
-std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
-    if (token >= llama_n_vocab_from_model(&ctx->model)) {
-        return nullptr;
+int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token, char * str, int length) { // copies the stored (not unescaped) text of `token` into `str`, which holds `length` bytes
+    if (0 <= token && token < llama_n_vocab_from_model(&ctx->model)) {
+        const std::string & result = ctx->model.vocab.id_to_token[token].tok; // borrow the vocabulary entry instead of copying it
+        if (length < (int) result.length()) { // signed compare, so a negative `length` is rejected too
+            return -(int) result.length(); // does not fit: report the required size, negated
+        }
+        memcpy(str, result.c_str(), result.length() + ((int) result.length() < length ? 1 : 0)); // copy by size (embedded NULs survive); the terminator is added only when there is room for it
+        return (int) result.length(); // bytes of text written, terminator not counted
     }
-
-    return ctx->model.vocab.id_to_token[token].tok;
+    return 0; // token id out of range: nothing is written to `str`
 }
 
 llama_token llama_token_bos() {
diff --git a/llama.cpp.h b/llama.cpp.h
new file mode 100644
index 000000000..6e806bef2
--- /dev/null
+++ b/llama.cpp.h
@@ -0,0 +1,45 @@
+#ifndef LLAMA_CPP_H
+#define LLAMA_CPP_H
+
+#include "llama.h"
+
+#include <cassert>
+#include <string>
+
+static inline std::string llama_token_to_str( // std::string convenience wrapper over the buffer-based C function of the same name; `inline` keeps includers that never call it free of unused-function warnings
+        const struct llama_context * ctx,
+                       llama_token   token) {
+    std::string result;
+    int length = 8; // initial buffer size guess; longer tokens are handled by the retry below
+    result.resize(length);
+    length = llama_token_to_str(ctx, token, &result[0], (int) result.size()); // &result[0] is writable storage in every C++ standard, unlike a cast of data()
+    if (length < 0) {
+        result.resize(-length); // a negative return is the required size, negated
+        int check = llama_token_to_str(ctx, token, &result[0], (int) result.size());
+        assert(check == -length); // the retry must now fit exactly
+        GGML_UNUSED(check); // silences the unused-variable warning when assert() compiles away
+    } else {
+        result.resize(length); // trim the buffer down to the bytes actually written
+    }
+    return result;
+}
+
+static inline std::string llama_token_to_str_bpe( // std::string convenience wrapper over the buffer-based C function of the same name; `inline` keeps includers that never call it free of unused-function warnings
+    const struct llama_context * ctx,
+                   llama_token   token) {
+    std::string result;
+    int length = 8; // initial buffer size guess; longer tokens are handled by the retry below
+    result.resize(length);
+    length = llama_token_to_str_bpe(ctx, token, &result[0], (int) result.size()); // &result[0] is writable storage in every C++ standard, unlike a cast of data()
+    if (length < 0) {
+        result.resize(-length); // a negative return is the required size, negated
+        int check = llama_token_to_str_bpe(ctx, token, &result[0], (int) result.size());
+        assert(check == -length); // the retry must now fit exactly
+        GGML_UNUSED(check); // silences the unused-variable warning when assert() compiles away
+    } else {
+        result.resize(length); // trim the buffer down to the bytes actually written
+    }
+    return result;
+}
+
+#endif
diff --git a/llama.h b/llama.h
index f6e574bb9..8305432ef 100644
--- a/llama.h
+++ b/llama.h
@@ -327,18 +327,23 @@ extern "C" {
     LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
 
     // Token Id -> String. Uses the vocabulary in the provided context
-    LLAMA_API std::string llama_token_to_str(
+    LLAMA_API int llama_token_to_str( // returns the text size in bytes, the negated size if it exceeds `length`, or 0 if `token` is out of range
             const struct llama_context * ctx,
-                           llama_token   token);
+                           llama_token   token,
+                                  char * str, // caller-provided buffer that receives the token text
+                                  int    length); // capacity of `str`, in bytes
 
-    LLAMA_API std::string llama_token_to_str_bpe(
+    LLAMA_API int llama_token_to_str_bpe( // like llama_token_to_str, but copies the stored token text without whitespace unescaping
             const struct llama_context * ctx,
-                           llama_token   token);
+                           llama_token   token,
+                                  char * str, // caller-provided buffer that receives the token text
+                                  int    length); // capacity of `str`, in bytes
 
-    LLAMA_API std::string llama_token_to_str_with_model(
+    LLAMA_API int llama_token_to_str_with_model( // returns the text size in bytes, the negated size if it exceeds `length`, or 0 if `token` is out of range
               const struct llama_model * model,
-                           llama_token   token);
-
+                           llama_token   token,
+                                  char * str, // caller-provided buffer that receives the token text
+                                  int    length); // capacity of `str`, in bytes
     // Special tokens
     LLAMA_API llama_token llama_token_bos();  // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos();  // end-of-sentence
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index 36a73c9d7..dca3e72c7 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -1,4 +1,5 @@
 #include "llama.h"
+#include "llama.cpp.h"
 
 #include <cstdio>
 #include <string>
diff --git a/tests/test-tokenizer-1.cpp b/tests/test-tokenizer-1.cpp
index cd105f098..d9a6293c0 100644
--- a/tests/test-tokenizer-1.cpp
+++ b/tests/test-tokenizer-1.cpp
@@ -1,4 +1,5 @@
 #include "llama.h"
+#include "llama.cpp.h"
 
 #include <cassert>
 #include <cstdio>