From c3d13eaa4d7a34b93b3726f79057d77dd7253776 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 22 Mar 2023 07:27:26 +0200
Subject: [PATCH] Change llama_tokenize return meaning

---
 llama.cpp | 2 +-
 llama.h   | 2 +-
 main.cpp  | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 8b7006a1d..08dfcb31f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1455,7 +1455,7 @@ int llama_tokenize(
 
     if (n_max_tokens < (int) res.size()) {
         fprintf(stderr, "%s: too many tokens\n", __func__);
-        return 1;
+        return -((int) res.size());
     }
 
     for (size_t i = 0; i < res.size(); i++) {
diff --git a/llama.h b/llama.h
index 73c0cb151..3df9ed1fd 100644
--- a/llama.h
+++ b/llama.h
@@ -89,7 +89,7 @@ extern "C" {
     // Convert the provided text into tokens.
     // The tokens pointer must be large enough to hold the resulting tokens.
     // Returns the number of tokens on success, no more than n_max_tokens
-    // Returns -1 on failure
+    // Returns a negative number on failure - the number of tokens that would have been returned
     // TODO: not sure if correct
     LLAMA_API int llama_tokenize(
             struct llama_context * ctx,
diff --git a/main.cpp b/main.cpp
index 17c69b870..7db3df7e9 100644
--- a/main.cpp
+++ b/main.cpp
@@ -155,6 +155,7 @@ void sigint_handler(int signo) {
 #endif
 
 int main(int argc, char ** argv) {
+    // has to be called once at the start of the program to init ggml stuff
     ggml_time_init();
 
     gpt_params params;
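
With this change a caller no longer has to guess the token buffer size up front: when the buffer is too small, llama_tokenize returns the negated number of tokens the text would have produced, so the caller can resize and retry instead of treating the result as an opaque error. Below is a minimal sketch of that two-pass pattern, assuming the llama.h API at this revision (llama_tokenize taking ctx, text, tokens, n_max_tokens, add_bos, with llama_token from the header); tokenize_text is a hypothetical helper, not part of the patch:

#include <string>
#include <vector>

#include "llama.h"

// Hypothetical helper: tokenize `text`, growing the buffer on demand by
// using the new negative return value (the negated required token count).
static std::vector<llama_token> tokenize_text(llama_context * ctx, const std::string & text, bool add_bos) {
    // first attempt with a guessed capacity - one token per byte, plus BOS
    std::vector<llama_token> tokens(text.size() + (add_bos ? 1 : 0));
    int n = llama_tokenize(ctx, text.c_str(), tokens.data(), (int) tokens.size(), add_bos);
    if (n < 0) {
        // before this patch the failure return carried no size information;
        // now -n is exactly the number of tokens needed, so retry once
        tokens.resize(-n);
        n = llama_tokenize(ctx, text.c_str(), tokens.data(), (int) tokens.size(), add_bos);
    }
    tokens.resize(n);
    return tokens;
}

Encoding the required size in the error return keeps the C API free of out-parameters and lets callers allocate exactly once on retry.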