diff --git a/examples/low_level_api_chat_cpp.py b/examples/low_level_api_chat_cpp.py
index 4a87d7d6b..7a932a36f 100644
--- a/examples/low_level_api_chat_cpp.py
+++ b/examples/low_level_api_chat_cpp.py
@@ -96,7 +96,7 @@ specified) expect poor results""", file=sys.stderr)
 
         print(file=sys.stderr)
         print(f"system_info: n_threads = {self.params.n_threads} / {cpu_count()} \
-| {llama_cpp.llama_print_system_info().decode('utf8')}", file=sys.stderr)
+| {llama_cpp.llama_print_system_info().decode('utf8', errors='ignore')}", file=sys.stderr)
 
         # determine the required inference memory per token:
         if (self.params.mem_test):
@@ -342,7 +342,7 @@ n_keep = {self.params.n_keep}
     # return past text
     def past(self):
         for id in self.last_n_tokens[-self.n_past:]:
-            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8")
+            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
 
     # write input
     def input(self, prompt: str):
@@ -356,7 +356,7 @@ n_keep = {self.params.n_keep}
     def output(self):
         self.remaining_tokens = self.params.n_predict
         for id in self.generate():
-            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8")
+            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
 
     # read user input
     def read_input(self):
diff --git a/examples/low_level_api_llama_cpp.py b/examples/low_level_api_llama_cpp.py
index b048c0ac8..4fb5a0366 100644
--- a/examples/low_level_api_llama_cpp.py
+++ b/examples/low_level_api_llama_cpp.py
@@ -70,7 +70,7 @@ while remaining_tokens > 0:
     if not input_noecho:
         for id in embd:
             print(
-                llama_cpp.llama_token_to_str(ctx, id).decode("utf-8"),
+                llama_cpp.llama_token_to_str(ctx, id).decode("utf-8", errors="ignore"),
                 end="",
                 flush=True,
             )