diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 6d4746ff4..9cfde8308 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -6,49 +6,11 @@
 #include <string>
 #include <vector>
 
-// a function that can be called for every computed node during graph evaluation
-// the user can choose to whether to observe the data of the node depending on the tensor parameters
-static bool observe_compute(struct ggml_tensor * t, bool ask, void * user_data) {
-    GGML_UNUSED(user_data);
-
-    // the scheduler is asking us if we want to observe this node
-    if (ask) {
-        // check if name contains soft_max (customize to your needs)
-        return strstr(t->name, "soft_max") != 0;
-    }
-
-    // print the node info
-    printf("%s: t->name = %32s, t->op = %12s, [%5d, %5d, %5d, %5d]\n",
-            __func__, t->name, ggml_op_name(t->op), (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]);
-
-    // this will copy the data to host memory (if needed)
-    static std::vector<float> t_data;
-
-    const bool is_host = ggml_backend_buffer_is_host(t->buffer);
-
-    if (!is_host) {
-        t_data.resize(ggml_nelements(t));
-        ggml_backend_tensor_get(t, t_data.data(), 0, ggml_nbytes(t));
-    }
-
-    const float * data = is_host ? (const float *) t->data : t_data.data();
-
-    // print first row
-    for (int i = 0; i < t->ne[0]; i++) {
-        printf("%8.4f ", data[i]);
-    }
-    printf("\n");
-
-    return true;
-}
-
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    bool observe = false;
-
     if (argc == 1 || argv[1][0] == '-') {
-        printf("usage: %s MODEL_PATH [PROMPT] [OBSERV]\n" , argv[0]);
+        printf("usage: %s MODEL_PATH [PROMPT]\n" , argv[0]);
         return 1 ;
     }
 
@@ -60,10 +22,6 @@ int main(int argc, char ** argv) {
         params.prompt = argv[2];
     }
 
-    if (argc >= 4) {
-        observe = !!atoi(argv[3]);
-    }
-
     if (params.prompt.empty()) {
         params.prompt = "Hello my name is";
     }
@@ -79,7 +37,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = llama_model_default_params();
 
-    model_params.n_gpu_layers = 99; // offload all layers to the GPU
+    // model_params.n_gpu_layers = 99; // uncomment to offload all layers to the GPU
 
     llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
 
@@ -97,9 +55,6 @@ int main(int argc, char ** argv) {
     ctx_params.n_threads = params.n_threads;
     ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
 
-    ctx_params.cb_eval = observe ? observe_compute : NULL;
-    ctx_params.cb_eval_user_data = NULL;
-
     llama_context * ctx = llama_new_context_with_model(model, ctx_params);
 
     if (ctx == NULL) {
diff --git a/ggml-backend.c b/ggml-backend.c
index 970495a4c..8dfbb2af2 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1384,10 +1384,6 @@ static void sched_reset(ggml_backend_sched_t sched) {
     memset(sched->node_talloc,   0, sizeof(sched->node_talloc[0])   * hash_size);
     memset(sched->node_copies,   0, sizeof(sched->node_copies[0])   * hash_size);
 
-    // TODO: should we clear the callbacks?
-    //sched->callback_eval = NULL;
-    //sched->callback_eval_user_data = NULL;
-
     sched->is_reset = true;
 }