simple : restore examples, imatrix will serve as a demo
This commit is contained in:
parent
012ecec506
commit
200dcaf799
2 changed files with 2 additions and 51 deletions
|
@ -6,49 +6,11 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
// a function that can be called for every computed node during graph evaluation
|
|
||||||
// the user can choose whether to observe the data of the node depending on the tensor parameters
|
|
||||||
static bool observe_compute(struct ggml_tensor * t, bool ask, void * user_data) {
|
|
||||||
GGML_UNUSED(user_data);
|
|
||||||
|
|
||||||
// the scheduler is asking us if we want to observe this node
|
|
||||||
if (ask) {
|
|
||||||
// check if name contains soft_max (customize to your needs)
|
|
||||||
return strstr(t->name, "soft_max") != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// print the node info
|
|
||||||
printf("%s: t->name = %32s, t->op = %12s, [%5d, %5d, %5d, %5d]\n",
|
|
||||||
__func__, t->name, ggml_op_name(t->op), (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]);
|
|
||||||
|
|
||||||
// this will copy the data to host memory (if needed)
|
|
||||||
static std::vector<float> t_data;
|
|
||||||
|
|
||||||
const bool is_host = ggml_backend_buffer_is_host(t->buffer);
|
|
||||||
|
|
||||||
if (!is_host) {
|
|
||||||
t_data.resize(ggml_nelements(t));
|
|
||||||
ggml_backend_tensor_get(t, t_data.data(), 0, ggml_nbytes(t));
|
|
||||||
}
|
|
||||||
|
|
||||||
const float * data = is_host ? (const float *) t->data : t_data.data();
|
|
||||||
|
|
||||||
// print first row
|
|
||||||
for (int i = 0; i < t->ne[0]; i++) {
|
|
||||||
printf("%8.4f ", data[i]);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
|
|
||||||
bool observe = false;
|
|
||||||
|
|
||||||
if (argc == 1 || argv[1][0] == '-') {
|
if (argc == 1 || argv[1][0] == '-') {
|
||||||
printf("usage: %s MODEL_PATH [PROMPT] [OBSERV]\n" , argv[0]);
|
printf("usage: %s MODEL_PATH [PROMPT]\n" , argv[0]);
|
||||||
return 1 ;
|
return 1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,10 +22,6 @@ int main(int argc, char ** argv) {
|
||||||
params.prompt = argv[2];
|
params.prompt = argv[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argc >= 4) {
|
|
||||||
observe = !!atoi(argv[3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (params.prompt.empty()) {
|
if (params.prompt.empty()) {
|
||||||
params.prompt = "Hello my name is";
|
params.prompt = "Hello my name is";
|
||||||
}
|
}
|
||||||
|
@ -79,7 +37,7 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
llama_model_params model_params = llama_model_default_params();
|
llama_model_params model_params = llama_model_default_params();
|
||||||
|
|
||||||
model_params.n_gpu_layers = 99; // offload all layers to the GPU
|
// model_params.n_gpu_layers = 99; // offload all layers to the GPU
|
||||||
|
|
||||||
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
|
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
|
||||||
|
|
||||||
|
@ -97,9 +55,6 @@ int main(int argc, char ** argv) {
|
||||||
ctx_params.n_threads = params.n_threads;
|
ctx_params.n_threads = params.n_threads;
|
||||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||||
|
|
||||||
ctx_params.cb_eval = observe ? observe_compute : NULL;
|
|
||||||
ctx_params.cb_eval_user_data = NULL;
|
|
||||||
|
|
||||||
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
||||||
|
|
||||||
if (ctx == NULL) {
|
if (ctx == NULL) {
|
||||||
|
|
|
@ -1384,10 +1384,6 @@ static void sched_reset(ggml_backend_sched_t sched) {
|
||||||
memset(sched->node_talloc, 0, sizeof(sched->node_talloc[0]) * hash_size);
|
memset(sched->node_talloc, 0, sizeof(sched->node_talloc[0]) * hash_size);
|
||||||
memset(sched->node_copies, 0, sizeof(sched->node_copies[0]) * hash_size);
|
memset(sched->node_copies, 0, sizeof(sched->node_copies[0]) * hash_size);
|
||||||
|
|
||||||
// TODO: should we clear the callbacks?
|
|
||||||
//sched->callback_eval = NULL;
|
|
||||||
//sched->callback_eval_user_data = NULL;
|
|
||||||
|
|
||||||
sched->is_reset = true;
|
sched->is_reset = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue