llama : fix duplicate symbols + refactor example benchmark
parent 8fdf86dd25
commit 9c9bdaf0b8
2 changed files with 15 additions and 25 deletions
examples/benchmark/benchmark-matmult.cpp
@@ -20,6 +20,17 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
+    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
+
+    if (plan.work_size > 0) {
+        buf.resize(plan.work_size);
+        plan.work_data = buf.data();
+    }
+
+    ggml_graph_compute(graph, &plan);
+}
+
 float tensor_sum_elements(const ggml_tensor * tensor) {
     float sum = 0;
     if (tensor->type==GGML_TYPE_F32) {
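For reference, the helper added above wraps the two-step compute flow that comes with ggml_graph_plan(): build a plan for the graph, give it a work buffer if it requests one, then run the graph. Below is a minimal standalone sketch of that flow, assuming the ggml API as it stood at this commit (ggml_init, ggml_mul_mat, ggml_build_forward, ggml_graph_plan, ggml_graph_compute); the context size, the tensor names m11/m2, and the thread count are illustrative only, not taken from this diff.

#include "ggml.h"
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Small scratch context for a toy graph (size chosen arbitrarily for this sketch).
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16u*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Two 4x4 f32 matrices and their product (ggml's mul_mat convention).
    struct ggml_tensor * m11 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
    struct ggml_tensor * m2  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
    ggml_set_f32(m11, 1.0f);
    ggml_set_f32(m2,  2.0f);
    struct ggml_tensor * prod = ggml_mul_mat(ctx, m11, m2);

    struct ggml_cgraph gf = ggml_build_forward(prod);

    // Same pattern the new helper encapsulates: plan, optional work buffer, compute.
    std::vector<uint8_t> work_buffer;
    struct ggml_cplan plan = ggml_graph_plan(&gf, /*n_threads =*/ 4);
    if (plan.work_size > 0) {
        work_buffer.resize(plan.work_size);
        plan.work_data = work_buffer.data();
    }
    ggml_graph_compute(&gf, &plan);

    printf("prod[0] = %f\n", ggml_get_f32_1d(prod, 0));
    ggml_free(ctx);
    return 0;
}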
@@ -166,14 +177,7 @@ int main(int argc, char ** argv) {
 
     std::vector<uint8_t> work_buffer;
 
-    {
-        ggml_cplan pf = ggml_graph_plan(&gf, benchmark_params.n_threads);
-        if (pf.work_size > 0) {
-            work_buffer.resize(pf.work_size);
-            pf.work_data = work_buffer.data();
-        }
-        ggml_graph_compute(&gf, &pf);
-    }
+    ggml_graph_compute_helper(work_buffer, &gf, benchmark_params.n_threads);
 
     TENSOR_DUMP(gf.nodes[0]);
 
@@ -227,14 +231,7 @@ int main(int argc, char ** argv) {
 
         long long int start = ggml_time_us();
         //printf("Running ggml_graph_compute\n");
-        {
-            ggml_cplan pf31 = ggml_graph_plan(&gf31, benchmark_params.n_threads);
-            if (pf31.work_size > 0) {
-                work_buffer.resize(pf31.work_size);
-                pf31.work_data = work_buffer.data();
-            }
-            ggml_graph_compute(&gf31, &pf31);
-        }
+        ggml_graph_compute_helper(work_buffer, &gf31, benchmark_params.n_threads);
 
         long long int stop = ggml_time_us();
         long long int usec = stop-start;
@@ -267,14 +264,7 @@ int main(int argc, char ** argv) {
         }
 
         // Running a different graph computation to make sure we override the CPU cache lines
-        {
-            ggml_cplan pf32 = ggml_graph_plan(&gf32, benchmark_params.n_threads);
-            if (pf32.work_size > 0) {
-                work_buffer.resize(pf32.work_size);
-                pf32.work_data = work_buffer.data();
-            }
-            ggml_graph_compute(&gf32, &pf32);
-        }
+        ggml_graph_compute_helper(work_buffer, &gf32, benchmark_params.n_threads);
     }
     printf("\n");
     printf("Average%78.2f\n",gflops_sum/((double)benchmark_params.n_iterations));
llama.cpp
@@ -83,7 +83,7 @@ void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
 // ggml helpers
 //
 
-void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
+static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
     struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
 
     if (plan.work_size > 0) {
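On the duplicate-symbol fix itself: the helper in llama.cpp previously had external linkage, so any other translation unit linked into the same binary that also defines ggml_graph_compute_helper triggers a multiple-definition error at link time. Declaring the llama.cpp copy static gives it internal linkage, so each file keeps its own private copy and the symbols no longer collide. A generic two-file sketch of the failure mode and the fix; the file names a.cpp/b.cpp and the helper() function are hypothetical, not from this repo.

// a.cpp
#include <cstdio>
void helper() { std::puts("helper from a.cpp"); }   // external linkage: one definition allowed program-wide

// b.cpp
#include <cstdio>
void helper() { std::puts("helper from b.cpp"); }   // second external definition of the same symbol
int main() { helper(); return 0; }

// $ g++ a.cpp b.cpp   ->  "multiple definition of `helper()`" / "duplicate symbol" at link time
//
// Fix (what this commit applies to the llama.cpp copy): internal linkage.
// a.cpp
static void helper() { std::puts("helper from a.cpp"); }   // visible only within a.cpp, no clash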