now faster and smaller

This commit is contained in:
mike dupont 2023-11-28 21:50:31 -05:00
parent d1d1cceda7
commit 1807a6e280
3 changed files with 1131 additions and 3650 deletions

View file

@@ -17,6 +17,8 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
LLAMA_DEBUG := 1
ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif
@@ -116,7 +118,7 @@ endif
# keep standard at C11 and C++11
MK_CPPFLAGS = -I. -Icommon
MK_CFLAGS = -std=c11 -fPIC
MK_CXXFLAGS = -std=c++20 -fPIC -fpermissive -DCPP_ONLY
MK_CXXFLAGS = -std=c++20 -fPIC -fpermissive -DCPP_ONLY
# -Ofast tends to produce faster code, but may not be available for some compilers.
ifdef LLAMA_FAST

File diff suppressed because one or more lines are too long

View file

@@ -9440,8 +9440,8 @@ float* ggml_tensor_to_float(const ggml_tensor* tensor) {
// function to create a hash table of the N most common values of a given tensor
void find_n_most_common_values(const char * pname, const ggml_tensor* tensor, int decimal_place, size_t top_n) {
//float* buffer = ggml_tensor_to_float(tensor);
float* buffer = 0;
//float* buffer = 0;
//if(tensor->type == GGML_TYPE_F32)
// {
@@ -9451,10 +9451,11 @@ void find_n_most_common_values(const char * pname, const ggml_tensor* tensor, in
// memcpy(buffer, ggml_get_data_f32(tensor), ggml_nbytes(tensor));
//return buffer;
const size_t num_elements = ggml_nbytes(tensor)/sizeof(float);
//buffer = new float[num_elements];
buffer=(float*)ggml_get_data(tensor);
const size_t num_elements = ggml_nelements(tensor);
if (tensor->type != GGML_TYPE_F32) {
return;
}
float* buffer = ggml_get_data_f32(tensor);
auto values = std::unordered_map<double, int>(); // hash table to store the count of each value