mike dupont 2023-11-10 16:46:24 -05:00
parent df9d1293de
commit ee76500835
12 changed files with 1173 additions and 9 deletions


@@ -1,7 +1,26 @@
cmake_minimum_required(VERSION 3.13) # for add_link_options
project("llama.cpp" C CXX)
if (NOT MSVC)
set(cuda_flags -Wno-pedantic)
endif()
set(LLAMA_CUBLAS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(LLAMA_CUDA_F16 ON)
set(LLAMA_ACCELERATE ON)
set(LLAMA_K_QUANTS ON)
#-DLLAMA_NATIVE=off
set(LLAMA_AVX ON)
set(LLAMA_AVX2 OFF)
set(LLAMA_AVX512 OFF)
set(LLAMA_FMA OFF)
set(LLAMA_F16C OFF)
set(CMAKE_CUDA_FLAGS "--verbose") #
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
set(CUDACXX /usr/local/cuda-12.2/bin/nvcc)
#GGML_USE_CUBLAS
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
@@ -77,9 +96,9 @@ endif()
# 3rd party libs
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
option(LLAMA_BLAS "llama: use BLAS" OFF)
option(LLAMA_BLAS "llama: use BLAS" ON)
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
option(LLAMA_CUBLAS "llama: use CUDA" ON)
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
@@ -230,7 +249,12 @@ if (LLAMA_BLAS)
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
add_compile_options(${BLAS_LINKER_FLAGS})
add_compile_definitions(GGML_USE_OPENBLAS)
# from https://github.com/NVIDIA/cutlass
make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})
# add_compile_definitions(GGML_USE_OPENBLAS)
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
add_compile_definitions(GGML_BLAS_USE_MKL)
endif()
@@ -272,6 +296,7 @@ if (LLAMA_CUBLAS)
endif()
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
if (DEFINED LLAMA_CUDA_DMMV_Y)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
endif()
@@ -420,17 +445,14 @@ if (LLAMA_ALL_WARNINGS)
# todo : msvc
endif()
set(c_flags ${c_flags} ${warning_flags})
set(cxx_flags ${cxx_flags} ${warning_flags})
set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
set(cxx_flags ${cxx_flags} -save-temps --verbose ${warning_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
endif()
if (NOT MSVC)
set(cuda_flags -Wno-pedantic)
endif()
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
@ -438,6 +460,9 @@ if (NOT cuda_host_flags STREQUAL "")
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
endif()
#
set(cuda_flags --verbose -G ${cuda_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
if (WIN32)
@@ -476,6 +501,8 @@ if (NOT MSVC)
add_link_options(-static-libgcc -static-libstdc++)
endif()
endif()
add_link_options("-Wl,-Map=${TARGET}.map")
if (LLAMA_GPROF)
add_compile_options(-pg)
endif()
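
Since the change set turns on CMAKE_EXPORT_COMPILE_COMMANDS, one quick way to confirm that the injected nvcc flags (--verbose, -G, --keep) actually reach the CUDA compile lines is to inspect the generated compile_commands.json. A minimal sketch in Python, assuming the build directory is named build and a CMake version recent enough to export CUDA entries:

import json

# compile_commands.json is emitted at the top of the build tree
with open("build/compile_commands.json") as f:
    entries = json.load(f)

# print the full compiler invocation for every CUDA translation unit
for entry in entries:
    if entry["file"].endswith(".cu"):
        print(entry["file"])
        print(" ", entry["command"])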

README.org (new file, 1039 lines; diff suppressed because it is too large)

Binary files changed (8 files, contents not shown)

reporthd5_callchains.py (new file, 98 lines)

@@ -0,0 +1,98 @@
import h5py
import click
import collections
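# global map: numeric string-table id -> human-readable name, filled by get_map()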
ids = {}
#with open("string_ids.txt") as fi:
# for x in fi:
# p = x.strip().split("|")
# ids[p[0]] = p[1]
#print(ids)
# from https://stackoverflow.com/a/53340677
def descend_obj(obj, sep='\t', callback=None):
    """
    Recursively walk the groups in an HDF5 file, applying `callback` to each dataset.
    """
    if type(obj) in [h5py._hl.group.Group, h5py._hl.files.File]:
        #print("FILE")
        for key in obj.keys():
            #print("KEY", sep, '-', key, ':', obj[key])
            descend_obj(obj[key], sep=sep + '\t', callback=callback)
    elif type(obj) == h5py._hl.dataset.Dataset:
        #print("ds")
        #print(obj.name, obj.shape, obj.size, obj.dtype)
        return callback(obj)
    else:
        print(obj)
def h5dump(path, group='/', callback=None):
    """
    Walk an HDF5 file and apply `callback` to every dataset under `group`
    (defaults to the root group).
    """
    with h5py.File(path, 'r') as f:
        print(path)
        descend_obj(f[group], callback=callback)
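# callback for the /StringIds dataset: each row pairs a numeric id with a name;
# names longer than 100 characters are truncated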
def get_map(obj):
    global ids
    for x in obj:
        k = x[0]
        v = x[1].decode("utf-8")
        if len(v) > 100:
            v = str(v[0:100]).replace("\n", "").replace("\t", "") + "trunc"
        #print("DEBUG", k, v)
        ids[k] = v
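# callback for /CUDA_CALLCHAINS: fold successive frames into parent|child
# chain signatures and count how often each chain occurs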
def get_data(obj):
    #for x in obj:
    #    print(x[2])
    report = collections.Counter()
    objs = obj.size
    ldepth = 0
    lname = ""
    for i in range(objs):
        #print("OBJ", i, obj[i])
        data = obj[i]
        symbol = data[1]
        pointer = data[4]  # instruction pointer
        module = str(data[2]) + ids.get(data[2], "oops")
        depth = str(data[5])
        idepth = data[5]
        name = ids.get(symbol, "oops")
        name = str(name) + "|" + str(symbol) + "|MOD:" + module + "|DEP:" + depth + "|ORIG:" + str(pointer) + "/" + hex(pointer)
        rname = ""
        if idepth > ldepth:
            rname = lname + "|" + name
        else:
            rname = "NEW" + "|" + name
        ldepth = idepth
        lname = name
        #print("\t".join(map(str, data)), name)
        report[rname] += 1
        # record dtype (index -> field):
        # 0 ('id', '<i8')
        # 1 ('symbol', '<u4')
        # 2 ('module', '<u4')
        # 3 ('unresolved', 'u1')
        # 4 ('originalIP', '<u8')
        # 5 ('stackDepth', '<i4')
        #ip = obj[i][4]
        #print("DEB", j, f)
        #report[ip] += 1
    for k in report.most_common():
        print("\t".join(map(str, k)))
@click.command()
@click.argument("ifile", type=click.Path(exists=True))
def main(ifile):
    #h5dump(ifile, "/")
    h5dump(ifile, "/StringIds", callback=get_map)
    #print(ids)
    h5dump(ifile, "/CUDA_CALLCHAINS", callback=get_data)

if __name__ == "__main__":
    main()
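
For completeness, a minimal sketch of driving the report from Python rather than the shell, e.g. from a test; the profile name report1.h5 is a placeholder, and importing the script as a module assumes it is on the Python path (neither is part of this commit):

from click.testing import CliRunner

# hypothetical import: assumes reporthd5_callchains.py is importable as a module
from reporthd5_callchains import main

runner = CliRunner()
result = runner.invoke(main, ["report1.h5"])  # report1.h5: an nsys HDF5 export (assumed)
print(result.output)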