update

2023-11-10 16:46:24 -05:00 · 2023-11-10 16:46:24 -05:00 · ee76500835
commit ee76500835
parent df9d1293de
12 changed files with 1173 additions and 9 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,7 +1,26 @@
 cmake_minimum_required(VERSION 3.13)  # for add_link_options
 project("llama.cpp" C CXX)
 # Seed cuda_flags with -Wno-pedantic on every toolchain except MSVC.
 if (NOT MSVC)
    set(cuda_flags -Wno-pedantic)
 endif()
 # Hard-coded local build configuration. These are plain (non-cache) variables
 # set ahead of the option() declarations further down the file.
 # NOTE(review): whether they shadow a -D value given on the command line
 # depends on policy CMP0077 -- confirm before relying on either behaviour.
 set(LLAMA_CUBLAS ON)
 # Ask CMake to write compile_commands.json.
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(LLAMA_CUDA_F16 ON)
 set(LLAMA_ACCELERATE ON)
 set(LLAMA_K_QUANTS ON)
 #-DLLAMA_NATIVE=off
 # CPU instruction-set toggles: only AVX is enabled; AVX2, AVX512, FMA and F16C are off.
 set(LLAMA_AVX ON)
 set(LLAMA_AVX2 OFF)
 set(LLAMA_AVX512 OFF)
 set(LLAMA_FMA OFF)
 set(LLAMA_F16C OFF)
 # This replaces (does not append to) any CMAKE_CUDA_FLAGS value already set.
 set(CMAKE_CUDA_FLAGS "--verbose") # pass --verbose to the CUDA compiler
 set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
 # NOTE(review): absolute, machine-specific path. CUDACXX is documented by CMake
 # as an environment variable; setting a normal variable of that name here may
 # not influence compiler selection -- confirm (CMAKE_CUDA_COMPILER is the
 # variable form).
 set(CUDACXX /usr/local/cuda-12.2/bin/nvcc)
 #GGML_USE_CUBLAS
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
@ -77,9 +96,9 @@ endif()
 # 3rd party libs
 option(LLAMA_ACCELERATE                      "llama: enable Accelerate framework"               ON)
-option(LLAMA_BLAS                            "llama: use BLAS"                                  OFF)
+option(LLAMA_BLAS                            "llama: use BLAS"                                  ON)
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
-option(LLAMA_CUBLAS                          "llama: use CUDA"                                  OFF)
+option(LLAMA_CUBLAS                          "llama: use CUDA"                                  ON)
 #option(LLAMA_CUDA_CUBLAS                     "llama: use cuBLAS for prompt processing"          OFF)
 option(LLAMA_CUDA_FORCE_DMMV                 "llama: use dmmv instead of mmvq CUDA kernels"     OFF)
 option(LLAMA_CUDA_FORCE_MMQ                  "llama: use mmq kernels instead of cuBLAS"         OFF)
@ -230,7 +249,12 @@ if (LLAMA_BLAS)
        message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
        add_compile_options(${BLAS_LINKER_FLAGS})
-        add_compile_definitions(GGML_USE_OPENBLAS)
+
 	# from https://github.com/NVIDIA/cutlass
 	make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
 	set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})
 	#        add_compile_definitions(GGML_USE_OPENBLAS)
        if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
            add_compile_definitions(GGML_BLAS_USE_MKL)
        endif()
@ -272,6 +296,7 @@ if (LLAMA_CUBLAS)
        endif()
        add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
        add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
        if (DEFINED LLAMA_CUDA_DMMV_Y)
            add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
        endif()
@ -420,17 +445,14 @@ if (LLAMA_ALL_WARNINGS)
        # todo : msvc
    endif()
-    set(c_flags   ${c_flags}   ${warning_flags})
+    set(c_flags   ${c_flags} -save-temps --verbose  ${warning_flags})
-    set(cxx_flags ${cxx_flags} ${warning_flags})
+    set(cxx_flags ${cxx_flags} -save-temps --verbose ${warning_flags})
    add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
                        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
                        "$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
 endif()
 if (NOT MSVC)
    set(cuda_flags -Wno-pedantic)
 endif()
 set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
 list(JOIN host_cxx_flags " " cuda_host_flags)  # pass host compiler flags as a single argument
@ -438,6 +460,9 @@ if (NOT cuda_host_flags STREQUAL "")
    set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
 endif()
 # 
 set(cuda_flags --verbose -G  ${cuda_flags})
 add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
 if (WIN32)
@ -476,6 +501,8 @@ if (NOT MSVC)
            add_link_options(-static-libgcc -static-libstdc++)
        endif()
    endif()
    add_link_options("-Wl,-Map=${TARGET}.map")
    if (LLAMA_GPROF)
        add_compile_options(-pg)
    endif()
--- a/README.md
+++ b/README.md
@ -583,7 +583,7 @@ From the unzipped folder, open a terminal/cmd window here and place a pre-conver
 ### Memory/Disk Requirements
-As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same.
+	As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same.
 | Model | Original size | Quantized size (4-bit) |
 |------:|--------------:|-----------------------:|
--- a/README.org
+++ b/README.org
--- a/models/ggml-vocab-aquila.gguf
+++ b/models/ggml-vocab-aquila.gguf
--- a/models/ggml-vocab-baichuan.gguf
+++ b/models/ggml-vocab-baichuan.gguf
--- a/models/ggml-vocab-falcon.gguf
+++ b/models/ggml-vocab-falcon.gguf
--- a/models/ggml-vocab-gpt-neox.gguf
+++ b/models/ggml-vocab-gpt-neox.gguf
--- a/models/ggml-vocab-llama.gguf
+++ b/models/ggml-vocab-llama.gguf
--- a/models/ggml-vocab-mpt.gguf
+++ b/models/ggml-vocab-mpt.gguf
--- a/models/ggml-vocab-refact.gguf
+++ b/models/ggml-vocab-refact.gguf
--- a/models/ggml-vocab-starcoder.gguf
+++ b/models/ggml-vocab-starcoder.gguf
--- a/reporthd5_callchains.py
+++ b/reporthd5_callchains.py
@ -0,0 +1,98 @@
 import h5py
 import click 
 import collections
 # Module-level lookup table: get_map() stores id -> decoded string here and
 # get_data() reads it back to turn symbol and module ids into text.
 ids = {}
 #with open("string_ids.txt") as fi:
 #    for x in fi:
 #        p = x.strip().split("|")
 #        ids[p[0]] = p[1]
 #print(ids)
 # from https://stackoverflow.com/a/53340677
 def descend_obj(obj,sep='\t', callback=None):
    """
    Walk an HDF5 object tree depth-first and hand every dataset to ``callback``.

    obj: an h5py File/Group (recursed into), a Dataset (processed), or anything
        else (printed as-is).
    sep: indentation prefix; one tab is appended per nesting level when
        recursing. It is only passed along, never printed.
    callback: callable taking a single dataset. When None, the dataset's name,
        shape, size and dtype are printed instead, so the function can be used
        as a plain metadata dump without raising.

    Returns the callback's result when ``obj`` itself is a dataset; None in
    every other case (groups, missing callback, unknown objects).
    """
    # h5py.File derives from h5py.Group, so one isinstance check covers both
    # and avoids reaching into the private h5py._hl package.
    if isinstance(obj, h5py.Group):
        for key in obj.keys():
            descend_obj(obj[key],sep=sep+'\t',callback=callback)
    elif isinstance(obj, h5py.Dataset):
        if callback is None:
            # No consumer supplied: fall back to printing the metadata.
            print(obj.name, obj.shape, obj.size, obj.dtype)
            return None
        return callback(obj)
    else:
        print(obj)
 def h5dump(path,group='/', callback=None):
    """
    Open the HDF5 file at ``path`` read-only, print the path, then walk the
    selected entry with descend_obj.

    path: location of the HDF5 file to open
    group: name of the entry to start from; defaults to the root group
    callback: forwarded unchanged to descend_obj
    """
    with h5py.File(path,'r') as handle:
        print(path)
        start = handle[group]
        descend_obj(start, callback=callback)
 def get_map(obj):
    """
    Store every row of ``obj`` in the module-level ``ids`` table.

    Each row supplies the key in position 0 and UTF-8 encoded bytes in
    position 1. Decoded strings longer than 100 characters are cut to their
    first 100 characters, stripped of newlines and tabs, and suffixed with
    "trunc"; shorter strings are stored unchanged.
    """
    for row in obj:
        key, raw = row[0], row[1]
        text = raw.decode("utf-8")
        if len(text) > 100:
            # Only the shortened form has newlines/tabs removed.
            text = text[:100].replace("\n","").replace("\t","") + "trunc"
        ids[key] = text
 def get_data(obj):
    """
    Count call-chain records in ``obj`` and print the counts, most frequent first.

    Rows are read one at a time by index (``obj.size`` rows in total). The
    field list noted alongside the original code gives the row layout as:
    0 'id', 1 'symbol', 2 'module', 3 'unresolved', 4 'originalIP',
    5 'stackDepth'. Symbol and module ids are resolved through the
    module-level ``ids`` table, with "oops" standing in for unknown ids.

    A row deeper than the row before it is counted under the previous row's
    label joined to its own; any other row is counted under "NEW|" plus its
    own label. Output is one line per distinct key: key, a tab, the count.
    """
    tally = collections.Counter()
    prev_depth = 0
    prev_label = ""
    for idx in range(obj.size):
        row = obj[idx]
        symbol = row[1]
        ip = row[4]  # instruction pointer
        module_id = row[2]
        stack_depth = row[5]
        module = str(module_id) + ids.get(module_id,"oops")
        label = (
            str(ids.get(symbol,"oops"))
            + "|" + str(symbol)
            + "|MOD:" + module
            + "|DEP:" + str(stack_depth)
            + "|ORIG:" + str(ip) + "/" + hex(ip)
        )
        # Only the immediately preceding label is prepended, not the whole chain.
        if stack_depth > prev_depth:
            key = prev_label + "|" + label
        else:
            key = "NEW|" + label
        prev_depth = stack_depth
        prev_label = label
        tally[key] += 1
    for entry in tally.most_common():
        print("\t".join(str(part) for part in entry))
# Command-line entry point: takes one argument, the path to an existing file.
# Plain comments are used instead of a docstring so the generated --help text
# stays unchanged.
@click.command()
@click.argument("ifile", type=click.Path(exists=True))
 def main(ifile):
    #h5dump(ifile,"/")
    # First pass: fill the ids table from the /StringIds entry via get_map.
    h5dump(ifile,"/StringIds",callback=get_map)
    #print(ids)
    # Second pass: tally and print the /CUDA_CALLCHAINS entry via get_data,
    # which relies on the ids table filled above.
    h5dump(ifile,"/CUDA_CALLCHAINS",callback=get_data)
 # Invoke the click command only when this file is run as a script.
 if __name__ == "__main__":
    main()