update
This commit is contained in:
parent
df9d1293de
commit
ee76500835
12 changed files with 1173 additions and 9 deletions
|
@ -1,7 +1,26 @@
|
||||||
cmake_minimum_required(VERSION 3.13) # for add_link_options
|
cmake_minimum_required(VERSION 3.13) # for add_link_options
|
||||||
project("llama.cpp" C CXX)
|
project("llama.cpp" C CXX)
|
||||||
|
|
||||||
|
if (NOT MSVC)
|
||||||
|
set(cuda_flags -Wno-pedantic)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(LLAMA_CUBLAS ON)
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
|
set(LLAMA_CUDA_F16 ON)
|
||||||
|
set(LLAMA_ACCELERATE ON)
|
||||||
|
set(LLAMA_K_QUANTS ON)
|
||||||
|
|
||||||
|
#-DLLAMA_NATIVE=off
|
||||||
|
set(LLAMA_AVX ON)
|
||||||
|
set(LLAMA_AVX2 OFF)
|
||||||
|
set(LLAMA_AVX512 OFF)
|
||||||
|
set(LLAMA_FMA OFF)
|
||||||
|
set(LLAMA_F16C OFF)
|
||||||
|
# Ask nvcc for verbose output. Append instead of assigning so that any CUDA flags
# already supplied (e.g. -DCMAKE_CUDA_FLAGS=... on the command line) are kept.
string(APPEND CMAKE_CUDA_FLAGS " --verbose")
|
||||||
|
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
|
||||||
|
# CUDACXX is an *environment* variable that CMake consults when the CUDA language
# is first enabled; a normal CMake variable with that name is ignored. Select the
# compiler through CMAKE_CUDA_COMPILER instead. This must run before CUDA is
# enabled (project() above only lists C and CXX). Do not override a compiler the
# user already picked, and do nothing on hosts where this toolkit is not installed.
if (NOT DEFINED CMAKE_CUDA_COMPILER
    AND NOT DEFINED ENV{CUDACXX}
    AND EXISTS "/usr/local/cuda-12.2/bin/nvcc")
    set(CMAKE_CUDA_COMPILER "/usr/local/cuda-12.2/bin/nvcc")
endif()
|
||||||
|
#GGML_USE_CUBLAS
|
||||||
|
|
||||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||||
|
@ -77,9 +96,9 @@ endif()
|
||||||
|
|
||||||
# 3rd party libs
|
# 3rd party libs
|
||||||
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
||||||
option(LLAMA_BLAS "llama: use BLAS" OFF)
|
option(LLAMA_BLAS "llama: use BLAS" ON)
|
||||||
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
||||||
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
|
option(LLAMA_CUBLAS "llama: use CUDA" ON)
|
||||||
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
|
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
|
||||||
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
|
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
|
||||||
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
|
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
|
||||||
|
@ -230,7 +249,12 @@ if (LLAMA_BLAS)
|
||||||
|
|
||||||
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
||||||
add_compile_options(${BLAS_LINKER_FLAGS})
|
add_compile_options(${BLAS_LINKER_FLAGS})
|
||||||
add_compile_definitions(GGML_USE_OPENBLAS)
|
|
||||||
|
# from https://github.com/NVIDIA/cutlass
|
||||||
|
make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
|
||||||
|
set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})
|
||||||
|
|
||||||
|
# add_compile_definitions(GGML_USE_OPENBLAS)
|
||||||
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
|
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
|
||||||
add_compile_definitions(GGML_BLAS_USE_MKL)
|
add_compile_definitions(GGML_BLAS_USE_MKL)
|
||||||
endif()
|
endif()
|
||||||
|
@ -272,6 +296,7 @@ if (LLAMA_CUBLAS)
|
||||||
endif()
|
endif()
|
||||||
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
||||||
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
||||||
|
|
||||||
if (DEFINED LLAMA_CUDA_DMMV_Y)
|
if (DEFINED LLAMA_CUDA_DMMV_Y)
|
||||||
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
|
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
|
||||||
endif()
|
endif()
|
||||||
|
@ -420,17 +445,14 @@ if (LLAMA_ALL_WARNINGS)
|
||||||
# todo : msvc
|
# todo : msvc
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(c_flags ${c_flags} ${warning_flags})
|
set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
|
||||||
set(cxx_flags ${cxx_flags} ${warning_flags})
|
set(cxx_flags ${cxx_flags} -save-temps --verbose ${warning_flags})
|
||||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
|
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT MSVC)
|
|
||||||
set(cuda_flags -Wno-pedantic)
|
|
||||||
endif()
|
|
||||||
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
|
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
|
||||||
|
|
||||||
list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
|
list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
|
||||||
|
@ -438,6 +460,9 @@ if (NOT cuda_host_flags STREQUAL "")
|
||||||
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
|
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#
|
||||||
|
set(cuda_flags --verbose -G ${cuda_flags})
|
||||||
|
|
||||||
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
|
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
@ -476,6 +501,8 @@ if (NOT MSVC)
|
||||||
add_link_options(-static-libgcc -static-libstdc++)
|
add_link_options(-static-libgcc -static-libstdc++)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
# Emit a linker map next to each linked binary. The previous "${TARGET}" was
# expanded once at configure time (and is not set anywhere in the visible part of
# this file, which yields "-Map=.map"), so every target would have written the
# same file. The generator expression is evaluated per target being linked, and
# the LINKER: prefix lets CMake choose the right -Wl/-Xlinker spelling.
add_link_options("LINKER:-Map=$<TARGET_PROPERTY:NAME>.map")
|
||||||
|
|
||||||
if (LLAMA_GPROF)
|
if (LLAMA_GPROF)
|
||||||
add_compile_options(-pg)
|
add_compile_options(-pg)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -583,7 +583,7 @@ From the unzipped folder, open a terminal/cmd window here and place a pre-conver
|
||||||
|
|
||||||
### Memory/Disk Requirements
|
### Memory/Disk Requirements
|
||||||
|
|
||||||
As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same.
|
As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same.
|
||||||
|
|
||||||
| Model | Original size | Quantized size (4-bit) |
|
| Model | Original size | Quantized size (4-bit) |
|
||||||
|------:|--------------:|-----------------------:|
|
|------:|--------------:|-----------------------:|
|
||||||
|
|
1039
README.org
Normal file
1039
README.org
Normal file
File diff suppressed because it is too large
Load diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
98
reporthd5_callchains.py
Normal file
98
reporthd5_callchains.py
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
import h5py
|
||||||
|
import click
|
||||||
|
import collections
|
||||||
|
|
||||||
|
# Lookup table from a numeric string id to its text. get_map() fills it from the
# "/StringIds" dataset and get_data() reads it to label symbols and modules.
ids = dict()

# Earlier loader, kept for reference: read "id|text" pairs from string_ids.txt.
#with open("string_ids.txt") as fi:
#    for x in fi:
#        p = x.strip().split("|")
#        ids[p[0]] = p[1]
#print(ids)
|
||||||
|
# from https://stackoverflow.com/a/53340677
|
||||||
|
|
||||||
|
def descend_obj(obj, sep='\t', callback=None):
    """
    Walk an HDF5 object tree and hand every dataset that is reached to ``callback``.

    obj: an h5py File or Group (its members are visited recursively) or a Dataset.
    sep: prefix that grows by one tab per nesting level. It is only threaded
        through the recursion; nothing in this function prints it.
    callback: callable invoked as ``callback(dataset)``. When it is None the
        dataset is skipped instead of failing with ``TypeError``.

    Returns the callback's result when ``obj`` itself is a dataset, otherwise
    None (results produced for datasets nested inside a group are not collected).
    """
    # Use the public h5py class names rather than the private h5py._hl modules,
    # and isinstance() so subclasses are recognised as well.
    if isinstance(obj, (h5py.Group, h5py.File)):
        for key in obj.keys():
            descend_obj(obj[key], sep=sep + '\t', callback=callback)
    elif isinstance(obj, h5py.Dataset):
        if callback is not None:
            return callback(obj)
    else:
        # Neither a container nor a dataset: just echo it.
        print(obj)
|
||||||
|
|
||||||
|
def h5dump(path, group='/', callback=None):
    """
    Open the HDF5 file at ``path`` read-only, print the path, then walk ``group``.

    group: name of the object to start from; defaults to the root group
    callback: forwarded to descend_obj, which calls it for each dataset it reaches
    """
    with h5py.File(path, 'r') as handle:
        print(path)
        start = handle[group]
        descend_obj(start, callback=callback)
|
||||||
|
|
||||||
|
|
||||||
|
def get_map(obj):
    """
    Fill the module-level ``ids`` table from an iterable of records.

    For each record, element 0 is used as the key and element 1 (bytes) is
    decoded as UTF-8 to give the value. Values longer than 100 characters are cut
    to their first 100 characters, have newlines and tabs removed, and get the
    marker "trunc" appended; shorter values are stored unchanged.
    """
    global ids
    for record in obj:
        key = record[0]
        text = record[1].decode("utf-8")
        if len(text) > 100:
            head = str(text[0:100])
            text = head.replace("\n", "").replace("\t", "") + "trunc"
        ids[key] = text
|
||||||
|
|
||||||
|
def get_data(obj):
    """
    Count call-chain entries in ``obj`` and print the tally, most frequent first.

    ``obj`` must expose ``size`` and integer indexing. According to the notes left
    in the original code, each record has the layout
        [('id', '<i8'), ('symbol', '<u4'), ('module', '<u4'),
         ('unresolved', 'u1'), ('originalIP', '<u8'), ('stackDepth', '<i4')]
    of which positions 1, 2, 4 and 5 are read here.

    Every record is turned into a label built from the ``ids`` lookups for its
    symbol and module (falling back to "oops"), its stack depth and its
    instruction pointer. When the depth is greater than the previous record's
    depth, the label is prefixed with the previous record's label; otherwise it
    is prefixed with "NEW". One line "<label>\t<count>" is printed per distinct
    label.
    """
    tally = collections.Counter()
    prev_depth = 0
    prev_label = ""
    for index in range(obj.size):
        row = obj[index]
        symbol_id = row[1]
        instruction_ptr = row[4]  # instruction pointer
        module_id = row[2]
        module_text = str(module_id) + ids.get(module_id, "oops")
        depth_value = row[5]

        label = "".join([
            str(ids.get(symbol_id, "oops")),
            "|", str(symbol_id),
            "|MOD:", module_text,
            "|DEP:", str(depth_value),
            "|ORIG:", str(instruction_ptr),
            "/", hex(instruction_ptr),
        ])

        # Deeper than the previous record: attach it to that record's label.
        prefix = prev_label if depth_value > prev_depth else "NEW"
        tally[prefix + "|" + label] += 1

        prev_depth = depth_value
        prev_label = label

    for entry, count in tally.most_common():
        print("\t".join((str(entry), str(count))))
|
||||||
|
@click.command()
@click.argument("ifile", type=click.Path(exists=True))
def main(ifile):
    # Two passes over the same file, in this order: first load the string table
    # so ids is populated, then build and print the call-chain report from it.
    passes = (
        ("/StringIds", get_map),
        ("/CUDA_CALLCHAINS", get_data),
    )
    for group_name, handler in passes:
        h5dump(ifile, group_name, callback=handler)


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue