update
This commit is contained in:
parent
df9d1293de
commit
ee76500835
12 changed files with 1173 additions and 9 deletions
|
@ -1,7 +1,26 @@
|
|||
cmake_minimum_required(VERSION 3.13) # for add_link_options
|
||||
project("llama.cpp" C CXX)
|
||||
|
||||
if (NOT MSVC)
|
||||
set(cuda_flags -Wno-pedantic)
|
||||
endif()
|
||||
|
||||
set(LLAMA_CUBLAS ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(LLAMA_CUDA_F16 ON)
|
||||
set(LLAMA_ACCELERATE ON)
|
||||
set(LLAMA_K_QUANTS ON)
|
||||
|
||||
#-DLLAMA_NATIVE=off
|
||||
set(LLAMA_AVX ON)
|
||||
set(LLAMA_AVX2 OFF)
|
||||
set(LLAMA_AVX512 OFF)
|
||||
set(LLAMA_FMA OFF)
|
||||
set(LLAMA_F16C OFF)
|
||||
# Ask nvcc to echo every sub-command it runs. Append instead of assigning so
# that flags the user already supplied in CMAKE_CUDA_FLAGS are not discarded.
string(APPEND CMAKE_CUDA_FLAGS " --verbose")

# Default GPU targets; needed for f16 CUDA intrinsics. This is only a default:
# an explicit -DCMAKE_CUDA_ARCHITECTURES=... from the user takes precedence.
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES "60;61;70")
endif()

# CUDACXX is an *environment* variable; assigning a CMake variable of that name
# does not select the compiler. CMAKE_CUDA_COMPILER is the variable CMake reads,
# and it must be set before the CUDA language is enabled.
# NOTE(review): assumes CUDA is enabled further down in this file - confirm.
# The local toolkit path is used only when the user chose nothing else and the
# binary actually exists on this machine.
if (NOT CMAKE_CUDA_COMPILER
        AND "$ENV{CUDACXX}" STREQUAL ""
        AND EXISTS "/usr/local/cuda-12.2/bin/nvcc")
    set(CMAKE_CUDA_COMPILER "/usr/local/cuda-12.2/bin/nvcc")
endif()
|
||||
#GGML_USE_CUBLAS
|
||||
|
||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
|
@ -77,9 +96,9 @@ endif()
|
|||
|
||||
# 3rd party libs
|
||||
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
||||
option(LLAMA_BLAS "llama: use BLAS" OFF)
|
||||
option(LLAMA_BLAS "llama: use BLAS" ON)
|
||||
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
||||
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
|
||||
option(LLAMA_CUBLAS "llama: use CUDA" ON)
|
||||
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
|
||||
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
|
||||
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
|
||||
|
@ -230,7 +249,12 @@ if (LLAMA_BLAS)
|
|||
|
||||
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
||||
add_compile_options(${BLAS_LINKER_FLAGS})
|
||||
add_compile_definitions(GGML_USE_OPENBLAS)
|
||||
|
||||
# from https://github.com/NVIDIA/cutlass
|
||||
make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
|
||||
set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})
|
||||
|
||||
# add_compile_definitions(GGML_USE_OPENBLAS)
|
||||
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
|
||||
add_compile_definitions(GGML_BLAS_USE_MKL)
|
||||
endif()
|
||||
|
@ -272,6 +296,7 @@ if (LLAMA_CUBLAS)
|
|||
endif()
|
||||
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
||||
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
||||
|
||||
if (DEFINED LLAMA_CUDA_DMMV_Y)
|
||||
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
|
||||
endif()
|
||||
|
@ -420,17 +445,14 @@ if (LLAMA_ALL_WARNINGS)
|
|||
# todo : msvc
|
||||
endif()
|
||||
|
||||
set(c_flags ${c_flags} ${warning_flags})
|
||||
set(cxx_flags ${cxx_flags} ${warning_flags})
|
||||
set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
|
||||
set(cxx_flags ${cxx_flags} -save-temps --verbose ${warning_flags})
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
|
||||
|
||||
endif()
|
||||
|
||||
if (NOT MSVC)
|
||||
set(cuda_flags -Wno-pedantic)
|
||||
endif()
|
||||
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
|
||||
|
||||
list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
|
||||
|
@ -438,6 +460,9 @@ if (NOT cuda_host_flags STREQUAL "")
|
|||
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
|
||||
endif()
|
||||
|
||||
#
|
||||
set(cuda_flags --verbose -G ${cuda_flags})
|
||||
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
|
||||
|
||||
if (WIN32)
|
||||
|
@ -476,6 +501,8 @@ if (NOT MSVC)
|
|||
add_link_options(-static-libgcc -static-libstdc++)
|
||||
endif()
|
||||
endif()
|
||||
# Write one linker map file per linked target.
# ${TARGET} is not set in the visible part of this directory scope, so the
# previous form would expand to "-Wl,-Map=.map" and all targets would share one
# file. $<TARGET_PROPERTY:NAME> is evaluated for each target being linked.
# The LINKER: prefix lets CMake emit the driver-specific wrapper (-Wl, etc.).
# NOTE(review): -Map is a GNU ld/lld style option - confirm whether this should
# be guarded for MSVC / Apple ld.
add_link_options("LINKER:-Map=$<TARGET_PROPERTY:NAME>.map")
|
||||
|
||||
if (LLAMA_GPROF)
|
||||
add_compile_options(-pg)
|
||||
endif()
|
||||
|
|
1039
README.org
Normal file
1039
README.org
Normal file
File diff suppressed because it is too large
Load diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
98
reporthd5_callchains.py
Normal file
98
reporthd5_callchains.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
import h5py
|
||||
import click
|
||||
import collections
|
||||
|
||||
ids = {}
|
||||
#with open("string_ids.txt") as fi:
|
||||
# for x in fi:
|
||||
# p = x.strip().split("|")
|
||||
# ids[p[0]] = p[1]
|
||||
#print(ids)
|
||||
# from https://stackoverflow.com/a/53340677
|
||||
|
||||
def descend_obj(obj,sep='\t', callback=None):
    """
    Walk an HDF5 object tree depth-first and hand every dataset to ``callback``.

    obj: the h5py File, Group or Dataset to start from.
    sep: indentation prefix; one tab is appended per nesting level. It is only
        used when a dataset is printed because no callback was given.
    callback: callable invoked as ``callback(dataset)``. When None, the
        dataset's name, shape and dtype are printed instead of raising.

    Returns the callback's result when ``obj`` itself is a dataset, else None.
    """
    # Use the public h5py classes; h5py.File is a subclass of h5py.Group, so a
    # single isinstance check covers both container kinds.
    if isinstance(obj, h5py.Group):
        for key in obj.keys():
            descend_obj(obj[key], sep=sep + '\t', callback=callback)
        return None

    if isinstance(obj, h5py.Dataset):
        if callback is None:
            # No consumer supplied: fall back to describing the dataset.
            print(sep, obj.name, obj.shape, obj.dtype)
            return None
        return callback(obj)

    # Anything else (neither group nor dataset) is just echoed.
    print(obj)
    return None
|
||||
|
||||
def h5dump(path,group='/', callback=None):
    """
    Open ``path`` read-only, print the path, then walk ``group`` with descend_obj.

    path: HDF5 file to open.
    group: name looked up in the file as the starting object; defaults to the
        root group.
    callback: forwarded unchanged to descend_obj.
    """
    with h5py.File(path, mode='r') as hdf:
        print(path)
        start = hdf[group]
        descend_obj(start, callback=callback)
|
||||
|
||||
|
||||
def get_map(obj):
    """
    Fill the module-level ``ids`` dict from rows of (id, string) pairs.

    obj: iterable of rows where row[0] is the key and row[1] is the string
        value (bytes are decoded as UTF-8).

    Values longer than 100 characters are cut to their first 100 characters,
    stripped of newlines and tabs, and suffixed with "trunc".
    """
    # ``ids`` is only mutated, never rebound, so no ``global`` statement is needed.
    for row in obj:
        key = row[0]
        raw = row[1]
        if isinstance(raw, bytes):
            # errors="replace": one malformed byte sequence must not abort the
            # whole string table.
            text = raw.decode("utf-8", errors="replace")
        else:
            text = str(raw)
        if len(text) > 100:
            text = text[:100].replace("\n", "").replace("\t", "") + "trunc"
        ids[key] = text
|
||||
|
||||
def get_data(obj):
    """
    Count call-chain entries and print them, most frequent first.

    obj: dataset whose rows are indexed positionally; per the field list noted
        by the original author the layout is
        0 id, 1 symbol, 2 module, 3 unresolved, 4 originalIP, 5 stackDepth.

    Each row is rendered as a label built from the symbol and module strings
    looked up in ``ids`` ("oops" when missing), the stack depth and the
    instruction pointer. When a row is deeper than the previous row, the
    previous row's label is prefixed; otherwise the label is prefixed with
    "NEW". One tab-separated "label<TAB>count" line is printed per distinct
    label.
    """
    report = collections.Counter()

    # Read the whole dataset in one operation instead of indexing it once per
    # row, which issues a separate read for every row.
    rows = obj[...]

    prev_depth = 0
    prev_name = ""
    for row in rows:
        symbol = row[1]
        module_id = row[2]
        pointer = row[4]  # instruction pointer
        depth = row[5]

        module = str(module_id) + ids.get(module_id, "oops")
        label = ids.get(symbol, "oops")
        name = (
            f"{label!s}|{symbol!s}|MOD:{module}|DEP:{depth!s}"
            f"|ORIG:{pointer!s}/{hex(pointer)}"
        )

        if depth > prev_depth:
            # Deeper than the previous row: attach to the previous frame.
            chain = prev_name + "|" + name
        else:
            chain = "NEW" + "|" + name

        prev_depth = depth
        prev_name = name
        report[chain] += 1

    for chain, count in report.most_common():
        print(f"{chain}\t{count}")
|
||||
@click.command()
@click.argument("ifile", type=click.Path(exists=True))
def main(ifile):
    # Deliberately documented with comments rather than a docstring: click
    # would surface a docstring as the command's --help text.

    # Pass 1: load the id -> string table into ``ids``.
    h5dump(ifile, group="/StringIds", callback=get_map)

    # Pass 2: aggregate and print the CUDA call chains using that table.
    h5dump(ifile, group="/CUDA_CALLCHAINS", callback=get_data)
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Add table
Add a link
Reference in a new issue