mike dupont 2023-11-10 16:46:24 -05:00
parent df9d1293de
commit ee76500835
12 changed files with 1173 additions and 9 deletions


@@ -1,7 +1,26 @@
cmake_minimum_required(VERSION 3.13) # for add_link_options
project("llama.cpp" C CXX)
if (NOT MSVC)
set(cuda_flags -Wno-pedantic)
endif()
set(LLAMA_CUBLAS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(LLAMA_CUDA_F16 ON)
set(LLAMA_ACCELERATE ON)
set(LLAMA_K_QUANTS ON)
#-DLLAMA_NATIVE=off
set(LLAMA_AVX ON)
set(LLAMA_AVX2 OFF)
set(LLAMA_AVX512 OFF)
set(LLAMA_FMA OFF)
set(LLAMA_F16C OFF)
set(CMAKE_CUDA_FLAGS "--verbose") #
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
set(CUDACXX /usr/local/cuda-12.2/bin/nvcc)
#GGML_USE_CUBLAS
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
@@ -77,9 +96,9 @@ endif()
# 3rd party libs
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
option(LLAMA_BLAS "llama: use BLAS" OFF)
option(LLAMA_BLAS "llama: use BLAS" ON)
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
option(LLAMA_CUBLAS "llama: use CUDA" ON)
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
@@ -230,7 +249,12 @@ if (LLAMA_BLAS)
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
add_compile_options(${BLAS_LINKER_FLAGS})
add_compile_definitions(GGML_USE_OPENBLAS)
# from https://github.com/NVIDIA/cutlass
make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})
# add_compile_definitions(GGML_USE_OPENBLAS)
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
add_compile_definitions(GGML_BLAS_USE_MKL)
endif()
@@ -272,6 +296,7 @@ if (LLAMA_CUBLAS)
endif()
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
if (DEFINED LLAMA_CUDA_DMMV_Y)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
endif()
@@ -420,17 +445,14 @@ if (LLAMA_ALL_WARNINGS)
# todo : msvc
endif()
set(c_flags ${c_flags} ${warning_flags})
set(cxx_flags ${cxx_flags} ${warning_flags})
set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
set(cxx_flags ${cxx_flags} -save-temps --verbose ${warning_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
endif()
if (NOT MSVC)
set(cuda_flags -Wno-pedantic)
endif()
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
@ -438,6 +460,9 @@ if (NOT cuda_host_flags STREQUAL "")
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
endif()
#
set(cuda_flags --verbose -G ${cuda_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
if (WIN32)
@@ -476,6 +501,8 @@ if (NOT MSVC)
add_link_options(-static-libgcc -static-libstdc++)
endif()
endif()
add_link_options("-Wl,-Map=${TARGET}.map")
if (LLAMA_GPROF)
add_compile_options(-pg)
endif()
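
Since the change set turns on CMAKE_EXPORT_COMPILE_COMMANDS, one quick way to confirm that the injected nvcc flags (--verbose, -G, --keep) actually reach the CUDA compile lines is to inspect the generated compile_commands.json. A minimal sketch in Python, assuming the build directory is named build and a CMake version recent enough to export CUDA entries:

import json

# compile_commands.json is emitted at the top of the build tree
with open("build/compile_commands.json") as f:
    entries = json.load(f)

# print the full compiler invocation for every CUDA translation unit
for entry in entries:
    if entry["file"].endswith(".cu"):
        print(entry["file"])
        print(" ", entry["command"])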

README.org (new file, 1039 lines; diff suppressed because it is too large)

Binary files changed (8 files, contents not shown)

reporthd5_callchains.py (new file, 98 lines)

@@ -0,0 +1,98 @@
import h5py
import click
import collections
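# global map: numeric string-table id -> human-readable name, filled by get_map()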
ids = {}
#with open("string_ids.txt") as fi:
# for x in fi:
# p = x.strip().split("|")
# ids[p[0]] = p[1]
#print(ids)
# from https://stackoverflow.com/a/53340677
def descend_obj(obj, sep='\t', callback=None):
    """
    Recursively walk the groups in an HDF5 file, applying `callback` to each dataset.
    """
    if type(obj) in [h5py._hl.group.Group, h5py._hl.files.File]:
        #print("FILE")
        for key in obj.keys():
            #print("KEY", sep, '-', key, ':', obj[key])
            descend_obj(obj[key], sep=sep + '\t', callback=callback)
    elif type(obj) == h5py._hl.dataset.Dataset:
        #print("ds")
        #print(obj.name, obj.shape, obj.size, obj.dtype)
        return callback(obj)
    else:
        print(obj)
def h5dump(path, group='/', callback=None):
    """
    Walk an HDF5 file and apply `callback` to every dataset under `group`
    (defaults to the root group).
    """
    with h5py.File(path, 'r') as f:
        print(path)
        descend_obj(f[group], callback=callback)
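# callback for the /StringIds dataset: each row pairs a numeric id with a name;
# names longer than 100 characters are truncated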
def get_map(obj):
    global ids
    for x in obj:
        k = x[0]
        v = x[1].decode("utf-8")
        if len(v) > 100:
            v = str(v[0:100]).replace("\n", "").replace("\t", "") + "trunc"
        #print("DEBUG", k, v)
        ids[k] = v
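# callback for /CUDA_CALLCHAINS: fold successive frames into parent|child
# chain signatures and count how often each chain occurs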
def get_data(obj):
    #for x in obj:
    #    print(x[2])
    report = collections.Counter()
    objs = obj.size
    ldepth = 0
    lname = ""
    for i in range(objs):
        #print("OBJ", i, obj[i])
        data = obj[i]
        symbol = data[1]
        pointer = data[4]  # instruction pointer
        module = str(data[2]) + ids.get(data[2], "oops")
        depth = str(data[5])
        idepth = data[5]
        name = ids.get(symbol, "oops")
        name = str(name) + "|" + str(symbol) + "|MOD:" + module + "|DEP:" + depth + "|ORIG:" + str(pointer) + "/" + hex(pointer)
        rname = ""
        if idepth > ldepth:
            rname = lname + "|" + name
        else:
            rname = "NEW" + "|" + name
        ldepth = idepth
        lname = name
        #print("\t".join(map(str, data)), name)
        report[rname] += 1
        # record dtype (index -> field):
        # 0 ('id', '<i8')
        # 1 ('symbol', '<u4')
        # 2 ('module', '<u4')
        # 3 ('unresolved', 'u1')
        # 4 ('originalIP', '<u8')
        # 5 ('stackDepth', '<i4')
        #ip = obj[i][4]
        #print("DEB", j, f)
        #report[ip] += 1
    for k in report.most_common():
        print("\t".join(map(str, k)))
@click.command()
@click.argument("ifile", type=click.Path(exists=True))
def main(ifile):
    #h5dump(ifile, "/")
    h5dump(ifile, "/StringIds", callback=get_map)
    #print(ids)
    h5dump(ifile, "/CUDA_CALLCHAINS", callback=get_data)

if __name__ == "__main__":
    main()
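
For completeness, a minimal sketch of driving the report from Python rather than the shell, e.g. from a test; the profile name report1.h5 is a placeholder, and importing the script as a module assumes it is on the Python path (neither is part of this commit):

from click.testing import CliRunner

# hypothetical import: assumes reporthd5_callchains.py is importable as a module
from reporthd5_callchains import main

runner = CliRunner()
result = runner.invoke(main, ["report1.h5"])  # report1.h5: an nsys HDF5 export (assumed)
print(result.output)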