Merge branch 'ggerganov:master' into server-chat-templates-custom
This commit is contained in:
commit
33761375d2
11 changed files with 792 additions and 471 deletions
161
.clang-format
Normal file
161
.clang-format
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
---
|
||||||
|
Language: Cpp
|
||||||
|
AlignAfterOpenBracket: Align
|
||||||
|
AlignArrayOfStructures: Left
|
||||||
|
AlignConsecutiveAssignments: AcrossComments
|
||||||
|
AlignConsecutiveBitFields: AcrossComments
|
||||||
|
AlignConsecutiveDeclarations: AcrossComments
|
||||||
|
AlignConsecutiveMacros: AcrossComments
|
||||||
|
# AlignConsecutiveShortCaseStatements: AcrossComments
|
||||||
|
AlignEscapedNewlines: Left # LeftWithLastLine
|
||||||
|
AlignOperands: Align
|
||||||
|
AlignTrailingComments:
|
||||||
|
Kind: Always
|
||||||
|
OverEmptyLines: 1
|
||||||
|
AllowAllArgumentsOnNextLine: true
|
||||||
|
AllowAllParametersOfDeclarationOnNextLine: false
|
||||||
|
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
|
||||||
|
AllowShortBlocksOnASingleLine: Never
|
||||||
|
AllowShortCaseLabelsOnASingleLine: false
|
||||||
|
AllowShortFunctionsOnASingleLine: Inline
|
||||||
|
AllowShortIfStatementsOnASingleLine: Never
|
||||||
|
AllowShortLambdasOnASingleLine: Inline
|
||||||
|
AllowShortLoopsOnASingleLine: false
|
||||||
|
AlwaysBreakBeforeMultilineStrings: true
|
||||||
|
BinPackArguments: true
|
||||||
|
BinPackParameters: true # OnePerLine
|
||||||
|
BitFieldColonSpacing: Both
|
||||||
|
BreakBeforeBraces: Custom # Attach
|
||||||
|
BraceWrapping:
|
||||||
|
AfterCaseLabel: true
|
||||||
|
AfterClass: false
|
||||||
|
AfterControlStatement: false
|
||||||
|
AfterEnum: false
|
||||||
|
AfterFunction: false
|
||||||
|
AfterNamespace: false
|
||||||
|
AfterObjCDeclaration: false
|
||||||
|
AfterStruct: false
|
||||||
|
AfterUnion: false
|
||||||
|
AfterExternBlock: false
|
||||||
|
BeforeCatch: false
|
||||||
|
BeforeElse: false
|
||||||
|
BeforeLambdaBody: false
|
||||||
|
BeforeWhile: false
|
||||||
|
IndentBraces: false
|
||||||
|
SplitEmptyFunction: false
|
||||||
|
SplitEmptyRecord: false
|
||||||
|
SplitEmptyNamespace: false
|
||||||
|
# BreakAdjacentStringLiterals: true
|
||||||
|
BreakAfterAttributes: Never
|
||||||
|
BreakBeforeBinaryOperators: None
|
||||||
|
BreakBeforeInlineASMColon: OnlyMultiline
|
||||||
|
BreakBeforeTernaryOperators: false
|
||||||
|
# BreakBinaryOperations: Never
|
||||||
|
BreakConstructorInitializers: AfterColon
|
||||||
|
# BreakFunctionDefinitionParameters: false
|
||||||
|
BreakInheritanceList: AfterComma
|
||||||
|
BreakStringLiterals: true
|
||||||
|
# BreakTemplateDeclarations: Yes
|
||||||
|
ColumnLimit: 120
|
||||||
|
CommentPragmas: '^ IWYU pragma:'
|
||||||
|
CompactNamespaces: false
|
||||||
|
ConstructorInitializerIndentWidth: 4
|
||||||
|
ContinuationIndentWidth: 4
|
||||||
|
Cpp11BracedListStyle: false
|
||||||
|
DerivePointerAlignment: false
|
||||||
|
DisableFormat: false
|
||||||
|
EmptyLineBeforeAccessModifier: Leave
|
||||||
|
EmptyLineAfterAccessModifier: Never
|
||||||
|
ExperimentalAutoDetectBinPacking: false
|
||||||
|
FixNamespaceComments: true
|
||||||
|
IncludeBlocks: Regroup
|
||||||
|
IncludeCategories:
|
||||||
|
- Regex: '^<.*\.h>'
|
||||||
|
Priority: 1
|
||||||
|
SortPriority: 0
|
||||||
|
- Regex: '^<.*'
|
||||||
|
Priority: 2
|
||||||
|
SortPriority: 0
|
||||||
|
- Regex: '.*'
|
||||||
|
Priority: 3
|
||||||
|
SortPriority: 0
|
||||||
|
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||||
|
IncludeIsMainSourceRegex: ''
|
||||||
|
IndentAccessModifiers: false
|
||||||
|
IndentCaseBlocks: true
|
||||||
|
IndentCaseLabels: true
|
||||||
|
IndentExternBlock: NoIndent
|
||||||
|
IndentGotoLabels: false
|
||||||
|
IndentPPDirectives: AfterHash
|
||||||
|
IndentWidth: 4
|
||||||
|
IndentWrappedFunctionNames: false
|
||||||
|
InsertBraces: true # NOTE: may lead to incorrect formatting
|
||||||
|
InsertNewlineAtEOF: true
|
||||||
|
JavaScriptQuotes: Leave
|
||||||
|
JavaScriptWrapImports: true
|
||||||
|
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||||
|
LambdaBodyIndentation: Signature
|
||||||
|
LineEnding: LF
|
||||||
|
MacroBlockBegin: ''
|
||||||
|
MacroBlockEnd: ''
|
||||||
|
MaxEmptyLinesToKeep: 1
|
||||||
|
NamespaceIndentation: None
|
||||||
|
ObjCBinPackProtocolList: Auto
|
||||||
|
ObjCBlockIndentWidth: 4
|
||||||
|
ObjCSpaceAfterProperty: true
|
||||||
|
ObjCSpaceBeforeProtocolList: true
|
||||||
|
PPIndentWidth: -1
|
||||||
|
PackConstructorInitializers: CurrentLine
|
||||||
|
PenaltyBreakAssignment: 2
|
||||||
|
PenaltyBreakBeforeFirstCallParameter: 1
|
||||||
|
PenaltyBreakComment: 300
|
||||||
|
PenaltyBreakFirstLessLess: 120
|
||||||
|
PenaltyBreakString: 1000
|
||||||
|
PenaltyBreakTemplateDeclaration: 10
|
||||||
|
PenaltyExcessCharacter: 1000000
|
||||||
|
PenaltyReturnTypeOnItsOwnLine: 200
|
||||||
|
PointerAlignment: Middle
|
||||||
|
QualifierAlignment: Left
|
||||||
|
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
|
||||||
|
RawStringFormats:
|
||||||
|
- Language: Cpp
|
||||||
|
Delimiters:
|
||||||
|
- cc
|
||||||
|
- CC
|
||||||
|
- cpp
|
||||||
|
- Cpp
|
||||||
|
- CPP
|
||||||
|
- 'c++'
|
||||||
|
- 'C++'
|
||||||
|
CanonicalDelimiter: ''
|
||||||
|
ReferenceAlignment: Middle
|
||||||
|
ReflowComments: false # IndentOnly
|
||||||
|
SeparateDefinitionBlocks: Always
|
||||||
|
SortIncludes: CaseInsensitive
|
||||||
|
SortUsingDeclarations: LexicographicNumeric
|
||||||
|
SpaceAfterCStyleCast: true
|
||||||
|
SpaceAfterLogicalNot: false
|
||||||
|
SpaceAfterTemplateKeyword: true
|
||||||
|
SpaceBeforeAssignmentOperators: true
|
||||||
|
SpaceBeforeCpp11BracedList: false
|
||||||
|
SpaceBeforeCtorInitializerColon: true
|
||||||
|
SpaceBeforeInheritanceColon: true
|
||||||
|
SpaceBeforeParens: ControlStatements
|
||||||
|
SpaceBeforeRangeBasedForLoopColon: true
|
||||||
|
SpaceInEmptyBlock: false
|
||||||
|
SpaceInEmptyParentheses: false
|
||||||
|
SpacesBeforeTrailingComments: 2
|
||||||
|
SpacesInAngles: Never
|
||||||
|
SpacesInContainerLiterals: true
|
||||||
|
SpacesInLineCommentPrefix:
|
||||||
|
Minimum: 1
|
||||||
|
Maximum: -1
|
||||||
|
SpacesInParentheses: false
|
||||||
|
SpacesInSquareBrackets: false
|
||||||
|
SpaceBeforeSquareBrackets: false
|
||||||
|
Standard: c++17
|
||||||
|
TabWidth: 4
|
||||||
|
UseTab: Never
|
||||||
|
WhitespaceSensitiveMacros: ['STRINGIZE']
|
||||||
|
...
|
||||||
|
|
|
@ -3,12 +3,60 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
|
||||||
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
|
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
|
||||||
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
|
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
|
||||||
|
|
||||||
|
set(GGML_STATIC @GGML_STATIC@)
|
||||||
|
set(GGML_NATIVE @GGML_NATIVE@)
|
||||||
|
set(GGML_LTO @GGML_LTO@)
|
||||||
|
set(GGML_CCACHE @GGML_CCACHE@)
|
||||||
|
set(GGML_AVX @GGML_AVX@)
|
||||||
|
set(GGML_AVX2 @GGML_AVX2@)
|
||||||
|
set(GGML_AVX512 @GGML_AVX512@)
|
||||||
|
set(GGML_AVX512_VBMI @GGML_AVX512_VBMI@)
|
||||||
|
set(GGML_AVX512_VNNI @GGML_AVX512_VNNI@)
|
||||||
|
set(GGML_AVX512_BF16 @GGML_AVX512_BF16@)
|
||||||
|
set(GGML_AMX_TILE @GGML_AMX_TILE@)
|
||||||
|
set(GGML_AMX_INT8 @GGML_AMX_INT8@)
|
||||||
|
set(GGML_AMX_BF16 @GGML_AMX_BF16@)
|
||||||
|
set(GGML_FMA @GGML_FMA@)
|
||||||
|
set(GGML_LASX @GGML_LASX@)
|
||||||
|
set(GGML_LSX @GGML_LSX@)
|
||||||
|
set(GGML_RVV @GGML_RVV@)
|
||||||
|
set(GGML_SVE @GGML_SVE@)
|
||||||
|
|
||||||
set(GGML_ACCELERATE @GGML_ACCELERATE@)
|
set(GGML_ACCELERATE @GGML_ACCELERATE@)
|
||||||
|
set(GGML_OPENMP @GGML_OPENMP@)
|
||||||
|
set(GGML_CPU_HBM @GGML_CPU_HBM@)
|
||||||
|
set(GGML_BLAS_VENDOR @GGML_BLAS_VENDOR@)
|
||||||
|
|
||||||
|
set(GGML_CUDA_FORCE_MMQ @GGML_CUDA_FORCE_MMQ@)
|
||||||
|
set(GGML_CUDA_FORCE_CUBLAS @GGML_CUDA_FORCE_CUBLAS@)
|
||||||
|
set(GGML_CUDA_F16 @GGML_CUDA_F16@)
|
||||||
|
set(GGML_CUDA_PEER_MAX_BATCH_SIZE @GGML_CUDA_PEER_MAX_BATCH_SIZE@)
|
||||||
|
set(GGML_CUDA_NO_PEER_COPY @GGML_CUDA_NO_PEER_COPY@)
|
||||||
|
set(GGML_CUDA_NO_VMM @GGML_CUDA_NO_VMM@)
|
||||||
|
set(GGML_CUDA_FA_ALL_QUANTS @GGML_CUDA_FA_ALL_QUANTS@)
|
||||||
|
set(GGML_CUDA_GRAPHS @GGML_CUDA_GRAPHS@)
|
||||||
|
|
||||||
|
set(GGML_HIP_UMA @GGML_HIP_UMA@)
|
||||||
|
|
||||||
set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@)
|
set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@)
|
||||||
set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@)
|
set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@)
|
||||||
set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@)
|
set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@)
|
||||||
set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@)
|
set(GGML_VULKAN_SHADER_DEBUG_INFO @GGML_VULKAN_SHADER_DEBUG_INFO@)
|
||||||
set(GGML_OPENMP @GGML_OPENMP@)
|
set(GGML_VULKAN_PERF @GGML_VULKAN_PERF@)
|
||||||
|
set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@)
|
||||||
|
set(GGML_VULKAN_RUN_TESTS @GGML_VULKAN_RUN_TESTS@)
|
||||||
|
|
||||||
|
set(GGML_METAL_USE_BF16 @GGML_METAL_USE_BF16@)
|
||||||
|
set(GGML_METAL_NDEBUG @GGML_METAL_NDEBUG@)
|
||||||
|
set(GGML_METAL_SHADER_DEBUG @GGML_METAL_SHADER_DEBUG@)
|
||||||
|
set(GGML_METAL_EMBED_LIBRARY @GGML_METAL_EMBED_LIBRARY@)
|
||||||
|
set(GGML_METAL_MACOSX_VERSION_MIN @GGML_METAL_MACOSX_VERSION_MIN@)
|
||||||
|
set(GGML_METAL_STD @GGML_METAL_STD@)
|
||||||
|
|
||||||
|
set(GGML_SYCL_F16 @GGML_SYCL_F16@)
|
||||||
|
set(GGML_SYCL_TARGET @GGML_SYCL_TARGET@)
|
||||||
|
set(GGML_SYCL_DEVICE_ARCH @GGML_SYCL_DEVICE_ARCH@)
|
||||||
|
|
||||||
|
|
||||||
@PACKAGE_INIT@
|
@PACKAGE_INIT@
|
||||||
|
|
||||||
|
@ -20,6 +68,7 @@ find_package(Threads REQUIRED)
|
||||||
|
|
||||||
set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
|
set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
|
||||||
set(_llama_link_deps "")
|
set(_llama_link_deps "")
|
||||||
|
set(_llama_link_opts "")
|
||||||
foreach(_ggml_lib ggml ggml-base)
|
foreach(_ggml_lib ggml ggml-base)
|
||||||
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
|
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
|
||||||
find_library(${_ggml_lib_var} ${_ggml_lib}
|
find_library(${_ggml_lib_var} ${_ggml_lib}
|
||||||
|
@ -49,41 +98,63 @@ foreach(backend amx blas cann cpu cuda hip kompute metal musa rpc sycl vulkan)
|
||||||
endif()
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
if (APPLE AND GGML_ACCELERATE)
|
if (NOT LLAMA_SHARED_LIB)
|
||||||
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
|
if (APPLE AND GGML_ACCELERATE)
|
||||||
endif()
|
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
|
||||||
|
list(APPEND _llama_link_deps ${ACCELERATE_FRAMEWORK})
|
||||||
|
endif()
|
||||||
|
|
||||||
if (GGML_BLAS)
|
if (GGML_OPENMP)
|
||||||
find_package(BLAS REQUIRED)
|
find_package(OpenMP REQUIRED)
|
||||||
endif()
|
list(APPEND _llama_link_deps OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (GGML_CUDA)
|
if (GGML_CPU_HBM)
|
||||||
find_package(CUDAToolkit REQUIRED)
|
find_library(memkind memkind REQUIRED)
|
||||||
endif()
|
list(APPEND _llama_link_deps memkind)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (GGML_METAL)
|
if (GGML_BLAS)
|
||||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
find_package(BLAS REQUIRED)
|
||||||
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
list(APPEND _llama_link_deps ${BLAS_LIBRARIES})
|
||||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
list(APPEND _llama_link_opts ${BLAS_LINKER_FLAGS})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (GGML_VULKAN)
|
if (GGML_CUDA)
|
||||||
find_package(Vulkan REQUIRED)
|
find_package(CUDAToolkit REQUIRED)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (GGML_HIP)
|
if (GGML_METAL)
|
||||||
find_package(hip REQUIRED)
|
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||||
find_package(hipblas REQUIRED)
|
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
||||||
find_package(rocblas REQUIRED)
|
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||||
endif()
|
list(APPEND _llama_link_deps ${FOUNDATION_LIBRARY}
|
||||||
|
${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
|
||||||
|
endif()
|
||||||
|
|
||||||
if (GGML_SYCL)
|
if (GGML_VULKAN)
|
||||||
find_package(IntelSYCL REQUIRED)
|
find_package(Vulkan REQUIRED)
|
||||||
find_package(MKL REQUIRED)
|
list(APPEND _llama_link_deps Vulkan::Vulkan)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (GGML_OPENMP)
|
if (GGML_HIP)
|
||||||
find_package(OpenMP REQUIRED)
|
find_package(hip REQUIRED)
|
||||||
|
find_package(hipblas REQUIRED)
|
||||||
|
find_package(rocblas REQUIRED)
|
||||||
|
list(APPEND _llama_link_deps hip::host roc::rocblas roc::hipblas)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (GGML_SYCL)
|
||||||
|
find_package(DNNL)
|
||||||
|
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
|
||||||
|
list(APPEND _llama_link_deps DNNL::dnnl)
|
||||||
|
endif()
|
||||||
|
if (WIN32)
|
||||||
|
find_package(IntelSYCL REQUIRED)
|
||||||
|
find_package(MKL REQUIRED)
|
||||||
|
list(APPEND _llama_link_deps IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
find_library(llama_LIBRARY llama
|
find_library(llama_LIBRARY llama
|
||||||
|
@ -97,6 +168,7 @@ set_target_properties(llama
|
||||||
PROPERTIES
|
PROPERTIES
|
||||||
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
|
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
|
||||||
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
|
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
|
||||||
|
INTERFACE_LINK_OPTIONS "${_llama_link_opts}"
|
||||||
INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
|
INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
||||||
IMPORTED_LOCATION "${llama_LIBRARY}"
|
IMPORTED_LOCATION "${llama_LIBRARY}"
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -252,6 +252,7 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||||
|
GGML_ASSERT(tensor);
|
||||||
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
|
@ -266,6 +267,7 @@ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, siz
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||||
|
GGML_ASSERT(tensor);
|
||||||
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
|
@ -884,9 +886,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||||
for (int i = 0; i < graph->n_nodes; i++) {
|
for (int i = 0; i < graph->n_nodes; i++) {
|
||||||
struct ggml_tensor * node = graph->nodes[i];
|
struct ggml_tensor * node = graph->nodes[i];
|
||||||
int * node_backend_id = &tensor_backend_id(node);
|
int * node_backend_id = &tensor_backend_id(node);
|
||||||
if (ggml_is_view_op(node->op)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// do not overwrite user assignments
|
// do not overwrite user assignments
|
||||||
if (*node_backend_id == -1) {
|
if (*node_backend_id == -1) {
|
||||||
*node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
|
*node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
|
||||||
|
|
|
@ -295,6 +295,9 @@ struct ggml_cgraph {
|
||||||
enum ggml_cgraph_eval_order order;
|
enum ggml_cgraph_eval_order order;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// returns a slice of cgraph with nodes [i0, i1)
|
||||||
|
// the slice does not have leafs or gradients
|
||||||
|
// if you need the gradients, get them from the original graph
|
||||||
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
|
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
|
||||||
|
|
||||||
// Memory allocation
|
// Memory allocation
|
||||||
|
|
|
@ -14,51 +14,51 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
struct ggml_opt_dataset {
|
struct ggml_opt_dataset {
|
||||||
struct ggml_context * ctx;
|
struct ggml_context * ctx = nullptr;
|
||||||
ggml_backend_buffer_t buf;
|
ggml_backend_buffer_t buf = nullptr;
|
||||||
struct ggml_tensor * data;
|
struct ggml_tensor * data = nullptr;
|
||||||
struct ggml_tensor * labels;
|
struct ggml_tensor * labels = nullptr;
|
||||||
|
|
||||||
int64_t ndata;
|
int64_t ndata = -1;
|
||||||
int64_t ndata_shard;
|
int64_t ndata_shard = -1;
|
||||||
size_t nbs_data;
|
size_t nbs_data = -1;
|
||||||
size_t nbs_labels;
|
size_t nbs_labels = -1;
|
||||||
|
|
||||||
std::vector<int64_t> permutation;
|
std::vector<int64_t> permutation;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_opt_context {
|
struct ggml_opt_context {
|
||||||
ggml_backend_sched_t backend_sched;
|
ggml_backend_sched_t backend_sched = nullptr;
|
||||||
ggml_cgraph * allocated_graph;
|
ggml_cgraph * allocated_graph = nullptr;
|
||||||
ggml_cgraph * allocated_graph_copy;
|
ggml_cgraph * allocated_graph_copy = nullptr;
|
||||||
struct ggml_context * ctx_static;
|
struct ggml_context * ctx_static = nullptr;
|
||||||
struct ggml_context * ctx_static_cpu;
|
struct ggml_context * ctx_static_cpu = nullptr;
|
||||||
struct ggml_context * ctx_compute;
|
struct ggml_context * ctx_compute = nullptr;
|
||||||
struct ggml_context * ctx_copy;
|
struct ggml_context * ctx_copy = nullptr;
|
||||||
ggml_backend_buffer_t buf_static;
|
ggml_backend_buffer_t buf_static = nullptr;
|
||||||
ggml_backend_buffer_t buf_static_cpu;
|
ggml_backend_buffer_t buf_static_cpu = nullptr;
|
||||||
std::mt19937 rng;
|
std::mt19937 rng;
|
||||||
|
|
||||||
struct ggml_tensor * inputs;
|
struct ggml_tensor * inputs = nullptr;
|
||||||
struct ggml_tensor * outputs;
|
struct ggml_tensor * outputs = nullptr;
|
||||||
struct ggml_tensor * labels;
|
struct ggml_tensor * labels = nullptr;
|
||||||
|
|
||||||
struct ggml_tensor * loss;
|
struct ggml_tensor * loss = nullptr;
|
||||||
struct ggml_tensor * pred;
|
struct ggml_tensor * pred = nullptr;
|
||||||
struct ggml_tensor * ncorrect;
|
struct ggml_tensor * ncorrect = nullptr;
|
||||||
|
|
||||||
struct ggml_cgraph * gf;
|
struct ggml_cgraph * gf = nullptr;
|
||||||
struct ggml_cgraph * gb_grad;
|
struct ggml_cgraph * gb_grad = nullptr;
|
||||||
struct ggml_cgraph * gb_opt;
|
struct ggml_cgraph * gb_opt = nullptr;
|
||||||
|
|
||||||
int64_t iter;
|
int64_t iter = 1;
|
||||||
int32_t opt_period;
|
int32_t opt_period = 1;
|
||||||
int32_t opt_i;
|
int32_t opt_i = 0;
|
||||||
bool loss_per_datapoint;
|
bool loss_per_datapoint = false;
|
||||||
|
|
||||||
ggml_opt_get_optimizer_params get_opt_pars;
|
ggml_opt_get_optimizer_params get_opt_pars = nullptr;
|
||||||
void * get_opt_pars_ud;
|
void * get_opt_pars_ud = nullptr;
|
||||||
struct ggml_tensor * adamw_params;
|
struct ggml_tensor * adamw_params = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_opt_result {
|
struct ggml_opt_result {
|
||||||
|
@ -67,8 +67,8 @@ struct ggml_opt_result {
|
||||||
std::vector<int32_t> pred;
|
std::vector<int32_t> pred;
|
||||||
int64_t ncorrect = 0;
|
int64_t ncorrect = 0;
|
||||||
|
|
||||||
bool loss_per_datapoint = false;
|
int64_t opt_period = -1;
|
||||||
int64_t opt_period = -1;
|
bool loss_per_datapoint = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
// ====== Dataset ======
|
// ====== Dataset ======
|
||||||
|
@ -188,11 +188,11 @@ struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * us
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_opt_params ggml_opt_default_params(
|
struct ggml_opt_params ggml_opt_default_params(
|
||||||
ggml_backend_sched_t backend_sched,
|
ggml_backend_sched_t backend_sched,
|
||||||
struct ggml_context * ctx_compute,
|
struct ggml_context * ctx_compute,
|
||||||
struct ggml_tensor * inputs,
|
struct ggml_tensor * inputs,
|
||||||
struct ggml_tensor * outputs,
|
struct ggml_tensor * outputs,
|
||||||
enum ggml_opt_loss_type loss_type) {
|
enum ggml_opt_loss_type loss_type) {
|
||||||
return {
|
return {
|
||||||
/*backend_sched =*/ backend_sched,
|
/*backend_sched =*/ backend_sched,
|
||||||
/*ctx_compute =*/ ctx_compute,
|
/*ctx_compute =*/ ctx_compute,
|
||||||
|
@ -237,25 +237,33 @@ static ggml_tensor * map_tensor(std::map<ggml_tensor *, ggml_tensor *> & tensor_
|
||||||
return new_tensor;
|
return new_tensor;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_cgraph * dup_graph(ggml_context * ctx, ggml_cgraph * graph) {
|
static ggml_cgraph * dup_graph(ggml_context * ctx, ggml_cgraph * src) {
|
||||||
std::map<ggml_tensor *, ggml_tensor *> tensor_map;
|
std::map<ggml_tensor *, ggml_tensor *> tensor_map;
|
||||||
|
|
||||||
ggml_cgraph * new_graph = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
|
ggml_cgraph * dst = ggml_new_graph_custom(ctx, src->size, /*grads =*/ true);
|
||||||
|
|
||||||
for (int i = 0; i < graph->n_leafs; i++) {
|
for (int i = 0; i < src->n_leafs; i++) {
|
||||||
ggml_build_forward_expand(new_graph, map_tensor(tensor_map, ctx, graph->leafs[i]));
|
ggml_build_forward_expand(dst, map_tensor(tensor_map, ctx, src->leafs[i]));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < graph->n_nodes; i++) {
|
GGML_ASSERT(dst->n_leafs == src->n_leafs);
|
||||||
ggml_build_forward_expand(new_graph, map_tensor(tensor_map, ctx, graph->nodes[i]));
|
for (int i = 0; i < src->n_nodes; i++) {
|
||||||
|
ggml_build_forward_expand(dst, map_tensor(tensor_map, ctx, src->nodes[i]));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < graph->n_nodes; ++i) {
|
GGML_ASSERT(dst->n_nodes == src->n_nodes);
|
||||||
const size_t igrad_src = ggml_hash_find(&graph->visited_hash_set, graph->nodes[i]);
|
for (int i = 0; i < src->n_nodes; ++i) {
|
||||||
const size_t igrad_dst = ggml_hash_find(&new_graph->visited_hash_set, new_graph->nodes[i]);
|
const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
|
||||||
graph->grads[igrad_dst] = new_graph->grads[igrad_src];
|
const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
|
||||||
graph->grad_accs[igrad_dst] = new_graph->grad_accs[igrad_src];
|
|
||||||
|
GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
|
||||||
|
GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
|
||||||
|
GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
|
||||||
|
GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
|
||||||
|
|
||||||
|
dst->grads[igrad_dst] = src->grads[igrad_src];
|
||||||
|
dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
|
||||||
}
|
}
|
||||||
|
|
||||||
return new_graph;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_opt_alloc_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph) {
|
static void ggml_opt_alloc_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph) {
|
||||||
|
@ -284,18 +292,13 @@ static void ggml_opt_alloc_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph
|
||||||
|
|
||||||
ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
|
ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
|
||||||
ggml_opt_context_t result = new struct ggml_opt_context;
|
ggml_opt_context_t result = new struct ggml_opt_context;
|
||||||
result->backend_sched = params.backend_sched;
|
result->backend_sched = params.backend_sched;
|
||||||
result->allocated_graph = nullptr;
|
result->ctx_compute = params.ctx_compute;
|
||||||
result->allocated_graph_copy = nullptr;
|
result->inputs = params.inputs;
|
||||||
result->ctx_compute = params.ctx_compute;
|
result->outputs = params.outputs;
|
||||||
result->ctx_copy = nullptr;
|
result->opt_period = params.opt_period;
|
||||||
result->inputs = params.inputs;
|
result->get_opt_pars = params.get_opt_pars;
|
||||||
result->outputs = params.outputs;
|
result->get_opt_pars_ud = params.get_opt_pars_ud;
|
||||||
result->iter = 1;
|
|
||||||
result->opt_period = params.opt_period;
|
|
||||||
result->opt_i = 0;
|
|
||||||
result->get_opt_pars = params.get_opt_pars;
|
|
||||||
result->get_opt_pars_ud = params.get_opt_pars_ud;
|
|
||||||
|
|
||||||
GGML_ASSERT(result->inputs->data && "the inputs must be allocated statically");
|
GGML_ASSERT(result->inputs->data && "the inputs must be allocated statically");
|
||||||
GGML_ASSERT(result->opt_period >= 1);
|
GGML_ASSERT(result->opt_period >= 1);
|
||||||
|
@ -348,7 +351,6 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
|
||||||
|
|
||||||
switch (params.loss_type) {
|
switch (params.loss_type) {
|
||||||
case GGML_OPT_LOSS_TYPE_MEAN: {
|
case GGML_OPT_LOSS_TYPE_MEAN: {
|
||||||
result->labels = nullptr;
|
|
||||||
result->loss = ggml_sum(result->ctx_static, result->outputs);
|
result->loss = ggml_sum(result->ctx_static, result->outputs);
|
||||||
ggml_set_name(result->loss, "loss_sum");
|
ggml_set_name(result->loss, "loss_sum");
|
||||||
const float scale = 1.0f / (result->opt_period * ggml_nelements(result->outputs));
|
const float scale = 1.0f / (result->opt_period * ggml_nelements(result->outputs));
|
||||||
|
@ -358,7 +360,6 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case GGML_OPT_LOSS_TYPE_SUM: {
|
case GGML_OPT_LOSS_TYPE_SUM: {
|
||||||
result->labels = nullptr;
|
|
||||||
result->loss = ggml_sum(result->ctx_static, result->outputs);
|
result->loss = ggml_sum(result->ctx_static, result->outputs);
|
||||||
ggml_set_name(result->loss, "loss_sum");
|
ggml_set_name(result->loss, "loss_sum");
|
||||||
result->loss_per_datapoint = false;
|
result->loss_per_datapoint = false;
|
||||||
|
@ -413,14 +414,7 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.build_type == GGML_OPT_BUILD_TYPE_FORWARD) {
|
if (params.build_type == GGML_OPT_BUILD_TYPE_FORWARD) {
|
||||||
result->gb_grad = nullptr;
|
|
||||||
result->gb_opt = nullptr;
|
|
||||||
|
|
||||||
result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0));
|
result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0));
|
||||||
result->buf_static_cpu = nullptr;
|
|
||||||
|
|
||||||
ggml_opt_alloc_graph(result, result->gf);
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -429,14 +423,8 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
|
||||||
ggml_build_backward_expand(result->ctx_static, result->ctx_compute, result->gb_grad, accumulate);
|
ggml_build_backward_expand(result->ctx_static, result->ctx_compute, result->gb_grad, accumulate);
|
||||||
|
|
||||||
if (params.build_type == GGML_OPT_BUILD_TYPE_GRAD) {
|
if (params.build_type == GGML_OPT_BUILD_TYPE_GRAD) {
|
||||||
result->gb_opt = nullptr;
|
|
||||||
|
|
||||||
result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0));
|
result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0));
|
||||||
result->buf_static_cpu = nullptr;
|
|
||||||
|
|
||||||
ggml_opt_alloc_graph(result, result->gb_grad);
|
|
||||||
ggml_graph_reset(result->gb_grad);
|
ggml_graph_reset(result->gb_grad);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -466,7 +454,6 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
|
||||||
|
|
||||||
result->buf_static_cpu = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx_static_cpu, ggml_backend_cpu_buffer_type());
|
result->buf_static_cpu = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx_static_cpu, ggml_backend_cpu_buffer_type());
|
||||||
|
|
||||||
ggml_opt_alloc_graph(result, result->gb_opt);
|
|
||||||
ggml_graph_reset(result->gb_opt);
|
ggml_graph_reset(result->gb_opt);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -73,7 +73,9 @@ void soft_max(uint num_iters) {
|
||||||
|
|
||||||
FLOAT_TYPE v = a * p.scale + slope * b;
|
FLOAT_TYPE v = a * p.scale + slope * b;
|
||||||
|
|
||||||
max_val = max(max_val, v);
|
if (col < p.KX) {
|
||||||
|
max_val = max(max_val, v);
|
||||||
|
}
|
||||||
|
|
||||||
if (idx < DATA_CACHE_SIZE) {
|
if (idx < DATA_CACHE_SIZE) {
|
||||||
data_cache[idx] = v;
|
data_cache[idx] = v;
|
||||||
|
|
|
@ -5019,8 +5019,10 @@ static void ggml_hash_map_free(struct hash_map * map) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// utility functions to change gradients
|
// utility functions to change gradients
|
||||||
// if a is in acc_table, modify gradients in-place and mark result as gradient accumulator
|
// isrc is the index of tensor in cgraph->visited_hash_set.keys
|
||||||
// else if a is in zero_table, replace a
|
// the corresponding gradient (and gradient accumulator) is also at position isrc
|
||||||
|
// if tensor has a gradient accumulator, modify that accumulator in-place
|
||||||
|
// else if there is no gradient for tensor, set the corresponding value
|
||||||
// else, just add/subtract/etc. the gradients
|
// else, just add/subtract/etc. the gradients
|
||||||
|
|
||||||
static void ggml_add_or_set(
|
static void ggml_add_or_set(
|
||||||
|
@ -5028,11 +5030,14 @@ static void ggml_add_or_set(
|
||||||
struct ggml_cgraph * cgraph,
|
struct ggml_cgraph * cgraph,
|
||||||
size_t isrc,
|
size_t isrc,
|
||||||
struct ggml_tensor * tensor) {
|
struct ggml_tensor * tensor) {
|
||||||
|
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
|
||||||
|
GGML_ASSERT(src);
|
||||||
if (cgraph->grads[isrc]) {
|
if (cgraph->grads[isrc]) {
|
||||||
cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
|
cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
|
||||||
} else {
|
} else {
|
||||||
cgraph->grads[isrc] = tensor;
|
cgraph->grads[isrc] = tensor;
|
||||||
}
|
}
|
||||||
|
ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
|
||||||
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5040,18 +5045,20 @@ static void ggml_acc_or_set(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_cgraph * cgraph,
|
struct ggml_cgraph * cgraph,
|
||||||
size_t isrc,
|
size_t isrc,
|
||||||
struct ggml_tensor * src,
|
|
||||||
struct ggml_tensor * tensor,
|
struct ggml_tensor * tensor,
|
||||||
const size_t nb1,
|
const size_t nb1,
|
||||||
const size_t nb2,
|
const size_t nb2,
|
||||||
const size_t nb3,
|
const size_t nb3,
|
||||||
const size_t offset) {
|
const size_t offset) {
|
||||||
|
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
|
||||||
|
GGML_ASSERT(src);
|
||||||
if (cgraph->grads[isrc]) {
|
if (cgraph->grads[isrc]) {
|
||||||
cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
|
cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
|
||||||
} else {
|
} else {
|
||||||
struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
|
struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
|
||||||
cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
|
cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
|
||||||
}
|
}
|
||||||
|
ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
|
||||||
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5059,13 +5066,15 @@ static void ggml_add1_or_set(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_cgraph * cgraph,
|
struct ggml_cgraph * cgraph,
|
||||||
size_t isrc,
|
size_t isrc,
|
||||||
struct ggml_tensor * src,
|
|
||||||
struct ggml_tensor * tensor) {
|
struct ggml_tensor * tensor) {
|
||||||
|
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
|
||||||
|
GGML_ASSERT(src);
|
||||||
if (cgraph->grads[isrc]) {
|
if (cgraph->grads[isrc]) {
|
||||||
cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
|
cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
|
||||||
} else {
|
} else {
|
||||||
cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
|
cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
|
||||||
}
|
}
|
||||||
|
ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
|
||||||
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5074,11 +5083,14 @@ static void ggml_sub_or_set(
|
||||||
struct ggml_cgraph * cgraph,
|
struct ggml_cgraph * cgraph,
|
||||||
size_t isrc,
|
size_t isrc,
|
||||||
struct ggml_tensor * tensor) {
|
struct ggml_tensor * tensor) {
|
||||||
|
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
|
||||||
|
GGML_ASSERT(src);
|
||||||
if (cgraph->grads[isrc]) {
|
if (cgraph->grads[isrc]) {
|
||||||
cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
|
cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
|
||||||
} else {
|
} else {
|
||||||
cgraph->grads[isrc] = ggml_neg(ctx, tensor);
|
cgraph->grads[isrc] = ggml_neg(ctx, tensor);
|
||||||
}
|
}
|
||||||
|
ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
|
||||||
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5095,12 +5107,12 @@ static void ggml_compute_backward(
|
||||||
struct ggml_tensor * src1 = tensor->src[1];
|
struct ggml_tensor * src1 = tensor->src[1];
|
||||||
struct ggml_tensor * src2 = tensor->src[2];
|
struct ggml_tensor * src2 = tensor->src[2];
|
||||||
struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
|
struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
|
||||||
const size_t isrc0 = ggml_hash_find(hash_set, src0);
|
const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
|
||||||
const size_t isrc1 = ggml_hash_find(hash_set, src1);
|
const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
|
||||||
const size_t isrc2 = ggml_hash_find(hash_set, src2);
|
const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
|
||||||
const bool src0_needs_grads = isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
|
const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
|
||||||
const bool src1_needs_grads = isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
|
const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
|
||||||
const bool src2_needs_grads = isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
|
const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
|
||||||
|
|
||||||
switch (tensor->op) {
|
switch (tensor->op) {
|
||||||
case GGML_OP_DUP: {
|
case GGML_OP_DUP: {
|
||||||
|
@ -5200,7 +5212,7 @@ static void ggml_compute_backward(
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_SUM: {
|
case GGML_OP_SUM: {
|
||||||
if (src0_needs_grads) {
|
if (src0_needs_grads) {
|
||||||
ggml_add1_or_set(ctx, cgraph, isrc0, src0, grad);
|
ggml_add1_or_set(ctx, cgraph, isrc0, grad);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_SUM_ROWS: {
|
case GGML_OP_SUM_ROWS: {
|
||||||
|
@ -5210,7 +5222,7 @@ static void ggml_compute_backward(
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_MEAN: {
|
case GGML_OP_MEAN: {
|
||||||
if (src0_needs_grads) {
|
if (src0_needs_grads) {
|
||||||
ggml_add1_or_set(ctx, cgraph, isrc0, src0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
|
ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_REPEAT: {
|
case GGML_OP_REPEAT: {
|
||||||
|
@ -5363,7 +5375,7 @@ static void ggml_compute_backward(
|
||||||
nb3 = (nb3 / n0) * ng;
|
nb3 = (nb3 / n0) * ng;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_acc_or_set(ctx, cgraph, isrc0, src0, grad, nb1, nb2, nb3, offset);
|
ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_PERMUTE: {
|
case GGML_OP_PERMUTE: {
|
||||||
|
@ -5597,10 +5609,9 @@ void ggml_build_backward_expand(
|
||||||
|
|
||||||
const int n_nodes_f = cgraph->n_nodes;
|
const int n_nodes_f = cgraph->n_nodes;
|
||||||
|
|
||||||
const size_t hash_size = ggml_hash_size(2*cgraph->size);
|
memset(cgraph->grads, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
|
||||||
memset(cgraph->grads, 0, hash_size*sizeof(struct ggml_tensor *));
|
memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
|
||||||
memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
|
bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
|
||||||
bool * grads_needed = calloc(hash_size, sizeof(bool));
|
|
||||||
|
|
||||||
{
|
{
|
||||||
bool any_params = false;
|
bool any_params = false;
|
||||||
|
@ -5621,7 +5632,7 @@ void ggml_build_backward_expand(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool node_needs_grad = node->flags & GGML_TENSOR_FLAG_PARAM;
|
bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
|
||||||
bool ignore_src[GGML_MAX_SRC] = {false};
|
bool ignore_src[GGML_MAX_SRC] = {false};
|
||||||
switch (node->op) {
|
switch (node->op) {
|
||||||
// gradients in node->src[0] for one reason or another have no effect on output gradients
|
// gradients in node->src[0] for one reason or another have no effect on output gradients
|
||||||
|
@ -5638,7 +5649,7 @@ void ggml_build_backward_expand(
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
// gradients in node->src[1] for one reason or another have no effect on output gradients
|
// gradients in node->src[1] for one reason or another have no effect on output gradients
|
||||||
case GGML_OP_CPY: // gradients in CPY target are irrelevant
|
case GGML_OP_CPY: // gradients in CPY target are irrelevant
|
||||||
case GGML_OP_GET_ROWS: // row indices not differentiable
|
case GGML_OP_GET_ROWS: // row indices not differentiable
|
||||||
case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
|
case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
|
||||||
case GGML_OP_ROPE: // positions not differentiable
|
case GGML_OP_ROPE: // positions not differentiable
|
||||||
|
@ -5665,9 +5676,12 @@ void ggml_build_backward_expand(
|
||||||
node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
|
node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
|
||||||
|
|
||||||
const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
|
const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
|
||||||
|
GGML_ASSERT(igrad != GGML_HASHSET_FULL);
|
||||||
|
GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, igrad));
|
||||||
if ((accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) || (node->flags & GGML_TENSOR_FLAG_LOSS)) {
|
if ((accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) || (node->flags & GGML_TENSOR_FLAG_LOSS)) {
|
||||||
cgraph->grads[igrad] = ggml_dup_tensor(ctx_static, node);
|
cgraph->grad_accs[igrad] = ggml_dup_tensor(ctx_static, node);
|
||||||
cgraph->grad_accs[igrad] = cgraph->grads[igrad];
|
cgraph->grads[igrad] = cgraph->grad_accs[igrad];
|
||||||
|
ggml_format_name(cgraph->grad_accs[igrad], "grad acc for %s", node->name);
|
||||||
}
|
}
|
||||||
grads_needed[igrad] = true;
|
grads_needed[igrad] = true;
|
||||||
}
|
}
|
||||||
|
@ -5761,15 +5775,15 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
||||||
|
|
||||||
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
|
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
|
||||||
struct ggml_cgraph cgraph = {
|
struct ggml_cgraph cgraph = {
|
||||||
/*.size =*/ 0,
|
/*.size =*/ 0,
|
||||||
/*.n_nodes =*/ i1 - i0,
|
/*.n_nodes =*/ i1 - i0,
|
||||||
/*.n_leafs =*/ 0,
|
/*.n_leafs =*/ 0,
|
||||||
/*.nodes =*/ cgraph0->nodes + i0,
|
/*.nodes =*/ cgraph0->nodes + i0,
|
||||||
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
|
/*.grads =*/ NULL, // gradients would need visited_hash_set
|
||||||
/*.grad_accs =*/ cgraph0->grad_accs ? cgraph0->grad_accs + i0 : NULL,
|
/*.grad_accs =*/ NULL,
|
||||||
/*.leafs =*/ NULL,
|
/*.leafs =*/ NULL,
|
||||||
/*.hash_table =*/ { 0, NULL, NULL },
|
/*.visited_hash_set =*/ { 0, NULL, NULL },
|
||||||
/*.order =*/ cgraph0->order,
|
/*.order =*/ cgraph0->order,
|
||||||
};
|
};
|
||||||
|
|
||||||
return cgraph;
|
return cgraph;
|
||||||
|
@ -5799,12 +5813,22 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dst->grads) {
|
||||||
|
memset(dst->grads, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
|
||||||
|
memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
|
||||||
|
}
|
||||||
if (src->grads) {
|
if (src->grads) {
|
||||||
GGML_ASSERT(dst->grads != NULL);
|
GGML_ASSERT(dst->grads != NULL);
|
||||||
GGML_ASSERT(dst->grad_accs != NULL);
|
GGML_ASSERT(dst->grad_accs != NULL);
|
||||||
for (int i = 0; i < src->n_nodes; ++i) {
|
for (int i = 0; i < src->n_nodes; ++i) {
|
||||||
const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
|
const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
|
||||||
const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
|
const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
|
||||||
|
|
||||||
|
GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
|
||||||
|
GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
|
||||||
|
GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
|
||||||
|
GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
|
||||||
|
|
||||||
dst->grads[igrad_dst] = src->grads[igrad_src];
|
dst->grads[igrad_dst] = src->grads[igrad_src];
|
||||||
dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
|
dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
|
||||||
}
|
}
|
||||||
|
@ -5839,12 +5863,8 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
||||||
|
|
||||||
if (node->op == GGML_OP_OPT_STEP_ADAMW) {
|
if (node->op == GGML_OP_OPT_STEP_ADAMW) {
|
||||||
// clear momenta
|
// clear momenta
|
||||||
if (node->src[2]->data) {
|
ggml_set_zero(node->src[2]);
|
||||||
ggml_set_zero(node->src[2]);
|
ggml_set_zero(node->src[3]);
|
||||||
}
|
|
||||||
if (node->src[3]->data) {
|
|
||||||
ggml_set_zero(node->src[3]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// initial gradients of loss should be 1, 0 otherwise
|
// initial gradients of loss should be 1, 0 otherwise
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
2884dd72fea8922910fe53387c3d17ab928d3a8e
|
6fcbd60bc72ac3f7ad43f78c87e535f2e6206f58
|
||||||
|
|
|
@ -18211,13 +18211,13 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
||||||
static void llama_kv_cache_update_internal(struct llama_context & lctx) {
|
static void llama_kv_cache_update_internal(struct llama_context & lctx) {
|
||||||
bool need_reserve = false;
|
bool need_reserve = false;
|
||||||
|
|
||||||
// apply K-shift if needed
|
if (lctx.kv_self.has_shift) {
|
||||||
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
|
|
||||||
if (!llama_kv_cache_can_shift(&lctx)) {
|
if (!llama_kv_cache_can_shift(&lctx)) {
|
||||||
GGML_ABORT("Deepseek2 does not support K-shift");
|
GGML_ABORT("The current context does not support K-shift");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
// apply K-shift if needed
|
||||||
|
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
|
||||||
ggml_backend_sched_reset(lctx.sched.get());
|
ggml_backend_sched_reset(lctx.sched.get());
|
||||||
|
|
||||||
ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
|
ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
|
||||||
|
@ -20463,7 +20463,7 @@ void llama_kv_cache_update(struct llama_context * ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool llama_kv_cache_can_shift(struct llama_context * ctx) {
|
bool llama_kv_cache_can_shift(struct llama_context * ctx) {
|
||||||
return ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
|
return !ctx->kv_self.recurrent && ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
|
||||||
}
|
}
|
||||||
|
|
||||||
// deprecated
|
// deprecated
|
||||||
|
|
|
@ -819,7 +819,6 @@ struct test_case {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: refactor so that this check is only needed once
|
|
||||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||||
if (!ggml_backend_supports_op(backend, t)) {
|
if (!ggml_backend_supports_op(backend, t)) {
|
||||||
printf("not supported [%s] ", ggml_backend_name(backend));
|
printf("not supported [%s] ", ggml_backend_name(backend));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue