Merge branch 'ggerganov:master' into server-chat-templates-custom

This commit is contained in:
MaggotHATE 2024-11-21 21:59:11 +05:00 committed by GitHub
commit 33761375d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 792 additions and 471 deletions

161
.clang-format Normal file
View file

@ -0,0 +1,161 @@
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...

View file

@ -3,12 +3,60 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@) set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@) set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
set(GGML_STATIC @GGML_STATIC@)
set(GGML_NATIVE @GGML_NATIVE@)
set(GGML_LTO @GGML_LTO@)
set(GGML_CCACHE @GGML_CCACHE@)
set(GGML_AVX @GGML_AVX@)
set(GGML_AVX2 @GGML_AVX2@)
set(GGML_AVX512 @GGML_AVX512@)
set(GGML_AVX512_VBMI @GGML_AVX512_VBMI@)
set(GGML_AVX512_VNNI @GGML_AVX512_VNNI@)
set(GGML_AVX512_BF16 @GGML_AVX512_BF16@)
set(GGML_AMX_TILE @GGML_AMX_TILE@)
set(GGML_AMX_INT8 @GGML_AMX_INT8@)
set(GGML_AMX_BF16 @GGML_AMX_BF16@)
set(GGML_FMA @GGML_FMA@)
set(GGML_LASX @GGML_LASX@)
set(GGML_LSX @GGML_LSX@)
set(GGML_RVV @GGML_RVV@)
set(GGML_SVE @GGML_SVE@)
set(GGML_ACCELERATE @GGML_ACCELERATE@) set(GGML_ACCELERATE @GGML_ACCELERATE@)
set(GGML_OPENMP @GGML_OPENMP@)
set(GGML_CPU_HBM @GGML_CPU_HBM@)
set(GGML_BLAS_VENDOR @GGML_BLAS_VENDOR@)
set(GGML_CUDA_FORCE_MMQ @GGML_CUDA_FORCE_MMQ@)
set(GGML_CUDA_FORCE_CUBLAS @GGML_CUDA_FORCE_CUBLAS@)
set(GGML_CUDA_F16 @GGML_CUDA_F16@)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE @GGML_CUDA_PEER_MAX_BATCH_SIZE@)
set(GGML_CUDA_NO_PEER_COPY @GGML_CUDA_NO_PEER_COPY@)
set(GGML_CUDA_NO_VMM @GGML_CUDA_NO_VMM@)
set(GGML_CUDA_FA_ALL_QUANTS @GGML_CUDA_FA_ALL_QUANTS@)
set(GGML_CUDA_GRAPHS @GGML_CUDA_GRAPHS@)
set(GGML_HIP_UMA @GGML_HIP_UMA@)
set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@) set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@)
set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@) set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@)
set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@) set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@)
set(GGML_VULKAN_SHADER_DEBUG_INFO @GGML_VULKAN_SHADER_DEBUG_INFO@)
set(GGML_VULKAN_PERF @GGML_VULKAN_PERF@)
set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@) set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@)
set(GGML_OPENMP @GGML_OPENMP@) set(GGML_VULKAN_RUN_TESTS @GGML_VULKAN_RUN_TESTS@)
set(GGML_METAL_USE_BF16 @GGML_METAL_USE_BF16@)
set(GGML_METAL_NDEBUG @GGML_METAL_NDEBUG@)
set(GGML_METAL_SHADER_DEBUG @GGML_METAL_SHADER_DEBUG@)
set(GGML_METAL_EMBED_LIBRARY @GGML_METAL_EMBED_LIBRARY@)
set(GGML_METAL_MACOSX_VERSION_MIN @GGML_METAL_MACOSX_VERSION_MIN@)
set(GGML_METAL_STD @GGML_METAL_STD@)
set(GGML_SYCL_F16 @GGML_SYCL_F16@)
set(GGML_SYCL_TARGET @GGML_SYCL_TARGET@)
set(GGML_SYCL_DEVICE_ARCH @GGML_SYCL_DEVICE_ARCH@)
@PACKAGE_INIT@ @PACKAGE_INIT@
@ -20,6 +68,7 @@ find_package(Threads REQUIRED)
set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@") set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
set(_llama_link_deps "") set(_llama_link_deps "")
set(_llama_link_opts "")
foreach(_ggml_lib ggml ggml-base) foreach(_ggml_lib ggml ggml-base)
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY") string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
find_library(${_ggml_lib_var} ${_ggml_lib} find_library(${_ggml_lib_var} ${_ggml_lib}
@ -49,12 +98,26 @@ foreach(backend amx blas cann cpu cuda hip kompute metal musa rpc sycl vulkan)
endif() endif()
endforeach() endforeach()
if (NOT LLAMA_SHARED_LIB)
if (APPLE AND GGML_ACCELERATE) if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
list(APPEND _llama_link_deps ${ACCELERATE_FRAMEWORK})
endif()
if (GGML_OPENMP)
find_package(OpenMP REQUIRED)
list(APPEND _llama_link_deps OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
endif()
if (GGML_CPU_HBM)
find_library(memkind memkind REQUIRED)
list(APPEND _llama_link_deps memkind)
endif() endif()
if (GGML_BLAS) if (GGML_BLAS)
find_package(BLAS REQUIRED) find_package(BLAS REQUIRED)
list(APPEND _llama_link_deps ${BLAS_LIBRARIES})
list(APPEND _llama_link_opts ${BLAS_LINKER_FLAGS})
endif() endif()
if (GGML_CUDA) if (GGML_CUDA)
@ -65,25 +128,33 @@ if (GGML_METAL)
find_library(FOUNDATION_LIBRARY Foundation REQUIRED) find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
find_library(METAL_FRAMEWORK Metal REQUIRED) find_library(METAL_FRAMEWORK Metal REQUIRED)
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
list(APPEND _llama_link_deps ${FOUNDATION_LIBRARY}
${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
endif() endif()
if (GGML_VULKAN) if (GGML_VULKAN)
find_package(Vulkan REQUIRED) find_package(Vulkan REQUIRED)
list(APPEND _llama_link_deps Vulkan::Vulkan)
endif() endif()
if (GGML_HIP) if (GGML_HIP)
find_package(hip REQUIRED) find_package(hip REQUIRED)
find_package(hipblas REQUIRED) find_package(hipblas REQUIRED)
find_package(rocblas REQUIRED) find_package(rocblas REQUIRED)
list(APPEND _llama_link_deps hip::host roc::rocblas roc::hipblas)
endif() endif()
if (GGML_SYCL) if (GGML_SYCL)
find_package(DNNL)
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
list(APPEND _llama_link_deps DNNL::dnnl)
endif()
if (WIN32)
find_package(IntelSYCL REQUIRED) find_package(IntelSYCL REQUIRED)
find_package(MKL REQUIRED) find_package(MKL REQUIRED)
list(APPEND _llama_link_deps IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
endif()
endif() endif()
if (GGML_OPENMP)
find_package(OpenMP REQUIRED)
endif() endif()
find_library(llama_LIBRARY llama find_library(llama_LIBRARY llama
@ -97,6 +168,7 @@ set_target_properties(llama
PROPERTIES PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}" INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}" INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
INTERFACE_LINK_OPTIONS "${_llama_link_opts}"
INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}" INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
IMPORTED_LOCATION "${llama_LIBRARY}" IMPORTED_LOCATION "${llama_LIBRARY}"

View file

@ -6,21 +6,21 @@
#include <clocale> #include <clocale>
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
#include <cstdlib>
#include <cstring> #include <cstring>
#include <ctime> #include <ctime>
#include <cstdlib>
#include <iterator> #include <iterator>
#include <map> #include <map>
#include <numeric> #include <numeric>
#include <regex> #include <regex>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector>
#include <thread> #include <thread>
#include <vector>
#include "common.h"
#include "ggml.h" #include "ggml.h"
#include "llama.h" #include "llama.h"
#include "common.h"
#ifdef _WIN32 #ifdef _WIN32
# define WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN
@ -36,8 +36,7 @@ static uint64_t get_time_ns() {
return std::chrono::nanoseconds(clock::now().time_since_epoch()).count(); return std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
} }
template<class T> template <class T> static std::string join(const std::vector<T> & values, const std::string & delim) {
static std::string join(const std::vector<T> & values, const std::string & delim) {
std::ostringstream str; std::ostringstream str;
for (size_t i = 0; i < values.size(); i++) { for (size_t i = 0; i < values.size(); i++) {
str << values[i]; str << values[i];
@ -48,15 +47,13 @@ static std::string join(const std::vector<T> & values, const std::string & delim
return str.str(); return str.str();
} }
template<typename T, typename F> template <typename T, typename F> static std::vector<std::string> transform_to_str(const std::vector<T> & values, F f) {
static std::vector<std::string> transform_to_str(const std::vector<T> & values, F f) {
std::vector<std::string> str_values; std::vector<std::string> str_values;
std::transform(values.begin(), values.end(), std::back_inserter(str_values), f); std::transform(values.begin(), values.end(), std::back_inserter(str_values), f);
return str_values; return str_values;
} }
template<typename T> template <typename T> static T avg(const std::vector<T> & v) {
static T avg(const std::vector<T> & v) {
if (v.empty()) { if (v.empty()) {
return 0; return 0;
} }
@ -64,8 +61,7 @@ static T avg(const std::vector<T> & v) {
return sum / (T) v.size(); return sum / (T) v.size();
} }
template<typename T> template <typename T> static T stdev(const std::vector<T> & v) {
static T stdev(const std::vector<T> & v) {
if (v.size() <= 1) { if (v.size() <= 1) {
return 0; return 0;
} }
@ -104,13 +100,20 @@ enum output_formats {NONE, CSV, JSON, JSONL, MARKDOWN, SQL};
static const char * output_format_str(output_formats format) { static const char * output_format_str(output_formats format) {
switch (format) { switch (format) {
case NONE: return "none"; case NONE:
case CSV: return "csv"; return "none";
case JSON: return "json"; case CSV:
case JSONL: return "jsonl"; return "csv";
case MARKDOWN: return "md"; case JSON:
case SQL: return "sql"; return "json";
default: GGML_ABORT("invalid output format"); case JSONL:
return "jsonl";
case MARKDOWN:
return "md";
case SQL:
return "sql";
default:
GGML_ABORT("invalid output format");
} }
} }
@ -135,10 +138,14 @@ static bool output_format_from_str(const std::string & s, output_formats & forma
static const char * split_mode_str(llama_split_mode mode) { static const char * split_mode_str(llama_split_mode mode) {
switch (mode) { switch (mode) {
case LLAMA_SPLIT_MODE_NONE: return "none"; case LLAMA_SPLIT_MODE_NONE:
case LLAMA_SPLIT_MODE_LAYER: return "layer"; return "none";
case LLAMA_SPLIT_MODE_ROW: return "row"; case LLAMA_SPLIT_MODE_LAYER:
default: GGML_ABORT("invalid split mode"); return "layer";
case LLAMA_SPLIT_MODE_ROW:
return "row";
default:
GGML_ABORT("invalid split mode");
} }
} }
@ -218,38 +225,59 @@ static void print_usage(int /* argc */, char ** argv) {
printf("options:\n"); printf("options:\n");
printf(" -h, --help\n"); printf(" -h, --help\n");
printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str()); printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str()); printf(" -p, --n-prompt <n> (default: %s)\n",
join(cmd_params_defaults.n_prompt, ",").c_str());
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str()); printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str()); printf(" -pg <pp,tg> (default: %s)\n",
printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str()); join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
printf(" -ub, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str()); printf(" -b, --batch-size <n> (default: %s)\n",
printf(" -ctk, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str()); join(cmd_params_defaults.n_batch, ",").c_str());
printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str()); printf(" -ub, --ubatch-size <n> (default: %s)\n",
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str()); join(cmd_params_defaults.n_ubatch, ",").c_str());
printf(" -C, --cpu-mask <hex,hex> (default: %s)\n", join(cmd_params_defaults.cpu_mask, ",").c_str()); printf(" -ctk, --cache-type-k <t> (default: %s)\n",
printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str()); join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
printf(" -ctv, --cache-type-v <t> (default: %s)\n",
join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
printf(" -t, --threads <n> (default: %s)\n",
join(cmd_params_defaults.n_threads, ",").c_str());
printf(" -C, --cpu-mask <hex,hex> (default: %s)\n",
join(cmd_params_defaults.cpu_mask, ",").c_str());
printf(" --cpu-strict <0|1> (default: %s)\n",
join(cmd_params_defaults.cpu_strict, ",").c_str());
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str()); printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str()); printf(" -ngl, --n-gpu-layers <n> (default: %s)\n",
join(cmd_params_defaults.n_gpu_layers, ",").c_str());
if (llama_supports_rpc()) { if (llama_supports_rpc()) {
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str()); printf(" -rpc, --rpc <rpc_servers> (default: %s)\n",
join(cmd_params_defaults.rpc_servers, ",").c_str());
} }
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str()); printf(" -sm, --split-mode <none|layer|row> (default: %s)\n",
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str()); join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str()); printf(" -mg, --main-gpu <i> (default: %s)\n",
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str()); join(cmd_params_defaults.main_gpu, ",").c_str());
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str()); printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n",
join(cmd_params_defaults.no_kv_offload, ",").c_str());
printf(" -fa, --flash-attn <0|1> (default: %s)\n",
join(cmd_params_defaults.flash_attn, ",").c_str());
printf(" -mmp, --mmap <0|1> (default: %s)\n",
join(cmd_params_defaults.use_mmap, ",").c_str());
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n"); printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str()); printf(" -embd, --embeddings <0|1> (default: %s)\n",
join(cmd_params_defaults.embeddings, ",").c_str());
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n"); printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps); printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio); printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay); printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n",
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr)); output_format_str(cmd_params_defaults.output_format));
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n",
output_format_str(cmd_params_defaults.output_format_stderr));
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0"); printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
printf(" --progress (default: %s)\n", cmd_params_defaults.progress ? "1" : "0"); printf(" --progress (default: %s)\n", cmd_params_defaults.progress ? "1" : "0");
printf("\n"); printf("\n");
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n"); printf(
"Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter "
"multiple times.\n");
} }
static ggml_type ggml_type_from_name(const std::string & s) { static ggml_type ggml_type_from_name(const std::string & s) {
@ -281,7 +309,6 @@ static ggml_type ggml_type_from_name(const std::string & s) {
return GGML_TYPE_COUNT; return GGML_TYPE_COUNT;
} }
static cmd_params parse_cmd_params(int argc, char ** argv) { static cmd_params parse_cmd_params(int argc, char ** argv) {
cmd_params params; cmd_params params;
std::string arg; std::string arg;
@ -476,10 +503,16 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
break; break;
} else { } else {
std::string value(argv[i]); std::string value(argv[i]);
/**/ if (value == "distribute" || value == "" ) { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; } /**/ if (value == "distribute" || value == "") {
else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; } params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE;
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; } } else if (value == "isolate") {
else { invalid_param = true; break; } params.numa = GGML_NUMA_STRATEGY_ISOLATE;
} else if (value == "numactl") {
params.numa = GGML_NUMA_STRATEGY_NUMACTL;
} else {
invalid_param = true;
break;
}
} }
} else if (arg == "-fa" || arg == "--flash-attn") { } else if (arg == "-fa" || arg == "--flash-attn") {
if (++i >= argc) { if (++i >= argc) {
@ -570,27 +603,69 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
} }
// set defaults // set defaults
if (params.model.empty()) { params.model = cmd_params_defaults.model; } if (params.model.empty()) {
if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; } params.model = cmd_params_defaults.model;
if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; } }
if (params.n_pg.empty()) { params.n_pg = cmd_params_defaults.n_pg; } if (params.n_prompt.empty()) {
if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; } params.n_prompt = cmd_params_defaults.n_prompt;
if (params.n_ubatch.empty()) { params.n_ubatch = cmd_params_defaults.n_ubatch; } }
if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; } if (params.n_gen.empty()) {
if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; } params.n_gen = cmd_params_defaults.n_gen;
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; } }
if (params.rpc_servers.empty()) { params.rpc_servers = cmd_params_defaults.rpc_servers; } if (params.n_pg.empty()) {
if (params.split_mode.empty()) { params.split_mode = cmd_params_defaults.split_mode; } params.n_pg = cmd_params_defaults.n_pg;
if (params.main_gpu.empty()) { params.main_gpu = cmd_params_defaults.main_gpu; } }
if (params.no_kv_offload.empty()){ params.no_kv_offload = cmd_params_defaults.no_kv_offload; } if (params.n_batch.empty()) {
if (params.flash_attn.empty()) { params.flash_attn = cmd_params_defaults.flash_attn; } params.n_batch = cmd_params_defaults.n_batch;
if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; } }
if (params.use_mmap.empty()) { params.use_mmap = cmd_params_defaults.use_mmap; } if (params.n_ubatch.empty()) {
if (params.embeddings.empty()) { params.embeddings = cmd_params_defaults.embeddings; } params.n_ubatch = cmd_params_defaults.n_ubatch;
if (params.n_threads.empty()) { params.n_threads = cmd_params_defaults.n_threads; } }
if (params.cpu_mask.empty()) { params.cpu_mask = cmd_params_defaults.cpu_mask; } if (params.type_k.empty()) {
if (params.cpu_strict.empty()) { params.cpu_strict = cmd_params_defaults.cpu_strict; } params.type_k = cmd_params_defaults.type_k;
if (params.poll.empty()) { params.poll = cmd_params_defaults.poll; } }
if (params.type_v.empty()) {
params.type_v = cmd_params_defaults.type_v;
}
if (params.n_gpu_layers.empty()) {
params.n_gpu_layers = cmd_params_defaults.n_gpu_layers;
}
if (params.rpc_servers.empty()) {
params.rpc_servers = cmd_params_defaults.rpc_servers;
}
if (params.split_mode.empty()) {
params.split_mode = cmd_params_defaults.split_mode;
}
if (params.main_gpu.empty()) {
params.main_gpu = cmd_params_defaults.main_gpu;
}
if (params.no_kv_offload.empty()) {
params.no_kv_offload = cmd_params_defaults.no_kv_offload;
}
if (params.flash_attn.empty()) {
params.flash_attn = cmd_params_defaults.flash_attn;
}
if (params.tensor_split.empty()) {
params.tensor_split = cmd_params_defaults.tensor_split;
}
if (params.use_mmap.empty()) {
params.use_mmap = cmd_params_defaults.use_mmap;
}
if (params.embeddings.empty()) {
params.embeddings = cmd_params_defaults.embeddings;
}
if (params.n_threads.empty()) {
params.n_threads = cmd_params_defaults.n_threads;
}
if (params.cpu_mask.empty()) {
params.cpu_mask = cmd_params_defaults.cpu_mask;
}
if (params.cpu_strict.empty()) {
params.cpu_strict = cmd_params_defaults.cpu_strict;
}
if (params.poll.empty()) {
params.poll = cmd_params_defaults.poll;
}
return params; return params;
} }
@ -633,12 +708,8 @@ struct cmd_params_instance {
} }
bool equal_mparams(const cmd_params_instance & other) const { bool equal_mparams(const cmd_params_instance & other) const {
return model == other.model && return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers == other.rpc_servers &&
n_gpu_layers == other.n_gpu_layers && split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
rpc_servers == other.rpc_servers &&
split_mode == other.split_mode &&
main_gpu == other.main_gpu &&
use_mmap == other.use_mmap &&
tensor_split == other.tensor_split; tensor_split == other.tensor_split;
} }
@ -662,6 +733,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
std::vector<cmd_params_instance> instances; std::vector<cmd_params_instance> instances;
// this ordering minimizes the number of times that each model needs to be reloaded // this ordering minimizes the number of times that each model needs to be reloaded
// clang-format off
for (const auto & m : params.model) for (const auto & m : params.model)
for (const auto & nl : params.n_gpu_layers) for (const auto & nl : params.n_gpu_layers)
for (const auto & rpc : params.rpc_servers) for (const auto & rpc : params.rpc_servers)
@ -767,6 +839,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
instances.push_back(instance); instances.push_back(instance);
} }
} }
// clang-format on
return instances; return instances;
} }
@ -834,28 +907,21 @@ struct test {
(void) ctx; (void) ctx;
} }
uint64_t avg_ns() const { uint64_t avg_ns() const { return ::avg(samples_ns); }
return ::avg(samples_ns);
}
uint64_t stdev_ns() const { uint64_t stdev_ns() const { return ::stdev(samples_ns); }
return ::stdev(samples_ns);
}
std::vector<double> get_ts() const { std::vector<double> get_ts() const {
int n_tokens = n_prompt + n_gen; int n_tokens = n_prompt + n_gen;
std::vector<double> ts; std::vector<double> ts;
std::transform(samples_ns.begin(), samples_ns.end(), std::back_inserter(ts), [n_tokens](uint64_t t) { return 1e9 * n_tokens / t; }); std::transform(samples_ns.begin(), samples_ns.end(), std::back_inserter(ts),
[n_tokens](uint64_t t) { return 1e9 * n_tokens / t; });
return ts; return ts;
} }
double avg_ts() const { double avg_ts() const { return ::avg(get_ts()); }
return ::avg(get_ts());
}
double stdev_ts() const { double stdev_ts() const { return ::stdev(get_ts()); }
return ::stdev(get_ts());
}
static std::string get_backend() { static std::string get_backend() {
std::vector<std::string> backends; std::vector<std::string> backends;
@ -871,17 +937,11 @@ struct test {
static const std::vector<std::string> & get_fields() { static const std::vector<std::string> & get_fields() {
static const std::vector<std::string> fields = { static const std::vector<std::string> fields = {
"build_commit", "build_number", "build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename",
"cpu_info", "gpu_info", "backends", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads",
"model_filename", "model_type", "model_size", "model_n_params", "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
"n_batch", "n_ubatch", "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "use_mmap",
"n_threads", "cpu_mask", "cpu_strict", "poll", "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns",
"type_k", "type_v",
"n_gpu_layers", "split_mode",
"main_gpu", "no_kv_offload", "flash_attn",
"tensor_split", "use_mmap", "embeddings",
"n_prompt", "n_gen", "test_time",
"avg_ns", "stddev_ns",
"avg_ts", "stddev_ts", "avg_ts", "stddev_ts",
}; };
return fields; return fields;
@ -890,17 +950,14 @@ struct test {
enum field_type { STRING, BOOL, INT, FLOAT }; enum field_type { STRING, BOOL, INT, FLOAT };
static field_type get_field_type(const std::string & field) { static field_type get_field_type(const std::string & field) {
if (field == "build_number" || field == "n_batch" || field == "n_ubatch" || if (field == "build_number" || field == "n_batch" || field == "n_ubatch" || field == "n_threads" ||
field == "n_threads" || field == "poll" || field == "poll" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" ||
field == "model_size" || field == "model_n_params" || field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "avg_ns" ||
field == "n_gpu_layers" || field == "main_gpu" || field == "stddev_ns") {
field == "n_prompt" || field == "n_gen" ||
field == "avg_ns" || field == "stddev_ns") {
return INT; return INT;
} }
if (field == "f16_kv" || field == "no_kv_offload" || if (field == "f16_kv" || field == "no_kv_offload" || field == "cpu_strict" || field == "flash_attn" ||
field == "cpu_strict" || field == "use_mmap" || field == "embeddings") {
field == "flash_attn" || field == "use_mmap" || field == "embeddings") {
return BOOL; return BOOL;
} }
if (field == "avg_ts" || field == "stddev_ts") { if (field == "avg_ts" || field == "stddev_ts") {
@ -925,20 +982,38 @@ struct test {
tensor_split_str += "/"; tensor_split_str += "/";
} }
} }
std::vector<std::string> values = { std::vector<std::string> values = { build_commit,
build_commit, std::to_string(build_number), std::to_string(build_number),
cpu_info, gpu_info, get_backend(), cpu_info,
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params), gpu_info,
std::to_string(n_batch), std::to_string(n_ubatch), get_backend(),
std::to_string(n_threads), cpu_mask, std::to_string(cpu_strict), std::to_string(poll), model_filename,
ggml_type_name(type_k), ggml_type_name(type_v), model_type,
std::to_string(n_gpu_layers), split_mode_str(split_mode), std::to_string(model_size),
std::to_string(main_gpu), std::to_string(no_kv_offload), std::to_string(flash_attn), std::to_string(model_n_params),
tensor_split_str, std::to_string(use_mmap), std::to_string(embeddings), std::to_string(n_batch),
std::to_string(n_prompt), std::to_string(n_gen), test_time, std::to_string(n_ubatch),
std::to_string(avg_ns()), std::to_string(stdev_ns()), std::to_string(n_threads),
std::to_string(avg_ts()), std::to_string(stdev_ts()) cpu_mask,
}; std::to_string(cpu_strict),
std::to_string(poll),
ggml_type_name(type_k),
ggml_type_name(type_v),
std::to_string(n_gpu_layers),
split_mode_str(split_mode),
std::to_string(main_gpu),
std::to_string(no_kv_offload),
std::to_string(flash_attn),
tensor_split_str,
std::to_string(use_mmap),
std::to_string(embeddings),
std::to_string(n_prompt),
std::to_string(n_gen),
test_time,
std::to_string(avg_ns()),
std::to_string(stdev_ns()),
std::to_string(avg_ts()),
std::to_string(stdev_ts()) };
return values; return values;
} }
@ -946,8 +1021,8 @@ struct test {
std::map<std::string, std::string> map; std::map<std::string, std::string> map;
auto fields = get_fields(); auto fields = get_fields();
auto values = get_values(); auto values = get_values();
std::transform(fields.begin(), fields.end(), values.begin(), std::transform(fields.begin(), fields.end(), values.begin(), std::inserter(map, map.end()),
std::inserter(map, map.end()), std::make_pair<const std::string &, const std::string &>); std::make_pair<const std::string &, const std::string &>);
return map; return map;
} }
}; };
@ -961,8 +1036,11 @@ struct printer {
virtual ~printer() {} virtual ~printer() {}
FILE * fout; FILE * fout;
virtual void print_header(const cmd_params & params) { (void) params; } virtual void print_header(const cmd_params & params) { (void) params; }
virtual void print_test(const test & t) = 0; virtual void print_test(const test & t) = 0;
virtual void print_footer() {} virtual void print_footer() {}
}; };
@ -992,7 +1070,6 @@ struct csv_printer : public printer {
} }
}; };
static std::string escape_json(const std::string & value) { static std::string escape_json(const std::string & value) {
std::string escaped; std::string escaped;
for (auto c : value) { for (auto c : value) {
@ -1033,7 +1110,8 @@ struct json_printer : public printer {
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) { void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
assert(fields.size() == values.size()); assert(fields.size() == values.size());
for (size_t i = 0; i < fields.size(); i++) { for (size_t i = 0; i < fields.size(); i++) {
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str()); fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(),
format_json_value(fields.at(i), values.at(i)).c_str());
} }
} }
@ -1051,12 +1129,9 @@ struct json_printer : public printer {
fflush(fout); fflush(fout);
} }
void print_footer() override { void print_footer() override { fprintf(fout, "\n]\n"); }
fprintf(fout, "\n]\n");
}
}; };
struct jsonl_printer : public printer { struct jsonl_printer : public printer {
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) { void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
assert(fields.size() == values.size()); assert(fields.size() == values.size());
@ -1303,7 +1378,8 @@ struct sql_printer : public printer {
std::vector<std::string> fields = test::get_fields(); std::vector<std::string> fields = test::get_fields();
fprintf(fout, "CREATE TABLE IF NOT EXISTS test (\n"); fprintf(fout, "CREATE TABLE IF NOT EXISTS test (\n");
for (size_t i = 0; i < fields.size(); i++) { for (size_t i = 0; i < fields.size(); i++) {
fprintf(fout, " %s %s%s\n", fields.at(i).c_str(), get_sql_field_type(fields.at(i)).c_str(), i < fields.size() - 1 ? "," : ""); fprintf(fout, " %s %s%s\n", fields.at(i).c_str(), get_sql_field_type(fields.at(i)).c_str(),
i < fields.size() - 1 ? "," : "");
} }
fprintf(fout, ");\n"); fprintf(fout, ");\n");
fprintf(fout, "\n"); fprintf(fout, "\n");
@ -1505,13 +1581,15 @@ int main(int argc, char ** argv) {
if (t.n_prompt > 0) { if (t.n_prompt > 0) {
if (params.progress) { if (params.progress) {
fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps); fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count,
i + 1, params.reps);
} }
test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads); test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
} }
if (t.n_gen > 0) { if (t.n_gen > 0) {
if (params.progress) { if (params.progress) {
fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps); fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count,
i + 1, params.reps);
} }
test_gen(ctx, t.n_gen, t.n_threads); test_gen(ctx, t.n_gen, t.n_threads);
} }

View file

@ -252,6 +252,7 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
} }
void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
GGML_ASSERT(tensor);
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
if (size == 0) { if (size == 0) {
@ -266,6 +267,7 @@ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, siz
} }
void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
GGML_ASSERT(tensor);
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
if (size == 0) { if (size == 0) {
@ -884,9 +886,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
int * node_backend_id = &tensor_backend_id(node); int * node_backend_id = &tensor_backend_id(node);
if (ggml_is_view_op(node->op)) {
continue;
}
// do not overwrite user assignments // do not overwrite user assignments
if (*node_backend_id == -1) { if (*node_backend_id == -1) {
*node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node); *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);

View file

@ -295,6 +295,9 @@ struct ggml_cgraph {
enum ggml_cgraph_eval_order order; enum ggml_cgraph_eval_order order;
}; };
// returns a slice of cgraph with nodes [i0, i1)
// the slice does not have leafs or gradients
// if you need the gradients, get them from the original graph
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1); struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
// Memory allocation // Memory allocation

View file

@ -14,51 +14,51 @@
#include <vector> #include <vector>
struct ggml_opt_dataset { struct ggml_opt_dataset {
struct ggml_context * ctx; struct ggml_context * ctx = nullptr;
ggml_backend_buffer_t buf; ggml_backend_buffer_t buf = nullptr;
struct ggml_tensor * data; struct ggml_tensor * data = nullptr;
struct ggml_tensor * labels; struct ggml_tensor * labels = nullptr;
int64_t ndata; int64_t ndata = -1;
int64_t ndata_shard; int64_t ndata_shard = -1;
size_t nbs_data; size_t nbs_data = -1;
size_t nbs_labels; size_t nbs_labels = -1;
std::vector<int64_t> permutation; std::vector<int64_t> permutation;
}; };
struct ggml_opt_context { struct ggml_opt_context {
ggml_backend_sched_t backend_sched; ggml_backend_sched_t backend_sched = nullptr;
ggml_cgraph * allocated_graph; ggml_cgraph * allocated_graph = nullptr;
ggml_cgraph * allocated_graph_copy; ggml_cgraph * allocated_graph_copy = nullptr;
struct ggml_context * ctx_static; struct ggml_context * ctx_static = nullptr;
struct ggml_context * ctx_static_cpu; struct ggml_context * ctx_static_cpu = nullptr;
struct ggml_context * ctx_compute; struct ggml_context * ctx_compute = nullptr;
struct ggml_context * ctx_copy; struct ggml_context * ctx_copy = nullptr;
ggml_backend_buffer_t buf_static; ggml_backend_buffer_t buf_static = nullptr;
ggml_backend_buffer_t buf_static_cpu; ggml_backend_buffer_t buf_static_cpu = nullptr;
std::mt19937 rng; std::mt19937 rng;
struct ggml_tensor * inputs; struct ggml_tensor * inputs = nullptr;
struct ggml_tensor * outputs; struct ggml_tensor * outputs = nullptr;
struct ggml_tensor * labels; struct ggml_tensor * labels = nullptr;
struct ggml_tensor * loss; struct ggml_tensor * loss = nullptr;
struct ggml_tensor * pred; struct ggml_tensor * pred = nullptr;
struct ggml_tensor * ncorrect; struct ggml_tensor * ncorrect = nullptr;
struct ggml_cgraph * gf; struct ggml_cgraph * gf = nullptr;
struct ggml_cgraph * gb_grad; struct ggml_cgraph * gb_grad = nullptr;
struct ggml_cgraph * gb_opt; struct ggml_cgraph * gb_opt = nullptr;
int64_t iter; int64_t iter = 1;
int32_t opt_period; int32_t opt_period = 1;
int32_t opt_i; int32_t opt_i = 0;
bool loss_per_datapoint; bool loss_per_datapoint = false;
ggml_opt_get_optimizer_params get_opt_pars; ggml_opt_get_optimizer_params get_opt_pars = nullptr;
void * get_opt_pars_ud; void * get_opt_pars_ud = nullptr;
struct ggml_tensor * adamw_params; struct ggml_tensor * adamw_params = nullptr;
}; };
struct ggml_opt_result { struct ggml_opt_result {
@ -67,8 +67,8 @@ struct ggml_opt_result {
std::vector<int32_t> pred; std::vector<int32_t> pred;
int64_t ncorrect = 0; int64_t ncorrect = 0;
bool loss_per_datapoint = false;
int64_t opt_period = -1; int64_t opt_period = -1;
bool loss_per_datapoint = false;
}; };
// ====== Dataset ====== // ====== Dataset ======
@ -237,25 +237,33 @@ static ggml_tensor * map_tensor(std::map<ggml_tensor *, ggml_tensor *> & tensor_
return new_tensor; return new_tensor;
} }
static ggml_cgraph * dup_graph(ggml_context * ctx, ggml_cgraph * graph) { static ggml_cgraph * dup_graph(ggml_context * ctx, ggml_cgraph * src) {
std::map<ggml_tensor *, ggml_tensor *> tensor_map; std::map<ggml_tensor *, ggml_tensor *> tensor_map;
ggml_cgraph * new_graph = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true); ggml_cgraph * dst = ggml_new_graph_custom(ctx, src->size, /*grads =*/ true);
for (int i = 0; i < graph->n_leafs; i++) { for (int i = 0; i < src->n_leafs; i++) {
ggml_build_forward_expand(new_graph, map_tensor(tensor_map, ctx, graph->leafs[i])); ggml_build_forward_expand(dst, map_tensor(tensor_map, ctx, src->leafs[i]));
} }
for (int i = 0; i < graph->n_nodes; i++) { GGML_ASSERT(dst->n_leafs == src->n_leafs);
ggml_build_forward_expand(new_graph, map_tensor(tensor_map, ctx, graph->nodes[i])); for (int i = 0; i < src->n_nodes; i++) {
ggml_build_forward_expand(dst, map_tensor(tensor_map, ctx, src->nodes[i]));
} }
for (int i = 0; i < graph->n_nodes; ++i) { GGML_ASSERT(dst->n_nodes == src->n_nodes);
const size_t igrad_src = ggml_hash_find(&graph->visited_hash_set, graph->nodes[i]); for (int i = 0; i < src->n_nodes; ++i) {
const size_t igrad_dst = ggml_hash_find(&new_graph->visited_hash_set, new_graph->nodes[i]); const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
graph->grads[igrad_dst] = new_graph->grads[igrad_src]; const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
graph->grad_accs[igrad_dst] = new_graph->grad_accs[igrad_src];
GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
dst->grads[igrad_dst] = src->grads[igrad_src];
dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
} }
return new_graph; return dst;
} }
static void ggml_opt_alloc_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph) { static void ggml_opt_alloc_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph) {
@ -285,15 +293,10 @@ static void ggml_opt_alloc_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph
ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) { ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
ggml_opt_context_t result = new struct ggml_opt_context; ggml_opt_context_t result = new struct ggml_opt_context;
result->backend_sched = params.backend_sched; result->backend_sched = params.backend_sched;
result->allocated_graph = nullptr;
result->allocated_graph_copy = nullptr;
result->ctx_compute = params.ctx_compute; result->ctx_compute = params.ctx_compute;
result->ctx_copy = nullptr;
result->inputs = params.inputs; result->inputs = params.inputs;
result->outputs = params.outputs; result->outputs = params.outputs;
result->iter = 1;
result->opt_period = params.opt_period; result->opt_period = params.opt_period;
result->opt_i = 0;
result->get_opt_pars = params.get_opt_pars; result->get_opt_pars = params.get_opt_pars;
result->get_opt_pars_ud = params.get_opt_pars_ud; result->get_opt_pars_ud = params.get_opt_pars_ud;
@ -348,7 +351,6 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
switch (params.loss_type) { switch (params.loss_type) {
case GGML_OPT_LOSS_TYPE_MEAN: { case GGML_OPT_LOSS_TYPE_MEAN: {
result->labels = nullptr;
result->loss = ggml_sum(result->ctx_static, result->outputs); result->loss = ggml_sum(result->ctx_static, result->outputs);
ggml_set_name(result->loss, "loss_sum"); ggml_set_name(result->loss, "loss_sum");
const float scale = 1.0f / (result->opt_period * ggml_nelements(result->outputs)); const float scale = 1.0f / (result->opt_period * ggml_nelements(result->outputs));
@ -358,7 +360,6 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
break; break;
} }
case GGML_OPT_LOSS_TYPE_SUM: { case GGML_OPT_LOSS_TYPE_SUM: {
result->labels = nullptr;
result->loss = ggml_sum(result->ctx_static, result->outputs); result->loss = ggml_sum(result->ctx_static, result->outputs);
ggml_set_name(result->loss, "loss_sum"); ggml_set_name(result->loss, "loss_sum");
result->loss_per_datapoint = false; result->loss_per_datapoint = false;
@ -413,14 +414,7 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
} }
if (params.build_type == GGML_OPT_BUILD_TYPE_FORWARD) { if (params.build_type == GGML_OPT_BUILD_TYPE_FORWARD) {
result->gb_grad = nullptr;
result->gb_opt = nullptr;
result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0)); result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0));
result->buf_static_cpu = nullptr;
ggml_opt_alloc_graph(result, result->gf);
return result; return result;
} }
@ -429,14 +423,8 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
ggml_build_backward_expand(result->ctx_static, result->ctx_compute, result->gb_grad, accumulate); ggml_build_backward_expand(result->ctx_static, result->ctx_compute, result->gb_grad, accumulate);
if (params.build_type == GGML_OPT_BUILD_TYPE_GRAD) { if (params.build_type == GGML_OPT_BUILD_TYPE_GRAD) {
result->gb_opt = nullptr;
result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0)); result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0));
result->buf_static_cpu = nullptr;
ggml_opt_alloc_graph(result, result->gb_grad);
ggml_graph_reset(result->gb_grad); ggml_graph_reset(result->gb_grad);
return result; return result;
} }
@ -466,7 +454,6 @@ ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
result->buf_static_cpu = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx_static_cpu, ggml_backend_cpu_buffer_type()); result->buf_static_cpu = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx_static_cpu, ggml_backend_cpu_buffer_type());
ggml_opt_alloc_graph(result, result->gb_opt);
ggml_graph_reset(result->gb_opt); ggml_graph_reset(result->gb_opt);
return result; return result;

View file

@ -73,7 +73,9 @@ void soft_max(uint num_iters) {
FLOAT_TYPE v = a * p.scale + slope * b; FLOAT_TYPE v = a * p.scale + slope * b;
if (col < p.KX) {
max_val = max(max_val, v); max_val = max(max_val, v);
}
if (idx < DATA_CACHE_SIZE) { if (idx < DATA_CACHE_SIZE) {
data_cache[idx] = v; data_cache[idx] = v;

View file

@ -5019,8 +5019,10 @@ static void ggml_hash_map_free(struct hash_map * map) {
} }
// utility functions to change gradients // utility functions to change gradients
// if a is in acc_table, modify gradients in-place and mark result as gradient accumulator // isrc is the index of tensor in cgraph->visited_hash_set.keys
// else if a is in zero_table, replace a // the corresponding gradient (accumulators) are also at position isrc
// if tensor has a gradient accumulator, modify that accumulator in-place
// else if there is no gradient for tensor, set the corresponding value
// else, just add/subtract/etc. the gradients // else, just add/subtract/etc. the gradients
static void ggml_add_or_set( static void ggml_add_or_set(
@ -5028,11 +5030,14 @@ static void ggml_add_or_set(
struct ggml_cgraph * cgraph, struct ggml_cgraph * cgraph,
size_t isrc, size_t isrc,
struct ggml_tensor * tensor) { struct ggml_tensor * tensor) {
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
GGML_ASSERT(src);
if (cgraph->grads[isrc]) { if (cgraph->grads[isrc]) {
cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]); cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
} else { } else {
cgraph->grads[isrc] = tensor; cgraph->grads[isrc] = tensor;
} }
ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
} }
@ -5040,18 +5045,20 @@ static void ggml_acc_or_set(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_cgraph * cgraph, struct ggml_cgraph * cgraph,
size_t isrc, size_t isrc,
struct ggml_tensor * src,
struct ggml_tensor * tensor, struct ggml_tensor * tensor,
const size_t nb1, const size_t nb1,
const size_t nb2, const size_t nb2,
const size_t nb3, const size_t nb3,
const size_t offset) { const size_t offset) {
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
GGML_ASSERT(src);
if (cgraph->grads[isrc]) { if (cgraph->grads[isrc]) {
cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]); cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
} else { } else {
struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false); cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
} }
ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
} }
@ -5059,13 +5066,15 @@ static void ggml_add1_or_set(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_cgraph * cgraph, struct ggml_cgraph * cgraph,
size_t isrc, size_t isrc,
struct ggml_tensor * src,
struct ggml_tensor * tensor) { struct ggml_tensor * tensor) {
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
GGML_ASSERT(src);
if (cgraph->grads[isrc]) { if (cgraph->grads[isrc]) {
cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]); cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
} else { } else {
cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src); cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
} }
ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
} }
@ -5074,11 +5083,14 @@ static void ggml_sub_or_set(
struct ggml_cgraph * cgraph, struct ggml_cgraph * cgraph,
size_t isrc, size_t isrc,
struct ggml_tensor * tensor) { struct ggml_tensor * tensor) {
struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
GGML_ASSERT(src);
if (cgraph->grads[isrc]) { if (cgraph->grads[isrc]) {
cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]); cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
} else { } else {
cgraph->grads[isrc] = ggml_neg(ctx, tensor); cgraph->grads[isrc] = ggml_neg(ctx, tensor);
} }
ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
} }
@ -5095,12 +5107,12 @@ static void ggml_compute_backward(
struct ggml_tensor * src1 = tensor->src[1]; struct ggml_tensor * src1 = tensor->src[1];
struct ggml_tensor * src2 = tensor->src[2]; struct ggml_tensor * src2 = tensor->src[2];
struct ggml_hash_set * hash_set = &cgraph->visited_hash_set; struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
const size_t isrc0 = ggml_hash_find(hash_set, src0); const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
const size_t isrc1 = ggml_hash_find(hash_set, src1); const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
const size_t isrc2 = ggml_hash_find(hash_set, src2); const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
const bool src0_needs_grads = isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0]; const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
const bool src1_needs_grads = isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1]; const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
const bool src2_needs_grads = isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2]; const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
switch (tensor->op) { switch (tensor->op) {
case GGML_OP_DUP: { case GGML_OP_DUP: {
@ -5200,7 +5212,7 @@ static void ggml_compute_backward(
} break; } break;
case GGML_OP_SUM: { case GGML_OP_SUM: {
if (src0_needs_grads) { if (src0_needs_grads) {
ggml_add1_or_set(ctx, cgraph, isrc0, src0, grad); ggml_add1_or_set(ctx, cgraph, isrc0, grad);
} }
} break; } break;
case GGML_OP_SUM_ROWS: { case GGML_OP_SUM_ROWS: {
@ -5210,7 +5222,7 @@ static void ggml_compute_backward(
} break; } break;
case GGML_OP_MEAN: { case GGML_OP_MEAN: {
if (src0_needs_grads) { if (src0_needs_grads) {
ggml_add1_or_set(ctx, cgraph, isrc0, src0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false)); ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
} }
} break; } break;
case GGML_OP_REPEAT: { case GGML_OP_REPEAT: {
@ -5363,7 +5375,7 @@ static void ggml_compute_backward(
nb3 = (nb3 / n0) * ng; nb3 = (nb3 / n0) * ng;
} }
ggml_acc_or_set(ctx, cgraph, isrc0, src0, grad, nb1, nb2, nb3, offset); ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
} }
} break; } break;
case GGML_OP_PERMUTE: { case GGML_OP_PERMUTE: {
@ -5597,10 +5609,9 @@ void ggml_build_backward_expand(
const int n_nodes_f = cgraph->n_nodes; const int n_nodes_f = cgraph->n_nodes;
const size_t hash_size = ggml_hash_size(2*cgraph->size); memset(cgraph->grads, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
memset(cgraph->grads, 0, hash_size*sizeof(struct ggml_tensor *)); memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *)); bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
bool * grads_needed = calloc(hash_size, sizeof(bool));
{ {
bool any_params = false; bool any_params = false;
@ -5621,7 +5632,7 @@ void ggml_build_backward_expand(
continue; continue;
} }
bool node_needs_grad = node->flags & GGML_TENSOR_FLAG_PARAM; bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
bool ignore_src[GGML_MAX_SRC] = {false}; bool ignore_src[GGML_MAX_SRC] = {false};
switch (node->op) { switch (node->op) {
// gradients in node->src[0] for one reason or another have no effect on output gradients // gradients in node->src[0] for one reason or another have no effect on output gradients
@ -5665,9 +5676,12 @@ void ggml_build_backward_expand(
node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE); node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node); const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
GGML_ASSERT(igrad != GGML_HASHSET_FULL);
GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, igrad));
if ((accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) || (node->flags & GGML_TENSOR_FLAG_LOSS)) { if ((accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) || (node->flags & GGML_TENSOR_FLAG_LOSS)) {
cgraph->grads[igrad] = ggml_dup_tensor(ctx_static, node); cgraph->grad_accs[igrad] = ggml_dup_tensor(ctx_static, node);
cgraph->grad_accs[igrad] = cgraph->grads[igrad]; cgraph->grads[igrad] = cgraph->grad_accs[igrad];
ggml_format_name(cgraph->grad_accs[igrad], "grad acc for %s", node->name);
} }
grads_needed[igrad] = true; grads_needed[igrad] = true;
} }
@ -5765,10 +5779,10 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
/*.n_nodes =*/ i1 - i0, /*.n_nodes =*/ i1 - i0,
/*.n_leafs =*/ 0, /*.n_leafs =*/ 0,
/*.nodes =*/ cgraph0->nodes + i0, /*.nodes =*/ cgraph0->nodes + i0,
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL, /*.grads =*/ NULL, // gradients would need visited_hash_set
/*.grad_accs =*/ cgraph0->grad_accs ? cgraph0->grad_accs + i0 : NULL, /*.grad_accs =*/ NULL,
/*.leafs =*/ NULL, /*.leafs =*/ NULL,
/*.hash_table =*/ { 0, NULL, NULL }, /*.visited_hash_set =*/ { 0, NULL, NULL },
/*.order =*/ cgraph0->order, /*.order =*/ cgraph0->order,
}; };
@ -5799,12 +5813,22 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
} }
} }
if (dst->grads) {
memset(dst->grads, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
}
if (src->grads) { if (src->grads) {
GGML_ASSERT(dst->grads != NULL); GGML_ASSERT(dst->grads != NULL);
GGML_ASSERT(dst->grad_accs != NULL); GGML_ASSERT(dst->grad_accs != NULL);
for (int i = 0; i < src->n_nodes; ++i) { for (int i = 0; i < src->n_nodes; ++i) {
const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]); const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]); const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
dst->grads[igrad_dst] = src->grads[igrad_src]; dst->grads[igrad_dst] = src->grads[igrad_src];
dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src]; dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
} }
@ -5839,13 +5863,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
if (node->op == GGML_OP_OPT_STEP_ADAMW) { if (node->op == GGML_OP_OPT_STEP_ADAMW) {
// clear momenta // clear momenta
if (node->src[2]->data) {
ggml_set_zero(node->src[2]); ggml_set_zero(node->src[2]);
}
if (node->src[3]->data) {
ggml_set_zero(node->src[3]); ggml_set_zero(node->src[3]);
} }
}
// initial gradients of loss should be 1, 0 otherwise // initial gradients of loss should be 1, 0 otherwise
if (grad_acc) { if (grad_acc) {

View file

@ -1 +1 @@
2884dd72fea8922910fe53387c3d17ab928d3a8e 6fcbd60bc72ac3f7ad43f78c87e535f2e6206f58

View file

@ -18211,13 +18211,13 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
static void llama_kv_cache_update_internal(struct llama_context & lctx) { static void llama_kv_cache_update_internal(struct llama_context & lctx) {
bool need_reserve = false; bool need_reserve = false;
// apply K-shift if needed if (lctx.kv_self.has_shift) {
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
if (!llama_kv_cache_can_shift(&lctx)) { if (!llama_kv_cache_can_shift(&lctx)) {
GGML_ABORT("Deepseek2 does not support K-shift"); GGML_ABORT("The current context does not support K-shift");
} }
{ // apply K-shift if needed
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
ggml_backend_sched_reset(lctx.sched.get()); ggml_backend_sched_reset(lctx.sched.get());
ggml_cgraph * gf = llama_build_graph_k_shift(lctx); ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
@ -20463,7 +20463,7 @@ void llama_kv_cache_update(struct llama_context * ctx) {
} }
bool llama_kv_cache_can_shift(struct llama_context * ctx) { bool llama_kv_cache_can_shift(struct llama_context * ctx) {
return ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA return !ctx->kv_self.recurrent && ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
} }
// deprecated // deprecated

View file

@ -819,7 +819,6 @@ struct test_case {
} }
} }
// TODO: refactor so that this check is only needed once
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
if (!ggml_backend_supports_op(backend, t)) { if (!ggml_backend_supports_op(backend, t)) {
printf("not supported [%s] ", ggml_backend_name(backend)); printf("not supported [%s] ", ggml_backend_name(backend));