Merge 'origin/master' into hipblas

commit a4648c1e7c

6 changed files with 79 additions and 19 deletions

.github/workflows/build.yml (vendored): 6 changes

@@ -165,7 +165,7 @@ jobs:
           - build: 'clblast'
             defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
           - build: 'openblas'
-            defines: '-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include"'
+            defines: '-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
 
     steps:
       - name: Clone
@@ -187,7 +187,7 @@ jobs:
           curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z"
           curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
           7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z
-          rename-item $env:RUNNER_TEMP/clblast_release_dir clblast
+          rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast
           foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
             $txt = Get-Content -Path $f -Raw
             $txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
@@ -213,7 +213,6 @@ jobs:
          cd build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release
-         cp ../LICENSE ./bin/Release/llama.cpp.txt
 
       - name: Add clblast.dll
         id: add_clblast_dll
@@ -258,6 +257,7 @@ jobs:
         id: pack_artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
+          Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
           7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip .\build\bin\Release\*
 
       - name: Upload artifacts

CMakeLists.txt: 2 changes

@@ -66,7 +66,7 @@ endif()
 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
 option(LLAMA_BLAS "llama: use BLAS" OFF)
-option(LLAMA_BLAS_VENDOR "llama: BLA_VENDOR from https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" Generic)
+set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
 option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
 set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
 set(LLAMA_CUDA_DMMV_Y  "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")

Note: option() only declares boolean ON/OFF switches, so the vendor string becomes a cached STRING variable instead.

examples/main/README.md: 2 changes

@@ -272,7 +272,7 @@ These options help improve the performance and memory usage of the LLaMA models.
 
 ### Prompt Caching
 
-- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs.
+- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. **Note**: Restoring a cached prompt does not imply restoring the exact state of the session at the point it was saved. So even when specifying a specific seed, you are not guaranteed to get the same sequence of tokens as the original generation.
 
 ### Quantization
 

examples/main/main.cpp: 18 changes

@@ -134,8 +134,6 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
-    // Add a space in front of the first character to match OG llama tokenizer behavior
-    params.prompt.insert(0, 1, ' ');
 
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
@@ -155,6 +153,7 @@ int main(int argc, char ** argv) {
                 return 1;
             }
             session_tokens.resize(n_token_count_out);
+            llama_set_rng_seed(ctx, params.seed);
 
             fprintf(stderr, "%s: loaded a session with prompt size of %d tokens\n", __func__, (int) session_tokens.size());
         } else {
@@ -163,7 +162,16 @@ int main(int argc, char ** argv) {
     }
 
     // tokenize the prompt
-    auto embd_inp = ::llama_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> embd_inp;
+
+    if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) {
+        // Add a space in front of the first character to match OG llama tokenizer behavior
+        params.prompt.insert(0, 1, ' ');
+
+        embd_inp = ::llama_tokenize(ctx, params.prompt, true);
+    } else {
+        embd_inp = session_tokens;
+    }
 
     const int n_ctx = llama_n_ctx(ctx);
 
@@ -181,7 +189,9 @@ int main(int argc, char ** argv) {
             }
             n_matching_session_tokens++;
         }
-        if (n_matching_session_tokens >= embd_inp.size()) {
+        if (params.prompt.empty() && n_matching_session_tokens == embd_inp.size()) {
+            fprintf(stderr, "%s: using full prompt from session file\n", __func__);
+        } else if (n_matching_session_tokens >= embd_inp.size()) {
             fprintf(stderr, "%s: session file has exact match for prompt!\n", __func__);
         } else if (n_matching_session_tokens < (embd_inp.size() / 2)) {
             fprintf(stderr, "%s: warning: session file has low similarity to prompt (%zu / %zu tokens); will mostly be reevaluated\n",

ggml.c: 58 changes

@@ -3494,7 +3494,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
 };
 static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
 
-static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
+static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "NONE",
 
     "DUP",
@@ -3749,6 +3749,9 @@ const char * ggml_type_name(enum ggml_type type) {
     return GGML_TYPE_NAME[type];
 }
 
+const char * ggml_op_name(enum ggml_op op) {
+    return GGML_OP_NAME[op];
+}
 
 size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return GGML_TYPE_SIZE[tensor->type];
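
With GGML_OP_LABEL renamed to GGML_OP_NAME and exposed through the new ggml_op_name() accessor, callers can print an op's label without reaching into ggml internals. A minimal sketch, not part of this commit; it assumes only GGML_OP_MUL_MAT, an existing member of enum ggml_op:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // Look up the printable label for an op; prints "MUL_MAT".
        printf("%s\n", ggml_op_name(GGML_OP_MUL_MAT));
        return 0;
    }
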
@@ -3805,6 +3808,10 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
     return wtype;
 }
 
+size_t ggml_tensor_overhead(void) {
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
+}
+
 static inline bool ggml_is_transposed(const struct ggml_tensor * tensor) {
     return tensor->nb[0] > tensor->nb[1];
 }
@@ -4017,6 +4024,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
     return result;
 }
 
+void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
+    ctx->no_alloc = no_alloc;
+}
+
 // IMPORTANT:
 // when creating "opt" tensors, always save and load the scratch buffer
 // this is an error prone process, but it is necessary to support inplace
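
Taken together, ggml_tensor_overhead() and ggml_set_no_alloc() let a caller size a context that holds tensor metadata without allocating data buffers. A minimal sketch, not part of this commit; it assumes only the API added in this diff plus ggml_new_tensor_2d, ggml_used_mem, and ggml_free from the existing ggml.h:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // Budget space for a few tensor headers only, no tensor data.
        struct ggml_init_params params = {
            .mem_size   = 16 * ggml_tensor_overhead(),
            .mem_buffer = NULL,
        };
        struct ggml_context * ctx = ggml_init(params);

        // Metadata-only mode: ggml_new_tensor_* will not allocate t->data.
        ggml_set_no_alloc(ctx, true);

        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1024, 1024);
        (void) t;

        printf("per-tensor overhead: %zu bytes\n", ggml_tensor_overhead());
        printf("used mem:            %zu bytes\n", ggml_used_mem(ctx));

        ggml_free(ctx);
        return 0;
    }
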
@@ -4061,7 +4072,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
     if (ctx->scratch.data == NULL || data != NULL) {
-        size_needed += sizeof(struct ggml_tensor);
+        size_needed += GGML_TENSOR_SIZE;
 
         if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
             GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
@@ -4077,14 +4088,15 @@ struct ggml_tensor * ggml_new_tensor_impl(
         };
     } else {
         if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
-            GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
             assert(false);
             return NULL;
         }
 
-        if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
+        if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
             GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
+                    __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
             assert(false);
             return NULL;
         }
@@ -4093,7 +4105,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
 
     *obj_new = (struct ggml_object) {
         .offs = cur_end + GGML_OBJECT_SIZE,
-        .size = sizeof(struct ggml_tensor),
+        .size = GGML_TENSOR_SIZE,
         .next = NULL,
     };
 
@@ -13792,11 +13804,19 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
         // reached a leaf node, not part of the gradient graph (e.g. a constant)
         GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
 
+        if (strlen(node->name) == 0) {
+            snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs);
+        }
+
         cgraph->leafs[cgraph->n_leafs] = node;
         cgraph->n_leafs++;
     } else {
         GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
 
+        if (strlen(node->name) == 0) {
+            snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes);
+        }
+
         cgraph->nodes[cgraph->n_nodes] = node;
         cgraph->grads[cgraph->n_nodes] = node->grad;
         cgraph->n_nodes++;
@@ -14510,6 +14530,26 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
     }
 }
 
+struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name) {
+    for (int i = 0; i < cgraph->n_leafs; i++) {
+        struct ggml_tensor * leaf = cgraph->leafs[i];
+
+        if (strcmp(leaf->name, name) == 0) {
+            return leaf;
+        }
+    }
+
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        struct ggml_tensor * node = cgraph->nodes[i];
+
+        if (strcmp(node->name, name) == 0) {
+            return node;
+        }
+    }
+
+    return NULL;
+}
+
 void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
 
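
Because ggml_visit_parents() now assigns the implicit leaf_%d/node_%d names, every tensor in a built graph is reachable by name even if the caller never set one. A minimal usage sketch, not part of this commit; it assumes only this diff's API plus ggml_new_tensor_1d, ggml_add, and ggml_build_forward from the existing ggml.h:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = { .mem_size = 16 * 1024 * 1024 };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * c = ggml_add(ctx, a, b);

        // Building the graph names the unnamed tensors:
        // inputs become "leaf_0", "leaf_1", ...; ops become "node_0", ...
        struct ggml_cgraph gf = ggml_build_forward(c);

        struct ggml_tensor * n0 = ggml_get_tensor_by_name(&gf, "node_0");
        printf("node_0 %s the add result\n", n0 == c ? "is" : "is not");

        ggml_free(ctx);
        return 0;
    }
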
@@ -14527,7 +14567,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                 i,
                 node->ne[0], node->ne[1], node->ne[2],
-                GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
+                GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
                 (double) node->perf_cycles  / (double) ggml_cycles_per_ms(),
                 (double) node->perf_cycles  / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
                 (double) node->perf_time_us / 1000.0,
@@ -14541,7 +14581,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
                 i,
                 node->ne[0], node->ne[1],
-                GGML_OP_LABEL[node->op]);
+                GGML_OP_NAME[node->op]);
     }
 
     for (int i = 0; i < GGML_OP_COUNT; i++) {
@@ -14549,7 +14589,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
             continue;
         }
 
-        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_LABEL[i], (double) perf_total_per_op_us[i] / 1000.0);
+        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0);
     }
 
     GGML_PRINT("========================================\n");

ggml.h: 12 changes

@@ -198,6 +198,7 @@
 #define GGML_MAX_PARAMS        256
 #define GGML_MAX_CONTEXTS      64
 #define GGML_MAX_OPT           4
+#define GGML_MAX_NAME          32
 #define GGML_DEFAULT_N_THREADS 4
 
 #define GGML_ASSERT(x) \
@@ -372,11 +373,13 @@ extern "C" {
 
         void * data;
 
-        char name[32];
+        char name[GGML_MAX_NAME];
 
         char padding[16];
     };
 
+    static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
+
     // computation graph
     struct ggml_cgraph {
         int n_nodes;
@@ -429,6 +432,7 @@ extern "C" {
     GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
 
     GGML_API const char * ggml_type_name(enum ggml_type type);
+    GGML_API const char * ggml_op_name  (enum ggml_op   op);
 
     GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
 
@@ -437,6 +441,9 @@ extern "C" {
     // TODO: temporary until model loading of ggml examples is refactored
     GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
 
+    // use this to compute the memory overhead of a tensor
+    GGML_API size_t ggml_tensor_overhead(void);
+
     // main
 
     GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
@@ -445,6 +452,7 @@ extern "C" {
     GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
 
     GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
+    GGML_API void   ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
     GGML_API struct ggml_tensor * ggml_new_tensor(
             struct ggml_context * ctx,
@@ -970,6 +978,8 @@ extern "C" {
     GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
     GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
 
+    GGML_API struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name);
+
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
 