Merge branch 'master' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	Makefile
#	README.md
commit 62beded0e7
7 changed files with 20 additions and 24 deletions
.gitignore (vendored): 1 change
@@ -21,6 +21,7 @@ build-sanitize-addr/
 build-sanitize-thread/
 models/*
 *.bin
 /main
 /quantize
Makefile: 7 changes
@@ -16,12 +16,7 @@ ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif

-ARCH_LINUX1 := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
-ARCH_LINUX2 := $(shell grep "ID_LIKE=arch" /etc/os-release 2>/dev/null)
-ifdef ARCH_LINUX1
-ARCH_ADD = -lcblas
-endif
-ifdef ARCH_LINUX2
+ifneq ($(shell grep -e "Arch Linux" -e "ID_LIKE=arch" /etc/os-release 2>/dev/null),)
 ARCH_ADD = -lcblas
 endif
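Note: the Makefile hunk only consolidates the Arch Linux detection. A single grep with two -e patterns replaces the separate ARCH_LINUX1/ARCH_LINUX2 probes and their ifdef blocks, and ARCH_ADD is still set to -lcblas exactly when /etc/os-release matches either "Arch Linux" or "ID_LIKE=arch", so the linker behaviour should be unchanged.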
@@ -444,10 +444,10 @@ int main(int argc, char ** argv) {
                 id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
             } else {
                 // Temperature sampling
-                llama_sample_top_k(ctx, &candidates_p, top_k);
-                llama_sample_tail_free(ctx, &candidates_p, tfs_z);
-                llama_sample_typical(ctx, &candidates_p, typical_p);
-                llama_sample_top_p(ctx, &candidates_p, top_p);
+                llama_sample_top_k(ctx, &candidates_p, top_k, 1);
+                llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
+                llama_sample_typical(ctx, &candidates_p, typical_p, 1);
+                llama_sample_top_p(ctx, &candidates_p, top_p, 1);
                 llama_sample_temperature(ctx, &candidates_p, temp);
                 id = llama_sample_token(ctx, &candidates_p);
             }
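The hunk above is the caller-facing side of the API change: every truncation sampler now takes an explicit min_keep argument, and passing 1 reproduces the previous behaviour because the default removed from llama.h (further below) was 1. As a minimal sketch of the updated call sequence, assuming a valid llama_context * ctx and a populated candidates_p as in the code shown; the helper name sample_next_token is illustrative, not from the repository:

    #include "llama.h"

    // Sketch: the post-change sampler chain with min_keep passed explicitly.
    static llama_token sample_next_token(llama_context * ctx, llama_token_data_array & candidates_p,
                                         int top_k, float tfs_z, float typical_p, float top_p, float temp) {
        llama_sample_top_k      (ctx, &candidates_p, top_k,     1);  // keep at least one candidate
        llama_sample_tail_free  (ctx, &candidates_p, tfs_z,     1);
        llama_sample_typical    (ctx, &candidates_p, typical_p, 1);
        llama_sample_top_p      (ctx, &candidates_p, top_p,     1);
        llama_sample_temperature(ctx, &candidates_p, temp);
        return llama_sample_token(ctx, &candidates_p);
    }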
@@ -114,7 +114,7 @@ llama_token sample_token_mirostat(int n_vocab, llama_token_data_array * candidates
     float epsilon_hat = s_hat - 1;
     float k = powf((epsilon_hat * powf(2, *mu)) / (1 - powf(N, -epsilon_hat)), 1 / s_hat);
     // Sample the next word X using top-k sampling
-    llama_sample_top_k(nullptr, candidates, int(k));
+    llama_sample_top_k(nullptr, candidates, int(k),1);
     llama_token X = sample_token(candidates, rng);
     // Compute error as the difference between observed surprise and target surprise value
     size_t X_idx = std::distance(candidates->data, std::find_if(candidates->data, candidates->data + candidates->size, [&](const llama_token_data & candidate) {
         return candidate.id == X;
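For reference, the k that feeds the top-k call above is the Mirostat (v1) truncation threshold, computed exactly as in the context lines shown, with N the vocabulary size, s_hat the estimated Zipf exponent and mu the running surprise target:

    epsilon_hat = s_hat - 1
    k = ((epsilon_hat * 2^mu) / (1 - N^(-epsilon_hat)))^(1 / s_hat)

The only change in this hunk is that the truncation to int(k) now states min_keep = 1 explicitly.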
@@ -194,10 +194,10 @@ int mirostat, float mirostat_tau, float mirostat_eta)
     else
     {
         // Temperature sampling
-        llama_sample_top_k(nullptr, &candidates_p, top_k);
-        llama_sample_tail_free(nullptr, &candidates_p, tfs);
-        llama_sample_typical(nullptr, &candidates_p, typical_p);
-        llama_sample_top_p(nullptr, &candidates_p, top_p);
+        llama_sample_top_k(nullptr, &candidates_p, top_k,1);
+        llama_sample_tail_free(nullptr, &candidates_p, tfs,1);
+        llama_sample_typical(nullptr, &candidates_p, typical_p,1);
+        llama_sample_top_p(nullptr, &candidates_p, top_p,1);
         llama_sample_temperature(nullptr, &candidates_p, temp);
         id = sample_token(&candidates_p, rng);
     }
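This second sampler path receives the same mechanical update as the main example above: the four truncation samplers gain an explicit min_keep of 1. The difference is that these calls pass nullptr for the llama_context and the final draw goes through the local sample_token(&candidates_p, rng) helper instead of llama_sample_token, so only the candidate array is modified and no per-context state is touched.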
@@ -1801,7 +1801,7 @@ llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates
     float k = powf((epsilon_hat * powf(2, *mu)) / (1 - powf(N, -epsilon_hat)), 1 / s_hat);

     // Sample the next word X using top-k sampling
-    llama_sample_top_k(nullptr, candidates, int(k));
+    llama_sample_top_k(nullptr, candidates, int(k), 1);
     if (ctx) {
         ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
     }
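Inside llama.cpp itself the pattern is the same: the internal top-k truncation passes nullptr for the context, and the if (ctx) guard shown above means the t_sample_us timing is only updated when a real context is available. Passing min_keep = 1 here presumably also covers the case where int(k) rounds down to 0, so at least one candidate survives the truncation.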
llama.h: 8 changes
@@ -202,16 +202,16 @@ extern "C" {
     LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates);

     /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
-    LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep = 1);
+    LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep);

     /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
-    LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
+    LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);

     /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
-    LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep = 1);
+    LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep);

     /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
-    LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
+    LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);
     LLAMA_API void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array * candidates, float temp);

     /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
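llama.h declares these functions inside an extern "C" block (visible in the hunk header), and default arguments are a C++-only feature, so dropping the "= 1" defaults keeps the header consumable from plain C; the cost is that every caller must now spell out min_keep, which is what the other hunks in this commit do. A C++ application that prefers the old ergonomics can wrap the API on its own side. A minimal sketch with hypothetical wrapper names, not part of llama.h:

    #include "llama.h"

    // Illustrative wrappers restoring a defaulted min_keep on the caller's side.
    static inline void my_sample_top_k(llama_context * ctx, llama_token_data_array * c, int k, size_t min_keep = 1) {
        llama_sample_top_k(ctx, c, k, min_keep);
    }
    static inline void my_sample_top_p(llama_context * ctx, llama_token_data_array * c, float p, size_t min_keep = 1) {
        llama_sample_top_p(ctx, c, p, min_keep);
    }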
@@ -32,7 +32,7 @@ void test_top_k(const std::vector<float> & probs,
     llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
     llama_sample_softmax(nullptr, &candidates_p);
     DUMP(&candidates_p);
-    llama_sample_top_k(nullptr, &candidates_p, k);
+    llama_sample_top_k(nullptr, &candidates_p, k, 1);
     DUMP(&candidates_p);

     assert(candidates_p.size == expected_probs.size());
@@ -57,7 +57,7 @@ void test_top_p(const std::vector<float> & probs,
     llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
     llama_sample_softmax(nullptr, &candidates_p);
     DUMP(&candidates_p);
-    llama_sample_top_p(nullptr, &candidates_p, p);
+    llama_sample_top_p(nullptr, &candidates_p, p, 1);
     DUMP(&candidates_p);

     assert(candidates_p.size == expected_probs.size());
@@ -80,7 +80,7 @@ void test_tfs(const std::vector<float> & probs,

     llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
     DUMP(&candidates_p);
-    llama_sample_tail_free(nullptr, &candidates_p, z);
+    llama_sample_tail_free(nullptr, &candidates_p, z, 1);
     DUMP(&candidates_p);

     assert(candidates_p.size == expected_probs.size());
@@ -103,7 +103,7 @@ void test_typical(const std::vector<float> & probs,

     llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
     DUMP(&candidates_p);
-    llama_sample_typical(nullptr, &candidates_p, p);
+    llama_sample_typical(nullptr, &candidates_p, p, 1);
     DUMP(&candidates_p);

     assert(candidates_p.size == expected_probs.size());
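The four test hunks are mechanical as well: each sampler call now passes min_keep = 1, so the expected outputs are unchanged. If the new parameter itself were to be exercised, a check along the following lines could be added alongside the tests above; the test name and the assumption that min_keep acts as a lower bound on the number of surviving candidates are mine, not something this commit adds:

    #include <cassert>
    #include <cmath>
    #include <vector>

    #include "llama.h"

    // Sketch only: with a tiny p, plain top-p would keep a single candidate, so
    // min_keep = 3 should leave at least three (assuming min_keep is a lower bound).
    static void test_top_p_min_keep_sketch() {
        const std::vector<float> probs = { 0.4f, 0.3f, 0.2f, 0.1f };
        std::vector<llama_token_data> candidates;
        candidates.reserve(probs.size());
        for (size_t i = 0; i < probs.size(); i++) {
            candidates.push_back({ (llama_token) i, logf(probs[i]), 0.0f });
        }
        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
        llama_sample_softmax(nullptr, &candidates_p);
        llama_sample_top_p(nullptr, &candidates_p, 0.01f, 3);
        assert(candidates_p.size >= 3);
    }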