Merge branch 'master' into gg/flash-attn

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-04-24 14:00:32 +03:00
commit 8937ec5307
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
57 changed files with 1576 additions and 8245 deletions

View file

@ -108,7 +108,7 @@ int32_t get_num_physical_cores() {
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
#if defined(__x86_64__) && defined(__linux__)
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
#include <pthread.h>
static void cpuid(unsigned leaf, unsigned subleaf,
@ -162,7 +162,7 @@ static int count_math_cpus(int cpu_count) {
* Returns number of CPUs on system that are useful for math.
*/
int get_math_cpu_count() {
#if defined(__x86_64__) && defined(__linux__)
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
int cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
if (cpu_count < 1) {
return get_num_physical_cores();
@ -242,7 +242,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
invalid_param = true;
return true;
}
// This is temporary, in the future the samplign state will be moved fully to llama_sampling_context.
params.seed = std::stoul(argv[i]);
sparams.seed = std::stoul(argv[i]);
return true;
}
if (arg == "-t" || arg == "--threads") {
@ -2332,12 +2334,12 @@ std::vector<llama_token> llama_tokenize(
return result;
}
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
std::vector<char> result(8, 0);
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
if (n_tokens < 0) {
result.resize(-n_tokens);
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
GGML_ASSERT(check == -n_tokens);
} else {
result.resize(n_tokens);