ggml : move CPU backend to a separate file (#10144)
This commit is contained in:
parent
08828a6d7d
commit
9f40989351
32 changed files with 14747 additions and 19345 deletions
|
@ -66,8 +66,6 @@ add_library(${TARGET} STATIC
|
|||
ngram-cache.h
|
||||
sampling.cpp
|
||||
sampling.h
|
||||
train.cpp
|
||||
train.h
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_LIBS)
|
||||
|
|
|
@ -1951,6 +1951,8 @@ void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const cha
|
|||
|
||||
void yaml_dump_non_result_info(FILE * stream, const common_params & params, const llama_context * lctx,
|
||||
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc) {
|
||||
ggml_cpu_init(); // some ARM features are detected at runtime
|
||||
|
||||
const auto & sparams = params.sparams;
|
||||
|
||||
fprintf(stream, "build_commit: %s\n", LLAMA_COMMIT);
|
||||
|
|
1515
common/train.cpp
1515
common/train.cpp
File diff suppressed because it is too large
Load diff
233
common/train.h
233
common/train.h
|
@ -1,233 +0,0 @@
|
|||
// Various helper functions and utilities for training
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "ggml.h"
|
||||
#include "llama.h"
|
||||
|
||||
#define LLAMA_TRAIN_MAX_NODES 16384
|
||||
|
||||
typedef std::string mt19937_state;
|
||||
|
||||
struct train_state {
|
||||
struct ggml_opt_context * opt;
|
||||
|
||||
uint64_t train_its;
|
||||
uint64_t train_samples;
|
||||
uint64_t train_tokens;
|
||||
uint64_t train_epochs;
|
||||
|
||||
size_t shuffle_samples_hash; // fn, sample_count, *zip(sample_begins, sample_sizes)
|
||||
mt19937_state shuffle_rng_state_current;
|
||||
mt19937_state shuffle_rng_state_next;
|
||||
size_t shuffle_sample_count;
|
||||
size_t shuffle_next_sample;
|
||||
};
|
||||
|
||||
struct train_params_common {
|
||||
const char * fn_train_data;
|
||||
const char * fn_checkpoint_in;
|
||||
const char * fn_checkpoint_out;
|
||||
const char * pattern_fn_it;
|
||||
const char * fn_latest;
|
||||
|
||||
bool print_usage;
|
||||
|
||||
int save_every;
|
||||
|
||||
uint32_t seed;
|
||||
|
||||
int n_ctx;
|
||||
int n_threads;
|
||||
int n_batch;
|
||||
int n_gradient_accumulation;
|
||||
int n_epochs;
|
||||
int n_gpu_layers;
|
||||
|
||||
bool custom_n_ctx;
|
||||
|
||||
bool use_flash;
|
||||
bool use_checkpointing;
|
||||
|
||||
std::string sample_start;
|
||||
bool include_sample_start;
|
||||
bool escape;
|
||||
bool overlapping_samples;
|
||||
bool fill_with_next_samples;
|
||||
bool separate_with_eos;
|
||||
bool separate_with_bos;
|
||||
bool sample_random_offsets;
|
||||
|
||||
bool force_reshuffle;
|
||||
|
||||
int warmup;
|
||||
int cos_decay_steps;
|
||||
float cos_decay_restart;
|
||||
float cos_decay_min;
|
||||
bool enable_restart;
|
||||
|
||||
int opt_past;
|
||||
float opt_delta;
|
||||
int opt_max_no_improvement;
|
||||
|
||||
int adam_n_iter;
|
||||
float adam_alpha;
|
||||
float adam_min_alpha;
|
||||
float adam_decay;
|
||||
int adam_decay_min_ndim;
|
||||
float adam_beta1;
|
||||
float adam_beta2;
|
||||
float adam_gclip;
|
||||
float adam_eps_f;
|
||||
};
|
||||
|
||||
typedef void (*save_train_files_callback)(void * data, struct train_state * train);
|
||||
|
||||
struct train_opt_callback_data {
|
||||
struct train_params_common * params;
|
||||
struct train_state * train;
|
||||
save_train_files_callback save_cb;
|
||||
void * save_data;
|
||||
struct llama_context * lctx;
|
||||
int last_save_iter;
|
||||
llama_token * tokens_data;
|
||||
size_t tokens_size;
|
||||
size_t * samples_begin;
|
||||
size_t * samples_size;
|
||||
size_t * shuffled_samples_offs;
|
||||
size_t * shuffled_samples_begin;
|
||||
size_t * shuffled_samples_size;
|
||||
size_t samples_count;
|
||||
struct ggml_tensor * tokens_input;
|
||||
struct ggml_tensor * target_probs;
|
||||
int first_iter;
|
||||
int first_epoch;
|
||||
int iter_at_last_epoch;
|
||||
int64_t last_time;
|
||||
double millis_per_iter;
|
||||
};
|
||||
|
||||
struct train_state * init_train_state();
|
||||
void free_train_state(struct train_state * state);
|
||||
|
||||
struct train_params_common get_default_train_params_common();
|
||||
void print_common_train_usage(int /*argc*/, char ** argv, const struct train_params_common * params);
|
||||
|
||||
bool consume_common_train_arg(int argc, char ** argv, int * idx, struct train_params_common * params, bool * invalid_param);
|
||||
void finish_processing_train_args(struct train_params_common * params);
|
||||
|
||||
struct random_normal_distribution;
|
||||
struct random_uniform_distribution;
|
||||
|
||||
struct random_normal_distribution * init_random_normal_distribution (int seed, float mean, float std, float min, float max);
|
||||
struct random_uniform_distribution * init_random_uniform_distribution(int seed, float min, float max);
|
||||
|
||||
void free_random_normal_distribution (struct random_normal_distribution * rnd);
|
||||
void free_random_uniform_distribution(struct random_uniform_distribution * rnd);
|
||||
|
||||
struct ggml_tensor * randomize_tensor_normal (struct ggml_tensor * tensor, struct random_normal_distribution * rnd);
|
||||
struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd);
|
||||
|
||||
// generate random float in interval [0,1)
|
||||
float frand();
|
||||
float frand_normal (struct random_normal_distribution * rnd);
|
||||
float frand_uniform(struct random_uniform_distribution * rnd);
|
||||
|
||||
int clamp (const int v, const int min, const int max);
|
||||
float fclamp(const float v, const float min, const float max);
|
||||
|
||||
void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0);
|
||||
void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1);
|
||||
void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2);
|
||||
void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3);
|
||||
|
||||
size_t tokenize_file(
|
||||
struct llama_context * lctx,
|
||||
const char * filename,
|
||||
const std::string & sample_start,
|
||||
bool include_sample_start,
|
||||
bool overlapping_samples,
|
||||
unsigned context_length,
|
||||
std::vector<llama_token> & out_tokens,
|
||||
std::vector<size_t> & out_samples_begin,
|
||||
std::vector<size_t> & out_samples_size);
|
||||
|
||||
int64_t get_example_targets_batch(
|
||||
struct llama_context * lctx,
|
||||
struct ggml_tensor * tokens_input,
|
||||
struct ggml_tensor * target_probs,
|
||||
int64_t example_id,
|
||||
const size_t * samples_offs,
|
||||
const size_t * samples_begin,
|
||||
const size_t * samples_size,
|
||||
size_t samples_count,
|
||||
const llama_token * train_data,
|
||||
size_t n_train_data,
|
||||
bool separate_with_eos,
|
||||
bool separate_with_bos,
|
||||
bool fill_with_next_samples,
|
||||
bool sample_random_offsets);
|
||||
|
||||
|
||||
void mt19937_set_state(std::mt19937& rng, const mt19937_state& rng_state);
|
||||
mt19937_state mt19937_get_state(const std::mt19937& rng);
|
||||
mt19937_state mt19937_seed_to_state(unsigned seed);
|
||||
|
||||
mt19937_state shuffle_samples(
|
||||
const mt19937_state & rng_state,
|
||||
size_t * shuffled_offs,
|
||||
size_t * shuffled_begins,
|
||||
size_t * shuffled_sizes,
|
||||
const size_t * begins,
|
||||
const size_t * sizes,
|
||||
size_t count);
|
||||
|
||||
size_t hash_combine(size_t h1, size_t h2);
|
||||
|
||||
size_t compute_samples_hash(
|
||||
const char* fn,
|
||||
const size_t* samples_begin,
|
||||
const size_t* samples_size,
|
||||
size_t sample_count);
|
||||
|
||||
|
||||
std::string replace_str(const char * s, const char * needle, const char * replacement);
|
||||
|
||||
void print_duration(double milliseconds);
|
||||
|
||||
float cosine_decay(
|
||||
int64_t step,
|
||||
int64_t decay_steps,
|
||||
float minimum);
|
||||
|
||||
float cosine_decay_restart(
|
||||
int64_t step,
|
||||
int64_t decay_steps,
|
||||
float minimum,
|
||||
float restart_step_mult);
|
||||
|
||||
float learning_schedule(
|
||||
int64_t step,
|
||||
int64_t warmup_steps,
|
||||
int64_t decay_steps,
|
||||
float learning_rate,
|
||||
float overall_minimum,
|
||||
float cos_decay_minimum,
|
||||
float cos_decay_restart_step_mult,
|
||||
bool enable_restart);
|
||||
|
||||
void copy_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name);
|
||||
|
||||
void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt);
|
||||
void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt);
|
||||
|
||||
bool load_train_state_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct train_state * train);
|
||||
void save_train_state_gguf(struct gguf_context * fctx, struct train_state * train);
|
||||
|
||||
std::string get_train_filename(const char * filename, const char * pattern_it, const char * latest, int64_t iteration);
|
||||
|
||||
void train_opt_callback(void * vdata, int accum_step, float * sched, bool * cancel);
|
Loading…
Add table
Add a link
Reference in a new issue