add mean method
commit 6a11a39a8e
parent bd989c21d4
5 changed files with 109 additions and 19 deletions

common/common.cpp
@@ -1644,6 +1644,21 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.n_pca_iterations = std::stoi(argv[i]);
         return true;
     }
+    if (arg == "--method") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        std::string arg = argv[i];
+        if (arg == "pca") {
+            params.cvector_dimre_method = DIMRE_METHOD_PCA;
+        } else if (arg == "mean") {
+            params.cvector_dimre_method = DIMRE_METHOD_MEAN;
+        } else {
+            invalid_param = true;
+        }
+        return true;
+    }
 #ifndef LOG_DISABLE_LOGS
     // Parse args for logging parameters
     if (log_param_single_parse(argv[i])) {
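
Aside (not part of the commit): the same string-to-enum dispatch, written as a
self-contained helper for readability. parse_dimre_method is a hypothetical
name; dimre_method is the enum this commit adds to common.h (see below).

    #include <string>

    // returns false on unknown input so the caller can flag invalid_param,
    // mirroring the diff above
    static bool parse_dimre_method(const std::string & s, dimre_method & out) {
        if (s == "pca")  { out = DIMRE_METHOD_PCA;  return true; }
        if (s == "mean") { out = DIMRE_METHOD_MEAN; return true; }
        return false;
    }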

@@ -1972,6 +1987,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "cvector", " --negative-file FNAME", "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() });
     options.push_back({ "cvector", " --pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
     options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
+    options.push_back({ "cvector", " --method {pca,mean}", "dimensionality reduction method to be used (default: pca)" });

     printf("usage: %s [options]\n", argv[0]);

common/common.h
@@ -52,6 +52,12 @@ int32_t cpu_get_num_math();
 // CLI argument parsing
 //

+// dimensionality reduction methods, used by cvector-generator
+enum dimre_method {
+    DIMRE_METHOD_PCA,
+    DIMRE_METHOD_MEAN,
+};
+
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed

@@ -235,6 +241,7 @@ struct gpt_params {
     // cvector-generator params
     int n_pca_batch = 100;
     int n_pca_iterations = 1000;
+    dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
     std::string cvector_outfile       = "control_vector.gguf";
     std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
     std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
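
Aside (not part of the commit): with the new field in place, the method can
also be selected programmatically instead of via --method; a minimal sketch
using the names from the two hunks above:

    gpt_params params;
    params.cvector_dimre_method = DIMRE_METHOD_MEAN; // default is DIMRE_METHOD_PCA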

examples/cvector-generator/README.md
@@ -19,6 +19,9 @@ Related PRs:
 # With advanced options
 ./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100

+# Using mean value instead of PCA
+./cvector-generator -m ./llama-3.Q4_K_M.gguf --method mean
+
 # To see help message
 ./cvector-generator -h
 # Then, have a look at "cvector" section
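
(Reader's note: the mean path makes a single pass over the difference
vectors, so --pca-iter and --pca-batch have no effect when --method mean is
given; see mean.hpp at the end of this diff.)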

examples/cvector-generator/cvector-generator.cpp
@@ -2,6 +2,7 @@
 #include "llama.h"
 #include "ggml.h"
 #include "pca.hpp"
+#include "mean.hpp"

 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"

@@ -41,6 +42,7 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
     printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]);
     printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]);
     printf("\n advanced: %s -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100\n", argv[0]);
+    printf("\n using mean: %s -m ./llama-3.Q4_K_M.gguf --method mean\n", argv[0]);
     printf("\n");
 }

@@ -223,17 +225,20 @@ struct train_context {

     // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed)
     // TODO @ngxson : maybe add option NOT to transpose v_diff; will be useful for "mean" method
-    void build_v_diff() {
+    void build_v_diff(bool transpose) {
         printf("build_v_diff\n");
         for (int il = 0; il < n_layers - 1; il++) {
             auto & diff_tmp = v_diff_tmp[il];
             int n_elem = diff_tmp.size() / sizeof(float);
             GGML_ASSERT(n_elem % n_embd == 0);
             int n_rows = n_elem / n_embd;
-            struct ggml_tensor * diff = ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd);
+            struct ggml_tensor * diff = transpose
+                ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd)
+                : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows);
             ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
-            // copy data & transpose
             diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
+            if (transpose) {
+                // copy data & transpose
                 float * arr = (float *) diff_tmp.data();
                 for (int ir = 0; ir < n_rows; ++ir) {
                     for (int ic = 0; ic < n_embd; ++ic) {
@@ -241,6 +246,10 @@ struct train_context {
                     ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
                 }
             }
+            } else {
+                // only copy
+                memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff));
+            }
             v_diff.push_back(diff);
             print_debug_tensor(diff);
             // free memory of diff_tmp
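
Aside (not part of the commit): a sketch of the two memory layouts
build_v_diff now produces, assuming ggml's convention that ne[0] is the
contiguous (fastest) dimension. diff_tmp holds n_rows row-major rows of
n_embd floats each; arr and out below are hypothetical float pointers to the
source and destination buffers:

    // transpose == false (mean path): the tensor is [n_embd, n_rows], the
    // same layout as diff_tmp, so a plain memcpy suffices.
    // transpose == true (PCA path): the tensor is [n_rows, n_embd]; element
    // (ir, ic) lands at offset ic * n_rows + ir, making each embedding
    // dimension contiguous:
    for (int ir = 0; ir < n_rows; ++ir) {
        for (int ic = 0; ic < n_embd; ++ic) {
            out[ic * n_rows + ir] = arr[ir * n_embd + ic];
        }
    }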
@@ -468,15 +477,22 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
     llama_free_model(model);

-    // prepare ctx_train for PCA
-    ctx_train.build_v_diff();
+    bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA;

+    // prepare ctx_train for PCA
+    ctx_train.build_v_diff(use_pca);
+
+    if (use_pca) {
         // run PCA
         PCA::pca_params pca_params;
         pca_params.n_threads    = params.n_threads;
         pca_params.n_batch      = params.n_pca_batch;
         pca_params.n_iterations = params.n_pca_iterations;
         PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
+    } else {
+        // run mean
+        mean::run(ctx_train.v_diff, ctx_train.v_final);
+    }

     // write output vectors to gguf
     export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);

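(Reader's note: build_v_diff(use_pca) couples the tensor layout to the chosen
method; the diff matrices are transposed only for the PCA path, while
mean::run below reads the natural [n_embd, n_samples] layout directly.)
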
examples/cvector-generator/mean.hpp (new file, 48 lines)
@@ -0,0 +1,48 @@
+#include "common.h"
+#include "llama.h"
+#include "ggml.h"
+
+#include <string>
+#include <vector>
+#include <math.h>
+
+namespace mean {
+
+static void run(
+        const std::vector<struct ggml_tensor *> & v_input, // shape of v_input[0]: [n_embd, n_samples]
+        const std::vector<struct ggml_tensor *> & v_output) {
+    printf("%s: Running mean...\n", __func__);
+    for (size_t il = 0; il < v_input.size(); ++il) {
+        // prepare output vector
+        struct ggml_tensor * ctrl_out = v_output[il];
+        ggml_format_name(ctrl_out, "direction.%ld", il+1);
+
+        // calculate mean vector
+        struct ggml_tensor * t_layer = v_input[il];
+        GGML_ASSERT(t_layer->ne[0] == ctrl_out->ne[0]); // == n_embd
+        for (int ic = 0; ic < t_layer->ne[0]; ic++) {
+            float f = 0.0;
+            for (int ir = 0; ir < t_layer->ne[1]; ir++) {
+                f += ggml_get_f32_nd(t_layer, ic, ir, 0, 0);
+            }
+            f /= t_layer->ne[1];
+            ggml_set_f32_1d(ctrl_out, ic, f);
+        }
+
+        // normalize output vector
+        float norm = 0.0;
+        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
+            float f = ggml_get_f32_1d(ctrl_out, i);
+            norm += f*f;
+        }
+        norm = sqrt(norm);
+        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
+            float f = ggml_get_f32_1d(ctrl_out, i);
+            ggml_set_f32_1d(ctrl_out, i, f / norm);
+        }
+
+        printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size());
+    }
+}
+
+}
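
In math terms (a restatement of the loops above): for each layer \ell, with
difference rows d_{\ell,i} \in \mathbb{R}^{n_embd} for i = 1..N, where
N = t_layer->ne[1], the control vector is the L2-normalized mean:

    v_\ell = \frac{\bar d_\ell}{\lVert \bar d_\ell \rVert_2},
    \qquad
    \bar d_\ell = \frac{1}{N} \sum_{i=1}^{N} d_{\ell,i}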