fix code formatting

group the embedding parameters under an `// embedding` section
print usage for the embedding parameters
This commit is contained in:
Yann Follet 2024-06-24 02:07:24 +00:00
parent 3b1ae2cbeb
commit 25fc226143
3 changed files with 25 additions and 17 deletions

View file

@ -1573,6 +1573,11 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "bench", "-ntg n0,n1,...", "number of text generation tokens" }); options.push_back({ "bench", "-ntg n0,n1,...", "number of text generation tokens" });
options.push_back({ "bench", "-npl n0,n1,...", "number of parallel prompts" }); options.push_back({ "bench", "-npl n0,n1,...", "number of parallel prompts" });
options.push_back({ "embedding" });
options.push_back({ "embedding", " --embd-normalize", "normalisation for embendings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)", params.embd_normalize });
options.push_back({ "embedding", " --embd-output-format", "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix" });
options.push_back({ "embedding", " --embd-separator", "separator of embendings (default \\n) for example \"<#sep#>\"" });
options.push_back({ "server" }); options.push_back({ "server" });
options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() }); options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() });
options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port }); options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port });

View file

@ -152,10 +152,6 @@ struct gpt_params {
bool prompt_cache_all = false; // save user input and generations to prompt cache bool prompt_cache_all = false; // save user input and generations to prompt cache
bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
bool embedding = false; // get only sentence embedding
int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute, 1=taxicab, 2=euclidean, >2=p-norm)
std::string embd_out = ""; // empty = default, "array" = [] or [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
std::string embd_sep = "\n"; // separator of embendings
bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\" bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
bool multiline_input = false; // reverse the usage of `\` bool multiline_input = false; // reverse the usage of `\`
bool simple_io = false; // improves compatibility with subprocesses and limited consoles bool simple_io = false; // improves compatibility with subprocesses and limited consoles
@ -182,6 +178,12 @@ struct gpt_params {
std::string mmproj = ""; // path to multimodal projector std::string mmproj = ""; // path to multimodal projector
std::vector<std::string> image; // path to image file(s) std::vector<std::string> image; // path to image file(s)
// embedding
bool embedding = false; // get only sentence embedding
int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
std::string embd_sep = "\n"; // separator of embendings
// server params // server params
int32_t port = 8080; // server listens on this network port int32_t port = 8080; // server listens on this network port
int32_t timeout_read = 600; // http read timeout in seconds int32_t timeout_read = 600; // http read timeout in seconds

View file

@ -184,17 +184,18 @@ int main(int argc, char ** argv) {
float * out = emb + p * n_embd; float * out = emb + p * n_embd;
batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize); batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
if (params.embd_out=="") { if (params.embd_out.empty()) {
// print the first part of the embeddings or for a single prompt, the full embedding // print the first part of the embeddings or for a single prompt, the full embedding
fprintf(stdout, "\n"); fprintf(stdout, "\n");
for (int j = 0; j < n_prompts; j++) { for (int j = 0; j < n_prompts; j++) {
fprintf(stdout, "embedding %d: ", j); fprintf(stdout, "embedding %d: ", j);
for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) { for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) {
if (params.embd_normalize==0) if (params.embd_normalize == 0) {
fprintf(stdout, "%6.0f ", emb[j * n_embd + i]); fprintf(stdout, "%6.0f ", emb[j * n_embd + i]);
else } else {
fprintf(stdout, "%9.6f ", emb[j * n_embd + i]); fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
} }
}
fprintf(stdout, "\n"); fprintf(stdout, "\n");
} }