fix code formatting

group of parameters // embedding
print usage for embedding parameters
Yann Follet 2024-06-24 02:07:24 +00:00
parent 3b1ae2cbeb
commit 25fc226143
3 changed files with 25 additions and 17 deletions

common/common.cpp

@@ -1573,6 +1573,11 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "bench", "-ntg n0,n1,...", "number of text generation tokens" });
     options.push_back({ "bench", "-npl n0,n1,...", "number of parallel prompts" });
 
+    options.push_back({ "embedding" });
+    options.push_back({ "embedding", " --embd-normalize", "normalisation for embendings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)", params.embd_normalize });
+    options.push_back({ "embedding", " --embd-output-format", "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix" });
+    options.push_back({ "embedding", " --embd-separator", "separator of embendings (default \\n) for example \"<#sep#>\"" });
+
     options.push_back({ "server" });
     options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() });
     options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port });

common/common.h

@@ -152,10 +152,6 @@ struct gpt_params {
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false;  // open the prompt cache read-only and do not update it
-    bool embedding = false; // get only sentence embedding
-    int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute, 1=taxicab, 2=euclidean, >2=p-norm)
-    std::string embd_out = ""; // empty = default, "array" = [] or [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
-    std::string embd_sep = "\n"; // separator of embendings
     bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
@@ -182,6 +178,12 @@ struct gpt_params {
     std::string mmproj = ""; // path to multimodal projector
     std::vector<std::string> image; // path to image file(s)
 
+    // embedding
+    bool embedding = false; // get only sentence embedding
+    int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
+    std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
+    std::string embd_sep = "\n"; // separator of embendings
+
     // server params
     int32_t port = 8080; // server listens on this network port
     int32_t timeout_read = 600; // http read timeout in seconds
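The new embd_sep field suggests that one input string can carry several prompts separated by a marker such as "<#sep#>". A minimal sketch of such a split, under the assumption that this is how the separator is consumed; split_prompts is a hypothetical helper, not part of llama.cpp:

#include <cstdio>
#include <string>
#include <vector>

// hypothetical helper (not from llama.cpp): split one input string into
// several prompts on a separator such as embd_sep ("\n" by default)
static std::vector<std::string> split_prompts(const std::string & text, const std::string & sep) {
    std::vector<std::string> parts;
    size_t start = 0;
    size_t pos;
    while ((pos = text.find(sep, start)) != std::string::npos) {
        parts.push_back(text.substr(start, pos - start));
        start = pos + sep.size();
    }
    parts.push_back(text.substr(start)); // remainder after the last separator
    return parts;
}

int main() {
    for (const auto & p : split_prompts("hello world<#sep#>second prompt", "<#sep#>")) {
        printf("prompt: '%s'\n", p.c_str());
    }
    return 0;
}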
@@ -380,7 +382,7 @@ void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_siz
 // Embedding utils
 //
 void llama_embd_normalize(const float * inp, float * out, int n, int embd_norm = 2);
 float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);
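For reference, cosine similarity over two length-n embeddings is dot(a,b) / (|a| * |b|). A minimal sketch with the same shape as the llama_embd_similarity_cos declaration above; similarity_cos_sketch is illustrative, not the actual implementation:

#include <cmath>
#include <cstdio>

// illustrative sketch only; not the actual llama_embd_similarity_cos code
static float similarity_cos_sketch(const float * a, const float * b, int n) {
    double dot = 0.0, na = 0.0, nb = 0.0;
    for (int i = 0; i < n; i++) {
        dot += (double) a[i] * b[i];
        na  += (double) a[i] * a[i];
        nb  += (double) b[i] * b[i];
    }
    if (na == 0.0 || nb == 0.0) {
        return 0.0f; // define similarity with a zero vector as 0
    }
    return (float) (dot / (std::sqrt(na) * std::sqrt(nb)));
}

int main() {
    const float e1[3] = { 1.0f, 0.0f, 0.0f };
    const float e2[3] = { 1.0f, 1.0f, 0.0f };
    printf("%.4f\n", similarity_cos_sketch(e1, e2, 3)); // prints 0.7071
    return 0;
}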

examples/embedding/embedding.cpp

@@ -184,16 +184,17 @@ int main(int argc, char ** argv) {
         float * out = emb + p * n_embd;
         batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
 
-        if (params.embd_out=="") {
+        if (params.embd_out.empty()) {
             // print the first part of the embeddings or for a single prompt, the full embedding
             fprintf(stdout, "\n");
             for (int j = 0; j < n_prompts; j++) {
                 fprintf(stdout, "embedding %d: ", j);
                 for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) {
-                    if (params.embd_normalize==0)
+                    if (params.embd_normalize == 0) {
                         fprintf(stdout, "%6.0f ", emb[j * n_embd + i]);
-                    else
+                    } else {
                         fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
+                    }
                 }
                 fprintf(stdout, "\n");
             }
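For orientation: with the default output format (params.embd_out empty) the block above prints one line per prompt and, when more than one prompt is given, only the first 16 components of each embedding; %6.0f is used for the int16-scaled mode 0 and %9.6f otherwise. A two-prompt run would look roughly like this (placeholder values, not real output):

embedding 0:  0.012345 -0.006789  0.000123 ...
embedding 1: -0.004321  0.009876  0.001234 ...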
@@ -211,31 +212,31 @@ int main(int argc, char ** argv) {
                     float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
                     fprintf(stdout, "%6.2f ", sim);
                 }
-                fprintf(stdout, "%1.10s",prompts[i].c_str());
+                fprintf(stdout, "%1.10s", prompts[i].c_str());
                 fprintf(stdout, "\n");
             }
         }
     }
 
-    if (params.embd_out=="json" || params.embd_out=="json+" || params.embd_out=="array") {
-        const bool notArray = params.embd_out!="array";
-        fprintf(stdout, notArray?"{\n \"object\": \"list\",\n \"data\": [\n":"[");
+    if (params.embd_out == "json" || params.embd_out == "json+" || params.embd_out == "array") {
+        const bool notArray = params.embd_out != "array";
+        fprintf(stdout, notArray ? "{\n \"object\": \"list\",\n \"data\": [\n" : "[");
         for (int j = 0;;) { // at least one iteration (one prompt)
             if (notArray) fprintf(stdout, " {\n \"object\": \"embedding\",\n \"index\": %d,\n \"embedding\": ",j);
             fprintf(stdout, "[");
             for (int i = 0;;) { // at least one iteration (n_embd > 0)
-                fprintf(stdout, params.embd_normalize==0?"%1.0f":"%1.7f", emb[j * n_embd + i]);
+                fprintf(stdout, params.embd_normalize == 0 ? "%1.0f" : "%1.7f", emb[j * n_embd + i]);
                 i++;
                 if (i < n_embd) fprintf(stdout, ","); else break;
             }
-            fprintf(stdout, notArray?"]\n }":"]");
+            fprintf(stdout, notArray ? "]\n }" : "]");
             j++;
-            if (j < n_prompts) fprintf(stdout, notArray?",\n":","); else break;
+            if (j < n_prompts) fprintf(stdout, notArray ? ",\n" : ","); else break;
         }
-        fprintf(stdout, notArray?"\n ]":"]\n");
-        if (params.embd_out=="json+" && n_prompts > 1) {
+        fprintf(stdout, notArray ? "\n ]" : "]\n");
+        if (params.embd_out == "json+" && n_prompts > 1) {
             fprintf(stdout, ",\n \"cosineSimilarity\": [\n");
             for (int i = 0;;) { // at least two iteration (n_prompts > 1)
                 fprintf(stdout, " [");