server : allow using LoRA adapters per-request (#10994)
* slot.can_batch_with * lora per request * test: force disable cache prompt * move can_batch_with check * fix condition * add slow test with llama 8b * update docs * move lora change task to queue * Apply suggestions from code review Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * lora_base * remove redundant check --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
a45433ba20
commit
0da5d86026
8 changed files with 235 additions and 59 deletions
|
@ -797,3 +797,44 @@ static std::vector<llama_token_data> get_token_probabilities(llama_context * ctx
|
|||
|
||||
return cur;
|
||||
}
|
||||
|
||||
static bool are_lora_equal(
|
||||
const std::vector<common_lora_adapter_container> & l1,
|
||||
const std::vector<common_lora_adapter_container> & l2) {
|
||||
if (l1.size() != l2.size()) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < l1.size(); ++i) {
|
||||
// we don't check lora.path to reduce the time complexity
|
||||
if (l1[i].scale != l2[i].scale || l1[i].adapter != l2[i].adapter) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// parse lora config from JSON request, returned a copy of base_lora with updated scale
|
||||
static std::vector<common_lora_adapter_container> parse_lora_request(
|
||||
const std::vector<common_lora_adapter_container> & base_lora,
|
||||
const json & data) {
|
||||
std::vector<common_lora_adapter_container> lora(base_lora);
|
||||
int max_idx = lora.size();
|
||||
|
||||
// clear existing value
|
||||
for (auto & entry : lora) {
|
||||
entry.scale = 0.0f;
|
||||
}
|
||||
|
||||
// set value
|
||||
for (const auto & entry : data) {
|
||||
int id = json_value(entry, "id", -1);
|
||||
float scale = json_value(entry, "scale", 0.0f);
|
||||
if (0 <= id && id < max_idx) {
|
||||
lora[id].scale = scale;
|
||||
} else {
|
||||
throw std::runtime_error("invalid adapter id");
|
||||
}
|
||||
}
|
||||
|
||||
return lora;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue