add support for precompiled binaries, used as a fallback

Concedo 2023-08-15 13:49:05 +08:00
parent 9483288e03
commit 469d70be45
5 changed files with 125 additions and 14 deletions

.gitignore (vendored) — 28 changes

@@ -1,6 +1,5 @@
 *.o
 *.a
-*.so
 *.bin
 .DS_Store
 .build/
@@ -80,16 +79,17 @@ tests/test-quantize-perf
 tests/test-sampling
 tests/test-tokenizer-0
-koboldcpp_default.so
-koboldcpp_failsafe.so
-koboldcpp_openblas.so
-koboldcpp_noavx2.so
-koboldcpp_clblast.so
-koboldcpp_default.dll
-koboldcpp_failsafe.dll
-koboldcpp_openblas.dll
-koboldcpp_noavx2.dll
-koboldcpp_clblast.dll
-koboldcpp_cublas.dll
-cublas64_11.dll
-cublasLt64_11.dll
+/koboldcpp_default.so
+/koboldcpp_failsafe.so
+/koboldcpp_openblas.so
+/koboldcpp_noavx2.so
+/koboldcpp_clblast.so
+/koboldcpp_cublas.so
+/koboldcpp_default.dll
+/koboldcpp_failsafe.dll
+/koboldcpp_openblas.dll
+/koboldcpp_noavx2.dll
+/koboldcpp_clblast.dll
+/koboldcpp_cublas.dll
+/cublas64_11.dll
+/cublasLt64_11.dll
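
A note on the renamed patterns: a leading slash anchors a .gitignore rule to the repository root, so the rewritten entries ignore only the top-level build outputs. For example:

# anchored: matches only the repository-root file
/koboldcpp_default.so
# unanchored: would also match e.g. subfolder/koboldcpp_default.so
koboldcpp_default.so

The hunk also adds /koboldcpp_cublas.so, matching the CUDA .dll entry that already existed.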

class.py — new file, 104 lines

@@ -0,0 +1,104 @@
## KoboldCpp based GGML Backend by Concedo
## For use as a custom backend in KoboldAI United
## Not intended for general use.

from __future__ import annotations

import time, json
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
from . import koboldcpp

import utils
from logger import logger

from modeling.inference_model import (
    GenerationResult,
    GenerationSettings,
    InferenceModel,
)

model_backend_name = "koboldcpp" #specific instead of ggml
model_backend_type = "ggml" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)

kcpp_backend_loaded = False

class KoboldCppException(Exception):
    """To be used for errors on cpp side of KoboldCpp."""

class KcppArgsObject:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

class model_backend(InferenceModel):
    def __init__(self) -> None:
        super().__init__()

    def is_valid(self, model_name, model_path, menu_path):
        return "ggml" in model_name.lower()

    def get_requested_parameters(self, model_name, model_path, menu_path, parameters={}):
        self.filename = model_name #model_path is null, name is path for some reason
        self.model_name = "GGML_Model"
        try:
            from pathlib import Path
            self.model_name = Path(model_name).name
        except:
            pass
        requested_parameters = []
        return requested_parameters

    def set_input_parameters(self, parameters):
        pass

    def _load(self, save_model: bool, initial_load: bool) -> None:
        global kcpp_backend_loaded
        self.tokenizer = self._get_tokenizer("gpt2")
        if not kcpp_backend_loaded:
            kcppargs = KcppArgsObject(model=self.filename, model_param=self.filename,
                port=5001, port_param=5001, host='', launch=False, lora=None, threads=5, blasthreads=5,
                psutil_set_threads=False, highpriority=False, contextsize=2048,
                blasbatchsize=512, ropeconfig=[0.0, 10000.0], stream=False, smartcontext=False,
                unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=False,
                usemlock=False, noavx2=False, debugmode=0, skiplauncher=False, hordeconfig=None, noblas=False,
                useclblast=None, usecublas=None, gpulayers=0, tensor_split=None)

            koboldcpp.main(kcppargs, False) #initialize library without enabling Lite http server
            kcpp_backend_loaded = True

    def _save_settings(self):
        pass

    def _raw_generate(
        self,
        prompt_tokens: Union[List[int], torch.Tensor],
        max_new: int,
        gen_settings: GenerationSettings,
        single_line: bool = False,
        batch_count: int = 1,
        seed: Optional[int] = None,
        **kwargs,
    ) -> GenerationResult:

        decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens))

        # Store context in memory to use it for comparison with generated content
        utils.koboldai_vars.lastctx = decoded_prompt

        genresult = koboldcpp.generate(decoded_prompt, max_new, utils.koboldai_vars.max_length,
            gen_settings.temp, int(gen_settings.top_k), gen_settings.top_a, gen_settings.top_p,
            gen_settings.typical, gen_settings.tfs, gen_settings.rep_pen, gen_settings.rep_pen_range)

        outputs = [genresult]
        return GenerationResult(
            model=self,
            out_batches=np.array(
                [self.tokenizer.encode(x) for x in outputs]
            ),
            prompt=prompt_tokens,
            is_whole_generation=True,
            single_line=single_line,
        )
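
For orientation, a minimal usage sketch of this backend. The call sequence and values are assumptions for illustration, not KoboldAI United's actual loader, and the module only runs inside United, where the relative koboldcpp import and the utils/modeling modules resolve:

# Hypothetical driver; model path and settings are illustrative only.
backend = model_backend()
model = "models/mymodel.ggmlv3.q4_0.bin"  # any name containing "ggml" passes is_valid
if backend.is_valid(model, None, None):
    backend.get_requested_parameters(model, None, None)  # records self.filename
    backend._load(save_model=False, initial_load=True)   # boots the C++ library once
    result = backend._raw_generate(
        prompt_tokens=backend.tokenizer.encode("Once upon a time"),
        max_new=64,
        gen_settings=GenerationSettings(),  # assumed default-constructible here
    )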

koboldcpp.py

@@ -76,13 +76,20 @@ def file_exists(filename):
     return os.path.exists(os.path.join(getdirpath(), filename))

 def pick_existant_file(ntoption,nonntoption):
+    precompiled_prefix = "precompiled_"
     ntexist = file_exists(ntoption)
     nonntexist = file_exists(nonntoption)
+    precompiled_ntexist = file_exists(precompiled_prefix+ntoption)
+    precompiled_nonntexist = file_exists(precompiled_prefix+nonntoption)
     if os.name == 'nt':
+        if not ntexist and precompiled_ntexist:
+            return (precompiled_prefix+ntoption)
         if nonntexist and not ntexist:
             return nonntoption
         return ntoption
     else:
+        if not nonntexist and precompiled_nonntexist:
+            return (precompiled_prefix+nonntoption)
         if ntexist and not nonntexist:
             return ntoption
         return nonntoption
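
The fallback only triggers when the normally-built library is missing, so a local build still takes precedence. A sketch of the resolution under hypothetical on-disk states (filenames are illustrative):

# On Windows (os.name == 'nt'), with koboldcpp_default.dll absent but
# precompiled_koboldcpp_default.dll present next to the script:
pick_existant_file("koboldcpp_default.dll", "koboldcpp_default.so")
# -> "precompiled_koboldcpp_default.dll"

# On Linux, with koboldcpp_default.so absent but the precompiled copy present:
pick_existant_file("koboldcpp_default.dll", "koboldcpp_default.so")
# -> "precompiled_koboldcpp_default.so"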

Two binary files changed (content not shown).