updated kobold lite, work on rwkv, added exe path to model load params, added launch parameter

This commit is contained in:
Concedo 2023-04-18 17:36:44 +08:00
parent 8e923dc6e9
commit c200b674f4
11 changed files with 100587 additions and 16 deletions

View file

@ -20,6 +20,8 @@
#include "expose.h"
#include "model_adapter.cpp"
std::string executable_path = "";
extern "C"
{
@ -46,6 +48,7 @@ extern "C"
deviceenv = "KCPP_CLBLAST_DEVICES="+std::to_string(devices);
putenv((char*)platformenv.c_str());
putenv((char*)deviceenv.c_str());
executable_path = inputs.executable_path;
if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3)
{

View file

@ -7,6 +7,7 @@ struct load_model_inputs
const int max_context_length;
const int batch_size;
const bool f16_kv;
const char *executable_path;
const char *model_filename;
const int n_parts_overwrite = -1;
const bool use_mmap;
@ -33,3 +34,4 @@ struct generation_outputs
char text[16384]; //16kb should be enough for any response
};
extern std::string executable_path;

View file

@ -77,14 +77,16 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
n_batch = 1;
std::string word;
for (int i = 0; i < 20; i++) {
read_rwkv_vocab();
int vocabsiz = rwkv_vocab.size();
for (int i = 0; i < vocabsiz; i++) {
uint32_t len;
word = ('a'+i);
word = rwkv_vocab[i];
vocab.token_to_id[word] = i;
vocab.id_to_token[i] = word;
}
printf("\nRWKV Vocab: %u\n",vocabsiz);
int vocabsiz = vocab.token_to_id.size();
bool testeval = rwkv_eval(rwkv_context_v1, 0, rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
if(!testeval)
{
@ -230,6 +232,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
// tokenize the prompt
std::vector<gpt_vocab::id> embd_inp = ::gpt_tokenize(vocab, params.prompt);
print_tok_vec(embd_inp);
//truncate to front of the prompt if its too long
int32_t nctx = params.n_ctx;
@ -330,7 +333,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
if(file_format==FileFormat::RWKV_1)
{
printf("\nsiz:%d val:%d\n",embd.size(),embd[0]);
evalres = rwkv_eval(rwkv_context_v1, embd[0], rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
memcpy(logits.data(), rwkv_context_v1->logits_out, sizeof(float)*rwkv_vocab.size());
}
else if(file_format==FileFormat::GPT2_1)
{

File diff suppressed because one or more lines are too long

View file

@ -15,6 +15,7 @@ class load_model_inputs(ctypes.Structure):
("max_context_length", ctypes.c_int),
("batch_size", ctypes.c_int),
("f16_kv", ctypes.c_bool),
("executable_path", ctypes.c_char_p),
("model_filename", ctypes.c_char_p),
("n_parts_overwrite", ctypes.c_int),
("use_mmap", ctypes.c_bool),
@ -85,6 +86,7 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
if args.useclblast:
clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1])
inputs.clblast_info = clblastids
inputs.executable_path = (os.path.dirname(os.path.realpath(__file__))+"/").encode("UTF-8")
ret = handle.load_model(inputs)
return ret
@ -437,7 +439,12 @@ def main(args):
else:
epurl = f"http://{args.host}:{args.port}" + ("?streaming=1" if args.stream else "")
if args.launch:
try:
import webbrowser as wb
wb.open(epurl)
except:
print("--launch was set, but could not launch web browser automatically.")
print(f"Please connect to custom endpoint at {epurl}")
RunServerMultiThreaded(args.host, args.port, embedded_kailite)
@ -451,6 +458,7 @@ if __name__ == '__main__':
portgroup.add_argument("--port", help="Port to listen on", default=defaultport, type=int, action='store')
portgroup.add_argument("port_param", help="Port to listen on (positional)", default=defaultport, nargs="?", type=int, action='store')
parser.add_argument("--host", help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
parser.add_argument("--launch", help="Launches a web browser when load is completed.", action='store_true')
#os.environ["OMP_NUM_THREADS"] = '12'
# psutil.cpu_count(logical=False)

View file

@ -1 +1 @@
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe"

View file

@ -14,9 +14,10 @@
#include <iostream>
#include <unordered_map>
#include "model_adapter.h"
#include "rwkv_vocab.cpp"
// --- Utilities ---
#define FP32_SIZE 4

30
otherarch/rwkv_vocab.cpp Normal file
View file

@ -0,0 +1,30 @@
#include <vector>
#include <string>
#include <fstream>
#include <iostream>
#include "expose.h"
std::vector<std::string> rwkv_vocab;
// Loads the RWKV vocabulary into the global `rwkv_vocab` vector.
//
// The file "<executable_path>rwkv_vocab.embd" is read line by line; each line
// becomes one entry, so an entry's index equals its 0-based line number.
// `executable_path` is concatenated as-is, so it must already end with a
// path separator.
//
// If the file cannot be opened, a message is printed and `rwkv_vocab` is left
// unchanged. On success, any previous contents are replaced.
void read_rwkv_vocab()
{
    const std::string filepath = executable_path + "rwkv_vocab.embd";
    printf("Reading vocab from %s", filepath.c_str());

    std::ifstream myfile(filepath);
    if (!myfile.is_open())
    {
        std::cout << "Unable to open RWKV vocab file";
        return;
    }

    // Replace rather than append, so a repeated load does not duplicate the
    // vocabulary and shift token ids.
    rwkv_vocab.clear();

    // Loop on the result of getline itself: testing good() before reading
    // pushes one spurious empty entry when the file ends with a newline
    // (or is empty), because the final failed read is still appended.
    std::string line;
    while (std::getline(myfile, line))
    {
        rwkv_vocab.push_back(line);
    }
    // The ifstream closes itself when it goes out of scope.
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,13 @@
import json
# Convert the original RWKV tokenizer vocabulary (a JSON document) into a
# plain-text file holding one token per line.
with open("rwkv_orig_vocab.json", "r", encoding="utf-8") as src:
    encoder = json.load(src)

# Each token is written verbatim, in iteration order, followed by a newline.
# (A disabled earlier variant escaped backslashes and double quotes and
# emitted every token as a quoted, comma-terminated string instead.)
with open("rwkv_vocab.embd", "w", encoding="utf-8") as dst:
    dst.write("".join(token + "\n" for token in encoder))

print("OK")

50254
rwkv_vocab.embd Normal file

File diff suppressed because it is too large Load diff