updated kobold lite, work on rwkv, added exe path to model load params, added launch parameter
This commit is contained in:
parent
8e923dc6e9
commit
c200b674f4
11 changed files with 100587 additions and 16 deletions
|
@ -20,6 +20,8 @@
|
||||||
#include "expose.h"
|
#include "expose.h"
|
||||||
#include "model_adapter.cpp"
|
#include "model_adapter.cpp"
|
||||||
|
|
||||||
|
std::string executable_path = "";
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -46,6 +48,7 @@ extern "C"
|
||||||
deviceenv = "KCPP_CLBLAST_DEVICES="+std::to_string(devices);
|
deviceenv = "KCPP_CLBLAST_DEVICES="+std::to_string(devices);
|
||||||
putenv((char*)platformenv.c_str());
|
putenv((char*)platformenv.c_str());
|
||||||
putenv((char*)deviceenv.c_str());
|
putenv((char*)deviceenv.c_str());
|
||||||
|
executable_path = inputs.executable_path;
|
||||||
|
|
||||||
if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3)
|
if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3)
|
||||||
{
|
{
|
||||||
|
|
2
expose.h
2
expose.h
|
@ -7,6 +7,7 @@ struct load_model_inputs
|
||||||
const int max_context_length;
|
const int max_context_length;
|
||||||
const int batch_size;
|
const int batch_size;
|
||||||
const bool f16_kv;
|
const bool f16_kv;
|
||||||
|
const char *executable_path;
|
||||||
const char *model_filename;
|
const char *model_filename;
|
||||||
const int n_parts_overwrite = -1;
|
const int n_parts_overwrite = -1;
|
||||||
const bool use_mmap;
|
const bool use_mmap;
|
||||||
|
@ -33,3 +34,4 @@ struct generation_outputs
|
||||||
char text[16384]; //16kb should be enough for any response
|
char text[16384]; //16kb should be enough for any response
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern std::string executable_path;
|
|
@ -77,14 +77,16 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
n_batch = 1;
|
n_batch = 1;
|
||||||
|
|
||||||
std::string word;
|
std::string word;
|
||||||
for (int i = 0; i < 20; i++) {
|
read_rwkv_vocab();
|
||||||
|
int vocabsiz = rwkv_vocab.size();
|
||||||
|
for (int i = 0; i < vocabsiz; i++) {
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
word = ('a'+i);
|
word = rwkv_vocab[i];
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
vocab.id_to_token[i] = word;
|
||||||
}
|
}
|
||||||
|
printf("\nRWKV Vocab: %u\n",vocabsiz);
|
||||||
|
|
||||||
int vocabsiz = vocab.token_to_id.size();
|
|
||||||
bool testeval = rwkv_eval(rwkv_context_v1, 0, rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
|
bool testeval = rwkv_eval(rwkv_context_v1, 0, rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
|
||||||
if(!testeval)
|
if(!testeval)
|
||||||
{
|
{
|
||||||
|
@ -230,6 +232,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
|
|
||||||
// tokenize the prompt
|
// tokenize the prompt
|
||||||
std::vector<gpt_vocab::id> embd_inp = ::gpt_tokenize(vocab, params.prompt);
|
std::vector<gpt_vocab::id> embd_inp = ::gpt_tokenize(vocab, params.prompt);
|
||||||
|
print_tok_vec(embd_inp);
|
||||||
|
|
||||||
//truncate to front of the prompt if its too long
|
//truncate to front of the prompt if its too long
|
||||||
int32_t nctx = params.n_ctx;
|
int32_t nctx = params.n_ctx;
|
||||||
|
@ -330,7 +333,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
|
|
||||||
if(file_format==FileFormat::RWKV_1)
|
if(file_format==FileFormat::RWKV_1)
|
||||||
{
|
{
|
||||||
|
printf("\nsiz:%d val:%d\n",embd.size(),embd[0]);
|
||||||
evalres = rwkv_eval(rwkv_context_v1, embd[0], rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
|
evalres = rwkv_eval(rwkv_context_v1, embd[0], rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
|
||||||
|
memcpy(logits.data(), rwkv_context_v1->logits_out, sizeof(float)*rwkv_vocab.size());
|
||||||
}
|
}
|
||||||
else if(file_format==FileFormat::GPT2_1)
|
else if(file_format==FileFormat::GPT2_1)
|
||||||
{
|
{
|
||||||
|
|
17
klite.embd
17
klite.embd
File diff suppressed because one or more lines are too long
12
koboldcpp.py
12
koboldcpp.py
|
@ -15,6 +15,7 @@ class load_model_inputs(ctypes.Structure):
|
||||||
("max_context_length", ctypes.c_int),
|
("max_context_length", ctypes.c_int),
|
||||||
("batch_size", ctypes.c_int),
|
("batch_size", ctypes.c_int),
|
||||||
("f16_kv", ctypes.c_bool),
|
("f16_kv", ctypes.c_bool),
|
||||||
|
("executable_path", ctypes.c_char_p),
|
||||||
("model_filename", ctypes.c_char_p),
|
("model_filename", ctypes.c_char_p),
|
||||||
("n_parts_overwrite", ctypes.c_int),
|
("n_parts_overwrite", ctypes.c_int),
|
||||||
("use_mmap", ctypes.c_bool),
|
("use_mmap", ctypes.c_bool),
|
||||||
|
@ -77,7 +78,7 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
|
||||||
inputs.max_context_length = max_context_length #initial value to use for ctx, can be overwritten
|
inputs.max_context_length = max_context_length #initial value to use for ctx, can be overwritten
|
||||||
inputs.threads = threads
|
inputs.threads = threads
|
||||||
inputs.n_parts_overwrite = n_parts_overwrite
|
inputs.n_parts_overwrite = n_parts_overwrite
|
||||||
inputs.f16_kv = True
|
inputs.f16_kv = True
|
||||||
inputs.use_mmap = use_mmap
|
inputs.use_mmap = use_mmap
|
||||||
inputs.use_smartcontext = use_smartcontext
|
inputs.use_smartcontext = use_smartcontext
|
||||||
inputs.blasbatchsize = blasbatchsize
|
inputs.blasbatchsize = blasbatchsize
|
||||||
|
@ -85,6 +86,7 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
|
||||||
if args.useclblast:
|
if args.useclblast:
|
||||||
clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1])
|
clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1])
|
||||||
inputs.clblast_info = clblastids
|
inputs.clblast_info = clblastids
|
||||||
|
inputs.executable_path = (os.path.dirname(os.path.realpath(__file__))+"/").encode("UTF-8")
|
||||||
ret = handle.load_model(inputs)
|
ret = handle.load_model(inputs)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
@ -437,7 +439,12 @@ def main(args):
|
||||||
else:
|
else:
|
||||||
epurl = f"http://{args.host}:{args.port}" + ("?streaming=1" if args.stream else "")
|
epurl = f"http://{args.host}:{args.port}" + ("?streaming=1" if args.stream else "")
|
||||||
|
|
||||||
|
if args.launch:
|
||||||
|
try:
|
||||||
|
import webbrowser as wb
|
||||||
|
wb.open(epurl)
|
||||||
|
except:
|
||||||
|
print("--launch was set, but could not launch web browser automatically.")
|
||||||
print(f"Please connect to custom endpoint at {epurl}")
|
print(f"Please connect to custom endpoint at {epurl}")
|
||||||
RunServerMultiThreaded(args.host, args.port, embedded_kailite)
|
RunServerMultiThreaded(args.host, args.port, embedded_kailite)
|
||||||
|
|
||||||
|
@ -451,6 +458,7 @@ if __name__ == '__main__':
|
||||||
portgroup.add_argument("--port", help="Port to listen on", default=defaultport, type=int, action='store')
|
portgroup.add_argument("--port", help="Port to listen on", default=defaultport, type=int, action='store')
|
||||||
portgroup.add_argument("port_param", help="Port to listen on (positional)", default=defaultport, nargs="?", type=int, action='store')
|
portgroup.add_argument("port_param", help="Port to listen on (positional)", default=defaultport, nargs="?", type=int, action='store')
|
||||||
parser.add_argument("--host", help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
|
parser.add_argument("--host", help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
|
||||||
|
parser.add_argument("--launch", help="Launches a web browser when load is completed.", action='store_true')
|
||||||
|
|
||||||
#os.environ["OMP_NUM_THREADS"] = '12'
|
#os.environ["OMP_NUM_THREADS"] = '12'
|
||||||
# psutil.cpu_count(logical=False)
|
# psutil.cpu_count(logical=False)
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
|
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe"
|
|
@ -14,9 +14,10 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
|
|
||||||
#include "model_adapter.h"
|
#include "model_adapter.h"
|
||||||
|
|
||||||
|
#include "rwkv_vocab.cpp"
|
||||||
|
|
||||||
// --- Utilities ---
|
// --- Utilities ---
|
||||||
|
|
||||||
#define FP32_SIZE 4
|
#define FP32_SIZE 4
|
||||||
|
|
30
otherarch/rwkv_vocab.cpp
Normal file
30
otherarch/rwkv_vocab.cpp
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "expose.h"
|
||||||
|
|
||||||
|
std::vector<std::string> rwkv_vocab;
|
||||||
|
|
||||||
|
void read_rwkv_vocab()
|
||||||
|
{
|
||||||
|
std::string line;
|
||||||
|
auto filepath = executable_path+ "rwkv_vocab.embd";
|
||||||
|
printf("Reading vocab from %s",filepath.c_str());
|
||||||
|
std::ifstream myfile(filepath);
|
||||||
|
if (myfile.is_open())
|
||||||
|
{
|
||||||
|
while (myfile.good())
|
||||||
|
{
|
||||||
|
getline(myfile, line);
|
||||||
|
rwkv_vocab.push_back(line);
|
||||||
|
}
|
||||||
|
myfile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "Unable to open RWKV vocab file";
|
||||||
|
}
|
||||||
|
}
|
50256
otherarch/tools/rwkv_orig_vocab.json
Normal file
50256
otherarch/tools/rwkv_orig_vocab.json
Normal file
File diff suppressed because it is too large
Load diff
13
otherarch/tools/rwkv_prepare_vocab.py
Normal file
13
otherarch/tools/rwkv_prepare_vocab.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# One-off helper: flattens rwkv_orig_vocab.json into rwkv_vocab.embd, a plain
# text file holding one vocabulary entry per line (each followed by "\n").
# Both files are resolved relative to the current working directory.
import json

with open("rwkv_orig_vocab.json", "r", encoding="utf-8") as vocab_json:
    encoder = json.load(vocab_json)

# Iterating the loaded object yields its entries in file order (for a JSON
# object these are the keys); every entry is emitted verbatim, unescaped.
flattened = "".join(token + "\n" for token in encoder)

with open("rwkv_vocab.embd", "w", encoding="utf-8") as embd_out:
    embd_out.write(flattened)

print("OK")
|
50254
rwkv_vocab.embd
Normal file
50254
rwkv_vocab.embd
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue