Windows: convert the prompt from the system locale to UTF-8. Allows other languages to be used without extra workarounds...
This commit is contained in:
parent 9cbc404ba6
commit dc5adf173a
1 changed file with 21 additions and 0 deletions
@@ -18,6 +18,10 @@
 #include <signal.h>
 #endif
 
+#if defined (_WIN32)
+#include <windows.h>
+#endif
+
 static console_state con_st;
 
 static bool is_interacting = false;
@@ -36,6 +40,18 @@ void sigint_handler(int signo) {
 }
 #endif
 
+#if defined (_WIN32)
+std::string promptconvert(const std::string str)
+{
+    // Convert from current locale to UTF-8
+    wchar_t wstr[1024];
+    int wlen = MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.length(), wstr, 1024);
+    char mbstr[2048];
+    int mblen = WideCharToMultiByte(CP_UTF8, 0, wstr, wlen, mbstr, 2048, 0, 0);
+    return std::string(mbstr, mblen);
+}
+#endif
+
 int main(int argc, char ** argv) {
     gpt_params params;
     params.model = "models/llama-7B/ggml-model.bin";
@@ -136,6 +152,11 @@ int main(int argc, char ** argv) {
     // Add a space in front of the first character to match OG llama tokenizer behavior
     params.prompt.insert(0, 1, ' ');
 
+#if defined (_WIN32)
+    // Convert from current locale to UTF-8
+    params.prompt = promptconvert(params.prompt);
+#endif
+
     // tokenize the prompt
     auto embd_inp = ::llama_tokenize(ctx, params.prompt, true);
 
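Note: the promptconvert() added by this commit converts through fixed 1024/2048-element stack buffers, so prompts longer than that would be silently truncated. Below is a minimal sketch of a length-checked variant using the same Win32 APIs (MultiByteToWideChar / WideCharToMultiByte); the name prompt_to_utf8 and the fall-back-to-original-string behaviour are illustrative assumptions, not part of this commit.

#if defined (_WIN32)
#include <windows.h>
#include <string>

// Illustrative alternative to promptconvert(): query the required buffer
// sizes first, so long prompts are never truncated.
static std::string prompt_to_utf8(const std::string & str) {
    if (str.empty()) {
        return str;
    }
    // First pass with a null output buffer returns the required wide length.
    int wlen = MultiByteToWideChar(CP_ACP, 0, str.c_str(), (int) str.length(), NULL, 0);
    if (wlen <= 0) {
        return str; // conversion failed; keep the original bytes
    }
    std::wstring wstr(wlen, L'\0');
    MultiByteToWideChar(CP_ACP, 0, str.c_str(), (int) str.length(), &wstr[0], wlen);

    // Same two-pass pattern for the wide -> UTF-8 direction.
    int mblen = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wlen, NULL, 0, NULL, NULL);
    if (mblen <= 0) {
        return str;
    }
    std::string out(mblen, '\0');
    WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wlen, &out[0], mblen, NULL, NULL);
    return out;
}
#endif

As in the commit, CP_ACP is used as the source encoding, i.e. the prompt is assumed to arrive in the process's ANSI code page (the usual case for arguments received through main(argc, argv) on Windows) before being re-encoded as UTF-8 for the tokenizer.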