Merge branch 'master' into HEAD

This commit is contained in:
Georgi Gerganov 2023-10-12 14:35:47 +03:00
commit 04ac0558de
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
8 changed files with 2430 additions and 2075 deletions

View file

@ -422,8 +422,7 @@ endif()
if (LLAMA_ALL_WARNINGS) if (LLAMA_ALL_WARNINGS)
if (NOT MSVC) if (NOT MSVC)
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
-Werror=implicit-function-declaration)
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn) set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
set(host_cxx_flags "") set(host_cxx_flags "")
@ -455,7 +454,8 @@ if (LLAMA_ALL_WARNINGS)
set(c_flags ${c_flags} ${warning_flags}) set(c_flags ${c_flags} ${warning_flags})
set(cxx_flags ${cxx_flags} ${warning_flags}) set(cxx_flags ${cxx_flags} ${warning_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>" add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>") "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
endif() endif()

View file

@ -279,7 +279,7 @@ In order to build llama.cpp you have three different options.
On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU. On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU.
To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option. To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option.
When built with Metal support, you can explicitly disable GPU inference with the `--gpu-layers|-ngl 0` command-line When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line
argument. argument.
### MPI Build ### MPI Build

View file

@ -496,11 +496,13 @@ test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release test $ret -eq 0 && gg_run ctest_release
if [ -z ${GG_BUILD_LOW_PERF} ]; then if [ -z ${GG_BUILD_LOW_PERF} ]; then
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
if [ -z ${GG_BUILD_CUDA} ]; then if [ -z ${GG_BUILD_CUDA} ]; then
test $ret -eq 0 && gg_run open_llama_3b_v2 test $ret -eq 0 && gg_run open_llama_3b_v2
else else
test $ret -eq 0 && gg_run open_llama_7b_v2 test $ret -eq 0 && gg_run open_llama_7b_v2
fi fi
fi
fi fi
exit $ret exit $ret

View file

@ -297,6 +297,9 @@ int main(int argc, char ** argv) {
LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n", LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n",
__func__, n_matching_session_tokens, embd_inp.size()); __func__, n_matching_session_tokens, embd_inp.size());
} }
// remove any "future" tokens that we might have inherited from the previous session
llama_kv_cache_tokens_rm(ctx, n_matching_session_tokens, -1);
} }
LOGLN( LOGLN(
@ -545,9 +548,6 @@ int main(int argc, char ** argv) {
if (i > 0) { if (i > 0) {
embd.erase(embd.begin(), embd.begin() + i); embd.erase(embd.begin(), embd.begin() + i);
} }
// remove any "future" tokens that we might have inherited from the session from the KV cache
llama_kv_cache_tokens_rm(ctx, n_past, -1);
} }
// evaluate tokens in batches // evaluate tokens in batches

File diff suppressed because it is too large Load diff

View file

@ -136,6 +136,11 @@
display: block; display: block;
} }
fieldset label.slim {
margin: 0 0.5em;
display: inline;
}
header, footer { header, footer {
text-align: center; text-align: center;
} }
@ -145,6 +150,14 @@
color: #888; color: #888;
} }
.mode-chat textarea[name=prompt] {
height: 4.5em;
}
.mode-completion textarea[name=prompt] {
height: 10em;
}
@keyframes loading-bg-wipe { @keyframes loading-bg-wipe {
0% { 0% {
@ -187,7 +200,7 @@
template: "{{prompt}}\n\n{{history}}\n{{char}}:", template: "{{prompt}}\n\n{{history}}\n{{char}}:",
historyTemplate: "{{name}}: {{message}}", historyTemplate: "{{name}}: {{message}}",
transcript: [], transcript: [],
type: "chat", type: "chat", // "chat" | "completion"
char: "Llama", char: "Llama",
user: "User", user: "User",
}) })
@ -365,13 +378,44 @@
return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key])); return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key]));
} }
async function runLlama(prompt, llamaParams, char) {
const currentMessages = [];
const history = session.value.transcript;
if (controller.value) {
throw new Error("already running");
}
controller.value = new AbortController();
for await (const chunk of llama(prompt, llamaParams, {controller: controller.value})) {
const data = chunk.data;
if (data.stop) {
while (
currentMessages.length > 0 &&
currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
) {
currentMessages.pop();
}
transcriptUpdate([...history, [char, currentMessages]])
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
} else {
currentMessages.push(data);
transcriptUpdate([...history, [char, currentMessages]])
}
if (data.timings) {
llamaStats.value = data.timings;
}
}
controller.value = null;
}
// send message to server // send message to server
const chat = async (msg) => { const chat = async (msg) => {
if (controller.value) { if (controller.value) {
console.log('already running...'); console.log('already running...');
return; return;
} }
controller.value = new AbortController();
transcriptUpdate([...session.value.transcript, ["{{user}}", msg]]) transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
@ -391,42 +435,25 @@
).join("\n"), ).join("\n"),
}); });
const currentMessages = []; await runLlama(prompt, {
const history = session.value.transcript
const llamaParams = {
...params.value, ...params.value,
stop: ["</s>", template("{{char}}:"), template("{{user}}:")], stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
}, "{{char}}");
} }
for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) { const runCompletion = async () => {
const data = chunk.data; if (controller.value) {
console.log('already running...');
if (data.stop) { return;
while (
currentMessages.length > 0 &&
currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
) {
currentMessages.pop();
} }
transcriptUpdate([...history, ["{{char}}", currentMessages]]) const {prompt} = session.value;
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data); transcriptUpdate([...session.value.transcript, ["", prompt]]);
} else { await runLlama(prompt, {
currentMessages.push(data); ...params.value,
transcriptUpdate([...history, ["{{char}}", currentMessages]]) stop: [],
}, "");
} }
if (data.timings) {
llamaStats.value = data.timings;
}
}
controller.value = null;
}
function MessageInput() {
const message = useSignal("")
const stop = (e) => { const stop = (e) => {
e.preventDefault(); e.preventDefault();
if (controller.value) { if (controller.value) {
@ -440,6 +467,9 @@
transcriptUpdate([]); transcriptUpdate([]);
} }
function MessageInput() {
const message = useSignal("")
const submit = (e) => { const submit = (e) => {
stop(e); stop(e);
chat(message.value); chat(message.value);
@ -474,6 +504,19 @@
` `
} }
function CompletionControls() {
const submit = (e) => {
stop(e);
runCompletion();
}
return html`
<div>
<button onclick=${submit} type="button" disabled=${generating.value}>Start</button>
<button onclick=${stop} disabled=${!generating.value}>Stop</button>
<button onclick=${reset}>Reset</button>
</div>`;
}
const ChatLog = (props) => { const ChatLog = (props) => {
const messages = session.value.transcript; const messages = session.value.transcript;
const container = useRef(null) const container = useRef(null)
@ -497,7 +540,11 @@
data; data;
message = html`<${Markdownish} text=${template(text)} />` message = html`<${Markdownish} text=${template(text)} />`
} }
if(user) {
return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>` return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
} else {
return html`<p key=${index}>${message}</p>`
}
}; };
return html` return html`
@ -574,18 +621,31 @@
userTemplateAutosave() userTemplateAutosave()
}, [session.value, params.value]) }, [session.value, params.value])
return html` const GrammarControl = () => (
<form> html`
<fieldset> <div>
<${UserTemplateResetButton}/> <label for="template">Grammar</label>
</fieldset> <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
</div>
`
);
const PromptControlFieldSet = () => (
html`
<fieldset> <fieldset>
<div> <div>
<label for="prompt">Prompt</label> <label htmlFor="prompt">Prompt</label>
<textarea type="text" name="prompt" value="${session.value.prompt}" rows=4 oninput=${updateSession}/> <textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
</div> </div>
</fieldset> </fieldset>
`
);
const ChatConfigForm = () => (
html`
${PromptControlFieldSet()}
<fieldset class="two"> <fieldset class="two">
<div> <div>
@ -609,15 +669,30 @@
<label for="template">Chat history template</label> <label for="template">Chat history template</label>
<textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/> <textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
</div> </div>
${GrammarControl()}
</fieldset>
`
);
const CompletionConfigForm = () => (
html`
${PromptControlFieldSet()}
<fieldset>${GrammarControl()}</fieldset>
`
);
return html`
<form>
<fieldset class="two">
<${UserTemplateResetButton}/>
<div> <div>
<label for="template">Grammar</label> <label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
<textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/> <label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
</div> </div>
</fieldset> </fieldset>
${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}
<fieldset class="two"> <fieldset class="two">
${IntField({label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict})} ${IntField({label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict})}
${FloatField({label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature})} ${FloatField({label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature})}
@ -851,7 +926,7 @@
function App(props) { function App(props) {
return html` return html`
<div> <div class="mode-${session.value.type}">
<header> <header>
<h1>llama.cpp</h1> <h1>llama.cpp</h1>
</header> </header>
@ -861,7 +936,7 @@
</main> </main>
<section id="write"> <section id="write">
<${MessageInput} /> <${session.value.type === 'chat' ? MessageInput : CompletionControls} />
</section> </section>
<footer> <footer>

View file

@ -405,6 +405,7 @@ struct llama_server_context
// compare the evaluated prompt with the new prompt // compare the evaluated prompt with the new prompt
n_past = common_part(embd, prompt_tokens); n_past = common_part(embd, prompt_tokens);
embd = prompt_tokens; embd = prompt_tokens;
if (n_past == num_prompt_tokens) if (n_past == num_prompt_tokens)
{ {
// we have to evaluate at least 1 token to generate logits. // we have to evaluate at least 1 token to generate logits.
@ -412,6 +413,9 @@ struct llama_server_context
n_past--; n_past--;
} }
// since #3228 we now have to manually manage the KV cache
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
LOG_VERBOSE("prompt ingested", { LOG_VERBOSE("prompt ingested", {
{"n_past", n_past}, {"n_past", n_past},
{"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)}, {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
@ -461,9 +465,6 @@ struct llama_server_context
// compare the evaluated prompt with the new prompt // compare the evaluated prompt with the new prompt
n_past = common_part(embd, prompt_tokens); n_past = common_part(embd, prompt_tokens);
// since #3228 we now have to manually manage the KV cache
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
embd = prompt_tokens; embd = prompt_tokens;
if (n_past == num_prompt_tokens) if (n_past == num_prompt_tokens)
{ {
@ -471,6 +472,9 @@ struct llama_server_context
n_past--; n_past--;
} }
// since #3228 we now have to manually manage the KV cache
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
LOG_VERBOSE("prompt ingested", { LOG_VERBOSE("prompt ingested", {
{"n_past", n_past}, {"n_past", n_past},
{"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)}, {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},

93
prompts/mnemonics.txt Normal file
View file

@ -0,0 +1,93 @@
For each kanji character, write a Markdown-formatted mnemonic that uses its keyword and the keyword of all its components.
Kanji: 欠 (lack of)
Components: 𠂊 (hook claw), 人 (person)
Mnemonic: This **person** is a pirate. He lost his hand to a crocodile many years ago. Nowadays, the ***lack of*** a hand does not bother him too much. In fact, the **hook claw** that replaces it is the mark of a true pirate, so he is quite proud of it!
Kanji: 類 (kind (of something))
Components: 米 (rice), 大 (large), 頁 (page)
Mnemonic: The waiter at a Chinese restaurant hands you a **large** menu. Each **page** has all ***kinds*** of **rice** on offer!
Kanji: 燃 (burn)
Components: 火 (fire), 然 (sort of thing)
Mnemonic: ***Burning*** things up with **fire** is just my **sort of thing**. (Spoken like a true pyromaniac.)
Kanji: 頂 (top of)
Components: 丁 (street), 頁 (page)
Mnemonic: To be at the ***top of*** your game, you need both practical knowledge (**street** smarts) and theoretical knowledge (having read many **pages**).
Kanji: 険 (risky and steep)
Components: 阝 (small village), 㑒 (consensus)
Mnemonic: Everyone agrees (there is **consensus**) that the path to the **small village** is ***risky and steep***.
Kanji: 困 (distressed)
Components: 囗 (closed box), 木 (tree)
Mnemonic: You would feel ***distressed*** too if you were a **tree** trapped in a **closed box**! I have no place to grow!
Kanji: 頭 (head)
Components: 豆 (bean), 頁 (page)
Mnemonic: What do you have in that ***head*** of yours? A **bean** for a brain? Go read more **pages** and become more knowledgeable about the world!
Kanji: 確 (certain)
Components: 石 (stone), 冖 (roof without a chimney), 隹 (old bird)
Mnemonic: An **old bird** has made a nest on your **roof**. What do you do? You call Misaka from a <cite>A ***Certain*** Scientific Railgun</cite> to get rid of it, of course! But she doesn't really want to vaporize the poor thing, so she just throws a **stone** to scare it away. (What was the point of calling her, then‽)
Kanji: 魚 (fish)
Components: 𠂊 (hook claw), 田 (rice field), 灬 (fire sparks)
Mnemonic: Catch ***fish*** with a **hook**, collect rice from the **rice field**, cook them with **fire**… And my meal is ready!
Kanji: 警 (to police (something))
Components: 敬 (respect), 言 (say)
Mnemonic: ***To police something*** is to make people **respect** what the law **says**.
Kanji: 筆 (writing brush)
Components: 竹 (bamboo), 聿 (brush)
Mnemonic: A traditional ***writing brush*** is a **brush** made of **bamboo**.
Kanji: 獄 (prison)
Components: 犭 (animal), 言 (say), 犬 (dog)
Mnemonic: In ***prison***, like in the **animal** kingdom, only the toughest survive. You have to watch what you **say**. It's a **dog**-eat-**dog** world.
Kanji: 新 (new)
Components: 立 (standing up), 木 (tree), 斤 (axe)
Mnemonic: In order for a ***new*** construction to be made, an empty lot is needed. If there are any **trees** **standing up**, they must be cut down with an **axe**.
Kanji: 怪 (suspicious)
Components: 忄 (weak heart), 圣 (sacred)
Mnemonic: That painting of the **Sacred** **Heart** of Jesus looks ***suspicious***. I think it might be a forgery.
Kanji: 温 (warm (to the touch))
Components: 氵 (water drops), 日 (sun), 皿 (dish)
Mnemonic: If you leave **water** on a **dish** in the **sun**, it will get ***warm***.
Kanji: 階 (floor (of a building))
Components: 阝 (small village), 皆 (all)
Mnemonic: It might be a **small village**, but, despite that, **all** of its buildings have many ***floors***. It's a village of skyscrapers!
Kanji: 多 (many)
Components: 夕 (evening (before sunset)), 夕 (evening (before sunset))
Mnemonic: Two **evenings** in a day would be one too ***many***.
Kanji: 別 (separate)
Components: 口 (mouth), 万 (ten thousand), 刂 (knife)
Mnemonic: Tom Six is at it again. For his next flick, he wants to stitch together **ten thousand** people, **mouth**-to-anus. One of the most graphic and disturbing scenes will feature one of the victims using a **knife** to ***separate*** perself.
Kanji: 並 (line up)
Components: 䒑 (antlers on a wall), 业 (runway)
Mnemonic: In order to land a plane you have to ***line up*** properly with the **runway**. The things that look like **antlers** at the end of the runway are the control towers; you should follow their instructions.
Kanji: 姿 (figure)
Components: 次 (next), 女 (woman)
Mnemonic: The **next** **woman** that I date will have a perfect ***figure***. Because I'm done with 3D women—it will *literally* be an anime figure!
Kanji: 実 (real)
Components: 宀 (roof with a chimney), 𡗗 (three people)
Mnemonic: Living under a **roof with a chimney** with **three people** (a wife and two children)—a happy family life—is not something I could have ever imagined. It does not feel ***real***.
Kanji: 謝 (apologize)
Components: 言 (say), 射 (shoot)
Mnemonic: **Shoot** first, ***apologize*** (**say** you are sorry) later.
Kanji: 提 (propose)
Components: 扌 (left hand), 是 (go with)
Mnemonic: