Merge branch 'master' into llava
commit 346e3c1605
8 changed files with 2430 additions and 2075 deletions
CMakeLists.txt
@@ -422,8 +422,7 @@ endif()
 if (LLAMA_ALL_WARNINGS)
     if (NOT MSVC)
         set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
-        set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
-                    -Werror=implicit-function-declaration)
+        set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
         set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
         set(host_cxx_flags "")

@@ -455,7 +454,8 @@ if (LLAMA_ALL_WARNINGS)
     set(c_flags ${c_flags} ${warning_flags})
     set(cxx_flags ${cxx_flags} ${warning_flags})
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
-                        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>")
+                        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
+                        "$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")

 endif()
README.md
@@ -279,7 +279,7 @@ In order to build llama.cpp you have three different options.
 On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU.
 To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option.

-When built with Metal support, you can explicitly disable GPU inference with the `--gpu-layers|-ngl 0` command-line
+When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line
 argument.

 ### MPI Build
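For reference, the renamed flag corresponds to the `n_gpu_layers` field of the model parameters in the C API. What follows is a minimal sketch (not part of this commit) of forcing CPU-only inference programmatically, assuming the `llama.h` API as of this commit and an illustrative model path:

// sketch: force CPU-only inference in a Metal-enabled build by
// offloading zero layers to the GPU (the effect of --n-gpu-layers 0)
#include "llama.h"

#include <cstdio>

int main(void) {
    llama_backend_init(false /* numa */);

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 0; // nothing offloaded: all layers run on the CPU

    // model path is illustrative
    llama_model * model = llama_load_model_from_file("models/7B/ggml-model-q4_0.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}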
ci/run.sh (10 changes)
@@ -496,10 +496,12 @@ test $ret -eq 0 && gg_run ctest_debug
 test $ret -eq 0 && gg_run ctest_release

 if [ -z ${GG_BUILD_LOW_PERF} ]; then
-    if [ -z ${GG_BUILD_CUDA} ]; then
-        test $ret -eq 0 && gg_run open_llama_3b_v2
-    else
-        test $ret -eq 0 && gg_run open_llama_7b_v2
+    if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
+        if [ -z ${GG_BUILD_CUDA} ]; then
+            test $ret -eq 0 && gg_run open_llama_3b_v2
+        else
+            test $ret -eq 0 && gg_run open_llama_7b_v2
+        fi
     fi
 fi
examples/main/main.cpp
@@ -297,6 +297,9 @@ int main(int argc, char ** argv) {
             LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n",
                 __func__, n_matching_session_tokens, embd_inp.size());
         }
+
+        // remove any "future" tokens that we might have inherited from the previous session
+        llama_kv_cache_tokens_rm(ctx, n_matching_session_tokens, -1);
     }

     LOGLN(
@@ -545,9 +548,6 @@ int main(int argc, char ** argv) {
             if (i > 0) {
                 embd.erase(embd.begin(), embd.begin() + i);
             }
-
-            // remove any "future" tokens that we might have inherited from the session from the KV cache
-            llama_kv_cache_tokens_rm(ctx, n_past, -1);
         }

         // evaluate tokens in batches
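The effect of the moved call: the restored session's KV cache is trimmed once, immediately after the matching prefix is measured, instead of on every pass through the generation loop. A condensed sketch (not part of this commit) of the intended semantics, assuming the `llama_kv_cache_tokens_rm` helper introduced in #3228, which clears cached cells from position `c0` up to `c1` (`-1` meaning unbounded):

// sketch: trim a restored session's KV cache to the verified prompt prefix
#include "llama.h"

#include <vector>

void trim_restored_session(llama_context * ctx,
                           const std::vector<llama_token> & session_tokens,
                           const std::vector<llama_token> & embd_inp) {
    // count how many tokens of the saved session match the new prompt
    size_t n_matching_session_tokens = 0;
    while (n_matching_session_tokens < session_tokens.size() &&
           n_matching_session_tokens < embd_inp.size() &&
           session_tokens[n_matching_session_tokens] == embd_inp[n_matching_session_tokens]) {
        n_matching_session_tokens++;
    }

    // remove any "future" tokens inherited from the previous session:
    // every cached cell at position >= n_matching_session_tokens is stale
    llama_kv_cache_tokens_rm(ctx, (int32_t) n_matching_session_tokens, -1);
}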
(One file's diff is suppressed because it is too large.)
examples/server/public/index.html
@@ -136,6 +136,11 @@
       display: block;
     }

+    fieldset label.slim {
+      margin: 0 0.5em;
+      display: inline;
+    }
+
     header, footer {
       text-align: center;
     }
@@ -145,6 +150,14 @@
       color: #888;
     }

+    .mode-chat textarea[name=prompt] {
+      height: 4.5em;
+    }
+
+    .mode-completion textarea[name=prompt] {
+      height: 10em;
+    }
+
     @keyframes loading-bg-wipe {
       0% {
@@ -187,7 +200,7 @@
       template: "{{prompt}}\n\n{{history}}\n{{char}}:",
       historyTemplate: "{{name}}: {{message}}",
       transcript: [],
-      type: "chat",
+      type: "chat",  // "chat" | "completion"
       char: "Llama",
       user: "User",
     })
@@ -365,13 +378,44 @@
       return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key]));
     }

+    async function runLlama(prompt, llamaParams, char) {
+      const currentMessages = [];
+      const history = session.value.transcript;
+      if (controller.value) {
+        throw new Error("already running");
+      }
+      controller.value = new AbortController();
+      for await (const chunk of llama(prompt, llamaParams, {controller: controller.value})) {
+        const data = chunk.data;
+
+        if (data.stop) {
+          while (
+            currentMessages.length > 0 &&
+            currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
+          ) {
+            currentMessages.pop();
+          }
+          transcriptUpdate([...history, [char, currentMessages]])
+          console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
+        } else {
+          currentMessages.push(data);
+          transcriptUpdate([...history, [char, currentMessages]])
+        }
+
+        if (data.timings) {
+          llamaStats.value = data.timings;
+        }
+      }
+
+      controller.value = null;
+    }
+
     // send message to server
     const chat = async (msg) => {
       if (controller.value) {
         console.log('already running...');
         return;
       }
-      controller.value = new AbortController();

       transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
@@ -391,55 +435,41 @@
       ).join("\n"),
       });

-      const currentMessages = [];
-      const history = session.value.transcript
-
-      const llamaParams = {
+      await runLlama(prompt, {
         ...params.value,
         stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
-      }
-
-      for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
-        const data = chunk.data;
-
-        if (data.stop) {
-          while (
-            currentMessages.length > 0 &&
-            currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
-          ) {
-            currentMessages.pop();
-          }
-          transcriptUpdate([...history, ["{{char}}", currentMessages]])
-          console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
-        } else {
-          currentMessages.push(data);
-          transcriptUpdate([...history, ["{{char}}", currentMessages]])
-        }
-
-        if (data.timings) {
-          llamaStats.value = data.timings;
-        }
-      }
-
-      controller.value = null;
+      }, "{{char}}");
+    }
+
+    const runCompletion = async () => {
+      if (controller.value) {
+        console.log('already running...');
+        return;
+      }
+      const {prompt} = session.value;
+      transcriptUpdate([...session.value.transcript, ["", prompt]]);
+      await runLlama(prompt, {
+        ...params.value,
+        stop: [],
+      }, "");
+    }
+
+    const stop = (e) => {
+      e.preventDefault();
+      if (controller.value) {
+        controller.value.abort();
+        controller.value = null;
+      }
+    }
+
+    const reset = (e) => {
+      stop(e);
+      transcriptUpdate([]);
     }

     function MessageInput() {
       const message = useSignal("")

-      const stop = (e) => {
-        e.preventDefault();
-        if (controller.value) {
-          controller.value.abort();
-          controller.value = null;
-        }
-      }
-
-      const reset = (e) => {
-        stop(e);
-        transcriptUpdate([]);
-      }
-
       const submit = (e) => {
         stop(e);
         chat(message.value);
@@ -474,6 +504,19 @@
       `
     }

+    function CompletionControls() {
+      const submit = (e) => {
+        stop(e);
+        runCompletion();
+      }
+      return html`
+        <div>
+          <button onclick=${submit} type="button" disabled=${generating.value}>Start</button>
+          <button onclick=${stop} disabled=${!generating.value}>Stop</button>
+          <button onclick=${reset}>Reset</button>
+        </div>`;
+    }
+
     const ChatLog = (props) => {
       const messages = session.value.transcript;
       const container = useRef(null)
@@ -497,7 +540,11 @@
           data;
         message = html`<${Markdownish} text=${template(text)} />`
       }
-      return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
+      if(user) {
+        return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
+      } else {
+        return html`<p key=${index}>${message}</p>`
+      }
     };

     return html`
@@ -574,18 +621,31 @@
       userTemplateAutosave()
     }, [session.value, params.value])

-    return html`
-      <form>
-        <fieldset>
-          <${UserTemplateResetButton}/>
-        </fieldset>
+    const GrammarControl = () => (
+      html`
+        <div>
+          <label for="template">Grammar</label>
+          <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
+          <input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
+          <button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
+        </div>
+      `
+    );

-        <fieldset>
-          <div>
-            <label for="prompt">Prompt</label>
-            <textarea type="text" name="prompt" value="${session.value.prompt}" rows=4 oninput=${updateSession}/>
-          </div>
-        </fieldset>
+    const PromptControlFieldSet = () => (
+      html`
+        <fieldset>
+          <div>
+            <label htmlFor="prompt">Prompt</label>
+            <textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
+          </div>
+        </fieldset>
+      `
+    );
+
+    const ChatConfigForm = () => (
+      html`
+        ${PromptControlFieldSet()}

         <fieldset class="two">
           <div>
@@ -609,15 +669,30 @@
           <label for="template">Chat history template</label>
           <textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
         </div>
+        ${GrammarControl()}
+      </fieldset>
+      `
+    );
+
+    const CompletionConfigForm = () => (
+      html`
+        ${PromptControlFieldSet()}
+        <fieldset>${GrammarControl()}</fieldset>
+      `
+    );
+
+    return html`
+      <form>
+        <fieldset class="two">
+          <${UserTemplateResetButton}/>
+          <div>
-          <label for="template">Grammar</label>
-          <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
-          <input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
-          <button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
+            <label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
+            <label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
           </div>
         </fieldset>

+        ${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}
+
         <fieldset class="two">
           ${IntField({label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict})}
           ${FloatField({label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature})}
@@ -851,7 +926,7 @@
     function App(props) {

       return html`
-        <div>
+        <div class="mode-${session.value.type}">
           <header>
             <h1>llama.cpp</h1>
           </header>
@@ -861,7 +936,7 @@
         </main>

         <section id="write">
-          <${MessageInput} />
+          <${session.value.type === 'chat' ? MessageInput : CompletionControls} />
         </section>

         <footer>
examples/server/server.cpp
@@ -405,6 +405,7 @@ struct llama_server_context
             // compare the evaluated prompt with the new prompt
             n_past = common_part(embd, prompt_tokens);
             embd = prompt_tokens;
+
             if (n_past == num_prompt_tokens)
             {
                 // we have to evaluate at least 1 token to generate logits.
@@ -412,6 +413,9 @@ struct llama_server_context
                 n_past--;
             }

+            // since #3228 we now have to manually manage the KV cache
+            llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
+
             LOG_VERBOSE("prompt ingested", {
                                                {"n_past", n_past},
                                                {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
@@ -461,9 +465,6 @@ struct llama_server_context
             // compare the evaluated prompt with the new prompt
             n_past = common_part(embd, prompt_tokens);

-            // since #3228 we now have to manually manage the KV cache
-            llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
-
             embd = prompt_tokens;
             if (n_past == num_prompt_tokens)
             {
@@ -471,6 +472,9 @@ struct llama_server_context
                 n_past--;
             }

+            // since #3228 we now have to manually manage the KV cache
+            llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
+
             LOG_VERBOSE("prompt ingested", {
                                                {"n_past", n_past},
                                                {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
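After these hunks, both server code paths share the same shape: reuse the cached prefix, back up one position when the entire prompt is already cached (logits exist only for tokens that get evaluated), and only then trim the cache past n_past. A condensed sketch (not part of this commit), assuming the `llama_kv_cache_seq_rm` API from #3228 and a simplified stand-in for the server's `common_part` helper:

// sketch: the prompt-ingestion pattern now used by both paths above
#include "llama.h"

#include <vector>

// simplified stand-in for server.cpp's common_part(): length of the shared prefix
static size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b) {
    size_t i = 0;
    while (i < a.size() && i < b.size() && a[i] == b[i]) {
        i++;
    }
    return i;
}

void ingest_prompt(llama_context * ctx,
                   std::vector<llama_token> & embd,
                   const std::vector<llama_token> & prompt_tokens) {
    // reuse whatever prefix of the previous evaluation still matches
    size_t n_past = common_part(embd, prompt_tokens);
    embd = prompt_tokens;

    if (n_past == prompt_tokens.size()) {
        // the whole prompt is cached: step back one token, because logits
        // are only produced for tokens that are actually evaluated
        n_past--;
    }

    // since #3228 the cache is managed manually: drop every cell of
    // sequence 0 from position n_past onward
    llama_kv_cache_seq_rm(ctx, 0, (llama_pos) n_past, -1);
}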
prompts/mnemonics.txt (new file, 93 lines)
@@ -0,0 +1,93 @@
For each kanji character, write a Markdown‐formatted mnemonic that uses its keyword and the keyword of all its components.

Kanji: 欠 (lack of)
Components: 𠂊 (hook claw), 人 (person)
Mnemonic: This **person** is a pirate. He lost his hand to a crocodile many years ago. Nowadays, the ***lack of*** a hand does not bother him too much. In fact, the **hook claw** that replaces it is the mark of a true pirate, so he is quite proud of it!

Kanji: 類 (kind (of something))
Components: 米 (rice), 大 (large), 頁 (page)
Mnemonic: The waiter at a Chinese restaurant hands you a **large** menu. Each **page** has all ***kinds*** of **rice** on offer!

Kanji: 燃 (burn)
Components: 火 (fire), 然 (sort of thing)
Mnemonic: ***Burning*** things up with **fire** is just my **sort of thing**. (Spoken like a true pyromaniac.)

Kanji: 頂 (top of)
Components: 丁 (street), 頁 (page)
Mnemonic: To be at the ***top of*** your game, you need both practical knowledge (**street** smarts) and theoretical knowledge (having read many **pages**).

Kanji: 険 (risky and steep)
Components: 阝 (small village), 㑒 (consensus)
Mnemonic: Everyone agrees (there is **consensus**) that the path to the **small village** is ***risky and steep***.

Kanji: 困 (distressed)
Components: 囗 (closed box), 木 (tree)
Mnemonic: You would feel ***distressed*** too if you were a **tree** trapped in a **closed box**! I have no place to grow!

Kanji: 頭 (head)
Components: 豆 (bean), 頁 (page)
Mnemonic: What do you have in that ***head*** of yours? A **bean** for a brain? Go read more **pages** and become more knowledgeable about the world!

Kanji: 確 (certain)
Components: 石 (stone), 冖 (roof without a chimney), 隹 (old bird)
Mnemonic: An **old bird** has made a nest on your **roof**. What do you do? You call Misaka from <cite>A ***Certain*** Scientific Railgun</cite> to get rid of it, of course! But she doesn’t really want to vaporize the poor thing, so she just throws a **stone** to scare it away. (What was the point of calling her, then‽)

Kanji: 魚 (fish)
Components: 𠂊 (hook claw), 田 (rice field), 灬 (fire sparks)
Mnemonic: Catch ***fish*** with a **hook**, collect rice from the **rice field**, cook them with **fire**… And my meal is ready!

Kanji: 警 (to police (something))
Components: 敬 (respect), 言 (say)
Mnemonic: ***To police something*** is to make people **respect** what the law **says**.

Kanji: 筆 (writing brush)
Components: 竹 (bamboo), 聿 (brush)
Mnemonic: A traditional ***writing brush*** is a **brush** made of **bamboo**.

Kanji: 獄 (prison)
Components: 犭 (animal), 言 (say), 犬 (dog)
Mnemonic: In ***prison***, like in the **animal** kingdom, only the toughest survive. You have to watch what you **say**. It’s a **dog**‐eat‐dog world.

Kanji: 新 (new)
Components: 立 (standing up), 木 (tree), 斤 (axe)
Mnemonic: In order for a ***new*** construction to be made, an empty lot is needed. If there are any **trees** **standing up**, they must be cut down with an **axe**.

Kanji: 怪 (suspicious)
Components: 忄 (weak heart), 圣 (sacred)
Mnemonic: That painting of the **Sacred** **Heart** of Jesus looks ***suspicious***. I think it might be a forgery.

Kanji: 温 (warm (to the touch))
Components: 氵 (water drops), 日 (sun), 皿 (dish)
Mnemonic: If you leave **water** on a **dish** in the **sun**, it will get ***warm***.

Kanji: 階 (floor (of a building))
Components: 阝 (small village), 皆 (all)
Mnemonic: It might be a **small village**, but, despite that, **all** of its buildings have many ***floors***. It’s a village of skyscrapers!

Kanji: 多 (many)
Components: 夕 (evening (before sunset)), 夕 (evening (before sunset))
Mnemonic: Two **evenings** in a day would be one too ***many***.

Kanji: 別 (separate)
Components: 口 (mouth), 万 (ten thousand), 刂 (knife)
Mnemonic: Tom Six is at it again. For his next flick, he wants to stitch together **ten thousand** people, **mouth**‐to‐anus. One of the most graphic and disturbing scenes will feature one of the victims using a **knife** to ***separate*** perself.

Kanji: 並 (line up)
Components: 䒑 (antlers on a wall), 业 (runway)
Mnemonic: In order to land a plane you have to ***line up*** properly with the **runway**. The things that look like **antlers** at the end of the runway are the control towers; you should follow their instructions.

Kanji: 姿 (figure)
Components: 次 (next), 女 (woman)
Mnemonic: The **next** **woman** that I date will have a perfect ***figure***. Because I’m done with 3D women—it will *literally* be an anime figure!

Kanji: 実 (real)
Components: 宀 (roof with a chimney), 𡗗 (three people)
Mnemonic: Living under a **roof with a chimney** with **three people** (a wife and two children)—a happy family life—is not something I could have ever imagined. It does not feel ***real***.

Kanji: 謝 (apologize)
Components: 言 (say), 射 (shoot)
Mnemonic: **Shoot** first, ***apologize*** (**say** you are sorry) later.

Kanji: 提 (propose)
Components: 扌 (left hand), 是 (go with)
Mnemonic: