Merge branch 'master' into HEAD

This commit is contained in:
Georgi Gerganov 2023-10-12 14:35:47 +03:00
commit 04ac0558de
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
8 changed files with 2430 additions and 2075 deletions

View file

@ -422,8 +422,7 @@ endif()
if (LLAMA_ALL_WARNINGS) if (LLAMA_ALL_WARNINGS)
if (NOT MSVC) if (NOT MSVC)
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
-Werror=implicit-function-declaration)
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn) set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
set(host_cxx_flags "") set(host_cxx_flags "")
@ -455,7 +454,8 @@ if (LLAMA_ALL_WARNINGS)
set(c_flags ${c_flags} ${warning_flags}) set(c_flags ${c_flags} ${warning_flags})
set(cxx_flags ${cxx_flags} ${warning_flags}) set(cxx_flags ${cxx_flags} ${warning_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>" add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>") "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
endif() endif()

View file

@ -279,7 +279,7 @@ In order to build llama.cpp you have three different options.
On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU. On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU.
To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option. To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option.
When built with Metal support, you can explicitly disable GPU inference with the `--gpu-layers|-ngl 0` command-line When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line
argument. argument.
### MPI Build ### MPI Build

View file

@ -496,11 +496,13 @@ test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release test $ret -eq 0 && gg_run ctest_release
if [ -z ${GG_BUILD_LOW_PERF} ]; then if [ -z ${GG_BUILD_LOW_PERF} ]; then
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
if [ -z ${GG_BUILD_CUDA} ]; then if [ -z ${GG_BUILD_CUDA} ]; then
test $ret -eq 0 && gg_run open_llama_3b_v2 test $ret -eq 0 && gg_run open_llama_3b_v2
else else
test $ret -eq 0 && gg_run open_llama_7b_v2 test $ret -eq 0 && gg_run open_llama_7b_v2
fi fi
fi
fi fi
exit $ret exit $ret

View file

@ -297,6 +297,9 @@ int main(int argc, char ** argv) {
LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n", LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n",
__func__, n_matching_session_tokens, embd_inp.size()); __func__, n_matching_session_tokens, embd_inp.size());
} }
// remove any "future" tokens that we might have inherited from the previous session
llama_kv_cache_tokens_rm(ctx, n_matching_session_tokens, -1);
} }
LOGLN( LOGLN(
@ -545,9 +548,6 @@ int main(int argc, char ** argv) {
if (i > 0) { if (i > 0) {
embd.erase(embd.begin(), embd.begin() + i); embd.erase(embd.begin(), embd.begin() + i);
} }
// remove any "future" tokens that we might have inherited from the session from the KV cache
llama_kv_cache_tokens_rm(ctx, n_past, -1);
} }
// evaluate tokens in batches // evaluate tokens in batches

File diff suppressed because it is too large Load diff

View file

@ -136,6 +136,11 @@
display: block; display: block;
} }
fieldset label.slim {
margin: 0 0.5em;
display: inline;
}
header, footer { header, footer {
text-align: center; text-align: center;
} }
@ -145,6 +150,14 @@
color: #888; color: #888;
} }
.mode-chat textarea[name=prompt] {
height: 4.5em;
}
.mode-completion textarea[name=prompt] {
height: 10em;
}
@keyframes loading-bg-wipe { @keyframes loading-bg-wipe {
0% { 0% {
@ -187,7 +200,7 @@
template: "{{prompt}}\n\n{{history}}\n{{char}}:", template: "{{prompt}}\n\n{{history}}\n{{char}}:",
historyTemplate: "{{name}}: {{message}}", historyTemplate: "{{name}}: {{message}}",
transcript: [], transcript: [],
type: "chat", type: "chat", // "chat" | "completion"
char: "Llama", char: "Llama",
user: "User", user: "User",
}) })
@ -365,13 +378,44 @@
return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key])); return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key]));
} }
async function runLlama(prompt, llamaParams, char) {
const currentMessages = [];
const history = session.value.transcript;
if (controller.value) {
throw new Error("already running");
}
controller.value = new AbortController();
for await (const chunk of llama(prompt, llamaParams, {controller: controller.value})) {
const data = chunk.data;
if (data.stop) {
while (
currentMessages.length > 0 &&
currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
) {
currentMessages.pop();
}
transcriptUpdate([...history, [char, currentMessages]])
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
} else {
currentMessages.push(data);
transcriptUpdate([...history, [char, currentMessages]])
}
if (data.timings) {
llamaStats.value = data.timings;
}
}
controller.value = null;
}
// send message to server // send message to server
const chat = async (msg) => { const chat = async (msg) => {
if (controller.value) { if (controller.value) {
console.log('already running...'); console.log('already running...');
return; return;
} }
controller.value = new AbortController();
transcriptUpdate([...session.value.transcript, ["{{user}}", msg]]) transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
@ -391,42 +435,25 @@
).join("\n"), ).join("\n"),
}); });
const currentMessages = []; await runLlama(prompt, {
const history = session.value.transcript
const llamaParams = {
...params.value, ...params.value,
stop: ["</s>", template("{{char}}:"), template("{{user}}:")], stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
}, "{{char}}");
} }
for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) { const runCompletion = async () => {
const data = chunk.data; if (controller.value) {
console.log('already running...');
if (data.stop) { return;
while (
currentMessages.length > 0 &&
currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
) {
currentMessages.pop();
} }
transcriptUpdate([...history, ["{{char}}", currentMessages]]) const {prompt} = session.value;
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data); transcriptUpdate([...session.value.transcript, ["", prompt]]);
} else { await runLlama(prompt, {
currentMessages.push(data); ...params.value,
transcriptUpdate([...history, ["{{char}}", currentMessages]]) stop: [],
}, "");
} }
if (data.timings) {
llamaStats.value = data.timings;
}
}
controller.value = null;
}
function MessageInput() {
const message = useSignal("")
const stop = (e) => { const stop = (e) => {
e.preventDefault(); e.preventDefault();
if (controller.value) { if (controller.value) {
@ -440,6 +467,9 @@
transcriptUpdate([]); transcriptUpdate([]);
} }
function MessageInput() {
const message = useSignal("")
const submit = (e) => { const submit = (e) => {
stop(e); stop(e);
chat(message.value); chat(message.value);
@ -474,6 +504,19 @@
` `
} }
function CompletionControls() {
const submit = (e) => {
stop(e);
runCompletion();
}
return html`
<div>
<button onclick=${submit} type="button" disabled=${generating.value}>Start</button>
<button onclick=${stop} disabled=${!generating.value}>Stop</button>
<button onclick=${reset}>Reset</button>
</div>`;
}
const ChatLog = (props) => { const ChatLog = (props) => {
const messages = session.value.transcript; const messages = session.value.transcript;
const container = useRef(null) const container = useRef(null)
@ -497,7 +540,11 @@
data; data;
message = html`<${Markdownish} text=${template(text)} />` message = html`<${Markdownish} text=${template(text)} />`
} }
if(user) {
return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>` return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
} else {
return html`<p key=${index}>${message}</p>`
}
}; };
return html` return html`
@ -574,18 +621,31 @@
userTemplateAutosave() userTemplateAutosave()
}, [session.value, params.value]) }, [session.value, params.value])
return html` const GrammarControl = () => (
<form> html`
<fieldset> <div>
<${UserTemplateResetButton}/> <label for="template">Grammar</label>
</fieldset> <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
</div>
`
);
const PromptControlFieldSet = () => (
html`
<fieldset> <fieldset>
<div> <div>
<label for="prompt">Prompt</label> <label htmlFor="prompt">Prompt</label>
<textarea type="text" name="prompt" value="${session.value.prompt}" rows=4 oninput=${updateSession}/> <textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
</div> </div>
</fieldset> </fieldset>
`
);
const ChatConfigForm = () => (
html`
${PromptControlFieldSet()}
<fieldset class="two"> <fieldset class="two">
<div> <div>
@ -609,15 +669,30 @@
<label for="template">Chat history template</label> <label for="template">Chat history template</label>
<textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/> <textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
</div> </div>
${GrammarControl()}
</fieldset>
`
);
const CompletionConfigForm = () => (
html`
${PromptControlFieldSet()}
<fieldset>${GrammarControl()}</fieldset>
`
);
return html`
<form>
<fieldset class="two">
<${UserTemplateResetButton}/>
<div> <div>
<label for="template">Grammar</label> <label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
<textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/> <label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
</div> </div>
</fieldset> </fieldset>
${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}
<fieldset class="two"> <fieldset class="two">
${IntField({label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict})} ${IntField({label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict})}
${FloatField({label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature})} ${FloatField({label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature})}
@ -851,7 +926,7 @@
function App(props) { function App(props) {
return html` return html`
<div> <div class="mode-${session.value.type}">
<header> <header>
<h1>llama.cpp</h1> <h1>llama.cpp</h1>
</header> </header>
@ -861,7 +936,7 @@
</main> </main>
<section id="write"> <section id="write">
<${MessageInput} /> <${session.value.type === 'chat' ? MessageInput : CompletionControls} />
</section> </section>
<footer> <footer>

View file

@ -405,6 +405,7 @@ struct llama_server_context
// compare the evaluated prompt with the new prompt // compare the evaluated prompt with the new prompt
n_past = common_part(embd, prompt_tokens); n_past = common_part(embd, prompt_tokens);
embd = prompt_tokens; embd = prompt_tokens;
if (n_past == num_prompt_tokens) if (n_past == num_prompt_tokens)
{ {
// we have to evaluate at least 1 token to generate logits. // we have to evaluate at least 1 token to generate logits.
@ -412,6 +413,9 @@ struct llama_server_context
n_past--; n_past--;
} }
// since #3228 we now have to manually manage the KV cache
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
LOG_VERBOSE("prompt ingested", { LOG_VERBOSE("prompt ingested", {
{"n_past", n_past}, {"n_past", n_past},
{"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)}, {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
@ -461,9 +465,6 @@ struct llama_server_context
// compare the evaluated prompt with the new prompt // compare the evaluated prompt with the new prompt
n_past = common_part(embd, prompt_tokens); n_past = common_part(embd, prompt_tokens);
// since #3228 we now have to manually manage the KV cache
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
embd = prompt_tokens; embd = prompt_tokens;
if (n_past == num_prompt_tokens) if (n_past == num_prompt_tokens)
{ {
@ -471,6 +472,9 @@ struct llama_server_context
n_past--; n_past--;
} }
// since #3228 we now have to manually manage the KV cache
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
LOG_VERBOSE("prompt ingested", { LOG_VERBOSE("prompt ingested", {
{"n_past", n_past}, {"n_past", n_past},
{"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)}, {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},

93
prompts/mnemonics.txt Normal file
View file

@ -0,0 +1,93 @@
For each kanji character, write a Markdown-formatted mnemonic that uses its keyword and the keyword of all its components.
Kanji: 欠 (lack of)
Components: 𠂊 (hook claw), 人 (person)
Mnemonic: This **person** is a pirate. He lost his hand to a crocodile many years ago. Nowadays, the ***lack of*** a hand does not bother him too much. In fact, the **hook claw** that replaces it is the mark of a true pirate, so he is quite proud of it!
Kanji: 類 (kind (of something))
Components: 米 (rice), 大 (large), 頁 (page)
Mnemonic: The waiter at a Chinese restaurant hands you a **large** menu. Each **page** has all ***kinds*** of **rice** on offer!
Kanji: 燃 (burn)
Components: 火 (fire), 然 (sort of thing)
Mnemonic: ***Burning*** things up with **fire** is just my **sort of thing**. (Spoken like a true pyromaniac.)
Kanji: 頂 (top of)
Components: 丁 (street), 頁 (page)
Mnemonic: To be at the ***top of*** your game, you need both practical knowledge (**street** smarts) and theoretical knowledge (having read many **pages**).
Kanji: 険 (risky and steep)
Components: 阝 (small village), 㑒 (consensus)
Mnemonic: Everyone agrees (there is **consensus**) that the path to the **small village** is ***risky and steep***.
Kanji: 困 (distressed)
Components: 囗 (closed box), 木 (tree)
Mnemonic: You would feel ***distressed*** too if you were a **tree** trapped in a **closed box**! I have no place to grow!
Kanji: 頭 (head)
Components: 豆 (bean), 頁 (page)
Mnemonic: What do you have in that ***head*** of yours? A **bean** for a brain? Go read more **pages** and become more knowledgeable about the world!
Kanji: 確 (certain)
Components: 石 (stone), 冖 (roof without a chimney), 隹 (old bird)
Mnemonic: An **old bird** has made a nest on your **roof**. What do you do? You call Misaka from a <cite>A ***Certain*** Scientific Railgun</cite> to get rid of it, of course! But she doesn't really want to vaporize the poor thing, so she just throws a **stone** to scare it away. (What was the point of calling her, then‽)
Kanji: 魚 (fish)
Components: 𠂊 (hook claw), 田 (rice field), 灬 (fire sparks)
Mnemonic: Catch ***fish*** with a **hook**, collect rice from the **rice field**, cook them with **fire**… And my meal is ready!
Kanji: 警 (to police (something))
Components: 敬 (respect), 言 (say)
Mnemonic: ***To police something*** is to make people **respect** what the law **says**.
Kanji: 筆 (writing brush)
Components: 竹 (bamboo), 聿 (brush)
Mnemonic: A traditional ***writing brush*** is a **brush** made of **bamboo**.
Kanji: 獄 (prison)
Components: 犭 (animal), 言 (say), 犬 (dog)
Mnemonic: In ***prison***, like in the **animal** kingdom, only the toughest survive. You have to watch what you **say**. It's a **dog**-eat-**dog** world.
Kanji: 新 (new)
Components: 立 (standing up), 木 (tree), 斤 (axe)
Mnemonic: In order for a ***new*** construction to be made, an empty lot is needed. If there are any **trees** **standing up**, they must be cut down with an **axe**.
Kanji: 怪 (suspicious)
Components: 忄 (weak heart), 圣 (sacred)
Mnemonic: That painting of the **Sacred** **Heart** of Jesus looks ***suspicious***. I think it might be a forgery.
Kanji: 温 (warm (to the touch))
Components: 氵 (water drops), 日 (sun), 皿 (dish)
Mnemonic: If you leave **water** on a **dish** in the **sun**, it will get ***warm***.
Kanji: 階 (floor (of a building))
Components: 阝 (small village), 皆 (all)
Mnemonic: It might be a **small village**, but, despite that, **all** of its buildings have many ***floors***. It's a village of skyscrapers!
Kanji: 多 (many)
Components: 夕 (evening (before sunset)), 夕 (evening (before sunset))
Mnemonic: Two **evenings** in a day would be one too ***many***.
Kanji: 別 (separate)
Components: 口 (mouth), 万 (ten thousand), 刂 (knife)
Mnemonic: Tom Six is at it again. For his next flick, he wants to stitch together **ten thousand** people, **mouth**-to-anus. One of the most graphic and disturbing scenes will feature one of the victims using a **knife** to ***separate*** perself.
Kanji: 並 (line up)
Components: 䒑 (antlers on a wall), 业 (runway)
Mnemonic: In order to land a plane you have to ***line up*** properly with the **runway**. The things that look like **antlers** at the end of the runway are the control towers; you should follow their instructions.
Kanji: 姿 (figure)
Components: 次 (next), 女 (woman)
Mnemonic: The **next** **woman** that I date will have a perfect ***figure***. Because I'm done with 3D women—it will *literally* be an anime figure!
Kanji: 実 (real)
Components: 宀 (roof with a chimney), 𡗗 (three people)
Mnemonic: Living under a **roof with a chimney** with **three people** (a wife and two children)—a happy family life—is not something I could have ever imagined. It does not feel ***real***.
Kanji: 謝 (apologize)
Components: 言 (say), 射 (shoot)
Mnemonic: **Shoot** first, ***apologize*** (**say** you are sorry) later.
Kanji: 提 (propose)
Components: 扌 (left hand), 是 (go with)
Mnemonic: