Expose generation timings from server & update completions.js (#2116)
* use javascript generators as much cleaner API Also add ways to access completion as promise and EventSource * export llama_timings as struct and expose them in server * update readme, update baked includes * llama : uniform variable names + struct init --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
983b555e9d
commit
31cfbb1013
9 changed files with 1921 additions and 1363 deletions
|
@ -6,7 +6,6 @@
|
|||
<title>llama.cpp - chat</title>
|
||||
|
||||
<style>
|
||||
|
||||
body {
|
||||
background-color: #fff;
|
||||
color: #000;
|
||||
|
@ -22,10 +21,6 @@
|
|||
height: 100%;
|
||||
}
|
||||
|
||||
header, footer {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
main {
|
||||
margin: 3px;
|
||||
display: flex;
|
||||
|
@ -99,6 +94,15 @@
|
|||
margin: 0.5em 0;
|
||||
display: block;
|
||||
}
|
||||
|
||||
header, footer {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
footer {
|
||||
font-size: 80%;
|
||||
color: #888;
|
||||
}
|
||||
</style>
|
||||
|
||||
<script type="module">
|
||||
|
@ -106,10 +110,10 @@
|
|||
html, h, signal, effect, computed, render, useSignal, useEffect, useRef
|
||||
} from '/index.js';
|
||||
|
||||
import { llamaComplete } from '/completion.js';
|
||||
import { llama } from '/completion.js';
|
||||
|
||||
const session = signal({
|
||||
prompt: "This is a conversation between user and llama, a friendly chatbot. respond in markdown.",
|
||||
prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
|
||||
template: "{{prompt}}\n\n{{history}}\n{{char}}:",
|
||||
historyTemplate: "{{name}}: {{message}}",
|
||||
transcript: [],
|
||||
|
@ -118,15 +122,6 @@
|
|||
user: "User",
|
||||
})
|
||||
|
||||
const transcriptUpdate = (transcript) => {
|
||||
session.value = {
|
||||
...session.value,
|
||||
transcript
|
||||
}
|
||||
}
|
||||
|
||||
const chatStarted = computed(() => session.value.transcript.length > 0)
|
||||
|
||||
const params = signal({
|
||||
n_predict: 400,
|
||||
temperature: 0.7,
|
||||
|
@ -136,8 +131,18 @@
|
|||
top_p: 0.5,
|
||||
})
|
||||
|
||||
const llamaStats = signal(null)
|
||||
const controller = signal(null)
|
||||
|
||||
const generating = computed(() => controller.value == null )
|
||||
const chatStarted = computed(() => session.value.transcript.length > 0)
|
||||
|
||||
const transcriptUpdate = (transcript) => {
|
||||
session.value = {
|
||||
...session.value,
|
||||
transcript
|
||||
}
|
||||
}
|
||||
|
||||
// simple template replace
|
||||
const template = (str, extraSettings) => {
|
||||
|
@ -158,7 +163,7 @@
|
|||
|
||||
transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
|
||||
|
||||
const payload = template(session.value.template, {
|
||||
const prompt = template(session.value.template, {
|
||||
message: msg,
|
||||
history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"),
|
||||
});
|
||||
|
@ -168,22 +173,26 @@
|
|||
|
||||
const llamaParams = {
|
||||
...params.value,
|
||||
prompt: payload,
|
||||
stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
|
||||
}
|
||||
|
||||
await llamaComplete(llamaParams, controller.value, (message) => {
|
||||
const data = message.data;
|
||||
for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
|
||||
const data = chunk.data;
|
||||
currentMessage += data.content;
|
||||
|
||||
// remove leading whitespace
|
||||
currentMessage = currentMessage.replace(/^\s+/, "")
|
||||
|
||||
transcriptUpdate([...history, ["{{char}}", currentMessage]])
|
||||
|
||||
if (data.stop) {
|
||||
console.log("-->", data, ' response was:', currentMessage, 'transcript state:', session.value.transcript);
|
||||
console.log("Completion finished: '", currentMessage, "', summary: ", data);
|
||||
}
|
||||
})
|
||||
|
||||
if (data.timings) {
|
||||
llamaStats.value = data.timings;
|
||||
}
|
||||
}
|
||||
|
||||
controller.value = null;
|
||||
}
|
||||
|
@ -219,13 +228,12 @@
|
|||
return html`
|
||||
<form onsubmit=${submit}>
|
||||
<div>
|
||||
<textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
|
||||
|
||||
<textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
|
||||
</div>
|
||||
<div class="right">
|
||||
<button type="submit" disabled=${!generating.value} >Send</button>
|
||||
<button onclick=${stop} disabled=${generating}>Stop</button>
|
||||
<button onclick=${reset}>Reset</button>
|
||||
<button type="submit" disabled=${!generating.value} >Send</button>
|
||||
<button onclick=${stop} disabled=${generating}>Stop</button>
|
||||
<button onclick=${reset}>Reset</button>
|
||||
</div>
|
||||
</form>
|
||||
`
|
||||
|
@ -243,7 +251,7 @@
|
|||
}, [messages])
|
||||
|
||||
const chatLine = ([user, msg]) => {
|
||||
return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdown} text=${template(msg)} /></p>`
|
||||
return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdownish} text=${template(msg)} /></p>`
|
||||
};
|
||||
|
||||
return html`
|
||||
|
@ -313,39 +321,52 @@
|
|||
</form>
|
||||
`
|
||||
}
|
||||
const Markdown = (params) => {
|
||||
const md = params.text
|
||||
.replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
|
||||
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
|
||||
.replace(/__(.*?)__/g, '<strong>$1</strong>')
|
||||
.replace(/\*(.*?)\*/g, '<em>$1</em>')
|
||||
.replace(/_(.*?)_/g, '<em>$1</em>')
|
||||
.replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
|
||||
.replace(/`(.*?)`/g, '<code>$1</code>')
|
||||
.replace(/\n/gim, '<br />');
|
||||
return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
|
||||
};
|
||||
// poor mans markdown replacement
|
||||
const Markdownish = (params) => {
|
||||
const md = params.text
|
||||
.replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
|
||||
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
|
||||
.replace(/__(.*?)__/g, '<strong>$1</strong>')
|
||||
.replace(/\*(.*?)\*/g, '<em>$1</em>')
|
||||
.replace(/_(.*?)_/g, '<em>$1</em>')
|
||||
.replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
|
||||
.replace(/`(.*?)`/g, '<code>$1</code>')
|
||||
.replace(/\n/gim, '<br />');
|
||||
return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
|
||||
};
|
||||
|
||||
const ModelGenerationInfo = (params) => {
|
||||
if (!llamaStats.value) {
|
||||
return html`<span/>`
|
||||
}
|
||||
return html`
|
||||
<span>
|
||||
${llamaStats.value.predicted_per_token_ms.toFixed()}ms per token, ${llamaStats.value.predicted_per_second.toFixed(2)} tokens per second
|
||||
</span>
|
||||
`
|
||||
}
|
||||
|
||||
function App(props) {
|
||||
|
||||
return html`
|
||||
<div id="container">
|
||||
<header>
|
||||
<h1>llama.cpp</h1>
|
||||
</header>
|
||||
<div id="container">
|
||||
<header>
|
||||
<h1>llama.cpp</h1>
|
||||
</header>
|
||||
|
||||
<main id="content">
|
||||
<${chatStarted.value ? ChatLog : ConfigForm} />
|
||||
</main>
|
||||
<main id="content">
|
||||
<${chatStarted.value ? ChatLog : ConfigForm} />
|
||||
</main>
|
||||
|
||||
<footer id="write">
|
||||
<${MessageInput} />
|
||||
</footer>
|
||||
<section id="write">
|
||||
<${MessageInput} />
|
||||
</section>
|
||||
|
||||
<footer>
|
||||
<p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a></p>
|
||||
</footer>
|
||||
</div>
|
||||
<footer>
|
||||
<p><${ModelGenerationInfo} /></p>
|
||||
<p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
|
||||
</footer>
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue