correct probabilities usage

This commit is contained in:
Jhen 2023-08-21 12:47:39 +08:00
parent a7042c187f
commit b7ddf04a26
2 changed files with 829 additions and 1001 deletions

File diff suppressed because it is too large Load diff

View file

@ -382,63 +382,12 @@
for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
const data = chunk.data;
currentMessages.push(data);
let messages = currentMessages
if (data.stop && params.value.n_probs > 0) {
messages = []
let i = 0;
while (i < data.completion_probabilities.length) {
let prob = data.completion_probabilities[i]
prob = {
...prob,
completion_probabilities: [{ content: prob.content, probs: prob.probs }]
}
// Merge byte characters
if (prob.content.startsWith('byte: ')) {
const c = parseByteChar(prob.content)
if (!c) {
i++;
continue;
}
let multibytePending = 0;
if ((c & 0xE0) == 0xC0) {
// 2-byte characters: 110xxxxx 10xxxxxx
multibytePending = 1;
} else if ((c & 0xF0) == 0xE0) {
// 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
multibytePending = 2;
} else if ((c & 0xF8) == 0xF0) {
// 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
multibytePending = 3;
}
prob.content = String.fromCharCode(c);
// Forward index to the end of the multibyte character
if (multibytePending > 0) {
for (let j = 0; j < multibytePending; j++) {
i++;
const nextProb = data.completion_probabilities[i]
if (!nextProb) break;
const next = parseByteChar(nextProb.content);
prob.content = prob.content + String.fromCharCode(next);
prob.completion_probabilities.push(nextProb);
}
prob.content = decodeURIComponent(escape(prob.content));
}
}
messages.push(prob);
i++;
}
console.log('Processed completion probabilities to messages: ', messages)
}
transcriptUpdate([...history, ["{{char}}", messages]])
if (data.stop) {
console.log("Completion finished: '", messages.map(msg => msg.content).join(''), "', summary: ", data);
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
} else {
currentMessages.push(data);
transcriptUpdate([...history, ["{{char}}", currentMessages]])
}
if (data.timings) {
@ -680,10 +629,20 @@
const { completion_probabilities } = msg;
if (
!completion_probabilities ||
// Skip empty array or byte pair (> 1)
completion_probabilities.length !== 1
completion_probabilities.length === 0
) return msg.content
if (completion_probabilities.length > 1) {
// Not for byte pair
if (completion_probabilities[0].content.startsWith('byte: \\')) return msg.content
const splitData = completion_probabilities.map(prob => ({
content: prob.content,
completion_probabilities: [prob]
}))
return html`<${Probabilites} data=${splitData} />`
}
const { probs } = completion_probabilities[0]
const found = probs.find(p => p.tok_str === msg.content)
const pColor = probColor(found ? found.prob : 0)