Merge branch 'master' into name-metadata-fix
This commit is contained in:
commit 6324c528d1
6 changed files with 780 additions and 334 deletions

README.md
@@ -137,6 +137,7 @@ as the main playground for developing new features for the [ggml](https://github
 - [semperai/amica](https://github.com/semperai/amica)
 - [psugihara/FreeChat](https://github.com/psugihara/FreeChat)
 - [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal)
+- [iohub/collama](https://github.com/iohub/coLLaMA)
 
 ---
 
convert.py (967 lines)
File diff suppressed because it is too large.

examples/llava/llava-cli.cpp
@@ -243,6 +243,9 @@ int main(int argc, char ** argv) {
     }
 
     auto image_embed = load_image(ctx_llava, &params);
+    if (!image_embed) {
+        return 1;
+    }
 
     // process the prompt
     process_prompt(ctx_llava, image_embed, &params, params.prompt);

examples/server/README.md
@@ -175,35 +175,44 @@ node index.js
 
 `system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
 
-*Result JSON:*
+### Result JSON:
 
-Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
+* Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
 
-`content`: Completion result as a string (excluding `stopping_word` if any). In case of streaming mode, will contain the next token as a string.
-
-`stop`: Boolean for use with `stream` to check whether the generation has stopped (Note: This is not related to stopping words array `stop` from input options)
-
-`generation_settings`: The provided options above excluding `prompt` but including `n_ctx`, `model`
-
-`model`: The path to the model loaded with `-m`
-
-`prompt`: The provided `prompt`
-
-`stopped_eos`: Indicating whether the completion has stopped because it encountered the EOS token
-
-`stopped_limit`: Indicating whether the completion stopped because `n_predict` tokens were generated before stop words or EOS was encountered
-
-`stopped_word`: Indicating whether the completion stopped due to encountering a stopping word from `stop` JSON array provided
-
-`stopping_word`: The stopping word encountered which stopped the generation (or "" if not stopped due to a stopping word)
-
-`timings`: Hash of timing information about the completion such as the number of tokens `predicted_per_second`
-
-`tokens_cached`: Number of tokens from the prompt which could be re-used from previous completion (`n_past`)
-
-`tokens_evaluated`: Number of tokens evaluated in total from the prompt
-
-`truncated`: Boolean indicating if the context size was exceeded during generation, i.e. the number of tokens provided in the prompt (`tokens_evaluated`) plus tokens generated (`tokens_predicted`) exceeded the context size (`n_ctx`)
+- `completion_probabilities`: An array of token probabilities for each completion. The array's length is `n_predict`. Each item in the array has the following structure:
+
+```
+{
+  "content": "<the token selected by the model>",
+  "probs": [
+    {
+      "prob": float,
+      "tok_str": "<most likely token>"
+    },
+    {
+      "prob": float,
+      "tok_str": "<second most likely token>"
+    },
+    ...
+  ]
+},
+```
+Notice that each `probs` is an array of length `n_probs`.
+
+- `content`: Completion result as a string (excluding `stopping_word` if any). In case of streaming mode, will contain the next token as a string.
+- `stop`: Boolean for use with `stream` to check whether the generation has stopped (Note: This is not related to stopping words array `stop` from input options)
+- `generation_settings`: The provided options above excluding `prompt` but including `n_ctx`, `model`
+- `model`: The path to the model loaded with `-m`
+- `prompt`: The provided `prompt`
+- `stopped_eos`: Indicating whether the completion has stopped because it encountered the EOS token
+- `stopped_limit`: Indicating whether the completion stopped because `n_predict` tokens were generated before stop words or EOS was encountered
+- `stopped_word`: Indicating whether the completion stopped due to encountering a stopping word from `stop` JSON array provided
+- `stopping_word`: The stopping word encountered which stopped the generation (or "" if not stopped due to a stopping word)
+- `timings`: Hash of timing information about the completion such as the number of tokens `predicted_per_second`
+- `tokens_cached`: Number of tokens from the prompt which could be re-used from previous completion (`n_past`)
+- `tokens_evaluated`: Number of tokens evaluated in total from the prompt
+- `truncated`: Boolean indicating if the context size was exceeded during generation, i.e. the number of tokens provided in the prompt (`tokens_evaluated`) plus tokens generated (`tokens_predicted`) exceeded the context size (`n_ctx`)
 
 - **POST** `/tokenize`: Tokenize a given text.
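
To make the fields above concrete, here is a minimal Python sketch (an editorial example, not part of this diff) that exercises `/completion` with `n_probs` set, then `/tokenize`. It assumes a server already running on the default `localhost:8080` and the third-party `requests` package; the `/tokenize` field names (`content` in, `tokens` out) are an assumption worth double-checking against the full README.

```python
import requests  # third-party: pip install requests

BASE = "http://localhost:8080"  # assumption: default server host/port

# /completion: n_probs > 0 asks for the top-n token probabilities per position.
resp = requests.post(f"{BASE}/completion", json={
    "prompt": "Building a website can be done in 10 simple steps:",
    "n_predict": 16,
    "n_probs": 2,
})
resp.raise_for_status()
data = resp.json()

print("content:", data["content"])
print("stop:", data["stop"], "| stopped_eos:", data["stopped_eos"])

# Each entry mirrors the structure documented above: the chosen token plus
# an n_probs-long list of {"prob": ..., "tok_str": ...} candidates.
for item in data.get("completion_probabilities", []):
    top = item["probs"][0]
    print(f"{item['content']!r} -> top candidate {top['tok_str']!r} (p={top['prob']:.3f})")

# /tokenize: returns the token ids for a given text.
toks = requests.post(f"{BASE}/tokenize", json={"content": "Hello, world!"}).json()
print("tokens:", toks["tokens"])
```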

ggml-metal.metal
@@ -3841,8 +3841,8 @@ void dequantize_q3_K(device const block_q3_K *xb, short il, thread type4x4 & reg
     uint16_t scale_2 = scales[il%8], scale_1 = scales[8 + il%4];
     int16_t dl_int = (il/4)&1 ? (scale_2&kmask2) | ((scale_1&kmask1) << 2)
                               : (scale_2&kmask2) | ((scale_1&kmask1) << 4);
-    half dl = il<8 ? d_all * (dl_int - 32.h) : d_all * (dl_int / 16.h - 32.h);
-    const half ml = 4.h * dl;
+    float dl = il<8 ? d_all * (dl_int - 32.f) : d_all * (dl_int / 16.f - 32.f);
+    const float ml = 4.f * dl;
 
     il = (il/2) & 3;
     const half coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h);

@@ -3909,7 +3909,7 @@ void dequantize_q5_K(device const block_q5_K *xb, short il, thread type4x4 & reg
     uint8_t ul = 1 << (il/2);
     il = il & 3;
     const uchar2 sc = get_scale_min_k4_just2(is, il/2, xb->scales);
-    const float d = il < 2 ? xb->d : xb->d / 16.h;
+    const float d = il < 2 ? xb->d : xb->d / 16.f;
     const float min = xb->dmin;
     const float dl = d * sc[0];
     const float ml = min * sc[1];

@@ -3942,17 +3942,17 @@ void dequantize_q6_K(device const block_q6_K *xb, short il, thread type4x4 & reg
 #if QK_K == 256
     ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1);
     qh = qh + 32*(il/8) + 16*(il&1);
-    half sc = scales[(il%2) + 2 * ((il/2))];
+    float sc = scales[(il%2) + 2 * ((il/2))];
     il = (il/2) & 3;
 #else
     ql = ql + 16 * (il&1);
-    half sc = scales[il];
+    float sc = scales[il];
 #endif
     const uint16_t kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3);
     const uint16_t kmask2 = il>1 ? 0xF0 : 0x0F;
-    const half coef = il>1 ? 1.f/16.h : 1.h;
-    const half ml = d_all * sc * 32.h;
-    const half dl = d_all * sc * coef;
+    const float coef = il>1 ? 1.f/16.f : 1.f;
+    const float ml = d_all * sc * 32.f;
+    const float dl = d_all * sc * coef;
     for (int i = 0; i < 16; ++i) {
         const half q = il&1 ? ((ql[i] & kmask2) | ((qh[i] & kmask1) << 2))
                             : ((ql[i] & kmask2) | ((qh[i] & kmask1) << 4));
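
All three hunks above make the same change: the per-block scale arithmetic in the dequantization kernels moves from `half` to `float`, presumably to avoid fp16 rounding in expressions like `d_all * sc * 32`. A small numpy sketch (the magnitudes are made up for illustration; `d_all` and `sc` mirror the kernel's names) shows the kind of drift fp16 intermediates introduce:

```python
import numpy as np

# Made-up but plausible magnitudes for a q6_K block scale and sub-scale.
d_all = 0.0011
sc = 113.0

# fp16 path: every intermediate product is rounded to half precision.
ml_half = np.float16(d_all) * np.float16(sc) * np.float16(32.0)
# fp32 path: what the patched kernel computes.
ml_float = np.float32(d_all) * np.float32(sc) * np.float32(32.0)

print(ml_half, ml_float)
print("abs error:", abs(np.float32(ml_half) - ml_float))
```

The two results differ in the low-order digits, since fp16 carries only about three decimal digits of precision; accumulated over a whole tensor, that is the sort of error the switch to `float` sidesteps.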

scripts/get-pg.sh (new executable file, 70 lines)
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+function usage {
+    echo "usage: $0 <n>"
+    echo "note: n is the number of essays to download"
+    echo "for specific n, the resulting pg.txt file will have the following number of tokens:"
+    echo "n | tokens"
+    echo "--- | ---"
+    echo "1 | 6230"
+    echo "2 | 23619"
+    echo "5 | 25859"
+    echo "10 | 36888"
+    echo "15 | 50188"
+    echo "20 | 59094"
+    echo "25 | 88764"
+    echo "30 | 103121"
+    echo "32 | 108338"
+    echo "35 | 113403"
+    echo "40 | 127699"
+    echo "45 | 135896"
+    exit 1
+}
+
+function has_cmd {
+    if ! [ -x "$(command -v $1)" ]; then
+        echo "error: $1 is not available" >&2
+        exit 1
+    fi
+}
+
+# check for: curl, html2text, tail, sed, fmt
+has_cmd curl
+has_cmd html2text
+has_cmd tail
+has_cmd sed
+
+if [ $# -ne 1 ]; then
+    usage
+fi
+
+n=$1
+
+# get urls
+urls="$(curl http://www.aaronsw.com/2002/feeds/pgessays.rss | grep html | sed -e "s/.*http/http/" | sed -e "s/html.*/html/" | head -n $n)"
+
+printf "urls:\n%s\n" "$urls"
+
+if [ -f pg.txt ]; then
+    rm pg.txt
+fi
+
+c=1
+for url in $urls; do
+    echo "processing $url"
+
+    cc=$(printf "%03d" $c)
+
+    curl -L $url | html2text | tail -n +4 | sed -E "s/^[[:space:]]+//g" | fmt -w 80 >> pg-$cc-one.txt
+    cat pg-$cc-one.txt >> pg.txt
+
+    cp -v pg.txt pg-$cc-all.txt
+    c=$((c+1))
+
+    # don't flood the server
+    sleep 1
+done
+
+echo "done. data in pg.txt"
+
+exit 0
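
A usage note (editorial, not part of the diff): judging by the table printed in `usage`, running `./scripts/get-pg.sh 10` downloads the first ten essays from the feed and leaves roughly 37k tokens of plain text in `pg.txt`, along with per-essay files `pg-NNN-one.txt` and cumulative snapshots `pg-NNN-all.txt`.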