Compare commits
1413 commits
gg/nix-rem
...
master
Author | SHA1 | Date | |
---|---|---|---|
|
d7b31a9d84 | ||
|
9ac3457b39 | ||
|
c2a67efe38 | ||
|
b044a0fe3c | ||
|
19d3c8293b | ||
|
98f6b0fd1e | ||
|
55ac8c7791 | ||
|
e6e6583199 | ||
|
aaa5505307 | ||
|
bdcf8b6a56 | ||
|
4d3465c5ae | ||
|
d80be897ac | ||
|
3ab410f55f | ||
|
0cf867160c | ||
|
d2fe216fb2 | ||
|
ed926d8833 | ||
|
2d219b389e | ||
|
333820d749 | ||
|
c026ba3c23 | ||
|
7ee953a64a | ||
|
ec3bc8270b | ||
|
b7552cfcbc | ||
|
225bbbfa39 | ||
|
855cd0734a | ||
|
8a59053f63 | ||
|
1d20e53c40 | ||
|
2fb3c32a16 | ||
|
9ab42dc722 | ||
|
194b2e69f8 | ||
|
9dd7a0390f | ||
|
c0d4843225 | ||
|
8d4d2be143 | ||
|
2c6c8df56d | ||
|
8a7e3bf17a | ||
|
1b598b3058 | ||
|
902368a06b | ||
|
c3db0480bb | ||
|
d774ab3acc | ||
|
fa62da9b2d | ||
|
1ec208083c | ||
|
9f4cc8f8d3 | ||
|
fd08255d0d | ||
|
3ec9fd4b77 | ||
|
3962fc1a79 | ||
|
1bef571f6a | ||
|
db288b60cb | ||
|
106045e7bb | ||
|
f117d84b48 | ||
|
534c46b53c | ||
|
387a1598ca | ||
|
7c9e0ca520 | ||
|
8f8290ada9 | ||
|
b34aedd558 | ||
|
cde3833239 | ||
|
b3451785ac | ||
|
1d1e6a90bc | ||
|
5598f475be | ||
|
8ec05832fa | ||
|
21c84b5d2d | ||
|
d92cb67e37 | ||
|
6eecde3cc8 | ||
|
396856b400 | ||
|
4d0598e144 | ||
|
90f9b88afb | ||
|
864a0b67a6 | ||
|
84ec8a58f7 | ||
|
bfcce4d693 | ||
|
69804487e0 | ||
|
ff227703d6 | ||
|
0cec062a63 | ||
|
53debe6f3c | ||
|
cfd74c86db | ||
|
ecef206ccb | ||
|
5bbc7362cb | ||
|
aa6fb13213 | ||
|
a83f528688 | ||
|
b1bcd309fc | ||
|
5783575c9d | ||
|
4a2b196d03 | ||
|
1bd3047a93 | ||
|
a2df2787b3 | ||
|
553f1e46e9 | ||
|
8b576b6c55 | ||
|
27d135c970 | ||
|
6af1ca48cb | ||
|
c300e68ef4 | ||
|
3d804dec76 | ||
|
ffd0821c57 | ||
|
4314e56c4f | ||
|
496e5bf46b | ||
|
7919256c57 | ||
|
e0449763a4 | ||
|
eb7cf15a80 | ||
|
66ee4f297c | ||
|
e51c47b401 | ||
|
2711d0215f | ||
|
f0d4b29edf | ||
|
815857791d | ||
|
1a0e87d291 | ||
|
d2e518e9b4 | ||
|
b636228c0a | ||
|
325afb370a | ||
|
794fe23f29 | ||
|
cf8cc856d7 | ||
|
d0c08040b6 | ||
|
be5ef7963f | ||
|
cae9fb4361 | ||
|
7fee2889e6 | ||
|
d7d1eccacc | ||
|
4bf3119d61 | ||
|
f643120bad | ||
|
6e84b0ab8e | ||
|
2b8525d5c8 | ||
|
a4417ddda9 | ||
|
d6d24cd9ed | ||
|
a5203b4465 | ||
|
df984e0147 | ||
|
acd38efee3 | ||
|
caf773f249 | ||
|
178a7eb952 | ||
|
6f53d8a6b4 | ||
|
19f65187cb | ||
|
1d8ee06000 | ||
|
2cc9b8c32c | ||
|
f35726c2fb | ||
|
4a75d19376 | ||
|
26771a1491 | ||
|
ca6baf76c1 | ||
|
6e264a905b | ||
|
49b0e3cec4 | ||
|
20a758155b | ||
|
00c24acb2a | ||
|
466ea66f33 | ||
|
5f0db9522f | ||
|
c5d9effb49 | ||
|
9fbadaef4f | ||
|
9755129c27 | ||
|
a07c2c8a52 | ||
|
8137b4bb2b | ||
|
1af6945eb0 | ||
|
01f37edf1a | ||
|
c07e87f38b | ||
|
564804b79b | ||
|
05f63cc9ee | ||
|
f7fb43cd0b | ||
|
5845661640 | ||
|
f211d1dc10 | ||
|
955a6c2d91 | ||
|
1971adf55e | ||
|
5245729e33 | ||
|
6152129d05 | ||
|
16d3df7ab0 | ||
|
12c2bdf2de | ||
|
c64d2becb1 | ||
|
96f4053934 | ||
|
a94f3b2727 | ||
|
3e3357fd77 | ||
|
6171c9d258 | ||
|
e28245f35f | ||
|
6da5bec81c | ||
|
2e2f8f093c | ||
|
2139667ec4 | ||
|
80d0d6b4b7 | ||
|
aea8ddd516 | ||
|
9f7add1cde | ||
|
90d987b105 | ||
|
a4251edd6f | ||
|
ec7f3ac9ab | ||
|
ef6dada60c | ||
|
ae3c1db2f9 | ||
|
92bc493917 | ||
|
b9daaffe02 | ||
|
99487b57d4 | ||
|
a1649cc13f | ||
|
4dd34ff831 | ||
|
f30f099228 | ||
|
f26c874179 | ||
|
6390a998bf | ||
|
44e18ef939 | ||
|
3edfa7d375 | ||
|
667d72846c | ||
|
a133566d34 | ||
|
960ec65273 | ||
|
7a689c415e | ||
|
bd38ddea01 | ||
|
466300fe14 | ||
|
206bc53422 | ||
|
4dbc8b9cb7 | ||
|
9c8dcefe17 | ||
|
681149ced2 | ||
|
c67cc9837d | ||
|
adc5dd92e8 | ||
|
f11cfdfd7f | ||
|
1d8504338e | ||
|
432df2d5f9 | ||
|
0ccd7f3eb2 | ||
|
f446c2cf6a | ||
|
b4d92a59a2 | ||
|
bbf3e55e35 | ||
|
c5bf0d1bd7 | ||
|
091592d758 | ||
|
44d1e796d0 | ||
|
a4f3f5d8e6 | ||
|
48e1ae0e61 | ||
|
d00a80e89d | ||
|
504af20ee4 | ||
|
84a44815f7 | ||
|
39509fb082 | ||
|
a29f0870d4 | ||
|
437e05f714 | ||
|
ca001f6656 | ||
|
00b4c3da62 | ||
|
7426a26b24 | ||
|
8f70fc3d1b | ||
|
1244cdcf14 | ||
|
924518e2e5 | ||
|
9a483999a6 | ||
|
08f10f69c3 | ||
|
afa8a9ec9b | ||
|
c05e8c9934 | ||
|
2739a71e4b | ||
|
ba8a1f9c5b | ||
|
ff3fcabc72 | ||
|
c3f9d25706 | ||
|
ee7136c6d1 | ||
|
c6860cc734 | ||
|
1204f97270 | ||
|
8eceb888d7 | ||
|
f8feb4b01a | ||
|
be0e950c91 | ||
|
d9feae1c06 | ||
|
8d59d91171 | ||
|
8a1d9c25fa | ||
|
1bf839b1e8 | ||
|
f7cd13301c | ||
|
4d2b3d8804 | ||
|
c07d437bbd | ||
|
99a3755a3c | ||
|
c792dcf488 | ||
|
80ccf5d725 | ||
|
a3c1232c3f | ||
|
8cef75c743 | ||
|
0d52a69e4b | ||
|
02f0430141 | ||
|
bec2183f2c | ||
|
53ff6b9b9f | ||
|
017cc5f446 | ||
|
a3d50bc022 | ||
|
a4dd490069 | ||
|
c0d6f790d0 | ||
|
dc7cef9f37 | ||
|
ecebbd292d | ||
|
96be8c3264 | ||
|
e6e7c75d94 | ||
|
09186fabbe | ||
|
96a1dc27c3 | ||
|
6369f867a4 | ||
|
47182dd03f | ||
|
3e6e7a6bc2 | ||
|
ae2f606bb5 | ||
|
727368c60f | ||
|
5047dd3546 | ||
|
46e3556e01 | ||
|
b56f079e28 | ||
|
9394bbd484 | ||
|
f922a9c542 | ||
|
46be942214 | ||
|
78c6785175 | ||
|
5e3b08d606 | ||
|
db68c93b57 | ||
|
c31fc8b966 | ||
|
4b0c638b9a | ||
|
e7da954ecc | ||
|
f66f582927 | ||
|
2f0ee84b9b | ||
|
0da5d86026 | ||
|
a45433ba20 | ||
|
0827b2c1da | ||
|
45095a61bf | ||
|
5896c65232 | ||
|
bc7b1f8632 | ||
|
6e1531aca5 | ||
|
716bd6dec3 | ||
|
c250ecb315 | ||
|
a813badbbd | ||
|
fdd2188912 | ||
|
f865ea149d | ||
|
16cdce7b68 | ||
|
d79d8f39b4 | ||
|
d283d02bf2 | ||
|
9ba399dfa7 | ||
|
2cd43f4900 | ||
|
09fe2e7613 | ||
|
30caac3a68 | ||
|
60cfa728e2 | ||
|
3327bb0f8d | ||
|
32d6ee6385 | ||
|
14b699ecde | ||
|
485dc01214 | ||
|
86bf31cfe6 | ||
|
b92a14a841 | ||
|
6f0c9e034b | ||
|
dab76c92cc | ||
|
7024d59e6a | ||
|
7c0e285858 | ||
|
7ae33a616f | ||
|
ebdee9478c | ||
|
5cd85b5e00 | ||
|
a91a41364b | ||
|
e34c5af43f | ||
|
eb5c3dc64b | ||
|
0ca416c91a | ||
|
21ae3b9be8 | ||
|
0a11f8b7b5 | ||
|
d408bb9268 | ||
|
5cab3e4aaa | ||
|
36319dec5d | ||
|
57bb2c40cd | ||
|
a3c33b1dce | ||
|
2fffc52b50 | ||
|
7585edbdeb | ||
|
cd920d0ac3 | ||
|
7909e8588d | ||
|
9177484f58 | ||
|
0bf2d10c55 | ||
|
7bbb5acf12 | ||
|
152610eda9 | ||
|
0e70ba686e | ||
|
46828872c3 | ||
|
6b064c92b4 | ||
|
4da69d1abd | ||
|
d62b532c52 | ||
|
081b29bd2a | ||
|
5437d4aaf5 | ||
|
78f766768d | ||
|
8dd19a4812 | ||
|
130d0c90bd | ||
|
3919da8e33 | ||
|
0006f5a74a | ||
|
05c3a444b8 | ||
|
382bc7f2e8 | ||
|
4f51968aca | ||
|
227d7c5a7f | ||
|
7b1ec53f56 | ||
|
160bc039c8 | ||
|
08ea539df2 | ||
|
644fd71b44 | ||
|
4ddd199f6f | ||
|
a0974156f3 | ||
|
87cf323cef | ||
|
5478bbcd17 | ||
|
b5ae1ddff9 | ||
|
89d604f2c8 | ||
|
e52aba537a | ||
|
ba1cb19cdd | ||
|
56eea0781c | ||
|
a76c56fa1a | ||
|
c27ac678dd | ||
|
11e07fd63b | ||
|
4601a8bb67 | ||
|
9f35e44592 | ||
|
64ae065511 | ||
|
83ed24a97b | ||
|
d583cd03f6 | ||
|
adffa6ffd5 | ||
|
274ec65af6 | ||
|
8faa1d4dd4 | ||
|
cb13ef85a4 | ||
|
4064c0e3b6 | ||
|
dc5301d565 | ||
|
9fdb124304 | ||
|
5555c0c1f6 | ||
|
973f328b1e | ||
|
fb18934a97 | ||
|
235f6e14bf | ||
|
1a31d0dc00 | ||
|
92f77a640f | ||
|
484d2f31ae | ||
|
4b4d92b098 | ||
|
43041d2eb3 | ||
|
b685daf386 | ||
|
dafae66cc2 | ||
|
ae4b922614 | ||
|
750cb3e246 | ||
|
a86ad841f1 | ||
|
a05e2afcc2 | ||
|
26a8406ba9 | ||
|
c37fb4cf62 | ||
|
3d98b4cb22 | ||
|
1a05004743 | ||
|
ce8784bdb1 | ||
|
e52522b869 | ||
|
06d70147e6 | ||
|
43ed389a3f | ||
|
ecc93d0558 | ||
|
62e84d9848 | ||
|
3573fa8e7b | ||
|
d9c3ba2b77 | ||
|
ce4a7b8493 | ||
|
19d8762ab6 | ||
|
c2a16c0bdb | ||
|
3df784b305 | ||
|
86a1934978 | ||
|
784a14aa49 | ||
|
c5ede3849f | ||
|
f162d45a21 | ||
|
6c5bc0625f | ||
|
7736837d62 | ||
|
c9c6e01dae | ||
|
6fe6247831 | ||
|
0cd182ebcc | ||
|
a8cbab201d | ||
|
c2082d93a8 | ||
|
d405804be8 | ||
|
f112d198cd | ||
|
1da7b76569 | ||
|
59f4db1088 | ||
|
2803540814 | ||
|
253b7fde91 | ||
|
8d0cfd554a | ||
|
2759916d86 | ||
|
40c6d79fb5 | ||
|
98036d5670 | ||
|
cd2f37b304 | ||
|
da6aac91f1 | ||
|
01e6d9bb71 | ||
|
cc98896db8 | ||
|
91c36c269b | ||
|
1cd3df46bd | ||
|
c505471857 | ||
|
e9e661bd59 | ||
|
efb6ae9630 | ||
|
667d70d170 | ||
|
3b4f2e33e2 | ||
|
82bca2257b | ||
|
0115df2f65 | ||
|
515d4e5372 | ||
|
844e2e1fee | ||
|
70b98fadbc | ||
|
642330ac7c | ||
|
8648c52101 | ||
|
64ed2091b2 | ||
|
991f8aabee | ||
|
4cb003dd8d | ||
|
917786f43d | ||
|
5e1ed95583 | ||
|
5c7a5aa0c3 | ||
|
3420909dff | ||
|
86dc11c5bc | ||
|
6acce39710 | ||
|
43957ef203 | ||
|
0c39f44d70 | ||
|
3e0ba0e604 | ||
|
abadba05be | ||
|
0533e7fb38 | ||
|
7cc2d2c889 | ||
|
b782e5c7d4 | ||
|
3a8e9af402 | ||
|
a3a3048e7a | ||
|
f0678c5ff4 | ||
|
4b3242bbea | ||
|
0f77aae560 | ||
|
266b8519ee | ||
|
938f608742 | ||
|
f095a649ec | ||
|
678d7994f4 | ||
|
dc22344088 | ||
|
4c0a95b107 | ||
|
6c59567689 | ||
|
890719311b | ||
|
7281cf13ad | ||
|
e90688edd0 | ||
|
76b27d29c2 | ||
|
eea986f215 | ||
|
c202cef168 | ||
|
2025fa67e9 | ||
|
c6bc73951e | ||
|
605fa66c50 | ||
|
b7420131bf | ||
|
9f912511bc | ||
|
3ad5451f3b | ||
|
46c69e0e75 | ||
|
9e2301f4a4 | ||
|
fee824a1a1 | ||
|
9150f8fef9 | ||
|
c31ed2abfc | ||
|
5b3466bedf | ||
|
249a7902ec | ||
|
71a64989a5 | ||
|
4a57d362e1 | ||
|
c9b00a70b0 | ||
|
de5097351c | ||
|
5a349f2809 | ||
|
30ec398321 | ||
|
be0e350c8b | ||
|
249cd93da3 | ||
|
904109ed0d | ||
|
45abe0f74e | ||
|
0bbd2262a3 | ||
|
ab96610b1e | ||
|
7db3846a94 | ||
|
c6807b3f28 | ||
|
25669aa92c | ||
|
84e1c33cde | ||
|
811872a59d | ||
|
9a4b79bcfa | ||
|
7066b4cce2 | ||
|
0eb4e12bee | ||
|
0cc63754b8 | ||
|
50d5cecbda | ||
|
9fd8c2687f | ||
|
47f931c8f9 | ||
|
106964e3d2 | ||
|
80acb7b430 | ||
|
10bce0450f | ||
|
1f922254f0 | ||
|
a9a678a6b2 | ||
|
9ca2e67762 | ||
|
5931c1f233 | ||
|
f6d12e7df8 | ||
|
b756441104 | ||
|
5a8987793f | ||
|
d9d54e498d | ||
|
cce5a90075 | ||
|
dc39012cba | ||
|
9336db462c | ||
|
96fa2c5e2d | ||
|
55ed008b2d | ||
|
6dfcfef078 | ||
|
599b3e0cd4 | ||
|
c18610b4ee | ||
|
a5e47592b6 | ||
|
1bb30bf28c | ||
|
87a533be57 | ||
|
59b9172822 | ||
|
02e4eaf22f | ||
|
9abe9eeae9 | ||
|
f95caa7954 | ||
|
fab5d30ff6 | ||
|
8fd4b7fa29 | ||
|
1bacb9f625 | ||
|
ad21c9e1f1 | ||
|
3952a221af | ||
|
42ae10bbcd | ||
|
9fe0fb0626 | ||
|
611fabd792 | ||
|
12b0ad953a | ||
|
342397dc7e | ||
|
2a11b6b094 | ||
|
3ee6382d48 | ||
|
8e752a777b | ||
|
a88ad007de | ||
|
2a1507c162 | ||
|
b3e585988f | ||
|
557924f222 | ||
|
d3481e6316 | ||
|
531cb1c233 | ||
|
f139d2ea61 | ||
|
2eb76b2a5e | ||
|
9b75f03cd2 | ||
|
75207b3a88 | ||
|
76e9e58b78 | ||
|
ce2e59ba10 | ||
|
be5caccef9 | ||
|
20a780c7b6 | ||
|
cf32a9b93a | ||
|
a43178299c | ||
|
c3ea58aca4 | ||
|
467576b6cc | ||
|
eda7e1d4f5 | ||
|
24203e9dd7 | ||
|
5d9e59979c | ||
|
a4200cafad | ||
|
84274a10c3 | ||
|
68fcb4759c | ||
|
8a43e940ab | ||
|
5c9a8b22b1 | ||
|
0fff7fd798 | ||
|
4e54be0ec6 | ||
|
db4cfd5dbc | ||
|
8ee0d09ae6 | ||
|
bcdb7a2386 | ||
|
f245cc28d4 | ||
|
772703c8ff | ||
|
dd3a6ce9f8 | ||
|
1e58ee1318 | ||
|
89e4caaaf0 | ||
|
74d73dc85c | ||
|
4047be74da | ||
|
883d206fbd | ||
|
09ecbcb596 | ||
|
3225008973 | ||
|
cbf5541a82 | ||
|
18429220bd | ||
|
f0204a0ec7 | ||
|
57f8355b29 | ||
|
9901068ac7 | ||
|
231f9360d9 | ||
|
4802ad350b | ||
|
5a54af4d4f | ||
|
1607a5e5b0 | ||
|
ae8de6d50a | ||
|
4a8ccb37ad | ||
|
2a82891a85 | ||
|
af148c9386 | ||
|
66798e42fb | ||
|
fb4a0ec083 | ||
|
5ea926dad7 | ||
|
1ee9eea094 | ||
|
ff7fb670d0 | ||
|
0e712a5acb | ||
|
a0ec17b32e | ||
|
2e82ffa4af | ||
|
80dd7ff22f | ||
|
54ef9cfc72 | ||
|
b0cefea58a | ||
|
b141e5f6ef | ||
|
4b3a9212b6 | ||
|
505f33274d | ||
|
160687b3ed | ||
|
6423c65aa8 | ||
|
39a334a9aa | ||
|
bb38cdd8ba | ||
|
f018acba22 | ||
|
46323fa9ef | ||
|
5b359bb1e3 | ||
|
e89213492d | ||
|
8fc393f246 | ||
|
ec450d3bbf | ||
|
695ad752b2 | ||
|
841f27abdb | ||
|
d05b3127bd | ||
|
76c6e7f105 | ||
|
a71d81cf8c | ||
|
eec4d71737 | ||
|
3b08828674 | ||
|
a2c6fd747c | ||
|
97404c4a03 | ||
|
60e17ce23c | ||
|
5107e8cea3 | ||
|
2319126a70 | ||
|
3bcd40b3c5 | ||
|
5c333e0140 | ||
|
b11f9ba9b8 | ||
|
94d8cb8be1 | ||
|
1dc04b2dee | ||
|
a1eaf6a960 | ||
|
b8deef0ec0 | ||
|
a9e8a9a030 | ||
|
3407364776 | ||
|
d5a409e57f | ||
|
401558b7ba | ||
|
9e0ecfb697 | ||
|
6a066b9978 | ||
|
ea02c753eb | ||
|
05697f670b | ||
|
f8e58135cf | ||
|
329ed914c9 | ||
|
ce027adfb3 | ||
|
284e5b0275 | ||
|
e2292aaa17 | ||
|
9f40989351 | ||
|
08828a6d7d | ||
|
1839f69130 | ||
|
9830b6923b | ||
|
42cadc74bd | ||
|
45950415ed | ||
|
1926d6e39d | ||
|
b634f8a26f | ||
|
7554aa4655 | ||
|
a6744e43e8 | ||
|
e991e3127f | ||
|
418f5eef26 | ||
|
ba6f62eb79 | ||
|
d865d1478c | ||
|
1804adb0cf | ||
|
815fe72adc | ||
|
f221d56220 | ||
|
e597e50794 | ||
|
85679d37f3 | ||
|
1e9f94994e | ||
|
c02e5ab2a6 | ||
|
ab3d71f97f | ||
|
0a683e8088 | ||
|
dea5e86051 | ||
|
1329c0a75e | ||
|
61408e7fad | ||
|
b9e02e8184 | ||
|
6763f713bb | ||
|
79a2bc042d | ||
|
fc83a9e584 | ||
|
c5b0f4b5d9 | ||
|
8f275a7c45 | ||
|
8d8ff71536 | ||
|
61715d5cc8 | ||
|
07028f9d74 | ||
|
524afeec9d | ||
|
8125e6cbfc | ||
|
8841ce3f43 | ||
|
cc2983d375 | ||
|
8c60a8a462 | ||
|
9e4a2563ea | ||
|
668750357e | ||
|
ff252ea48e | ||
|
d80fb71f8b | ||
|
2f8bd2b901 | ||
|
bc5ba007b2 | ||
|
958367bf53 | ||
|
40f2555797 | ||
|
167a515651 | ||
|
c39665f589 | ||
|
0a1c750c80 | ||
|
190a37d797 | ||
|
2d3aba9ee8 | ||
|
80273a306d | ||
|
c19af0acb1 | ||
|
ac113a0fee | ||
|
4c9388fb96 | ||
|
873279b159 | ||
|
c8c07d658a | ||
|
19d900a756 | ||
|
11d47057a5 | ||
|
c421ac072d | ||
|
4ff7fe1fb3 | ||
|
6b8447352d | ||
|
674804a996 | ||
|
e94a138d64 | ||
|
e01c67affe | ||
|
994cfb1acb | ||
|
94008cc760 | ||
|
dbd5f2f573 | ||
|
f594bc80ba | ||
|
d5ebd79c76 | ||
|
55e47786e3 | ||
|
bc21975084 | ||
|
1db8c84fc6 | ||
|
45f097645e | ||
|
7cab2083c7 | ||
|
cda0e4b648 | ||
|
afd9909a64 | ||
|
87421a23e8 | ||
|
60ce97c9d8 | ||
|
8901755ba3 | ||
|
6f55bccbb8 | ||
|
17bb928080 | ||
|
9f45fc1e99 | ||
|
99bd4ac28c | ||
|
3752217ed5 | ||
|
f010b77a37 | ||
|
2194200278 | ||
|
73afe681aa | ||
|
9e04102448 | ||
|
dbf18e4de9 | ||
|
66c2c93082 | ||
|
10433e8b45 | ||
|
1f66b699c4 | ||
|
0e41b300ed | ||
|
cd60b88bf7 | ||
|
becfd387f6 | ||
|
755a9b2bf0 | ||
|
223c25a72f | ||
|
fbc98b748e | ||
|
dcdd535302 | ||
|
4c42f93b22 | ||
|
a89f75e1b7 | ||
|
13dca2a54a | ||
|
d4c19c0f5c | ||
|
c7181bd294 | ||
|
92be9f1216 | ||
|
edc265661c | ||
|
1bde94dd02 | ||
|
95c76e8e92 | ||
|
11ac9800af | ||
|
943d20b411 | ||
|
96776405a1 | ||
|
7eee341bee | ||
|
0e9f760eb1 | ||
|
cf8e0a3bb9 | ||
|
c7499c557c | ||
|
c81f3bbb05 | ||
|
e7022064ab | ||
|
3dc48fe75a | ||
|
dca1d4b58a | ||
|
458367a906 | ||
|
fa42aa6d89 | ||
|
6374743747 | ||
|
f1af42fa8c | ||
|
6279dac039 | ||
|
d5ac8cf2f2 | ||
|
96b6912103 | ||
|
d5cb86844f | ||
|
f4b2dcdf49 | ||
|
b6d6c5289f | ||
|
b0915d5b51 | ||
|
8c475b97b8 | ||
|
58b16695e1 | ||
|
905f5485b2 | ||
|
71967c2a6d | ||
|
17880771ad | ||
|
55951c018d | ||
|
ff565769f2 | ||
|
f3fdcfaa79 | ||
|
133c7b46b3 | ||
|
d5ed2b929d | ||
|
1bb8a64ebf | ||
|
fabdc3bda3 | ||
|
eee39bdc96 | ||
|
5d5ab1e5cc | ||
|
a7ad553513 | ||
|
d6fe7abf04 | ||
|
e3c355ba65 | ||
|
841713e1e4 | ||
|
5639971466 | ||
|
c83ad6d01e | ||
|
a39ab216aa | ||
|
f536f4c439 | ||
|
00b7317e63 | ||
|
76b37d1541 | ||
|
148844fe97 | ||
|
3f1ae2e32c | ||
|
f1b8c42711 | ||
|
e98c1c188e | ||
|
cb00020504 | ||
|
6c5322481a | ||
|
7254cdf7e8 | ||
|
cad341d889 | ||
|
a90484c6d9 | ||
|
1927378bcc | ||
|
6f1d9d71f4 | ||
|
511636df0c | ||
|
08a43d05b6 | ||
|
ace4f4be37 | ||
|
8277a817f1 | ||
|
c919d5db39 | ||
|
d0b1d663e4 | ||
|
aaa4099925 | ||
|
641002fba8 | ||
|
0de8b203f1 | ||
|
544f409b4b | ||
|
6084bfb261 | ||
|
faac0bae26 | ||
|
f99d3f8367 | ||
|
589b48d41e | ||
|
f4d2b8846a | ||
|
1b2f992cd2 | ||
|
739842703e | ||
|
6102037bbb | ||
|
9a913110cf | ||
|
43bcdd9703 | ||
|
6a0f779484 | ||
|
89f9944981 | ||
|
b5de3b74a5 | ||
|
44f59b4301 | ||
|
95bc82fbc0 | ||
|
7691654c68 | ||
|
ea9c32be71 | ||
|
1e43630218 | ||
|
afbbfaa537 | ||
|
3d6bf6919f | ||
|
904837e0cb | ||
|
70392f1f81 | ||
|
bb5f819975 | ||
|
c038931615 | ||
|
31ac5834fe | ||
|
cea1486ecf | ||
|
0aa15011e3 | ||
|
b0f27361f3 | ||
|
c087b6f11d | ||
|
116efee0ee | ||
|
0b3bf966f4 | ||
|
f0c7b5edf8 | ||
|
1d48e98e4f | ||
|
f3979df762 | ||
|
1e7b9299c6 | ||
|
37f8c7b4c9 | ||
|
bf9c1013ac | ||
|
e62e9789cd | ||
|
c35e586ea5 | ||
|
912c331d3d | ||
|
a5b57b08ce | ||
|
ecd5d6b65b | ||
|
2a63caaa69 | ||
|
d09770cae7 | ||
|
41f477879f | ||
|
e948a7da7a | ||
|
63351143b2 | ||
|
d13edb17ed | ||
|
27609c49b9 | ||
|
4301535326 | ||
|
424c5d00a9 | ||
|
a6809c6a2e | ||
|
5cb12f6839 | ||
|
d39e26741f | ||
|
722ec1eb51 | ||
|
6026da52d6 | ||
|
eca0fab44e | ||
|
64c6af3195 | ||
|
0d2f22e45c | ||
|
6443ddd985 | ||
|
8a308354f6 | ||
|
f799155ab8 | ||
|
faf67b3de4 | ||
|
7be099fa81 | ||
|
8b836ae731 | ||
|
8344ef58f8 | ||
|
0226613853 | ||
|
503147a9f9 | ||
|
0d2ec43833 | ||
|
37f3a3810e | ||
|
23e0d70bac | ||
|
acb2c32c33 | ||
|
a6a3a5c531 | ||
|
d54c21df7e | ||
|
19514d632e | ||
|
5c3d0f1824 | ||
|
0aadac10c7 | ||
|
95ca85168b | ||
|
441b72b91f | ||
|
c4965a64f7 | ||
|
90a2fff0e7 | ||
|
6262d13e0b | ||
|
e6deac31f7 | ||
|
6988da94a2 | ||
|
3c7989fd29 | ||
|
d6b37c881f | ||
|
7596487beb | ||
|
822b6322de | ||
|
dcdcee3a74 | ||
|
1f4111e540 | ||
|
befaf1197f | ||
|
feff4aa846 | ||
|
0abc6a2c25 | ||
|
bd35cb0ae3 | ||
|
78203641fe | ||
|
e6b7801bd1 | ||
|
e665744317 | ||
|
d4c3c10fad | ||
|
2a825116b6 | ||
|
4dc4f5f14a | ||
|
c837981bba | ||
|
3c26a1644d | ||
|
ff76e18516 | ||
|
39f852f440 | ||
|
2b00fa7997 | ||
|
d6a04f872d | ||
|
c9c8575a1a | ||
|
df4b7945ae | ||
|
449ccfb6f5 | ||
|
1b28061400 | ||
|
8db003a19d | ||
|
0996c5597f | ||
|
5bb2c5dbd2 | ||
|
67155ab7f5 | ||
|
5af118efda | ||
|
d2b496bff4 | ||
|
b34e023480 | ||
|
51b6038636 | ||
|
cb9c933eb2 | ||
|
6cd4e03444 | ||
|
8d300bd35f | ||
|
49006c67b4 | ||
|
00ba2ff781 | ||
|
83008b7cfe | ||
|
0b4ac75772 | ||
|
fb3f249815 | ||
|
bfe76d4a17 | ||
|
293bebe077 | ||
|
5fac4d5764 | ||
|
5fb5e24811 | ||
|
38ca6f644b | ||
|
8e6e2fbe14 | ||
|
5ed087573e | ||
|
54f376d0b9 | ||
|
b2e89a3274 | ||
|
daa9623ab0 | ||
|
e079bffb66 | ||
|
3f7ccfd649 | ||
|
a249843d89 | ||
|
19f4a7b296 | ||
|
2a358fb0c4 | ||
|
eae597182c | ||
|
00b02bb249 | ||
|
a876861455 | ||
|
385decbd63 | ||
|
60a3107ccd | ||
|
406c1a32a1 | ||
|
9cb9260861 | ||
|
202084d31d | ||
|
dbbebcab33 | ||
|
ba1cf846ed | ||
|
d2d3200b38 | ||
|
51d964a4ef | ||
|
efe6a83e30 | ||
|
fbb7fcffbc | ||
|
a5b5d9a101 | ||
|
f12295b8a9 | ||
|
faf69d4237 | ||
|
e536426ded | ||
|
1b9ae5189c | ||
|
e32d0816ed | ||
|
df270ef745 | ||
|
947538acb8 | ||
|
6c89eb0b47 | ||
|
9b2c24c099 | ||
|
134bc38ecf | ||
|
815b1fb20a | ||
|
409dc4f8bb | ||
|
4a1411b4f1 | ||
|
8ebe8ddebd | ||
|
9bc6db28d0 | ||
|
32b2ec88bc | ||
|
1031771faa | ||
|
4db04784f9 | ||
|
bdf314f38a | ||
|
581c305186 | ||
|
5910ea9427 | ||
|
c8671ae282 | ||
|
82e3b03c11 | ||
|
9379d3cc17 | ||
|
7605ae7daf | ||
|
8962422b1c | ||
|
b69a480af4 | ||
|
48baa61ecc | ||
|
f1485161e5 | ||
|
048de848ee | ||
|
f771d064a9 | ||
|
6e7d133a5f | ||
|
b60074f1c2 | ||
|
9c1ba55733 | ||
|
c6d4cb4655 | ||
|
8f1d81a0b6 | ||
|
a47667cff4 | ||
|
ea5d7478b1 | ||
|
49271efbaf | ||
|
0ab30f8d82 | ||
|
cddae4884c | ||
|
7ea8d80d53 | ||
|
42c76d1358 | ||
|
9f7d4bcf5c | ||
|
1d1ccce676 | ||
|
9fe94ccac9 | ||
|
66b039a501 | ||
|
20f1789dfb | ||
|
231cff5f6f | ||
|
3246fe84d7 | ||
|
78eb487bb0 | ||
|
a77feb5d71 | ||
|
2e59d61c1b | ||
|
75e1dbbaab | ||
|
ad76569f8e | ||
|
7d787ed96c | ||
|
06658ad7c3 | ||
|
fc18425b6a | ||
|
879275ac98 | ||
|
7a3df798fc | ||
|
e5edb210cd | ||
|
0c41e03ceb | ||
|
f12ceaca0c | ||
|
436787f170 | ||
|
93bc3839f9 | ||
|
f91fc5639b | ||
|
e11bd856d5 | ||
|
8f824ffe8e | ||
|
3ba780e2a8 | ||
|
a07c32ea54 | ||
|
11b84eb457 | ||
|
1731d4238f | ||
|
a1631e53f6 | ||
|
fc54ef0d1c | ||
|
b40eb84895 | ||
|
f63f603c87 | ||
|
8455340b87 | ||
|
2f3c1466ff | ||
|
50addec9a5 | ||
|
4f8d19ff17 | ||
|
90db8146d5 | ||
|
cfac111e2b | ||
|
1b6ff90ff8 | ||
|
18eaf29f4c | ||
|
554b049068 | ||
|
2339a0be1c | ||
|
2fb9267887 | ||
|
8b3befc0e2 | ||
|
d565bb2fd5 | ||
|
ee2984bdaf | ||
|
c8ddce8560 | ||
|
23fd453544 | ||
|
c679e0cb5c | ||
|
fb487bb567 | ||
|
2a24c8caa6 | ||
|
e3f6fd56b1 | ||
|
4b9afbbe90 | ||
|
37501d9c79 | ||
|
4af8420afb | ||
|
6bda7ce6c3 | ||
|
d5492f0525 | ||
|
234b30676a | ||
|
5fd89a70ea | ||
|
98a532d474 | ||
|
43bdd3ce18 | ||
|
06943a69f6 | ||
|
828d6ff7d7 | ||
|
fc4ca27b25 | ||
|
1f67436c5e | ||
|
0fd93cdef5 | ||
|
84eb2f4fad | ||
|
1262e7ed13 | ||
|
df5478fbea | ||
|
2589292cde | ||
|
d3ae0ee8d7 | ||
|
5ef07e25ac | ||
|
4134999e01 | ||
|
8cd1bcfd3f | ||
|
a21c6fd450 | ||
|
33309f661a | ||
|
7c5bfd57f8 | ||
|
6e02327e8b | ||
|
7eb23840ed | ||
|
7c3f55c100 | ||
|
911b437f22 | ||
|
b72942fac9 | ||
|
6afd1a99dc | ||
|
272e3bd95e | ||
|
45a55b91aa | ||
|
3071c0a5f2 | ||
|
4305b57c80 | ||
|
70c0ea3560 | ||
|
5b2c04f492 | ||
|
6f6496bb09 | ||
|
daef3ab233 | ||
|
345a686d82 | ||
|
3a14e00366 | ||
|
afd27f01fe | ||
|
366d486c16 | ||
|
e44a561ab0 | ||
|
f93d49ab1e | ||
|
5b33ea1ee7 | ||
|
85fca8deb6 | ||
|
ebd541a570 | ||
|
15fa07a5c5 | ||
|
be55695eff | ||
|
0478174d59 | ||
|
a8dbc6f753 | ||
|
506122d854 | ||
|
725e3d9437 | ||
|
31958546c3 | ||
|
1e6f6554aa | ||
|
641f5dd2a6 | ||
|
5f4dcb1e60 | ||
|
db20f50cf4 | ||
|
efda90c93a | ||
|
0bf16de07b | ||
|
2d5dd7bb3f | ||
|
cdd1889de6 | ||
|
c21a896405 | ||
|
d4ff847153 | ||
|
0a4ce78681 | ||
|
bc0f887e15 | ||
|
b42978e7e4 | ||
|
b9dfc25ca3 | ||
|
1ef14b3007 | ||
|
d3f0c7166a | ||
|
e31a4f6797 | ||
|
400ae6f65f | ||
|
f1ea5146d7 | ||
|
064cdc265f | ||
|
5587e57a76 | ||
|
a3738b2fa7 | ||
|
655858ace0 | ||
|
c02b0a8a4d | ||
|
0d6fb52be0 | ||
|
978ba3d83d | ||
|
ecf6b7f23e | ||
|
01aae2b497 | ||
|
4b77ea95f5 | ||
|
76614f352e | ||
|
b72c20b85c | ||
|
e09a800f9a | ||
|
0fbbd88458 | ||
|
afbb4c1322 | ||
|
b7a08fd5e0 | ||
|
7a11eb3a26 | ||
|
c8a0090922 | ||
|
afbbcf3c04 | ||
|
ed9d2854c9 | ||
|
398ede5efe | ||
|
44d28ddd5c | ||
|
268c566006 | ||
|
7e72aa74fd | ||
|
7c27a19b2e | ||
|
140074bb86 | ||
|
6e2b6000e5 | ||
|
c887d8b017 | ||
|
75af08c475 | ||
|
439b3fc75a | ||
|
0832de7236 | ||
|
6eeaeba126 | ||
|
4730faca61 | ||
|
4c676c85e5 | ||
|
e54c35e4fb | ||
|
5e2727fe03 | ||
|
56f20aa25d | ||
|
345c8c0c87 | ||
|
ae7985cd7b | ||
|
a05ca93697 | ||
|
9f77d899b7 | ||
|
203b7f1531 | ||
|
d2b851bfa1 | ||
|
c12b6e8ee7 | ||
|
b5e95468b1 | ||
|
92090eca21 | ||
|
9d03d085dd | ||
|
bfb4c74981 | ||
|
2b1f616b20 | ||
|
01245f5b16 | ||
|
01aec4a631 | ||
|
41cd47caab | ||
|
49ce0ab6d4 | ||
|
4226a8d10e | ||
|
bf5a81df37 | ||
|
88954f7fbd | ||
|
ed67bcb24f | ||
|
eddcb5238b | ||
|
be6d7c0791 | ||
|
4b0eff3df5 | ||
|
8a4bad50a8 | ||
|
68504f0970 | ||
|
f19bf99c01 | ||
|
3a7ac5300a | ||
|
96952e7181 | ||
|
79167d9e49 | ||
|
b115105f05 | ||
|
de280085e7 | ||
|
b841d07408 | ||
|
64cf50a0ed | ||
|
938943cdbf | ||
|
751fcfc6c3 | ||
|
46e47417aa | ||
|
e7e6487ba0 | ||
|
063d99ad11 | ||
|
081fe431aa | ||
|
d94c6e0ccb | ||
|
566daa5a5b | ||
|
6f11a83e4e | ||
|
e093dd2382 | ||
|
50e05353e8 | ||
|
628154492a | ||
|
04bab6b7da | ||
|
b7c11d36e6 | ||
|
45f2c19cc5 | ||
|
22f281aa16 | ||
|
328884f421 | ||
|
c69c63039c | ||
|
69c487f4ed | ||
|
07283b1a90 | ||
|
940362224d | ||
|
69b9945b44 | ||
|
c3776cacab | ||
|
87e397d00b | ||
|
57b1d4f9eb | ||
|
d197545530 | ||
|
be0cfb4175 | ||
|
b57eb9ca4f | ||
|
f299aa98ec | ||
|
3d0e4367d9 | ||
|
a15ef8f8a0 | ||
|
705b7ecf60 | ||
|
0d2c7321e9 | ||
|
672a6f1018 | ||
|
3807c3de04 | ||
|
e02b597be3 | ||
|
b3283448ce | ||
|
30f80ca0bc | ||
|
1bdd8ae19f | ||
|
da3913d8f9 | ||
|
d65a8361fe | ||
|
5e116e8dd5 | ||
|
1666f92dcd | ||
|
37b12f92ab | ||
|
0efec57787 | ||
|
7acfd4e8d5 | ||
|
97bdd26eee | ||
|
4db8f60fe7 | ||
|
8fac431b06 | ||
|
f17f39ff9c | ||
|
9104bc20ed | ||
|
fc690b018e | ||
|
16bdfa42ac | ||
|
3dfda05956 | ||
|
bda62d7999 | ||
|
090fca7a07 | ||
|
aaab2419ea | ||
|
73cf442e7b | ||
|
e236528e76 | ||
|
fa79495bb4 | ||
|
17eb6aa8a9 | ||
|
c917b67f06 | ||
|
4e24cffd8c | ||
|
6af51c0d96 | ||
|
f53226245f | ||
|
c3ebcfa148 | ||
|
8a4441ea1a | ||
|
5aefbce27a | ||
|
71c1121d11 | ||
|
370b1f7e7a | ||
|
b549a1bbef | ||
|
368645698a | ||
|
b078c619aa | ||
|
808aba3916 | ||
|
a977c11544 | ||
|
9a55ffe6fb | ||
|
7a221b672e | ||
|
278d0e1846 | ||
|
dd07a123b7 | ||
|
f4444d992c | ||
|
6b2a849d1f | ||
|
0f1a39f343 | ||
|
83321c6958 | ||
|
cc61948b1f | ||
|
7a80710d93 | ||
|
a8be1e6f59 | ||
|
e4dd31ff89 | ||
|
8f0fad42b9 | ||
|
a59f8fdc85 | ||
|
fd560fe680 | ||
|
e500d6135a | ||
|
a03e8dd99d | ||
|
5b0b8d8cfb | ||
|
9925ca4087 | ||
|
9beb2dda03 | ||
|
7d0e23d72e | ||
|
7fdb6f73e3 | ||
|
a130eccef4 | ||
|
c4dd11d1d3 | ||
|
2ec846d558 | ||
|
3f2d538b81 | ||
|
2ee44c9a18 | ||
|
6847d54c4f | ||
|
fde13b3bb9 | ||
|
470939d483 | ||
|
6f0dbf6ab0 | ||
|
ffd00797d8 | ||
|
04ce3a8b19 | ||
|
3fd62a6b1c | ||
|
a8db2a9ce6 | ||
|
4090ea5501 | ||
|
f1948f1e10 | ||
|
f7cab35ef9 | ||
|
905942abdb | ||
|
b5040086d4 | ||
|
d39130a398 | ||
|
b81ba1f96b | ||
|
210eb9ed0a | ||
|
cb4d86c4d7 | ||
|
86e7299ef5 | ||
|
60d83a0149 | ||
|
87e25a1d1b | ||
|
213701b51a | ||
|
be20e7f49d | ||
|
7ed03b8974 | ||
|
1d894a790e | ||
|
1f3e1b66e2 | ||
|
148ec970b6 | ||
|
2cccbaa008 | ||
|
8e558309dc | ||
|
0a423800ff | ||
|
d12f781074 | ||
|
bcefa03bc0 | ||
|
5a7447c569 | ||
|
61ecafa390 | ||
|
aa5898dc53 | ||
|
6c05752c50 | ||
|
a9554e20b6 | ||
|
e235b267a2 | ||
|
f09b7cb609 | ||
|
a38b884c6c | ||
|
d7fd29fff1 | ||
|
6f63d646c1 | ||
|
51d2ebadbb | ||
|
1e920018d3 | ||
|
01a5f06550 | ||
|
07786a61a2 | ||
|
de14e2ea2b | ||
|
821922916f | ||
|
b1c3f26e5e | ||
|
b0a46993df | ||
|
807b0c49ff | ||
|
f8c4c0738d | ||
|
402d6feffa | ||
|
20fc3804bf | ||
|
f619024764 | ||
|
d23287f122 | ||
|
5f2d4e60e2 | ||
|
916248af1f | ||
|
f8d6a23804 | ||
|
fadde67135 | ||
|
a27152b602 | ||
|
3e2618bc7b | ||
|
07a3fc0608 | ||
|
968967376d | ||
|
023b8807e1 | ||
|
0e0590adab | ||
|
a9f3b10215 | ||
|
d08c20edde | ||
|
5fac350b9c | ||
|
cb5fad4c6c | ||
|
dae57a1ebc | ||
|
49122a873f | ||
|
0ddeff1023 | ||
|
3840b6f593 | ||
|
257f8e41e2 | ||
|
694c59cb42 | ||
|
197fe6c1d7 |
946 changed files with 219418 additions and 256122 deletions
161
.clang-format
Normal file
161
.clang-format
Normal file
|
@ -0,0 +1,161 @@
|
|||
---
|
||||
Language: Cpp
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignArrayOfStructures: Left
|
||||
AlignConsecutiveAssignments: AcrossComments
|
||||
AlignConsecutiveBitFields: AcrossComments
|
||||
AlignConsecutiveDeclarations: AcrossComments
|
||||
AlignConsecutiveMacros: AcrossComments
|
||||
# AlignConsecutiveShortCaseStatements: AcrossComments
|
||||
AlignEscapedNewlines: Left # LeftWithLastLine
|
||||
AlignOperands: Align
|
||||
AlignTrailingComments:
|
||||
Kind: Always
|
||||
OverEmptyLines: 1
|
||||
AllowAllArgumentsOnNextLine: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
|
||||
AllowShortBlocksOnASingleLine: Never
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: Inline
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLambdasOnASingleLine: Inline
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true # OnePerLine
|
||||
BitFieldColonSpacing: Both
|
||||
BreakBeforeBraces: Custom # Attach
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: true
|
||||
AfterClass: false
|
||||
AfterControlStatement: false
|
||||
AfterEnum: false
|
||||
AfterFunction: false
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: false
|
||||
SplitEmptyRecord: false
|
||||
SplitEmptyNamespace: false
|
||||
# BreakAdjacentStringLiterals: true
|
||||
BreakAfterAttributes: Never
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeInlineASMColon: OnlyMultiline
|
||||
BreakBeforeTernaryOperators: false
|
||||
# BreakBinaryOperations: Never
|
||||
BreakConstructorInitializers: AfterColon
|
||||
# BreakFunctionDefinitionParameters: false
|
||||
BreakInheritanceList: AfterComma
|
||||
BreakStringLiterals: true
|
||||
# BreakTemplateDeclarations: Yes
|
||||
ColumnLimit: 120
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: false
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
EmptyLineBeforeAccessModifier: Leave
|
||||
EmptyLineAfterAccessModifier: Never
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
FixNamespaceComments: true
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^<.*\.h>'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
- Regex: '^<.*'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
- Regex: '.*'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
IndentAccessModifiers: false
|
||||
IndentCaseBlocks: true
|
||||
IndentCaseLabels: true
|
||||
IndentExternBlock: NoIndent
|
||||
IndentGotoLabels: false
|
||||
IndentPPDirectives: AfterHash
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
InsertBraces: true # NOTE: may lead to incorrect formatting
|
||||
InsertNewlineAtEOF: true
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
LambdaBodyIndentation: Signature
|
||||
LineEnding: LF
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
ObjCBinPackProtocolList: Auto
|
||||
ObjCBlockIndentWidth: 4
|
||||
ObjCSpaceAfterProperty: true
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PPIndentWidth: -1
|
||||
PackConstructorInitializers: CurrentLine
|
||||
PenaltyBreakAssignment: 2
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
PointerAlignment: Middle
|
||||
QualifierAlignment: Left
|
||||
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
|
||||
RawStringFormats:
|
||||
- Language: Cpp
|
||||
Delimiters:
|
||||
- cc
|
||||
- CC
|
||||
- cpp
|
||||
- Cpp
|
||||
- CPP
|
||||
- 'c++'
|
||||
- 'C++'
|
||||
CanonicalDelimiter: ''
|
||||
ReferenceAlignment: Middle
|
||||
ReflowComments: false # IndentOnly
|
||||
SeparateDefinitionBlocks: Always
|
||||
SortIncludes: CaseInsensitive
|
||||
SortUsingDeclarations: LexicographicNumeric
|
||||
SpaceAfterCStyleCast: true
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCpp11BracedList: false
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: Never
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInLineCommentPrefix:
|
||||
Minimum: 1
|
||||
Maximum: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
SpaceBeforeSquareBrackets: false
|
||||
Standard: c++17
|
||||
TabWidth: 4
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros: ['STRINGIZE']
|
||||
...
|
||||
|
|
@ -17,8 +17,10 @@ Checks: >
|
|||
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
|
||||
performance-*,
|
||||
portability-*,
|
||||
-portability-simd-intrinsics,
|
||||
misc-*,
|
||||
-misc-const-correctness,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
-misc-no-recursion,
|
||||
-misc-use-anonymous-namespace,
|
||||
FormatStyle: none
|
||||
|
|
92
.devops/cpu.Dockerfile
Normal file
92
.devops/cpu.Dockerfile
Normal file
|
@ -0,0 +1,92 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION AS build
|
||||
|
||||
ARG TARGETARCH
|
||||
|
||||
ARG GGML_CPU_ARM_ARCH=armv8-a
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential git cmake libcurl4-openssl-dev
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then \
|
||||
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
|
||||
elif [ "$TARGETARCH" = "arm64" ]; then \
|
||||
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
|
||||
else \
|
||||
echo "Unsupported architecture"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
cmake --build build -j $(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base image
|
||||
FROM ubuntu:$UBUNTU_VERSION AS base
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 curl\
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
|
||||
### Full
|
||||
FROM base AS full
|
||||
|
||||
COPY --from=build /app/full /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
git \
|
||||
python3 \
|
||||
python3-pip \
|
||||
&& pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/full/llama-cli /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/full/llama-server /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
94
.devops/cuda.Dockerfile
Normal file
94
.devops/cuda.Dockerfile
Normal file
|
@ -0,0 +1,94 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG CUDA_VERSION=12.6.0
|
||||
# Target the CUDA build image
|
||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
||||
|
||||
# CUDA architecture to build for (defaults to all supported archs)
|
||||
ARG CUDA_DOCKER_ARCH=default
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
||||
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
||||
fi && \
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
||||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base image
|
||||
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 curl\
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
|
||||
### Full
|
||||
FROM base AS full
|
||||
|
||||
COPY --from=build /app/full /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
git \
|
||||
python3 \
|
||||
python3-pip \
|
||||
&& pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
|
||||
ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/full/llama-cli /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/full/llama-server /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
|
@ -1,36 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG CUDA_VERSION=11.7.1
|
||||
|
||||
# Target the CUDA build image
|
||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
ARG CUDA_DOCKER_ARCH=all
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
|
||||
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements requirements
|
||||
|
||||
RUN pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Set nvcc architecture
|
||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
# Enable CUDA
|
||||
ENV GGML_CUDA=1
|
||||
# Enable cURL
|
||||
ENV LLAMA_CURL=1
|
||||
|
||||
RUN make -j$(nproc)
|
||||
|
||||
ENTRYPOINT ["/app/.devops/tools.sh"]
|
|
@ -1,50 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG ROCM_VERSION=5.6
|
||||
|
||||
# Target the CUDA build image
|
||||
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
||||
|
||||
FROM ${BASE_ROCM_DEV_CONTAINER} as build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
|
||||
# This is mostly tied to rocBLAS supported archs.
|
||||
ARG ROCM_DOCKER_ARCH=\
|
||||
gfx803 \
|
||||
gfx900 \
|
||||
gfx906 \
|
||||
gfx908 \
|
||||
gfx90a \
|
||||
gfx1010 \
|
||||
gfx1030 \
|
||||
gfx1100 \
|
||||
gfx1101 \
|
||||
gfx1102
|
||||
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements requirements
|
||||
|
||||
RUN pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Set nvcc architecture
|
||||
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
|
||||
# Enable ROCm
|
||||
ENV GGML_HIPBLAS=1
|
||||
ENV CC=/opt/rocm/llvm/bin/clang
|
||||
ENV CXX=/opt/rocm/llvm/bin/clang++
|
||||
|
||||
# Enable cURL
|
||||
ENV LLAMA_CURL=1
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev
|
||||
|
||||
RUN make -j$(nproc)
|
||||
|
||||
ENTRYPOINT ["/app/.devops/tools.sh"]
|
|
@ -1,25 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
|
||||
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements requirements
|
||||
|
||||
RUN pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
ENV LLAMA_CURL=1
|
||||
|
||||
|
||||
RUN make -j$(nproc)
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT ["/app/.devops/tools.sh"]
|
91
.devops/intel.Dockerfile
Normal file
91
.devops/intel.Dockerfile
Normal file
|
@ -0,0 +1,91 @@
|
|||
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04
|
||||
|
||||
## Build Image
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
|
||||
|
||||
ARG GGML_SYCL_F16=OFF
|
||||
RUN apt-get update && \
|
||||
apt-get install -y git libcurl4-openssl-dev
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
||||
echo "GGML_SYCL_F16 is set" \
|
||||
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
||||
fi && \
|
||||
echo "Building with dynamic libs" && \
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
|
||||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 curl\
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
### Full
|
||||
FROM base AS full
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
COPY --from=build /app/full /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
git \
|
||||
python3 \
|
||||
python3-pip \
|
||||
&& pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
|
||||
ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
COPY --from=build /app/full/llama-cli /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
COPY --from=build /app/full/llama-server /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
||||
|
44
.devops/llama-cli-cann.Dockerfile
Normal file
44
.devops/llama-cli-cann.Dockerfile
Normal file
|
@ -0,0 +1,44 @@
|
|||
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
|
||||
|
||||
FROM ascendai/cann:$ASCEND_VERSION AS build
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN yum install -y gcc g++ cmake make
|
||||
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
||||
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
|
||||
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
|
||||
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
|
||||
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
|
||||
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
|
||||
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
||||
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
|
||||
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
|
||||
|
||||
# find libascend_hal.so, because the drive hasn`t been mounted.
|
||||
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
|
||||
|
||||
RUN echo "Building with static libs" && \
|
||||
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
|
||||
cmake --build build --config Release --target llama-cli
|
||||
|
||||
# TODO: use image with NNRT
|
||||
FROM ascendai/cann:$ASCEND_VERSION AS runtime
|
||||
COPY --from=build /app/build/bin/llama-cli /llama-cli
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
||||
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
|
||||
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
|
||||
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
|
||||
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
|
||||
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
|
||||
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
||||
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
|
||||
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
|
||||
|
||||
ENTRYPOINT ["/llama-cli" ]
|
|
@ -1,35 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG CUDA_VERSION=11.7.1
|
||||
# Target the CUDA build image
|
||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
# Target the CUDA runtime image
|
||||
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
ARG CUDA_DOCKER_ARCH=all
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Set nvcc architecture
|
||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
# Enable CUDA
|
||||
ENV GGML_CUDA=1
|
||||
|
||||
RUN make -j$(nproc) llama-cli
|
||||
|
||||
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libgomp1
|
||||
|
||||
COPY --from=build /app/llama-cli /llama-cli
|
||||
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -1,26 +0,0 @@
|
|||
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
|
||||
|
||||
ARG GGML_SYCL_F16=OFF
|
||||
RUN apt-get update && \
|
||||
apt-get install -y git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
||||
echo "GGML_SYCL_F16 is set" && \
|
||||
export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
||||
fi && \
|
||||
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
|
||||
cmake --build build --config Release --target llama-cli
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
|
||||
|
||||
COPY --from=build /app/build/bin/llama-cli /llama-cli
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -1,45 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG ROCM_VERSION=5.6
|
||||
|
||||
# Target the CUDA build image
|
||||
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
||||
|
||||
FROM ${BASE_ROCM_DEV_CONTAINER} as build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
|
||||
# This is mostly tied to rocBLAS supported archs.
|
||||
ARG ROCM_DOCKER_ARCH=\
|
||||
gfx803 \
|
||||
gfx900 \
|
||||
gfx906 \
|
||||
gfx908 \
|
||||
gfx90a \
|
||||
gfx1010 \
|
||||
gfx1030 \
|
||||
gfx1100 \
|
||||
gfx1101 \
|
||||
gfx1102
|
||||
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements requirements
|
||||
|
||||
RUN pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Set nvcc architecture
|
||||
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
|
||||
# Enable ROCm
|
||||
ENV GGML_HIPBLAS=1
|
||||
ENV CC=/opt/rocm/llvm/bin/clang
|
||||
ENV CXX=/opt/rocm/llvm/bin/clang++
|
||||
|
||||
RUN make -j$(nproc) llama-cli
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
|
@ -1,27 +0,0 @@
|
|||
ARG UBUNTU_VERSION=jammy
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as build
|
||||
|
||||
# Install build tools
|
||||
RUN apt update && apt install -y git build-essential cmake wget libgomp1
|
||||
|
||||
# Install Vulkan SDK
|
||||
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||
apt update -y && \
|
||||
apt-get install -y vulkan-sdk
|
||||
|
||||
# Build it
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
RUN cmake -B build -DGGML_VULKAN=1 && \
|
||||
cmake --build build --config Release --target llama-cli
|
||||
|
||||
# Clean up
|
||||
WORKDIR /
|
||||
RUN cp /app/build/bin/llama-cli /llama-cli && \
|
||||
rm -rf /app
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -1,23 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN make -j$(nproc) llama-cli
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libgomp1
|
||||
|
||||
COPY --from=build /app/llama-cli /llama-cli
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -1,39 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG CUDA_VERSION=11.7.1
|
||||
# Target the CUDA build image
|
||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
# Target the CUDA runtime image
|
||||
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
ARG CUDA_DOCKER_ARCH=all
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential git libcurl4-openssl-dev
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Set nvcc architecture
|
||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
# Enable CUDA
|
||||
ENV GGML_CUDA=1
|
||||
# Enable cURL
|
||||
ENV LLAMA_CURL=1
|
||||
|
||||
RUN make -j$(nproc) llama-server
|
||||
|
||||
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev libgomp1 curl
|
||||
|
||||
COPY --from=build /app/llama-server /llama-server
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/llama-server" ]
|
|
@ -1,31 +0,0 @@
|
|||
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
|
||||
|
||||
ARG GGML_SYCL_F16=OFF
|
||||
RUN apt-get update && \
|
||||
apt-get install -y git libcurl4-openssl-dev
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
||||
echo "GGML_SYCL_F16 is set" && \
|
||||
export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
||||
fi && \
|
||||
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
|
||||
cmake --build build --config Release --target llama-server
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev curl
|
||||
|
||||
COPY --from=build /app/build/bin/llama-server /llama-server
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/llama-server" ]
|
|
@ -1,52 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG ROCM_VERSION=5.6
|
||||
|
||||
# Target the CUDA build image
|
||||
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
||||
|
||||
FROM ${BASE_ROCM_DEV_CONTAINER} as build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
|
||||
# This is mostly tied to rocBLAS supported archs.
|
||||
ARG ROCM_DOCKER_ARCH=\
|
||||
gfx803 \
|
||||
gfx900 \
|
||||
gfx906 \
|
||||
gfx908 \
|
||||
gfx90a \
|
||||
gfx1010 \
|
||||
gfx1030 \
|
||||
gfx1100 \
|
||||
gfx1101 \
|
||||
gfx1102
|
||||
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements requirements
|
||||
|
||||
RUN pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Set nvcc architecture
|
||||
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
|
||||
# Enable ROCm
|
||||
ENV GGML_HIPBLAS=1
|
||||
ENV CC=/opt/rocm/llvm/bin/clang
|
||||
ENV CXX=/opt/rocm/llvm/bin/clang++
|
||||
|
||||
# Enable cURL
|
||||
ENV LLAMA_CURL=1
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev curl
|
||||
|
||||
RUN make -j$(nproc) llama-server
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
|
@ -1,29 +0,0 @@
|
|||
ARG UBUNTU_VERSION=jammy
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as build
|
||||
|
||||
# Install build tools
|
||||
RUN apt update && apt install -y git build-essential cmake wget
|
||||
|
||||
# Install Vulkan SDK and cURL
|
||||
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||
apt update -y && \
|
||||
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
|
||||
|
||||
# Build it
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
|
||||
cmake --build build --config Release --target llama-server
|
||||
|
||||
# Clean up
|
||||
WORKDIR /
|
||||
RUN cp /app/build/bin/llama-server /llama-server && \
|
||||
rm -rf /app
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/llama-server" ]
|
|
@ -1,27 +0,0 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential git libcurl4-openssl-dev curl
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
ENV LLAMA_CURL=1
|
||||
|
||||
RUN make -j$(nproc) llama-server
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev libgomp1
|
||||
|
||||
COPY --from=build /app/llama-server /llama-server
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/llama-server" ]
|
108
.devops/musa.Dockerfile
Normal file
108
.devops/musa.Dockerfile
Normal file
|
@ -0,0 +1,108 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG MUSA_VERSION=rc3.1.0
|
||||
# Target the MUSA build image
|
||||
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
|
||||
|
||||
# MUSA architecture to build for (defaults to all supported archs)
|
||||
ARG MUSA_DOCKER_ARCH=default
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
build-essential \
|
||||
cmake \
|
||||
python3 \
|
||||
python3-pip \
|
||||
git \
|
||||
libcurl4-openssl-dev \
|
||||
libgomp1
|
||||
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements requirements
|
||||
|
||||
RUN pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Use the default MUSA archs if not specified
|
||||
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
|
||||
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
|
||||
fi && \
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
||||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base image
|
||||
FROM ${BASE_MUSA_RUN_CONTAINER} AS base
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 curl\
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
|
||||
### Full
|
||||
FROM base AS full
|
||||
|
||||
COPY --from=build /app/full /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
git \
|
||||
python3 \
|
||||
python3-pip \
|
||||
&& pip install --upgrade pip setuptools wheel \
|
||||
&& pip install -r requirements.txt \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
|
||||
ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/full/llama-cli /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/full/llama-server /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
|
@ -10,7 +10,6 @@
|
|||
"llama-embedding"
|
||||
"llama-server"
|
||||
"llama-quantize"
|
||||
"llama-train-text-from-scratch"
|
||||
];
|
||||
mkApp = name: {
|
||||
type = "app";
|
||||
|
|
|
@ -1,13 +1,52 @@
|
|||
{ inputs, ... }:
|
||||
|
||||
{
|
||||
perSystem =
|
||||
{ config, lib, ... }:
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
system,
|
||||
...
|
||||
}:
|
||||
{
|
||||
devShells =
|
||||
lib.concatMapAttrs
|
||||
(name: package: {
|
||||
${name} = package.passthru.shell;
|
||||
${name + "-extra"} = package.passthru.shell-extra;
|
||||
})
|
||||
config.packages;
|
||||
let
|
||||
pkgs = import inputs.nixpkgs { inherit system; };
|
||||
stdenv = pkgs.stdenv;
|
||||
scripts = config.packages.python-scripts;
|
||||
in
|
||||
lib.pipe (config.packages) [
|
||||
(lib.concatMapAttrs (
|
||||
name: package: {
|
||||
${name} = pkgs.mkShell {
|
||||
name = "${name}";
|
||||
inputsFrom = [ package ];
|
||||
shellHook = ''
|
||||
echo "Entering ${name} devShell"
|
||||
'';
|
||||
};
|
||||
"${name}-extra" =
|
||||
if (name == "python-scripts") then
|
||||
null
|
||||
else
|
||||
pkgs.mkShell {
|
||||
name = "${name}-extra";
|
||||
inputsFrom = [
|
||||
package
|
||||
scripts
|
||||
];
|
||||
# Extra packages that *may* be used by some scripts
|
||||
packages = [
|
||||
pkgs.python3Packages.tiktoken
|
||||
];
|
||||
shellHook = ''
|
||||
echo "Entering ${name} devShell"
|
||||
addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
|
||||
'';
|
||||
};
|
||||
}
|
||||
))
|
||||
(lib.filterAttrs (name: value: value != null))
|
||||
];
|
||||
};
|
||||
}
|
||||
|
|
|
@ -26,16 +26,14 @@
|
|||
config.cudaSupport = true;
|
||||
config.allowUnfreePredicate =
|
||||
p:
|
||||
builtins.all
|
||||
(
|
||||
builtins.all (
|
||||
license:
|
||||
license.free
|
||||
|| builtins.elem license.shortName [
|
||||
"CUDA EULA"
|
||||
"cuDNN EULA"
|
||||
]
|
||||
)
|
||||
(p.meta.licenses or [ p.meta.license ]);
|
||||
) (p.meta.licenses or [ p.meta.license ]);
|
||||
};
|
||||
# Ensure dependencies use ROCm consistently
|
||||
pkgsRocm = import inputs.nixpkgs {
|
||||
|
|
36
.devops/nix/package-gguf-py.nix
Normal file
36
.devops/nix/package-gguf-py.nix
Normal file
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
lib,
|
||||
llamaVersion,
|
||||
numpy,
|
||||
tqdm,
|
||||
sentencepiece,
|
||||
pyyaml,
|
||||
poetry-core,
|
||||
buildPythonPackage,
|
||||
pytestCheckHook,
|
||||
}:
|
||||
|
||||
buildPythonPackage {
|
||||
pname = "gguf";
|
||||
version = llamaVersion;
|
||||
pyproject = true;
|
||||
nativeBuildInputs = [ poetry-core ];
|
||||
propagatedBuildInputs = [
|
||||
numpy
|
||||
tqdm
|
||||
sentencepiece
|
||||
pyyaml
|
||||
];
|
||||
src = lib.cleanSource ../../gguf-py;
|
||||
pythonImportsCheck = [
|
||||
"numpy"
|
||||
"gguf"
|
||||
];
|
||||
nativeCheckInputs = [ pytestCheckHook ];
|
||||
doCheck = true;
|
||||
meta = with lib; {
|
||||
description = "Python package for writing binary files in the GGUF format";
|
||||
license = licenses.mit;
|
||||
maintainers = [ maintainers.ditsuke ];
|
||||
};
|
||||
}
|
|
@ -3,33 +3,36 @@
|
|||
glibc,
|
||||
config,
|
||||
stdenv,
|
||||
mkShell,
|
||||
runCommand,
|
||||
cmake,
|
||||
ninja,
|
||||
pkg-config,
|
||||
git,
|
||||
python3,
|
||||
mpi,
|
||||
blas,
|
||||
cudaPackages,
|
||||
autoAddDriverRunpath,
|
||||
darwin,
|
||||
rocmPackages,
|
||||
vulkan-headers,
|
||||
vulkan-loader,
|
||||
clblast,
|
||||
useBlas ? builtins.all (x: !x) [
|
||||
curl,
|
||||
shaderc,
|
||||
useBlas ?
|
||||
builtins.all (x: !x) [
|
||||
useCuda
|
||||
useMetalKit
|
||||
useOpenCL
|
||||
useRocm
|
||||
useVulkan
|
||||
] && blas.meta.available,
|
||||
]
|
||||
&& blas.meta.available,
|
||||
useCuda ? config.cudaSupport,
|
||||
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
|
||||
useMpi ? false, # Increases the runtime closure size by ~700M
|
||||
useOpenCL ? false,
|
||||
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
|
||||
# Increases the runtime closure size by ~700M
|
||||
useMpi ? false,
|
||||
useRocm ? config.rocmSupport,
|
||||
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
|
||||
enableCurl ? true,
|
||||
useVulkan ? false,
|
||||
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
|
||||
|
||||
|
@ -37,8 +40,8 @@
|
|||
# otherwise we get libstdc++ errors downstream.
|
||||
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
|
||||
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
|
||||
precompileMetalShaders ? false
|
||||
}@inputs:
|
||||
precompileMetalShaders ? false,
|
||||
}:
|
||||
|
||||
let
|
||||
inherit (lib)
|
||||
|
@ -46,7 +49,6 @@ let
|
|||
cmakeFeature
|
||||
optionals
|
||||
strings
|
||||
versionOlder
|
||||
;
|
||||
|
||||
stdenv = throw "Use effectiveStdenv instead";
|
||||
|
@ -56,45 +58,17 @@ let
|
|||
++ lib.optionals useCuda [ "CUDA" ]
|
||||
++ lib.optionals useMetalKit [ "MetalKit" ]
|
||||
++ lib.optionals useMpi [ "MPI" ]
|
||||
++ lib.optionals useOpenCL [ "OpenCL" ]
|
||||
++ lib.optionals useRocm [ "ROCm" ]
|
||||
++ lib.optionals useVulkan [ "Vulkan" ];
|
||||
|
||||
pnameSuffix =
|
||||
strings.optionalString (suffices != [ ])
|
||||
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
|
||||
descriptionSuffix =
|
||||
strings.optionalString (suffices != [ ])
|
||||
", accelerated with ${strings.concatStringsSep ", " suffices}";
|
||||
descriptionSuffix = strings.optionalString (
|
||||
suffices != [ ]
|
||||
) ", accelerated with ${strings.concatStringsSep ", " suffices}";
|
||||
|
||||
executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
|
||||
|
||||
# TODO: package the Python in this repository in a Nix-like way.
|
||||
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
|
||||
# is PEP 517-compatible, and ensure the correct .dist-info is generated.
|
||||
# https://peps.python.org/pep-0517/
|
||||
#
|
||||
# TODO: Package up each Python script or service appropriately, by making
|
||||
# them into "entrypoints"
|
||||
llama-python = python3.withPackages (
|
||||
ps: [
|
||||
ps.numpy
|
||||
ps.sentencepiece
|
||||
]
|
||||
);
|
||||
|
||||
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
|
||||
llama-python-extra = python3.withPackages (
|
||||
ps: [
|
||||
ps.numpy
|
||||
ps.sentencepiece
|
||||
ps.tiktoken
|
||||
ps.torchWithoutCuda
|
||||
ps.transformers
|
||||
]
|
||||
);
|
||||
|
||||
xcrunHost = runCommand "xcrunHost" {} ''
|
||||
xcrunHost = runCommand "xcrunHost" { } ''
|
||||
mkdir -p $out/bin
|
||||
ln -s /usr/bin/xcrun $out/bin
|
||||
'';
|
||||
|
@ -111,16 +85,9 @@ let
|
|||
++ optionals useMetalKit [ MetalKit ];
|
||||
|
||||
cudaBuildInputs = with cudaPackages; [
|
||||
cuda_cccl.dev # <nv/target>
|
||||
|
||||
# A temporary hack for reducing the closure size, remove once cudaPackages
|
||||
# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
|
||||
cuda_cudart.dev
|
||||
cuda_cudart.lib
|
||||
cuda_cudart.static
|
||||
libcublas.dev
|
||||
libcublas.lib
|
||||
libcublas.static
|
||||
cuda_cudart
|
||||
cuda_cccl # <nv/target>
|
||||
libcublas
|
||||
];
|
||||
|
||||
rocmBuildInputs = with rocmPackages; [
|
||||
|
@ -132,11 +99,11 @@ let
|
|||
vulkanBuildInputs = [
|
||||
vulkan-headers
|
||||
vulkan-loader
|
||||
shaderc
|
||||
];
|
||||
in
|
||||
|
||||
effectiveStdenv.mkDerivation (
|
||||
finalAttrs: {
|
||||
effectiveStdenv.mkDerivation (finalAttrs: {
|
||||
pname = "llama-cpp${pnameSuffix}";
|
||||
version = llamaVersion;
|
||||
|
||||
|
@ -160,9 +127,9 @@ effectiveStdenv.mkDerivation (
|
|||
};
|
||||
|
||||
postPatch = ''
|
||||
substituteInPlace ./ggml/src/ggml-metal.m \
|
||||
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
|
||||
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
|
||||
substituteInPlace ./ggml/src/ggml-metal.m \
|
||||
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
|
||||
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
|
||||
'';
|
||||
|
||||
|
@ -184,35 +151,30 @@ effectiveStdenv.mkDerivation (
|
|||
++ optionals useCuda [
|
||||
cudaPackages.cuda_nvcc
|
||||
|
||||
# TODO: Replace with autoAddDriverRunpath
|
||||
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
|
||||
cudaPackages.autoAddOpenGLRunpathHook
|
||||
autoAddDriverRunpath
|
||||
]
|
||||
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
|
||||
glibc.static
|
||||
] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
|
||||
xcrunHost
|
||||
];
|
||||
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
|
||||
++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
|
||||
|
||||
buildInputs =
|
||||
optionals effectiveStdenv.isDarwin darwinBuildInputs
|
||||
++ optionals useCuda cudaBuildInputs
|
||||
++ optionals useMpi [ mpi ]
|
||||
++ optionals useOpenCL [ clblast ]
|
||||
++ optionals useRocm rocmBuildInputs
|
||||
++ optionals useBlas [ blas ]
|
||||
++ optionals useVulkan vulkanBuildInputs;
|
||||
++ optionals useVulkan vulkanBuildInputs
|
||||
++ optionals enableCurl [ curl ];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
(cmakeBool "LLAMA_BUILD_SERVER" true)
|
||||
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
|
||||
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
|
||||
(cmakeBool "LLAMA_CURL" enableCurl)
|
||||
(cmakeBool "GGML_NATIVE" false)
|
||||
(cmakeBool "GGML_BLAS" useBlas)
|
||||
(cmakeBool "GGML_CLBLAST" useOpenCL)
|
||||
(cmakeBool "GGML_CUDA" useCuda)
|
||||
(cmakeBool "GGML_HIPBLAS" useRocm)
|
||||
(cmakeBool "GGML_HIP" useRocm)
|
||||
(cmakeBool "GGML_METAL" useMetalKit)
|
||||
(cmakeBool "GGML_VULKAN" useVulkan)
|
||||
(cmakeBool "GGML_STATIC" enableStatic)
|
||||
|
@ -227,7 +189,7 @@ effectiveStdenv.mkDerivation (
|
|||
]
|
||||
++ optionals useRocm [
|
||||
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
|
||||
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
|
||||
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
|
||||
]
|
||||
++ optionals useMetalKit [
|
||||
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
|
||||
|
@ -247,41 +209,11 @@ effectiveStdenv.mkDerivation (
|
|||
cp $src/include/llama.h $out/include/
|
||||
'';
|
||||
|
||||
# Define the shells here, but don't add in the inputsFrom to avoid recursion.
|
||||
passthru = {
|
||||
inherit
|
||||
useBlas
|
||||
useCuda
|
||||
useMetalKit
|
||||
useMpi
|
||||
useOpenCL
|
||||
useRocm
|
||||
useVulkan
|
||||
;
|
||||
|
||||
shell = mkShell {
|
||||
name = "shell-${finalAttrs.finalPackage.name}";
|
||||
description = "contains numpy and sentencepiece";
|
||||
buildInputs = [ llama-python ];
|
||||
inputsFrom = [ finalAttrs.finalPackage ];
|
||||
shellHook = ''
|
||||
addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
|
||||
'';
|
||||
};
|
||||
|
||||
shell-extra = mkShell {
|
||||
name = "shell-extra-${finalAttrs.finalPackage.name}";
|
||||
description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
|
||||
buildInputs = [ llama-python-extra ];
|
||||
inputsFrom = [ finalAttrs.finalPackage ];
|
||||
};
|
||||
};
|
||||
|
||||
meta = {
|
||||
# Configurations we don't want even the CI to evaluate. Results in the
|
||||
# "unsupported platform" messages. This is mostly a no-op, because
|
||||
# cudaPackages would've refused to evaluate anyway.
|
||||
badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
|
||||
badPlatforms = optionals useCuda lib.platforms.darwin;
|
||||
|
||||
# Configurations that are known to result in build failures. Can be
|
||||
# overridden by importing Nixpkgs with `allowBroken = true`.
|
||||
|
@ -312,5 +244,4 @@ effectiveStdenv.mkDerivation (
|
|||
# Extend `badPlatforms` instead
|
||||
platforms = lib.platforms.all;
|
||||
};
|
||||
}
|
||||
)
|
||||
})
|
||||
|
|
66
.devops/nix/python-scripts.nix
Normal file
66
.devops/nix/python-scripts.nix
Normal file
|
@ -0,0 +1,66 @@
|
|||
{
|
||||
lib,
|
||||
stdenv,
|
||||
buildPythonPackage,
|
||||
poetry-core,
|
||||
mkShell,
|
||||
python3Packages,
|
||||
gguf-py,
|
||||
}@inputs:
|
||||
|
||||
let
|
||||
llama-python-deps = with python3Packages; [
|
||||
numpy
|
||||
sentencepiece
|
||||
transformers
|
||||
protobuf
|
||||
torchWithoutCuda
|
||||
gguf-py
|
||||
tqdm
|
||||
|
||||
# for scripts/compare-llama-bench.py
|
||||
gitpython
|
||||
tabulate
|
||||
|
||||
# for examples/pydantic-models-to-grammar-examples.py
|
||||
docstring-parser
|
||||
pydantic
|
||||
|
||||
];
|
||||
|
||||
llama-python-test-deps = with python3Packages; [
|
||||
# Server bench
|
||||
matplotlib
|
||||
|
||||
# server tests
|
||||
openai
|
||||
pytest
|
||||
prometheus-client
|
||||
];
|
||||
in
|
||||
|
||||
buildPythonPackage ({
|
||||
pname = "llama-scripts";
|
||||
version = "0.0.0";
|
||||
pyproject = true;
|
||||
|
||||
# NOTE: The files filtered out here are not visible in the build sandbox, neither
|
||||
# do they affect the output hash. They can be modified without triggering a rebuild.
|
||||
src = lib.cleanSourceWith {
|
||||
filter =
|
||||
name: type:
|
||||
let
|
||||
any = builtins.any (x: x);
|
||||
baseName = builtins.baseNameOf name;
|
||||
in
|
||||
any [
|
||||
(lib.hasSuffix ".py" name)
|
||||
(baseName == "README.md")
|
||||
(baseName == "pyproject.toml")
|
||||
];
|
||||
src = lib.cleanSource ../../.;
|
||||
};
|
||||
nativeBuildInputs = [ poetry-core ];
|
||||
nativeCheckInputs = llama-python-test-deps;
|
||||
dependencies = llama-python-deps;
|
||||
})
|
|
@ -1,19 +1,41 @@
|
|||
{
|
||||
lib,
|
||||
newScope,
|
||||
python3,
|
||||
llamaVersion ? "0.0.0",
|
||||
}:
|
||||
|
||||
let
|
||||
pythonPackages = python3.pkgs;
|
||||
buildPythonPackage = pythonPackages.buildPythonPackage;
|
||||
numpy = pythonPackages.numpy;
|
||||
tqdm = pythonPackages.tqdm;
|
||||
sentencepiece = pythonPackages.sentencepiece;
|
||||
pyyaml = pythonPackages.pyyaml;
|
||||
poetry-core = pythonPackages.poetry-core;
|
||||
pytestCheckHook = pythonPackages.pytestCheckHook;
|
||||
in
|
||||
|
||||
# We're using `makeScope` instead of just writing out an attrset
|
||||
# because it allows users to apply overlays later using `overrideScope'`.
|
||||
# Cf. https://noogle.dev/f/lib/makeScope
|
||||
|
||||
lib.makeScope newScope (
|
||||
self: {
|
||||
lib.makeScope newScope (self: {
|
||||
inherit llamaVersion;
|
||||
gguf-py = self.callPackage ./package-gguf-py.nix {
|
||||
inherit
|
||||
buildPythonPackage
|
||||
numpy
|
||||
tqdm
|
||||
sentencepiece
|
||||
poetry-core
|
||||
pyyaml
|
||||
pytestCheckHook
|
||||
;
|
||||
};
|
||||
python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
|
||||
llama-cpp = self.callPackage ./package.nix { };
|
||||
docker = self.callPackage ./docker.nix { };
|
||||
docker-min = self.callPackage ./docker.nix { interactive = false; };
|
||||
sif = self.callPackage ./sif.nix { };
|
||||
}
|
||||
)
|
||||
})
|
||||
|
|
113
.devops/rocm.Dockerfile
Normal file
113
.devops/rocm.Dockerfile
Normal file
|
@ -0,0 +1,113 @@
|
|||
ARG UBUNTU_VERSION=24.04
|
||||
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG ROCM_VERSION=6.3
|
||||
ARG AMDGPU_VERSION=6.3
|
||||
|
||||
# Target the CUDA build image
|
||||
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
||||
|
||||
### Build image
|
||||
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
|
||||
# This is mostly tied to rocBLAS supported archs.
|
||||
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
|
||||
# gfx906 is deprecated
|
||||
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
|
||||
|
||||
#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
|
||||
ARG ROCM_DOCKER_ARCH=gfx1100
|
||||
|
||||
# Set nvcc architectured
|
||||
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
|
||||
# Enable ROCm
|
||||
# ENV CC=/opt/rocm/llvm/bin/clang
|
||||
# ENV CXX=/opt/rocm/llvm/bin/clang++
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
build-essential \
|
||||
cmake \
|
||||
git \
|
||||
libcurl4-openssl-dev \
|
||||
curl \
|
||||
libgomp1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
|
||||
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
|
||||
&& cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib \
|
||||
&& find build -name "*.so" -exec cp {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base image
|
||||
FROM ${BASE_ROCM_DEV_CONTAINER} AS base
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 curl\
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
|
||||
### Full
|
||||
FROM base AS full
|
||||
|
||||
COPY --from=build /app/full /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
git \
|
||||
python3-pip \
|
||||
python3 \
|
||||
python3-wheel\
|
||||
&& pip install --break-system-packages --upgrade setuptools \
|
||||
&& pip install --break-system-packages -r requirements.txt \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/full/llama-cli /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/full/llama-server /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
|
@ -8,36 +8,40 @@ arg1="$1"
|
|||
shift
|
||||
|
||||
if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
|
||||
python3 ./convert-hf-to-gguf.py "$@"
|
||||
exec python3 ./convert_hf_to_gguf.py "$@"
|
||||
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
|
||||
./llama-quantize "$@"
|
||||
exec ./llama-quantize "$@"
|
||||
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
|
||||
./llama-cli "$@"
|
||||
elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
|
||||
./llama-finetune "$@"
|
||||
exec ./llama-cli "$@"
|
||||
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
|
||||
exec ./llama-bench "$@"
|
||||
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
|
||||
exec ./llama-perplexity "$@"
|
||||
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
|
||||
echo "Converting PTH to GGML..."
|
||||
for i in `ls $1/$2/ggml-model-f16.bin*`; do
|
||||
for i in $(ls $1/$2/ggml-model-f16.bin*); do
|
||||
if [ -f "${i/f16/q4_0}" ]; then
|
||||
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
|
||||
else
|
||||
echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
|
||||
./llama-quantize "$i" "${i/f16/q4_0}" q4_0
|
||||
exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
|
||||
fi
|
||||
done
|
||||
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
|
||||
./llama-server "$@"
|
||||
exec ./llama-server "$@"
|
||||
else
|
||||
echo "Unknown command: $arg1"
|
||||
echo "Available commands: "
|
||||
echo " --run (-r): Run a model previously converted into ggml"
|
||||
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
|
||||
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
|
||||
echo " ex: -m model.gguf"
|
||||
echo " --perplexity (-p): Measure the perplexity of a model over a given text."
|
||||
echo " ex: -m model.gguf -f file.txt"
|
||||
echo " --convert (-c): Convert a llama model into ggml"
|
||||
echo " ex: --outtype f16 \"/models/7B/\" "
|
||||
echo " --quantize (-q): Optimize with quantization process ggml"
|
||||
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
|
||||
echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
|
||||
echo " See documentation for finetune for command-line parameters"
|
||||
echo " --all-in-one (-a): Execute --convert & --quantize"
|
||||
echo " ex: \"/models/\" 7B"
|
||||
echo " --server (-s): Run a model on the server"
|
||||
|
|
89
.devops/vulkan.Dockerfile
Normal file
89
.devops/vulkan.Dockerfile
Normal file
|
@ -0,0 +1,89 @@
|
|||
ARG UBUNTU_VERSION=24.04
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION AS build
|
||||
|
||||
# Install build tools
|
||||
RUN apt update && apt install -y git build-essential cmake wget
|
||||
|
||||
# Install Vulkan SDK and cURL
|
||||
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||
apt update -y && \
|
||||
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
|
||||
|
||||
# Build it
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
|
||||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base image
|
||||
FROM ubuntu:$UBUNTU_VERSION AS base
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 curl libvulkan-dev \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
|
||||
### Full
|
||||
FROM base AS full
|
||||
|
||||
COPY --from=build /app/full /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
git \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-wheel \
|
||||
&& pip install --break-system-packages --upgrade setuptools \
|
||||
&& pip install --break-system-packages -r requirements.txt \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/full/llama-cli /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/full/llama-server /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
|
@ -1,7 +1,7 @@
|
|||
*.o
|
||||
*.a
|
||||
.cache/
|
||||
.git/
|
||||
# Do not ignore .git directory, otherwise the reported build number will always be 0
|
||||
.github/
|
||||
.gitignore
|
||||
.vs/
|
||||
|
|
2
.ecrc
2
.ecrc
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"Exclude": ["^\\.gitmodules$"],
|
||||
"Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
|
||||
"Disable": {
|
||||
"IndentSize": true
|
||||
}
|
||||
|
|
|
@ -24,9 +24,27 @@ insert_final_newline = unset
|
|||
[examples/server/public/*]
|
||||
indent_size = 2
|
||||
|
||||
[examples/server/public/deps_*]
|
||||
trim_trailing_whitespace = unset
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
|
||||
[examples/server/deps_*]
|
||||
trim_trailing_whitespace = unset
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
|
||||
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
|
||||
indent_style = tab
|
||||
|
||||
[examples/cvector-generator/*.txt]
|
||||
trim_trailing_whitespace = unset
|
||||
insert_final_newline = unset
|
||||
|
||||
[models/templates/*.jinja]
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
end_of_line = unset
|
||||
charset = unset
|
||||
trim_trailing_whitespace = unset
|
||||
insert_final_newline = unset
|
||||
|
|
50
.github/ISSUE_TEMPLATE/01-bug-low.yml
vendored
50
.github/ISSUE_TEMPLATE/01-bug-low.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: Low Severity Bugs
|
||||
description: Used to report low severity bugs in llama.cpp (e.g. cosmetic issues, non critical UI glitches)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "low severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
87
.github/ISSUE_TEMPLATE/010-bug-compilation.yml
vendored
Normal file
87
.github/ISSUE_TEMPLATE/010-bug-compilation.yml
vendored
Normal file
|
@ -0,0 +1,87 @@
|
|||
name: Bug (compilation)
|
||||
description: Something goes wrong when trying to compile llama.cpp.
|
||||
title: "Compile bug: "
|
||||
labels: ["bug-unconfirmed", "compilation"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: >
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
This issue template is intended for bug reports where the compilation of llama.cpp fails.
|
||||
Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
|
||||
If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
|
||||
by clearing `~/.cache/ccache` (on Linux).
|
||||
- type: textarea
|
||||
id: commit
|
||||
attributes:
|
||||
label: Git commit
|
||||
description: Which commit are you trying to compile?
|
||||
placeholder: |
|
||||
$git rev-parse HEAD
|
||||
84a07a17b1b08cf2b9747c633a2372782848a27f
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: Operating systems
|
||||
description: Which operating systems do you know to be affected?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: backends
|
||||
attributes:
|
||||
label: GGML backends
|
||||
description: Which GGML backends do you know to be affected?
|
||||
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
|
||||
multiple: true
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: info
|
||||
attributes:
|
||||
label: Problem description & steps to reproduce
|
||||
description: >
|
||||
Please give us a summary of the problem and tell us how to reproduce it.
|
||||
If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
|
||||
placeholder: >
|
||||
I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
|
||||
Here are the exact commands that I used: ...
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: first_bad_commit
|
||||
attributes:
|
||||
label: First Bad Commit
|
||||
description: >
|
||||
If the bug was not present on an earlier version: when did it start appearing?
|
||||
If possible, please do a git bisect and identify the exact commit that introduced the bug.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: command
|
||||
attributes:
|
||||
label: Compile command
|
||||
description: >
|
||||
Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
|
||||
This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: >
|
||||
Please copy and paste any relevant log output, including any generated text.
|
||||
This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
101
.github/ISSUE_TEMPLATE/011-bug-results.yml
vendored
Normal file
101
.github/ISSUE_TEMPLATE/011-bug-results.yml
vendored
Normal file
|
@ -0,0 +1,101 @@
|
|||
name: Bug (model use)
|
||||
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
|
||||
title: "Eval bug: "
|
||||
labels: ["bug-unconfirmed", "model evaluation"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: >
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
This issue template is intended for bug reports where the model evaluation results
|
||||
(i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
|
||||
If you encountered the issue while using an external UI (e.g. ollama),
|
||||
please reproduce your issue using one of the examples/binaries in this repository.
|
||||
The `llama-cli` binary can be used for simple and reproducible model inference.
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: Operating systems
|
||||
description: Which operating systems do you know to be affected?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: backends
|
||||
attributes:
|
||||
label: GGML backends
|
||||
description: Which GGML backends do you know to be affected?
|
||||
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
|
||||
multiple: true
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: hardware
|
||||
attributes:
|
||||
label: Hardware
|
||||
description: Which CPUs/GPUs are you using?
|
||||
placeholder: >
|
||||
e.g. Ryzen 5950X + 2x RTX 4090
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: model
|
||||
attributes:
|
||||
label: Models
|
||||
description: >
|
||||
Which model(s) at which quantization were you using when encountering the bug?
|
||||
If you downloaded a GGUF file off of Huggingface, please provide a link.
|
||||
placeholder: >
|
||||
e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: info
|
||||
attributes:
|
||||
label: Problem description & steps to reproduce
|
||||
description: >
|
||||
Please give us a summary of the problem and tell us how to reproduce it.
|
||||
If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
|
||||
that information would be very much appreciated by us.
|
||||
placeholder: >
|
||||
e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
|
||||
When I use -ngl 0 it works correctly.
|
||||
Here are the exact commands that I used: ...
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: first_bad_commit
|
||||
attributes:
|
||||
label: First Bad Commit
|
||||
description: >
|
||||
If the bug was not present on an earlier version: when did it start appearing?
|
||||
If possible, please do a git bisect and identify the exact commit that introduced the bug.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: >
|
||||
Please copy and paste any relevant log output, including the command that you entered and any generated text.
|
||||
This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
91
.github/ISSUE_TEMPLATE/019-bug-misc.yml
vendored
Normal file
91
.github/ISSUE_TEMPLATE/019-bug-misc.yml
vendored
Normal file
|
@ -0,0 +1,91 @@
|
|||
name: Bug (misc.)
|
||||
description: Something is not working the way it should (and it's not covered by any of the above cases).
|
||||
title: "Misc. bug: "
|
||||
labels: ["bug-unconfirmed"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: >
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
This issue template is intended for miscellaneous bugs that don't fit into any other category.
|
||||
If you encountered the issue while using an external UI (e.g. ollama),
|
||||
please reproduce your issue using one of the examples/binaries in this repository.
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which version of our software is affected? (You can use `--version` to get a version string.)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: Operating systems
|
||||
description: Which operating systems do you know to be affected?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: module
|
||||
attributes:
|
||||
label: Which llama.cpp modules do you know to be affected?
|
||||
multiple: true
|
||||
options:
|
||||
- Documentation/Github
|
||||
- libllama (core library)
|
||||
- llama-cli
|
||||
- llama-server
|
||||
- llama-bench
|
||||
- llama-quantize
|
||||
- Python/Bash scripts
|
||||
- Test code
|
||||
- Other (Please specify in the next section)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: command
|
||||
attributes:
|
||||
label: Command line
|
||||
description: >
|
||||
Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
|
||||
This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: info
|
||||
attributes:
|
||||
label: Problem description & steps to reproduce
|
||||
description: >
|
||||
Please give us a summary of the problem and tell us how to reproduce it (if applicable).
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: first_bad_commit
|
||||
attributes:
|
||||
label: First Bad Commit
|
||||
description: >
|
||||
If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
|
||||
If possible, please do a git bisect and identify the exact commit that introduced the bug.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: >
|
||||
If applicable, please copy and paste any relevant log output, including any generated text.
|
||||
This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
50
.github/ISSUE_TEMPLATE/02-bug-medium.yml
vendored
50
.github/ISSUE_TEMPLATE/02-bug-medium.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: Medium Severity Bug
|
||||
description: Used to report medium severity bugs in llama.cpp (e.g. Malfunctioning Features but generally still useable)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "medium severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
|
@ -1,5 +1,5 @@
|
|||
name: Enhancement
|
||||
description: Used to request enhancements for llama.cpp
|
||||
description: Used to request enhancements for llama.cpp.
|
||||
title: "Feature Request: "
|
||||
labels: ["enhancement"]
|
||||
body:
|
50
.github/ISSUE_TEMPLATE/03-bug-high.yml
vendored
50
.github/ISSUE_TEMPLATE/03-bug-high.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: High Severity Bug
|
||||
description: Used to report high severity bugs in llama.cpp (e.g. Malfunctioning features hindering important common workflow)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "high severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
|
@ -1,5 +1,5 @@
|
|||
name: Research
|
||||
description: Track new technical research area
|
||||
description: Track new technical research area.
|
||||
title: "Research: "
|
||||
labels: ["research 🔬"]
|
||||
body:
|
50
.github/ISSUE_TEMPLATE/04-bug-critical.yml
vendored
50
.github/ISSUE_TEMPLATE/04-bug-critical.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: Critical Severity Bug
|
||||
description: Used to report critical severity bugs in llama.cpp (e.g. Crashing, Corrupted, Dataloss)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "critical severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
|
@ -1,5 +1,5 @@
|
|||
name: Refactor (Maintainers)
|
||||
description: Used to track refactoring opportunities
|
||||
description: Used to track refactoring opportunities.
|
||||
title: "Refactor: "
|
||||
labels: ["refactor"]
|
||||
body:
|
2
.github/ISSUE_TEMPLATE/config.yml
vendored
2
.github/ISSUE_TEMPLATE/config.yml
vendored
|
@ -9,5 +9,3 @@ contact_links:
|
|||
- name: Want to contribute?
|
||||
url: https://github.com/ggerganov/llama.cpp/wiki/contribute
|
||||
about: Head to the contribution guide page of the wiki for areas you can help with
|
||||
|
||||
|
||||
|
|
19
.github/labeler.yml
vendored
19
.github/labeler.yml
vendored
|
@ -3,20 +3,21 @@ Kompute:
|
|||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml-kompute.h
|
||||
- ggml/src/ggml-kompute.cpp
|
||||
- ggml/src/ggml-kompute/**
|
||||
- README-kompute.md
|
||||
Apple Metal:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml-metal.h
|
||||
- ggml/src/ggml-metal.cpp
|
||||
- ggml/src/ggml-metal/**
|
||||
- README-metal.md
|
||||
SYCL:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml-sycl.h
|
||||
- ggml/src/ggml-sycl.cpp
|
||||
- README-sycl.md
|
||||
- ggml/src/ggml-sycl/**
|
||||
- docs/backend/SYCL.md
|
||||
- examples/sycl/**
|
||||
Nvidia GPU:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
|
@ -25,8 +26,8 @@ Nvidia GPU:
|
|||
Vulkan:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/ggml_vk_generate_shaders.py
|
||||
- ggml/src/ggml-vulkan*
|
||||
- ggml/include/ggml-vulkan.h
|
||||
- ggml/src/ggml-vulkan/**
|
||||
documentation:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
|
@ -73,11 +74,7 @@ server:
|
|||
ggml:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml*.h
|
||||
- ggml/src/ggml*.c
|
||||
- ggml/src/ggml*.cpp
|
||||
- ggml/src/ggml*.h
|
||||
- ggml-cuda/**
|
||||
- ggml/**
|
||||
nix:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
|
|
8
.github/pull_request_template.md
vendored
8
.github/pull_request_template.md
vendored
|
@ -1,7 +1 @@
|
|||
|
||||
|
||||
- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
|
||||
- Self-reported review complexity:
|
||||
- [ ] Low
|
||||
- [ ] Medium
|
||||
- [ ] High
|
||||
*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
# TODO: there have been some issues with the workflow, so disabling for now
|
||||
# https://github.com/ggerganov/llama.cpp/issues/7893
|
||||
#
|
||||
# Benchmark
|
||||
name: Benchmark
|
||||
|
||||
|
@ -24,10 +27,10 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- master
|
||||
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
||||
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
||||
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
||||
schedule:
|
||||
- cron: '04 2 * * *'
|
||||
|
||||
|
@ -129,6 +132,8 @@ jobs:
|
|||
|
||||
- name: Server bench
|
||||
id: server_bench
|
||||
env:
|
||||
HEAD_REF: ${{ github.head_ref || github.ref_name }}
|
||||
run: |
|
||||
set -eux
|
||||
|
||||
|
@ -137,7 +142,7 @@ jobs:
|
|||
python bench.py \
|
||||
--runner-label ${{ env.RUNNER_LABEL }} \
|
||||
--name ${{ github.job }} \
|
||||
--branch ${{ github.head_ref || github.ref_name }} \
|
||||
--branch $HEAD_REF \
|
||||
--commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
|
||||
--scenario script.js \
|
||||
--duration ${{ github.event.inputs.duration || env.DURATION }} \
|
868
.github/workflows/build.yml
vendored
868
.github/workflows/build.yml
vendored
File diff suppressed because it is too large
Load diff
7
.github/workflows/close-issue.yml
vendored
7
.github/workflows/close-issue.yml
vendored
|
@ -3,6 +3,11 @@ on:
|
|||
schedule:
|
||||
- cron: "42 0 * * *"
|
||||
|
||||
# Fine-grant permission
|
||||
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
close-issues:
|
||||
runs-on: ubuntu-latest
|
||||
|
@ -12,7 +17,7 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/stale@v5
|
||||
with:
|
||||
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
|
||||
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 14
|
||||
stale-issue-label: "stale"
|
||||
|
|
157
.github/workflows/docker.yml
vendored
157
.github/workflows/docker.yml
vendored
|
@ -10,48 +10,50 @@
|
|||
name: Publish Docker image
|
||||
|
||||
on:
|
||||
#pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
|
||||
workflow_dispatch: # allows manual triggering
|
||||
schedule:
|
||||
# Rebuild daily rather than on every push because it is expensive
|
||||
- cron: '12 4 * * *'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
# Fine-grant permission
|
||||
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||
permissions:
|
||||
packages: write
|
||||
|
||||
jobs:
|
||||
push_to_registry:
|
||||
name: Push Docker image to Docker Hub
|
||||
#if: github.event.pull_request.draft == false
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
config:
|
||||
- { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||
- { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||
- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
# Note: the full-rocm image is failing due to a "no space left on device" error. It is disabled for now to allow the workflow to complete.
|
||||
#- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
|
||||
- { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
|
||||
# Multi-stage build
|
||||
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
|
||||
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
|
||||
#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
|
||||
steps:
|
||||
- name: Check out the repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # preserve git history, so we can determine the build number
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
|
@ -60,9 +62,45 @@ jobs:
|
|||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
|
||||
- name: Determine tag name
|
||||
id: tag
|
||||
shell: bash
|
||||
run: |
|
||||
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
||||
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
||||
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
|
||||
REPO_NAME="${{ github.event.repository.name }}"
|
||||
|
||||
# determine tag name postfix (build number, commit hash)
|
||||
if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
|
||||
TAG_POSTFIX="-b${BUILD_NUMBER}"
|
||||
else
|
||||
SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
|
||||
TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
|
||||
fi
|
||||
# list all tags possible
|
||||
if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
|
||||
TYPE=""
|
||||
else
|
||||
TYPE="-${{ matrix.config.tag }}"
|
||||
fi
|
||||
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
|
||||
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
|
||||
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
|
||||
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
|
||||
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
|
||||
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
|
||||
echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
|
||||
echo "full_output_tags=$FULLTAGS" # print out for debugging
|
||||
echo "light_output_tags=$LIGHTTAGS" # print out for debugging
|
||||
echo "server_output_tags=$SERVERTAGS" # print out for debugging
|
||||
env:
|
||||
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
||||
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
if: ${{ matrix.config.free_disk_space == true }}
|
||||
uses: ggml-org/free-disk-space@v1.3.1
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
|
@ -77,40 +115,59 @@ jobs:
|
|||
docker-images: true
|
||||
swap-storage: true
|
||||
|
||||
- name: Determine tag name
|
||||
id: tag
|
||||
shell: bash
|
||||
run: |
|
||||
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
||||
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
||||
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
||||
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
||||
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Downcase github.repository_owner
|
||||
run: |
|
||||
echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
|
||||
env:
|
||||
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
||||
|
||||
- name: Build and push Docker image (versioned)
|
||||
if: github.event_name == 'push'
|
||||
uses: docker/build-push-action@v4
|
||||
- name: Build and push Full Docker image (tagged + versioned)
|
||||
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
platforms: ${{ matrix.config.platforms }}
|
||||
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
|
||||
# tag list is generated from step above
|
||||
tags: ${{ steps.tag.outputs.full_output_tags }}
|
||||
file: ${{ matrix.config.dockerfile }}
|
||||
target: full
|
||||
provenance: false
|
||||
# using github experimental cache
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
# return to this if the experimental github cache is having issues
|
||||
#cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
#cache-from: type=local,src=/tmp/.buildx-cache
|
||||
|
||||
- name: Build and push Docker image (tagged)
|
||||
uses: docker/build-push-action@v4
|
||||
- name: Build and push Light Docker image (tagged + versioned)
|
||||
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: ${{ github.event_name == 'push' }}
|
||||
push: true
|
||||
platforms: ${{ matrix.config.platforms }}
|
||||
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
|
||||
# tag list is generated from step above
|
||||
tags: ${{ steps.tag.outputs.light_output_tags }}
|
||||
file: ${{ matrix.config.dockerfile }}
|
||||
target: light
|
||||
provenance: false
|
||||
# using github experimental cache
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
# return to this if the experimental github cache is having issues
|
||||
#cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
#cache-from: type=local,src=/tmp/.buildx-cache
|
||||
|
||||
- name: Build and push Server Docker image (tagged + versioned)
|
||||
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
platforms: ${{ matrix.config.platforms }}
|
||||
# tag list is generated from step above
|
||||
tags: ${{ steps.tag.outputs.server_output_tags }}
|
||||
file: ${{ matrix.config.dockerfile }}
|
||||
target: server
|
||||
provenance: false
|
||||
# using github experimental cache
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
# return to this if the experimental github cache is having issues
|
||||
#cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
#cache-from: type=local,src=/tmp/.buildx-cache
|
||||
|
|
4
.github/workflows/editorconfig.yml
vendored
4
.github/workflows/editorconfig.yml
vendored
|
@ -23,5 +23,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: editorconfig-checker/action-editorconfig-checker@main
|
||||
- uses: editorconfig-checker/action-editorconfig-checker@v2
|
||||
with:
|
||||
version: v3.0.3
|
||||
- run: editorconfig-checker
|
||||
|
|
65
.github/workflows/nix-ci-aarch64.yml
vendored
65
.github/workflows/nix-ci-aarch64.yml
vendored
|
@ -1,65 +0,0 @@
|
|||
name: Nix aarch64 builds
|
||||
|
||||
on:
|
||||
workflow_dispatch: # allows manual triggering
|
||||
schedule:
|
||||
# Rebuild daily rather than on every push because QEMU is expensive (e.g.
|
||||
# 1.5h instead of minutes with the cold cache).
|
||||
#
|
||||
# randint(0, 59), randint(0, 23)
|
||||
- cron: '26 12 * * *'
|
||||
# But also rebuild if we touched any of the Nix expressions:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths: ['**/*.nix', 'flake.lock']
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths: ['**/*.nix', 'flake.lock']
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
nix-build-aarch64:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Install QEMU
|
||||
# Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y qemu-user-static qemu-system-aarch64
|
||||
sudo usermod -a -G kvm $USER
|
||||
- name: Install Nix
|
||||
uses: DeterminateSystems/nix-installer-action@v9
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
extra-conf: |
|
||||
extra-platforms = aarch64-linux
|
||||
extra-system-features = nixos-test kvm
|
||||
extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
|
||||
extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@v2
|
||||
with:
|
||||
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
|
||||
- name: Set-up cachix to push the results to
|
||||
uses: cachix/cachix-action@v13
|
||||
with:
|
||||
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
|
||||
name: llama-cpp
|
||||
- name: Show all output paths
|
||||
run: >
|
||||
nix run github:nix-community/nix-eval-jobs
|
||||
-- --gc-roots-dir gcroot
|
||||
--flake
|
||||
".#packages.aarch64-linux"
|
||||
- name: Build
|
||||
run: >
|
||||
nix run github:Mic92/nix-fast-build
|
||||
-- --skip-cached --no-nom
|
||||
--systems aarch64-linux
|
||||
--flake
|
||||
".#checks.aarch64-linux"
|
72
.github/workflows/nix-ci.yml
vendored
72
.github/workflows/nix-ci.yml
vendored
|
@ -1,72 +0,0 @@
|
|||
name: Nix CI
|
||||
|
||||
on:
|
||||
workflow_dispatch: # allows manual triggering
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
nix-eval:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ ubuntu-latest, macos-latest ]
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Nix
|
||||
uses: DeterminateSystems/nix-installer-action@v9
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
extra-conf: |
|
||||
extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
|
||||
extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@v2
|
||||
with:
|
||||
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
|
||||
- name: List all flake outputs
|
||||
run: nix flake show --all-systems
|
||||
- name: Show all output paths
|
||||
run: >
|
||||
nix run github:nix-community/nix-eval-jobs
|
||||
-- --gc-roots-dir gcroot
|
||||
--flake
|
||||
".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
|
||||
nix-build:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ ubuntu-latest, macos-latest ]
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Nix
|
||||
uses: DeterminateSystems/nix-installer-action@v9
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
extra-conf: |
|
||||
extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
|
||||
extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@v2
|
||||
with:
|
||||
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
|
||||
- name: Set-up cachix to push the results to
|
||||
uses: cachix/cachix-action@v13
|
||||
with:
|
||||
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
|
||||
name: llama-cpp
|
||||
- name: Build
|
||||
run: >
|
||||
nix run github:Mic92/nix-fast-build
|
||||
-- --skip-cached --no-nom
|
||||
--flake
|
||||
".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
|
22
.github/workflows/nix-flake-update.yml
vendored
22
.github/workflows/nix-flake-update.yml
vendored
|
@ -1,22 +0,0 @@
|
|||
name: update-flake-lock
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
|
||||
|
||||
jobs:
|
||||
lockfile:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Nix
|
||||
uses: DeterminateSystems/nix-installer-action@main
|
||||
- name: Update flake.lock
|
||||
uses: DeterminateSystems/update-flake-lock@main
|
||||
with:
|
||||
pr-title: "nix: update flake.lock"
|
||||
pr-labels: |
|
||||
nix
|
||||
pr-reviewers: philiptaron,SomeoneSerge
|
||||
token: ${{ secrets.FLAKE_TOKEN }}
|
36
.github/workflows/nix-publish-flake.yml
vendored
36
.github/workflows/nix-publish-flake.yml
vendored
|
@ -1,36 +0,0 @@
|
|||
# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
|
||||
name: "Publish a flake to flakestry & flakehub"
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: "The existing tag to publish"
|
||||
type: "string"
|
||||
required: true
|
||||
jobs:
|
||||
flakestry-publish:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: "write"
|
||||
contents: "read"
|
||||
steps:
|
||||
- uses: flakestry/flakestry-publish@main
|
||||
with:
|
||||
version: "${{ inputs.tag || github.ref_name }}"
|
||||
flakehub-publish:
|
||||
runs-on: "ubuntu-latest"
|
||||
permissions:
|
||||
id-token: "write"
|
||||
contents: "read"
|
||||
steps:
|
||||
- uses: "actions/checkout@v4"
|
||||
with:
|
||||
ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
|
||||
- uses: "DeterminateSystems/nix-installer-action@main"
|
||||
- uses: "DeterminateSystems/flakehub-push@main"
|
||||
with:
|
||||
visibility: "public"
|
||||
tag: "${{ inputs.tag }}"
|
|
@ -6,15 +6,13 @@ on:
|
|||
- '.github/workflows/python-check-requirements.yml'
|
||||
- 'scripts/check-requirements.sh'
|
||||
- 'convert*.py'
|
||||
- 'requirements.txt'
|
||||
- 'requirements/*.txt'
|
||||
- '**/requirements*.txt'
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/workflows/python-check-requirements.yml'
|
||||
- 'scripts/check-requirements.sh'
|
||||
- 'convert*.py'
|
||||
- 'requirements.txt'
|
||||
- 'requirements/*.txt'
|
||||
- '**/requirements*.txt'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
|
|
9
.github/workflows/python-lint.yml
vendored
9
.github/workflows/python-lint.yml
vendored
|
@ -1,6 +1,13 @@
|
|||
name: flake8 Lint
|
||||
|
||||
on: [push, pull_request]
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths: ['.github/workflows/python-lint.yml', '**/*.py']
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths: ['.github/workflows/python-lint.yml', '**/*.py']
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
|
|
40
.github/workflows/python-type-check.yml
vendored
Normal file
40
.github/workflows/python-type-check.yml
vendored
Normal file
|
@ -0,0 +1,40 @@
|
|||
name: Python Type-Check
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/python-type-check.yml'
|
||||
- 'pyrightconfig.json'
|
||||
- '**.py'
|
||||
- '**/requirements*.txt'
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/workflows/python-type-check.yml'
|
||||
- 'pyrightconfig.json'
|
||||
- '**.py'
|
||||
- '**/requirements*.txt'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
python-type-check:
|
||||
runs-on: ubuntu-latest
|
||||
name: pyright type-check
|
||||
steps:
|
||||
- name: Check out source repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python environment
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: Install Python dependencies
|
||||
# TODO: use a venv
|
||||
run: pip install -r requirements/requirements-all.txt
|
||||
- name: Type-check with Pyright
|
||||
uses: jakebailey/pyright-action@v2
|
||||
with:
|
||||
version: 1.1.382
|
||||
level: warning
|
||||
warnings: true
|
90
.github/workflows/server.yml
vendored
90
.github/workflows/server.yml
vendored
|
@ -20,6 +20,12 @@ on:
|
|||
types: [opened, synchronize, reopened]
|
||||
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
|
||||
|
||||
env:
|
||||
LLAMA_LOG_COLORS: 1
|
||||
LLAMA_LOG_PREFIX: 1
|
||||
LLAMA_LOG_TIMESTAMPS: 1
|
||||
LLAMA_LOG_VERBOSITY: 10
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
@ -70,20 +76,49 @@ jobs:
|
|||
run: |
|
||||
pip install -r examples/server/tests/requirements.txt
|
||||
|
||||
- name: Verify server deps
|
||||
id: verify_server_deps
|
||||
# Setup nodejs (to be used for verifying bundled index.html)
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.11.0'
|
||||
|
||||
- name: WebUI - Install dependencies
|
||||
id: webui_lint
|
||||
run: |
|
||||
cd examples/server/webui
|
||||
npm ci
|
||||
|
||||
- name: WebUI - Check code format
|
||||
id: webui_format
|
||||
run: |
|
||||
git config --global --add safe.directory $(realpath .)
|
||||
cd examples/server
|
||||
git ls-files --others --modified
|
||||
cd examples/server/webui
|
||||
git status
|
||||
./deps.sh
|
||||
|
||||
npm run format
|
||||
git status
|
||||
not_ignored_files="$(git ls-files --others --modified)"
|
||||
echo "Modified files: ${not_ignored_files}"
|
||||
if [ -n "${not_ignored_files}" ]; then
|
||||
echo "Repository is dirty or server deps are not built as expected"
|
||||
echo "${not_ignored_files}"
|
||||
modified_files="$(git status -s)"
|
||||
echo "Modified files: ${modified_files}"
|
||||
if [ -n "${modified_files}" ]; then
|
||||
echo "Files do not follow coding style. To fix: npm run format"
|
||||
echo "${modified_files}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Verify bundled index.html
|
||||
id: verify_server_index_html
|
||||
run: |
|
||||
git config --global --add safe.directory $(realpath .)
|
||||
cd examples/server/webui
|
||||
git status
|
||||
|
||||
npm run build
|
||||
git status
|
||||
modified_files="$(git status -s)"
|
||||
echo "Modified files: ${modified_files}"
|
||||
if [ -n "${modified_files}" ]; then
|
||||
echo "Repository is dirty or server/webui is not built as expected"
|
||||
echo "Hint: You may need to follow Web UI build guide in server/README.md"
|
||||
echo "${modified_files}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -100,9 +135,9 @@ jobs:
|
|||
-DGGML_OPENMP=OFF ;
|
||||
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
if: ${{ matrix.sanitizer != 'THREAD' }}
|
||||
- name: Build (sanitizers)
|
||||
id: cmake_build_sanitizers
|
||||
if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
|
||||
run: |
|
||||
cmake -B build \
|
||||
-DGGML_NATIVE=OFF \
|
||||
|
@ -112,18 +147,37 @@ jobs:
|
|||
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
|
||||
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||
|
||||
- name: Build (sanitizers)
|
||||
id: cmake_build
|
||||
if: ${{ matrix.sanitizer == '' }}
|
||||
run: |
|
||||
cmake -B build \
|
||||
-DGGML_NATIVE=OFF \
|
||||
-DLLAMA_BUILD_SERVER=ON \
|
||||
-DLLAMA_CURL=ON \
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
|
||||
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||
|
||||
- name: Tests
|
||||
id: server_integration_tests
|
||||
if: ${{ matrix.sanitizer == '' }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
PORT=8888 ./tests.sh
|
||||
./tests.sh
|
||||
|
||||
- name: Tests (sanitizers)
|
||||
id: server_integration_tests_sanitizers
|
||||
if: ${{ matrix.sanitizer != '' }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
LLAMA_SANITIZE=1 ./tests.sh
|
||||
|
||||
- name: Slow tests
|
||||
id: server_integration_tests_slow
|
||||
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
|
||||
SLOW_TESTS=1 ./tests.sh
|
||||
|
||||
|
||||
server-windows:
|
||||
|
@ -173,11 +227,13 @@ jobs:
|
|||
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
||||
$env:PYTHONIOENCODING = ":replace"
|
||||
pytest -v -x -m "not slow"
|
||||
|
||||
- name: Slow tests
|
||||
id: server_integration_tests_slow
|
||||
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
behave.exe --stop --no-skipped --no-capture --tags slow
|
||||
$env:SLOW_TESTS = "1"
|
||||
pytest -v -x
|
||||
|
|
33
.gitignore
vendored
33
.gitignore
vendored
|
@ -3,6 +3,7 @@
|
|||
*.a
|
||||
*.bat
|
||||
*.bin
|
||||
*.d
|
||||
*.dll
|
||||
*.dot
|
||||
*.etag
|
||||
|
@ -17,6 +18,7 @@
|
|||
*.metallib
|
||||
*.o
|
||||
*.so
|
||||
*.swp
|
||||
*.tmp
|
||||
|
||||
# IDE / OS
|
||||
|
@ -47,8 +49,10 @@ build*
|
|||
!build-info.cpp.in
|
||||
!build-info.sh
|
||||
!build.zig
|
||||
!docs/build.md
|
||||
/libllama.so
|
||||
/llama-*
|
||||
/vulkan-shaders-gen
|
||||
android-ndk-*
|
||||
arm_neon.h
|
||||
cmake-build-*
|
||||
|
@ -59,6 +63,12 @@ llama-batched-swift
|
|||
/rpc-server
|
||||
out/
|
||||
tmp/
|
||||
autogen-*.md
|
||||
|
||||
# Deprecated
|
||||
|
||||
/main
|
||||
/server
|
||||
|
||||
# CI
|
||||
|
||||
|
@ -72,7 +82,6 @@ models-mnt
|
|||
!models/ggml-vocab-*.gguf*
|
||||
|
||||
# Zig
|
||||
|
||||
zig-out/
|
||||
zig-cache/
|
||||
|
||||
|
@ -96,15 +105,20 @@ examples/server/*.mjs.hpp
|
|||
!examples/sycl/*.bat
|
||||
!examples/sycl/*.sh
|
||||
|
||||
# Server Web UI temporary files
|
||||
node_modules
|
||||
examples/server/webui/dist
|
||||
|
||||
# Python
|
||||
|
||||
__pycache__
|
||||
.venv
|
||||
/Pipfile
|
||||
dist
|
||||
poetry.lock
|
||||
/.venv
|
||||
__pycache__/
|
||||
*/poetry.lock
|
||||
poetry.toml
|
||||
|
||||
# Nix
|
||||
/result
|
||||
|
||||
# Test binaries
|
||||
/tests/test-backend-ops
|
||||
/tests/test-double-float
|
||||
|
@ -122,3 +136,10 @@ poetry.toml
|
|||
|
||||
# Scripts
|
||||
!/scripts/install-oneapi.bat
|
||||
|
||||
# Test models for lora adapters
|
||||
/lora-tests
|
||||
|
||||
# Local scripts
|
||||
/run-vim.sh
|
||||
/run-chat.sh
|
||||
|
|
2
.gitmodules
vendored
2
.gitmodules
vendored
|
@ -1,3 +1,3 @@
|
|||
[submodule "kompute"]
|
||||
path = ggml/src/kompute
|
||||
path = ggml/src/ggml-kompute/kompute
|
||||
url = https://github.com/nomic-ai/kompute.git
|
||||
|
|
267
AUTHORS
267
AUTHORS
|
@ -1,4 +1,4 @@
|
|||
# date: Wed Jun 26 19:36:34 EEST 2024
|
||||
# date: Tue Feb 4 13:04:05 EET 2025
|
||||
# this file is auto-generated by scripts/gen-authors.sh
|
||||
|
||||
0cc4m <picard12@live.de>
|
||||
|
@ -7,6 +7,7 @@
|
|||
2f38b454 <dxf@protonmail.com>
|
||||
3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
|
||||
44670 <44670@users.noreply.github.com>
|
||||
65a <10104049+65a@users.noreply.github.com>
|
||||
AN Long <aisk@users.noreply.github.com>
|
||||
AT <manyoso@users.noreply.github.com>
|
||||
Aarni Koskela <akx@iki.fi>
|
||||
|
@ -19,20 +20,30 @@ Adithya Balaji <adithya.b94@gmail.com>
|
|||
AdithyanI <adithyan.i4internet@gmail.com>
|
||||
Adrian <smith.adriane@gmail.com>
|
||||
Adrian Hesketh <a-h@users.noreply.github.com>
|
||||
Adrien Gallouët <adrien@gallouet.fr>
|
||||
Adrien Gallouët <angt@huggingface.co>
|
||||
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
|
||||
Ahmet Zeer <ahmed.zeer@std.yildiz.edu.tr>
|
||||
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||
AidanBeltonS <aidan.belton@codeplay.com>
|
||||
Aisuko <urakiny@gmail.com>
|
||||
Akarshan Biswas <akarshan.biswas@gmail.com>
|
||||
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
|
||||
Al Mochkin <14274697+amochkin@users.noreply.github.com>
|
||||
Albert Jin <albert.jin@gmail.com>
|
||||
Alberto <57916483+albbus-stack@users.noreply.github.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
|
||||
Alex <awhill19@icloud.com>
|
||||
Alex Azarov <alex@azarov.by>
|
||||
Alex Azarov <alexander.azarov@mapbox.com>
|
||||
Alex Klinkhamer <from.github.com.917@grencez.dev>
|
||||
Alex Klinkhamer <git@grencez.dev>
|
||||
Alex Nguyen <tiendung@users.noreply.github.com>
|
||||
Alex O'Connell <35843486+acon96@users.noreply.github.com>
|
||||
Alex Petenchea <alex.petenchea@gmail.com>
|
||||
Alex Renda <alexrenda@users.noreply.github.com>
|
||||
Alex Tuddenham <61622354+AlexsCode@users.noreply.github.com>
|
||||
Alex von Gluck IV <kallisti5@unixzen.com>
|
||||
Alexey Parfenov <zxed@alkatrazstudio.net>
|
||||
Ali Chraghi <63465728+alichraghi@users.noreply.github.com>
|
||||
|
@ -45,18 +56,26 @@ AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
|
|||
Ananta Bastola <anantarajbastola@gmail.com>
|
||||
Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
|
||||
András Salamon <ott2@users.noreply.github.com>
|
||||
Andreas (Andi) Kunar <andreask@msn.com>
|
||||
Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
|
||||
Andrei <abetlen@gmail.com>
|
||||
Andrew Canis <andrew.canis@gmail.com>
|
||||
Andrew Downing <andrew2085@gmail.com>
|
||||
Andrew Duffy <a10y@users.noreply.github.com>
|
||||
Andrew Godfrey <AndrewGodfrey@users.noreply.github.com>
|
||||
Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
|
||||
Andy Salerno <andysalerno@gmail.com>
|
||||
Andy Tai <andy-tai@users.noreply.github.com>
|
||||
Anthony Van de Gejuchte <anthonyvdgent@gmail.com>
|
||||
Antonis Makropoulos <benuix@gmail.com>
|
||||
Arik Poznanski <arikpoz@users.noreply.github.com>
|
||||
Armen Kaleshian <kriation@users.noreply.github.com>
|
||||
Artem <guinmoon@gmail.com>
|
||||
Artem Zinnatullin <ceo@abstractny.gay>
|
||||
Artyom Lebedev <vagran.ast@gmail.com>
|
||||
Asbjørn Olling <asbjornolling@gmail.com>
|
||||
Ásgeir Bjarni Ingvarsson <asgeir@fundinn.org>
|
||||
Asghar Ghorbani <a-ghorbani@users.noreply.github.com>
|
||||
Ashish <1856117+ashishdatta@users.noreply.github.com>
|
||||
Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
|
||||
Ashraful Islam <ashraful.meche@gmail.com>
|
||||
|
@ -75,13 +94,21 @@ Ben Siraphob <bensiraphob@gmail.com>
|
|||
Ben Williams <ben@719ben.com>
|
||||
Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com>
|
||||
Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
|
||||
Benson Wong <mostlygeek@gmail.com>
|
||||
Bernat Vadell <hounter.caza@gmail.com>
|
||||
Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
|
||||
Bert Wagner <github@bertwagner.com>
|
||||
Billel Mokeddem <billel.mokeddem.ml@gmail.com>
|
||||
Bingan <70050083+binganao@users.noreply.github.com>
|
||||
Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com>
|
||||
Bodo Graumann <mail@bodograumann.de>
|
||||
Bono Lv <lvscar@users.noreply.github.com>
|
||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||
Borislav Stanimirov <b@ibob.bg>
|
||||
Branden Butler <bwtbutler@hotmail.com>
|
||||
Brandon Squizzato <35474886+bsquizz@users.noreply.github.com>
|
||||
Brian <mofosyne@gmail.com>
|
||||
Brian Cunnie <brian.cunnie@gmail.com>
|
||||
Bruce MacDonald <brucewmacdonald@gmail.com>
|
||||
Bryan Honof <bryanhonof@gmail.com>
|
||||
CJ Pais <cj@cjpais.com>
|
||||
|
@ -90,32 +117,51 @@ Calvin Laurenson <calvin@laurenson.dev>
|
|||
Cameron <csteele@steelecameron.com>
|
||||
Cameron Kaiser <classilla@users.noreply.github.com>
|
||||
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
||||
CarryFun <76023481+CarryFun@users.noreply.github.com>
|
||||
Carsten Kragelund Jørgensen <carsten@kragelund.me>
|
||||
CarterLi999 <664681047@qq.com>
|
||||
Casey Primozic <casey@cprimozic.net>
|
||||
Casey Primozic <me@ameo.link>
|
||||
CausalLM <148736309+CausalLM@users.noreply.github.com>
|
||||
Cebtenzzre <cebtenzzre@gmail.com>
|
||||
CentricStorm <CentricStorm@users.noreply.github.com>
|
||||
Chad Brewbaker <crb002@gmail.com>
|
||||
Changyeon Kim <cyzero.kim@samsung.com>
|
||||
Chao Jiang <jc19chaoj@zoho.com>
|
||||
Charles Xu <63788048+chaxu01@users.noreply.github.com>
|
||||
Charles Xu <charles.xu@arm.com>
|
||||
Chen Xi <xi2.chen@intel.com>
|
||||
Chen Xi <xixichen08@foxmail.com>
|
||||
Cheng Shao <terrorjack@type.dance>
|
||||
Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
|
||||
Chris Elrod <elrodc@gmail.com>
|
||||
Chris Kuehl <ckuehl@ckuehl.me>
|
||||
Christian Demsar <christian@github.email.demsar.us>
|
||||
Christian Demsar <crasm@git.vczf.us>
|
||||
Christian Falch <875252+chrfalch@users.noreply.github.com>
|
||||
Christian Kastner <ckk@kvr.at>
|
||||
Christian Kögler <ck3d@gmx.de>
|
||||
Christian Köhnenkamp <cvk5@me.com>
|
||||
Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com>
|
||||
Christopher Nielsen <62156882+mascguy@users.noreply.github.com>
|
||||
Clark Saben <76020733+csaben@users.noreply.github.com>
|
||||
Clint Herron <hanclinto@gmail.com>
|
||||
Conrad Kramer <conrad@conradkramer.com>
|
||||
Corentin REGAL <corentin.regal@gmail.com>
|
||||
CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
|
||||
Csaba Kecskemeti <csaba.kecskemeti@gmail.com>
|
||||
Cuong Trinh Manh <nguoithichkhampha@gmail.com>
|
||||
DAN™ <dranger003@gmail.com>
|
||||
Damian Stewart <d@damianstewart.com>
|
||||
Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
|
||||
Dan Johansson <dan.johansson@arm.com>
|
||||
Dane Madsen <dane_madsen@hotmail.com>
|
||||
DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com>
|
||||
Daniel Bevenius <daniel.bevenius@gmail.com>
|
||||
Daniel Drake <drake@endlessos.org>
|
||||
Daniel Hiltgen <dhiltgen@users.noreply.github.com>
|
||||
Daniel Illescas Romero <illescas.daniel@protonmail.com>
|
||||
Daniel Kleine <53251018+d-kleine@users.noreply.github.com>
|
||||
Daniele <57776841+daniandtheweb@users.noreply.github.com>
|
||||
DannyDaemonic <DannyDaemonic@gmail.com>
|
||||
Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
|
||||
|
@ -129,19 +175,29 @@ David Pflug <david@pflug.email>
|
|||
David Renshaw <dwrenshaw@gmail.com>
|
||||
David Sommers <12738+databyte@users.noreply.github.com>
|
||||
David Yang <davidyang6us@gmail.com>
|
||||
DavidKorczynski <david@adalogics.com>
|
||||
Dawid Potocki <github@dawidpotocki.com>
|
||||
Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
|
||||
Dean <Dean.Sinaean@gmail.com>
|
||||
Deins <deinsegle@gmail.com>
|
||||
Denis Spasyuk <34203011+dspasyuk@users.noreply.github.com>
|
||||
Derrick T. Woolworth <dwoolworth@gmail.com>
|
||||
Deven Mistry <31466137+deven367@users.noreply.github.com>
|
||||
Dibakar Gope <dibakar.gope@arm.com>
|
||||
Didzis Gosko <didzis@users.noreply.github.com>
|
||||
Diego Devesa <slarengh@gmail.com>
|
||||
Diogo Teles Sant'Anna <diogoteles@google.com>
|
||||
Djip007 <3705339+Djip007@users.noreply.github.com>
|
||||
Djip007 <djip.perois@free.fr>
|
||||
Don Mahurin <dmahurin@users.noreply.github.com>
|
||||
DooWoong Lee (David) <manics99@naver.com>
|
||||
Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>
|
||||
Dou Xinpeng <15529241576@163.com>
|
||||
Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
|
||||
Douglas Hanley <thesecretaryofwar@gmail.com>
|
||||
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
||||
Ebey Abraham <ebey97@gmail.com>
|
||||
Echo Nolan <echo@echonolan.net>
|
||||
Ed Lee <edilee@mozilla.com>
|
||||
Ed Lepedus <ed.lepedus@googlemail.com>
|
||||
Eddie-Wang <wangjinheng1120@163.com>
|
||||
|
@ -149,12 +205,16 @@ Edward Taylor <edeetee@gmail.com>
|
|||
Elaine <elaine.zosa@gmail.com>
|
||||
Elbios <141279586+Elbios@users.noreply.github.com>
|
||||
Elton Kola <eltonkola@gmail.com>
|
||||
Emreerdog <34742675+Emreerdog@users.noreply.github.com>
|
||||
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
||||
Equim <sayaka@ekyu.moe>
|
||||
Eric Curtin <ecurtin@redhat.com>
|
||||
Eric Curtin <ericcurtin17@gmail.com>
|
||||
Eric Sommerlade <es0m@users.noreply.github.com>
|
||||
Eric Zhang <34133756+EZForever@users.noreply.github.com>
|
||||
Erik Garrison <erik.garrison@gmail.com>
|
||||
Erik Scholz <Green-Sky@users.noreply.github.com>
|
||||
Esko Toivonen <eskot98@gmail.com>
|
||||
Ettore Di Giacinto <mudler@users.noreply.github.com>
|
||||
Evan Jones <evan.q.jones@gmail.com>
|
||||
Evan Miller <emmiller@gmail.com>
|
||||
|
@ -166,19 +226,27 @@ FK <sozforex@gmail.com>
|
|||
Fabian <cmdrf@users.noreply.github.com>
|
||||
Fabio R. Sluzala <Fabio3rs@users.noreply.github.com>
|
||||
Faez Shakil <faez.shakil@gmail.com>
|
||||
Faisal Zaghloul <faisal.zaghloul@gmail.com>
|
||||
Faisal Zaghloul <quic_fzaghlou@quicinc.com>
|
||||
Fan Shupei <dymarkfan@outlook.com>
|
||||
FantasyGmm <16450052+FantasyGmm@users.noreply.github.com>
|
||||
Farbod Bijary <110523279+farbodbj@users.noreply.github.com>
|
||||
Fattire <528174+fat-tire@users.noreply.github.com>
|
||||
Felix <stenbackfelix@gmail.com>
|
||||
Finn Voorhees <finnvoorhees@gmail.com>
|
||||
Firat <firatkiral@gmail.com>
|
||||
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
|
||||
Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
|
||||
Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
|
||||
Francisco Melo <43780565+francis2tm@users.noreply.github.com>
|
||||
Frank Mai <thxcode0824@gmail.com>
|
||||
FrankHB <frankhb1989@gmail.com>
|
||||
Frankie Robertson <frankier@users.noreply.github.com>
|
||||
Fred Douglas <43351173+fredlas@users.noreply.github.com>
|
||||
Frederik Vogel <Schaltfehler@users.noreply.github.com>
|
||||
Gabe Goodhart <gabe.l.hart@gmail.com>
|
||||
Gabe Goodhart <ghart@us.ibm.com>
|
||||
Gaetan Bisson <gaetan@fenua.org>
|
||||
GainLee <perfecter.gen@gmail.com>
|
||||
Galunid <karolek1231456@gmail.com>
|
||||
Gary Linscott <glinscott@gmail.com>
|
||||
|
@ -187,12 +255,15 @@ Gavin Zhao <gavinzhaojw@protonmail.com>
|
|||
Genkagaku.GPT <hlhr202@163.com>
|
||||
Georgi Gerganov <ggerganov@gmail.com>
|
||||
Gilad S <giladgd@users.noreply.github.com>
|
||||
Gilad S. <7817232+giladgd@users.noreply.github.com>
|
||||
Giuseppe Scrivano <giuseppe@scrivano.org>
|
||||
GiviMAD <GiviMAD@users.noreply.github.com>
|
||||
Govlzkoy <gotope@users.noreply.github.com>
|
||||
Guillaume "Vermeille" Sanchez <Guillaume.V.Sanchez@gmail.com>
|
||||
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||
Guoliang Hua <32868157+nbcsm@users.noreply.github.com>
|
||||
Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
|
||||
Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com>
|
||||
Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
|
||||
Haggai Nuchi <h.nuchi@gmail.com>
|
||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||
|
@ -203,35 +274,47 @@ Haoxiang Fei <tonyfettes@tonyfettes.com>
|
|||
Harald Fernengel <harald.fernengel@here.com>
|
||||
Hatsune Miku <129688334+at8u@users.noreply.github.com>
|
||||
HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com>
|
||||
Haus1 <haus.xda@gmail.com>
|
||||
Henk Poley <HenkPoley@gmail.com>
|
||||
Henri Vasserman <henv@hot.ee>
|
||||
Henrik Forstén <henrik.forsten@gmail.com>
|
||||
Herman Semenov <GermanAizek@yandex.ru>
|
||||
Hesen Peng <hesen.peng@gmail.com>
|
||||
HimariO <dsfhe49854@gmail.com>
|
||||
Hoang Nguyen <hugo53@users.noreply.github.com>
|
||||
Hong Bo PENG <penghb@cn.ibm.com>
|
||||
Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
|
||||
Howard Su <howard0su@gmail.com>
|
||||
Hua Jiang <allenhjiang@outlook.com>
|
||||
Huang Qi <huangqi3@xiaomi.com>
|
||||
Huawei Lin <huaweilin.cs@gmail.com>
|
||||
Hugo Roussel <hugo.rous@gmail.com>
|
||||
Huifeng Ou <79071290+ho2103@users.noreply.github.com>
|
||||
Ian Bull <irbull@eclipsesource.com>
|
||||
Ian Bull <irbull@gmail.com>
|
||||
Ian Scrivener <github@zilogy.asia>
|
||||
Icecream95 <the.real.icecream95@gmail.com>
|
||||
Ido S <ido.pluto@gmail.com>
|
||||
IgnacioFDM <ignaciofdm@gmail.com>
|
||||
Igor Okulist <okigan@gmail.com>
|
||||
Ihar Hrachyshka <ihrachys@redhat.com>
|
||||
Ikko Eltociear Ashimine <eltociear@gmail.com>
|
||||
Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com>
|
||||
Ionoclast Laboratories <brigham@ionoclast.com>
|
||||
Isaac McFadyen <isaac@imcf.me>
|
||||
IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com>
|
||||
Ivan <nekotekina@gmail.com>
|
||||
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
|
||||
Ivan Komarov <Ivan.Komarov@dfyz.info>
|
||||
Ivan Stepanov <ivanstepanovftw@gmail.com>
|
||||
JFLFY2255 <JFLFY2255@163.com>
|
||||
JH23X <165871467+JH23X@users.noreply.github.com>
|
||||
Jack Mousseau <jack@software.inc>
|
||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||
JackJollimore <130917767+JackJollimore@users.noreply.github.com>
|
||||
Jaeden Amero <jaeden@patater.com>
|
||||
Jaemin Son <woalsdnd@gmail.com>
|
||||
Jafar Uruç <jafar.uruc@gmail.com>
|
||||
Jag Chadha <jagtesh@gmail.com>
|
||||
Jakub N <jakubniemczyk97@gmail.com>
|
||||
James A Capozzoli <157492257+jac-jim@users.noreply.github.com>
|
||||
|
@ -243,11 +326,16 @@ Jannis Schönleber <joennlae@gmail.com>
|
|||
Jared Van Bortel <cebtenzzre@gmail.com>
|
||||
Jared Van Bortel <jared@nomic.ai>
|
||||
Jason McCartney <jmac@theroot.org>
|
||||
Jason Stillerman <jason.t.stillerman@gmail.com>
|
||||
Jean-Christophe Hoelt <hoelt@fovea.cc>
|
||||
Jean-Michaël Celerier <jeanmichael.celerier+github@gmail.com>
|
||||
Jed Fox <git@jedfox.com>
|
||||
Jeff Bolz <jbolz@nvidia.com>
|
||||
Jeffrey Morgan <jmorganca@gmail.com>
|
||||
Jeffrey Quesnelle <emozilla@nousresearch.com>
|
||||
Jeroen Mostert <jeroen.mostert@cm.com>
|
||||
Jesse Jojo Johnson <williamsaintgeorge@gmail.com>
|
||||
Jett Janiak <jettjaniak@gmail.com>
|
||||
Jeximo <jeximo@gmail.com>
|
||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||
Jiahao Li <liplus17@163.com>
|
||||
|
@ -258,6 +346,9 @@ Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
|
|||
Jiří Sejkora <Sejseloid@gmail.com>
|
||||
Joan Fontanals <jfontanalsmartinez@gmail.com>
|
||||
Joan Fontanals <joan.fontanals.martinez@jina.ai>
|
||||
João Dinis Ferreira <hello@joaof.eu>
|
||||
Joe Eli McIlvain <joe.eli.mac@gmail.com>
|
||||
Joe Todd <joe.todd@codeplay.com>
|
||||
Johan <JohanAR@users.noreply.github.com>
|
||||
Johannes Gäßler <johannesg@5d6.de>
|
||||
Johannes Rudolph <johannes.rudolph@gmail.com>
|
||||
|
@ -273,8 +364,11 @@ Josh Ramer <josh.ramer@icloud.com>
|
|||
Joyce <joycebrum@google.com>
|
||||
Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
|
||||
Judd <foldl@users.noreply.github.com>
|
||||
Juk Armstrong <69222624+jukofyork@users.noreply.github.com>
|
||||
Julius Arkenberg <arki05@users.noreply.github.com>
|
||||
Jun Hee Yoo <contact.jhyoo@gmail.com>
|
||||
Jun Jie <71215065+junnjiee16@users.noreply.github.com>
|
||||
Junil Kim <logyourself@gmail.com>
|
||||
Junyang Lin <justinlin930319@hotmail.com>
|
||||
Juraj Bednar <juraj@bednar.io>
|
||||
Justin Parker <jparkerweb@gmail.com>
|
||||
|
@ -285,6 +379,7 @@ Justine Tunney <jtunney@mozilla.com>
|
|||
Juuso Alasuutari <juuso.alasuutari@gmail.com>
|
||||
KASR <karim.asrih@gmail.com>
|
||||
Kamil Tomšík <info@tomsik.cz>
|
||||
Karol Kontny <82021046+kkontny@users.noreply.github.com>
|
||||
Karsten Weiss <knweiss@gmail.com>
|
||||
Karthick <j.karthic2004@gmail.com>
|
||||
Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com>
|
||||
|
@ -292,16 +387,19 @@ Karthik Sethuraman <k.seth1993@gmail.com>
|
|||
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
||||
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||
Keiichi Tabata <keiichi.tabata@outlook.com>
|
||||
Keke Han <hankeke303@163.com>
|
||||
Kenvix ⭐ <kenvixzure@live.com>
|
||||
Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
|
||||
Kevin Gibbons <bakkot@gmail.com>
|
||||
Kevin Ji <1146876+kevinji@users.noreply.github.com>
|
||||
Kevin Kwok <antimatter15@gmail.com>
|
||||
Kevin Lo <kevlo@kevlo.org>
|
||||
Kevin Wang <kevmo314@gmail.com>
|
||||
Kolen Cheung <ickc@users.noreply.github.com>
|
||||
Konstantin Herud <konstantin.herud@denkbares.com>
|
||||
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
||||
Kunshang Ji <kunshang.ji@intel.com>
|
||||
Kyle Bruene <KyleBruene@users.noreply.github.com>
|
||||
Kyle Liang <liangmanlai@gmail.com>
|
||||
Kyle Mistele <kyle@mistele.com>
|
||||
Kylin <56434533+KyL0N@users.noreply.github.com>
|
||||
|
@ -315,22 +413,30 @@ LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
|
|||
Leonardo Neumann <leonardo@neumann.dev.br>
|
||||
Li Tan <tanliboy@gmail.com>
|
||||
Linwei Wang <wanix1988@gmail.com>
|
||||
Liu Jia <109258120+Septa2112@users.noreply.github.com>
|
||||
Liu Jia <jia3.liu@intel.com>
|
||||
LoganDark <github@logandark.mozmail.com>
|
||||
Loïc Carrère <loic.carrere@gmail.com>
|
||||
LostRuins <39025047+LostRuins@users.noreply.github.com>
|
||||
LostRuins Concedo <39025047+LostRuins@users.noreply.github.com>
|
||||
Luciano <lucianostrika44@gmail.com>
|
||||
Luo Tian <lt@basecity.com>
|
||||
Lyle Dean <dean@lyle.dev>
|
||||
M-A <maruel@gmail.com>
|
||||
M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
|
||||
Ma Mingfei <mingfei.ma@intel.com>
|
||||
Maarten ter Huurne <maarten@treewalker.org>
|
||||
Mack Straight <eiz@users.noreply.github.com>
|
||||
Maël Kerbiriou <m431.kerbiriou@gmail.com>
|
||||
MaggotHATE <clay1326@gmail.com>
|
||||
Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
|
||||
Manuel <44313466+makuche@users.noreply.github.com>
|
||||
Marc Köhlbrugge <subscriptions@marckohlbrugge.com>
|
||||
Marco Matthies <71844+marcom@users.noreply.github.com>
|
||||
Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
|
||||
Marian Cepok <marian.cepok@gmail.com>
|
||||
Mark Fairbairn <thebaron88@gmail.com>
|
||||
Mark Zhuang <zhuangqiubin@gmail.com>
|
||||
Marko Tasic <mtasic85@gmail.com>
|
||||
Markus Tavenrath <mtavenrath@users.noreply.github.com>
|
||||
Martin Delille <martin@delille.org>
|
||||
|
@ -342,11 +448,16 @@ MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com>
|
|||
Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com>
|
||||
Matheus C. França <matheus-catarino@hotmail.com>
|
||||
Matheus Gabriel Alves Silva <matheusgasource@gmail.com>
|
||||
Mathieu Baudier <mbaudier@argeo.org>
|
||||
Mathieu Geli <mathieu.geli@gmail.com>
|
||||
Mathieu Nayrolles <MathieuNls@users.noreply.github.com>
|
||||
Mathijs Henquet <mathijs.henquet@gmail.com>
|
||||
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||
Matt Clayton <156335168+mattjcly@users.noreply.github.com>
|
||||
Matt Pulver <matt.pulver@heavy.ai>
|
||||
Matt Stephenson <mstephenson6@users.noreply.github.com>
|
||||
Matteo Boschini <12133566+mbosc@users.noreply.github.com>
|
||||
Matteo Mortari <matteo.mortari@gmail.com>
|
||||
Mattheus Chediak <shammcity00@gmail.com>
|
||||
Matthew Tejo <matthew.tejo@gmail.com>
|
||||
Matvey Soloviev <blackhole89@gmail.com>
|
||||
|
@ -356,8 +467,11 @@ Maxime <672982+maximegmd@users.noreply.github.com>
|
|||
Maximilian Winter <maximilian.winter.91@gmail.com>
|
||||
Meng Zhang <meng@tabbyml.com>
|
||||
Meng, Hengyu <hengyu.meng@intel.com>
|
||||
Mengqing Cao <cmq0113@163.com>
|
||||
Merrick Christensen <merrick.christensen@gmail.com>
|
||||
Michael Coppola <m18coppola@gmail.com>
|
||||
Michael Engel <mengel@redhat.com>
|
||||
Michael Francis <edude03@gmail.com>
|
||||
Michael Hueschen <m@mhueschen.dev>
|
||||
Michael Kesper <mkesper@schokokeks.org>
|
||||
Michael Klimenko <mklimenko29@gmail.com>
|
||||
|
@ -365,52 +479,81 @@ Michael Podvitskiy <podvitskiymichael@gmail.com>
|
|||
Michael Potter <NanoTekGuy@Gmail.com>
|
||||
Michael de Gans <michael.john.degans@gmail.com>
|
||||
Michaël de Vries <vriesdemichael@gmail.com>
|
||||
Michał Moskal <michal@moskal.me>
|
||||
Michał Tuszyński <srgtuszy@gmail.com>
|
||||
Michelle Tan <41475767+MichelleTanPY@users.noreply.github.com>
|
||||
Mihai <mihai.chirculescu@yahoo.com>
|
||||
Mike <ytianhui2004@gmail.com>
|
||||
Mikko Juola <mikjuo@gmail.com>
|
||||
Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
|
||||
Minsoo Cheong <icycle0409@snu.ac.kr>
|
||||
Mirko185 <mirkosig@gmail.com>
|
||||
Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
|
||||
MistApproach <98988043+MistApproach@users.noreply.github.com>
|
||||
Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
|
||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||
Mohammadreza Hendiani <mohammad.r.hendiani@gmail.com>
|
||||
Molly Sophia <mollysophia379@gmail.com>
|
||||
MorganRO8 <47795945+MorganRO8@users.noreply.github.com>
|
||||
Murilo Santana <mvrilo@gmail.com>
|
||||
Musab Gultekin <musabgultekin@users.noreply.github.com>
|
||||
Nam D. Tran <42194884+namtranase@users.noreply.github.com>
|
||||
Nathan Epstein <nate2@umbc.edu>
|
||||
Natsu <chino@hotococoa.moe>
|
||||
NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
|
||||
Nebula <infinitewormhole@gmail.com>
|
||||
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
||||
Neo Zhang <zhang.jianyu@outlook.com>
|
||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||
Neuman Vong <neuman.vong@gmail.com>
|
||||
NeverLucky <92274250+nvrxq@users.noreply.github.com>
|
||||
Nexes the Old <124105151+Nexesenex@users.noreply.github.com>
|
||||
Nexesenex <124105151+Nexesenex@users.noreply.github.com>
|
||||
Niall Coates <1349685+Niall-@users.noreply.github.com>
|
||||
Nicholai Tukanov <nicholaitukanov@gmail.com>
|
||||
Nico Bosshard <nico@bosshome.ch>
|
||||
Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
|
||||
Nicolás Pérez <nicolas_perez@brown.edu>
|
||||
Nicolò Scipione <nicolo.scipione@codeplay.com>
|
||||
Nigel Bosch <pnigelb@gmail.com>
|
||||
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
|
||||
Niklas Korz <niklas@niklaskorz.de>
|
||||
NikolaiLyssogor <59844691+NikolaiLyssogor@users.noreply.github.com>
|
||||
Nikolaos Pothitos <pothitos@di.uoa.gr>
|
||||
Nikolas <127742645+nneubacher@users.noreply.github.com>
|
||||
Nindaleth <Nindaleth@users.noreply.github.com>
|
||||
Nuno <rare-magma@posteo.eu>
|
||||
OSecret <135510162+OLSecret@users.noreply.github.com>
|
||||
Oleksandr Nikitin <oleksandr@tvori.info>
|
||||
Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
|
||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||
Ondřej Čertík <ondrej@certik.us>
|
||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||
PAB <pierreantoine.bannier@gmail.com>
|
||||
Pablo Duboue <pablo.duboue@gmail.com>
|
||||
Pascal Patry <ppatry@mtacitlabs.com>
|
||||
Patrice Ferlet <metal3d@gmail.com>
|
||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||
Pavel Zloi <github.com@drteam.rocks>
|
||||
Pavol Rusnak <pavol@rusnak.io>
|
||||
Paweł Wodnicki <151604+32bitmicro@users.noreply.github.com>
|
||||
Pedro Cuenca <pedro@huggingface.co>
|
||||
Peter <peter277@users.noreply.github.com>
|
||||
Peter Sugihara <peter@campsh.com>
|
||||
Phil H <5756783+phiharri@users.noreply.github.com>
|
||||
Philip Taron <philip.taron@gmail.com>
|
||||
Phillip Kravtsov <phillip@kravtsov.net>
|
||||
Pierre Alexandre SCHEMBRI <pa.schembri@gmail.com>
|
||||
Pierrick Hymbert <pierrick.hymbert@gmail.com>
|
||||
Pieter Ouwerkerk <pieter.ouwerkerk@gmail.com>
|
||||
Plamen Minev <pacominev@gmail.com>
|
||||
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||
Qin Yue Chen <71813199+chenqiny@users.noreply.github.com>
|
||||
Qingyou Meng <meng.qingyou@gmail.com>
|
||||
Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com>
|
||||
R0CKSTAR <xiaodong.ye@mthreads.com>
|
||||
R0CKSTAR <yeahdongcn@gmail.com>
|
||||
RJ Adriaansen <adriaansen@eshcc.eur.nl>
|
||||
Radoslav Gerganov <rgerganov@gmail.com>
|
||||
Radosław Gryta <radek.gryta@gmail.com>
|
||||
|
@ -419,11 +562,16 @@ Raj Hammeer Singh Hada <hammeerraj@gmail.com>
|
|||
Ralph Soika <ralph.soika@imixs.com>
|
||||
Rand Xie <randxiexyy29@gmail.com>
|
||||
Randall Fitzgerald <randall@dasaku.net>
|
||||
Random Fly <renfei8@live.cn>
|
||||
Reinforce-II <fate@eastal.com>
|
||||
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
||||
Ren Xuancheng <jklj077@users.noreply.github.com>
|
||||
Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
|
||||
Reza Kakhki <rezakakhki.de@gmail.com>
|
||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||
Riccardo Orlando <Riccorl@users.noreply.github.com>
|
||||
Riceball LEE <snowyu.lee@gmail.com>
|
||||
Rich Dougherty <rich@rd.nz>
|
||||
Richard Kiss <him@richardkiss.com>
|
||||
Richard Roberson <richardr1126@gmail.com>
|
||||
Rick G <26732651+TheFlipbook@users.noreply.github.com>
|
||||
|
@ -434,26 +582,39 @@ Riley Stewart <ristew@users.noreply.github.com>
|
|||
Rinne <AsakusaRinne@gmail.com>
|
||||
Rinne <liu_yaohui1998@126.com>
|
||||
Robert Brisita <986796+rbrisita@users.noreply.github.com>
|
||||
Robert Collins <roberto.tomas.cuentas@gmail.com>
|
||||
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
|
||||
Robert Sung-wook Shin <edp1096@users.noreply.github.com>
|
||||
Robey Holderith <robey@flaminglunchbox.net>
|
||||
Robyn <robyngraf@users.noreply.github.com>
|
||||
Roger Meier <r.meier@siemens.com>
|
||||
Roland <14355895+rbur0425@users.noreply.github.com>
|
||||
Romain Biessy <romain.biessy@codeplay.com>
|
||||
Romain D <90720+Artefact2@users.noreply.github.com>
|
||||
Romain Neutron <romain@neutron.io>
|
||||
Roman Parykin <donderom@gmail.com>
|
||||
Ron Evans <ron@hybridgroup.com>
|
||||
Ron Jailall <rojailal@gmail.com>
|
||||
Roni <sulpher@gmx.net>
|
||||
Ronny Brendel <ronnybrendel@gmail.com>
|
||||
Ronsor <ronsor@ronsor.pw>
|
||||
Rowan Hart <rowanbhart@gmail.com>
|
||||
Ruan <47767371+ruanych@users.noreply.github.com>
|
||||
Ruchira Hasaranga <ruchira66@gmail.com>
|
||||
Rudi Servo <rudiservo@gmail.com>
|
||||
Ruixin Huang <18860020911@163.com>
|
||||
Rune <43761327+Rune-AI@users.noreply.github.com>
|
||||
RunningLeon <maningsheng@sensetime.com>
|
||||
RunningLeon <mnsheng@yeah.net>
|
||||
Ryan Landay <rlanday@gmail.com>
|
||||
Ryder Wishart <ryderwishart@gmail.com>
|
||||
Ryuei <louixs@users.noreply.github.com>
|
||||
Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
|
||||
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
||||
SXX <sxx1136965276@gmail.com>
|
||||
SakuraUmi <yukinon244@gmail.com>
|
||||
Salvador E. Tropea <stropea@inti.gob.ar>
|
||||
Salvatore Mesoraca <s.mesoraca16@gmail.com>
|
||||
Sam Spilsbury <smspillaz@gmail.com>
|
||||
Sami Farin <3876865+Safari77@users.noreply.github.com>
|
||||
Samuel Maynard <samwmaynard@gmail.com>
|
||||
|
@ -463,23 +624,29 @@ Sebastián A <sebastian.aedo29@gmail.com>
|
|||
SebastianApel <13675545+SebastianApel@users.noreply.github.com>
|
||||
Senemu <10880819+Senemu@users.noreply.github.com>
|
||||
Sergey Alirzaev <zl29ah@gmail.com>
|
||||
Sergio López <slp@redhat.com>
|
||||
Sergio López <slp@sinrega.org>
|
||||
Sertaç Özercan <852750+sozercan@users.noreply.github.com>
|
||||
SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
|
||||
ShadovvBeast <ShadovvBeast@gmail.com>
|
||||
Shakhar Dasgupta <shakhardasgupta@gmail.com>
|
||||
Shane A <shanea@allenai.org>
|
||||
Shangning Xu <32517059+xushangning@users.noreply.github.com>
|
||||
Shankar <gshankar.87@gmail.com>
|
||||
Shanshan Shen <467638484@qq.com>
|
||||
Shijie <821898965@qq.com>
|
||||
Shintarou Okada <kokuzen@gmail.com>
|
||||
Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
|
||||
Shouzheng Liu <lshzh.hi@gmail.com>
|
||||
Shuichi Tsutsumi <shuichi0526@gmail.com>
|
||||
Shupei Fan <dymarkfan@outlook.com>
|
||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||
Simon Willison <swillison@gmail.com>
|
||||
Siwen Yu <yusiwen@gmail.com>
|
||||
Sky Yan <skyan83@gmail.com>
|
||||
Slaren <2141330+slaren@users.noreply.github.com>
|
||||
Slava Primenko <primenko.s@gmail.com>
|
||||
Small Grass Forest <zixuanxcl@gmail.com>
|
||||
SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com>
|
||||
Someone <sergei.kozlukov@aalto.fi>
|
||||
Someone Serge <sergei.kozlukov@aalto.fi>
|
||||
|
@ -491,25 +658,33 @@ Stefan Sydow <stefan@sydow.email>
|
|||
Steffen Röcker <sroecker@gmail.com>
|
||||
Stephan Walter <stephan@walter.name>
|
||||
Stephen Nichols <snichols@users.noreply.github.com>
|
||||
Steve Bonds <sbonds@gmail.com>
|
||||
Steve Grubb <ausearch.1@gmail.com>
|
||||
Steven Prichard <spprichard20@gmail.com>
|
||||
Steven Roussey <sroussey@gmail.com>
|
||||
Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
|
||||
StrangeBytesDev <141275258+StrangeBytesDev@users.noreply.github.com>
|
||||
Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
|
||||
Sukriti Sharma <Ssukriti@users.noreply.github.com>
|
||||
SuperUserNameMan <yoann@terminajones.com>
|
||||
Sutou Kouhei <kou@cozmixng.org>
|
||||
Tai Duc Nguyen <taiducnguyen.drexel@gmail.com>
|
||||
Taikono-Himazin <kazu@po.harenet.ne.jp>
|
||||
Tameem <113388789+AhmadTameem@users.noreply.github.com>
|
||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||
Tei Home <taiteitonghome@proton.me>
|
||||
Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com>
|
||||
Thatcher Chamberlin <j.thatcher.c@gmail.com>
|
||||
Theia Vogel <theia@vgel.me>
|
||||
Thérence <13496987+Royalphax@users.noreply.github.com>
|
||||
Thibault Terrasson <thibault.terrasson@gmail.com>
|
||||
Thomas Klausner <wiz@gatalith.at>
|
||||
Thorsten Sommer <SommerEngineering@users.noreply.github.com>
|
||||
Tim Miller <drasticactions@users.noreply.github.com>
|
||||
Tim Wang <overocean@gmail.com>
|
||||
Timmy Knight <r2d2fish@gmail.com>
|
||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||
Ting Lou <louting@189.cn>
|
||||
Ting Lou <ting.lou@gmail.com>
|
||||
Ting Sun <suntcrick@gmail.com>
|
||||
Tobias Lütke <tobi@shopify.com>
|
||||
|
@ -517,32 +692,44 @@ Tom C <tom.corelis@gmail.com>
|
|||
Tom Jobbins <784313+TheBloke@users.noreply.github.com>
|
||||
Tomas <tom.tomas.36478119@gmail.com>
|
||||
Tomáš Pazdiora <tomas.pazdiora@gmail.com>
|
||||
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
|
||||
Tristan Druyen <tristan@vault81.mozmail.com>
|
||||
Tristan Ross <rosscomputerguy@protonmail.com>
|
||||
Trivikram Kamat <16024985+trivikr@users.noreply.github.com>
|
||||
Tungsten842 <886724vf@anonaddy.me>
|
||||
Tungsten842 <quantmint@protonmail.com>
|
||||
Tushar <ditsuke@protonmail.com>
|
||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||
Ujjawal Panchal <31011628+Ujjawal-K-Panchal@users.noreply.github.com>
|
||||
Ulrich Drepper <drepper@gmail.com>
|
||||
Uzo Nweke <uzoechi@gmail.com>
|
||||
Vaibhav Srivastav <vaibhavs10@gmail.com>
|
||||
Val Kharitonov <mail@kharvd.com>
|
||||
Valentin Konovalov <valle.ketsujin@gmail.com>
|
||||
Valentin Mamedov <45292985+Inf1delis@users.noreply.github.com>
|
||||
Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
|
||||
Vali Malinoiu <0x4139@gmail.com>
|
||||
Victor Nogueira <felladrin@gmail.com>
|
||||
Victor Z. Peng <ziliangdotme@gmail.com>
|
||||
Viet-Anh NGUYEN (Andrew) <vietanh.dev@gmail.com>
|
||||
Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com>
|
||||
Vlad <spitfireage@gmail.com>
|
||||
Vladimir <bogdad@gmail.com>
|
||||
Vladimir Malyutin <first-leon@yandex.ru>
|
||||
Vladimir Zorin <vladimir@deviant.guru>
|
||||
VoidIsVoid <343750470@qq.com>
|
||||
Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com>
|
||||
Wang Qin <37098874+wangqin0@users.noreply.github.com>
|
||||
Wang Ran (汪然) <wangr@smail.nju.edu.cn>
|
||||
WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com>
|
||||
Weird Constructor <weirdconstructor@gmail.com>
|
||||
Welby Seely <welbyseely@gmail.com>
|
||||
Wentai Zhang <rchardx@gmail.com>
|
||||
WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
|
||||
William Tambellini <william.tambellini@gmail.com>
|
||||
William Tambellini <wtambellini@sdl.com>
|
||||
Willy Tarreau <w@1wt.eu>
|
||||
Woof Dog <197125663+woof-dog@users.noreply.github.com>
|
||||
Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
|
||||
Wu Jian Ping <wujjpp@hotmail.com>
|
||||
Wu Jian Ping <wujp@greatld.com>
|
||||
|
@ -551,15 +738,25 @@ Xiang (Kevin) Li <kevinli020508@gmail.com>
|
|||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||
Xiaoyi Chen <cxychina@gmail.com>
|
||||
Xie Yanbo <xieyanbo@gmail.com>
|
||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
|
||||
Xuan Son Nguyen <thichthat@gmail.com>
|
||||
Xuan-Son Nguyen <thichthat@gmail.com>
|
||||
Yaiko <elyaiko@hotmail.com>
|
||||
Yann Follet <131855179+YannFollet@users.noreply.github.com>
|
||||
Yaroslav <yaroslav.yashin@me.com>
|
||||
Yazan Agha-Schrader <mountaiin@icloud.com>
|
||||
Yiming Cui <conandiy@vip.qq.com>
|
||||
Yishuo Wang <MeouSker77@outlook.com>
|
||||
Yoshi Suhara <y.suhara@gmail.com>
|
||||
Yoshi Suhara <ysuhara@nvidia.com>
|
||||
Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
|
||||
Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
|
||||
Yüg <eugeniosegalaweb@gmail.com>
|
||||
Yui <dev@sleepyyui.com>
|
||||
Yun Dou <dixyes@gmail.com>
|
||||
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
|
||||
Yusuf Kağan Hanoğlu <hanoglu@yahoo.com>
|
||||
Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com>
|
||||
ZHAOKAI WANG <sanxianwei@163.com>
|
||||
|
@ -568,19 +765,27 @@ Zay <95888118+isaiahbjork@users.noreply.github.com>
|
|||
Zenix <zenixls2@gmail.com>
|
||||
Zhang Peiyuan <a1286225768@gmail.com>
|
||||
Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com>
|
||||
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
|
||||
Zhiyuan Li <lizhiyuan@uniartisan.com>
|
||||
Zhiyuan Li <uniartisan2017@gmail.com>
|
||||
ZhouYuChen <zhouyuchen@naver.com>
|
||||
Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
|
||||
Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
|
||||
Zsapi <martin1.zsapka@gmail.com>
|
||||
a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
|
||||
a3sh <38979186+A3shTnT@users.noreply.github.com>
|
||||
adel boussaken <netdur@gmail.com>
|
||||
afrideva <95653597+afrideva@users.noreply.github.com>
|
||||
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
|
||||
agray3 <agray3@users.noreply.github.com>
|
||||
akawrykow <142945436+akawrykow@users.noreply.github.com>
|
||||
alek3y <44779186+alek3y@users.noreply.github.com>
|
||||
alexpinel <93524949+alexpinel@users.noreply.github.com>
|
||||
alonfaraj <alonfaraj@gmail.com>
|
||||
alwqx <kenan3015@gmail.com>
|
||||
amd-dwang <dong.wang@amd.com>
|
||||
amd-lalithnc <lalithnc@amd.com>
|
||||
amritahs-ibm <amritahs@linux.vnet.ibm.com>
|
||||
andrijdavid <david@geek.mg>
|
||||
anon998 <131767832+anon998@users.noreply.github.com>
|
||||
anzz1 <anzz1@live.com>
|
||||
|
@ -588,24 +793,31 @@ apaz <aarpazdera@gmail.com>
|
|||
apcameron <37645737+apcameron@users.noreply.github.com>
|
||||
arch-btw <57669023+arch-btw@users.noreply.github.com>
|
||||
arcrank <arcrank@gmail.com>
|
||||
ardfork <134447697+ardfork@users.noreply.github.com>
|
||||
arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
|
||||
aryantandon01 <80969509+aryantandon01@users.noreply.github.com>
|
||||
at8u <129688334+at8u@users.noreply.github.com>
|
||||
automaticcat <daogiatuank54@gmail.com>
|
||||
awatuna <23447591+awatuna@users.noreply.github.com>
|
||||
b4b4o <zwbao@foxmail.com>
|
||||
bandoti <141645996+bandoti@users.noreply.github.com>
|
||||
beiller <beiller@gmail.com>
|
||||
bhubbb <79117352+bhubbb@users.noreply.github.com>
|
||||
bmwl <brian.marshall@tolko.com>
|
||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||
brucepro <git@brucepro.net>
|
||||
bryanSwk <93190252+bryanSwk@users.noreply.github.com>
|
||||
bsilvereagle <bsilvereagle@users.noreply.github.com>
|
||||
bssrdf <merlintiger@hotmail.com>
|
||||
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||
cduk <19917266+cduk@users.noreply.github.com>
|
||||
cebtenzzre <cebtenzzre@gmail.com>
|
||||
chaihahaha <chai836275709@gmail.com>
|
||||
chiranko <96988916+chiranko@users.noreply.github.com>
|
||||
clibdev <52199778+clibdev@users.noreply.github.com>
|
||||
clyang <clyang@clyang.net>
|
||||
cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
|
||||
codezjx <code.zjx@gmail.com>
|
||||
coezbek <c.oezbek@gmail.com>
|
||||
comex <comexk@gmail.com>
|
||||
compilade <113953597+compilade@users.noreply.github.com>
|
||||
|
@ -614,10 +826,14 @@ cpumaxx <163466046+cpumaxx@users.noreply.github.com>
|
|||
crasm <crasm@git.vczf.net>
|
||||
crasm <crasm@git.vczf.us>
|
||||
daboe01 <daboe01@googlemail.com>
|
||||
daghanerdonmez <44506702+daghanerdonmez@users.noreply.github.com>
|
||||
daminho <37615795+daminho@users.noreply.github.com>
|
||||
david raistrick <keen99@users.noreply.github.com>
|
||||
ddh0 <dylanhalladay02@icloud.com>
|
||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||
deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
|
||||
devojony <61173062+devojony@users.noreply.github.com>
|
||||
ditsuke <ditsuke@protonmail.com>
|
||||
divinity76 <divinity76@gmail.com>
|
||||
dm4 <sunrisedm4@gmail.com>
|
||||
dotpy314 <33351922+dotpy314@users.noreply.github.com>
|
||||
|
@ -625,18 +841,25 @@ drbh <david.richard.holtz@gmail.com>
|
|||
ds5t5 <145942675+ds5t5@users.noreply.github.com>
|
||||
dylan <canardleteer@users.noreply.github.com>
|
||||
eastriver <lee@eastriver.dev>
|
||||
ebraminio <ebrahim@gnu.org>
|
||||
ebraminio <ebraminio@gmail.com>
|
||||
eiery <19350831+eiery@users.noreply.github.com>
|
||||
eric8607242 <e0928021388@gmail.com>
|
||||
fairydreaming <166155368+fairydreaming@users.noreply.github.com>
|
||||
fengerhu1 <2748250768@qq.com>
|
||||
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||
github-actions[bot] <github-actions[bot]@users.noreply.github.com>
|
||||
gliptic <gliptic@users.noreply.github.com>
|
||||
gn64 <yukikaze.jp@gmail.com>
|
||||
goerch <jhr.walter@t-online.de>
|
||||
grahameth <96447521+grahameth@users.noreply.github.com>
|
||||
gtygo <gtydoit@gmail.com>
|
||||
gwjr <502526+gwjr@users.noreply.github.com>
|
||||
h-h-h-h <13482553+h-h-h-h@users.noreply.github.com>
|
||||
hankcs <cnhankmc@gmail.com>
|
||||
haopeng <657407891@qq.com>
|
||||
hipudding <huafengchun@gmail.com>
|
||||
hoangmit <hoangmit@users.noreply.github.com>
|
||||
hongbo.mo <352280764@qq.com>
|
||||
hopkins385 <98618192+hopkins385@users.noreply.github.com>
|
||||
|
@ -649,12 +872,16 @@ hxer7963 <hxer7963@gmail.com>
|
|||
hydai <z54981220@gmail.com>
|
||||
iSma <ismail.senhaji@gmail.com>
|
||||
iacore <74560659+iacore@users.noreply.github.com>
|
||||
icppWorld <124377669+icppWorld@users.noreply.github.com>
|
||||
igarnier <igarnier@protonmail.com>
|
||||
intelmatt <61025942+intelmatt@users.noreply.github.com>
|
||||
iohub <rickyang.pro@gmail.com>
|
||||
issixx <46835150+issixx@users.noreply.github.com>
|
||||
jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
|
||||
jaime-m-p <167997752+jaime-m-p@users.noreply.github.com>
|
||||
jameswu2014 <545426914@qq.com>
|
||||
jdomke <28772296+jdomke@users.noreply.github.com>
|
||||
jiahao su <damow890@gmail.com>
|
||||
jiez <373447296@qq.com>
|
||||
jneem <joeneeman@gmail.com>
|
||||
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||
|
@ -667,6 +894,7 @@ junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
|||
jwj7140 <32943891+jwj7140@users.noreply.github.com>
|
||||
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||
kaizau <kaizau@users.noreply.github.com>
|
||||
kallewoof <kalle.alm@gmail.com>
|
||||
kalomaze <66376113+kalomaze@users.noreply.github.com>
|
||||
kang <tpdns9032100@gmail.com>
|
||||
katsu560 <118887472+katsu560@users.noreply.github.com>
|
||||
|
@ -674,32 +902,46 @@ kchro3 <62481661+kchro3@users.noreply.github.com>
|
|||
khimaros <me@khimaros.com>
|
||||
kiltyj <kiltyj@gmail.com>
|
||||
klosax <131523366+klosax@users.noreply.github.com>
|
||||
krystiancha <krystian@krystianch.com>
|
||||
kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
|
||||
kunnis <kunnis@users.noreply.github.com>
|
||||
kuronekosaiko <EvanChanJ@163.com>
|
||||
kustaaya <58045274+kustaaya@users.noreply.github.com>
|
||||
kuvaus <22169537+kuvaus@users.noreply.github.com>
|
||||
kwin1412 <42286931+kwin1412@users.noreply.github.com>
|
||||
l3utterfly <gc.pthzfoldr@gmail.com>
|
||||
laik <laik.lj@me.com>
|
||||
ldwang <ftgreat@163.com>
|
||||
le.chang <cljs118@126.com>
|
||||
leejet <leejet714@gmail.com>
|
||||
leo-pony <nengjunma@outlook.com>
|
||||
lexasub <lexakopp2212@gmail.com>
|
||||
lhez <quic_lih@quicinc.com>
|
||||
limitedAtonement <limitedAtonement@users.noreply.github.com>
|
||||
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
||||
lon <114724657+longregen@users.noreply.github.com>
|
||||
loonerin <132926317+loonerin@users.noreply.github.com>
|
||||
ltoniazzi <61414566+ltoniazzi@users.noreply.github.com>
|
||||
luoyu-intel <yu.luo@intel.com>
|
||||
m3ndax <adrian.goessl@outlook.com>
|
||||
maddes8cht <55592906+maddes8cht@users.noreply.github.com>
|
||||
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
||||
makomk <makosoft@googlemail.com>
|
||||
manikbhandari <mbbhandarimanik2@gmail.com>
|
||||
maor-ps <154728172+maor-ps@users.noreply.github.com>
|
||||
mashdragon <122402293+mashdragon@users.noreply.github.com>
|
||||
matiaslin <45382001+matiaslin@users.noreply.github.com>
|
||||
matt23654 <matthew.webber@protonmail.com>
|
||||
matteo <matteogeniaccio@yahoo.it>
|
||||
mdrokz <mohammadmunshi@gmail.com>
|
||||
mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
|
||||
minarchist <minarchist@users.noreply.github.com>
|
||||
mj-shifu <77107165+mj-shifu@users.noreply.github.com>
|
||||
mmyjona <jonathan.gonse@gmail.com>
|
||||
momonga <115213907+mmnga@users.noreply.github.com>
|
||||
momonga <146910567+mmngays@users.noreply.github.com>
|
||||
moritzbrantner <31051084+moritzbrantner@users.noreply.github.com>
|
||||
musoles <135031143+musoles@users.noreply.github.com>
|
||||
mzcu <milos.cubrilo@gmail.com>
|
||||
nanahi <130121847+na-na-hi@users.noreply.github.com>
|
||||
ngc92 <7938269+ngc92@users.noreply.github.com>
|
||||
|
@ -716,16 +958,21 @@ omahs <73983677+omahs@users.noreply.github.com>
|
|||
oobabooga <112222186+oobabooga@users.noreply.github.com>
|
||||
opparco <parco.opaai@gmail.com>
|
||||
ostix360 <55257054+ostix360@users.noreply.github.com>
|
||||
pculliton <phillipculliton@gmail.com>
|
||||
peidaqi <peidaqi@gmail.com>
|
||||
pengxin99 <pengxin.yuan@intel.com>
|
||||
perserk <perserk@gmail.com>
|
||||
piDack <104877312+piDack@users.noreply.github.com>
|
||||
pmysl <piotr.myslinski@outlook.com>
|
||||
postmasters <namnguyen@google.com>
|
||||
pudepiedj <pudepiedj@gmail.com>
|
||||
qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
|
||||
qingy1337 <qxli2@students.everettcc.edu>
|
||||
qouoq <qouoq@fastmail.com>
|
||||
qunash <anzoria@gmail.com>
|
||||
rabidcopy <rabidcopy@yahoo.com>
|
||||
rankaiyx <rankaiyx@rankaiyx.com>
|
||||
redbeard <bharrington@alticon.net>
|
||||
rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com>
|
||||
rhuddleston <ryan.huddleston@percona.com>
|
||||
rimoliga <53384203+rimoliga@users.noreply.github.com>
|
||||
|
@ -733,6 +980,7 @@ runfuture <runfuture@users.noreply.github.com>
|
|||
sandyiscool <sandyiscool@gmail.com>
|
||||
sasha0552 <admin@sasha0552.org>
|
||||
semidark <me@semidark.net>
|
||||
serhii-nakon <57632032+serhii-nakon@users.noreply.github.com>
|
||||
sharpHL <132747147+sharpHL@users.noreply.github.com>
|
||||
shibe2 <shibe@tuta.io>
|
||||
singularity <12184989+singularity-s0@users.noreply.github.com>
|
||||
|
@ -741,42 +989,59 @@ sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com>
|
|||
slaren <2141330+slaren@users.noreply.github.com>
|
||||
slaren <slarengh@gmail.com>
|
||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||
someone13574 <81528246+someone13574@users.noreply.github.com>
|
||||
standby24x7 <standby24x7@gmail.com>
|
||||
staviq <staviq@gmail.com>
|
||||
stduhpf <stephduh@live.fr>
|
||||
strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com>
|
||||
swittk <switt1995@gmail.com>
|
||||
takov751 <40316768+takov751@users.noreply.github.com>
|
||||
tarcey <cey.tarik@gmail.com>
|
||||
tc-mb <157115220+tc-mb@users.noreply.github.com>
|
||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||
thement <40525767+thement@users.noreply.github.com>
|
||||
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
||||
tjohnman <tjohnman@users.noreply.github.com>
|
||||
toyer <2042519524@qq.com>
|
||||
tslmy <tslmy@users.noreply.github.com>
|
||||
ubik2 <ubik2@users.noreply.github.com>
|
||||
uint256_t <konndennsa@gmail.com>
|
||||
uint256_t <maekawatoshiki1017@gmail.com>
|
||||
unbounded <haakon@likedan.net>
|
||||
uvos <devnull@uvos.xyz>
|
||||
uvos <philipp@uvos.xyz>
|
||||
valiray <133289098+valiray@users.noreply.github.com>
|
||||
vb <vaibhavs10@gmail.com>
|
||||
vik <vikhyatk@gmail.com>
|
||||
viric <viric@viric.name>
|
||||
vodkaslime <646329483@qq.com>
|
||||
vvhg1 <94630311+vvhg1@users.noreply.github.com>
|
||||
vxiiduu <73044267+vxiiduu@users.noreply.github.com>
|
||||
wangshuai09 <391746016@qq.com>
|
||||
wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
|
||||
whoreson <139810751+whoreson@users.noreply.github.com>
|
||||
woachk <24752637+woachk@users.noreply.github.com>
|
||||
wonjun Jang <strutive07@gmail.com>
|
||||
woodx <124784234+woodx9@users.noreply.github.com>
|
||||
wwoodsTM <104587230+wwoodsTM@users.noreply.github.com>
|
||||
wzy <32936898+Freed-Wu@users.noreply.github.com>
|
||||
xaedes <xaedes@gmail.com>
|
||||
xaedes <xaedes@googlemail.com>
|
||||
xctan <axunlei@gmail.com>
|
||||
xloem <0xloem@gmail.com>
|
||||
yangli2 <yangli2@gmail.com>
|
||||
ymcki <84055651+ymcki@users.noreply.github.com>
|
||||
yuiseki <yuiseki@gmail.com>
|
||||
yuri@FreeBSD <yurivict@users.noreply.github.com>
|
||||
zakkor <edward.partenie@gmail.com>
|
||||
zhangkaihuo <zhangkaihuo@gmail.com>
|
||||
zhentaoyu <zhentao.yu@intel.com>
|
||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||
zhouwg <zhouwg2000@gmail.com>
|
||||
zrm <trustiosity.zrm@gmail.com>
|
||||
Ștefan-Gabriel Muscalu <legraphista@users.noreply.github.com>
|
||||
杨朱 · Kiki <baofa.fan@daocloud.io>
|
||||
源文雨 <41315874+fumiama@users.noreply.github.com>
|
||||
蕭澧邦 <45505768+shou692199@users.noreply.github.com>
|
||||
谢乃闻 <sienaiwun@users.noreply.github.com>
|
||||
Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
|
||||
|
|
111
CMakeLists.txt
111
CMakeLists.txt
|
@ -16,6 +16,7 @@ endif()
|
|||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||
set(LLAMA_STANDALONE ON)
|
||||
|
@ -42,13 +43,21 @@ endif()
|
|||
|
||||
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||
|
||||
if (WIN32)
|
||||
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>")
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
|
||||
endif()
|
||||
|
||||
#
|
||||
# option list
|
||||
#
|
||||
|
||||
# general
|
||||
option(LLAMA_CCACHE "llama: use ccache if available" ON)
|
||||
|
||||
# debug
|
||||
option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
|
||||
option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
|
||||
|
@ -61,6 +70,9 @@ option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
|
|||
option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
|
||||
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
|
||||
|
||||
# utils
|
||||
option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
|
||||
|
||||
# extra artifacts
|
||||
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
||||
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
||||
|
@ -68,25 +80,23 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
|
|||
|
||||
# 3rd party libs
|
||||
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
|
||||
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
|
||||
|
||||
# Required for relocatable CMake package
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
|
||||
|
||||
# override ggml options
|
||||
set(GGML_CCACHE ${LLAMA_CCACHE})
|
||||
set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
|
||||
set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
|
||||
set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
|
||||
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
|
||||
set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
|
||||
|
||||
# change the default for these ggml options
|
||||
if (NOT DEFINED GGML_LLAMAFILE)
|
||||
set(GGML_LLAMAFILE ON)
|
||||
set(GGML_LLAMAFILE_DEFAULT ON)
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
|
||||
set(GGML_CUDA_USE_GRAPHS ON)
|
||||
if (NOT DEFINED GGML_CUDA_GRAPHS)
|
||||
set(GGML_CUDA_GRAPHS_DEFAULT ON)
|
||||
endif()
|
||||
|
||||
# transition helpers
|
||||
|
@ -106,14 +116,64 @@ llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
|
|||
llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
|
||||
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
|
||||
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
|
||||
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
|
||||
|
||||
if (NOT MSVC)
|
||||
if (LLAMA_SANITIZE_THREAD)
|
||||
message(STATUS "Using -fsanitize=thread")
|
||||
|
||||
add_compile_options(-fsanitize=thread)
|
||||
link_libraries (-fsanitize=thread)
|
||||
endif()
|
||||
|
||||
if (LLAMA_SANITIZE_ADDRESS)
|
||||
message(STATUS "Using -fsanitize=address")
|
||||
|
||||
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
|
||||
link_libraries (-fsanitize=address)
|
||||
endif()
|
||||
|
||||
if (LLAMA_SANITIZE_UNDEFINED)
|
||||
message(STATUS "Using -fsanitize=undefined")
|
||||
|
||||
add_compile_options(-fsanitize=undefined)
|
||||
link_libraries (-fsanitize=undefined)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#
|
||||
# 3rd-party
|
||||
#
|
||||
|
||||
if (NOT TARGET ggml)
|
||||
add_subdirectory(ggml)
|
||||
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
||||
endif()
|
||||
|
||||
#
|
||||
# build the library
|
||||
#
|
||||
|
||||
add_subdirectory(ggml)
|
||||
add_subdirectory(src)
|
||||
|
||||
#
|
||||
# utils, programs, examples and tests
|
||||
#
|
||||
|
||||
if (LLAMA_BUILD_COMMON)
|
||||
add_subdirectory(common)
|
||||
endif()
|
||||
|
||||
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
add_subdirectory(pocs)
|
||||
endif()
|
||||
|
||||
#
|
||||
# install
|
||||
#
|
||||
|
@ -129,9 +189,14 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
|
|||
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
||||
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
||||
|
||||
get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
||||
set(LLAMA_PUBLIC_HEADERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
|
||||
|
||||
set_target_properties(llama
|
||||
PROPERTIES
|
||||
PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
|
||||
|
||||
set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
|
||||
install(TARGETS llama LIBRARY PUBLIC_HEADER)
|
||||
|
||||
configure_package_config_file(
|
||||
|
@ -152,7 +217,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
|
|||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
|
||||
|
||||
install(
|
||||
FILES convert-hf-to-gguf.py
|
||||
FILES convert_hf_to_gguf.py
|
||||
PERMISSIONS
|
||||
OWNER_READ
|
||||
OWNER_WRITE
|
||||
|
@ -168,20 +233,4 @@ configure_file(cmake/llama.pc.in
|
|||
@ONLY)
|
||||
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
|
||||
DESTINATION lib/pkgconfig)
|
||||
|
||||
#
|
||||
# programs, examples and tests
|
||||
#
|
||||
|
||||
add_subdirectory(common)
|
||||
|
||||
if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
endif ()
|
||||
|
||||
if (LLAMA_BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
add_subdirectory(pocs)
|
||||
endif()
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
"cacheVariables": {
|
||||
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
|
||||
"CMAKE_CXX_COMPILER": "icx",
|
||||
"CMAKE_C_COMPILER": "cl",
|
||||
"GGML_SYCL": "ON",
|
||||
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
|
||||
}
|
||||
|
@ -27,11 +28,20 @@
|
|||
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
|
||||
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
|
||||
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
|
||||
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
|
||||
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
|
||||
|
||||
{
|
||||
"name": "x64-windows-llvm", "hidden": true,
|
||||
"cacheVariables": {
|
||||
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "arm64-windows-msvc", "hidden": true,
|
||||
"architecture": { "value": "arm64", "strategy": "external" },
|
||||
"toolset": { "value": "host=x86_64", "strategy": "external" },
|
||||
"toolset": { "value": "host=x64", "strategy": "external" },
|
||||
"cacheVariables": {
|
||||
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
|
||||
}
|
||||
|
@ -40,25 +50,48 @@
|
|||
{
|
||||
"name": "arm64-windows-llvm", "hidden": true,
|
||||
"architecture": { "value": "arm64", "strategy": "external" },
|
||||
"toolset": { "value": "host=x86_64", "strategy": "external" },
|
||||
"toolset": { "value": "host=x64", "strategy": "external" },
|
||||
"cacheVariables": {
|
||||
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
|
||||
}
|
||||
},
|
||||
|
||||
{ "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
|
||||
{
|
||||
"name": "arm64-apple-clang", "hidden": true,
|
||||
"architecture": { "value": "arm64", "strategy": "external" },
|
||||
"toolset": { "value": "host=x64", "strategy": "external" },
|
||||
"cacheVariables": {
|
||||
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
|
||||
}
|
||||
},
|
||||
|
||||
{ "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
|
||||
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
|
||||
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
|
||||
|
||||
{ "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
|
||||
{ "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
|
||||
{ "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
|
||||
{ "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
|
||||
|
||||
{ "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
|
||||
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
|
||||
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
|
||||
|
||||
{ "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
|
||||
{ "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
|
||||
{ "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
|
||||
{ "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
|
||||
{ "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
|
||||
|
||||
{ "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
|
||||
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
|
||||
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
|
||||
|
||||
{ "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
|
||||
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
|
||||
{ "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
|
||||
{ "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
|
||||
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
|
||||
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
|
||||
|
||||
{ "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
|
||||
{ "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
|
||||
]
|
||||
}
|
||||
|
|
11
CODEOWNERS
Normal file
11
CODEOWNERS
Normal file
|
@ -0,0 +1,11 @@
|
|||
# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
|
||||
|
||||
/ci/ @ggerganov
|
||||
/.devops/*.Dockerfile @ngxson
|
||||
/examples/server/ @ngxson
|
||||
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
|
||||
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
|
||||
/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
|
||||
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
|
||||
/ggml/src/ggml-opt.cpp @JohannesGaessler
|
||||
/ggml/src/gguf.cpp @JohannesGaessler
|
131
CONTRIBUTING.md
131
CONTRIBUTING.md
|
@ -1,14 +1,125 @@
|
|||
# Contributing Guidelines
|
||||
# Pull requests (for contributors)
|
||||
|
||||
## Checklist
|
||||
- Test your changes:
|
||||
- Execute [the full CI locally on your machine](ci/README.md) before publishing
|
||||
- Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`)
|
||||
- If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
|
||||
- If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
|
||||
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
|
||||
- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
|
||||
|
||||
* Make sure your PR follows the [coding guidelines](https://github.com/ggerganov/llama.cpp/blob/master/README.md#coding-guidelines)
|
||||
* Test your changes using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
|
||||
* Execute [the full CI locally on your machine](ci/README.md) before publishing
|
||||
# Pull requests (for collaborators)
|
||||
|
||||
## PR formatting
|
||||
- Squash-merge PRs
|
||||
- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
|
||||
- Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
|
||||
- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
|
||||
|
||||
* Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
|
||||
- The PR template has a series of review complexity checkboxes `[ ]` that you can mark as `[X]` for your conveience. Refer to [About task lists](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) for more information.
|
||||
* If the pull request only contains documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times.
|
||||
* When squashing multiple commits on merge, use the following format for your commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : Fix typo in utils.py (#1234)`
|
||||
# Coding guidelines
|
||||
|
||||
- Avoid adding third-party dependencies, extra files, extra headers, etc.
|
||||
- Always consider cross-compatibility with other operating systems and architectures
|
||||
- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
|
||||
- Vertical alignment makes things more readable and easier to batch edit
|
||||
- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
|
||||
- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets
|
||||
- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo`
|
||||
- In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary
|
||||
```cpp
|
||||
// OK
|
||||
llama_context * ctx;
|
||||
const llama_rope_type rope_type;
|
||||
|
||||
// not OK
|
||||
struct llama_context * ctx;
|
||||
const enum llama_rope_type rope_type;
|
||||
```
|
||||
|
||||
_(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_
|
||||
|
||||
- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code
|
||||
- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
|
||||
- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
|
||||
- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
|
||||
|
||||

|
||||
|
||||
# Naming guidelines
|
||||
|
||||
- Use `snake_case` for function, variable and type names
|
||||
- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
|
||||
|
||||
```cpp
|
||||
// not OK
|
||||
int small_number;
|
||||
int big_number;
|
||||
|
||||
// OK
|
||||
int number_small;
|
||||
int number_big;
|
||||
```
|
||||
|
||||
- Enum values are always in upper case and prefixed with the enum name
|
||||
|
||||
```cpp
|
||||
enum llama_vocab_type {
|
||||
LLAMA_VOCAB_TYPE_NONE = 0,
|
||||
LLAMA_VOCAB_TYPE_SPM = 1,
|
||||
LLAMA_VOCAB_TYPE_BPE = 2,
|
||||
LLAMA_VOCAB_TYPE_WPM = 3,
|
||||
LLAMA_VOCAB_TYPE_UGM = 4,
|
||||
LLAMA_VOCAB_TYPE_RWKV = 5,
|
||||
};
|
||||
```
|
||||
|
||||
- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>`
|
||||
|
||||
```cpp
|
||||
llama_model_init(); // class: "llama_model", method: "init"
|
||||
llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove"
|
||||
llama_sampler_get_seed(); // class: "llama_sampler", method: "get_seed"
|
||||
llama_set_embeddings(); // class: "llama_context", method: "set_embeddings"
|
||||
llama_n_threads(); // class: "llama_context", method: "n_threads"
|
||||
llama_adapter_lora_free(); // class: "llama_adapter_lora", method: "free"
|
||||
```
|
||||
|
||||
- The `get` `<action>` can be omitted
|
||||
- The `<noun>` can be omitted if not necessary
|
||||
- The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed
|
||||
- Use `init`/`free` for constructor/destructor `<action>`
|
||||
|
||||
- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else
|
||||
|
||||
```cpp
|
||||
typedef struct llama_context * llama_context_t;
|
||||
|
||||
enum llama_pooling_type llama_pooling_type(const llama_context_t ctx);
|
||||
```
|
||||
|
||||
_(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_
|
||||
|
||||
- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension
|
||||
- Python filenames are all lowercase with underscores
|
||||
|
||||
- _(TODO: abbreviations usage)_
|
||||
|
||||
# Preprocessor directives
|
||||
|
||||
- _(TODO: add guidelines with examples and apply them to the codebase)_
|
||||
|
||||
```cpp
|
||||
#ifdef FOO
|
||||
#endif // FOO
|
||||
```
|
||||
|
||||
# Documentation
|
||||
|
||||
- Documentation is a community effort
|
||||
- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference
|
||||
- When you notice incorrect or outdated documentation, please update it
|
||||
|
||||
# Resources
|
||||
|
||||
The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects:
|
||||
|
||||
https://github.com/ggerganov/llama.cpp/projects
|
||||
|
|
|
@ -2,44 +2,6 @@
|
|||
|
||||
import PackageDescription
|
||||
|
||||
var sources = [
|
||||
"src/llama.cpp",
|
||||
"src/unicode.cpp",
|
||||
"src/unicode-data.cpp",
|
||||
"ggml/src/ggml.c",
|
||||
"ggml/src/ggml-alloc.c",
|
||||
"ggml/src/ggml-backend.c",
|
||||
"ggml/src/ggml-quants.c",
|
||||
]
|
||||
|
||||
var resources: [Resource] = []
|
||||
var linkerSettings: [LinkerSetting] = []
|
||||
var cSettings: [CSetting] = [
|
||||
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
||||
.unsafeFlags(["-fno-objc-arc"]),
|
||||
// NOTE: NEW_LAPACK will required iOS version 16.4+
|
||||
// We should consider add this in the future when we drop support for iOS 14
|
||||
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
|
||||
// .define("ACCELERATE_NEW_LAPACK"),
|
||||
// .define("ACCELERATE_LAPACK_ILP64")
|
||||
]
|
||||
|
||||
#if canImport(Darwin)
|
||||
sources.append("ggml/src/ggml-metal.m")
|
||||
resources.append(.process("ggml/src/ggml-metal.metal"))
|
||||
linkerSettings.append(.linkedFramework("Accelerate"))
|
||||
cSettings.append(
|
||||
contentsOf: [
|
||||
.define("GGML_USE_ACCELERATE"),
|
||||
.define("GGML_USE_METAL")
|
||||
]
|
||||
)
|
||||
#endif
|
||||
|
||||
#if os(Linux)
|
||||
cSettings.append(.define("_GNU_SOURCE"))
|
||||
#endif
|
||||
|
||||
let package = Package(
|
||||
name: "llama",
|
||||
platforms: [
|
||||
|
@ -52,24 +14,6 @@ let package = Package(
|
|||
.library(name: "llama", targets: ["llama"]),
|
||||
],
|
||||
targets: [
|
||||
.target(
|
||||
name: "llama",
|
||||
path: ".",
|
||||
exclude: [
|
||||
"cmake",
|
||||
"examples",
|
||||
"scripts",
|
||||
"models",
|
||||
"tests",
|
||||
"CMakeLists.txt",
|
||||
"Makefile"
|
||||
],
|
||||
sources: sources,
|
||||
resources: resources,
|
||||
publicHeadersPath: "spm-headers",
|
||||
cSettings: cSettings,
|
||||
linkerSettings: linkerSettings
|
||||
)
|
||||
],
|
||||
cxxLanguageStandard: .cxx11
|
||||
.systemLibrary(name: "llama", pkgConfig: "llama"),
|
||||
]
|
||||
)
|
||||
|
|
4
Sources/llama/llama.h
Normal file
4
Sources/llama/llama.h
Normal file
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
|
||||
#include <llama.h>
|
||||
|
5
Sources/llama/module.modulemap
Normal file
5
Sources/llama/module.modulemap
Normal file
|
@ -0,0 +1,5 @@
|
|||
module llama [system] {
|
||||
header "llama.h"
|
||||
link "llama"
|
||||
export *
|
||||
}
|
301
ci/run.sh
301
ci/run.sh
|
@ -1,4 +1,4 @@
|
|||
#/bin/bash
|
||||
#!/bin/bash
|
||||
#
|
||||
# sample usage:
|
||||
#
|
||||
|
@ -13,6 +13,9 @@
|
|||
# # with SYCL support
|
||||
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||
#
|
||||
# # with VULKAN support
|
||||
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||
#
|
||||
|
||||
if [ -z "$2" ]; then
|
||||
echo "usage: $0 <output-dir> <mnt-dir>"
|
||||
|
@ -36,11 +39,11 @@ SRC=`pwd`
|
|||
CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"
|
||||
|
||||
if [ ! -z ${GG_BUILD_METAL} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=1"
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
||||
|
@ -50,7 +53,11 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_VULKAN} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
|
||||
fi
|
||||
## helpers
|
||||
|
||||
|
@ -103,8 +110,11 @@ function gg_run_ctest_debug {
|
|||
|
||||
set -e
|
||||
|
||||
# Check cmake, make and ctest are installed
|
||||
gg_check_build_requirements
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
||||
|
||||
|
@ -131,8 +141,11 @@ function gg_run_ctest_release {
|
|||
|
||||
set -e
|
||||
|
||||
# Check cmake, make and ctest are installed
|
||||
gg_check_build_requirements
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||
(time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
||||
|
@ -260,7 +273,6 @@ function gg_sum_ctest_with_model_release {
|
|||
}
|
||||
|
||||
# open_llama_7b_v2
|
||||
# requires: GG_BUILD_CUDA
|
||||
|
||||
function gg_run_open_llama_7b_v2 {
|
||||
cd ${SRC}
|
||||
|
@ -284,10 +296,10 @@ function gg_run_open_llama_7b_v2 {
|
|||
|
||||
set -e
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
python3 ../examples/convert-legacy-llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
|
||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||
|
@ -314,36 +326,36 @@ function gg_run_open_llama_7b_v2 {
|
|||
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||
|
||||
(time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
|
||||
(time ./bin/llama-save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
|
||||
function check_ppl {
|
||||
qnt="$1"
|
||||
|
@ -419,9 +431,9 @@ function gg_run_pythia_1_4b {
|
|||
set -e
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
|
||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||
|
@ -448,34 +460,34 @@ function gg_run_pythia_1_4b {
|
|||
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||
|
||||
(time ./bin/llama-cli --model ${model_f16} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-cli --model ${model_q8_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-cli --model ${model_q4_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-cli --model ${model_q4_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-cli --model ${model_q5_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-cli --model ${model_q5_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-cli --model ${model_q2_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-cli --model ${model_q3_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-cli --model ${model_q4_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-cli --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-cli --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state -fa --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
|
||||
function check_ppl {
|
||||
qnt="$1"
|
||||
|
@ -529,7 +541,6 @@ function gg_sum_pythia_1_4b {
|
|||
}
|
||||
|
||||
# pythia_2_8b
|
||||
# requires: GG_BUILD_CUDA
|
||||
|
||||
function gg_run_pythia_2_8b {
|
||||
cd ${SRC}
|
||||
|
@ -550,10 +561,10 @@ function gg_run_pythia_2_8b {
|
|||
|
||||
set -e
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
|
||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||
|
@ -580,36 +591,36 @@ function gg_run_pythia_2_8b {
|
|||
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||
|
||||
(time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
|
||||
(time ./bin/llama-save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
|
||||
function check_ppl {
|
||||
qnt="$1"
|
||||
|
@ -686,17 +697,17 @@ function gg_run_embd_bge_small {
|
|||
set -e
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
|
||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||
|
||||
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
|
||||
|
||||
(time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
|
||||
set +e
|
||||
}
|
||||
|
@ -710,8 +721,92 @@ function gg_sum_embd_bge_small {
|
|||
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
||||
}
|
||||
|
||||
# rerank_tiny
|
||||
|
||||
function gg_run_rerank_tiny {
|
||||
cd ${SRC}
|
||||
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json
|
||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
|
||||
|
||||
gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json
|
||||
|
||||
path_models="../models-mnt/rerank-tiny"
|
||||
|
||||
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
||||
|
||||
set -e
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||
|
||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||
|
||||
# for this model, the SEP token is "</s>"
|
||||
(time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
|
||||
|
||||
# sample output
|
||||
# rerank score 0: 0.029
|
||||
# rerank score 1: 0.029
|
||||
# rerank score 2: 0.135
|
||||
|
||||
# check that the score is in the range [$3, $4]
|
||||
function check_score {
|
||||
qnt="$1"
|
||||
score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
||||
|
||||
if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then
|
||||
printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4"
|
||||
return 20
|
||||
fi
|
||||
|
||||
printf ' - %s @ %s OK\n' "$qnt" "$score"
|
||||
return 0
|
||||
}
|
||||
|
||||
check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
||||
check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
||||
check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
|
||||
|
||||
set +e
|
||||
}
|
||||
|
||||
function gg_sum_rerank_tiny {
|
||||
gg_printf '### %s\n\n' "${ci}"
|
||||
|
||||
gg_printf 'Rerank Tiny (Jina):\n'
|
||||
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)"
|
||||
}
|
||||
|
||||
function gg_check_build_requirements {
|
||||
if ! command -v cmake &> /dev/null; then
|
||||
gg_printf 'cmake not found, please install'
|
||||
fi
|
||||
|
||||
if ! command -v make &> /dev/null; then
|
||||
gg_printf 'make not found, please install'
|
||||
fi
|
||||
|
||||
if ! command -v ctest &> /dev/null; then
|
||||
gg_printf 'ctest not found, please install'
|
||||
fi
|
||||
}
|
||||
|
||||
## main
|
||||
|
||||
export LLAMA_LOG_PREFIX=1
|
||||
export LLAMA_LOG_TIMESTAMPS=1
|
||||
|
||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
|
||||
rm -rf ${SRC}/models-mnt
|
||||
|
@ -720,7 +815,10 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
|||
ln -sfn ${mnt_models} ${SRC}/models-mnt
|
||||
|
||||
# Create a fresh python3 venv and enter it
|
||||
python3 -m venv "$MNT/venv"
|
||||
if ! python3 -m venv "$MNT/venv"; then
|
||||
echo "Error: Failed to create Python virtual environment at $MNT/venv."
|
||||
exit 1
|
||||
fi
|
||||
source "$MNT/venv/bin/activate"
|
||||
|
||||
pip install -r ${SRC}/requirements.txt --disable-pip-version-check
|
||||
|
@ -734,6 +832,7 @@ test $ret -eq 0 && gg_run ctest_release
|
|||
|
||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||
test $ret -eq 0 && gg_run embd_bge_small
|
||||
test $ret -eq 0 && gg_run rerank_tiny
|
||||
|
||||
if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
|
||||
test $ret -eq 0 && gg_run test_scripts_debug
|
||||
|
@ -741,7 +840,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
|||
fi
|
||||
|
||||
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
|
||||
if [ -z ${GG_BUILD_CUDA} ]; then
|
||||
if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then
|
||||
test $ret -eq 0 && gg_run pythia_1_4b
|
||||
else
|
||||
test $ret -eq 0 && gg_run pythia_2_8b
|
||||
|
|
16
cmake/arm64-apple-clang.cmake
Normal file
16
cmake/arm64-apple-clang.cmake
Normal file
|
@ -0,0 +1,16 @@
|
|||
set( CMAKE_SYSTEM_NAME Darwin )
|
||||
set( CMAKE_SYSTEM_PROCESSOR arm64 )
|
||||
|
||||
set( target arm64-apple-darwin-macho )
|
||||
|
||||
set( CMAKE_C_COMPILER clang )
|
||||
set( CMAKE_CXX_COMPILER clang++ )
|
||||
|
||||
set( CMAKE_C_COMPILER_TARGET ${target} )
|
||||
set( CMAKE_CXX_COMPILER_TARGET ${target} )
|
||||
|
||||
set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
|
||||
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
|
||||
|
||||
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
||||
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
|
@ -44,7 +44,7 @@ if(MSVC)
|
|||
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
|
||||
else()
|
||||
execute_process(
|
||||
COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
|
||||
COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
|
||||
OUTPUT_VARIABLE OUT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
|
|
33
cmake/common.cmake
Normal file
33
cmake/common.cmake
Normal file
|
@ -0,0 +1,33 @@
|
|||
function(llama_add_compile_flags)
|
||||
if (LLAMA_FATAL_WARNINGS)
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
list(APPEND C_FLAGS -Werror)
|
||||
list(APPEND CXX_FLAGS -Werror)
|
||||
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
add_compile_options(/WX)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (LLAMA_ALL_WARNINGS)
|
||||
if (NOT MSVC)
|
||||
list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
||||
-Werror=implicit-int -Werror=implicit-function-declaration)
|
||||
|
||||
list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
||||
|
||||
list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
||||
|
||||
list(APPEND C_FLAGS ${WARNING_FLAGS})
|
||||
list(APPEND CXX_FLAGS ${WARNING_FLAGS})
|
||||
|
||||
ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
|
||||
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
|
||||
else()
|
||||
# todo : msvc
|
||||
set(C_FLAGS "" PARENT_SCOPE)
|
||||
set(CXX_FLAGS "" PARENT_SCOPE)
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
|
@ -3,63 +3,28 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
|
|||
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
|
||||
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
|
||||
|
||||
set(GGML_BLAS @GGML_BLAS@)
|
||||
set(GGML_CUDA @GGML_CUDA@)
|
||||
set(GGML_METAL @GGML_METAL@)
|
||||
set(GGML_HIPBLAS @GGML_HIPBLAS@)
|
||||
set(GGML_ACCELERATE @GGML_ACCELERATE@)
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
|
||||
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
|
||||
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
|
||||
|
||||
# Ensure transient dependencies satisfied
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
if (APPLE AND GGML_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_BLAS)
|
||||
find_package(BLAS REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_CUDA)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_METAL)
|
||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_HIPBLAS)
|
||||
find_package(hip REQUIRED)
|
||||
find_package(hipblas REQUIRED)
|
||||
find_package(rocblas REQUIRED)
|
||||
endif()
|
||||
find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)
|
||||
|
||||
find_library(llama_LIBRARY llama
|
||||
REQUIRED
|
||||
HINTS ${LLAMA_LIB_DIR})
|
||||
|
||||
set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
|
||||
set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@")
|
||||
HINTS ${LLAMA_LIB_DIR}
|
||||
NO_CMAKE_FIND_ROOT_PATH
|
||||
)
|
||||
|
||||
add_library(llama UNKNOWN IMPORTED)
|
||||
|
||||
set_target_properties(llama
|
||||
PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
|
||||
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
|
||||
INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
|
||||
INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
||||
IMPORTED_LOCATION "${llama_LIBRARY}"
|
||||
INTERFACE_COMPILE_FEATURES cxx_std_11
|
||||
POSITION_INDEPENDENT_CODE ON )
|
||||
INTERFACE_COMPILE_FEATURES c_std_90
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
check_required_components(Llama)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
prefix=@CMAKE_INSTALL_PREFIX@
|
||||
exec_prefix=${prefix}
|
||||
libdir=${exec_prefix}/lib
|
||||
includedir=${prefix}/include
|
||||
exec_prefix=@CMAKE_INSTALL_PREFIX@
|
||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
Name: llama
|
||||
Description: Port of Facebook's LLaMA model in C/C++
|
||||
Version: @PROJECT_VERSION@
|
||||
Libs: -L${libdir} -lllama
|
||||
Version: @LLAMA_INSTALL_VERSION@
|
||||
Libs: -L${libdir} -lggml -lggml-base -lllama
|
||||
Cflags: -I${includedir}
|
||||
|
|
11
cmake/x64-windows-llvm.cmake
Normal file
11
cmake/x64-windows-llvm.cmake
Normal file
|
@ -0,0 +1,11 @@
|
|||
set( CMAKE_SYSTEM_NAME Windows )
|
||||
set( CMAKE_SYSTEM_PROCESSOR x86_64 )
|
||||
|
||||
set( CMAKE_C_COMPILER clang )
|
||||
set( CMAKE_CXX_COMPILER clang++ )
|
||||
|
||||
set( arch_c_flags "-march=native" )
|
||||
|
||||
set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
|
||||
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )
|
||||
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
llama_add_compile_flags()
|
||||
|
||||
# Build info header
|
||||
#
|
||||
|
||||
|
@ -51,21 +53,28 @@ endif()
|
|||
set(TARGET common)
|
||||
|
||||
add_library(${TARGET} STATIC
|
||||
arg.cpp
|
||||
arg.h
|
||||
base64.hpp
|
||||
common.h
|
||||
chat.cpp
|
||||
chat.hpp
|
||||
chat-template.hpp
|
||||
common.cpp
|
||||
sampling.h
|
||||
sampling.cpp
|
||||
console.h
|
||||
common.h
|
||||
console.cpp
|
||||
grammar-parser.h
|
||||
grammar-parser.cpp
|
||||
json.hpp
|
||||
console.h
|
||||
json-schema-to-grammar.cpp
|
||||
train.h
|
||||
train.cpp
|
||||
ngram-cache.h
|
||||
json.hpp
|
||||
llguidance.cpp
|
||||
log.cpp
|
||||
log.h
|
||||
minja.hpp
|
||||
ngram-cache.cpp
|
||||
ngram-cache.h
|
||||
sampling.cpp
|
||||
sampling.h
|
||||
speculative.cpp
|
||||
speculative.h
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_LIBS)
|
||||
|
@ -77,12 +86,39 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info)
|
|||
# Use curl to download model url
|
||||
if (LLAMA_CURL)
|
||||
find_package(CURL REQUIRED)
|
||||
add_definitions(-DLLAMA_USE_CURL)
|
||||
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
|
||||
include_directories(${CURL_INCLUDE_DIRS})
|
||||
find_library(CURL_LIBRARY curl REQUIRED)
|
||||
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
|
||||
endif ()
|
||||
|
||||
if (LLAMA_LLGUIDANCE)
|
||||
include(ExternalProject)
|
||||
set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
|
||||
set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
|
||||
ExternalProject_Add(llguidance_ext
|
||||
GIT_REPOSITORY https://github.com/guidance-ai/llguidance
|
||||
# v0.6.12:
|
||||
GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09
|
||||
PREFIX ${CMAKE_BINARY_DIR}/llguidance
|
||||
SOURCE_DIR ${LLGUIDANCE_SRC}
|
||||
BUILD_IN_SOURCE TRUE
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND cargo build --release
|
||||
INSTALL_COMMAND ""
|
||||
BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h
|
||||
UPDATE_COMMAND ""
|
||||
)
|
||||
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
|
||||
|
||||
add_library(llguidance STATIC IMPORTED)
|
||||
set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a)
|
||||
add_dependencies(llguidance llguidance_ext)
|
||||
|
||||
target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
|
||||
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
|
||||
endif ()
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC .)
|
||||
target_compile_features (${TARGET} PUBLIC cxx_std_11)
|
||||
target_compile_features (${TARGET} PUBLIC cxx_std_17)
|
||||
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
|
||||
|
|
2370
common/arg.cpp
Normal file
2370
common/arg.cpp
Normal file
File diff suppressed because it is too large
Load diff
80
common/arg.h
Normal file
80
common/arg.h
Normal file
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
//
|
||||
// CLI argument parsing
|
||||
//
|
||||
|
||||
struct common_arg {
|
||||
std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
|
||||
std::set<enum llama_example> excludes = {};
|
||||
std::vector<const char *> args;
|
||||
const char * value_hint = nullptr; // help text or example for arg value
|
||||
const char * value_hint_2 = nullptr; // for second arg value
|
||||
const char * env = nullptr;
|
||||
std::string help;
|
||||
bool is_sparam = false; // is current arg a sampling param?
|
||||
void (*handler_void) (common_params & params) = nullptr;
|
||||
void (*handler_string) (common_params & params, const std::string &) = nullptr;
|
||||
void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
|
||||
void (*handler_int) (common_params & params, int) = nullptr;
|
||||
|
||||
common_arg(
|
||||
const std::initializer_list<const char *> & args,
|
||||
const char * value_hint,
|
||||
const std::string & help,
|
||||
void (*handler)(common_params & params, const std::string &)
|
||||
) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
|
||||
|
||||
common_arg(
|
||||
const std::initializer_list<const char *> & args,
|
||||
const char * value_hint,
|
||||
const std::string & help,
|
||||
void (*handler)(common_params & params, int)
|
||||
) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
|
||||
|
||||
common_arg(
|
||||
const std::initializer_list<const char *> & args,
|
||||
const std::string & help,
|
||||
void (*handler)(common_params & params)
|
||||
) : args(args), help(help), handler_void(handler) {}
|
||||
|
||||
// support 2 values for arg
|
||||
common_arg(
|
||||
const std::initializer_list<const char *> & args,
|
||||
const char * value_hint,
|
||||
const char * value_hint_2,
|
||||
const std::string & help,
|
||||
void (*handler)(common_params & params, const std::string &, const std::string &)
|
||||
) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
|
||||
|
||||
common_arg & set_examples(std::initializer_list<enum llama_example> examples);
|
||||
common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
|
||||
common_arg & set_env(const char * env);
|
||||
common_arg & set_sparam();
|
||||
bool in_example(enum llama_example ex);
|
||||
bool is_exclude(enum llama_example ex);
|
||||
bool get_value_from_env(std::string & output);
|
||||
bool has_value_from_env();
|
||||
std::string to_string();
|
||||
};
|
||||
|
||||
struct common_params_context {
|
||||
enum llama_example ex = LLAMA_EXAMPLE_COMMON;
|
||||
common_params & params;
|
||||
std::vector<common_arg> options;
|
||||
void(*print_usage)(int, char **) = nullptr;
|
||||
common_params_context(common_params & params) : params(params) {}
|
||||
};
|
||||
|
||||
// parse input arguments from CLI
|
||||
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
|
||||
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
||||
|
||||
// function to be used by test-arg-parser
|
||||
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
529
common/chat-template.hpp
Normal file
529
common/chat-template.hpp
Normal file
|
@ -0,0 +1,529 @@
|
|||
/*
|
||||
Copyright 2024 Google LLC
|
||||
|
||||
Use of this source code is governed by an MIT-style
|
||||
license that can be found in the LICENSE file or at
|
||||
https://opensource.org/licenses/MIT.
|
||||
*/
|
||||
// SPDX-License-Identifier: MIT
|
||||
#pragma once
|
||||
|
||||
#include "minja.hpp"
|
||||
#include <json.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
namespace minja {
|
||||
|
||||
struct chat_template_caps {
|
||||
bool supports_tools = false;
|
||||
bool supports_tool_calls = false;
|
||||
bool supports_tool_responses = false;
|
||||
bool supports_system_role = false;
|
||||
bool supports_parallel_tool_calls = false;
|
||||
bool supports_tool_call_id = false;
|
||||
// meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object.
|
||||
// Most other templates (and OpenAI's API) expect the arguments object to be stringified.
|
||||
bool requires_object_arguments = false;
|
||||
// CohereForAI/c4ai-command-r-plus simple variant
|
||||
bool requires_non_null_content = false;
|
||||
// MiniMaxAI/MiniMax-Text-01 special
|
||||
bool requires_typed_content = false;
|
||||
};
|
||||
|
||||
struct chat_template_inputs {
|
||||
nlohmann::ordered_json messages;
|
||||
nlohmann::ordered_json tools;
|
||||
bool add_generation_prompt = true;
|
||||
nlohmann::ordered_json extra_context;
|
||||
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
||||
};
|
||||
|
||||
struct chat_template_options {
|
||||
bool apply_polyfills = true;
|
||||
bool use_bos_token = true;
|
||||
bool use_eos_token = true;
|
||||
bool define_strftime_now = true;
|
||||
|
||||
bool polyfill_tools = true;
|
||||
bool polyfill_tool_call_examples = true;
|
||||
bool polyfill_tool_calls = true;
|
||||
bool polyfill_tool_responses = true;
|
||||
bool polyfill_system_role = true;
|
||||
bool polyfill_object_arguments = true;
|
||||
bool polyfill_typed_content = true;
|
||||
};
|
||||
|
||||
class chat_template {
|
||||
|
||||
private:
|
||||
chat_template_caps caps_;
|
||||
std::string source_;
|
||||
std::string bos_token_;
|
||||
std::string eos_token_;
|
||||
std::shared_ptr<minja::TemplateNode> template_root_;
|
||||
std::string tool_call_example_;
|
||||
|
||||
std::string try_raw_render(
|
||||
const nlohmann::ordered_json & messages,
|
||||
const nlohmann::ordered_json & tools,
|
||||
bool add_generation_prompt,
|
||||
const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
|
||||
{
|
||||
try {
|
||||
chat_template_inputs inputs;
|
||||
inputs.messages = messages;
|
||||
inputs.tools = tools;
|
||||
inputs.add_generation_prompt = add_generation_prompt;
|
||||
inputs.extra_context = extra_context;
|
||||
// Use fixed date for tests
|
||||
inputs.now = std::chrono::system_clock::from_time_t(0);
|
||||
|
||||
chat_template_options opts;
|
||||
opts.apply_polyfills = false;
|
||||
|
||||
auto prompt = apply(inputs, opts);
|
||||
// fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
|
||||
return prompt;
|
||||
} catch (const std::exception & e) {
|
||||
// fprintf(stderr, "try_raw_render error: %s\n", e.what());
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
|
||||
: source_(source), bos_token_(bos_token), eos_token_(eos_token)
|
||||
{
|
||||
template_root_ = minja::Parser::parse(source_, {
|
||||
/* .trim_blocks = */ true,
|
||||
/* .lstrip_blocks = */ true,
|
||||
/* .keep_trailing_newline = */ false,
|
||||
});
|
||||
|
||||
auto contains = [](const std::string & haystack, const std::string & needle) {
|
||||
return haystack.find(needle) != std::string::npos;
|
||||
};
|
||||
|
||||
const std::string user_needle = "<User Needle>";
|
||||
const std::string sys_needle = "<System Needle>";
|
||||
const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
|
||||
const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};
|
||||
|
||||
caps_.requires_typed_content =
|
||||
!contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
|
||||
&& contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);
|
||||
|
||||
const auto dummy_user_msg = caps_.requires_typed_content
|
||||
? dummy_typed_user_msg
|
||||
: dummy_str_user_msg;
|
||||
const json needle_system_msg = {
|
||||
{"role", "system"},
|
||||
{"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
|
||||
};
|
||||
|
||||
caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);
|
||||
|
||||
auto out = try_raw_render(json::array({
|
||||
dummy_user_msg
|
||||
}), json::array({
|
||||
{
|
||||
{"name", "some_tool"},
|
||||
{"type", "function"},
|
||||
{"function", {
|
||||
{"name", "some_tool"},
|
||||
{"description", "Some tool."},
|
||||
{"parameters", {
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
{"arg", {
|
||||
{"type", "string"},
|
||||
{"description", "Some argument."},
|
||||
}},
|
||||
}},
|
||||
{"required", json::array({ "arg" })},
|
||||
}},
|
||||
}},
|
||||
},
|
||||
}), false);
|
||||
caps_.supports_tools = contains(out, "some_tool");
|
||||
|
||||
auto make_tool_calls_msg = [&](const json & tool_calls) {
|
||||
return json {
|
||||
{"role", "assistant"},
|
||||
{"content", nullptr},
|
||||
{"tool_calls", tool_calls},
|
||||
};
|
||||
};
|
||||
auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
|
||||
return json {
|
||||
{"id", "call_1___"},
|
||||
{"type", "function"},
|
||||
{"function", {
|
||||
{"arguments", arguments},
|
||||
{"name", tool_name},
|
||||
}},
|
||||
};
|
||||
};
|
||||
const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};
|
||||
|
||||
// Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
|
||||
out = try_raw_render(json::array({
|
||||
dummy_user_msg,
|
||||
make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
|
||||
}), {}, false);
|
||||
auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
|
||||
out = try_raw_render(json::array({
|
||||
dummy_user_msg,
|
||||
make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
|
||||
}), {}, false);
|
||||
auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
|
||||
|
||||
caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
|
||||
caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
|
||||
auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
|
||||
auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
|
||||
caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);
|
||||
|
||||
if (caps_.supports_tool_calls) {
|
||||
auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
|
||||
auto tc1 = make_tool_call("test_tool1", dummy_args);
|
||||
auto tc2 = make_tool_call("test_tool2", dummy_args);
|
||||
auto out = try_raw_render(json::array({
|
||||
dummy_user_msg,
|
||||
make_tool_calls_msg(json::array({tc1, tc2})),
|
||||
}), {}, false);
|
||||
caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");
|
||||
|
||||
out = try_raw_render(json::array({
|
||||
dummy_user_msg,
|
||||
make_tool_calls_msg(json::array({tc1})),
|
||||
{
|
||||
{"role", "tool"},
|
||||
{"name", "test_tool1"},
|
||||
{"content", "Some response!"},
|
||||
{"tool_call_id", "call_911_"},
|
||||
}
|
||||
}), {}, false);
|
||||
caps_.supports_tool_responses = contains(out, "Some response!");
|
||||
caps_.supports_tool_call_id = contains(out, "call_911_");
|
||||
}
|
||||
|
||||
try {
|
||||
if (!caps_.supports_tools) {
|
||||
const json user_msg {
|
||||
{"role", "user"},
|
||||
{"content", "Hey"},
|
||||
};
|
||||
const json args {
|
||||
{"arg1", "some_value"},
|
||||
};
|
||||
const json tool_call_msg {
|
||||
{"role", "assistant"},
|
||||
{"content", nullptr},
|
||||
{"tool_calls", json::array({
|
||||
{
|
||||
// TODO: detect if requires numerical id or fixed length == 6 like Nemo
|
||||
{"id", "call_1___"},
|
||||
{"type", "function"},
|
||||
{"function", {
|
||||
{"name", "tool_name"},
|
||||
{"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
|
||||
}},
|
||||
},
|
||||
})},
|
||||
};
|
||||
std::string prefix, full;
|
||||
{
|
||||
chat_template_inputs inputs;
|
||||
inputs.messages = json::array({user_msg});
|
||||
inputs.add_generation_prompt = true;
|
||||
prefix = apply(inputs);
|
||||
}
|
||||
{
|
||||
chat_template_inputs inputs;
|
||||
inputs.messages = json::array({user_msg, tool_call_msg});
|
||||
inputs.add_generation_prompt = false;
|
||||
full = apply(inputs);
|
||||
}
|
||||
auto eos_pos_last = full.rfind(eos_token_);
|
||||
if (eos_pos_last == prefix.size() - eos_token_.size() ||
|
||||
(full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
|
||||
full = full.substr(0, eos_pos_last);
|
||||
}
|
||||
size_t common_prefix_length = 0;
|
||||
for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
|
||||
if (prefix[i] != full[i]) {
|
||||
break;
|
||||
}
|
||||
if (prefix[i] == '<') {
|
||||
// DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
|
||||
// but it removes thinking tags for past messages.
|
||||
// The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
|
||||
continue;
|
||||
}
|
||||
common_prefix_length = i + 1;
|
||||
}
|
||||
auto example = full.substr(common_prefix_length);
|
||||
if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
|
||||
fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
|
||||
} else {
|
||||
tool_call_example_ = example;
|
||||
}
|
||||
}
|
||||
} catch (const std::exception & e) {
|
||||
fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
const std::string & source() const { return source_; }
|
||||
const std::string & bos_token() const { return bos_token_; }
|
||||
const std::string & eos_token() const { return eos_token_; }
|
||||
const chat_template_caps & original_caps() const { return caps_; }
|
||||
|
||||
// Deprecated, please use the form with chat_template_inputs and chat_template_options
|
||||
std::string apply(
|
||||
const nlohmann::ordered_json & messages,
|
||||
const nlohmann::ordered_json & tools,
|
||||
bool add_generation_prompt,
|
||||
const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
|
||||
bool apply_polyfills = true)
|
||||
{
|
||||
fprintf(stderr, "[%s] Deprecated!\n", __func__);
|
||||
chat_template_inputs inputs;
|
||||
inputs.messages = messages;
|
||||
inputs.tools = tools;
|
||||
inputs.add_generation_prompt = add_generation_prompt;
|
||||
inputs.extra_context = extra_context;
|
||||
inputs.now = std::chrono::system_clock::now();
|
||||
|
||||
chat_template_options opts;
|
||||
opts.apply_polyfills = apply_polyfills;
|
||||
|
||||
return apply(inputs, opts);
|
||||
}
|
||||
|
||||
std::string apply(
|
||||
const chat_template_inputs & inputs,
|
||||
const chat_template_options & opts = chat_template_options()) const
|
||||
{
|
||||
json actual_messages;
|
||||
|
||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
||||
auto has_tool_calls = false;
|
||||
auto has_tool_responses = false;
|
||||
auto has_string_content = false;
|
||||
for (const auto & message : inputs.messages) {
|
||||
if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
|
||||
has_tool_calls = true;
|
||||
}
|
||||
if (message.contains("role") && message["role"] == "tool") {
|
||||
has_tool_responses = true;
|
||||
}
|
||||
if (message.contains("content") && message["content"].is_string()) {
|
||||
has_string_content = true;
|
||||
}
|
||||
}
|
||||
|
||||
auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
|
||||
auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
|
||||
auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
|
||||
auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
|
||||
auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
|
||||
auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
|
||||
auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;
|
||||
|
||||
auto needs_polyfills = opts.apply_polyfills && (false
|
||||
|| polyfill_system_role
|
||||
|| polyfill_tools
|
||||
|| polyfill_tool_calls
|
||||
|| polyfill_tool_responses
|
||||
|| polyfill_object_arguments
|
||||
|| polyfill_typed_content
|
||||
);
|
||||
|
||||
if (needs_polyfills) {
|
||||
actual_messages = json::array();
|
||||
|
||||
auto add_message = [&](const json & msg) {
|
||||
if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
|
||||
actual_messages.push_back({
|
||||
{"role", msg.at("role")},
|
||||
{"content", {{
|
||||
{"type", "text"},
|
||||
{"text", msg.at("content")},
|
||||
}}},
|
||||
});
|
||||
} else {
|
||||
actual_messages.push_back(msg);
|
||||
}
|
||||
};
|
||||
|
||||
std::string pending_system;
|
||||
auto flush_sys = [&]() {
|
||||
if (!pending_system.empty()) {
|
||||
add_message({
|
||||
{"role", "user"},
|
||||
{"content", pending_system},
|
||||
});
|
||||
pending_system.clear();
|
||||
}
|
||||
};
|
||||
|
||||
json adjusted_messages;
|
||||
if (polyfill_tools) {
|
||||
adjusted_messages = add_system(inputs.messages,
|
||||
"You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
|
||||
(!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
|
||||
} else {
|
||||
adjusted_messages = inputs.messages;
|
||||
}
|
||||
|
||||
for (const auto & message_ : adjusted_messages) {
|
||||
auto message = message_;
|
||||
if (!message.contains("role") || !message.contains("content")) {
|
||||
throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
|
||||
}
|
||||
std::string role = message.at("role");
|
||||
|
||||
if (message.contains("tool_calls")) {
|
||||
if (polyfill_object_arguments || polyfill_tool_calls) {
|
||||
for (auto & tool_call : message.at("tool_calls")) {
|
||||
if (tool_call["type"] == "function") {
|
||||
auto & function = tool_call.at("function");
|
||||
auto & arguments = function.at("arguments");
|
||||
if (arguments.is_string()) {
|
||||
try {
|
||||
arguments = json::parse(arguments.get<std::string>());
|
||||
} catch (const std::exception & ecvt) {
|
||||
fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (polyfill_tool_calls) {
|
||||
auto content = message.at("content");
|
||||
auto tool_calls = json::array();
|
||||
for (const auto & tool_call : message.at("tool_calls")) {
|
||||
if (tool_call.at("type") != "function") {
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool_call.at("function");
|
||||
auto tc = json {
|
||||
{"name", function.at("name")},
|
||||
{"arguments", function.at("arguments")},
|
||||
};
|
||||
if (tool_call.contains("id")) {
|
||||
tc["id"] = tool_call["id"];
|
||||
}
|
||||
tool_calls.push_back(tc);
|
||||
}
|
||||
auto obj = json {
|
||||
{"tool_calls", tool_calls},
|
||||
};
|
||||
if (!content.is_null() && content != "") {
|
||||
obj["content"] = content;
|
||||
}
|
||||
message["content"] = obj.dump(2);
|
||||
message.erase("tool_calls");
|
||||
}
|
||||
}
|
||||
if (polyfill_tool_responses && role == "tool") {
|
||||
message["role"] = "user";
|
||||
auto obj = json {
|
||||
{"tool_response", {
|
||||
{"content", message.at("content")},
|
||||
}},
|
||||
};
|
||||
if (message.contains("name")) {
|
||||
obj["tool_response"]["name"] = message.at("name");
|
||||
}
|
||||
if (message.contains("tool_call_id")) {
|
||||
obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
|
||||
}
|
||||
message["content"] = obj.dump(2);
|
||||
message.erase("name");
|
||||
}
|
||||
|
||||
if (!message["content"].is_null() && polyfill_system_role) {
|
||||
std::string content = message.at("content");
|
||||
if (role == "system") {
|
||||
if (!pending_system.empty()) pending_system += "\n";
|
||||
pending_system += content;
|
||||
continue;
|
||||
} else {
|
||||
if (role == "user") {
|
||||
if (!pending_system.empty()) {
|
||||
message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
|
||||
pending_system.clear();
|
||||
}
|
||||
} else {
|
||||
flush_sys();
|
||||
}
|
||||
}
|
||||
}
|
||||
add_message(message);
|
||||
}
|
||||
flush_sys();
|
||||
} else {
|
||||
actual_messages = inputs.messages;
|
||||
}
|
||||
|
||||
auto context = minja::Context::make(json({
|
||||
{"messages", actual_messages},
|
||||
{"add_generation_prompt", inputs.add_generation_prompt},
|
||||
}));
|
||||
context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
|
||||
context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
|
||||
if (opts.define_strftime_now) {
|
||||
auto now = inputs.now;
|
||||
context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
|
||||
args.expectArgs("strftime_now", {1, 1}, {0, 0});
|
||||
auto format = args.args[0].get<std::string>();
|
||||
|
||||
auto time = std::chrono::system_clock::to_time_t(now);
|
||||
auto local_time = *std::localtime(&time);
|
||||
std::ostringstream ss;
|
||||
ss << std::put_time(&local_time, format.c_str());
|
||||
return ss.str();
|
||||
}));
|
||||
}
|
||||
if (!inputs.tools.is_null()) {
|
||||
context->set("tools", minja::Value(inputs.tools));
|
||||
}
|
||||
if (!inputs.extra_context.is_null()) {
|
||||
for (auto & kv : inputs.extra_context.items()) {
|
||||
context->set(kv.key(), minja::Value(kv.value()));
|
||||
}
|
||||
}
|
||||
|
||||
auto ret = template_root_->render(context);
|
||||
// fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
|
||||
// fprintf(stderr, "apply: %s\n\n", ret.c_str());
|
||||
return ret;
|
||||
}
|
||||
|
||||
static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
|
||||
json messages_with_system = messages;
|
||||
|
||||
if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
|
||||
std::string existing_system = messages_with_system.at(0).at("content");
|
||||
messages_with_system[0] = json {
|
||||
{"role", "system"},
|
||||
{"content", existing_system + "\n\n" + system_prompt},
|
||||
};
|
||||
} else {
|
||||
messages_with_system.insert(messages_with_system.begin(), json {
|
||||
{"role", "system"},
|
||||
{"content", system_prompt},
|
||||
});
|
||||
}
|
||||
return messages_with_system;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace minja
|
966
common/chat.cpp
Normal file
966
common/chat.cpp
Normal file
|
@ -0,0 +1,966 @@
|
|||
#include "chat.hpp"
|
||||
#include "chat-template.hpp"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "log.h"
|
||||
#include "minja.hpp"
|
||||
|
||||
std::string common_chat_format_name(common_chat_format format) {
|
||||
switch (format) {
|
||||
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
|
||||
case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
|
||||
case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
|
||||
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
|
||||
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
|
||||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
||||
default:
|
||||
throw std::runtime_error("Unknown chat format");
|
||||
}
|
||||
}
|
||||
|
||||
const common_grammar_options grammar_options {
|
||||
/* .dotall = */ false,
|
||||
/* .compact_spaces = */ false,
|
||||
// /* .compact_spaces = */ true,
|
||||
};
|
||||
|
||||
static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) {
|
||||
// // https://json.nlohmann.me/features/parsing/sax_interface/
|
||||
struct json_error_locator : public nlohmann::json_sax<json> {
|
||||
std::size_t position;
|
||||
bool found_error;
|
||||
|
||||
json_error_locator() : position(0), found_error(false) {}
|
||||
|
||||
bool parse_error(std::size_t position, const std::string &, const json::exception &) override {
|
||||
this->position = position - 1;
|
||||
this->found_error = true;
|
||||
return false;
|
||||
}
|
||||
bool null() override { return true; }
|
||||
bool boolean(bool) override { return true; }
|
||||
bool number_integer(number_integer_t) override { return true; }
|
||||
bool number_unsigned(number_unsigned_t) override { return true; }
|
||||
bool number_float(number_float_t, const string_t &) override { return true; }
|
||||
bool string(string_t &) override { return true; }
|
||||
bool binary(binary_t &) override { return true; }
|
||||
bool start_object(std::size_t) override { return true; }
|
||||
bool key(string_t &) override { return true; }
|
||||
bool end_object() override { return true; }
|
||||
bool start_array(std::size_t) override { return true; }
|
||||
bool end_array() override { return true; }
|
||||
};
|
||||
json_error_locator err_loc;
|
||||
json::sax_parse(it, end, &err_loc);
|
||||
|
||||
std::string::const_iterator temptative_end;
|
||||
if (err_loc.found_error) {
|
||||
temptative_end = it + err_loc.position;
|
||||
} else {
|
||||
temptative_end = end;
|
||||
}
|
||||
std::string json_sub {it, temptative_end};
|
||||
try {
|
||||
out = json::parse(json_sub);
|
||||
it = temptative_end;
|
||||
return true;
|
||||
} catch (const std::exception &) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
|
||||
* Aggregates the prefix, suffix and in-between text into the content.
|
||||
*/
|
||||
static common_chat_msg parse_json_tool_calls(
|
||||
const std::string& input,
|
||||
const std::optional<std::regex> & trigger_opt,
|
||||
const std::regex & function_regex,
|
||||
const std::regex & close_regex) {
|
||||
std::smatch match;
|
||||
|
||||
common_chat_msg result;
|
||||
result.role = "assistant";
|
||||
|
||||
|
||||
auto end = input.end();
|
||||
auto it = input.begin();
|
||||
|
||||
if (trigger_opt) {
|
||||
if (!std::regex_search(it, end, match, *trigger_opt)) {
|
||||
result.content = input;
|
||||
return result;
|
||||
}
|
||||
result.content = match.prefix().str();
|
||||
it = match.suffix().first;
|
||||
}
|
||||
|
||||
while (it != end) {
|
||||
std::sregex_iterator rend;
|
||||
std::sregex_iterator rit(it, end, function_regex);
|
||||
if (rit == rend) {
|
||||
fprintf(stderr, "No more tool calls found\n");
|
||||
result.content += std::string(it, end);
|
||||
break;
|
||||
}
|
||||
auto name = rit->str(1);
|
||||
result.content += std::string(it, rit->prefix().second);
|
||||
it = rit->suffix().first;
|
||||
|
||||
json arguments;
|
||||
if (!parse_json(it, end, arguments)) {
|
||||
throw std::runtime_error("Failed to parse json tool call arguments");
|
||||
}
|
||||
if (!std::regex_search(it, end, match, close_regex)) {
|
||||
throw std::runtime_error("Malformed input, missing closing pattern");
|
||||
}
|
||||
it = match.suffix().first;
|
||||
result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
|
||||
auto content_end = input.find(prefix);
|
||||
size_t tc_start = std::string::npos;
|
||||
|
||||
common_chat_msg result;
|
||||
result.role = "assistant";
|
||||
const auto process_tool_calls = [&](const json & tool_calls) {
|
||||
for (const auto & tool_call : tool_calls) {
|
||||
const auto & arguments = tool_call["arguments"];
|
||||
result.tool_calls.push_back({
|
||||
tool_call["name"],
|
||||
arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
|
||||
tool_call.contains("id") ? tool_call["id"] : "",
|
||||
});
|
||||
}
|
||||
};
|
||||
if (content_end == std::string::npos) {
|
||||
result.content = input;
|
||||
} else {
|
||||
tc_start = content_end + prefix.size() - rstrip_prefix;
|
||||
result.content = input.substr(0, content_end);
|
||||
auto tool_calls = json::parse(input.substr(tc_start));
|
||||
process_tool_calls(tool_calls);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
|
||||
for (const auto & tool : tools) {
|
||||
if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) {
|
||||
LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str());
|
||||
continue;
|
||||
}
|
||||
fn(tool);
|
||||
}
|
||||
}
|
||||
|
||||
static std::string apply(
|
||||
const common_chat_template & tmpl,
|
||||
const nlohmann::ordered_json & messages,
|
||||
const nlohmann::ordered_json & tools,
|
||||
bool add_generation_prompt,
|
||||
const nlohmann::ordered_json & extra_context = nlohmann::ordered_json())
|
||||
{
|
||||
minja::chat_template_inputs tmpl_inputs;
|
||||
tmpl_inputs.messages = messages;
|
||||
tmpl_inputs.tools = tools;
|
||||
tmpl_inputs.add_generation_prompt = add_generation_prompt;
|
||||
tmpl_inputs.extra_context = extra_context;
|
||||
// TODO: add flag to control date/time, if only for testing purposes.
|
||||
// tmpl_inputs.now = std::chrono::system_clock::now();
|
||||
|
||||
minja::chat_template_options tmpl_opts;
|
||||
tmpl_opts.use_bos_token = false;
|
||||
tmpl_opts.use_eos_token = false;
|
||||
|
||||
return tmpl.apply(tmpl_inputs, tmpl_opts);
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
common_chat_params data;
|
||||
|
||||
auto tool_call_schemas = json::array();
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
auto tool_schema = json {
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
{"name", {
|
||||
{"type", "string"},
|
||||
{"const", function["name"]},
|
||||
}},
|
||||
{"arguments", function["parameters"]},
|
||||
}},
|
||||
{"required", json::array({"name", "arguments"})},
|
||||
};
|
||||
if (function.contains("description")) {
|
||||
tool_schema["description"] = function["description"];
|
||||
}
|
||||
if (inputs.parallel_tool_calls) {
|
||||
tool_schema["properties"]["id"] = {
|
||||
{"type", "string"},
|
||||
{"minLength", 4},
|
||||
};
|
||||
tool_schema["required"].push_back("id");
|
||||
}
|
||||
tool_call_schemas.emplace_back(tool_schema);
|
||||
});
|
||||
const auto tool_call =
|
||||
inputs.parallel_tool_calls
|
||||
? json {
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
{"tool_calls", {
|
||||
{"type", "array"},
|
||||
{"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
|
||||
{"anyOf", tool_call_schemas},
|
||||
}},
|
||||
{"minItems", 1},
|
||||
}},
|
||||
}},
|
||||
{"required", json::array({"tool_calls"})},
|
||||
}
|
||||
: json {
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
{"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
|
||||
{"anyOf", tool_call_schemas},
|
||||
}},
|
||||
}},
|
||||
{"required", json::array({"tool_call"})},
|
||||
};
|
||||
const auto schema =
|
||||
inputs.tool_choice != "required"
|
||||
? json {
|
||||
{"anyOf", json::array({
|
||||
tool_call,
|
||||
{
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
{"response", inputs.json_schema.is_null()
|
||||
? json {{"type", "string"}}
|
||||
: inputs.json_schema
|
||||
},
|
||||
}},
|
||||
{"required", json::array({"response"})},
|
||||
},
|
||||
})}
|
||||
}
|
||||
: tool_call;
|
||||
|
||||
data.grammar_lazy = false;
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
builder.add_schema("root", schema);
|
||||
}, grammar_options);
|
||||
|
||||
auto tweaked_messages = common_chat_template::add_system(
|
||||
inputs.messages,
|
||||
"Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
|
||||
|
||||
data.prompt = apply(tmpl, tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_GENERIC;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_generic(const std::string & input) {
|
||||
json data = json::parse(input);
|
||||
common_chat_msg result;
|
||||
result.role = "assistant";
|
||||
if (data.contains("tool_calls")) {
|
||||
for (const auto & tool_call : data["tool_calls"]) {
|
||||
result.tool_calls.push_back({
|
||||
tool_call["name"],
|
||||
tool_call["arguments"].dump(),
|
||||
tool_call.contains("id") ? tool_call["id"] : "",
|
||||
});
|
||||
}
|
||||
} else if (data.contains("tool_call")) {
|
||||
result.tool_calls.push_back({
|
||||
data["tool_call"]["name"],
|
||||
data["tool_call"]["arguments"].dump(),
|
||||
/* id= */ "",
|
||||
});
|
||||
} else if (data.contains("response")) {
|
||||
const auto & response = data["response"];
|
||||
result.content = response.is_string() ? response.get<std::string>() : response.dump(2);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
auto schemas = json::array();
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
schemas.push_back({
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
// Important note: the model is probably trained to take a JSON stringified arguments value.
|
||||
// It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
|
||||
{"name", {
|
||||
{"type", "string"},
|
||||
{"const", function["name"]},
|
||||
}},
|
||||
{"arguments", function["parameters"]},
|
||||
{"id", {
|
||||
{"type", "string"},
|
||||
// Nemo's template expects a 9-character alphanumeric ID.
|
||||
{"pattern", "^[a-zA-Z0-9]{9}$"},
|
||||
}},
|
||||
}},
|
||||
{"required", json::array({"name", "arguments", "id"})},
|
||||
});
|
||||
});
|
||||
auto schema = json {
|
||||
{"type", "array"},
|
||||
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
||||
{"minItems", 1},
|
||||
};
|
||||
if (!inputs.parallel_tool_calls) {
|
||||
schema["maxItems"] = 1;
|
||||
}
|
||||
builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
|
||||
}, grammar_options);
|
||||
data.grammar_triggers.push_back({"[TOOL_CALLS]", /* .at_start = */ true});
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) {
|
||||
return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]");
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
auto schemas = json::array();
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
schemas.push_back({
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
{"tool_call_id", {
|
||||
{"type", "string"},
|
||||
// Command-R's template expects an integer string.
|
||||
{"pattern", "^[0-9]{1,10}$"},
|
||||
}},
|
||||
{"tool_name", {
|
||||
{"type", "string"},
|
||||
{"const", function["name"]},
|
||||
}},
|
||||
{"parameters", function["parameters"]},
|
||||
}},
|
||||
{"required", json::array({"tool_call_id", "tool_name", "parameters"})},
|
||||
});
|
||||
});
|
||||
auto schema = json {
|
||||
{"type", "array"},
|
||||
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
||||
{"minItems", 1},
|
||||
};
|
||||
if (!inputs.parallel_tool_calls) {
|
||||
schema["maxItems"] = 1;
|
||||
}
|
||||
builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
|
||||
}, grammar_options);
|
||||
data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false});
|
||||
data.preserved_tokens = {
|
||||
"<|START_RESPONSE|>",
|
||||
"<|END_RESPONSE|>",
|
||||
"<|START_THINKING|>",
|
||||
"<|END_THINKING|>",
|
||||
"<|END_ACTION|>",
|
||||
};
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
|
||||
static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
|
||||
static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
|
||||
std::smatch match;
|
||||
|
||||
common_chat_msg result;
|
||||
result.role = "assistant";
|
||||
if (std::regex_match(input, match, response_regex)) {
|
||||
result.content = match[1].str();
|
||||
} else if (std::regex_match(input, match, thought_action_regex)) {
|
||||
result.tool_plan = match[1].str();
|
||||
auto actions_str = match[2].str();
|
||||
auto actions = json::parse(actions_str);
|
||||
for (const auto & action : actions) {
|
||||
result.tool_calls.push_back({
|
||||
/* .name = */ action["tool_name"],
|
||||
/* .arguments = */ action["parameters"].dump(),
|
||||
/* .id = */ action["tool_call_id"],
|
||||
});
|
||||
}
|
||||
} else {
|
||||
LOG_ERR("Failed to parse command_r output");
|
||||
result.content = input;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
|
||||
if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
|
||||
throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
|
||||
}
|
||||
const auto & parameters_properties = parameters.at("properties");
|
||||
const auto & parameters_required = parameters.at("required");
|
||||
for (const auto & prop : expected_properties) {
|
||||
if (!parameters_properties.contains(prop)) {
|
||||
throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop);
|
||||
}
|
||||
if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
|
||||
throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop);
|
||||
}
|
||||
}
|
||||
if (parameters_properties.size() != expected_properties.size()) {
|
||||
throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
|
||||
}
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, bool allow_python_tag_builtin_tools) {
|
||||
auto builtin_tools = json::array();
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> tool_rules;
|
||||
|
||||
auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
|
||||
if (name == "wolfram_alpha") {
|
||||
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
|
||||
expect_tool_parameters(name, parameters, {"query"});
|
||||
} else if (name == "web_search" || name == "brave_search") {
|
||||
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
|
||||
expect_tool_parameters(name, parameters, {"query"});
|
||||
} else if (name == "python" || name == "code_interpreter") {
|
||||
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
|
||||
expect_tool_parameters(name, parameters, {"code"});
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<std::string> kvs;
|
||||
for (const auto & [key, value] : parameters.at("properties").items()) {
|
||||
kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value));
|
||||
}
|
||||
|
||||
tool_rules.push_back(
|
||||
builder.add_rule(
|
||||
name + "-call",
|
||||
"\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
|
||||
builtin_tools.push_back(name);
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
std::string name = function["name"];
|
||||
auto parameters = function["parameters"];
|
||||
builder.resolve_refs(parameters);
|
||||
|
||||
// https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
|
||||
if (allow_python_tag_builtin_tools) {
|
||||
handle_builtin_tool(name, parameters);
|
||||
}
|
||||
tool_rules.push_back(
|
||||
builder.add_rule(
|
||||
name + "-call",
|
||||
"\"{\" space "
|
||||
"( \"\\\"type\\\":\" space \"\\\"function\\\",\" space )? "
|
||||
"\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
|
||||
builder.add_schema(name + "-args", parameters) +
|
||||
" \"}\""));
|
||||
data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true});
|
||||
});
|
||||
data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true});
|
||||
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
|
||||
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
|
||||
data.grammar_triggers.push_back({"{\"type\": \"function\"", /* .at_start = */ true});
|
||||
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
|
||||
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
|
||||
if (!builtin_tools.empty()) {
|
||||
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
|
||||
}
|
||||
builder.add_rule("root", string_join(tool_rules, " | "));
|
||||
}, grammar_options);
|
||||
data.additional_stops.push_back("<|eom_id|>");
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
|
||||
{"tools_in_user_message", false},
|
||||
{"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
|
||||
});
|
||||
data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
|
||||
? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
|
||||
: COMMON_CHAT_FORMAT_LLAMA_3_X;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
|
||||
// TODO: tighten & simplify the parser, don't accept leading text context.
|
||||
static std::regex function_regex("\\{[\\s\\n\\r]*(?:\"type\"[\\s\\n\\r]*:[\\s\\n\\r]*\"function\"[\\s\\n\\r]*,[\\s\\n\\r]*|[\\s\\n\\r]*)\"name\"[\\s\\n\\r]*:[\\s\\n\\r]*\"([^\"]+)\"[\\s\\n\\r]*,[\\s\\n\\r]*\"parameters\": ");
|
||||
static std::regex close_regex("\\}");
|
||||
static std::regex builtin_call_regex("<\\|python_tag\\|>([^.(]+)\\.call\\((.*)\\)");
|
||||
|
||||
if (with_builtin_tools) {
|
||||
std::smatch match;
|
||||
if (std::regex_match(input, match, builtin_call_regex)) {
|
||||
auto name = match[1].str();
|
||||
auto raw_args = match[2].str();
|
||||
|
||||
// TODO: if/when builtin tools start accepting more than 1 argument, use parse_json for real parsing.
|
||||
auto it_eq = raw_args.find('=');
|
||||
auto arg_name = raw_args.substr(0, it_eq);
|
||||
auto arg_value_str = raw_args.substr(it_eq + 1);
|
||||
auto arg_value = json::parse(arg_value_str);
|
||||
|
||||
return {
|
||||
/* .role = */ "assistant",
|
||||
/* .content = */ match.prefix().str(),
|
||||
/* .tool_calls = */ {
|
||||
{
|
||||
/* .name = */ match[1],
|
||||
/* .arguments = */ (json {
|
||||
{arg_name, arg_value},
|
||||
}).dump(),
|
||||
/* .id = */ "",
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> tool_rules;
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
std::string name = function["name"];
|
||||
auto parameters = function["parameters"];
|
||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||
tool_rules.push_back(builder.add_rule(name + "-call",
|
||||
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
|
||||
});
|
||||
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
|
||||
data.preserved_tokens = {
|
||||
"<|tool▁sep|>",
|
||||
"<|tool▁call▁end|>",
|
||||
};
|
||||
builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space");
|
||||
}, grammar_options);
|
||||
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.prompt = prompt;
|
||||
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
|
||||
static std::regex trigger_regex("<|tool▁calls▁begin|>");
|
||||
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
||||
static std::regex close_regex("```<|tool▁call▁end|>");
|
||||
return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
fprintf(stderr, "%s\n", __func__);
|
||||
common_chat_params data;
|
||||
data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
|
||||
{"datetime", "Jan 29 2025 13:00:00 GMT"},
|
||||
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
||||
});
|
||||
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
auto schemas = json::array();
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
schemas.push_back({
|
||||
{"type", "object"},
|
||||
{"properties", {
|
||||
{"name", {
|
||||
{"type", "string"},
|
||||
{"const", function["name"]},
|
||||
}},
|
||||
{"arguments", function["parameters"]},
|
||||
}},
|
||||
{"required", json::array({"name", "arguments", "id"})},
|
||||
});
|
||||
});
|
||||
auto schema = json {
|
||||
{"type", "array"},
|
||||
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
||||
{"minItems", 1},
|
||||
};
|
||||
if (!inputs.parallel_tool_calls) {
|
||||
schema["maxItems"] = 1;
|
||||
}
|
||||
builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
|
||||
}, grammar_options);
|
||||
data.grammar_triggers.push_back({" functools[", /* .at_start = */ false});
|
||||
data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
|
||||
} else {
|
||||
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) {
|
||||
return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1);
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
// >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
|
||||
// Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
|
||||
common_chat_params data;
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
|
||||
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> first_tool_rules;
|
||||
std::vector<std::string> subsequent_tool_rules;
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
std::string name = function["name"];
|
||||
auto parameters = function["parameters"];
|
||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||
first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
|
||||
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
|
||||
data.grammar_triggers.push_back({name, /* .at_start = */ true});
|
||||
data.grammar_triggers.push_back({">>>" + name, /* .at_start = */ false});
|
||||
});
|
||||
auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
|
||||
if (inputs.parallel_tool_calls) {
|
||||
auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
|
||||
builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
|
||||
} else {
|
||||
builder.add_rule("root", first_rule);
|
||||
}
|
||||
|
||||
}, grammar_options);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
static bool consume(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
|
||||
auto expected_it = expected.begin();
|
||||
auto tmp_it = it;
|
||||
while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
|
||||
++tmp_it;
|
||||
++expected_it;
|
||||
}
|
||||
if (expected_it == expected.end()) {
|
||||
it = tmp_it;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
|
||||
static std::regex function_regex(R"((?:>>>)?(\w+)\n)");
|
||||
static std::regex close_regex(R"($|(?=>>>))");
|
||||
|
||||
std::string content;
|
||||
auto it = input.begin();
|
||||
const auto end = input.end();
|
||||
|
||||
if (consume(it, end, "all\n")) {
|
||||
std::smatch match;
|
||||
if (std::regex_search(it, end, match, function_regex)) {
|
||||
auto fun_it = match.prefix().second;
|
||||
content = std::string(it, fun_it);
|
||||
it = fun_it;
|
||||
} else {
|
||||
common_chat_msg res;
|
||||
res.role = "assistant";
|
||||
res.content = std::string(it, end);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
// TODO: tighten & simplify.
|
||||
try {
|
||||
auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex);
|
||||
res.content = content + res.content;
|
||||
return res;
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what());
|
||||
common_chat_msg res;
|
||||
res.role = "assistant";
|
||||
res.content = input;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
// https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
|
||||
common_chat_params data;
|
||||
json tools = inputs.tools.is_null() ? inputs.tools : json::array();
|
||||
std::string python_code_argument_name;
|
||||
auto has_raw_python = false;
|
||||
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> tool_rules;
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
const auto & parameters = function["parameters"];
|
||||
std::string name = function["name"];
|
||||
if (name == "python" || name == "ipython") {
|
||||
if (!parameters.contains("type")) {
|
||||
throw std::runtime_error("Missing type in python tool");
|
||||
}
|
||||
has_raw_python = true;
|
||||
auto type = parameters.at("type");
|
||||
if (type == "object") {
|
||||
auto properties = parameters.at("properties");
|
||||
for (auto it = properties.begin(); it != properties.end(); ++it) {
|
||||
if (it.value().at("type") == "string") {
|
||||
if (!python_code_argument_name.empty()) {
|
||||
throw std::runtime_error("Multiple string arguments found in python tool");
|
||||
}
|
||||
python_code_argument_name = it.key();
|
||||
}
|
||||
}
|
||||
if (python_code_argument_name.empty()) {
|
||||
throw std::runtime_error("No string argument found in python tool");
|
||||
}
|
||||
} else if (type != "string") {
|
||||
throw std::runtime_error("Invalid type in python tool: " + type.dump());
|
||||
}
|
||||
}
|
||||
tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
|
||||
});
|
||||
if (has_raw_python) {
|
||||
tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
|
||||
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
|
||||
}
|
||||
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
|
||||
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
||||
data.grammar_triggers.push_back({"<function=", /* .at_start = */ false});
|
||||
}, grammar_options);
|
||||
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
// TODO: if (has_raw_python)
|
||||
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
|
||||
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
||||
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
||||
std::smatch match;
|
||||
if (std::regex_search(input, match, python_tag_regex)) {
|
||||
auto code = match[1].str();
|
||||
return {
|
||||
/* .role = */ "assistant",
|
||||
/* .content = */ match.prefix().str(),
|
||||
/* .tool_calls = */ {
|
||||
{
|
||||
/* .name = */ "python",
|
||||
/* .arguments = */ (json {{"code", code}}).dump(),
|
||||
/* .id = */ "",
|
||||
},
|
||||
}
|
||||
};
|
||||
}
|
||||
static std::regex function_regex(R"(<function=(\w+)>)");
|
||||
static std::regex close_regex(R"(</function>)");
|
||||
// TODO: tighten & simplify.
|
||||
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
common_chat_params data;
|
||||
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> tool_rules;
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
std::string name = function["name"];
|
||||
auto parameters = function["parameters"];
|
||||
builder.resolve_refs(parameters);
|
||||
tool_rules.push_back(builder.add_schema(name + "-call", {
|
||||
{"type", "object"},
|
||||
{"properties", json {
|
||||
{"name", json {{"const", name}}},
|
||||
{"arguments", parameters},
|
||||
}},
|
||||
{"required", json::array({"name", "arguments"})},
|
||||
}));
|
||||
});
|
||||
auto tool_call = "\"<tool_call>\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"</tool_call>\" space";
|
||||
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
||||
data.grammar_triggers.push_back({"<tool_call>", /* .at_start = */ false});
|
||||
data.preserved_tokens = { "</tool_call>" };
|
||||
}, grammar_options);
|
||||
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string & input) {
|
||||
try {
|
||||
std::regex start_pattern(R"([\n\s]*<tool_call>)");
|
||||
std::regex middle_pattern(R"([\n\s]*</tool_call>[\n\s]*<tool_call>)");
|
||||
std::regex end_pattern(R"([\n\s]*</tool_call>[\n\s]*$)");
|
||||
|
||||
auto end = input.end();
|
||||
std::sregex_iterator rend;
|
||||
std::sregex_iterator rit(input.begin(), end, start_pattern);
|
||||
if (rit == rend) {
|
||||
return {
|
||||
/* .role = */ "assistant",
|
||||
/* .content = */ input,
|
||||
/* .tool_calls = */ {},
|
||||
};
|
||||
}
|
||||
|
||||
common_chat_msg result;
|
||||
result.role = "assistant";
|
||||
result.content = rit->prefix();
|
||||
|
||||
auto it = rit->suffix().first;
|
||||
while (it != end) {
|
||||
json call;
|
||||
if (!parse_json(it, end, call)) {
|
||||
throw std::runtime_error("Failed to parse json tool call");
|
||||
}
|
||||
const auto & arguments = call["arguments"];
|
||||
result.tool_calls.push_back({
|
||||
call["name"],
|
||||
arguments.dump(),
|
||||
// arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
|
||||
/* id= */ "",
|
||||
});
|
||||
rit = {it, end, middle_pattern};
|
||||
if (rit != rend) {
|
||||
it = rit->suffix().first;
|
||||
} else {
|
||||
rit = {it, end, end_pattern};
|
||||
if (rit == rend) {
|
||||
throw std::runtime_error("Malformed input, missing </tool_call>");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} catch (const std::exception & e) {
|
||||
return {
|
||||
/* .role = */ "assistant",
|
||||
/* .content = */ input,
|
||||
/* .tool_calls = */ {},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
common_chat_params data;
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
data.grammar_lazy = false;
|
||||
if (!inputs.json_schema.is_null()) {
|
||||
if (!inputs.grammar.empty()) {
|
||||
throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
|
||||
}
|
||||
data.grammar = json_schema_to_grammar(inputs.json_schema);
|
||||
} else {
|
||||
data.grammar = inputs.grammar.empty();
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
|
||||
LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");
|
||||
|
||||
if (has_tools && !inputs.grammar.empty()) {
|
||||
throw std::runtime_error("Cannot specify grammar with tools");
|
||||
}
|
||||
|
||||
const auto & src = tmpl.source();
|
||||
if (src.find(">>>all") != std::string::npos) {
|
||||
// Functionary prepends "all\n" to plain content outputs, so we use the parser no matter when
|
||||
return common_chat_params_init_functionary_v3_2(tmpl, inputs);
|
||||
}
|
||||
if (src.find(" functools[") != std::string::npos) {
|
||||
// Firefunction v2 requires datetime and functions in the context, even w/o tools.
|
||||
return common_chat_params_init_firefunction_v2(tmpl, inputs);
|
||||
}
|
||||
|
||||
if (!has_tools) {
|
||||
return common_chat_params_init_without_tools(tmpl, inputs);
|
||||
}
|
||||
|
||||
if (src.find("<tool_call>") != std::string::npos) {
|
||||
return common_chat_params_init_hermes_2_pro(tmpl, inputs);
|
||||
}
|
||||
if (src.find("<|start_header_id|>") != std::string::npos
|
||||
&& src.find("<function=") != std::string::npos) {
|
||||
return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs);
|
||||
}
|
||||
if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
|
||||
auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
|
||||
return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
|
||||
}
|
||||
if (src.find("<|tool▁calls▁begin|>") != std::string::npos) {
|
||||
return common_chat_params_init_deepseek_r1(tmpl, inputs);
|
||||
}
|
||||
if (src.find("[TOOL_CALLS]") != std::string::npos) {
|
||||
return common_chat_params_init_mistral_nemo(tmpl, inputs);
|
||||
}
|
||||
if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) {
|
||||
return common_chat_params_init_command_r7b(tmpl, inputs);
|
||||
}
|
||||
return common_chat_params_init_generic(tmpl, inputs);
|
||||
}
|
||||
|
||||
static common_chat_msg common_chat_parse_content_only(const std::string & input) {
|
||||
return {
|
||||
/* .role = */ "assistant",
|
||||
/* .content = */ input,
|
||||
/* .tool_calls = */ {},
|
||||
};
|
||||
}
|
||||
|
||||
common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) {
|
||||
switch (format) {
|
||||
case COMMON_CHAT_FORMAT_CONTENT_ONLY:
|
||||
return common_chat_parse_content_only(input);
|
||||
case COMMON_CHAT_FORMAT_GENERIC:
|
||||
return common_chat_parse_generic(input);
|
||||
case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
|
||||
return common_chat_parse_mistral_nemo(input);
|
||||
case COMMON_CHAT_FORMAT_LLAMA_3_X:
|
||||
return common_chat_parse_llama_3_1(input);
|
||||
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
|
||||
return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
|
||||
return common_chat_parse_deepseek_r1(input);
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
|
||||
return common_chat_parse_functionary_v3_2(input);
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
|
||||
return common_chat_parse_functionary_v3_1_llama_3_1(input);
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
|
||||
return common_chat_parse_hermes_2_pro(input);
|
||||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
|
||||
return common_chat_parse_firefunction_v2(input);
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B:
|
||||
return common_chat_parse_command_r7b(input);
|
||||
default:
|
||||
throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
|
||||
}
|
||||
}
|
52
common/chat.hpp
Normal file
52
common/chat.hpp
Normal file
|
@ -0,0 +1,52 @@
|
|||
// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <json.hpp>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
struct common_chat_inputs {
|
||||
json messages;
|
||||
json tools;
|
||||
json tool_choice;
|
||||
json json_schema;
|
||||
bool parallel_tool_calls;
|
||||
bool stream;
|
||||
std::string grammar;
|
||||
bool add_generation_prompt = true;
|
||||
};
|
||||
|
||||
enum common_chat_format {
|
||||
COMMON_CHAT_FORMAT_CONTENT_ONLY,
|
||||
COMMON_CHAT_FORMAT_GENERIC,
|
||||
COMMON_CHAT_FORMAT_MISTRAL_NEMO,
|
||||
COMMON_CHAT_FORMAT_LLAMA_3_X,
|
||||
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
||||
COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
|
||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||
|
||||
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
||||
};
|
||||
|
||||
struct common_chat_params {
|
||||
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
json prompt;
|
||||
std::string grammar;
|
||||
bool grammar_lazy = false;
|
||||
std::vector<common_grammar_trigger> grammar_triggers;
|
||||
std::vector<std::string> preserved_tokens;
|
||||
std::vector<std::string> additional_stops;
|
||||
};
|
||||
|
||||
struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params);
|
||||
std::string common_chat_format_name(common_chat_format format);
|
||||
common_chat_msg common_chat_parse( const std::string & input, common_chat_format format);
|
2925
common/common.cpp
2925
common/common.cpp
File diff suppressed because it is too large
Load diff
527
common/common.h
527
common/common.h
|
@ -2,20 +2,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "llama.h"
|
||||
#include "llama-cpp.h"
|
||||
|
||||
#include "sampling.h"
|
||||
|
||||
#define LOG_NO_FILE_LINE_FUNCTION
|
||||
#include "log.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <tuple>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define DIRECTORY_SEPARATOR '\\'
|
||||
|
@ -33,52 +25,192 @@
|
|||
|
||||
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
||||
|
||||
struct common_adapter_lora_info {
|
||||
std::string path;
|
||||
float scale;
|
||||
|
||||
struct llama_adapter_lora * ptr;
|
||||
};
|
||||
|
||||
using llama_tokens = std::vector<llama_token>;
|
||||
|
||||
// build info
|
||||
extern int LLAMA_BUILD_NUMBER;
|
||||
extern char const * LLAMA_COMMIT;
|
||||
extern char const * LLAMA_COMPILER;
|
||||
extern char const * LLAMA_BUILD_TARGET;
|
||||
extern const char * LLAMA_COMMIT;
|
||||
extern const char * LLAMA_COMPILER;
|
||||
extern const char * LLAMA_BUILD_TARGET;
|
||||
|
||||
struct llama_control_vector_load_info;
|
||||
struct common_control_vector_load_info;
|
||||
|
||||
//
|
||||
// CPU utils
|
||||
//
|
||||
|
||||
struct cpu_params {
|
||||
int n_threads = -1;
|
||||
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
|
||||
bool mask_valid = false; // Default: any CPU
|
||||
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
||||
bool strict_cpu = false; // Use strict CPU placement
|
||||
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
|
||||
};
|
||||
|
||||
int32_t cpu_get_num_physical_cores();
|
||||
int32_t cpu_get_num_math();
|
||||
|
||||
//
|
||||
// CLI argument parsing
|
||||
// Common params
|
||||
//
|
||||
|
||||
enum llama_example {
|
||||
LLAMA_EXAMPLE_COMMON,
|
||||
LLAMA_EXAMPLE_SPECULATIVE,
|
||||
LLAMA_EXAMPLE_MAIN,
|
||||
LLAMA_EXAMPLE_INFILL,
|
||||
LLAMA_EXAMPLE_EMBEDDING,
|
||||
LLAMA_EXAMPLE_PERPLEXITY,
|
||||
LLAMA_EXAMPLE_RETRIEVAL,
|
||||
LLAMA_EXAMPLE_PASSKEY,
|
||||
LLAMA_EXAMPLE_IMATRIX,
|
||||
LLAMA_EXAMPLE_BENCH,
|
||||
LLAMA_EXAMPLE_SERVER,
|
||||
LLAMA_EXAMPLE_CVECTOR_GENERATOR,
|
||||
LLAMA_EXAMPLE_EXPORT_LORA,
|
||||
LLAMA_EXAMPLE_LLAVA,
|
||||
LLAMA_EXAMPLE_LOOKUP,
|
||||
LLAMA_EXAMPLE_PARALLEL,
|
||||
LLAMA_EXAMPLE_TTS,
|
||||
|
||||
LLAMA_EXAMPLE_COUNT,
|
||||
};
|
||||
|
||||
enum common_sampler_type {
|
||||
COMMON_SAMPLER_TYPE_NONE = 0,
|
||||
COMMON_SAMPLER_TYPE_DRY = 1,
|
||||
COMMON_SAMPLER_TYPE_TOP_K = 2,
|
||||
COMMON_SAMPLER_TYPE_TOP_P = 3,
|
||||
COMMON_SAMPLER_TYPE_MIN_P = 4,
|
||||
//COMMON_SAMPLER_TYPE_TFS_Z = 5,
|
||||
COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
|
||||
COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
|
||||
COMMON_SAMPLER_TYPE_XTC = 8,
|
||||
COMMON_SAMPLER_TYPE_INFILL = 9,
|
||||
COMMON_SAMPLER_TYPE_PENALTIES = 10,
|
||||
};
|
||||
|
||||
// dimensionality reduction methods, used by cvector-generator
|
||||
enum dimre_method {
|
||||
DIMRE_METHOD_PCA,
|
||||
DIMRE_METHOD_MEAN,
|
||||
};
|
||||
|
||||
struct gpt_params {
|
||||
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
|
||||
enum common_conversation_mode {
|
||||
COMMON_CONVERSATION_MODE_DISABLED = 0,
|
||||
COMMON_CONVERSATION_MODE_ENABLED = 1,
|
||||
COMMON_CONVERSATION_MODE_AUTO = 2,
|
||||
};
|
||||
|
||||
int32_t n_threads = cpu_get_num_math();
|
||||
int32_t n_threads_draft = -1;
|
||||
int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
|
||||
int32_t n_threads_batch_draft = -1;
|
||||
struct common_grammar_trigger {
|
||||
std::string word;
|
||||
bool at_start;
|
||||
};
|
||||
|
||||
// sampling parameters
|
||||
struct common_params_sampling {
|
||||
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
|
||||
|
||||
int32_t n_prev = 64; // number of previous tokens to remember
|
||||
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
||||
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
|
||||
int32_t top_k = 40; // <= 0 to use vocab size
|
||||
float top_p = 0.95f; // 1.0 = disabled
|
||||
float min_p = 0.05f; // 0.0 = disabled
|
||||
float xtc_probability = 0.00f; // 0.0 = disabled
|
||||
float xtc_threshold = 0.10f; // > 0.5 disables XTC
|
||||
float typ_p = 1.00f; // typical_p, 1.0 = disabled
|
||||
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
|
||||
float dynatemp_range = 0.00f; // 0.0 = disabled
|
||||
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
||||
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||
float penalty_repeat = 1.00f; // 1.0 = disabled
|
||||
float penalty_freq = 0.00f; // 0.0 = disabled
|
||||
float penalty_present = 0.00f; // 0.0 = disabled
|
||||
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
|
||||
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
|
||||
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
|
||||
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
||||
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||
float mirostat_tau = 5.00f; // target entropy
|
||||
float mirostat_eta = 0.10f; // learning rate
|
||||
bool ignore_eos = false;
|
||||
bool no_perf = false; // disable performance metrics
|
||||
bool timing_per_token = false;
|
||||
|
||||
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
|
||||
|
||||
|
||||
std::vector<enum common_sampler_type> samplers = {
|
||||
COMMON_SAMPLER_TYPE_PENALTIES,
|
||||
COMMON_SAMPLER_TYPE_DRY,
|
||||
COMMON_SAMPLER_TYPE_TOP_K,
|
||||
COMMON_SAMPLER_TYPE_TYPICAL_P,
|
||||
COMMON_SAMPLER_TYPE_TOP_P,
|
||||
COMMON_SAMPLER_TYPE_MIN_P,
|
||||
COMMON_SAMPLER_TYPE_XTC,
|
||||
COMMON_SAMPLER_TYPE_TEMPERATURE,
|
||||
};
|
||||
|
||||
std::string grammar; // optional BNF-like grammar to constrain sampling
|
||||
bool grammar_lazy = false;
|
||||
std::vector<common_grammar_trigger> grammar_trigger_words; // optional trigger words to trigger lazy grammar
|
||||
std::vector<llama_token> grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.
|
||||
std::set<llama_token> preserved_tokens;
|
||||
|
||||
std::vector<llama_logit_bias> logit_bias; // logit biases to apply
|
||||
|
||||
// print the parameters into a string
|
||||
std::string print() const;
|
||||
};
|
||||
|
||||
struct common_params_speculative {
|
||||
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
|
||||
|
||||
int32_t n_ctx = 0; // draft context size
|
||||
int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
|
||||
int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding
|
||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
||||
float p_split = 0.1f; // speculative decoding split probability
|
||||
float p_min = 0.9f; // minimum speculative decoding probability (greedy)
|
||||
|
||||
struct cpu_params cpuparams;
|
||||
struct cpu_params cpuparams_batch;
|
||||
|
||||
std::string hf_repo = ""; // HF repo // NOLINT
|
||||
std::string hf_file = ""; // HF file // NOLINT
|
||||
|
||||
std::string model = ""; // draft model for speculative decoding // NOLINT
|
||||
std::string model_url = ""; // model url to download // NOLINT
|
||||
};
|
||||
|
||||
struct common_params_vocoder {
|
||||
std::string hf_repo = ""; // HF repo // NOLINT
|
||||
std::string hf_file = ""; // HF file // NOLINT
|
||||
|
||||
std::string model = ""; // model path // NOLINT
|
||||
std::string model_url = ""; // model url to download // NOLINT
|
||||
|
||||
bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT
|
||||
};
|
||||
|
||||
struct common_params {
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_ctx = 0; // context size
|
||||
int32_t n_ctx = 4096; // context size
|
||||
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_draft = 5; // number of tokens to draft during speculative decoding
|
||||
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
||||
int32_t n_parallel = 1; // number of parallel sequences to decode
|
||||
int32_t n_sequences = 1; // number of sequences to decode
|
||||
float p_split = 0.1f; // speculative decoding split probability
|
||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
|
||||
int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
||||
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
||||
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
|
||||
int32_t grp_attn_n = 1; // group-attention factor
|
||||
int32_t grp_attn_w = 512; // group-attention width
|
||||
int32_t n_print = -1; // print token count every n tokens (-1 = disabled)
|
||||
|
@ -89,46 +221,56 @@ struct gpt_params {
|
|||
float yarn_beta_fast = 32.0f; // YaRN low correction dim
|
||||
float yarn_beta_slow = 1.0f; // YaRN high correction dim
|
||||
int32_t yarn_orig_ctx = 0; // YaRN original context length
|
||||
float defrag_thold = -1.0f; // KV cache defragmentation threshold
|
||||
float defrag_thold = 0.1f; // KV cache defragmentation threshold
|
||||
|
||||
// offload params
|
||||
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
|
||||
|
||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
|
||||
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
||||
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
|
||||
|
||||
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
|
||||
|
||||
struct cpu_params cpuparams;
|
||||
struct cpu_params cpuparams_batch;
|
||||
|
||||
ggml_backend_sched_eval_callback cb_eval = nullptr;
|
||||
void * cb_eval_user_data = nullptr;
|
||||
|
||||
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
|
||||
|
||||
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
|
||||
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
||||
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
||||
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
|
||||
|
||||
// // sampling parameters
|
||||
struct llama_sampling_params sparams;
|
||||
struct common_params_sampling sampling;
|
||||
struct common_params_speculative speculative;
|
||||
struct common_params_vocoder vocoder;
|
||||
|
||||
std::string model = ""; // model path
|
||||
std::string model_draft = ""; // draft model for speculative decoding
|
||||
std::string model_alias = "unknown"; // model alias
|
||||
std::string model_url = ""; // model url to download
|
||||
std::string hf_repo = ""; // HF repo
|
||||
std::string hf_file = ""; // HF file
|
||||
std::string prompt = "";
|
||||
std::string prompt_file = ""; // store the external prompt file name
|
||||
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
|
||||
std::string input_prefix = ""; // string to prefix user inputs with
|
||||
std::string input_suffix = ""; // string to suffix user inputs with
|
||||
std::string logdir = ""; // directory in which to save YAML log files
|
||||
std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
|
||||
std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
|
||||
std::string logits_file = ""; // file for saving *all* logits
|
||||
std::string rpc_servers = ""; // comma separated list of RPC servers
|
||||
std::string model = ""; // model path // NOLINT
|
||||
std::string model_alias = ""; // model alias // NOLINT
|
||||
std::string model_url = ""; // model url to download // NOLINT
|
||||
std::string hf_token = ""; // HF token // NOLINT
|
||||
std::string hf_repo = ""; // HF repo // NOLINT
|
||||
std::string hf_file = ""; // HF file // NOLINT
|
||||
std::string prompt = ""; // NOLINT
|
||||
std::string prompt_file = ""; // store the external prompt file name // NOLINT
|
||||
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT
|
||||
std::string input_prefix = ""; // string to prefix user inputs with // NOLINT
|
||||
std::string input_suffix = ""; // string to suffix user inputs with // NOLINT
|
||||
std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
|
||||
std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
|
||||
std::string logits_file = ""; // file for saving *all* logits // NOLINT
|
||||
|
||||
std::vector<std::string> in_files; // all input files
|
||||
std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
|
||||
std::vector<llama_model_kv_override> kv_overrides;
|
||||
|
||||
// TODO: avoid tuple, use struct
|
||||
std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
|
||||
std::string lora_base = ""; // base model path for the lora adapter
|
||||
bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
|
||||
std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
|
||||
|
||||
std::vector<llama_control_vector_load_info> control_vectors; // control vector with user defined scale
|
||||
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
||||
|
||||
int32_t verbosity = 0;
|
||||
int32_t control_vector_layer_start = -1; // layer range for control vector
|
||||
|
@ -154,7 +296,6 @@ struct gpt_params {
|
|||
bool special = false; // enable special token output
|
||||
bool interactive = false; // interactive mode
|
||||
bool interactive_first = false; // wait for user input immediately
|
||||
bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
|
||||
bool prompt_cache_all = false; // save user input and generations to prompt cache
|
||||
bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
|
||||
|
||||
|
@ -163,51 +304,58 @@ struct gpt_params {
|
|||
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
|
||||
bool cont_batching = true; // insert new sequences for decoding on-the-fly
|
||||
bool flash_attn = false; // flash attention
|
||||
bool no_perf = false; // disable performance metrics
|
||||
bool ctx_shift = true; // context shift on inifinite text generation
|
||||
|
||||
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
||||
bool ignore_eos = false; // ignore generated EOS tokens
|
||||
bool logits_all = false; // return logits for all tokens in the batch
|
||||
bool use_mmap = true; // use mmap for faster loads
|
||||
bool use_mlock = false; // use mlock to keep model in memory
|
||||
bool verbose_prompt = false; // print prompt tokens before generation
|
||||
bool display_prompt = true; // print prompt before generation
|
||||
bool infill = false; // use infill mode
|
||||
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
||||
bool no_kv_offload = false; // disable KV offloading
|
||||
bool warmup = true; // warmup run
|
||||
bool check_tensors = false; // validate tensor data
|
||||
|
||||
std::string cache_type_k = "f16"; // KV cache data type for the K
|
||||
std::string cache_type_v = "f16"; // KV cache data type for the V
|
||||
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
|
||||
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
|
||||
|
||||
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
|
||||
|
||||
// multimodal models (see examples/llava)
|
||||
std::string mmproj = ""; // path to multimodal projector
|
||||
std::string mmproj = ""; // path to multimodal projector // NOLINT
|
||||
std::vector<std::string> image; // path to image file(s)
|
||||
|
||||
// embedding
|
||||
bool embedding = false; // get only sentence embedding
|
||||
int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
|
||||
int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
|
||||
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
|
||||
std::string embd_sep = "\n"; // separator of embendings
|
||||
std::string embd_sep = "\n"; // separator of embeddings
|
||||
bool reranking = false; // enable reranking support on server
|
||||
|
||||
// server params
|
||||
int32_t port = 8080; // server listens on this network port
|
||||
int32_t timeout_read = 600; // http read timeout in seconds
|
||||
int32_t timeout_write = timeout_read; // http write timeout in seconds
|
||||
int32_t n_threads_http = -1; // number of threads to process HTTP requests
|
||||
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
|
||||
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
|
||||
|
||||
std::string hostname = "127.0.0.1";
|
||||
std::string public_path = "";
|
||||
std::string chat_template = "";
|
||||
std::string system_prompt = "";
|
||||
std::string public_path = ""; // NOLINT
|
||||
std::string chat_template = ""; // NOLINT
|
||||
bool use_jinja = false; // NOLINT
|
||||
bool enable_chat_template = true;
|
||||
|
||||
std::vector<std::string> api_keys;
|
||||
|
||||
std::string ssl_file_key = "";
|
||||
std::string ssl_file_cert = "";
|
||||
std::string ssl_file_key = ""; // NOLINT
|
||||
std::string ssl_file_cert = ""; // NOLINT
|
||||
|
||||
bool endpoint_slots = true;
|
||||
// "advanced" endpoints are disabled by default for better security
|
||||
bool webui = true;
|
||||
bool endpoint_slots = false;
|
||||
bool endpoint_props = false; // only control POST requests, not GET
|
||||
bool endpoint_metrics = false;
|
||||
|
||||
bool log_json = false;
|
||||
|
@ -253,28 +401,53 @@ struct gpt_params {
|
|||
std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
|
||||
|
||||
bool spm_infill = false; // suffix/prefix/middle pattern for infill
|
||||
|
||||
std::string lora_outfile = "ggml-lora-merged-f16.gguf";
|
||||
|
||||
// batched-bench params
|
||||
bool batched_bench_output_jsonl = false;
|
||||
};
|
||||
|
||||
void gpt_params_handle_model_default(gpt_params & params);
|
||||
// call once at the start of a program if it uses libcommon
|
||||
// initializes the logging system and prints info about the build
|
||||
void common_init();
|
||||
|
||||
bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);
|
||||
bool gpt_params_parse (int argc, char ** argv, gpt_params & params);
|
||||
bool gpt_params_find_arg (int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
|
||||
void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
|
||||
std::string common_params_get_system_info(const common_params & params);
|
||||
|
||||
std::string gpt_params_get_system_info(const gpt_params & params);
|
||||
bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
||||
bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
||||
void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr);
|
||||
bool set_process_priority(enum ggml_sched_priority prio);
|
||||
|
||||
//
|
||||
// String utils
|
||||
//
|
||||
|
||||
std::vector<std::string> string_split(std::string input, char separator);
|
||||
#ifdef __GNUC__
|
||||
#ifdef __MINGW32__
|
||||
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
||||
#else
|
||||
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||
#endif
|
||||
#else
|
||||
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
|
||||
#endif
|
||||
|
||||
LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
|
||||
std::string string_format(const char * fmt, ...);
|
||||
|
||||
std::string string_strip(const std::string & str);
|
||||
std::string string_get_sortable_timestamp();
|
||||
|
||||
std::string string_join(const std::vector<std::string> & values, const std::string & separator);
|
||||
std::vector<std::string> string_split(const std::string & str, const std::string & delimiter);
|
||||
std::string string_repeat(const std::string & str, size_t n);
|
||||
|
||||
void string_replace_all(std::string & s, const std::string & search, const std::string & replace);
|
||||
|
||||
template<class T>
|
||||
static std::vector<T> string_split(const std::string & str, char delim) {
|
||||
static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
|
||||
std::vector<T> values;
|
||||
std::istringstream str_stream(str);
|
||||
std::string token;
|
||||
|
@ -287,9 +460,40 @@ static std::vector<T> string_split(const std::string & str, char delim) {
|
|||
return values;
|
||||
}
|
||||
|
||||
template<>
|
||||
std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
|
||||
{
|
||||
std::vector<std::string> parts;
|
||||
size_t begin_pos = 0;
|
||||
size_t separator_pos = input.find(separator);
|
||||
while (separator_pos != std::string::npos) {
|
||||
std::string part = input.substr(begin_pos, separator_pos - begin_pos);
|
||||
parts.emplace_back(part);
|
||||
begin_pos = separator_pos + 1;
|
||||
separator_pos = input.find(separator, begin_pos);
|
||||
}
|
||||
parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos));
|
||||
return parts;
|
||||
}
|
||||
|
||||
static bool string_starts_with(const std::string & str,
|
||||
const std::string & prefix) { // While we wait for C++20's std::string::starts_with...
|
||||
return str.rfind(prefix, 0) == 0;
|
||||
}
|
||||
|
||||
static bool string_ends_with(const std::string & str,
|
||||
const std::string & suffix) { // While we wait for C++20's std::string::ends_with...
|
||||
return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
|
||||
}
|
||||
|
||||
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
|
||||
void string_process_escapes(std::string & input);
|
||||
|
||||
std::string string_from(bool value);
|
||||
std::string string_from(const std::vector<int> & values);
|
||||
std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
|
||||
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch);
|
||||
|
||||
//
|
||||
// Filesystem utils
|
||||
//
|
||||
|
@ -304,133 +508,193 @@ std::string fs_get_cache_file(const std::string & filename);
|
|||
// Model utils
|
||||
//
|
||||
|
||||
// TODO: avoid tuplue, use struct
|
||||
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params);
|
||||
// note: defines object's lifetime
|
||||
struct common_init_result {
|
||||
llama_model_ptr model;
|
||||
llama_context_ptr context;
|
||||
|
||||
struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params);
|
||||
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
|
||||
std::vector<llama_adapter_lora_ptr> lora;
|
||||
};
|
||||
|
||||
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params);
|
||||
struct llama_model * llama_load_model_from_hf(const char * repo, const char * file, const char * path_model, const struct llama_model_params & params);
|
||||
struct common_init_result common_init_from_params(common_params & params);
|
||||
|
||||
struct llama_model_params common_model_params_to_llama ( common_params & params);
|
||||
struct llama_context_params common_context_params_to_llama(const common_params & params);
|
||||
struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
|
||||
|
||||
struct llama_model * common_load_model_from_url(
|
||||
const std::string & model_url,
|
||||
const std::string & local_path,
|
||||
const std::string & hf_token,
|
||||
const struct llama_model_params & params);
|
||||
|
||||
struct llama_model * common_load_model_from_hf(
|
||||
const std::string & repo,
|
||||
const std::string & remote_path,
|
||||
const std::string & local_path,
|
||||
const std::string & hf_token,
|
||||
const struct llama_model_params & params);
|
||||
|
||||
std::pair<std::string, std::string> common_get_hf_file(
|
||||
const std::string & hf_repo_with_tag,
|
||||
const std::string & hf_token);
|
||||
|
||||
// clear LoRA adapters from context, then apply new list of adapters
|
||||
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
|
||||
|
||||
//
|
||||
// Batch utils
|
||||
//
|
||||
|
||||
void llama_batch_clear(struct llama_batch & batch);
|
||||
void common_batch_clear(struct llama_batch & batch);
|
||||
|
||||
void llama_batch_add(
|
||||
void common_batch_add(
|
||||
struct llama_batch & batch,
|
||||
llama_token id,
|
||||
llama_pos pos,
|
||||
const std::vector<llama_seq_id> & seq_ids,
|
||||
bool logits);
|
||||
|
||||
//
|
||||
// Token utils
|
||||
//
|
||||
|
||||
// longest common prefix
|
||||
size_t common_lcp(const llama_tokens & a, const llama_tokens & b);
|
||||
|
||||
// longet common subsequence
|
||||
size_t common_lcs(const llama_tokens & a, const llama_tokens & b);
|
||||
|
||||
//
|
||||
// Vocab utils
|
||||
//
|
||||
|
||||
// tokenizes a string into a vector of tokens
|
||||
// should work similar to Python's `tokenizer.encode`
|
||||
std::vector<llama_token> llama_tokenize(
|
||||
std::vector<llama_token> common_tokenize(
|
||||
const struct llama_context * ctx,
|
||||
const std::string & text,
|
||||
bool add_special,
|
||||
bool parse_special = false);
|
||||
|
||||
std::vector<llama_token> llama_tokenize(
|
||||
const struct llama_model * model,
|
||||
std::vector<llama_token> common_tokenize(
|
||||
const struct llama_vocab * vocab,
|
||||
const std::string & text,
|
||||
bool add_special,
|
||||
bool parse_special = false);
|
||||
|
||||
// tokenizes a token into a piece, optionally renders special/control tokens
|
||||
// should work similar to Python's `tokenizer.id_to_piece`
|
||||
std::string llama_token_to_piece(
|
||||
std::string common_token_to_piece(
|
||||
const struct llama_context * ctx,
|
||||
llama_token token,
|
||||
bool special = true);
|
||||
|
||||
// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
|
||||
// that takes into account the tokenizer type and decides how to handle the leading space
|
||||
//
|
||||
// detokenizes a vector of tokens into a string
|
||||
// should work similar to Python's `tokenizer.decode`
|
||||
// removes the leading space from the first non-BOS token
|
||||
std::string llama_detokenize_spm(
|
||||
llama_context * ctx,
|
||||
const std::vector<llama_token> & tokens);
|
||||
std::string common_token_to_piece(
|
||||
const struct llama_vocab * vocab,
|
||||
llama_token token,
|
||||
bool special = true);
|
||||
|
||||
// detokenizes a vector of tokens into a string
|
||||
// should work similar to Python's `tokenizer.decode`
|
||||
std::string llama_detokenize_bpe(
|
||||
llama_context * ctx,
|
||||
const std::vector<llama_token> & tokens);
|
||||
// optionally renders special/control tokens
|
||||
std::string common_detokenize(
|
||||
const struct llama_context * ctx,
|
||||
const std::vector<llama_token> & tokens,
|
||||
bool special = true);
|
||||
|
||||
// Uses the value from the model metadata if possible, otherwise
|
||||
// defaults to true when model type is SPM, otherwise false.
|
||||
bool llama_should_add_bos_token(const llama_model * model);
|
||||
std::string common_detokenize(
|
||||
const struct llama_vocab * vocab,
|
||||
const std::vector<llama_token> & tokens,
|
||||
bool special = true);
|
||||
|
||||
//
|
||||
// Chat template utils
|
||||
//
|
||||
|
||||
struct common_tool_call {
|
||||
std::string name;
|
||||
std::string arguments;
|
||||
std::string id;
|
||||
};
|
||||
|
||||
// same with llama_chat_message, but uses std::string
|
||||
struct llama_chat_msg {
|
||||
struct common_chat_msg {
|
||||
std::string role;
|
||||
std::string content;
|
||||
std::vector<common_tool_call> tool_calls;
|
||||
std::string tool_plan = "";
|
||||
};
|
||||
|
||||
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
||||
bool llama_chat_verify_template(const std::string & tmpl);
|
||||
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
|
||||
|
||||
namespace minja {
|
||||
class chat_template;
|
||||
}
|
||||
|
||||
typedef minja::chat_template common_chat_template;
|
||||
|
||||
struct common_chat_templates {
|
||||
bool has_explicit_template; // Model had builtin template or template overridde was specified.
|
||||
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
|
||||
std::unique_ptr<common_chat_template> template_tool_use;
|
||||
};
|
||||
|
||||
// CPP wrapper for llama_chat_apply_template
|
||||
// If the built-in template is not supported, we default to chatml
|
||||
// If the custom "tmpl" is not supported, we throw an error
|
||||
std::string llama_chat_apply_template(const struct llama_model * model,
|
||||
const std::string & tmpl,
|
||||
const std::vector<llama_chat_msg> & chat,
|
||||
bool add_ass);
|
||||
std::string common_chat_apply_template(
|
||||
const common_chat_template & tmpl,
|
||||
const std::vector<common_chat_msg> & chat,
|
||||
bool add_ass,
|
||||
bool use_jinja);
|
||||
|
||||
// Format single message, while taking into account the position of that message in chat history
|
||||
std::string llama_chat_format_single(const struct llama_model * model,
|
||||
const std::string & tmpl,
|
||||
const std::vector<llama_chat_msg> & past_msg,
|
||||
const llama_chat_msg & new_msg,
|
||||
bool add_ass);
|
||||
std::string common_chat_format_single(
|
||||
const common_chat_template & tmpl,
|
||||
const std::vector<common_chat_msg> & past_msg,
|
||||
const common_chat_msg & new_msg,
|
||||
bool add_ass,
|
||||
bool use_jinja);
|
||||
|
||||
// Returns an example of formatted chat
|
||||
std::string llama_chat_format_example(const struct llama_model * model,
|
||||
const std::string & tmpl);
|
||||
std::string common_chat_format_example(
|
||||
const common_chat_template & tmpl, bool use_jinja);
|
||||
|
||||
common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
|
||||
|
||||
//
|
||||
// KV cache utils
|
||||
//
|
||||
|
||||
// Dump the KV cache view with the number of sequences per cell.
|
||||
void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
|
||||
void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
|
||||
|
||||
// Dump the KV cache view showing individual sequences in each cell (long output).
|
||||
void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
||||
void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
||||
|
||||
//
|
||||
// Embedding utils
|
||||
//
|
||||
|
||||
void llama_embd_normalize(const float * inp, float * out, int n, int embd_norm = 2);
|
||||
// TODO: repace embd_norm with an enum
|
||||
void common_embd_normalize(const float * inp, float * out, int n, int embd_norm);
|
||||
|
||||
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);
|
||||
float common_embd_similarity_cos(const float * embd1, const float * embd2, int n);
|
||||
|
||||
//
|
||||
// Control vector utils
|
||||
//
|
||||
|
||||
struct llama_control_vector_data {
|
||||
struct common_control_vector_data {
|
||||
int n_embd;
|
||||
|
||||
// stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
|
||||
std::vector<float> data;
|
||||
};
|
||||
|
||||
struct llama_control_vector_load_info {
|
||||
struct common_control_vector_load_info {
|
||||
float strength;
|
||||
|
||||
std::string fname;
|
||||
|
@ -438,25 +702,16 @@ struct llama_control_vector_load_info {
|
|||
|
||||
// Load control vectors, scale each by strength, and add them together.
|
||||
// On error, returns {-1, empty}
|
||||
llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
|
||||
common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos);
|
||||
|
||||
//
|
||||
// Split utils
|
||||
//
|
||||
|
||||
static const char * const LLM_KV_SPLIT_NO = "split.no";
|
||||
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
||||
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
||||
namespace {
|
||||
|
||||
//
|
||||
// YAML utils
|
||||
//
|
||||
|
||||
void yaml_dump_vector_float (FILE * stream, const char * prop_name, const std::vector<float> & data);
|
||||
void yaml_dump_vector_int (FILE * stream, const char * prop_name, const std::vector<int> & data);
|
||||
void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
|
||||
|
||||
void yaml_dump_non_result_info(
|
||||
FILE * stream, const gpt_params & params, const llama_context * lctx,
|
||||
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
|
||||
const char * const LLM_KV_SPLIT_NO = "split.no";
|
||||
const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
||||
const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
||||
|
||||
}
|
||||
|
|
|
@ -94,6 +94,9 @@ namespace console {
|
|||
simple_io = true;
|
||||
}
|
||||
}
|
||||
if (simple_io) {
|
||||
_setmode(_fileno(stdin), _O_U8TEXT);
|
||||
}
|
||||
#else
|
||||
// POSIX-specific console initialization
|
||||
if (!simple_io) {
|
||||
|
|
|
@ -1,536 +0,0 @@
|
|||
#include "grammar-parser.h"
|
||||
#include <cstdint>
|
||||
#include <cwchar>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <stdexcept>
|
||||
#include <exception>
|
||||
|
||||
namespace grammar_parser {
|
||||
// NOTE: assumes valid utf8 (but checks for overrun)
|
||||
// copied from llama.cpp
|
||||
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
||||
uint8_t first_byte = static_cast<uint8_t>(*src);
|
||||
uint8_t highbits = first_byte >> 4;
|
||||
int len = lookup[highbits];
|
||||
uint8_t mask = (1 << (8 - len)) - 1;
|
||||
uint32_t value = first_byte & mask;
|
||||
const char * end = src + len; // may overrun!
|
||||
const char * pos = src + 1;
|
||||
for ( ; pos < end && *pos; pos++) {
|
||||
value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
|
||||
}
|
||||
return std::make_pair(value, pos);
|
||||
}
|
||||
|
||||
static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||
auto result = state.symbol_ids.emplace(std::string(src, len), next_id);
|
||||
return result.first->second;
|
||||
}
|
||||
|
||||
static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
||||
return next_id;
|
||||
}
|
||||
|
||||
static void add_rule(
|
||||
parse_state & state,
|
||||
uint32_t rule_id,
|
||||
const std::vector<llama_grammar_element> & rule) {
|
||||
if (state.rules.size() <= rule_id) {
|
||||
state.rules.resize(rule_id + 1);
|
||||
}
|
||||
state.rules[rule_id] = rule;
|
||||
}
|
||||
|
||||
static bool is_digit_char(char c) {
|
||||
return '0' <= c && c <= '9';
|
||||
}
|
||||
|
||||
static bool is_word_char(char c) {
|
||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || is_digit_char(c);
|
||||
}
|
||||
|
||||
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
||||
const char * pos = src;
|
||||
const char * end = src + size;
|
||||
uint32_t value = 0;
|
||||
for ( ; pos < end && *pos; pos++) {
|
||||
value <<= 4;
|
||||
char c = *pos;
|
||||
if ('a' <= c && c <= 'f') {
|
||||
value += c - 'a' + 10;
|
||||
} else if ('A' <= c && c <= 'F') {
|
||||
value += c - 'A' + 10;
|
||||
} else if ('0' <= c && c <= '9') {
|
||||
value += c - '0';
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pos != end) {
|
||||
throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
|
||||
}
|
||||
return std::make_pair(value, pos);
|
||||
}
|
||||
|
||||
static const char * parse_space(const char * src, bool newline_ok) {
|
||||
const char * pos = src;
|
||||
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
||||
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
||||
if (*pos == '#') {
|
||||
while (*pos && *pos != '\r' && *pos != '\n') {
|
||||
pos++;
|
||||
}
|
||||
} else {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
static const char * parse_name(const char * src) {
|
||||
const char * pos = src;
|
||||
while (is_word_char(*pos)) {
|
||||
pos++;
|
||||
}
|
||||
if (pos == src) {
|
||||
throw std::runtime_error(std::string("expecting name at ") + src);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
static const char * parse_int(const char * src) {
|
||||
const char * pos = src;
|
||||
while (is_digit_char(*pos)) {
|
||||
pos++;
|
||||
}
|
||||
if (pos == src) {
|
||||
throw std::runtime_error(std::string("expecting integer at ") + src);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
static std::pair<uint32_t, const char *> parse_char(const char * src) {
|
||||
if (*src == '\\') {
|
||||
switch (src[1]) {
|
||||
case 'x': return parse_hex(src + 2, 2);
|
||||
case 'u': return parse_hex(src + 2, 4);
|
||||
case 'U': return parse_hex(src + 2, 8);
|
||||
case 't': return std::make_pair('\t', src + 2);
|
||||
case 'r': return std::make_pair('\r', src + 2);
|
||||
case 'n': return std::make_pair('\n', src + 2);
|
||||
case '\\':
|
||||
case '"':
|
||||
case '[':
|
||||
case ']':
|
||||
return std::make_pair(src[1], src + 2);
|
||||
default:
|
||||
throw std::runtime_error(std::string("unknown escape at ") + src);
|
||||
}
|
||||
} else if (*src) {
|
||||
return decode_utf8(src);
|
||||
}
|
||||
throw std::runtime_error("unexpected end of input");
|
||||
}
|
||||
|
||||
const char * parse_alternates(
|
||||
parse_state & state,
|
||||
const char * src,
|
||||
const std::string & rule_name,
|
||||
uint32_t rule_id,
|
||||
bool is_nested);
|
||||
|
||||
static const char * parse_sequence(
|
||||
parse_state & state,
|
||||
const char * src,
|
||||
const std::string & rule_name,
|
||||
std::vector<llama_grammar_element> & out_elements,
|
||||
bool is_nested) {
|
||||
size_t last_sym_start = out_elements.size();
|
||||
const char * pos = src;
|
||||
|
||||
auto handle_repetitions = [&](int min_times, int max_times) {
|
||||
|
||||
if (last_sym_start == out_elements.size()) {
|
||||
throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
|
||||
}
|
||||
|
||||
// apply transformation to previous symbol (last_sym_start to end) according to
|
||||
// the following rewrite rules:
|
||||
// S{m,n} --> S S S (m times) S'(n-m)
|
||||
// S'(x) ::= S S'(x-1) |
|
||||
// (... n-m definitions of these S' rules ...)
|
||||
// S'(1) ::= S |
|
||||
// S{m,} --> S S S (m times) S'
|
||||
// S' ::= S S' |
|
||||
// S* --> S{0,}
|
||||
// --> S' ::= S S' |
|
||||
// S+ --> S{1,}
|
||||
// --> S S'
|
||||
// S' ::= S S' |
|
||||
// S? --> S{0,1}
|
||||
// --> S'
|
||||
// S' ::= S |
|
||||
|
||||
std::vector<llama_grammar_element> previous_elements(out_elements.begin() + last_sym_start, out_elements.end());
|
||||
if (min_times == 0) {
|
||||
out_elements.resize(last_sym_start);
|
||||
} else {
|
||||
// Repeat the previous elements (min_times - 1) times
|
||||
for (int i = 1; i < min_times; i++) {
|
||||
out_elements.insert(out_elements.end(), previous_elements.begin(), previous_elements.end());
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t last_rec_rule_id = 0;
|
||||
auto n_opt = max_times < 0 ? 1 : max_times - min_times;
|
||||
|
||||
std::vector<llama_grammar_element> rec_rule(previous_elements);
|
||||
for (int i = 0; i < n_opt; i++) {
|
||||
rec_rule.resize(previous_elements.size());
|
||||
uint32_t rec_rule_id = generate_symbol_id(state, rule_name);
|
||||
if (i > 0 || max_times < 0) {
|
||||
rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
|
||||
}
|
||||
rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
|
||||
rec_rule.push_back({LLAMA_GRETYPE_END, 0});
|
||||
add_rule(state, rec_rule_id, rec_rule);
|
||||
last_rec_rule_id = rec_rule_id;
|
||||
}
|
||||
if (n_opt > 0) {
|
||||
out_elements.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
|
||||
}
|
||||
};
|
||||
|
||||
while (*pos) {
|
||||
if (*pos == '"') { // literal string
|
||||
pos++;
|
||||
last_sym_start = out_elements.size();
|
||||
while (*pos != '"') {
|
||||
if (!*pos) {
|
||||
throw std::runtime_error("unexpected end of input");
|
||||
}
|
||||
auto char_pair = parse_char(pos);
|
||||
pos = char_pair.second;
|
||||
out_elements.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
|
||||
}
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (*pos == '[') { // char range(s)
|
||||
pos++;
|
||||
enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
|
||||
if (*pos == '^') {
|
||||
pos++;
|
||||
start_type = LLAMA_GRETYPE_CHAR_NOT;
|
||||
}
|
||||
last_sym_start = out_elements.size();
|
||||
while (*pos != ']') {
|
||||
if (!*pos) {
|
||||
throw std::runtime_error("unexpected end of input");
|
||||
}
|
||||
auto char_pair = parse_char(pos);
|
||||
pos = char_pair.second;
|
||||
enum llama_gretype type = last_sym_start < out_elements.size()
|
||||
? LLAMA_GRETYPE_CHAR_ALT
|
||||
: start_type;
|
||||
|
||||
out_elements.push_back({type, char_pair.first});
|
||||
if (pos[0] == '-' && pos[1] != ']') {
|
||||
if (!pos[1]) {
|
||||
throw std::runtime_error("unexpected end of input");
|
||||
}
|
||||
auto endchar_pair = parse_char(pos + 1);
|
||||
pos = endchar_pair.second;
|
||||
out_elements.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
|
||||
}
|
||||
}
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (is_word_char(*pos)) { // rule reference
|
||||
const char * name_end = parse_name(pos);
|
||||
uint32_t ref_rule_id = get_symbol_id(state, pos, name_end - pos);
|
||||
pos = parse_space(name_end, is_nested);
|
||||
last_sym_start = out_elements.size();
|
||||
out_elements.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
|
||||
} else if (*pos == '(') { // grouping
|
||||
// parse nested alternates into synthesized rule
|
||||
pos = parse_space(pos + 1, true);
|
||||
uint32_t sub_rule_id = generate_symbol_id(state, rule_name);
|
||||
pos = parse_alternates(state, pos, rule_name, sub_rule_id, true);
|
||||
last_sym_start = out_elements.size();
|
||||
// output reference to synthesized rule
|
||||
out_elements.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
|
||||
if (*pos != ')') {
|
||||
throw std::runtime_error(std::string("expecting ')' at ") + pos);
|
||||
}
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (*pos == '.') { // any char
|
||||
last_sym_start = out_elements.size();
|
||||
out_elements.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (*pos == '*') {
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
handle_repetitions(0, -1);
|
||||
} else if (*pos == '+') {
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
handle_repetitions(1, -1);
|
||||
} else if (*pos == '?') {
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
handle_repetitions(0, 1);
|
||||
} else if (*pos == '{') {
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
|
||||
if (!is_digit_char(*pos)) {
|
||||
throw std::runtime_error(std::string("expecting an int at ") + pos);
|
||||
}
|
||||
const char * int_end = parse_int(pos);
|
||||
int min_times = std::stoul(std::string(pos, int_end - pos));
|
||||
pos = parse_space(int_end, is_nested);
|
||||
|
||||
int max_times = -1;
|
||||
|
||||
if (*pos == '}') {
|
||||
max_times = min_times;
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (*pos == ',') {
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
|
||||
if (is_digit_char(*pos)) {
|
||||
const char * int_end = parse_int(pos);
|
||||
max_times = std::stoul(std::string(pos, int_end - pos));
|
||||
pos = parse_space(int_end, is_nested);
|
||||
}
|
||||
|
||||
if (*pos != '}') {
|
||||
throw std::runtime_error(std::string("expecting '}' at ") + pos);
|
||||
}
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else {
|
||||
throw std::runtime_error(std::string("expecting ',' at ") + pos);
|
||||
}
|
||||
handle_repetitions(min_times, max_times);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
const char * parse_alternates(
|
||||
parse_state & state,
|
||||
const char * src,
|
||||
const std::string & rule_name,
|
||||
uint32_t rule_id,
|
||||
bool is_nested) {
|
||||
std::vector<llama_grammar_element> rule;
|
||||
const char * pos = parse_sequence(state, src, rule_name, rule, is_nested);
|
||||
while (*pos == '|') {
|
||||
rule.push_back({LLAMA_GRETYPE_ALT, 0});
|
||||
pos = parse_space(pos + 1, true);
|
||||
pos = parse_sequence(state, pos, rule_name, rule, is_nested);
|
||||
}
|
||||
rule.push_back({LLAMA_GRETYPE_END, 0});
|
||||
add_rule(state, rule_id, rule);
|
||||
return pos;
|
||||
}
|
||||
|
||||
static const char * parse_rule(parse_state & state, const char * src) {
|
||||
const char * name_end = parse_name(src);
|
||||
const char * pos = parse_space(name_end, false);
|
||||
size_t name_len = name_end - src;
|
||||
uint32_t rule_id = get_symbol_id(state, src, name_len);
|
||||
const std::string name(src, name_len);
|
||||
|
||||
if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
|
||||
throw std::runtime_error(std::string("expecting ::= at ") + pos);
|
||||
}
|
||||
pos = parse_space(pos + 3, true);
|
||||
|
||||
pos = parse_alternates(state, pos, name, rule_id, false);
|
||||
|
||||
if (*pos == '\r') {
|
||||
pos += pos[1] == '\n' ? 2 : 1;
|
||||
} else if (*pos == '\n') {
|
||||
pos++;
|
||||
} else if (*pos) {
|
||||
throw std::runtime_error(std::string("expecting newline or end at ") + pos);
|
||||
}
|
||||
return parse_space(pos, true);
|
||||
}
|
||||
|
||||
parse_state parse(const char * src) {
|
||||
try {
|
||||
parse_state state;
|
||||
const char * pos = parse_space(src, true);
|
||||
while (*pos) {
|
||||
pos = parse_rule(state, pos);
|
||||
}
|
||||
// Validate the state to ensure that all rules are defined
|
||||
for (const auto & rule : state.rules) {
|
||||
for (const auto & elem : rule) {
|
||||
if (elem.type == LLAMA_GRETYPE_RULE_REF) {
|
||||
// Ensure that the rule at that location exists
|
||||
if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) {
|
||||
// Get the name of the rule that is missing
|
||||
for (const auto & kv : state.symbol_ids) {
|
||||
if (kv.second == elem.value) {
|
||||
throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return state;
|
||||
} catch (const std::exception & err) {
|
||||
fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
|
||||
return parse_state();
|
||||
}
|
||||
}
|
||||
|
||||
static void print_grammar_char(FILE * file, uint32_t c) {
|
||||
if (0x20 <= c && c <= 0x7f) {
|
||||
fprintf(file, "%c", static_cast<char>(c));
|
||||
} else {
|
||||
// cop out of encoding UTF-8
|
||||
fprintf(file, "<U+%04X>", c);
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_char_element(llama_grammar_element elem) {
|
||||
switch (elem.type) {
|
||||
case LLAMA_GRETYPE_CHAR: return true;
|
||||
case LLAMA_GRETYPE_CHAR_NOT: return true;
|
||||
case LLAMA_GRETYPE_CHAR_ALT: return true;
|
||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
|
||||
case LLAMA_GRETYPE_CHAR_ANY: return true;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
|
||||
for (auto elem : rule) {
|
||||
switch (elem.type) {
|
||||
case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
|
||||
case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
|
||||
case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
|
||||
case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
|
||||
case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
|
||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
|
||||
case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
|
||||
case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
|
||||
}
|
||||
switch (elem.type) {
|
||||
case LLAMA_GRETYPE_END:
|
||||
case LLAMA_GRETYPE_ALT:
|
||||
case LLAMA_GRETYPE_RULE_REF:
|
||||
fprintf(file, "(%u) ", elem.value);
|
||||
break;
|
||||
case LLAMA_GRETYPE_CHAR:
|
||||
case LLAMA_GRETYPE_CHAR_NOT:
|
||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
|
||||
case LLAMA_GRETYPE_CHAR_ALT:
|
||||
case LLAMA_GRETYPE_CHAR_ANY:
|
||||
fprintf(file, "(\"");
|
||||
print_grammar_char(file, elem.value);
|
||||
fprintf(file, "\") ");
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(file, "\n");
|
||||
}
|
||||
|
||||
static void print_rule(
|
||||
FILE * file,
|
||||
uint32_t rule_id,
|
||||
const std::vector<llama_grammar_element> & rule,
|
||||
const std::map<uint32_t, std::string> & symbol_id_names) {
|
||||
if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
|
||||
throw std::runtime_error(
|
||||
"malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
|
||||
}
|
||||
fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
|
||||
for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
|
||||
llama_grammar_element elem = rule[i];
|
||||
switch (elem.type) {
|
||||
case LLAMA_GRETYPE_END:
|
||||
throw std::runtime_error(
|
||||
"unexpected end of rule: " + std::to_string(rule_id) + "," +
|
||||
std::to_string(i));
|
||||
case LLAMA_GRETYPE_ALT:
|
||||
fprintf(file, "| ");
|
||||
break;
|
||||
case LLAMA_GRETYPE_RULE_REF:
|
||||
fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
|
||||
break;
|
||||
case LLAMA_GRETYPE_CHAR:
|
||||
fprintf(file, "[");
|
||||
print_grammar_char(file, elem.value);
|
||||
break;
|
||||
case LLAMA_GRETYPE_CHAR_NOT:
|
||||
fprintf(file, "[^");
|
||||
print_grammar_char(file, elem.value);
|
||||
break;
|
||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
|
||||
if (i == 0 || !is_char_element(rule[i - 1])) {
|
||||
throw std::runtime_error(
|
||||
"LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
|
||||
std::to_string(rule_id) + "," + std::to_string(i));
|
||||
}
|
||||
fprintf(file, "-");
|
||||
print_grammar_char(file, elem.value);
|
||||
break;
|
||||
case LLAMA_GRETYPE_CHAR_ALT:
|
||||
if (i == 0 || !is_char_element(rule[i - 1])) {
|
||||
throw std::runtime_error(
|
||||
"LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
|
||||
std::to_string(rule_id) + "," + std::to_string(i));
|
||||
}
|
||||
print_grammar_char(file, elem.value);
|
||||
break;
|
||||
case LLAMA_GRETYPE_CHAR_ANY:
|
||||
fprintf(file, ".");
|
||||
break;
|
||||
}
|
||||
if (is_char_element(elem)) {
|
||||
switch (rule[i + 1].type) {
|
||||
case LLAMA_GRETYPE_CHAR_ALT:
|
||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
|
||||
case LLAMA_GRETYPE_CHAR_ANY:
|
||||
break;
|
||||
default:
|
||||
fprintf(file, "] ");
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(file, "\n");
|
||||
}
|
||||
|
||||
void print_grammar(FILE * file, const parse_state & state) {
|
||||
try {
|
||||
std::map<uint32_t, std::string> symbol_id_names;
|
||||
for (const auto & kv : state.symbol_ids) {
|
||||
symbol_id_names[kv.second] = kv.first;
|
||||
}
|
||||
for (size_t i = 0, end = state.rules.size(); i < end; i++) {
|
||||
// fprintf(file, "%zu: ", i);
|
||||
// print_rule_binary(file, state.rules[i]);
|
||||
print_rule(file, uint32_t(i), state.rules[i], symbol_id_names);
|
||||
// fprintf(file, "\n");
|
||||
}
|
||||
} catch (const std::exception & err) {
|
||||
fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const llama_grammar_element *> parse_state::c_rules() {
|
||||
std::vector<const llama_grammar_element *> ret;
|
||||
ret.reserve(rules.size());
|
||||
for (const auto & rule : rules) {
|
||||
ret.push_back(rule.data());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
|
@ -1,29 +0,0 @@
|
|||
// Implements a parser for an extended Backus-Naur form (BNF), producing the
|
||||
// binary context-free grammar format specified by llama.h. Supports character
|
||||
// ranges, grouping, and repetition operators. As an example, a grammar for
|
||||
// arithmetic might look like:
|
||||
//
|
||||
// root ::= expr
|
||||
// expr ::= term ([-+*/] term)*
|
||||
// term ::= num | "(" space expr ")" space
|
||||
// num ::= [0-9]+ space
|
||||
// space ::= [ \t\n]*
|
||||
|
||||
#pragma once
|
||||
#include "llama.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace grammar_parser {
|
||||
struct parse_state {
|
||||
std::map<std::string, uint32_t> symbol_ids;
|
||||
std::vector<std::vector<llama_grammar_element>> rules;
|
||||
|
||||
std::vector<const llama_grammar_element *> c_rules();
|
||||
};
|
||||
|
||||
parse_state parse(const char * src);
|
||||
void print_grammar(FILE * file, const parse_state & state);
|
||||
}
|
|
@ -1,4 +1,6 @@
|
|||
#include "json-schema-to-grammar.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
|
@ -11,11 +13,6 @@
|
|||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
template <typename Iterator>
|
||||
static std::string join(Iterator begin, Iterator end, const std::string & separator);
|
||||
|
||||
static std::string repeat(const std::string & str, size_t n);
|
||||
|
||||
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
|
||||
auto has_max = max_items != std::numeric_limits<int>::max();
|
||||
|
||||
|
@ -128,8 +125,8 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
|
|||
if (sub_len > 0) {
|
||||
auto from_sub = from.substr(i + 1);
|
||||
auto to_sub = to.substr(i + 1);
|
||||
auto sub_zeros = repeat("0", sub_len);
|
||||
auto sub_nines = repeat("9", sub_len);
|
||||
auto sub_zeros = string_repeat("0", sub_len);
|
||||
auto sub_nines = string_repeat("9", sub_len);
|
||||
|
||||
auto to_reached = false;
|
||||
out << "(";
|
||||
|
@ -188,8 +185,8 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
|
|||
auto max_digits = max_s.length();
|
||||
|
||||
for (auto digits = min_digits; digits < max_digits; digits++) {
|
||||
uniform_range(min_s, repeat("9", digits));
|
||||
min_s = "1" + repeat("0", digits);
|
||||
uniform_range(min_s, string_repeat("9", digits));
|
||||
min_s = "1" + string_repeat("0", digits);
|
||||
out << " | ";
|
||||
}
|
||||
uniform_range(min_s, max_s);
|
||||
|
@ -318,49 +315,6 @@ std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
|
|||
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
||||
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
||||
|
||||
template <typename Iterator>
|
||||
std::string join(Iterator begin, Iterator end, const std::string & separator) {
|
||||
std::ostringstream result;
|
||||
if (begin != end) {
|
||||
result << *begin;
|
||||
for (Iterator it = begin + 1; it != end; ++it) {
|
||||
result << separator << *it;
|
||||
}
|
||||
}
|
||||
return result.str();
|
||||
}
|
||||
|
||||
static std::vector<std::string> split(const std::string & str, const std::string & delimiter) {
|
||||
std::vector<std::string> tokens;
|
||||
size_t start = 0;
|
||||
size_t end = str.find(delimiter);
|
||||
|
||||
while (end != std::string::npos) {
|
||||
tokens.push_back(str.substr(start, end - start));
|
||||
start = end + delimiter.length();
|
||||
end = str.find(delimiter, start);
|
||||
}
|
||||
|
||||
tokens.push_back(str.substr(start));
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
static std::string repeat(const std::string & str, size_t n) {
|
||||
if (n == 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
result.reserve(str.length() * n);
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
result += str;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
|
||||
std::smatch match;
|
||||
std::string result;
|
||||
|
@ -389,6 +343,7 @@ static std::string format_literal(const std::string & literal) {
|
|||
|
||||
class SchemaConverter {
|
||||
private:
|
||||
friend std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options);
|
||||
std::function<json(const std::string &)> _fetch_json;
|
||||
bool _dotall;
|
||||
std::map<std::string, std::string> _rules;
|
||||
|
@ -418,7 +373,7 @@ private:
|
|||
for (size_t i = 0; i < alt_schemas.size(); i++) {
|
||||
rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
|
||||
}
|
||||
return join(rules.begin(), rules.end(), " | ");
|
||||
return string_join(rules, " | ");
|
||||
}
|
||||
|
||||
std::string _visit_pattern(const std::string & pattern, const std::string & name) {
|
||||
|
@ -481,7 +436,7 @@ private:
|
|||
for (const auto & item : ret) {
|
||||
results.push_back(to_rule(item));
|
||||
}
|
||||
return std::make_pair(join(results.begin(), results.end(), " "), false);
|
||||
return std::make_pair(string_join(results, " "), false);
|
||||
};
|
||||
|
||||
while (i < length) {
|
||||
|
@ -539,7 +494,7 @@ private:
|
|||
}
|
||||
curly_brackets += '}';
|
||||
i++;
|
||||
auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
|
||||
auto nums = string_split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
|
||||
int min_times = 0;
|
||||
int max_times = std::numeric_limits<int>::max();
|
||||
try {
|
||||
|
@ -611,7 +566,7 @@ private:
|
|||
}
|
||||
return join_seq();
|
||||
};
|
||||
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
|
||||
return _add_rule(name, "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space");
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -809,10 +764,11 @@ private:
|
|||
public:
|
||||
SchemaConverter(
|
||||
const std::function<json(const std::string &)> & fetch_json,
|
||||
bool dotall)
|
||||
bool dotall,
|
||||
bool compact_spaces)
|
||||
: _fetch_json(fetch_json), _dotall(dotall)
|
||||
{
|
||||
_rules["space"] = SPACE_RULE;
|
||||
_rules["space"] = compact_spaces ? "\" \"?" : SPACE_RULE;
|
||||
}
|
||||
|
||||
void resolve_refs(json & schema, const std::string & url) {
|
||||
|
@ -854,7 +810,7 @@ public:
|
|||
return;
|
||||
}
|
||||
std::string pointer = ref.substr(ref.find('#') + 1);
|
||||
std::vector<std::string> tokens = split(pointer, "/");
|
||||
std::vector<std::string> tokens = string_split(pointer, "/");
|
||||
for (size_t i = 1; i < tokens.size(); ++i) {
|
||||
std::string sel = tokens[i];
|
||||
if (target.is_null() || !target.contains(sel)) {
|
||||
|
@ -905,7 +861,7 @@ public:
|
|||
for (const auto & v : schema["enum"]) {
|
||||
enum_values.push_back(_generate_constant_rule(v));
|
||||
}
|
||||
return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
|
||||
return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
|
||||
} else if ((schema_type.is_null() || schema_type == "object")
|
||||
&& (schema.contains("properties") ||
|
||||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
||||
|
@ -1019,10 +975,10 @@ public:
|
|||
|
||||
void check_errors() {
|
||||
if (!_errors.empty()) {
|
||||
throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
|
||||
throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
|
||||
}
|
||||
if (!_warnings.empty()) {
|
||||
fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", join(_warnings.begin(), _warnings.end(), "; ").c_str());
|
||||
fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1035,11 +991,35 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
std::string json_schema_to_grammar(const json & schema) {
|
||||
SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
|
||||
std::string json_schema_to_grammar(const json & schema, bool force_gbnf) {
|
||||
#ifdef LLAMA_USE_LLGUIDANCE
|
||||
if (!force_gbnf) {
|
||||
return "%llguidance {}\nstart: %json " + schema.dump();
|
||||
}
|
||||
#else
|
||||
(void)force_gbnf;
|
||||
#endif // LLAMA_USE_LLGUIDANCE
|
||||
return build_grammar([&](const common_grammar_builder & callbacks) {
|
||||
auto copy = schema;
|
||||
converter.resolve_refs(copy, "input");
|
||||
converter.visit(copy, "");
|
||||
callbacks.resolve_refs(copy);
|
||||
callbacks.add_schema("", copy);
|
||||
});
|
||||
}
|
||||
|
||||
std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options) {
|
||||
SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.compact_spaces);
|
||||
common_grammar_builder builder {
|
||||
/* .add_rule = */ [&](const std::string & name, const std::string & rule) {
|
||||
return converter._add_rule(name, rule);
|
||||
},
|
||||
/* .add_schema = */ [&](const std::string & name, const nlohmann::ordered_json & schema) {
|
||||
return converter.visit(schema, name == "root" ? "" : name);
|
||||
},
|
||||
/* .resolve_refs = */ [&](nlohmann::ordered_json & schema) {
|
||||
converter.resolve_refs(schema, "");
|
||||
}
|
||||
};
|
||||
cb(builder);
|
||||
converter.check_errors();
|
||||
return converter.format_grammar();
|
||||
}
|
||||
|
|
|
@ -5,4 +5,18 @@
|
|||
#define JSON_ASSERT GGML_ASSERT
|
||||
#include "json.hpp"
|
||||
|
||||
std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
|
||||
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
|
||||
bool force_gbnf = false);
|
||||
|
||||
struct common_grammar_builder {
|
||||
std::function<std::string(const std::string &, const std::string &)> add_rule;
|
||||
std::function<std::string(const std::string &, const nlohmann::ordered_json &)> add_schema;
|
||||
std::function<void(nlohmann::ordered_json &)> resolve_refs;
|
||||
};
|
||||
|
||||
struct common_grammar_options {
|
||||
bool dotall = false;
|
||||
bool compact_spaces = false;
|
||||
};
|
||||
|
||||
std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options = {});
|
||||
|
|
270
common/llguidance.cpp
Normal file
270
common/llguidance.cpp
Normal file
|
@ -0,0 +1,270 @@
|
|||
#include "sampling.h"
|
||||
#include "log.h"
|
||||
|
||||
#ifdef LLAMA_USE_LLGUIDANCE
|
||||
|
||||
# include "llguidance.h"
|
||||
# include <cmath>
|
||||
|
||||
struct llama_sampler_llg {
|
||||
const llama_vocab * vocab;
|
||||
std::string grammar_kind;
|
||||
std::string grammar_data;
|
||||
LlgTokenizer * tokenizer;
|
||||
LlgConstraint * grammar;
|
||||
LlgMaskResult llg_res;
|
||||
bool has_llg_res;
|
||||
};
|
||||
|
||||
static LlgConstraint * llama_sampler_llg_new(LlgTokenizer * tokenizer, const char * grammar_kind,
|
||||
const char * grammar_data) {
|
||||
LlgConstraintInit cinit;
|
||||
llg_constraint_init_set_defaults(&cinit, tokenizer);
|
||||
const char * log_level = getenv("LLGUIDANCE_LOG_LEVEL");
|
||||
if (log_level && *log_level) {
|
||||
cinit.log_stderr_level = atoi(log_level);
|
||||
}
|
||||
auto c = llg_new_constraint_any(&cinit, grammar_kind, grammar_data);
|
||||
if (llg_get_error(c)) {
|
||||
LOG_ERR("llg error: %s\n", llg_get_error(c));
|
||||
llg_free_constraint(c);
|
||||
return nullptr;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
static const char * llama_sampler_llg_name(const llama_sampler * /*smpl*/) {
|
||||
return "llguidance";
|
||||
}
|
||||
|
||||
static void llama_sampler_llg_accept_impl(llama_sampler * smpl, llama_token token) {
|
||||
auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
||||
if (ctx->grammar) {
|
||||
LlgCommitResult res;
|
||||
llg_commit_token(ctx->grammar, token, &res);
|
||||
ctx->has_llg_res = false;
|
||||
}
|
||||
}
|
||||
|
||||
static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array * cur_p) {
|
||||
auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
||||
if (ctx->grammar) {
|
||||
if (!ctx->has_llg_res) {
|
||||
if (llg_compute_mask(ctx->grammar, &ctx->llg_res) == 0) {
|
||||
ctx->has_llg_res = true;
|
||||
} else {
|
||||
LOG_ERR("llg error: %s\n", llg_get_error(ctx->grammar));
|
||||
llg_free_constraint(ctx->grammar);
|
||||
ctx->grammar = nullptr;
|
||||
}
|
||||
}
|
||||
if (ctx->has_llg_res) {
|
||||
if (ctx->llg_res.is_stop) {
|
||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
||||
if (!llama_vocab_is_eog(ctx->vocab, cur_p->data[i].id)) {
|
||||
cur_p->data[i].logit = -INFINITY;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const uint32_t * mask = ctx->llg_res.sample_mask;
|
||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
||||
auto token = cur_p->data[i].id;
|
||||
if ((mask[token / 32] & (1 << (token % 32))) == 0) {
|
||||
cur_p->data[i].logit = -INFINITY;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void llama_sampler_llg_reset(llama_sampler * smpl) {
|
||||
auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
||||
if (!ctx->grammar) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto * grammar_new = llama_sampler_llg_new(ctx->tokenizer, ctx->grammar_kind.c_str(), ctx->grammar_data.c_str());
|
||||
llg_free_constraint(ctx->grammar);
|
||||
ctx->grammar = grammar_new;
|
||||
ctx->has_llg_res = false;
|
||||
}
|
||||
|
||||
static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) {
|
||||
const auto * ctx = (const llama_sampler_llg *) smpl->ctx;
|
||||
|
||||
auto * result = llama_sampler_init_llg(ctx->vocab, nullptr, nullptr);
|
||||
|
||||
// copy the state
|
||||
{
|
||||
auto * result_ctx = (llama_sampler_llg *) result->ctx;
|
||||
|
||||
if (ctx->grammar) {
|
||||
result_ctx->grammar_kind = ctx->grammar_kind;
|
||||
result_ctx->grammar_data = ctx->grammar_data;
|
||||
result_ctx->grammar = llg_clone_constraint(ctx->grammar);
|
||||
result_ctx->tokenizer = llg_clone_tokenizer(ctx->tokenizer);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void llama_sampler_llg_free(llama_sampler * smpl) {
|
||||
const auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
||||
|
||||
if (ctx->grammar) {
|
||||
llg_free_constraint(ctx->grammar);
|
||||
llg_free_tokenizer(ctx->tokenizer);
|
||||
}
|
||||
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
static llama_sampler_i llama_sampler_llg_i = {
|
||||
/* .name = */ llama_sampler_llg_name,
|
||||
/* .accept = */ llama_sampler_llg_accept_impl,
|
||||
/* .apply = */ llama_sampler_llg_apply,
|
||||
/* .reset = */ llama_sampler_llg_reset,
|
||||
/* .clone = */ llama_sampler_llg_clone,
|
||||
/* .free = */ llama_sampler_llg_free,
|
||||
};
|
||||
|
||||
static size_t llama_sampler_llg_tokenize_fn(const void * user_data, const uint8_t * bytes, size_t bytes_len,
|
||||
uint32_t * output_tokens, size_t output_tokens_len) {
|
||||
const llama_vocab * vocab = (const llama_vocab *) user_data;
|
||||
int r = 0;
|
||||
try {
|
||||
r = llama_tokenize(vocab, (const char *) bytes, bytes_len, (int32_t *) output_tokens, output_tokens_len, false,
|
||||
true);
|
||||
} catch (const std::exception & e) {
|
||||
GGML_ABORT("llama_tokenize failed: %s\n", e.what());
|
||||
}
|
||||
if (r < 0) {
|
||||
return -r;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab) {
|
||||
// TODO store the tokenizer in the vocab somehow
|
||||
static const llama_vocab * vocab_cache;
|
||||
static LlgTokenizer * tokenizer_cache;
|
||||
|
||||
if (vocab_cache == vocab) {
|
||||
return llg_clone_tokenizer(tokenizer_cache);
|
||||
}
|
||||
|
||||
auto tok_eos = llama_vocab_eot(vocab);
|
||||
if (tok_eos == LLAMA_TOKEN_NULL) {
|
||||
tok_eos = llama_vocab_eos(vocab);
|
||||
}
|
||||
|
||||
size_t vocab_size = llama_vocab_n_tokens(vocab);
|
||||
|
||||
auto token_lens = new uint32_t[vocab_size];
|
||||
// we typically have ~7 bytes per token; let's go on the safe side here
|
||||
auto token_bytes_size = vocab_size * 16 + 1024 * 1024;
|
||||
auto token_bytes = new uint8_t[token_bytes_size];
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < vocab_size; i++) {
|
||||
size_t max_token = 1024;
|
||||
if (token_bytes_size - offset < max_token) {
|
||||
GGML_ABORT("token_bytes buffer too small\n");
|
||||
}
|
||||
|
||||
llama_token token = i;
|
||||
auto dp = (char *) token_bytes + offset;
|
||||
auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false);
|
||||
if (size < 0) {
|
||||
GGML_ABORT("llama_detokenize failed\n");
|
||||
}
|
||||
if (size == 0) {
|
||||
size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true);
|
||||
if (size < 0) {
|
||||
GGML_ABORT("llama_detokenize failed\n");
|
||||
}
|
||||
if (size != 0) {
|
||||
*dp = '\xff'; // special token prefix marker
|
||||
size += 1;
|
||||
}
|
||||
}
|
||||
|
||||
token_lens[i] = size;
|
||||
offset += size;
|
||||
}
|
||||
|
||||
LlgTokenizerInit tinit = {
|
||||
/* .vocab_size = */ (uint32_t) vocab_size,
|
||||
/* .tok_eos = */ (uint32_t) tok_eos,
|
||||
/* .token_lens = */ token_lens,
|
||||
/* .token_bytes = */ token_bytes,
|
||||
/* .tokenizer_json = */ nullptr,
|
||||
/* .tokenize_assumes_string = */ true,
|
||||
/* .tokenize_fn = */ llama_sampler_llg_tokenize_fn,
|
||||
/* .use_approximate_greedy_tokenize_fn = */ false,
|
||||
/* .tokenize_user_data = */ vocab,
|
||||
};
|
||||
|
||||
char error_buffer[1024];
|
||||
LlgTokenizer * tokenizer = llg_new_tokenizer(&tinit, error_buffer, sizeof(error_buffer));
|
||||
|
||||
delete[] token_bytes;
|
||||
delete[] token_lens;
|
||||
|
||||
if (tokenizer == nullptr) {
|
||||
LOG_ERR("llg tokenizer error: %s\n", error_buffer);
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
if (tokenizer_cache) {
|
||||
llg_free_tokenizer(tokenizer_cache);
|
||||
}
|
||||
vocab_cache = vocab;
|
||||
tokenizer_cache = tokenizer;
|
||||
|
||||
return llg_clone_tokenizer(tokenizer_cache);
|
||||
}
|
||||
|
||||
llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * grammar_kind,
|
||||
const char * grammar_data) {
|
||||
auto * ctx = new llama_sampler_llg;
|
||||
|
||||
if (grammar_kind != nullptr && grammar_kind[0] != '\0') {
|
||||
auto tokenizer = llama_sampler_llg_new_tokenizer(vocab);
|
||||
*ctx = {
|
||||
/* .vocab = */ vocab,
|
||||
/* .grammar_kind = */ grammar_kind,
|
||||
/* .grammar_data = */ grammar_data,
|
||||
/* .tokenizer = */ tokenizer,
|
||||
/* .grammar = */ llama_sampler_llg_new(tokenizer, grammar_kind, grammar_data),
|
||||
/* .llg_res = */ {},
|
||||
/* .has_llg_res = */ false,
|
||||
};
|
||||
} else {
|
||||
*ctx = {
|
||||
/* .vocab = */ vocab,
|
||||
/* .grammar_kind = */ {},
|
||||
/* .grammar_data = */ {},
|
||||
/* .tokenizer = */ nullptr,
|
||||
/* .grammar = */ nullptr,
|
||||
/* .llg_res = */ {},
|
||||
/* .has_llg_res = */ false,
|
||||
};
|
||||
}
|
||||
|
||||
return llama_sampler_init(
|
||||
/* .iface = */ &llama_sampler_llg_i,
|
||||
/* .ctx = */ ctx
|
||||
);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
llama_sampler * llama_sampler_init_llg(const llama_vocab *, const char *, const char *) {
|
||||
LOG_WRN("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif // LLAMA_USE_LLGUIDANCE
|
392
common/log.cpp
Normal file
392
common/log.cpp
Normal file
|
@ -0,0 +1,392 @@
|
|||
#include "log.h"
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstdarg>
|
||||
#include <cstdio>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
int common_log_verbosity_thold = LOG_DEFAULT_LLAMA;
|
||||
|
||||
void common_log_set_verbosity_thold(int verbosity) {
|
||||
common_log_verbosity_thold = verbosity;
|
||||
}
|
||||
|
||||
static int64_t t_us() {
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
}
|
||||
|
||||
// colors
|
||||
enum common_log_col : int {
|
||||
COMMON_LOG_COL_DEFAULT = 0,
|
||||
COMMON_LOG_COL_BOLD,
|
||||
COMMON_LOG_COL_RED,
|
||||
COMMON_LOG_COL_GREEN,
|
||||
COMMON_LOG_COL_YELLOW,
|
||||
COMMON_LOG_COL_BLUE,
|
||||
COMMON_LOG_COL_MAGENTA,
|
||||
COMMON_LOG_COL_CYAN,
|
||||
COMMON_LOG_COL_WHITE,
|
||||
};
|
||||
|
||||
// disable colors by default
|
||||
static std::vector<const char *> g_col = {
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
struct common_log_entry {
|
||||
enum ggml_log_level level;
|
||||
|
||||
bool prefix;
|
||||
|
||||
int64_t timestamp;
|
||||
|
||||
std::vector<char> msg;
|
||||
|
||||
// signals the worker thread to stop
|
||||
bool is_end;
|
||||
|
||||
void print(FILE * file = nullptr) const {
|
||||
FILE * fcur = file;
|
||||
if (!fcur) {
|
||||
// stderr displays DBG messages only when their verbosity level is not higher than the threshold
|
||||
// these messages will still be logged to a file
|
||||
if (level == GGML_LOG_LEVEL_DEBUG && common_log_verbosity_thold < LOG_DEFAULT_DEBUG) {
|
||||
return;
|
||||
}
|
||||
|
||||
fcur = stdout;
|
||||
|
||||
if (level != GGML_LOG_LEVEL_NONE) {
|
||||
fcur = stderr;
|
||||
}
|
||||
}
|
||||
|
||||
if (level != GGML_LOG_LEVEL_NONE && level != GGML_LOG_LEVEL_CONT && prefix) {
|
||||
if (timestamp) {
|
||||
// [M.s.ms.us]
|
||||
fprintf(fcur, "%s%d.%02d.%03d.%03d%s ",
|
||||
g_col[COMMON_LOG_COL_BLUE],
|
||||
(int) (timestamp / 1000000 / 60),
|
||||
(int) (timestamp / 1000000 % 60),
|
||||
(int) (timestamp / 1000 % 1000),
|
||||
(int) (timestamp % 1000),
|
||||
g_col[COMMON_LOG_COL_DEFAULT]);
|
||||
}
|
||||
|
||||
switch (level) {
|
||||
case GGML_LOG_LEVEL_INFO: fprintf(fcur, "%sI %s", g_col[COMMON_LOG_COL_GREEN], g_col[COMMON_LOG_COL_DEFAULT]); break;
|
||||
case GGML_LOG_LEVEL_WARN: fprintf(fcur, "%sW %s", g_col[COMMON_LOG_COL_MAGENTA], "" ); break;
|
||||
case GGML_LOG_LEVEL_ERROR: fprintf(fcur, "%sE %s", g_col[COMMON_LOG_COL_RED], "" ); break;
|
||||
case GGML_LOG_LEVEL_DEBUG: fprintf(fcur, "%sD %s", g_col[COMMON_LOG_COL_YELLOW], "" ); break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(fcur, "%s", msg.data());
|
||||
|
||||
if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR || level == GGML_LOG_LEVEL_DEBUG) {
|
||||
fprintf(fcur, "%s", g_col[COMMON_LOG_COL_DEFAULT]);
|
||||
}
|
||||
|
||||
fflush(fcur);
|
||||
}
|
||||
};
|
||||
|
||||
struct common_log {
|
||||
// default capacity - will be expanded if needed
|
||||
common_log() : common_log(256) {}
|
||||
|
||||
common_log(size_t capacity) {
|
||||
file = nullptr;
|
||||
prefix = false;
|
||||
timestamps = false;
|
||||
running = false;
|
||||
t_start = t_us();
|
||||
|
||||
// initial message size - will be expanded if longer messages arrive
|
||||
entries.resize(capacity);
|
||||
for (auto & entry : entries) {
|
||||
entry.msg.resize(256);
|
||||
}
|
||||
|
||||
head = 0;
|
||||
tail = 0;
|
||||
|
||||
resume();
|
||||
}
|
||||
|
||||
~common_log() {
|
||||
pause();
|
||||
if (file) {
|
||||
fclose(file);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mtx;
|
||||
std::thread thrd;
|
||||
std::condition_variable cv;
|
||||
|
||||
FILE * file;
|
||||
|
||||
bool prefix;
|
||||
bool timestamps;
|
||||
bool running;
|
||||
|
||||
int64_t t_start;
|
||||
|
||||
// ring buffer of entries
|
||||
std::vector<common_log_entry> entries;
|
||||
size_t head;
|
||||
size_t tail;
|
||||
|
||||
// worker thread copies into this
|
||||
common_log_entry cur;
|
||||
|
||||
public:
|
||||
void add(enum ggml_log_level level, const char * fmt, va_list args) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
if (!running) {
|
||||
// discard messages while the worker thread is paused
|
||||
return;
|
||||
}
|
||||
|
||||
auto & entry = entries[tail];
|
||||
|
||||
{
|
||||
// cannot use args twice, so make a copy in case we need to expand the buffer
|
||||
va_list args_copy;
|
||||
va_copy(args_copy, args);
|
||||
|
||||
#if 1
|
||||
const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args);
|
||||
if (n >= entry.msg.size()) {
|
||||
entry.msg.resize(n + 1);
|
||||
vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args_copy);
|
||||
}
|
||||
#else
|
||||
// hack for bolding arguments
|
||||
|
||||
std::stringstream ss;
|
||||
for (int i = 0; fmt[i] != 0; i++) {
|
||||
if (fmt[i] == '%') {
|
||||
ss << LOG_COL_BOLD;
|
||||
while (fmt[i] != ' ' && fmt[i] != ')' && fmt[i] != ']' && fmt[i] != 0) ss << fmt[i++];
|
||||
ss << LOG_COL_DEFAULT;
|
||||
if (fmt[i] == 0) break;
|
||||
}
|
||||
ss << fmt[i];
|
||||
}
|
||||
const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args);
|
||||
if (n >= entry.msg.size()) {
|
||||
entry.msg.resize(n + 1);
|
||||
vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args_copy);
|
||||
}
|
||||
#endif
|
||||
va_end(args_copy);
|
||||
}
|
||||
|
||||
entry.level = level;
|
||||
entry.prefix = prefix;
|
||||
entry.timestamp = 0;
|
||||
if (timestamps) {
|
||||
entry.timestamp = t_us() - t_start;
|
||||
}
|
||||
entry.is_end = false;
|
||||
|
||||
tail = (tail + 1) % entries.size();
|
||||
if (tail == head) {
|
||||
// expand the buffer
|
||||
std::vector<common_log_entry> new_entries(2*entries.size());
|
||||
|
||||
size_t new_tail = 0;
|
||||
|
||||
do {
|
||||
new_entries[new_tail] = std::move(entries[head]);
|
||||
|
||||
head = (head + 1) % entries.size();
|
||||
new_tail = (new_tail + 1);
|
||||
} while (head != tail);
|
||||
|
||||
head = 0;
|
||||
tail = new_tail;
|
||||
|
||||
for (size_t i = tail; i < new_entries.size(); i++) {
|
||||
new_entries[i].msg.resize(256);
|
||||
}
|
||||
|
||||
entries = std::move(new_entries);
|
||||
}
|
||||
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
void resume() {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
if (running) {
|
||||
return;
|
||||
}
|
||||
|
||||
running = true;
|
||||
|
||||
thrd = std::thread([this]() {
|
||||
while (true) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
cv.wait(lock, [this]() { return head != tail; });
|
||||
|
||||
cur = entries[head];
|
||||
|
||||
head = (head + 1) % entries.size();
|
||||
}
|
||||
|
||||
if (cur.is_end) {
|
||||
break;
|
||||
}
|
||||
|
||||
cur.print(); // stdout and stderr
|
||||
|
||||
if (file) {
|
||||
cur.print(file);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void pause() {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
if (!running) {
|
||||
return;
|
||||
}
|
||||
|
||||
running = false;
|
||||
|
||||
// push an entry to signal the worker thread to stop
|
||||
{
|
||||
auto & entry = entries[tail];
|
||||
entry.is_end = true;
|
||||
|
||||
tail = (tail + 1) % entries.size();
|
||||
}
|
||||
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
thrd.join();
|
||||
}
|
||||
|
||||
void set_file(const char * path) {
|
||||
pause();
|
||||
|
||||
if (file) {
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
if (path) {
|
||||
file = fopen(path, "w");
|
||||
} else {
|
||||
file = nullptr;
|
||||
}
|
||||
|
||||
resume();
|
||||
}
|
||||
|
||||
void set_colors(bool colors) {
|
||||
pause();
|
||||
|
||||
if (colors) {
|
||||
g_col[COMMON_LOG_COL_DEFAULT] = LOG_COL_DEFAULT;
|
||||
g_col[COMMON_LOG_COL_BOLD] = LOG_COL_BOLD;
|
||||
g_col[COMMON_LOG_COL_RED] = LOG_COL_RED;
|
||||
g_col[COMMON_LOG_COL_GREEN] = LOG_COL_GREEN;
|
||||
g_col[COMMON_LOG_COL_YELLOW] = LOG_COL_YELLOW;
|
||||
g_col[COMMON_LOG_COL_BLUE] = LOG_COL_BLUE;
|
||||
g_col[COMMON_LOG_COL_MAGENTA] = LOG_COL_MAGENTA;
|
||||
g_col[COMMON_LOG_COL_CYAN] = LOG_COL_CYAN;
|
||||
g_col[COMMON_LOG_COL_WHITE] = LOG_COL_WHITE;
|
||||
} else {
|
||||
for (size_t i = 0; i < g_col.size(); i++) {
|
||||
g_col[i] = "";
|
||||
}
|
||||
}
|
||||
|
||||
resume();
|
||||
}
|
||||
|
||||
void set_prefix(bool prefix) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
this->prefix = prefix;
|
||||
}
|
||||
|
||||
void set_timestamps(bool timestamps) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
this->timestamps = timestamps;
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// public API
|
||||
//
|
||||
|
||||
struct common_log * common_log_init() {
|
||||
return new common_log;
|
||||
}
|
||||
|
||||
struct common_log * common_log_main() {
|
||||
static struct common_log log;
|
||||
|
||||
return &log;
|
||||
}
|
||||
|
||||
void common_log_pause(struct common_log * log) {
|
||||
log->pause();
|
||||
}
|
||||
|
||||
void common_log_resume(struct common_log * log) {
|
||||
log->resume();
|
||||
}
|
||||
|
||||
void common_log_free(struct common_log * log) {
|
||||
delete log;
|
||||
}
|
||||
|
||||
void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
log->add(level, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void common_log_set_file(struct common_log * log, const char * file) {
|
||||
log->set_file(file);
|
||||
}
|
||||
|
||||
void common_log_set_colors(struct common_log * log, bool colors) {
|
||||
log->set_colors(colors);
|
||||
}
|
||||
|
||||
void common_log_set_prefix(struct common_log * log, bool prefix) {
|
||||
log->set_prefix(prefix);
|
||||
}
|
||||
|
||||
void common_log_set_timestamps(struct common_log * log, bool timestamps) {
|
||||
log->set_timestamps(timestamps);
|
||||
}
|
793
common/log.h
793
common/log.h
|
@ -1,724 +1,103 @@
|
|||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include "ggml.h" // for ggml_log_level
|
||||
|
||||
// --------------------------------
|
||||
//
|
||||
// Basic usage:
|
||||
//
|
||||
// --------
|
||||
//
|
||||
// The LOG() and LOG_TEE() macros are ready to go by default
|
||||
// they do not require any initialization.
|
||||
//
|
||||
// LOGLN() and LOG_TEELN() are variants which automatically
|
||||
// include \n character at the end of the log string.
|
||||
//
|
||||
// LOG() behaves exactly like printf, by default writing to a logfile.
|
||||
// LOG_TEE() additionally, prints to the screen too ( mimics Unix tee command ).
|
||||
//
|
||||
// Default logfile is named
|
||||
// "llama.<threadID>.log"
|
||||
// Default LOG_TEE() secondary output target is
|
||||
// stderr
|
||||
//
|
||||
// Logs can be dynamically disabled or enabled using functions:
|
||||
// log_disable()
|
||||
// and
|
||||
// log_enable()
|
||||
//
|
||||
// A log target can be changed with:
|
||||
// log_set_target( string )
|
||||
// creating and opening, or re-opening a file by string filename
|
||||
// or
|
||||
// log_set_target( FILE* )
|
||||
// allowing to point at stderr, stdout, or any valid FILE* file handler.
|
||||
//
|
||||
// --------
|
||||
//
|
||||
// End of Basic usage.
|
||||
//
|
||||
// --------------------------------
|
||||
#define LOG_CLR_TO_EOL "\033[K\r"
|
||||
#define LOG_COL_DEFAULT "\033[0m"
|
||||
#define LOG_COL_BOLD "\033[1m"
|
||||
#define LOG_COL_RED "\033[31m"
|
||||
#define LOG_COL_GREEN "\033[32m"
|
||||
#define LOG_COL_YELLOW "\033[33m"
|
||||
#define LOG_COL_BLUE "\033[34m"
|
||||
#define LOG_COL_MAGENTA "\033[35m"
|
||||
#define LOG_COL_CYAN "\033[36m"
|
||||
#define LOG_COL_WHITE "\033[37m"
|
||||
|
||||
// Specifies a log target.
|
||||
// default uses log_handler() with "llama.log" log file
|
||||
// this can be changed, by defining LOG_TARGET
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_TARGET (a valid FILE*)
|
||||
// #include "log.h"
|
||||
//
|
||||
// or it can be simply redirected to stdout or stderr
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_TARGET stderr
|
||||
// #include "log.h"
|
||||
//
|
||||
// The log target can also be redirected to a different function
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_TARGET log_handler_different()
|
||||
// #include "log.h"
|
||||
//
|
||||
// FILE* log_handler_different()
|
||||
// {
|
||||
// return stderr;
|
||||
// }
|
||||
//
|
||||
// or:
|
||||
//
|
||||
// #define LOG_TARGET log_handler_another_one("somelog.log")
|
||||
// #include "log.h"
|
||||
//
|
||||
// FILE* log_handler_another_one(char*filename)
|
||||
// {
|
||||
// static FILE* logfile = nullptr;
|
||||
// (...)
|
||||
// if( !logfile )
|
||||
// {
|
||||
// fopen(...)
|
||||
// }
|
||||
// (...)
|
||||
// return logfile
|
||||
// }
|
||||
//
|
||||
#ifndef LOG_TARGET
|
||||
#define LOG_TARGET log_handler()
|
||||
#endif
|
||||
|
||||
#ifndef LOG_TEE_TARGET
|
||||
#define LOG_TEE_TARGET stderr
|
||||
#endif
|
||||
|
||||
// Utility for synchronizing log configuration state
|
||||
// since std::optional was introduced only in c++17
|
||||
enum LogTriState
|
||||
{
|
||||
LogTriStateSame,
|
||||
LogTriStateFalse,
|
||||
LogTriStateTrue
|
||||
};
|
||||
|
||||
// Utility to obtain "pid" like unique process id and use it when creating log files.
|
||||
inline std::string log_get_pid()
|
||||
{
|
||||
static std::string pid;
|
||||
if (pid.empty())
|
||||
{
|
||||
// std::this_thread::get_id() is the most portable way of obtaining a "process id"
|
||||
// it's not the same as "pid" but is unique enough to solve multiple instances
|
||||
// trying to write to the same log.
|
||||
std::stringstream ss;
|
||||
ss << std::this_thread::get_id();
|
||||
pid = ss.str();
|
||||
}
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
// Utility function for generating log file names with unique id based on thread id.
|
||||
// invocation with log_filename_generator( "llama", "log" ) creates a string "llama.<number>.log"
|
||||
// where the number is a runtime id of the current thread.
|
||||
|
||||
#define log_filename_generator(log_file_basename, log_file_extension) log_filename_generator_impl(LogTriStateSame, log_file_basename, log_file_extension)
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline std::string log_filename_generator_impl(LogTriState multilog, const std::string & log_file_basename, const std::string & log_file_extension)
|
||||
{
|
||||
static bool _multilog = false;
|
||||
|
||||
if (multilog != LogTriStateSame)
|
||||
{
|
||||
_multilog = multilog == LogTriStateTrue;
|
||||
}
|
||||
|
||||
std::stringstream buf;
|
||||
|
||||
buf << log_file_basename;
|
||||
if (_multilog)
|
||||
{
|
||||
buf << ".";
|
||||
buf << log_get_pid();
|
||||
}
|
||||
buf << ".";
|
||||
buf << log_file_extension;
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
#ifndef LOG_DEFAULT_FILE_NAME
|
||||
#define LOG_DEFAULT_FILE_NAME log_filename_generator("llama", "log")
|
||||
#endif
|
||||
|
||||
// Utility for turning #define values into string literals
|
||||
// so we can have a define for stderr and
|
||||
// we can print "stderr" instead of literal stderr, etc.
|
||||
#define LOG_STRINGIZE1(s) #s
|
||||
#define LOG_STRINGIZE(s) LOG_STRINGIZE1(s)
|
||||
|
||||
#define LOG_TEE_TARGET_STRING LOG_STRINGIZE(LOG_TEE_TARGET)
|
||||
|
||||
// Allows disabling timestamps.
|
||||
// in order to disable, define LOG_NO_TIMESTAMPS
|
||||
// like so:
|
||||
//
|
||||
// #define LOG_NO_TIMESTAMPS
|
||||
// #include "log.h"
|
||||
//
|
||||
#ifndef LOG_NO_TIMESTAMPS
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#else
|
||||
#define LOG_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#endif
|
||||
#ifndef __GNUC__
|
||||
# define LOG_ATTRIBUTE_FORMAT(...)
|
||||
#elif defined(__MINGW32__)
|
||||
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
||||
#else
|
||||
#define LOG_TIMESTAMP_FMT "%s"
|
||||
#define LOG_TIMESTAMP_VAL ,""
|
||||
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||
#endif
|
||||
|
||||
#ifdef LOG_TEE_TIMESTAMPS
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TEE_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TEE_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#else
|
||||
#define LOG_TEE_TIMESTAMP_FMT "[%" PRIu64 "] "
|
||||
#define LOG_TEE_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
|
||||
#endif
|
||||
#else
|
||||
#define LOG_TEE_TIMESTAMP_FMT "%s"
|
||||
#define LOG_TEE_TIMESTAMP_VAL ,""
|
||||
#endif
|
||||
#define LOG_DEFAULT_DEBUG 1
|
||||
#define LOG_DEFAULT_LLAMA 0
|
||||
|
||||
// Allows disabling file/line/function prefix
|
||||
// in order to disable, define LOG_NO_FILE_LINE_FUNCTION
|
||||
// like so:
|
||||
// needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower
|
||||
// set via common_log_set_verbosity()
|
||||
extern int common_log_verbosity_thold;
|
||||
|
||||
void common_log_set_verbosity_thold(int verbosity); // not thread-safe
|
||||
|
||||
// the common_log uses an internal worker thread to print/write log messages
|
||||
// when the worker thread is paused, incoming log messages are discarded
|
||||
struct common_log;
|
||||
|
||||
struct common_log * common_log_init();
|
||||
struct common_log * common_log_main(); // singleton, automatically destroys itself on exit
|
||||
void common_log_pause (struct common_log * log); // pause the worker thread, not thread-safe
|
||||
void common_log_resume(struct common_log * log); // resume the worker thread, not thread-safe
|
||||
void common_log_free (struct common_log * log);
|
||||
|
||||
LOG_ATTRIBUTE_FORMAT(3, 4)
|
||||
void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...);
|
||||
|
||||
// defaults: file = NULL, colors = false, prefix = false, timestamps = false
|
||||
//
|
||||
// #define LOG_NO_FILE_LINE_FUNCTION
|
||||
// #include "log.h"
|
||||
// regular log output:
|
||||
//
|
||||
#ifndef LOG_NO_FILE_LINE_FUNCTION
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_FLF_FMT "[%24s:%5d][%24s] "
|
||||
#define LOG_FLF_VAL , __FILE__, __LINE__, __FUNCTION__
|
||||
#else
|
||||
#define LOG_FLF_FMT "[%24s:%5ld][%24s] "
|
||||
#define LOG_FLF_VAL , __FILE__, (long)__LINE__, __FUNCTION__
|
||||
#endif
|
||||
#else
|
||||
#define LOG_FLF_FMT "%s"
|
||||
#define LOG_FLF_VAL ,""
|
||||
#endif
|
||||
|
||||
#ifdef LOG_TEE_FILE_LINE_FUNCTION
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TEE_FLF_FMT "[%24s:%5d][%24s] "
|
||||
#define LOG_TEE_FLF_VAL , __FILE__, __LINE__, __FUNCTION__
|
||||
#else
|
||||
#define LOG_TEE_FLF_FMT "[%24s:%5ld][%24s] "
|
||||
#define LOG_TEE_FLF_VAL , __FILE__, (long)__LINE__, __FUNCTION__
|
||||
#endif
|
||||
#else
|
||||
#define LOG_TEE_FLF_FMT "%s"
|
||||
#define LOG_TEE_FLF_VAL ,""
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
// USE LOG() INSTEAD
|
||||
// ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
|
||||
// llm_load_tensors: ggml ctx size = 0.27 MiB
|
||||
// llm_load_tensors: offloading 32 repeating layers to GPU
|
||||
// llm_load_tensors: offloading non-repeating layers to GPU
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
|
||||
#define LOG_IMPL(str, ...) \
|
||||
// with prefix = true, timestamps = true, the log output will look like this:
|
||||
//
|
||||
// 0.00.035.060 D ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
|
||||
// 0.00.035.064 I llm_load_tensors: ggml ctx size = 0.27 MiB
|
||||
// 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
|
||||
// 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
|
||||
//
|
||||
// I - info (stdout, V = 0)
|
||||
// W - warning (stderr, V = 0)
|
||||
// E - error (stderr, V = 0)
|
||||
// D - debug (stderr, V = LOG_DEFAULT_DEBUG)
|
||||
//
|
||||
|
||||
void common_log_set_file (struct common_log * log, const char * file); // not thread-safe
|
||||
void common_log_set_colors (struct common_log * log, bool colors); // not thread-safe
|
||||
void common_log_set_prefix (struct common_log * log, bool prefix); // whether to output prefix to each log
|
||||
void common_log_set_timestamps(struct common_log * log, bool timestamps); // whether to output timestamps in the prefix
|
||||
|
||||
// helper macros for logging
|
||||
// use these to avoid computing log arguments if the verbosity of the log is higher than the threshold
|
||||
//
|
||||
// for example:
|
||||
//
|
||||
// LOG_DBG("this is a debug message: %d\n", expensive_function());
|
||||
//
|
||||
// this will avoid calling expensive_function() if LOG_DEFAULT_DEBUG > common_log_verbosity_thold
|
||||
//
|
||||
|
||||
#define LOG_TMPL(level, verbosity, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
if ((verbosity) <= common_log_verbosity_thold) { \
|
||||
common_log_add(common_log_main(), (level), __VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define LOG_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
// USE LOG_TEE() INSTEAD
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
if (LOG_TARGET != nullptr && LOG_TARGET != stdout && LOG_TARGET != stderr && LOG_TEE_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TEE_TARGET); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
if (LOG_TARGET != nullptr && LOG_TARGET != stdout && LOG_TARGET != stderr && LOG_TEE_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TEE_TARGET); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, 0, __VA_ARGS__)
|
||||
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
|
||||
|
||||
// The '\0' as a last argument, is a trick to bypass the silly
|
||||
// "warning: ISO C++11 requires at least one argument for the "..." in a variadic macro"
|
||||
// so we can have a single macro which can be called just like printf.
|
||||
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, 0, __VA_ARGS__)
|
||||
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__)
|
||||
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
|
||||
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
|
||||
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, 0, __VA_ARGS__)
|
||||
|
||||
// Main LOG macro.
|
||||
// behaves like printf, and supports arguments the exact same way.
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__clang__)
|
||||
#define LOG(...) LOG_IMPL(__VA_ARGS__, "")
|
||||
#else
|
||||
#define LOG(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "")
|
||||
#endif
|
||||
|
||||
// Main TEE macro.
|
||||
// does the same as LOG
|
||||
// and
|
||||
// simultaneously writes stderr.
|
||||
//
|
||||
// Secondary target can be changed just like LOG_TARGET
|
||||
// by defining LOG_TEE_TARGET
|
||||
//
|
||||
#if !defined(_MSC_VER) || defined(__clang__)
|
||||
#define LOG_TEE(...) LOG_TEE_IMPL(__VA_ARGS__, "")
|
||||
#else
|
||||
#define LOG_TEE(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "")
|
||||
#endif
|
||||
|
||||
// LOG macro variants with auto endline.
|
||||
#if !defined(_MSC_VER) || defined(__clang__)
|
||||
#define LOGLN(...) LOG_IMPL(__VA_ARGS__, "\n")
|
||||
#define LOG_TEELN(...) LOG_TEE_IMPL(__VA_ARGS__, "\n")
|
||||
#else
|
||||
#define LOGLN(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||
#define LOG_TEELN(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_handler1_impl(bool change = false, LogTriState append = LogTriStateSame, LogTriState disable = LogTriStateSame, const std::string & filename = LOG_DEFAULT_FILE_NAME, FILE *target = nullptr)
|
||||
{
|
||||
static bool _initialized = false;
|
||||
static bool _append = false;
|
||||
static bool _disabled = filename.empty() && target == nullptr;
|
||||
static std::string log_current_filename{filename};
|
||||
static FILE *log_current_target{target};
|
||||
static FILE *logfile = nullptr;
|
||||
|
||||
if (change)
|
||||
{
|
||||
if (append != LogTriStateSame)
|
||||
{
|
||||
_append = append == LogTriStateTrue;
|
||||
return logfile;
|
||||
}
|
||||
|
||||
if (disable == LogTriStateTrue)
|
||||
{
|
||||
// Disable primary target
|
||||
_disabled = true;
|
||||
}
|
||||
// If previously disabled, only enable, and keep previous target
|
||||
else if (disable == LogTriStateFalse)
|
||||
{
|
||||
_disabled = false;
|
||||
}
|
||||
// Otherwise, process the arguments
|
||||
else if (log_current_filename != filename || log_current_target != target)
|
||||
{
|
||||
_initialized = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (_disabled)
|
||||
{
|
||||
// Log is disabled
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (_initialized)
|
||||
{
|
||||
// with fallback in case something went wrong
|
||||
return logfile ? logfile : stderr;
|
||||
}
|
||||
|
||||
// do the (re)initialization
|
||||
if (target != nullptr)
|
||||
{
|
||||
if (logfile != nullptr && logfile != stdout && logfile != stderr)
|
||||
{
|
||||
fclose(logfile);
|
||||
}
|
||||
|
||||
log_current_filename = LOG_DEFAULT_FILE_NAME;
|
||||
log_current_target = target;
|
||||
|
||||
logfile = target;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (log_current_filename != filename)
|
||||
{
|
||||
if (logfile != nullptr && logfile != stdout && logfile != stderr)
|
||||
{
|
||||
fclose(logfile);
|
||||
}
|
||||
}
|
||||
|
||||
logfile = fopen(filename.c_str(), _append ? "a" : "w");
|
||||
}
|
||||
|
||||
if (!logfile)
|
||||
{
|
||||
// Verify whether the file was opened, otherwise fallback to stderr
|
||||
logfile = stderr;
|
||||
|
||||
fprintf(stderr, "Failed to open logfile '%s' with error '%s'\n", filename.c_str(), std::strerror(errno));
|
||||
fflush(stderr);
|
||||
|
||||
// At this point we let the init flag be to true below, and let the target fallback to stderr
|
||||
// otherwise we would repeatedly fopen() which was already unsuccessful
|
||||
}
|
||||
|
||||
_initialized = true;
|
||||
|
||||
return logfile ? logfile : stderr;
|
||||
}
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_handler2_impl(bool change = false, LogTriState append = LogTriStateSame, LogTriState disable = LogTriStateSame, FILE *target = nullptr, const std::string & filename = LOG_DEFAULT_FILE_NAME)
|
||||
{
|
||||
return log_handler1_impl(change, append, disable, filename, target);
|
||||
}
|
||||
|
||||
// Disables logs entirely at runtime.
|
||||
// Makes LOG() and LOG_TEE() produce no output,
|
||||
// until enabled back.
|
||||
#define log_disable() log_disable_impl()
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_disable_impl()
|
||||
{
|
||||
return log_handler1_impl(true, LogTriStateSame, LogTriStateTrue);
|
||||
}
|
||||
|
||||
// Enables logs at runtime.
|
||||
#define log_enable() log_enable_impl()
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_enable_impl()
|
||||
{
|
||||
return log_handler1_impl(true, LogTriStateSame, LogTriStateFalse);
|
||||
}
|
||||
|
||||
// Sets target fir logs, either by a file name or FILE* pointer (stdout, stderr, or any valid FILE*)
|
||||
#define log_set_target(target) log_set_target_impl(target)
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_set_target_impl(const std::string & filename) { return log_handler1_impl(true, LogTriStateSame, LogTriStateSame, filename); }
|
||||
inline FILE *log_set_target_impl(FILE *target) { return log_handler2_impl(true, LogTriStateSame, LogTriStateSame, target); }
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_handler() { return log_handler1_impl(); }
|
||||
|
||||
// Enable or disable creating separate log files for each run.
|
||||
// can ONLY be invoked BEFORE first log use.
|
||||
#define log_multilog(enable) log_filename_generator_impl((enable) ? LogTriStateTrue : LogTriStateFalse, "", "")
|
||||
// Enable or disable append mode for log file.
|
||||
// can ONLY be invoked BEFORE first log use.
|
||||
#define log_append(enable) log_append_impl(enable)
|
||||
// INTERNAL, DO NOT USE
|
||||
inline FILE *log_append_impl(bool enable)
|
||||
{
|
||||
return log_handler1_impl(true, enable ? LogTriStateTrue : LogTriStateFalse, LogTriStateSame);
|
||||
}
|
||||
|
||||
inline void log_test()
|
||||
{
|
||||
log_disable();
|
||||
LOG("01 Hello World to nobody, because logs are disabled!\n");
|
||||
log_enable();
|
||||
LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET));
|
||||
LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n");
|
||||
log_set_target(stderr);
|
||||
LOG("04 Hello World to stderr!\n");
|
||||
LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n");
|
||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||
LOG("06 Hello World to default log file!\n");
|
||||
log_set_target(stdout);
|
||||
LOG("07 Hello World to stdout!\n");
|
||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||
LOG("08 Hello World to default log file again!\n");
|
||||
log_disable();
|
||||
LOG("09 Hello World _1_ into the void!\n");
|
||||
log_enable();
|
||||
LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n");
|
||||
log_disable();
|
||||
log_set_target("llama.anotherlog.log");
|
||||
LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n");
|
||||
log_enable();
|
||||
LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n");
|
||||
log_set_target("llama.yetanotherlog.log");
|
||||
LOG("13 Hello World this time in yet new file?\n");
|
||||
log_set_target(log_filename_generator("llama_autonamed", "log"));
|
||||
LOG("14 Hello World in log with generated filename!\n");
|
||||
#ifdef _MSC_VER
|
||||
LOG_TEE("15 Hello msvc TEE without arguments\n");
|
||||
LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test");
|
||||
LOG_TEELN("17 Hello msvc TEELN without arguments\n");
|
||||
LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test");
|
||||
LOG("19 Hello msvc LOG without arguments\n");
|
||||
LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test");
|
||||
LOGLN("21 Hello msvc LOGLN without arguments\n");
|
||||
LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test");
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool log_param_single_parse(const std::string & param)
|
||||
{
|
||||
if ( param == "--log-test")
|
||||
{
|
||||
log_test();
|
||||
return true;
|
||||
}
|
||||
|
||||
if ( param == "--log-disable")
|
||||
{
|
||||
log_disable();
|
||||
return true;
|
||||
}
|
||||
|
||||
if ( param == "--log-enable")
|
||||
{
|
||||
log_enable();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (param == "--log-new")
|
||||
{
|
||||
log_multilog(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (param == "--log-append")
|
||||
{
|
||||
log_append(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool log_param_pair_parse(bool check_but_dont_parse, const std::string & param, const std::string & next = std::string())
|
||||
{
|
||||
if ( param == "--log-file")
|
||||
{
|
||||
if (!check_but_dont_parse)
|
||||
{
|
||||
log_set_target(log_filename_generator(next.empty() ? "unnamed" : next, "log"));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void log_print_usage()
|
||||
{
|
||||
printf("log options:\n");
|
||||
/* format
|
||||
printf(" -h, --help show this help message and exit\n");*/
|
||||
/* spacing
|
||||
printf("__-param----------------Description\n");*/
|
||||
printf(" --log-test Run simple logging test\n");
|
||||
printf(" --log-disable Disable trace logs\n");
|
||||
printf(" --log-enable Enable trace logs\n");
|
||||
printf(" --log-file Specify a log filename (without extension)\n");
|
||||
printf(" --log-new Create a separate new log file on start. "
|
||||
"Each log file will have unique name: \"<name>.<ID>.log\"\n");
|
||||
printf(" --log-append Don't truncate the old log file.\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
#define log_dump_cmdline(argc, argv) log_dump_cmdline_impl(argc, argv)
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
inline void log_dump_cmdline_impl(int argc, char **argv)
|
||||
{
|
||||
std::stringstream buf;
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
if (std::string(argv[i]).find(' ') != std::string::npos)
|
||||
{
|
||||
buf << " \"" << argv[i] <<"\"";
|
||||
}
|
||||
else
|
||||
{
|
||||
buf << " " << argv[i];
|
||||
}
|
||||
}
|
||||
LOGLN("Cmd:%s", buf.str().c_str());
|
||||
}
|
||||
|
||||
#define log_tostr(var) log_var_to_string_impl(var).c_str()
|
||||
|
||||
inline std::string log_var_to_string_impl(bool var)
|
||||
{
|
||||
return var ? "true" : "false";
|
||||
}
|
||||
|
||||
inline std::string log_var_to_string_impl(std::string var)
|
||||
{
|
||||
return var;
|
||||
}
|
||||
|
||||
inline std::string log_var_to_string_impl(const std::vector<int> & var)
|
||||
{
|
||||
std::stringstream buf;
|
||||
buf << "[ ";
|
||||
bool first = true;
|
||||
for (auto e : var)
|
||||
{
|
||||
if (first)
|
||||
{
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
buf << ", ";
|
||||
}
|
||||
buf << std::to_string(e);
|
||||
}
|
||||
buf << " ]";
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
template <typename C, typename T>
|
||||
inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)
|
||||
{
|
||||
std::stringstream buf;
|
||||
buf << "[ ";
|
||||
|
||||
bool first = true;
|
||||
for (const auto &token : tokens)
|
||||
{
|
||||
if (!first) {
|
||||
buf << ", ";
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
|
||||
auto detokenized = llama_token_to_piece(ctx, token);
|
||||
|
||||
detokenized.erase(
|
||||
std::remove_if(
|
||||
detokenized.begin(),
|
||||
detokenized.end(),
|
||||
[](const unsigned char c) { return !std::isprint(c); }),
|
||||
detokenized.end());
|
||||
|
||||
buf
|
||||
<< "'" << detokenized << "'"
|
||||
<< ":" << std::to_string(token);
|
||||
}
|
||||
buf << " ]";
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
template <typename C, typename B>
|
||||
inline std::string LOG_BATCH_TOSTR_PRETTY(const C & ctx, const B & batch)
|
||||
{
|
||||
std::stringstream buf;
|
||||
buf << "[ ";
|
||||
|
||||
bool first = true;
|
||||
for (int i = 0; i < batch.n_tokens; ++i)
|
||||
{
|
||||
if (!first) {
|
||||
buf << ", ";
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
|
||||
auto detokenized = llama_token_to_piece(ctx, batch.token[i]);
|
||||
|
||||
detokenized.erase(
|
||||
std::remove_if(
|
||||
detokenized.begin(),
|
||||
detokenized.end(),
|
||||
[](const unsigned char c) { return !std::isprint(c); }),
|
||||
detokenized.end());
|
||||
|
||||
buf
|
||||
<< "\n" << std::to_string(i)
|
||||
<< ":token '" << detokenized << "'"
|
||||
<< ":pos " << std::to_string(batch.pos[i])
|
||||
<< ":n_seq_id " << std::to_string(batch.n_seq_id[i])
|
||||
<< ":seq_id " << std::to_string(batch.seq_id[i][0])
|
||||
<< ":logits " << std::to_string(batch.logits[i]);
|
||||
}
|
||||
buf << " ]";
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
#ifdef LOG_DISABLE_LOGS
|
||||
|
||||
#undef LOG
|
||||
#define LOG(...) // dummy stub
|
||||
#undef LOGLN
|
||||
#define LOGLN(...) // dummy stub
|
||||
|
||||
#undef LOG_TEE
|
||||
#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||
|
||||
#undef LOG_TEELN
|
||||
#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||
|
||||
#undef LOG_DISABLE
|
||||
#define LOG_DISABLE() // dummy stub
|
||||
|
||||
#undef LOG_ENABLE
|
||||
#define LOG_ENABLE() // dummy stub
|
||||
|
||||
#undef LOG_ENABLE
|
||||
#define LOG_ENABLE() // dummy stub
|
||||
|
||||
#undef LOG_SET_TARGET
|
||||
#define LOG_SET_TARGET(...) // dummy stub
|
||||
|
||||
#undef LOG_DUMP_CMDLINE
|
||||
#define LOG_DUMP_CMDLINE(...) // dummy stub
|
||||
|
||||
#endif // LOG_DISABLE_LOGS
|
||||
#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
|
||||
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
|
||||
#define LOG_ERRV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, verbosity, __VA_ARGS__)
|
||||
#define LOG_DBGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, verbosity, __VA_ARGS__)
|
||||
#define LOG_CNTV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_CONT, verbosity, __VA_ARGS__)
|
||||
|
|
2883
common/minja.hpp
Normal file
2883
common/minja.hpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -2,10 +2,13 @@
|
|||
#include "common.h"
|
||||
#include "log.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
|
||||
void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
|
||||
void common_ngram_cache_update(common_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
|
||||
std::vector<llama_token> & inp, int nnew, bool print_progress) {
|
||||
const int64_t t_start_ms = ggml_time_ms();
|
||||
const int64_t inp_size = inp.size();
|
||||
|
@ -17,16 +20,16 @@ void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, in
|
|||
const int64_t i_start = std::max(inp_size - nnew, ngram_size);
|
||||
for (int64_t i = i_start; i < inp_size; ++i) {
|
||||
const int64_t ngram_start = i - ngram_size;
|
||||
llama_ngram ngram(&inp[ngram_start], ngram_size);
|
||||
common_ngram ngram(&inp[ngram_start], ngram_size);
|
||||
const llama_token token = inp[i];
|
||||
|
||||
llama_ngram_cache::iterator part_it = ngram_cache.find(ngram);
|
||||
common_ngram_cache::iterator part_it = ngram_cache.find(ngram);
|
||||
if (part_it == ngram_cache.end()) {
|
||||
llama_ngram_cache_part part;
|
||||
common_ngram_cache_part part;
|
||||
part.emplace(token, 1);
|
||||
ngram_cache.emplace(ngram, part);
|
||||
} else {
|
||||
llama_ngram_cache_part::iterator token_count_it = part_it->second.find(token);
|
||||
common_ngram_cache_part::iterator token_count_it = part_it->second.find(token);
|
||||
if (token_count_it == part_it->second.end()) {
|
||||
part_it->second.emplace(token, 1);
|
||||
} else {
|
||||
|
@ -59,16 +62,16 @@ constexpr int draft_min_sample_size_strict[LLAMA_NGRAM_MAX] = { 4, 3, 2, 2};
|
|||
constexpr int draft_min_percent_strict[LLAMA_NGRAM_MAX] = {75, 66, 66, 66};
|
||||
|
||||
// Helper function that tries to draft a token from only the static ngram cache:
|
||||
static llama_token try_draft(llama_ngram_cache & nc_static, const llama_ngram ngram_static) {
|
||||
llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
|
||||
static llama_token try_draft(common_ngram_cache & nc_static, const common_ngram ngram_static) {
|
||||
common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
|
||||
if (part_static_it == nc_static.end()) {
|
||||
return -1;
|
||||
return LLAMA_TOKEN_NULL;
|
||||
}
|
||||
const llama_ngram_cache_part part_static = part_static_it->second;
|
||||
const common_ngram_cache_part part_static = part_static_it->second;
|
||||
|
||||
int max_count_static = 0;
|
||||
int sum_count_static = 0;
|
||||
llama_token max_token = -1;
|
||||
llama_token max_token = LLAMA_TOKEN_NULL;
|
||||
|
||||
for (std::pair<llama_token, int> token_count_static : part_static) {
|
||||
const llama_token token = token_count_static.first;
|
||||
|
@ -82,39 +85,39 @@ static llama_token try_draft(llama_ngram_cache & nc_static, const llama_ngram ng
|
|||
}
|
||||
|
||||
if (sum_count_static < draft_min_sample_size_lax[LLAMA_NGRAM_STATIC-1]) {
|
||||
return -1;
|
||||
return LLAMA_TOKEN_NULL;
|
||||
}
|
||||
if (100*max_count_static < draft_min_percent_lax[LLAMA_NGRAM_STATIC-1]*sum_count_static) {
|
||||
return -1;
|
||||
return LLAMA_TOKEN_NULL;
|
||||
}
|
||||
return max_token;
|
||||
}
|
||||
|
||||
// Try to draft a token from primary cache (context/dynamic), validate with static cache:
|
||||
static llama_token try_draft(
|
||||
llama_ngram_cache & nc_primary, const std::vector<llama_ngram> & ngrams_primary, llama_ngram_cache_part & part_static,
|
||||
common_ngram_cache & nc_primary, const std::vector<common_ngram> & ngrams_primary, common_ngram_cache_part & part_static,
|
||||
const int * min_sample_size, const int * min_percent) {
|
||||
|
||||
llama_token drafted_token = -1;
|
||||
llama_token drafted_token = LLAMA_TOKEN_NULL;
|
||||
|
||||
for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == -1; --i) {
|
||||
const llama_ngram ngram_primary = ngrams_primary[i];
|
||||
for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == LLAMA_TOKEN_NULL; --i) {
|
||||
const common_ngram ngram_primary = ngrams_primary[i];
|
||||
|
||||
llama_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary);
|
||||
common_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary);
|
||||
if (part_primary_it == nc_primary.end()) {
|
||||
continue;
|
||||
}
|
||||
const llama_ngram_cache_part part_primary = part_primary_it->second;
|
||||
const common_ngram_cache_part part_primary = part_primary_it->second;
|
||||
|
||||
int max_count_primary = 0;
|
||||
int max_count_static = 0;
|
||||
int sum_count_primary = 0;
|
||||
llama_token max_token = -1;
|
||||
llama_token max_token = LLAMA_TOKEN_NULL;
|
||||
|
||||
for (std::pair<llama_token, int> token_count_primary : part_primary) {
|
||||
const llama_token token = token_count_primary.first;
|
||||
|
||||
llama_ngram_cache_part::iterator token_count_static_it = part_static.find(token);
|
||||
common_ngram_cache_part::iterator token_count_static_it = part_static.find(token);
|
||||
|
||||
const int32_t count_primary = token_count_primary.second;
|
||||
const int32_t count_static = token_count_static_it != part_static.end() ? 100*token_count_static_it->second : 1;
|
||||
|
@ -139,9 +142,9 @@ static llama_token try_draft(
|
|||
return drafted_token;
|
||||
}
|
||||
|
||||
void llama_ngram_cache_draft(
|
||||
void common_ngram_cache_draft(
|
||||
std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
|
||||
llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static
|
||||
common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static
|
||||
) {
|
||||
GGML_ASSERT(draft.size() == 1);
|
||||
const int inp_size = inp.size();
|
||||
|
@ -151,40 +154,40 @@ void llama_ngram_cache_draft(
|
|||
}
|
||||
|
||||
while ((int) draft.size()-1 < n_draft) {
|
||||
llama_token drafted_token = -1;
|
||||
llama_token drafted_token = LLAMA_TOKEN_NULL;
|
||||
|
||||
const int ngram_start_static = inp_size-LLAMA_NGRAM_STATIC + draft.size()-1;
|
||||
llama_ngram ngram_static;
|
||||
common_ngram ngram_static;
|
||||
for (int j = ngram_start_static; j < ngram_start_static + LLAMA_NGRAM_STATIC; ++j) {
|
||||
ngram_static.tokens[j-ngram_start_static] = get_token(inp, draft, j);
|
||||
}
|
||||
llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
|
||||
llama_ngram_cache_part part_static;
|
||||
common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
|
||||
common_ngram_cache_part part_static;
|
||||
if (part_static_it != nc_static.end()) {
|
||||
part_static = part_static_it->second;
|
||||
}
|
||||
|
||||
// cd = context + dynamic
|
||||
std::vector<llama_ngram> ngrams_cd;
|
||||
std::vector<common_ngram> ngrams_cd;
|
||||
for (int ngram_size_cd = ngram_min; ngram_size_cd <= ngram_max; ++ngram_size_cd) {
|
||||
const int ngram_start_cd = inp_size-ngram_size_cd + draft.size()-1;
|
||||
llama_ngram ngram_cd;
|
||||
common_ngram ngram_cd;
|
||||
for (int j = ngram_start_cd; j < ngram_start_cd + ngram_size_cd; ++j) {
|
||||
ngram_cd.tokens[j-ngram_start_cd] = get_token(inp, draft, j);
|
||||
}
|
||||
ngrams_cd.push_back(ngram_cd);
|
||||
}
|
||||
if (drafted_token == -1) {
|
||||
if (drafted_token == LLAMA_TOKEN_NULL) {
|
||||
drafted_token = try_draft(nc_context, ngrams_cd, part_static, draft_min_sample_size_lax, draft_min_percent_lax);
|
||||
}
|
||||
if (drafted_token == -1) {
|
||||
if (drafted_token == LLAMA_TOKEN_NULL) {
|
||||
drafted_token = try_draft(nc_dynamic, ngrams_cd, part_static, draft_min_sample_size_strict, draft_min_percent_strict);
|
||||
}
|
||||
if (drafted_token == -1) {
|
||||
if (drafted_token == LLAMA_TOKEN_NULL) {
|
||||
drafted_token = try_draft(nc_static, ngram_static);
|
||||
}
|
||||
|
||||
if (drafted_token == -1) {
|
||||
if (drafted_token == LLAMA_TOKEN_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -193,16 +196,16 @@ void llama_ngram_cache_draft(
|
|||
}
|
||||
}
|
||||
|
||||
void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename) {
|
||||
void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename) {
|
||||
std::ofstream file_out(filename, std::ios::binary);
|
||||
for (std::pair<llama_ngram, llama_ngram_cache_part> item : ngram_cache) {
|
||||
const llama_ngram ngram = item.first;
|
||||
llama_ngram_cache_part token_counts = item.second;
|
||||
for (std::pair<common_ngram, common_ngram_cache_part> item : ngram_cache) {
|
||||
const common_ngram ngram = item.first;
|
||||
common_ngram_cache_part token_counts = item.second;
|
||||
GGML_ASSERT(!token_counts.empty());
|
||||
const int32_t ntokens = token_counts.size();
|
||||
GGML_ASSERT(ntokens > 0);
|
||||
|
||||
file_out.write(reinterpret_cast<const char *>(&ngram), sizeof(llama_ngram));
|
||||
file_out.write(reinterpret_cast<const char *>(&ngram), sizeof(common_ngram));
|
||||
file_out.write(reinterpret_cast<const char *>(&ntokens), sizeof(int32_t));
|
||||
for (std::pair<llama_token, int32_t> item2 : token_counts) {
|
||||
const llama_token token = item2.first;
|
||||
|
@ -216,14 +219,14 @@ void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filen
|
|||
|
||||
}
|
||||
|
||||
llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
|
||||
common_ngram_cache common_ngram_cache_load(std::string & filename) {
|
||||
std::ifstream hashmap_file(filename, std::ios::binary);
|
||||
if (!hashmap_file) {
|
||||
throw std::ifstream::failure("Unable to open file " + filename);
|
||||
}
|
||||
llama_ngram_cache ngram_cache;
|
||||
common_ngram_cache ngram_cache;
|
||||
|
||||
llama_ngram ngram;
|
||||
common_ngram ngram;
|
||||
int32_t ntokens;
|
||||
llama_token token;
|
||||
int32_t count;
|
||||
|
@ -232,11 +235,11 @@ llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
|
|||
char * ntokensc = reinterpret_cast<char*>(&ntokens);
|
||||
char * tokenc = reinterpret_cast<char*>(&token);
|
||||
char * countc = reinterpret_cast<char*>(&count);
|
||||
while(hashmap_file.read(ngramc, sizeof(llama_ngram))) {
|
||||
while(hashmap_file.read(ngramc, sizeof(common_ngram))) {
|
||||
GGML_ASSERT(!hashmap_file.eof());
|
||||
GGML_ASSERT(hashmap_file.read(ntokensc, sizeof(int32_t)));
|
||||
GGML_ASSERT(ntokens > 0);
|
||||
llama_ngram_cache_part token_counts;
|
||||
common_ngram_cache_part token_counts;
|
||||
|
||||
for (int i = 0; i < ntokens; ++i) {
|
||||
GGML_ASSERT(!hashmap_file.eof());
|
||||
|
@ -254,12 +257,12 @@ llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
|
|||
return ngram_cache;
|
||||
}
|
||||
|
||||
void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add) {
|
||||
for (std::pair<llama_ngram, llama_ngram_cache_part> ngram_part : ngram_cache_add) {
|
||||
const llama_ngram ngram = ngram_part.first;
|
||||
llama_ngram_cache_part part = ngram_part.second;
|
||||
void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add) {
|
||||
for (std::pair<common_ngram, common_ngram_cache_part> ngram_part : ngram_cache_add) {
|
||||
const common_ngram ngram = ngram_part.first;
|
||||
common_ngram_cache_part part = ngram_part.second;
|
||||
|
||||
llama_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram);
|
||||
common_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram);
|
||||
if (part_merged_it == ngram_cache_target.end()) {
|
||||
ngram_cache_target.emplace(ngram, part);
|
||||
continue;
|
||||
|
@ -270,7 +273,7 @@ void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram
|
|||
const int32_t count = token_count.second;
|
||||
GGML_ASSERT(count > 0);
|
||||
|
||||
llama_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token);
|
||||
common_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token);
|
||||
if (token_count_merged_it == part_merged_it->second.end()) {
|
||||
part_merged_it->second.emplace(token, count);
|
||||
continue;
|
||||
|
|
|
@ -12,22 +12,22 @@
|
|||
|
||||
// Data structures to map n-grams to empirical token probabilities:
|
||||
|
||||
struct llama_ngram {
|
||||
struct common_ngram {
|
||||
llama_token tokens[LLAMA_NGRAM_MAX];
|
||||
|
||||
llama_ngram() {
|
||||
common_ngram() {
|
||||
for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
|
||||
tokens[i] = -1;
|
||||
tokens[i] = LLAMA_TOKEN_NULL;
|
||||
}
|
||||
}
|
||||
|
||||
llama_ngram(const llama_token * input, const int ngram_size) {
|
||||
common_ngram(const llama_token * input, const int ngram_size) {
|
||||
for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
|
||||
tokens[i] = i < ngram_size ? input[i] : -1;
|
||||
tokens[i] = i < ngram_size ? input[i] : LLAMA_TOKEN_NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool operator==(const llama_ngram & other) const {
|
||||
bool operator==(const common_ngram & other) const {
|
||||
for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
|
||||
if (tokens[i] != other.tokens[i]) {
|
||||
return false;
|
||||
|
@ -37,21 +37,28 @@ struct llama_ngram {
|
|||
}
|
||||
};
|
||||
|
||||
struct llama_ngram_hash_function {
|
||||
size_t operator()(const llama_ngram & ngram) const {
|
||||
size_t hash = 0;
|
||||
for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
|
||||
hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
|
||||
struct common_token_hash_function {
|
||||
size_t operator()(const llama_token token) const {
|
||||
// see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
|
||||
return token * 11400714819323198485llu;
|
||||
}
|
||||
};
|
||||
|
||||
struct common_ngram_hash_function {
|
||||
size_t operator()(const common_ngram & ngram) const {
|
||||
size_t hash = common_token_hash_function{}(ngram.tokens[0]);
|
||||
for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
|
||||
hash ^= common_token_hash_function{}(ngram.tokens[i]);
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
||||
// token -> number of times token has been seen
|
||||
typedef std::unordered_map<llama_token, int32_t> llama_ngram_cache_part;
|
||||
typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;
|
||||
|
||||
// n-gram -> empirical distribution of following tokens
|
||||
typedef std::unordered_map<llama_ngram, llama_ngram_cache_part, llama_ngram_hash_function> llama_ngram_cache;
|
||||
typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;
|
||||
|
||||
|
||||
// Update an ngram cache with tokens.
|
||||
|
@ -63,8 +70,8 @@ typedef std::unordered_map<llama_ngram, llama_ngram_cache_part, llama_ngram_hash
|
|||
//
|
||||
// In order to get correct results inp_data can ONLY BE APPENDED TO.
|
||||
// Changes in the middle need a complete rebuild.
|
||||
void llama_ngram_cache_update(
|
||||
llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
|
||||
void common_ngram_cache_update(
|
||||
common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
|
||||
|
||||
// Try to draft tokens from ngram caches.
|
||||
// inp: the tokens generated so far.
|
||||
|
@ -74,21 +81,21 @@ void llama_ngram_cache_update(
|
|||
// nc_context: ngram cache based on current context.
|
||||
// nc_dynamic: ngram cache based on previous user generations.
|
||||
// nc_static: ngram cache generated from a large text corpus, used for validation.
|
||||
void llama_ngram_cache_draft(
|
||||
void common_ngram_cache_draft(
|
||||
std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
|
||||
llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static);
|
||||
common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);
|
||||
|
||||
// Save an ngram cache to a file.
|
||||
// ngram_cache: the ngram cache to save.
|
||||
// filename: the path under which to save the ngram cache.
|
||||
void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename);
|
||||
void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename);
|
||||
|
||||
// Load an ngram cache saved with llama_ngram_cache_save.
|
||||
// Load an ngram cache saved with common_ngram_cache_save.
|
||||
// filename: the path from which to load the ngram cache.
|
||||
// returns: an ngram cache containing the information saved to filename.
|
||||
llama_ngram_cache llama_ngram_cache_load(std::string & filename);
|
||||
common_ngram_cache common_ngram_cache_load(std::string & filename);
|
||||
|
||||
// Merge two ngram caches.
|
||||
// ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.
|
||||
// ngram_cache_add: the ngram cache to add to ngram_cache_target.
|
||||
void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add);
|
||||
void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);
|
||||
|
|
|
@ -1,459 +1,526 @@
|
|||
#define LLAMA_API_INTERNAL
|
||||
#include "sampling.h"
|
||||
#include <random>
|
||||
|
||||
struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
|
||||
struct llama_sampling_context * result = new llama_sampling_context();
|
||||
#include "common.h"
|
||||
|
||||
result->params = params;
|
||||
result->grammar = nullptr;
|
||||
#include <cmath>
|
||||
#include <unordered_map>
|
||||
|
||||
// if there is a grammar, parse it
|
||||
if (!params.grammar.empty()) {
|
||||
result->parsed_grammar = grammar_parser::parse(params.grammar.c_str());
|
||||
// the ring buffer works similarly to std::deque, but with a fixed capacity
|
||||
// TODO: deduplicate with llama-impl.h
|
||||
template<typename T>
|
||||
struct ring_buffer {
|
||||
ring_buffer(size_t cap) : capacity(cap), data(cap) {}
|
||||
|
||||
// will be empty (default) if there are parse errors
|
||||
if (result->parsed_grammar.rules.empty()) {
|
||||
fprintf(stderr, "%s: failed to parse grammar\n", __func__);
|
||||
delete result;
|
||||
return nullptr;
|
||||
T & front() {
|
||||
if (sz == 0) {
|
||||
throw std::runtime_error("ring buffer is empty");
|
||||
}
|
||||
return data[first];
|
||||
}
|
||||
|
||||
// Ensure that there is a "root" node.
|
||||
if (result->parsed_grammar.symbol_ids.find("root") == result->parsed_grammar.symbol_ids.end()) {
|
||||
fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
|
||||
delete result;
|
||||
return nullptr;
|
||||
const T & front() const {
|
||||
if (sz == 0) {
|
||||
throw std::runtime_error("ring buffer is empty");
|
||||
}
|
||||
return data[first];
|
||||
}
|
||||
|
||||
std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules());
|
||||
|
||||
struct llama_grammar * grammar = llama_grammar_init(
|
||||
grammar_rules.data(),
|
||||
grammar_rules.size(), result->parsed_grammar.symbol_ids.at("root"));
|
||||
if (grammar == nullptr) {
|
||||
throw std::runtime_error("Failed to initialize llama_grammar");
|
||||
T & back() {
|
||||
if (sz == 0) {
|
||||
throw std::runtime_error("ring buffer is empty");
|
||||
}
|
||||
result->grammar = grammar;
|
||||
return data[pos];
|
||||
}
|
||||
|
||||
result->prev.resize(params.n_prev);
|
||||
const T & back() const {
|
||||
if (sz == 0) {
|
||||
throw std::runtime_error("ring buffer is empty");
|
||||
}
|
||||
return data[pos];
|
||||
}
|
||||
|
||||
result->n_valid = 0;
|
||||
void push_back(const T & value) {
|
||||
if (sz == capacity) {
|
||||
// advance the start when buffer is full
|
||||
first = (first + 1) % capacity;
|
||||
} else {
|
||||
sz++;
|
||||
}
|
||||
data[pos] = value;
|
||||
pos = (pos + 1) % capacity;
|
||||
}
|
||||
|
||||
llama_sampling_set_rng_seed(result, params.seed);
|
||||
T pop_front() {
|
||||
if (sz == 0) {
|
||||
throw std::runtime_error("ring buffer is empty");
|
||||
}
|
||||
T value = data[first];
|
||||
first = (first + 1) % capacity;
|
||||
sz--;
|
||||
return value;
|
||||
}
|
||||
|
||||
const T & rat(size_t i) const {
|
||||
if (i >= sz) {
|
||||
throw std::runtime_error("ring buffer: index out of bounds");
|
||||
}
|
||||
return data[(first + sz - i - 1) % capacity];
|
||||
}
|
||||
|
||||
std::vector<T> to_vector() const {
|
||||
std::vector<T> result;
|
||||
result.reserve(sz);
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
result.push_back(data[(first + i) % capacity]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void llama_sampling_free(struct llama_sampling_context * ctx) {
|
||||
if (ctx->grammar != NULL) {
|
||||
llama_grammar_free(ctx->grammar);
|
||||
}
|
||||
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
void llama_sampling_reset(llama_sampling_context * ctx) {
|
||||
if (ctx->grammar != NULL) {
|
||||
llama_grammar_free(ctx->grammar);
|
||||
ctx->grammar = NULL;
|
||||
void clear() {
|
||||
// here only reset the status of the buffer
|
||||
sz = 0;
|
||||
first = 0;
|
||||
pos = 0;
|
||||
}
|
||||
|
||||
if (!ctx->parsed_grammar.rules.empty()) {
|
||||
std::vector<const llama_grammar_element *> grammar_rules(ctx->parsed_grammar.c_rules());
|
||||
|
||||
struct llama_grammar * grammar = llama_grammar_init(
|
||||
grammar_rules.data(),
|
||||
grammar_rules.size(), ctx->parsed_grammar.symbol_ids.at("root"));
|
||||
if (grammar == nullptr) {
|
||||
throw std::runtime_error("Failed to initialize llama_grammar");
|
||||
}
|
||||
ctx->grammar = grammar;
|
||||
bool empty() const {
|
||||
return sz == 0;
|
||||
}
|
||||
|
||||
std::fill(ctx->prev.begin(), ctx->prev.end(), 0);
|
||||
ctx->cur.clear();
|
||||
ctx->n_valid = 0;
|
||||
}
|
||||
|
||||
void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed) {
|
||||
if (seed == LLAMA_DEFAULT_SEED) {
|
||||
seed = std::random_device{}();
|
||||
}
|
||||
ctx->rng.seed(seed);
|
||||
}
|
||||
|
||||
void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst) {
|
||||
if (dst->grammar) {
|
||||
llama_grammar_free(dst->grammar);
|
||||
dst->grammar = nullptr;
|
||||
size_t size() const {
|
||||
return sz;
|
||||
}
|
||||
|
||||
if (src->grammar) {
|
||||
dst->grammar = llama_grammar_copy(src->grammar);
|
||||
size_t capacity = 0;
|
||||
size_t sz = 0;
|
||||
size_t first = 0;
|
||||
size_t pos = 0;
|
||||
std::vector<T> data;
|
||||
};
|
||||
|
||||
struct common_sampler {
|
||||
common_params_sampling params;
|
||||
|
||||
struct llama_sampler * grmr;
|
||||
struct llama_sampler * chain;
|
||||
|
||||
ring_buffer<llama_token> prev;
|
||||
|
||||
std::vector<llama_token_data> cur;
|
||||
|
||||
llama_token_data_array cur_p;
|
||||
|
||||
void set_logits(struct llama_context * ctx, int idx) {
|
||||
const auto * logits = llama_get_logits_ith(ctx, idx);
|
||||
|
||||
const llama_model * model = llama_get_model(ctx);
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
const int n_vocab = llama_vocab_n_tokens(vocab);
|
||||
|
||||
cur.resize(n_vocab);
|
||||
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
|
||||
}
|
||||
|
||||
dst->prev = src->prev;
|
||||
}
|
||||
|
||||
llama_token llama_sampling_last(llama_sampling_context * ctx) {
|
||||
return ctx->prev.back();
|
||||
}
|
||||
|
||||
std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama_context * ctx_main, int n) {
|
||||
const int size = ctx_sampling->prev.size();
|
||||
|
||||
n = std::min(n, size);
|
||||
|
||||
std::string result;
|
||||
|
||||
for (int i = size - n; i < size; i++) {
|
||||
result += llama_token_to_piece(ctx_main, ctx_sampling->prev[i]);
|
||||
cur_p = { cur.data(), cur.size(), -1, false };
|
||||
}
|
||||
};
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string llama_sampling_print(const llama_sampling_params & params) {
|
||||
std::string common_params_sampling::print() const {
|
||||
char result[1024];
|
||||
|
||||
snprintf(result, sizeof(result),
|
||||
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
|
||||
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n"
|
||||
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
|
||||
"\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
|
||||
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
|
||||
params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present,
|
||||
params.top_k, params.tfs_z, params.top_p, params.min_p, params.typical_p, params.temp,
|
||||
params.mirostat, params.mirostat_eta, params.mirostat_tau);
|
||||
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
|
||||
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
|
||||
top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
|
||||
mirostat, mirostat_eta, mirostat_tau);
|
||||
|
||||
return std::string(result);
|
||||
}
|
||||
|
||||
std::string llama_sampling_order_print(const llama_sampling_params & params) {
|
||||
std::string result = "CFG -> Penalties ";
|
||||
if (params.mirostat == 0) {
|
||||
for (auto sampler_type : params.samplers_sequence) {
|
||||
const auto sampler_type_name = llama_sampling_type_to_str(sampler_type);
|
||||
if (!sampler_type_name.empty()) {
|
||||
result += "-> " + sampler_type_name + " ";
|
||||
}
|
||||
struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params) {
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
llama_sampler_chain_params lparams = llama_sampler_chain_default_params();
|
||||
|
||||
lparams.no_perf = params.no_perf;
|
||||
|
||||
std::vector<const char *> trigger_words;
|
||||
trigger_words.reserve(params.grammar_trigger_words.size());
|
||||
for (const auto & str : params.grammar_trigger_words) {
|
||||
trigger_words.push_back(str.word.c_str());
|
||||
}
|
||||
|
||||
struct llama_sampler * grmr;
|
||||
if (params.grammar.compare(0, 11, "%llguidance") == 0) {
|
||||
#ifdef LLAMA_USE_LLGUIDANCE
|
||||
grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str());
|
||||
#else
|
||||
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
|
||||
#endif // LLAMA_USE_LLGUIDANCE
|
||||
} else {
|
||||
result += "-> mirostat ";
|
||||
grmr = params.grammar_lazy
|
||||
? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root",
|
||||
trigger_words.data(), trigger_words.size(),
|
||||
params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size())
|
||||
: llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
|
||||
}
|
||||
|
||||
auto * result = new common_sampler {
|
||||
/* .params = */ params,
|
||||
/* .grmr = */ grmr,
|
||||
/* .chain = */ llama_sampler_chain_init(lparams),
|
||||
/* .prev = */ ring_buffer<llama_token>(std::max(32, params.n_prev)),
|
||||
/* .cur = */ {},
|
||||
/* .cur_p = */ {},
|
||||
};
|
||||
|
||||
llama_sampler_chain_add(result->chain,
|
||||
llama_sampler_init_logit_bias(
|
||||
llama_vocab_n_tokens(vocab),
|
||||
params.logit_bias.size(),
|
||||
params.logit_bias.data()));
|
||||
|
||||
if (params.mirostat == 0) {
|
||||
for (const auto & cnstr : params.samplers) {
|
||||
switch (cnstr) {
|
||||
case COMMON_SAMPLER_TYPE_DRY:
|
||||
{
|
||||
std::vector<const char *> c_breakers;
|
||||
c_breakers.reserve(params.dry_sequence_breakers.size());
|
||||
for (const auto & str : params.dry_sequence_breakers) {
|
||||
c_breakers.push_back(str.c_str());
|
||||
}
|
||||
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
|
||||
}
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_TOP_K:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_TOP_P:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep));
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_MIN_P:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep));
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_XTC:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_TYPICAL_P:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_TEMPERATURE:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_INFILL:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab));
|
||||
break;
|
||||
case COMMON_SAMPLER_TYPE_PENALTIES:
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false && "unknown sampler type");
|
||||
}
|
||||
}
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
|
||||
} else if (params.mirostat == 1) {
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
|
||||
} else if (params.mirostat == 2) {
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
|
||||
llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
|
||||
} else {
|
||||
GGML_ASSERT(false && "unknown mirostat version");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string llama_sampling_type_to_str(llama_sampler_type sampler_type) {
|
||||
switch (sampler_type) {
|
||||
case llama_sampler_type::TOP_K: return "top_k";
|
||||
case llama_sampler_type::TFS_Z: return "tfs_z";
|
||||
case llama_sampler_type::TYPICAL_P: return "typical_p";
|
||||
case llama_sampler_type::TOP_P: return "top_p";
|
||||
case llama_sampler_type::MIN_P: return "min_p";
|
||||
case llama_sampler_type::TEMPERATURE: return "temperature";
|
||||
void common_sampler_free(struct common_sampler * gsmpl) {
|
||||
if (gsmpl) {
|
||||
llama_sampler_free(gsmpl->grmr);
|
||||
|
||||
llama_sampler_free(gsmpl->chain);
|
||||
|
||||
delete gsmpl;
|
||||
}
|
||||
}
|
||||
|
||||
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
|
||||
if (accept_grammar) {
|
||||
llama_sampler_accept(gsmpl->grmr, token);
|
||||
}
|
||||
|
||||
llama_sampler_accept(gsmpl->chain, token);
|
||||
|
||||
gsmpl->prev.push_back(token);
|
||||
}
|
||||
|
||||
void common_sampler_reset(struct common_sampler * gsmpl) {
|
||||
llama_sampler_reset(gsmpl->grmr);
|
||||
|
||||
llama_sampler_reset(gsmpl->chain);
|
||||
}
|
||||
|
||||
struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
|
||||
return new common_sampler {
|
||||
/* .params = */ gsmpl->params,
|
||||
/* .grmr = */ llama_sampler_clone(gsmpl->grmr),
|
||||
/* .chain = */ llama_sampler_clone(gsmpl->chain),
|
||||
/* .prev = */ gsmpl->prev,
|
||||
/* .cur = */ gsmpl->cur,
|
||||
/* .cur_p = */ gsmpl->cur_p,
|
||||
};
|
||||
}
|
||||
|
||||
void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl) {
|
||||
// TODO: measure grammar performance
|
||||
|
||||
if (gsmpl) {
|
||||
llama_perf_sampler_print(gsmpl->chain);
|
||||
}
|
||||
if (ctx) {
|
||||
llama_perf_context_print(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
|
||||
gsmpl->set_logits(ctx, idx);
|
||||
|
||||
auto & grmr = gsmpl->grmr;
|
||||
auto & chain = gsmpl->chain;
|
||||
auto & cur_p = gsmpl->cur_p; // initialized by set_logits
|
||||
|
||||
if (grammar_first) {
|
||||
llama_sampler_apply(grmr, &cur_p);
|
||||
}
|
||||
|
||||
llama_sampler_apply(chain, &cur_p);
|
||||
|
||||
GGML_ASSERT(cur_p.selected != -1 && "no selected token during sampling - check your sampling configuration");
|
||||
|
||||
const llama_token id = cur_p.data[cur_p.selected].id;
|
||||
|
||||
if (grammar_first) {
|
||||
return id;
|
||||
}
|
||||
|
||||
// check if it the sampled token fits the grammar
|
||||
{
|
||||
llama_token_data single_token_data = { id, 1.0f, 0.0f };
|
||||
llama_token_data_array single_token_data_array = { &single_token_data, 1, -1, false };
|
||||
|
||||
llama_sampler_apply(grmr, &single_token_data_array);
|
||||
|
||||
const bool is_valid = single_token_data_array.data[0].logit != -INFINITY;
|
||||
if (is_valid) {
|
||||
return id;
|
||||
}
|
||||
}
|
||||
|
||||
// resampling:
|
||||
// if the token is not valid, sample again, but first apply the grammar sampler and then the sampling chain
|
||||
gsmpl->set_logits(ctx, idx);
|
||||
|
||||
llama_sampler_apply(grmr, &cur_p);
|
||||
llama_sampler_apply(chain, &cur_p);
|
||||
|
||||
GGML_ASSERT(cur_p.selected != -1 && "no selected token during re-sampling - check your sampling configuration");
|
||||
|
||||
return cur_p.data[cur_p.selected].id;
|
||||
}
|
||||
|
||||
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first) {
|
||||
GGML_ASSERT(idxs.size() == draft.size() + 1 && "idxs.size() must be draft.size() + 1");
|
||||
|
||||
std::vector<llama_token> result;
|
||||
result.reserve(idxs.size());
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < draft.size(); i++) {
|
||||
const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i], grammar_first);
|
||||
|
||||
common_sampler_accept(gsmpl, id, true);
|
||||
|
||||
result.push_back(id);
|
||||
|
||||
if (draft[i] != id) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == draft.size()) {
|
||||
const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i], grammar_first);
|
||||
|
||||
common_sampler_accept(gsmpl, id, true);
|
||||
|
||||
result.push_back(id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first) {
|
||||
std::vector<int> idxs(draft.size() + 1);
|
||||
for (size_t i = 0; i < idxs.size(); ++i) {
|
||||
idxs[i] = i;
|
||||
}
|
||||
|
||||
return common_sampler_sample_and_accept_n(gsmpl, ctx, idxs, draft, grammar_first);
|
||||
}
|
||||
|
||||
uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl) {
|
||||
return llama_sampler_get_seed(gsmpl->chain);
|
||||
}
|
||||
|
||||
// helpers
|
||||
|
||||
llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl) {
|
||||
return &gsmpl->cur_p;
|
||||
}
|
||||
|
||||
llama_token common_sampler_last(const struct common_sampler * gsmpl) {
|
||||
return gsmpl->prev.rat(0);
|
||||
}
|
||||
|
||||
std::string common_sampler_print(const struct common_sampler * gsmpl) {
|
||||
std::string result = "logits ";
|
||||
|
||||
for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
|
||||
const auto * smpl = llama_sampler_chain_get(gsmpl->chain, i);
|
||||
result += std::string("-> ") + llama_sampler_name(smpl) + " ";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_main, int n) {
|
||||
n = std::min(n, (int) gsmpl->prev.size());
|
||||
|
||||
if (n <= 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
result.reserve(8*n); // 8 is the average length of a token [citation needed], TODO: compute this from the vocab
|
||||
|
||||
for (int i = n - 1; i >= 0; i--) {
|
||||
const llama_token id = gsmpl->prev.rat(i);
|
||||
|
||||
GGML_ASSERT(id != LLAMA_TOKEN_NULL && "null token in the sampling history - should not happen");
|
||||
|
||||
result += common_token_to_piece(ctx_main, id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
|
||||
switch (cnstr) {
|
||||
case COMMON_SAMPLER_TYPE_DRY: return 'd';
|
||||
case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
|
||||
case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
|
||||
case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
|
||||
case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
|
||||
case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't';
|
||||
case COMMON_SAMPLER_TYPE_XTC: return 'x';
|
||||
case COMMON_SAMPLER_TYPE_INFILL: return 'i';
|
||||
case COMMON_SAMPLER_TYPE_PENALTIES: return 'e';
|
||||
default : return '?';
|
||||
}
|
||||
}
|
||||
|
||||
std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
|
||||
switch (cnstr) {
|
||||
case COMMON_SAMPLER_TYPE_DRY: return "dry";
|
||||
case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
|
||||
case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
|
||||
case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
|
||||
case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
|
||||
case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature";
|
||||
case COMMON_SAMPLER_TYPE_XTC: return "xtc";
|
||||
case COMMON_SAMPLER_TYPE_INFILL: return "infill";
|
||||
case COMMON_SAMPLER_TYPE_PENALTIES: return "penalties";
|
||||
default : return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
|
||||
std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
|
||||
{"top_k", llama_sampler_type::TOP_K},
|
||||
{"top_p", llama_sampler_type::TOP_P},
|
||||
{"typical_p", llama_sampler_type::TYPICAL_P},
|
||||
{"min_p", llama_sampler_type::MIN_P},
|
||||
{"tfs_z", llama_sampler_type::TFS_Z},
|
||||
{"temperature", llama_sampler_type::TEMPERATURE}
|
||||
std::vector<common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
|
||||
std::unordered_map<std::string, common_sampler_type> sampler_canonical_name_map {
|
||||
{ "dry", COMMON_SAMPLER_TYPE_DRY },
|
||||
{ "top_k", COMMON_SAMPLER_TYPE_TOP_K },
|
||||
{ "top_p", COMMON_SAMPLER_TYPE_TOP_P },
|
||||
{ "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
||||
{ "min_p", COMMON_SAMPLER_TYPE_MIN_P },
|
||||
{ "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
|
||||
{ "xtc", COMMON_SAMPLER_TYPE_XTC },
|
||||
{ "infill", COMMON_SAMPLER_TYPE_INFILL },
|
||||
{ "penalties", COMMON_SAMPLER_TYPE_PENALTIES },
|
||||
};
|
||||
|
||||
// since samplers names are written multiple ways
|
||||
// make it ready for both system names and input names
|
||||
std::unordered_map<std::string, llama_sampler_type> sampler_alt_name_map {
|
||||
{"top-k", llama_sampler_type::TOP_K},
|
||||
{"top-p", llama_sampler_type::TOP_P},
|
||||
{"nucleus", llama_sampler_type::TOP_P},
|
||||
{"typical-p", llama_sampler_type::TYPICAL_P},
|
||||
{"typical", llama_sampler_type::TYPICAL_P},
|
||||
{"min-p", llama_sampler_type::MIN_P},
|
||||
{"tfs-z", llama_sampler_type::TFS_Z},
|
||||
{"tfs", llama_sampler_type::TFS_Z},
|
||||
{"temp", llama_sampler_type::TEMPERATURE}
|
||||
std::unordered_map<std::string, common_sampler_type> sampler_alt_name_map {
|
||||
{ "top-k", COMMON_SAMPLER_TYPE_TOP_K },
|
||||
{ "top-p", COMMON_SAMPLER_TYPE_TOP_P },
|
||||
{ "nucleus", COMMON_SAMPLER_TYPE_TOP_P },
|
||||
{ "typical-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
||||
{ "typical", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
||||
{ "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
||||
{ "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
||||
{ "min-p", COMMON_SAMPLER_TYPE_MIN_P },
|
||||
{ "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
|
||||
};
|
||||
|
||||
std::vector<llama_sampler_type> sampler_types;
|
||||
sampler_types.reserve(names.size());
|
||||
for (const auto & name : names)
|
||||
{
|
||||
auto sampler_item = sampler_canonical_name_map.find(name);
|
||||
if (sampler_item != sampler_canonical_name_map.end())
|
||||
{
|
||||
sampler_types.push_back(sampler_item->second);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (allow_alt_names)
|
||||
{
|
||||
sampler_item = sampler_alt_name_map.find(name);
|
||||
if (sampler_item != sampler_alt_name_map.end())
|
||||
{
|
||||
sampler_types.push_back(sampler_item->second);
|
||||
std::vector<common_sampler_type> samplers;
|
||||
samplers.reserve(names.size());
|
||||
|
||||
for (const auto & name : names) {
|
||||
auto sampler = sampler_canonical_name_map.find(name);
|
||||
if (sampler != sampler_canonical_name_map.end()) {
|
||||
samplers.push_back(sampler->second);
|
||||
} else {
|
||||
if (allow_alt_names) {
|
||||
sampler = sampler_alt_name_map.find(name);
|
||||
if (sampler != sampler_alt_name_map.end()) {
|
||||
samplers.push_back(sampler->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return sampler_types;
|
||||
|
||||
return samplers;
|
||||
}
|
||||
|
||||
std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string) {
|
||||
std::unordered_map<char, llama_sampler_type> sampler_name_map {
|
||||
{'k', llama_sampler_type::TOP_K},
|
||||
{'p', llama_sampler_type::TOP_P},
|
||||
{'y', llama_sampler_type::TYPICAL_P},
|
||||
{'m', llama_sampler_type::MIN_P},
|
||||
{'f', llama_sampler_type::TFS_Z},
|
||||
{'t', llama_sampler_type::TEMPERATURE}
|
||||
std::vector<common_sampler_type> common_sampler_types_from_chars(const std::string & chars) {
|
||||
std::unordered_map<char, common_sampler_type> sampler_name_map = {
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_DRY), COMMON_SAMPLER_TYPE_DRY },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },
|
||||
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES), COMMON_SAMPLER_TYPE_PENALTIES },
|
||||
};
|
||||
|
||||
std::vector<llama_sampler_type> sampler_types;
|
||||
sampler_types.reserve(names_string.size());
|
||||
for (const auto & c : names_string) {
|
||||
const auto sampler_item = sampler_name_map.find(c);
|
||||
if (sampler_item != sampler_name_map.end()) {
|
||||
sampler_types.push_back(sampler_item->second);
|
||||
std::vector<common_sampler_type> samplers;
|
||||
samplers.reserve(chars.size());
|
||||
|
||||
for (const auto & c : chars) {
|
||||
const auto sampler = sampler_name_map.find(c);
|
||||
if (sampler != sampler_name_map.end()) {
|
||||
samplers.push_back(sampler->second);
|
||||
}
|
||||
}
|
||||
return sampler_types;
|
||||
}
|
||||
|
||||
// no reasons to expose this function in header
|
||||
static void sampler_queue(
|
||||
struct llama_context * ctx_main,
|
||||
const llama_sampling_params & params,
|
||||
llama_token_data_array & cur_p,
|
||||
size_t min_keep) {
|
||||
const float temp = params.temp;
|
||||
const float dynatemp_range = params.dynatemp_range;
|
||||
const float dynatemp_exponent = params.dynatemp_exponent;
|
||||
const int32_t top_k = params.top_k;
|
||||
const float top_p = params.top_p;
|
||||
const float min_p = params.min_p;
|
||||
const float tfs_z = params.tfs_z;
|
||||
const float typical_p = params.typical_p;
|
||||
const std::vector<llama_sampler_type> & samplers_sequence = params.samplers_sequence;
|
||||
|
||||
for (auto sampler_type : samplers_sequence) {
|
||||
switch (sampler_type) {
|
||||
case llama_sampler_type::TOP_K : llama_sample_top_k (ctx_main, &cur_p, top_k, min_keep); break;
|
||||
case llama_sampler_type::TFS_Z : llama_sample_tail_free(ctx_main, &cur_p, tfs_z, min_keep); break;
|
||||
case llama_sampler_type::TYPICAL_P: llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break;
|
||||
case llama_sampler_type::TOP_P : llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break;
|
||||
case llama_sampler_type::MIN_P : llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break;
|
||||
case llama_sampler_type::TEMPERATURE:
|
||||
if (dynatemp_range > 0) {
|
||||
float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
|
||||
float dynatemp_max = std::max(0.0f, temp + dynatemp_range);
|
||||
llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent);
|
||||
} else {
|
||||
llama_sample_temp(ctx_main, &cur_p, temp);
|
||||
}
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static llama_token llama_sampling_sample_impl(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
const int idx,
|
||||
bool is_resampling) {
|
||||
const llama_sampling_params & params = ctx_sampling->params;
|
||||
|
||||
const float temp = params.temp;
|
||||
const int mirostat = params.mirostat;
|
||||
const float mirostat_tau = params.mirostat_tau;
|
||||
const float mirostat_eta = params.mirostat_eta;
|
||||
|
||||
std::vector<float> original_logits;
|
||||
auto cur_p = llama_sampling_prepare(ctx_sampling, ctx_main, ctx_cfg, idx, /* apply_grammar= */ is_resampling, &original_logits);
|
||||
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
||||
GGML_ASSERT(!original_logits.empty());
|
||||
}
|
||||
llama_token id = 0;
|
||||
// Get a pointer to the logits
|
||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||
|
||||
if (temp < 0.0) {
|
||||
// greedy sampling, with probs
|
||||
llama_sample_softmax(ctx_main, &cur_p);
|
||||
id = cur_p.data[0].id;
|
||||
} else if (temp == 0.0) {
|
||||
// greedy sampling, no probs
|
||||
id = llama_sample_token_greedy(ctx_main, &cur_p);
|
||||
} else {
|
||||
if (mirostat == 1) {
|
||||
const int mirostat_m = 100;
|
||||
llama_sample_temp(ctx_main, &cur_p, temp);
|
||||
id = llama_sample_token_mirostat(ctx_main, &cur_p, mirostat_tau, mirostat_eta, mirostat_m, &ctx_sampling->mirostat_mu);
|
||||
} else if (mirostat == 2) {
|
||||
llama_sample_temp(ctx_main, &cur_p, temp);
|
||||
id = llama_sample_token_mirostat_v2(ctx_main, &cur_p, mirostat_tau, mirostat_eta, &ctx_sampling->mirostat_mu);
|
||||
} else {
|
||||
// temperature sampling
|
||||
size_t min_keep = std::max(1, params.min_keep);
|
||||
|
||||
sampler_queue(ctx_main, params, cur_p, min_keep);
|
||||
|
||||
id = llama_sample_token_with_rng(ctx_main, &cur_p, ctx_sampling->rng);
|
||||
|
||||
//{
|
||||
// const int n_top = 10;
|
||||
// LOG("top %d candidates:\n", n_top);
|
||||
|
||||
// for (int i = 0; i < n_top; i++) {
|
||||
// const llama_token id = cur_p.data[i].id;
|
||||
// (void)id; // To avoid a warning that id is unused when logging is disabled.
|
||||
// LOG(" - %5d: '%12s' (%.3f)\n", id, llama_token_to_piece(ctx_main, id).c_str(), cur_p.data[i].p);
|
||||
// }
|
||||
//}
|
||||
|
||||
//LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
||||
// Create an array with a single token data element for the sampled id
|
||||
llama_token_data single_token_data = {id, logits[id], 0.0f};
|
||||
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
|
||||
|
||||
// Apply grammar constraints to the single token
|
||||
llama_sample_grammar(ctx_main, &single_token_data_array, ctx_sampling->grammar);
|
||||
|
||||
// Check if the token is valid according to the grammar by seeing if its logit has been set to -INFINITY
|
||||
bool is_valid = single_token_data_array.data[0].logit != -INFINITY;
|
||||
|
||||
// If the token is not valid according to the grammar, perform resampling
|
||||
if (!is_valid) {
|
||||
LOG("Resampling because token %d: '%s' does not meet grammar rules\n", id, llama_token_to_piece(ctx_main, id).c_str());
|
||||
|
||||
// Restore logits from the copy
|
||||
std::copy(original_logits.begin(), original_logits.end(), logits);
|
||||
|
||||
return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, /* is_resampling= */ true);
|
||||
}
|
||||
}
|
||||
|
||||
ctx_sampling->n_valid = temp == 0.0f ? 0 : cur_p.size;
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static llama_token_data_array llama_sampling_prepare_impl(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
const int idx,
|
||||
bool apply_grammar,
|
||||
std::vector<float> * original_logits) {
|
||||
const llama_sampling_params & params = ctx_sampling->params;
|
||||
|
||||
const int n_vocab = llama_n_vocab(llama_get_model(ctx_main));
|
||||
|
||||
const int32_t penalty_last_n = params.penalty_last_n < 0 ? params.n_prev : params.penalty_last_n;
|
||||
const float penalty_repeat = params.penalty_repeat;
|
||||
const float penalty_freq = params.penalty_freq;
|
||||
const float penalty_present = params.penalty_present;
|
||||
|
||||
const bool penalize_nl = params.penalize_nl;
|
||||
|
||||
auto & prev = ctx_sampling->prev;
|
||||
auto & cur = ctx_sampling->cur;
|
||||
|
||||
// Get a pointer to the logits
|
||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||
|
||||
if (ctx_sampling->grammar != NULL && !apply_grammar) {
|
||||
GGML_ASSERT(original_logits != NULL);
|
||||
// Only make a copy of the original logits if we are not applying grammar checks, not sure if I actually have to do this.
|
||||
*original_logits = {logits, logits + llama_n_vocab(llama_get_model(ctx_main))};
|
||||
}
|
||||
|
||||
// apply params.logit_bias map
|
||||
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
|
||||
logits[it->first] += it->second;
|
||||
}
|
||||
|
||||
if (ctx_cfg) {
|
||||
float * logits_guidance = llama_get_logits_ith(ctx_cfg, idx);
|
||||
llama_sample_apply_guidance(ctx_main, logits, logits_guidance, params.cfg_scale);
|
||||
}
|
||||
|
||||
cur.clear();
|
||||
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = { cur.data(), cur.size(), false };
|
||||
|
||||
// apply penalties
|
||||
const auto& penalty_tokens = params.use_penalty_prompt_tokens ? params.penalty_prompt_tokens : prev;
|
||||
const int penalty_tokens_used_size = std::min((int)penalty_tokens.size(), penalty_last_n);
|
||||
if (penalty_tokens_used_size) {
|
||||
const float nl_logit = logits[llama_token_nl(llama_get_model(ctx_main))];
|
||||
|
||||
llama_sample_repetition_penalties(ctx_main, &cur_p,
|
||||
penalty_tokens.data() + penalty_tokens.size() - penalty_tokens_used_size,
|
||||
penalty_tokens_used_size, penalty_repeat, penalty_freq, penalty_present);
|
||||
|
||||
if (!penalize_nl) {
|
||||
for (size_t idx = 0; idx < cur_p.size; idx++) {
|
||||
if (cur_p.data[idx].id == llama_token_nl(llama_get_model(ctx_main))) {
|
||||
cur_p.data[idx].logit = nl_logit;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// apply grammar checks before sampling logic
|
||||
if (apply_grammar && ctx_sampling->grammar != NULL) {
|
||||
llama_sample_grammar(ctx_main, &cur_p, ctx_sampling->grammar);
|
||||
}
|
||||
|
||||
return cur_p;
|
||||
}
|
||||
|
||||
llama_token llama_sampling_sample(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
const int idx) {
|
||||
// Call the implementation function with is_resampling set to false by default
|
||||
return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, /* is_resampling= */ false);
|
||||
}
|
||||
|
||||
llama_token_data_array llama_sampling_prepare(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
const int idx,
|
||||
bool apply_grammar,
|
||||
std::vector<float> * original_logits) {
|
||||
return llama_sampling_prepare_impl(ctx_sampling,ctx_main, ctx_cfg, idx, apply_grammar, original_logits);
|
||||
}
|
||||
|
||||
void llama_sampling_accept(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
llama_token id,
|
||||
bool apply_grammar) {
|
||||
ctx_sampling->prev.erase(ctx_sampling->prev.begin());
|
||||
ctx_sampling->prev.push_back(id);
|
||||
|
||||
if (ctx_sampling->grammar != NULL && apply_grammar) {
|
||||
llama_grammar_accept_token(ctx_main, ctx_sampling->grammar, id);
|
||||
}
|
||||
|
||||
return samplers;
|
||||
}
|
||||
|
|
|
@ -2,159 +2,106 @@
|
|||
|
||||
#include "llama.h"
|
||||
|
||||
#include "grammar-parser.h"
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
// sampler types
|
||||
enum class llama_sampler_type : char {
|
||||
TOP_K = 'k',
|
||||
TOP_P = 'p',
|
||||
MIN_P = 'm',
|
||||
TFS_Z = 'f',
|
||||
TYPICAL_P = 'y',
|
||||
TEMPERATURE = 't'
|
||||
};
|
||||
|
||||
// sampling parameters
|
||||
typedef struct llama_sampling_params {
|
||||
int32_t n_prev = 64; // number of previous tokens to remember
|
||||
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
||||
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
|
||||
int32_t top_k = 40; // <= 0 to use vocab size
|
||||
float top_p = 0.95f; // 1.0 = disabled
|
||||
float min_p = 0.05f; // 0.0 = disabled
|
||||
float tfs_z = 1.00f; // 1.0 = disabled
|
||||
float typical_p = 1.00f; // 1.0 = disabled
|
||||
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
|
||||
float dynatemp_range = 0.00f; // 0.0 = disabled
|
||||
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
||||
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||
float penalty_repeat = 1.00f; // 1.0 = disabled
|
||||
float penalty_freq = 0.00f; // 0.0 = disabled
|
||||
float penalty_present = 0.00f; // 0.0 = disabled
|
||||
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||
float mirostat_tau = 5.00f; // target entropy
|
||||
float mirostat_eta = 0.10f; // learning rate
|
||||
bool penalize_nl = false; // consider newlines as a repeatable token
|
||||
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
|
||||
|
||||
std::vector<llama_sampler_type> samplers_sequence = {
|
||||
llama_sampler_type::TOP_K,
|
||||
llama_sampler_type::TFS_Z,
|
||||
llama_sampler_type::TYPICAL_P,
|
||||
llama_sampler_type::TOP_P,
|
||||
llama_sampler_type::MIN_P,
|
||||
llama_sampler_type::TEMPERATURE
|
||||
};
|
||||
|
||||
std::string grammar; // optional BNF-like grammar to constrain sampling
|
||||
|
||||
// Classifier-Free Guidance
|
||||
// https://arxiv.org/abs/2306.17806
|
||||
std::string cfg_negative_prompt; // string to help guidance
|
||||
float cfg_scale = 1.f; // how strong is guidance
|
||||
|
||||
std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
|
||||
|
||||
std::vector<llama_token> penalty_prompt_tokens;
|
||||
bool use_penalty_prompt_tokens = false;
|
||||
} llama_sampling_params;
|
||||
|
||||
// general sampler context
|
||||
// TODO: move to llama.h
|
||||
struct llama_sampling_context {
|
||||
// parameters that will be used for sampling
|
||||
llama_sampling_params params;
|
||||
|
||||
// mirostat sampler state
|
||||
float mirostat_mu;
|
||||
|
||||
llama_grammar * grammar;
|
||||
|
||||
// internal
|
||||
grammar_parser::parse_state parsed_grammar;
|
||||
|
||||
// TODO: replace with ring-buffer
|
||||
std::vector<llama_token> prev;
|
||||
std::vector<llama_token_data> cur;
|
||||
size_t n_valid; // Number of correct top tokens with correct probabilities.
|
||||
|
||||
std::mt19937 rng;
|
||||
};
|
||||
|
||||
#include "common.h"
|
||||
|
||||
// Create a new sampling context instance.
|
||||
struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params);
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
void llama_sampling_free(struct llama_sampling_context * ctx);
|
||||
|
||||
// Reset the sampler context
|
||||
// - clear prev tokens
|
||||
// - reset grammar
|
||||
void llama_sampling_reset(llama_sampling_context * ctx);
|
||||
|
||||
// Set the sampler seed
|
||||
void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed);
|
||||
|
||||
// Copy the sampler context
|
||||
void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst);
|
||||
|
||||
// Get the last sampled token
|
||||
llama_token llama_sampling_last(llama_sampling_context * ctx);
|
||||
|
||||
// Get a string representation of the last sampled tokens
|
||||
std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama_context * ctx_main, int n);
|
||||
|
||||
// Print sampling parameters into a string
|
||||
std::string llama_sampling_print(const llama_sampling_params & params);
|
||||
|
||||
// Print sampling order into a string
|
||||
std::string llama_sampling_order_print(const llama_sampling_params & params);
|
||||
|
||||
std::string llama_sampling_type_to_str(llama_sampler_type sampler_type);
|
||||
|
||||
std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
|
||||
std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string);
|
||||
|
||||
// this is a common sampling function used across the examples for convenience
|
||||
// it can serve as a starting point for implementing your own sampling function
|
||||
// Note: When using multiple sequences, it is the caller's responsibility to call
|
||||
// llama_sampling_reset when a sequence ends
|
||||
// common_sampler extends llama_sampler with additional functionality:
|
||||
//
|
||||
// required:
|
||||
// - ctx_main: context to use for sampling
|
||||
// - ctx_sampling: sampling-specific context
|
||||
// - grammar support
|
||||
// - custom sampler logic based on the parameters
|
||||
// - history of the last accepted tokens
|
||||
// - performance metrics
|
||||
//
|
||||
// optional:
|
||||
// - ctx_cfg: context to use for classifier-free guidance
|
||||
// - idx: sample from llama_get_logits_ith(ctx, idx)
|
||||
// This goal is to have a common implementation of the sampling logic shared across the examples.
|
||||
// For example, depending on the temperature, the sampling chain can be very simple (greedy) or more
|
||||
// complex (top-k, top-p, etc).
|
||||
//
|
||||
// returns:
|
||||
// - token: sampled token
|
||||
// - candidates: vector of candidate tokens
|
||||
// Another example is related to the grammar. In general, the grammar constraints applied on the full
|
||||
// vocabulary can be very taxing. To improve performance, the grammar can be applied only to the sampled
|
||||
// token in order to verify if it fits the grammar. And only if the token doesn't fit the grammar, the
|
||||
// grammar constraints are applied to the full vocabulary and the token is resampled.
|
||||
//
|
||||
// The common_sampler also maintains a container with the last accepted tokens. In the future, this can
|
||||
// be moved into the core llama library.
|
||||
//
|
||||
// For convenience, the common_sampler also maintains a container with the current candidate tokens.
|
||||
// This can be used to access the probabilities of the rest of the non-sampled tokens.
|
||||
//
|
||||
// TODO: measure grammar performance
|
||||
//
|
||||
llama_token llama_sampling_sample(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
int idx = -1);
|
||||
|
||||
// Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters.
|
||||
llama_token_data_array llama_sampling_prepare(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
int idx = 0,
|
||||
bool apply_grammar = true,
|
||||
std::vector<float> * original_logits = nullptr);
|
||||
struct common_sampler;
|
||||
|
||||
void llama_sampling_accept(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
llama_token id,
|
||||
bool apply_grammar);
|
||||
// llama_sampler API overloads
|
||||
|
||||
struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params);
|
||||
|
||||
void common_sampler_free(struct common_sampler * gsmpl);
|
||||
|
||||
// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
|
||||
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
|
||||
void common_sampler_reset (struct common_sampler * gsmpl);
|
||||
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
|
||||
|
||||
// arguments can be nullptr to skip printing
|
||||
void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl);
|
||||
|
||||
// extended sampling implementation:
|
||||
//
|
||||
// - set logits
|
||||
// - apply the configured sampler chain
|
||||
// - check if the token fits the grammar (if any)
|
||||
// - if not: resample by first applying the grammar constraints and then sampling again (slower path)
|
||||
//
|
||||
// if grammar_first is true, the grammar is applied before the samplers (slower)
|
||||
// useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
|
||||
//
|
||||
llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
|
||||
|
||||
// generalized version of common_sampler_sample
|
||||
//
|
||||
// will cross-reference the sampled tokens with a batch of draft tokens and accept those that match
|
||||
// if the sampler disagrees at some point, we stop and return the accepted tokens up to now
|
||||
//
|
||||
// common_sampler_sample_n(gsmpl, ctx, { idx }, {});
|
||||
//
|
||||
// is equivalent to
|
||||
//
|
||||
// common_sampler_sample(gsmpl, ctx, idx);
|
||||
// common_sampler_accept(gsmpl, token, true);
|
||||
//
|
||||
// requires: idxs.size() == draft.size() + 1
|
||||
//
|
||||
// returns at least 1 token, up to idxs.size()
|
||||
//
|
||||
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first = false);
|
||||
|
||||
// assume idxs == [ 0, 1, 2, ..., draft.size() ]
|
||||
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first = false);
|
||||
|
||||
uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);
|
||||
|
||||
// helpers
|
||||
|
||||
// access the internal list of current candidate tokens
|
||||
llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl);
|
||||
|
||||
// get the last accepted token
|
||||
llama_token common_sampler_last(const struct common_sampler * gsmpl);
|
||||
|
||||
// print the sampler chain into a string
|
||||
std::string common_sampler_print(const struct common_sampler * gsmpl);
|
||||
|
||||
// get a string representation of the last accepted tokens
|
||||
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
|
||||
|
||||
char common_sampler_type_to_chr(enum common_sampler_type cnstr);
|
||||
std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
|
||||
|
||||
std::vector<enum common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
|
||||
std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);
|
||||
|
||||
llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab,
|
||||
const char * grammar_kind, const char * grammar_data);
|
||||
|
|
277
common/speculative.cpp
Normal file
277
common/speculative.cpp
Normal file
|
@ -0,0 +1,277 @@
|
|||
#include "speculative.h"
|
||||
|
||||
#include "log.h"
|
||||
#include "common.h"
|
||||
#include "sampling.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 128
|
||||
#define SPEC_VOCAB_CHECK_START_TOKEN_ID 5
|
||||
|
||||
struct common_speculative {
|
||||
struct llama_context * ctx;
|
||||
struct common_sampler * smpl;
|
||||
|
||||
llama_batch batch;
|
||||
llama_tokens prompt;
|
||||
};
|
||||
|
||||
struct common_speculative * common_speculative_init(
|
||||
struct llama_context * ctx_dft) {
|
||||
auto * result = new common_speculative {
|
||||
/* .ctx = */ ctx_dft,
|
||||
/* .smpl = */ nullptr,
|
||||
/* .batch = */ llama_batch_init(llama_n_batch(ctx_dft), 0, 1),
|
||||
/* .prompt = */ {},
|
||||
};
|
||||
|
||||
// TODO: optimize or pass from outside?
|
||||
#if 0
|
||||
{
|
||||
common_params_sampling params;
|
||||
params.no_perf = false;
|
||||
|
||||
params.top_k = 40;
|
||||
params.top_p = 0.9;
|
||||
|
||||
params.samplers = {
|
||||
COMMON_SAMPLER_TYPE_TOP_K,
|
||||
COMMON_SAMPLER_TYPE_TOP_P,
|
||||
COMMON_SAMPLER_TYPE_INFILL,
|
||||
};
|
||||
|
||||
result->smpl = common_sampler_init(llama_get_model(ctx_dft), params);
|
||||
}
|
||||
#else
|
||||
{
|
||||
common_params_sampling params;
|
||||
params.no_perf = false;
|
||||
|
||||
params.top_k = 10;
|
||||
|
||||
params.samplers = {
|
||||
COMMON_SAMPLER_TYPE_TOP_K,
|
||||
};
|
||||
|
||||
result->smpl = common_sampler_init(llama_get_model(ctx_dft), params);
|
||||
}
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void common_speculative_free(struct common_speculative * spec) {
|
||||
if (spec == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
common_sampler_free(spec->smpl);
|
||||
|
||||
llama_batch_free(spec->batch);
|
||||
|
||||
delete spec;
|
||||
}
|
||||
|
||||
bool common_speculative_are_compatible(
|
||||
const struct llama_context * ctx_tgt,
|
||||
const struct llama_context * ctx_dft) {
|
||||
const struct llama_model * model_tgt = llama_get_model(ctx_tgt);
|
||||
const struct llama_model * model_dft = llama_get_model(ctx_dft);
|
||||
|
||||
const struct llama_vocab * vocab_tgt = llama_model_get_vocab(model_tgt);
|
||||
const struct llama_vocab * vocab_dft = llama_model_get_vocab(model_dft);
|
||||
|
||||
const bool vocab_type_tgt = llama_vocab_type(vocab_tgt);
|
||||
LOG_DBG("%s: vocab_type tgt: %d\n", __func__, vocab_type_tgt);
|
||||
|
||||
const bool vocab_type_dft = llama_vocab_type(vocab_dft);
|
||||
LOG_DBG("%s: vocab_type dft: %d\n", __func__, vocab_type_dft);
|
||||
|
||||
if (vocab_type_tgt != vocab_type_dft) {
|
||||
LOG_ERR("%s: draft model vocab type must match target model to use speculation but "
|
||||
"vocab_type_dft = %d while vocab_type_tgt = %d\n", __func__, vocab_type_dft, vocab_type_tgt);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (llama_vocab_get_add_bos(vocab_tgt) != llama_vocab_get_add_bos(vocab_dft) ||
|
||||
llama_vocab_get_add_eos(vocab_tgt) != llama_vocab_get_add_eos(vocab_dft) ||
|
||||
llama_vocab_bos(vocab_tgt) != llama_vocab_bos(vocab_dft) ||
|
||||
llama_vocab_eos(vocab_tgt) != llama_vocab_eos(vocab_dft)) {
|
||||
LOG_ERR("%s: draft vocab special tokens must match target vocab to use speculation\n", __func__);
|
||||
LOG_ERR("%s: tgt: bos = %d (%d), eos = %d (%d)\n", __func__, llama_vocab_bos(vocab_tgt), llama_vocab_get_add_bos(vocab_tgt), llama_vocab_eos(vocab_tgt), llama_vocab_get_add_eos(vocab_tgt));
|
||||
LOG_ERR("%s: dft: bos = %d (%d), eos = %d (%d)\n", __func__, llama_vocab_bos(vocab_dft), llama_vocab_get_add_bos(vocab_dft), llama_vocab_eos(vocab_dft), llama_vocab_get_add_eos(vocab_dft));
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
const int n_vocab_tgt = llama_vocab_n_tokens(vocab_tgt);
|
||||
const int n_vocab_dft = llama_vocab_n_tokens(vocab_dft);
|
||||
|
||||
const int vocab_diff = std::abs(n_vocab_tgt - n_vocab_dft);
|
||||
|
||||
if (vocab_diff > SPEC_VOCAB_MAX_SIZE_DIFFERENCE) {
|
||||
LOG_ERR("%s: draft model vocab must closely match target model to use speculation but "
|
||||
"target vocab size %d does not match draft vocab size %d - difference %d, max allowed %d\n",
|
||||
__func__, n_vocab_tgt, llama_vocab_n_tokens(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = SPEC_VOCAB_CHECK_START_TOKEN_ID; i < std::min(n_vocab_tgt, n_vocab_dft); ++i) {
|
||||
const char * token_text_tgt = llama_vocab_get_text(vocab_tgt, i);
|
||||
const char * token_text_dft = llama_vocab_get_text(vocab_dft, i);
|
||||
if (std::strcmp(token_text_tgt, token_text_dft) != 0) {
|
||||
LOG_ERR("%s: draft vocab vocab must match target vocab to use speculation but "
|
||||
"token %d content differs - target '%s', draft '%s'\n", __func__, i,
|
||||
common_token_to_piece(ctx_tgt, i).c_str(),
|
||||
common_token_to_piece(ctx_dft, i).c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
llama_tokens common_speculative_gen_draft(
|
||||
struct common_speculative * spec,
|
||||
struct common_speculative_params params,
|
||||
const llama_tokens & prompt_tgt,
|
||||
llama_token id_last) {
|
||||
auto & batch = spec->batch;
|
||||
auto & ctx = spec->ctx;
|
||||
auto & smpl = spec->smpl;
|
||||
auto & prompt = spec->prompt;
|
||||
|
||||
int reuse_i = 0;
|
||||
int reuse_n = 0;
|
||||
|
||||
const int n_ctx = llama_n_ctx(ctx) - params.n_draft;
|
||||
|
||||
const int i_start = std::max<int>(0, (int) prompt_tgt.size() - n_ctx);
|
||||
|
||||
// reuse as much as possible from the old draft context
|
||||
// ideally, the draft context should be as big as the target context and we will always reuse the entire prompt
|
||||
for (int i = 0; i < (int) prompt.size(); ++i) {
|
||||
int cur = 0;
|
||||
while (i_start + cur < (int) prompt_tgt.size() &&
|
||||
i + cur < (int) prompt.size() &&
|
||||
prompt_tgt[i_start + cur] == prompt[i + cur]) {
|
||||
cur++;
|
||||
}
|
||||
|
||||
if ((cur >= params.n_reuse || n_ctx >= (int) prompt_tgt.size()) && cur > reuse_n) {
|
||||
reuse_i = i;
|
||||
reuse_n = cur;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DBG("%s: reuse_i = %d, reuse_n = %d, prompt = %d\n", __func__, reuse_i, reuse_n, (int) prompt.size());
|
||||
|
||||
llama_tokens result;
|
||||
result.reserve(params.n_draft);
|
||||
|
||||
if (reuse_n == 0) {
|
||||
llama_kv_cache_clear(ctx);
|
||||
|
||||
prompt.clear();
|
||||
} else {
|
||||
// this happens when a previous draft has been discarded (for example, due to being too small), but the
|
||||
// target model agreed with it. in this case, we simply pass back the previous results to save compute
|
||||
if (reuse_i + reuse_n < (int) prompt.size() && prompt[reuse_i + reuse_n] == id_last) {
|
||||
for (int i = reuse_i + reuse_n + 1; i < (int) prompt.size(); ++i) {
|
||||
result.push_back(prompt[i]);
|
||||
|
||||
if (params.n_draft <= (int) result.size()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (reuse_i > 0) {
|
||||
llama_kv_cache_seq_rm (ctx, 0, 0, reuse_i);
|
||||
llama_kv_cache_seq_add(ctx, 0, reuse_i, -1, -reuse_i);
|
||||
|
||||
prompt.erase(prompt.begin(), prompt.begin() + reuse_i);
|
||||
}
|
||||
|
||||
if (reuse_n < (int) prompt.size()) {
|
||||
llama_kv_cache_seq_rm (ctx, 0, reuse_n, -1);
|
||||
|
||||
prompt.erase(prompt.begin() + reuse_n, prompt.end());
|
||||
}
|
||||
}
|
||||
|
||||
// prepare a batch to evaluate any new tokens in the prompt
|
||||
common_batch_clear(batch);
|
||||
|
||||
for (size_t i = i_start + reuse_n; i < prompt_tgt.size(); ++i) {
|
||||
//LOG_DBG("i = %d, i_start = %d, reuse_n = %d, i - i_start = %d, id = %6d\n", i, i_start, reuse_n, i - i_start, prompt_tgt[i]);
|
||||
common_batch_add(batch, prompt_tgt[i], i - i_start, { 0 }, false);
|
||||
|
||||
prompt.push_back(prompt_tgt[i]);
|
||||
}
|
||||
|
||||
// we should rarely end-up here during normal decoding
|
||||
if (batch.n_tokens > 0) {
|
||||
//LOG_DBG("%s: draft prompt batch: %s\n", __func__, string_from(ctx, batch).c_str());
|
||||
|
||||
llama_decode(ctx, batch);
|
||||
}
|
||||
|
||||
const llama_pos n_past = prompt.size();
|
||||
|
||||
LOG_DBG("%s: n_past = %d\n", __func__, n_past);
|
||||
|
||||
common_batch_clear(batch);
|
||||
common_batch_add (batch, id_last, n_past, { 0 }, true);
|
||||
|
||||
prompt.push_back(id_last);
|
||||
|
||||
//LOG_DBG("%s: draft prompt: %s\n", __func__, string_from(ctx, prompt).c_str());
|
||||
|
||||
llama_decode(ctx, batch);
|
||||
|
||||
common_sampler_reset(smpl);
|
||||
|
||||
// sample n_draft tokens from the draft model
|
||||
for (int i = 0; i < params.n_draft; ++i) {
|
||||
common_batch_clear(batch);
|
||||
|
||||
common_sampler_sample(smpl, ctx, 0, true);
|
||||
|
||||
const auto * cur_p = common_sampler_get_candidates(smpl);
|
||||
|
||||
for (int k = 0; k < std::min(3, (int) cur_p->size); ++k) {
|
||||
LOG_DBG(" - draft candidate %3d, pos %3d: %6d (%8.3f) '%s'\n",
|
||||
k, i, cur_p->data[k].id, cur_p->data[k].p, common_token_to_piece(ctx, cur_p->data[k].id).c_str());
|
||||
}
|
||||
|
||||
// add drafted token for each sequence
|
||||
const llama_token id = cur_p->data[0].id;
|
||||
|
||||
// only collect very high-confidence draft tokens
|
||||
if (cur_p->data[0].p < params.p_min) {
|
||||
break;
|
||||
}
|
||||
|
||||
common_sampler_accept(smpl, id, true);
|
||||
|
||||
result.push_back(id);
|
||||
|
||||
if (params.n_draft <= (int) result.size()) {
|
||||
break;
|
||||
}
|
||||
|
||||
common_batch_add(batch, id, n_past + i + 1, { 0 }, true);
|
||||
|
||||
// evaluate the drafted tokens on the draft model
|
||||
llama_decode(ctx, batch);
|
||||
|
||||
prompt.push_back(id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue