Compare commits
1541 commits: codeplay/r... compared with master
1118 changed files with 243001 additions and 267932 deletions
.clang-format (new file, 161 lines)

@@ -0,0 +1,161 @@
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
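The new style can be tried locally with stock clang-format tooling; a minimal sketch, assuming the file above sits at the repository root (the source path and any repository-provided format targets are illustrative, not part of this diff):

    # reformat one file in place using the repository's .clang-format (path is a placeholder)
    clang-format -i src/llama.cpp
    # or preview how staged changes would be reformatted, without touching files
    git clang-format --diff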
Modified file:

@@ -17,8 +17,10 @@ Checks: >
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
     portability-*,
+    -portability-simd-intrinsics,
     misc-*,
     -misc-const-correctness,
     -misc-non-private-member-variables-in-classes,
     -misc-no-recursion,
+    -misc-use-anonymous-namespace,
 FormatStyle: none
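These checks are normally driven from a compile database; a minimal hedged sketch (the build directory name is an assumption, not something this diff specifies):

    # generate compile_commands.json, then run the configured clang-tidy checks over it
    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    run-clang-tidy -p build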
Modified file:

@@ -15,7 +15,7 @@ node('x86_runner1'){ // Running on x86 runner containing latest vecto
     stage('Running llama.cpp'){
         sh'''#!/bin/bash
             module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
-            qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./main -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
+            qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
             cat llama_log.txt # Printing results
         '''
     }
.devops/cpu.Dockerfile (new file, 92 lines)

@@ -0,0 +1,92 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    elif [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j $(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
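The multi-stage layout above produces full, light, and server images from a single file. A rough sketch of how the stages might be built and run (the image tags and model path are placeholders, not taken from this diff):

    # build only the CLI stage
    docker build -f .devops/cpu.Dockerfile --target light -t llama-cpp:cpu-cli .
    # build the server stage and run it with a locally mounted model (placeholder path)
    docker build -f .devops/cpu.Dockerfile --target server -t llama-cpp:cpu-server .
    docker run --rm -p 8080:8080 -v "$PWD/models:/models" llama-cpp:cpu-server -m /models/model.gguf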
.devops/cuda.Dockerfile (new file, 94 lines)

@@ -0,0 +1,94 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

WORKDIR /app

COPY . .

RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete


ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
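When CUDA_DOCKER_ARCH is not left at default, the build argument is forwarded to CMAKE_CUDA_ARCHITECTURES. A hedged sketch of pinning the build to a single architecture (the value 86 and the image tag are only illustrations):

    # build the server stage for compute capability 8.6 GPUs only
    docker build -f .devops/cuda.Dockerfile --build-arg CUDA_DOCKER_ARCH=86 --target server -t llama-cpp:cuda-server .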
Deleted file (36 lines):

@@ -1,36 +0,0 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1

# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]

Deleted file (50 lines):

@@ -1,50 +0,0 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]

Deleted file (25 lines):

@@ -1,25 +0,0 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1


RUN make -j$(nproc)

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]
.devops/intel.Dockerfile (new file, 91 lines)

@@ -0,0 +1,91 @@
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

## Build Image

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

### Full
FROM base AS full

COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete


ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
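As with the other images, individual stages can be targeted at build time; running the SYCL image against an Intel GPU typically also needs the host's render device exposed to the container. A hedged sketch (the device path, image tag, and model path are assumptions about the host, not anything this Dockerfile sets):

    docker build -f .devops/intel.Dockerfile --build-arg GGML_SYCL_F16=ON --target light -t llama-cpp:sycl-cli .
    # expose the host GPU render node to the container (host-specific assumption)
    docker run --rm --device /dev/dri -v "$PWD/models:/models" llama-cpp:sycl-cli -m /models/model.gguf -p "Hello"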
.devops/llama-cli-cann.Dockerfile (new file, 44 lines)

@@ -0,0 +1,44 @@
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# find libascend_hal.so, because the drive hasn`t been mounted.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT ["/llama-cli" ]
Deleted file (84 lines):

@@ -1,84 +0,0 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-clblast
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        OpenCL Inference of LLaMA model in C/C++
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
Requires:       clblast
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j LLAMA_CLBLAST=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamaclblast
cp -p server %{buildroot}%{_bindir}/llamaclblastserver
cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamaclblast
%{_bindir}/llamaclblastserver
%{_bindir}/llamaclblastsimple
/usr/lib/systemd/system/llamaclblast.service
%config /etc/sysconfig/llama


%pre

%post

%preun
%postun

%changelog
Modified file:

@@ -32,13 +32,13 @@ CPU inference for Meta's Lllama2 models using default options.
 %setup -n llama.cpp-master
 
 %build
-make -j LLAMA_CUDA=1
+make -j GGML_CUDA=1
 
 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llamacppcuda
-cp -p server %{buildroot}%{_bindir}/llamacppcudaserver
-cp -p simple %{buildroot}%{_bindir}/llamacppcudasimple
+cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
+cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
+cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
 
 mkdir -p %{buildroot}/usr/lib/systemd/system
 %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service

@@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
 [Service]
 Type=simple
 EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llamacppcudaserver $LLAMA_ARGS
+ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
 ExecReload=/bin/kill -s HUP $MAINPID
 Restart=never
 

@@ -67,9 +67,9 @@ rm -rf %{buildroot}
 rm -rf %{_builddir}/*
 
 %files
-%{_bindir}/llamacppcuda
-%{_bindir}/llamacppcudaserver
-%{_bindir}/llamacppcudasimple
+%{_bindir}/llama-cuda-cli
+%{_bindir}/llama-cuda-server
+%{_bindir}/llama-cuda-simple
 /usr/lib/systemd/system/llamacuda.service
 %config /etc/sysconfig/llama
 
@ -38,9 +38,9 @@ make -j
|
||||||
|
|
||||||
%install
|
%install
|
||||||
mkdir -p %{buildroot}%{_bindir}/
|
mkdir -p %{buildroot}%{_bindir}/
|
||||||
cp -p main %{buildroot}%{_bindir}/llama
|
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
|
||||||
cp -p server %{buildroot}%{_bindir}/llamaserver
|
cp -p llama-server %{buildroot}%{_bindir}/llama-server
|
||||||
cp -p simple %{buildroot}%{_bindir}/llamasimple
|
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
|
||||||
|
|
||||||
mkdir -p %{buildroot}/usr/lib/systemd/system
|
mkdir -p %{buildroot}/usr/lib/systemd/system
|
||||||
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
|
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
|
||||||
|
@ -51,7 +51,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
EnvironmentFile=/etc/sysconfig/llama
|
EnvironmentFile=/etc/sysconfig/llama
|
||||||
ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
|
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
|
||||||
ExecReload=/bin/kill -s HUP $MAINPID
|
ExecReload=/bin/kill -s HUP $MAINPID
|
||||||
Restart=never
|
Restart=never
|
||||||
|
|
||||||
|
@ -69,9 +69,9 @@ rm -rf %{buildroot}
|
||||||
rm -rf %{_builddir}/*
|
rm -rf %{_builddir}/*
|
||||||
|
|
||||||
%files
|
%files
|
||||||
%{_bindir}/llama
|
%{_bindir}/llama-cli
|
||||||
%{_bindir}/llamaserver
|
%{_bindir}/llama-server
|
||||||
%{_bindir}/llamasimple
|
%{_bindir}/llama-simple
|
||||||
/usr/lib/systemd/system/llama.service
|
/usr/lib/systemd/system/llama.service
|
||||||
%config /etc/sysconfig/llama
|
%config /etc/sysconfig/llama
|
||||||
|
|
||||||
|
|
|
@@ -1,35 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1

RUN make -j$(nproc) main

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/main /main

ENTRYPOINT [ "/main" ]
@@ -1,26 +0,0 @@ (file deleted)
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
        echo "LLAMA_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
    fi && \
    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target main

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/main /main

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
@@ -1,45 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc) main

ENTRYPOINT [ "/app/main" ]
@@ -1,27 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DLLAMA_VULKAN=1 && \
    cmake --build build --config Release --target main

# Clean up
WORKDIR /
RUN cp /app/build/bin/main /main && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
@@ -1,23 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j$(nproc) main

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/main /main

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
.devops/musa.Dockerfile (new file, 108 lines)
@@ -0,0 +1,108 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y \
    build-essential \
    cmake \
    python3 \
    python3-pip \
    git \
    libcurl4-openssl-dev \
    libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Use the default MUSA archs if not specified
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_MUSA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
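The MUSA Dockerfile above defines three runnable stages (full, light, server). A minimal usage sketch, where the image tag and model path are assumptions rather than names from the repository:

docker build -f .devops/musa.Dockerfile --target server -t llama-musa-server .
docker run -p 8080:8080 -v ./models:/models llama-musa-server -m /models/model.gguf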
@@ -6,11 +6,10 @@
 let
   inherit (config.packages) default;
   binaries = [
-    "llama"
+    "llama-cli"
     "llama-embedding"
     "llama-server"
-    "quantize"
-    "train-text-from-scratch"
+    "llama-quantize"
   ];
   mkApp = name: {
     type = "app";
@@ -1,13 +1,52 @@
+{ inputs, ... }:
+
 {
   perSystem =
-    { config, lib, ... }:
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
     {
       devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+        let
+          pkgs = import inputs.nixpkgs { inherit system; };
+          stdenv = pkgs.stdenv;
+          scripts = config.packages.python-scripts;
+        in
+        lib.pipe (config.packages) [
+          (lib.concatMapAttrs (
+            name: package: {
+              ${name} = pkgs.mkShell {
+                name = "${name}";
+                inputsFrom = [ package ];
+                shellHook = ''
+                  echo "Entering ${name} devShell"
+                '';
+              };
+              "${name}-extra" =
+                if (name == "python-scripts") then
+                  null
+                else
+                  pkgs.mkShell {
+                    name = "${name}-extra";
+                    inputsFrom = [
+                      package
+                      scripts
+                    ];
+                    # Extra packages that *may* be used by some scripts
+                    packages = [
+                      pkgs.python3Packages.tiktoken
+                    ];
+                    shellHook = ''
+                      echo "Entering ${name} devShell"
+                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                    '';
+                  };
+            }
+          ))
+          (lib.filterAttrs (name: value: value != null))
+        ];
     };
 }
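With the per-package devShells generated above, a shell could be entered roughly like this (the .#default attribute names are assumptions derived from the package set, not names stated in this diff):

nix develop .#default          # plain shell built from the default package
nix develop .#default-extra    # same shell plus the python-scripts dependencies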
@@ -26,16 +26,14 @@
       config.cudaSupport = true;
       config.allowUnfreePredicate =
         p:
-        builtins.all
-          (
-            license:
-            license.free
-            || builtins.elem license.shortName [
-              "CUDA EULA"
-              "cuDNN EULA"
-            ]
-          )
-          (p.meta.licenses or [ p.meta.license ]);
+        builtins.all (
+          license:
+          license.free
+          || builtins.elem license.shortName [
+            "CUDA EULA"
+            "cuDNN EULA"
+          ]
+        ) (p.meta.licenses or [ p.meta.license ]);
     };
     # Ensure dependencies use ROCm consistently
     pkgsRocm = import inputs.nixpkgs {
.devops/nix/package-gguf-py.nix (new file, 36 lines)
@@ -0,0 +1,36 @@
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
@@ -3,33 +3,36 @@
   glibc,
   config,
   stdenv,
-  mkShell,
   runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
-  python3,
   mpi,
   blas,
   cudaPackages,
+  autoAddDriverRunpath,
   darwin,
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
-  useBlas ? builtins.all (x: !x) [
-    useCuda
-    useMetalKit
-    useOpenCL
-    useRocm
-    useVulkan
-  ] && blas.meta.available,
+  curl,
+  shaderc,
+  useBlas ?
+    builtins.all (x: !x) [
+      useCuda
+      useMetalKit
+      useRocm
+      useVulkan
+    ]
+    && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
-  useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
+  # Increases the runtime closure size by ~700M
+  useMpi ? false,
   useRocm ? config.rocmSupport,
+  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -37,8 +40,8 @@
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
-  precompileMetalShaders ? false
-}@inputs:
+  precompileMetalShaders ? false,
+}:

 let
   inherit (lib)
@@ -46,7 +49,6 @@ let
     cmakeFeature
     optionals
     strings
-    versionOlder
     ;

   stdenv = throw "Use effectiveStdenv instead";
@@ -56,45 +58,17 @@ let
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
-    ++ lib.optionals useOpenCL [ "OpenCL" ]
     ++ lib.optionals useRocm [ "ROCm" ]
     ++ lib.optionals useVulkan [ "Vulkan" ];

   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix =
-    strings.optionalString (suffices != [ ])
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
-
-  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
-
-  # TODO: package the Python in this repository in a Nix-like way.
-  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
-  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
-  # https://peps.python.org/pep-0517/
-  #
-  # TODO: Package up each Python script or service appropriately, by making
-  # them into "entrypoints"
-  llama-python = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-    ]
-  );
-
-  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      ps.tiktoken
-      ps.torchWithoutCuda
-      ps.transformers
-    ]
-  );
-
-  xcrunHost = runCommand "xcrunHost" {} ''
+  descriptionSuffix = strings.optionalString (
+    suffices != [ ]
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
+
+  xcrunHost = runCommand "xcrunHost" { } ''
     mkdir -p $out/bin
     ln -s /usr/bin/xcrun $out/bin
   '';
@@ -111,16 +85,9 @@ let
     ++ optionals useMetalKit [ MetalKit ];

   cudaBuildInputs = with cudaPackages; [
-    cuda_cccl.dev # <nv/target>
-
-    # A temporary hack for reducing the closure size, remove once cudaPackages
-    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    libcublas.dev
-    libcublas.lib
-    libcublas.static
+    cuda_cudart
+    cuda_cccl # <nv/target>
+    libcublas
   ];

   rocmBuildInputs = with rocmPackages; [
@@ -132,187 +99,149 @@ let
   vulkanBuildInputs = [
     vulkan-headers
     vulkan-loader
+    shaderc
   ];
 in

-effectiveStdenv.mkDerivation (
-  finalAttrs: {
-    pname = "llama-cpp${pnameSuffix}";
-    version = llamaVersion;
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;

   # Note: none of the files discarded here are visible in the sandbox or
   # affect the output hash. This also means they can be modified without
   # triggering a rebuild.
   src = lib.cleanSourceWith {
     filter =
       name: type:
       let
         noneOf = builtins.all (x: !x);
         baseName = baseNameOf name;
       in
       noneOf [
         (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
         (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
         (lib.hasPrefix "." baseName) # Skip hidden files and directories
         (baseName == "flake.lock")
       ];
     src = lib.cleanSource ../../.;
   };

   postPatch = ''
-    substituteInPlace ./ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-    substituteInPlace ./ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
   '';

   # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
   # `default.metallib` may be compiled with Metal compiler from XCode
   # and we need to escape sandbox on MacOS to access Metal compiler.
   # `xcrun` is used find the path of the Metal compiler, which is varible
   # and not on $PATH
   # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
   __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

   nativeBuildInputs =
     [
       cmake
       ninja
       pkg-config
       git
     ]
     ++ optionals useCuda [
       cudaPackages.cuda_nvcc

-      # TODO: Replace with autoAddDriverRunpath
-      # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
-      cudaPackages.autoAddOpenGLRunpathHook
+      autoAddDriverRunpath
     ]
-    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
-      glibc.static
-    ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
-      xcrunHost
-    ];
+    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

   buildInputs =
     optionals effectiveStdenv.isDarwin darwinBuildInputs
     ++ optionals useCuda cudaBuildInputs
     ++ optionals useMpi [ mpi ]
-    ++ optionals useOpenCL [ clblast ]
     ++ optionals useRocm rocmBuildInputs
     ++ optionals useBlas [ blas ]
-    ++ optionals useVulkan vulkanBuildInputs;
+    ++ optionals useVulkan vulkanBuildInputs
+    ++ optionals enableCurl [ curl ];

   cmakeFlags =
     [
-      (cmakeBool "LLAMA_NATIVE" false)
       (cmakeBool "LLAMA_BUILD_SERVER" true)
       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-      (cmakeBool "LLAMA_BLAS" useBlas)
-      (cmakeBool "LLAMA_CLBLAST" useOpenCL)
-      (cmakeBool "LLAMA_CUDA" useCuda)
-      (cmakeBool "LLAMA_HIPBLAS" useRocm)
-      (cmakeBool "LLAMA_METAL" useMetalKit)
-      (cmakeBool "LLAMA_VULKAN" useVulkan)
-      (cmakeBool "LLAMA_STATIC" enableStatic)
+      (cmakeBool "LLAMA_CURL" enableCurl)
+      (cmakeBool "GGML_NATIVE" false)
+      (cmakeBool "GGML_BLAS" useBlas)
+      (cmakeBool "GGML_CUDA" useCuda)
+      (cmakeBool "GGML_HIP" useRocm)
+      (cmakeBool "GGML_METAL" useMetalKit)
+      (cmakeBool "GGML_VULKAN" useVulkan)
+      (cmakeBool "GGML_STATIC" enableStatic)
     ]
     ++ optionals useCuda [
       (
         with cudaPackages.flags;
         cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
           builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
         )
       )
     ]
     ++ optionals useRocm [
       (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
-      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
     ]
     ++ optionals useMetalKit [
       (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-      (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
     ];

   # Environment variables needed for ROCm
   env = optionals useRocm {
     ROCM_PATH = "${rocmPackages.clr}";
     HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
   };

   # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
   # if they haven't been added yet.
   postInstall = ''
-    mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
-    mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
     mkdir -p $out/include
-    cp $src/llama.h $out/include/
+    cp $src/include/llama.h $out/include/
   '';

-  # Define the shells here, but don't add in the inputsFrom to avoid recursion.
-  passthru = {
-    inherit
-      useBlas
-      useCuda
-      useMetalKit
-      useMpi
-      useOpenCL
-      useRocm
-      useVulkan
-      ;
-
-    shell = mkShell {
-      name = "shell-${finalAttrs.finalPackage.name}";
-      description = "contains numpy and sentencepiece";
-      buildInputs = [ llama-python ];
-      inputsFrom = [ finalAttrs.finalPackage ];
-      shellHook = ''
-        addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-      '';
-    };
-
-    shell-extra = mkShell {
-      name = "shell-extra-${finalAttrs.finalPackage.name}";
-      description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-      buildInputs = [ llama-python-extra ];
-      inputsFrom = [ finalAttrs.finalPackage ];
-    };
-  };
-
   meta = {
     # Configurations we don't want even the CI to evaluate. Results in the
     # "unsupported platform" messages. This is mostly a no-op, because
     # cudaPackages would've refused to evaluate anyway.
-    badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+    badPlatforms = optionals useCuda lib.platforms.darwin;

     # Configurations that are known to result in build failures. Can be
     # overridden by importing Nixpkgs with `allowBroken = true`.
     broken = (useMetalKit && !effectiveStdenv.isDarwin);

     description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
     homepage = "https://github.com/ggerganov/llama.cpp/";
     license = lib.licenses.mit;

     # Accommodates `nix run` and `lib.getExe`
-    mainProgram = "llama";
+    mainProgram = "llama-cli";

     # These people might respond, on the best effort basis, if you ping them
     # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
     # Consider adding yourself to this list if you want to ensure this flake
     # stays maintained and you're willing to invest your time. Do not add
     # other people without their consent. Consider removing people after
     # they've been unreachable for long periods of time.

     # Note that lib.maintainers is defined in Nixpkgs, but you may just add
     # an attrset following the same format as in
     # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
     maintainers = with lib.maintainers; [
       philiptaron
       SomeoneSerge
     ];

     # Extend `badPlatforms` instead
     platforms = lib.platforms.all;
   };
-  }
-)
+})
.devops/nix/python-scripts.nix (new file, 66 lines)
@@ -0,0 +1,66 @@
{
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
}@inputs:

let
  llama-python-deps = with python3Packages; [
    numpy
    sentencepiece
    transformers
    protobuf
    torchWithoutCuda
    gguf-py
    tqdm

    # for scripts/compare-llama-bench.py
    gitpython
    tabulate

    # for examples/pydantic-models-to-grammar-examples.py
    docstring-parser
    pydantic

  ];

  llama-python-test-deps = with python3Packages; [
    # Server bench
    matplotlib

    # server tests
    openai
    pytest
    prometheus-client
  ];
in

buildPythonPackage ({
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        any = builtins.any (x: x);
        baseName = builtins.baseNameOf name;
      in
      any [
        (lib.hasSuffix ".py" name)
        (baseName == "README.md")
        (baseName == "pyproject.toml")
      ];
    src = lib.cleanSource ../../.;
  };
  nativeBuildInputs = [ poetry-core ];
  nativeCheckInputs = llama-python-test-deps;
  dependencies = llama-python-deps;
})
@@ -1,19 +1,41 @@
 {
   lib,
   newScope,
+  python3,
   llamaVersion ? "0.0.0",
 }:

+let
+  pythonPackages = python3.pkgs;
+  buildPythonPackage = pythonPackages.buildPythonPackage;
+  numpy = pythonPackages.numpy;
+  tqdm = pythonPackages.tqdm;
+  sentencepiece = pythonPackages.sentencepiece;
+  pyyaml = pythonPackages.pyyaml;
+  poetry-core = pythonPackages.poetry-core;
+  pytestCheckHook = pythonPackages.pytestCheckHook;
+in
+
 # We're using `makeScope` instead of just writing out an attrset
 # because it allows users to apply overlays later using `overrideScope'`.
 # Cf. https://noogle.dev/f/lib/makeScope

-lib.makeScope newScope (
-  self: {
-    inherit llamaVersion;
-    llama-cpp = self.callPackage ./package.nix { };
-    docker = self.callPackage ./docker.nix { };
-    docker-min = self.callPackage ./docker.nix { interactive = false; };
-    sif = self.callPackage ./sif.nix { };
-  }
-)
+lib.makeScope newScope (self: {
+  inherit llamaVersion;
+  gguf-py = self.callPackage ./package-gguf-py.nix {
+    inherit
+      buildPythonPackage
+      numpy
+      tqdm
+      sentencepiece
+      poetry-core
+      pyyaml
+      pytestCheckHook
+      ;
+  };
+  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+  llama-cpp = self.callPackage ./package.nix { };
+  docker = self.callPackage ./docker.nix { };
+  docker-min = self.callPackage ./docker.nix { interactive = false; };
+  sif = self.callPackage ./sif.nix { };
+})
.devops/rocm.Dockerfile (new file, 113 lines)
@@ -0,0 +1,113 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.3
ARG AMDGPU_VERSION=6.3

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

### Build image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html

#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
ARG ROCM_DOCKER_ARCH=gfx1100

# Set nvcc architectured
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# ENV CC=/opt/rocm/llvm/bin/clang
# ENV CXX=/opt/rocm/llvm/bin/clang++

RUN apt-get update \
    && apt-get install -y \
    build-essential \
    cmake \
    git \
    libcurl4-openssl-dev \
    curl \
    libgomp1

WORKDIR /app

COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
    && cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
    && find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_ROCM_DEV_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3-pip \
    python3 \
    python3-wheel\
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
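Because ROCM_DOCKER_ARCH is a build argument, the GPU target can be overridden at build time. A rough usage sketch (image tag, architecture value, and model path are assumptions; the device flags are the usual way to expose ROCm GPUs to a container):

docker build -f .devops/rocm.Dockerfile --build-arg ROCM_DOCKER_ARCH=gfx1030 --target light -t llama-rocm-light .
docker run --device /dev/kfd --device /dev/dri -v ./models:/models llama-rocm-light -m /models/model.gguf -p "Hello"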
@@ -1,37 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc) server

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1

COPY --from=build /app/server /server

ENTRYPOINT [ "/server" ]
@@ -1,29 +0,0 @@ (file deleted)
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
        echo "LLAMA_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
    fi && \
    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target server

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

COPY --from=build /app/build/bin/server /server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
@@ -1,50 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

RUN make -j$(nproc)

ENTRYPOINT [ "/app/server" ]
@@ -1,31 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Install cURL
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release --target server

# Clean up
WORKDIR /
RUN cp /app/build/bin/server /server && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
@@ -1,25 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc) server

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1

COPY --from=build /app/server /server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
@@ -8,36 +8,40 @@ arg1="$1"
 shift

 if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
-    python3 ./convert-hf-to-gguf.py "$@"
+    exec python3 ./convert_hf_to_gguf.py "$@"
 elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
-    ./quantize "$@"
+    exec ./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
-    ./main "$@"
-elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
-    ./finetune "$@"
+    exec ./llama-cli "$@"
+elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+    exec ./llama-bench "$@"
+elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+    exec ./llama-perplexity "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
-    for i in `ls $1/$2/ggml-model-f16.bin*`; do
+    for i in $(ls $1/$2/ggml-model-f16.bin*); do
         if [ -f "${i/f16/q4_0}" ]; then
             echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
         else
             echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
-            ./quantize "$i" "${i/f16/q4_0}" q4_0
+            exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
         fi
     done
 elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
-    ./server "$@"
+    exec ./llama-server "$@"
 else
     echo "Unknown command: $arg1"
     echo "Available commands: "
     echo "  --run (-r): Run a model previously converted into ggml"
     echo "      ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+    echo "      ex: -m model.gguf"
+    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+    echo "      ex: -m model.gguf -f file.txt"
     echo "  --convert (-c): Convert a llama model into ggml"
     echo "      ex: --outtype f16 \"/models/7B/\" "
     echo "  --quantize (-q): Optimize with quantization process ggml"
     echo "      ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
-    echo "  --finetune (-f): Run finetune command to create a lora finetune of the model"
-    echo "      See documentation for finetune for command-line parameters"
     echo "  --all-in-one (-a): Execute --convert & --quantize"
     echo "      ex: \"/models/\" 7B"
     echo "  --server (-s): Run a model on the server"
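A rough sketch of driving the updated tools.sh wrapper through one of the "full" images built above; the image tag and model layout are assumptions, while the flags mirror the help text in the script:

docker run -v ./models:/models llama-full --convert --outtype f16 /models/7B/
docker run -v ./models:/models llama-full --quantize /models/7B/ggml-model-f16.bin /models/7B/ggml-model-q4_0.bin 2
docker run -v ./models:/models llama-full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512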
.devops/vulkan.Dockerfile (new file, 89 lines)
@@ -0,0 +1,89 @@
ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl libvulkan-dev \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
@@ -1,7 +1,7 @@
 *.o
 *.a
 .cache/
-.git/
+# Do not ignore .git directory, otherwise the reported build number will always be 0
 .github/
 .gitignore
 .vs/
@@ -12,8 +12,8 @@ build*/

 models/*

-/main
-/quantize
+/llama-cli
+/llama-quantize

 arm_neon.h
 compile_commands.json
.ecrc
@@ -1,5 +1,5 @@
 {
-    "Exclude": ["^\\.gitmodules$"],
+    "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
     "Disable": {
         "IndentSize": true
     }
@@ -24,5 +24,27 @@ insert_final_newline = unset
 [examples/server/public/*]
 indent_size = 2

+[examples/server/public/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/server/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
+
+[examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
+insert_final_newline = unset
+
+[models/templates/*.jinja]
+indent_style = unset
+indent_size = unset
+end_of_line = unset
+charset = unset
+trim_trailing_whitespace = unset
+insert_final_newline = unset
50 .github/ISSUE_TEMPLATE/01-bug-low.yml vendored
@@ -1,50 +0,0 @@ (file removed)
name: Low Severity Bugs
description: Used to report low severity bugs in llama.cpp (e.g. cosmetic issues, non critical UI glitches)
title: "Bug: "
labels: ["bug-unconfirmed", "low severity"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
        Please include information about your system, the steps to reproduce the bug,
        and the version of llama.cpp that you are using.
        If possible, please provide a minimal code example that reproduces the bug.
  - type: textarea
    id: what-happened
    attributes:
      label: What happened?
      description: Also tell us, what did you expect to happen?
      placeholder: Tell us what you see!
    validations:
      required: true
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./main --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: What operating system are you seeing the problem on?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
87 .github/ISSUE_TEMPLATE/010-bug-compilation.yml vendored Normal file
@@ -0,0 +1,87 @@ (new file)
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the compilation of llama.cpp fails.
        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
        If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
        by clearing `~/.cache/ccache` (on Linux).
  - type: textarea
    id: commit
    attributes:
      label: Git commit
      description: Which commit are you trying to compile?
      placeholder: |
        $git rev-parse HEAD
        84a07a17b1b08cf2b9747c633a2372782848a27f
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
      placeholder: >
        I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Compile command
      description: >
        Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
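The compilation template above asks reporters to first rule out a stale ccache; a minimal sketch of that check from the repository root, assuming a plain CMake build:

    # reconfigure and rebuild with ccache disabled, as the template suggests
    cmake -B build -DGGML_CCACHE=OFF
    cmake --build build --config Release
    # if the build now succeeds, clearing the cache (Linux location) usually fixes the original setup
    rm -rf ~/.cache/ccache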
101 .github/ISSUE_TEMPLATE/011-bug-results.yml vendored Normal file
@@ -0,0 +1,101 @@ (new file)
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the model evaluation results
        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
        The `llama-cli` binary can be used for simple and reproducible model inference.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: hardware
    attributes:
      label: Hardware
      description: Which CPUs/GPUs are you using?
      placeholder: >
        e.g. Ryzen 5950X + 2x RTX 4090
    validations:
      required: true
  - type: textarea
    id: model
    attributes:
      label: Models
      description: >
        Which model(s) at which quantization were you using when encountering the bug?
        If you downloaded a GGUF file off of Huggingface, please provide a link.
      placeholder: >
        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
        that information would be very much appreciated by us.
      placeholder: >
        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
        When I use -ngl 0 it works correctly.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including the command that you entered and any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
91 .github/ISSUE_TEMPLATE/019-bug-misc.yml vendored Normal file
@@ -0,0 +1,91 @@ (new file)
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for miscellaneous bugs that don't fit into any other category.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software is affected? (You can use `--version` to get a version string.)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: dropdown
    id: module
    attributes:
      label: Which llama.cpp modules do you know to be affected?
      multiple: true
      options:
        - Documentation/Github
        - libllama (core library)
        - llama-cli
        - llama-server
        - llama-bench
        - llama-quantize
        - Python/Bash scripts
        - Test code
        - Other (Please specify in the next section)
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Command line
      description: >
        Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it (if applicable).
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        If applicable, please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
50 .github/ISSUE_TEMPLATE/02-bug-medium.yml vendored
@@ -1,50 +0,0 @@ (file removed)
name: Medium Severity Bug
description: Used to report medium severity bugs in llama.cpp (e.g. Malfunctioning Features but generally still useable)
title: "Bug: "
labels: ["bug-unconfirmed", "medium severity"]
body: (the remaining lines are identical to the body of 01-bug-low.yml shown above)
@@ -1,5 +1,5 @@
 name: Enhancement
-description: Used to request enhancements for llama.cpp
+description: Used to request enhancements for llama.cpp.
 title: "Feature Request: "
 labels: ["enhancement"]
 body:
50 .github/ISSUE_TEMPLATE/03-bug-high.yml vendored
@@ -1,50 +0,0 @@ (file removed)
name: High Severity Bug
description: Used to report high severity bugs in llama.cpp (e.g. Malfunctioning features hindering important common workflow)
title: "Bug: "
labels: ["bug-unconfirmed", "high severity"]
body: (the remaining lines are identical to the body of 01-bug-low.yml shown above)
@@ -1,5 +1,5 @@
 name: Research
-description: Track new technical research area
+description: Track new technical research area.
 title: "Research: "
 labels: ["research 🔬"]
 body:
50 .github/ISSUE_TEMPLATE/04-bug-critical.yml vendored
@@ -1,50 +0,0 @@ (file removed)
name: Critical Severity Bug
description: Used to report critical severity bugs in llama.cpp (e.g. Crashing, Corrupted, Dataloss)
title: "Bug: "
labels: ["bug-unconfirmed", "critical severity"]
body: (the remaining lines are identical to the body of 01-bug-low.yml shown above)
@@ -1,5 +1,5 @@
 name: Refactor (Maintainers)
-description: Used to track refactoring opportunities
+description: Used to track refactoring opportunities.
 title: "Refactor: "
 labels: ["refactor"]
 body:
2 .github/ISSUE_TEMPLATE/config.yml vendored
@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
-
-
30 .github/labeler.yml vendored
@@ -2,31 +2,32 @@
 Kompute:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-kompute.h
-          - ggml-kompute.cpp
+          - ggml/include/ggml-kompute.h
+          - ggml/src/ggml-kompute/**
           - README-kompute.md
 Apple Metal:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-metal.h
-          - ggml-metal.cpp
+          - ggml/include/ggml-metal.h
+          - ggml/src/ggml-metal/**
           - README-metal.md
 SYCL:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-sycl.h
-          - ggml-sycl.cpp
-          - README-sycl.md
+          - ggml/include/ggml-sycl.h
+          - ggml/src/ggml-sycl/**
+          - docs/backend/SYCL.md
+          - examples/sycl/**
 Nvidia GPU:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-cuda.h
-          - ggml-cuda/**
+          - ggml/include/ggml-cuda.h
+          - ggml/src/ggml-cuda/**
 Vulkan:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml_vk_generate_shaders.py
-          - ggml-vulkan*
+          - ggml/include/ggml-vulkan.h
+          - ggml/src/ggml-vulkan/**
 documentation:
   - changed-files:
       - any-glob-to-any-file:
@@ -42,7 +43,6 @@ build:
       - cmake/**
       - CMakeLists.txt
       - CMakePresets.json
-      - codecov.yml
 examples:
   - changed-files:
       - any-glob-to-any-file: examples/**
@@ -74,11 +74,7 @@ server:
 ggml:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml.c
-          - ggml.h
-          - ggml-*.c
-          - ggml-*.h
-          - ggml-cuda/**
+          - ggml/**
 nix:
   - changed-files:
       - any-glob-to-any-file:
6 .github/pull_request_template.md vendored
@@ -1,5 +1 @@
-- Self Reported Review Complexity:
-  - [ ] Review Complexity : Low
-  - [ ] Review Complexity : Medium
-  - [ ] Review Complexity : High
-- [ ] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
+*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
@@ -1,3 +1,6 @@
+# TODO: there have been some issues with the workflow, so disabling for now
+# https://github.com/ggerganov/llama.cpp/issues/7893
+#
 # Benchmark
 name: Benchmark

@@ -24,10 +27,10 @@ on:
   push:
     branches:
       - master
-    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
   pull_request_target:
     types: [opened, synchronize, reopened]
-    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
   schedule:
     - cron: '04 2 * * *'

@@ -109,7 +112,7 @@ jobs:
        run: |
          set -eux
          cmake -B build \
-            -DLLAMA_NATIVE=OFF \
+            -DGGML_NATIVE=OFF \
             -DLLAMA_BUILD_SERVER=ON \
             -DLLAMA_CURL=ON \
             -DLLAMA_CUBLAS=ON \
@@ -119,7 +122,7 @@
             -DLLAMA_FATAL_WARNINGS=OFF \
             -DLLAMA_ALL_WARNINGS=OFF \
             -DCMAKE_BUILD_TYPE=Release;
-         cmake --build build --config Release -j $(nproc) --target server
+         cmake --build build --config Release -j $(nproc) --target llama-server

      - name: Download the dataset
        id: download_dataset
@@ -129,6 +132,8 @@

      - name: Server bench
        id: server_bench
+       env:
+         HEAD_REF: ${{ github.head_ref || github.ref_name }}
        run: |
          set -eux

@@ -137,7 +142,7 @@
          python bench.py \
             --runner-label ${{ env.RUNNER_LABEL }} \
             --name ${{ github.job }} \
-            --branch ${{ github.head_ref || github.ref_name }} \
+            --branch $HEAD_REF \
             --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
             --scenario script.js \
             --duration ${{ github.event.inputs.duration || env.DURATION }} \
891 .github/workflows/build.yml vendored
File diff suppressed because it is too large
7 .github/workflows/close-issue.yml vendored
@@ -3,6 +3,11 @@ on:
   schedule:
     - cron: "42 0 * * *"

+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  issues: write
+
 jobs:
   close-issues:
     runs-on: ubuntu-latest
@@ -12,7 +17,7 @@ jobs:
     steps:
       - uses: actions/stale@v5
         with:
-          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
+          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
           days-before-issue-stale: 30
           days-before-issue-close: 14
           stale-issue-label: "stale"
40 .github/workflows/code-coverage.yml vendored
@@ -1,40 +0,0 @@ (file removed)
name: Code Coverage
on: [push, pull_request]

env:
  GGML_NLOOP: 3
  GGML_N_THREADS: 1

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  run:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential gcc-8 lcov

      - name: Build
        run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests

      - name: Run tests
        run: CC=gcc-8 make test

      - name: Generate coverage report
        run: |
          make coverage
          make lcov-report

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        with:
          files: lcov-report/coverage.info
.github/workflows/docker.yml
vendored
158
.github/workflows/docker.yml
vendored
|
@ -10,49 +10,50 @@
|
||||||
name: Publish Docker image
|
name: Publish Docker image
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
workflow_dispatch: # allows manual triggering
|
||||||
push:
|
schedule:
|
||||||
branches:
|
# Rebuild daily rather than on every push because it is expensive
|
||||||
- master
|
- cron: '12 4 * * *'
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
# Fine-grant permission
|
||||||
|
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||||
|
permissions:
|
||||||
|
packages: write
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
push_to_registry:
|
push_to_registry:
|
||||||
name: Push Docker image to Docker Hub
|
name: Push Docker image to Docker Hub
|
||||||
if: github.event.pull_request.draft == false
|
|
||||||
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-22.04
|
||||||
env:
|
env:
|
||||||
COMMIT_SHA: ${{ github.sha }}
|
COMMIT_SHA: ${{ github.sha }}
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- { tag: "light", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
# Multi-stage build
|
||||||
- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
|
||||||
- { tag: "server", dockerfile: ".devops/server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||||
# NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
|
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||||
# have disabled them for now until the reason why
|
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||||
# is understood.
|
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
||||||
- { tag: "light-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" }
|
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
|
||||||
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
|
#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
|
||||||
- { tag: "server-cuda", dockerfile: ".devops/server-cuda.Dockerfile", platforms: "linux/amd64" }
|
|
||||||
- { tag: "light-rocm", dockerfile: ".devops/main-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
||||||
- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
||||||
- { tag: "server-rocm", dockerfile: ".devops/server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
||||||
- { tag: "light-intel", dockerfile: ".devops/main-intel.Dockerfile", platforms: "linux/amd64" }
|
|
||||||
- { tag: "server-intel", dockerfile: ".devops/server-intel.Dockerfile", platforms: "linux/amd64" }
|
|
||||||
steps:
|
steps:
|
||||||
- name: Check out the repo
|
- name: Check out the repo
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0 # preserve git history, so we can determine the build number
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v2
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v2
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
- name: Log in to Docker Hub
|
||||||
uses: docker/login-action@v2
|
uses: docker/login-action@v2
|
||||||
|
@ -61,9 +62,45 @@ jobs:
|
||||||
username: ${{ github.repository_owner }}
|
username: ${{ github.repository_owner }}
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
|
- name: Determine tag name
|
||||||
|
id: tag
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
||||||
|
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
||||||
|
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
|
||||||
|
REPO_NAME="${{ github.event.repository.name }}"
|
||||||
|
|
||||||
|
# determine tag name postfix (build number, commit hash)
|
||||||
|
if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
|
||||||
|
TAG_POSTFIX="-b${BUILD_NUMBER}"
|
||||||
|
else
|
||||||
|
SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
|
||||||
|
TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
|
||||||
|
fi
|
||||||
|
# list all tags possible
|
||||||
|
if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
|
||||||
|
TYPE=""
|
||||||
|
else
|
||||||
|
TYPE="-${{ matrix.config.tag }}"
|
||||||
|
fi
|
||||||
|
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
|
||||||
|
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
|
||||||
|
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
|
||||||
|
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
|
||||||
|
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
|
||||||
|
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
|
||||||
|
echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
|
||||||
|
echo "full_output_tags=$FULLTAGS" # print out for debugging
|
||||||
|
echo "light_output_tags=$LIGHTTAGS" # print out for debugging
|
||||||
|
echo "server_output_tags=$SERVERTAGS" # print out for debugging
|
||||||
|
env:
|
||||||
|
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||||
|
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
||||||
|
|
||||||
- name: Free Disk Space (Ubuntu)
|
- name: Free Disk Space (Ubuntu)
|
||||||
uses: jlumbroso/free-disk-space@main
|
if: ${{ matrix.config.free_disk_space == true }}
|
||||||
|
uses: ggml-org/free-disk-space@v1.3.1
|
||||||
with:
|
with:
|
||||||
# this might remove tools that are actually needed,
|
# this might remove tools that are actually needed,
|
||||||
# if set to "true" but frees about 6 GB
|
# if set to "true" but frees about 6 GB
|
||||||
|
@ -78,40 +115,59 @@ jobs:
|
||||||
docker-images: true
|
docker-images: true
|
||||||
swap-storage: true
|
swap-storage: true
|
||||||
|
|
||||||
- name: Determine tag name
|
- name: Build and push Full Docker image (tagged + versioned)
|
||||||
id: tag
|
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
|
||||||
shell: bash
|
uses: docker/build-push-action@v6
|
||||||
run: |
|
|
||||||
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
|
||||||
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
|
||||||
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
|
||||||
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
|
||||||
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Downcase github.repository_owner
|
|
||||||
run: |
|
|
||||||
echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
|
|
||||||
env:
|
|
||||||
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
|
||||||
|
|
||||||
- name: Build and push Docker image (versioned)
|
|
||||||
if: github.event_name == 'push'
|
|
||||||
uses: docker/build-push-action@v4
|
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
push: true
|
push: true
|
||||||
platforms: ${{ matrix.config.platforms }}
|
platforms: ${{ matrix.config.platforms }}
|
||||||
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
|
# tag list is generated from step above
|
||||||
|
tags: ${{ steps.tag.outputs.full_output_tags }}
|
||||||
file: ${{ matrix.config.dockerfile }}
|
file: ${{ matrix.config.dockerfile }}
|
||||||
|
target: full
|
||||||
|
provenance: false
|
||||||
|
# using github experimental cache
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
# return to this if the experimental github cache is having issues
|
||||||
|
#cache-to: type=local,dest=/tmp/.buildx-cache
|
||||||
|
#cache-from: type=local,src=/tmp/.buildx-cache
|
||||||
|
|
||||||
- name: Build and push Docker image (tagged)
|
- name: Build and push Light Docker image (tagged + versioned)
|
||||||
uses: docker/build-push-action@v4
|
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
push: ${{ github.event_name == 'push' }}
|
push: true
|
||||||
platforms: ${{ matrix.config.platforms }}
|
platforms: ${{ matrix.config.platforms }}
|
||||||
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
|
# tag list is generated from step above
|
||||||
|
tags: ${{ steps.tag.outputs.light_output_tags }}
|
||||||
file: ${{ matrix.config.dockerfile }}
|
file: ${{ matrix.config.dockerfile }}
|
||||||
|
target: light
|
||||||
|
provenance: false
|
||||||
|
# using github experimental cache
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
# return to this if the experimental github cache is having issues
|
||||||
|
#cache-to: type=local,dest=/tmp/.buildx-cache
|
||||||
|
#cache-from: type=local,src=/tmp/.buildx-cache
|
||||||
|
|
||||||
|
- name: Build and push Server Docker image (tagged + versioned)
|
||||||
|
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
platforms: ${{ matrix.config.platforms }}
|
||||||
|
# tag list is generated from step above
|
||||||
|
tags: ${{ steps.tag.outputs.server_output_tags }}
|
||||||
|
file: ${{ matrix.config.dockerfile }}
|
||||||
|
target: server
|
||||||
|
provenance: false
|
||||||
|
# using github experimental cache
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
# return to this if the experimental github cache is having issues
|
||||||
|
#cache-to: type=local,dest=/tmp/.buildx-cache
|
||||||
|
#cache-from: type=local,src=/tmp/.buildx-cache
|
||||||
|
|
4
.github/workflows/editorconfig.yml
vendored
4
.github/workflows/editorconfig.yml
vendored
|
@ -23,5 +23,7 @@ jobs:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: editorconfig-checker/action-editorconfig-checker@main
|
- uses: editorconfig-checker/action-editorconfig-checker@v2
|
||||||
|
with:
|
||||||
|
version: v3.0.3
|
||||||
- run: editorconfig-checker
|
- run: editorconfig-checker
|
||||||
|
|
65 .github/workflows/nix-ci-aarch64.yml vendored
@@ -1,65 +0,0 @@ (file removed)
name: Nix aarch64 builds

on:
  workflow_dispatch: # allows manual triggering
  schedule:
    # Rebuild daily rather than on every push because QEMU is expensive (e.g.
    # 1.5h instead of minutes with the cold cache).
    #
    # randint(0, 59), randint(0, 23)
    - cron: '26 12 * * *'
  # But also rebuild if we touched any of the Nix expressions:
  push:
    branches:
      - master
    paths: ['**/*.nix', 'flake.lock']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['**/*.nix', 'flake.lock']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  nix-build-aarch64:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install QEMU
        # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
        run: |
          sudo apt-get update
          sudo apt-get install -y qemu-user-static qemu-system-aarch64
          sudo usermod -a -G kvm $USER
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-platforms = aarch64-linux
            extra-system-features = nixos-test kvm
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
      - name: Set-up cachix to push the results to
        uses: cachix/cachix-action@v13
        with:
          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
          name: llama-cpp
      - name: Show all output paths
        run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.aarch64-linux"
      - name: Build
        run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --systems aarch64-linux
          --flake
          ".#checks.aarch64-linux"
72 .github/workflows/nix-ci.yml vendored
@@ -1,72 +0,0 @@ (file removed)
name: Nix CI

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
  pull_request:
    types: [opened, synchronize, reopened]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  nix-eval:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
      - name: List all flake outputs
        run: nix flake show --all-systems
      - name: Show all output paths
        run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
  nix-build:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
      - name: Set-up cachix to push the results to
        uses: cachix/cachix-action@v13
        with:
          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
          name: llama-cpp
      - name: Build
        run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --flake
          ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
22 .github/workflows/nix-flake-update.yml vendored
@@ -1,22 +0,0 @@ (file removed)
name: update-flake-lock
on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00

jobs:
  lockfile:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@main
      - name: Update flake.lock
        uses: DeterminateSystems/update-flake-lock@main
        with:
          pr-title: "nix: update flake.lock"
          pr-labels: |
            nix
          pr-reviewers: philiptaron,SomeoneSerge
          token: ${{ secrets.FLAKE_TOKEN }}
36 .github/workflows/nix-publish-flake.yml vendored
@@ -1,36 +0,0 @@ (file removed)
# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
name: "Publish a flake to flakestry & flakehub"
on:
  push:
    tags:
      - "*"
  workflow_dispatch:
    inputs:
      tag:
        description: "The existing tag to publish"
        type: "string"
        required: true
jobs:
  flakestry-publish:
    runs-on: ubuntu-latest
    permissions:
      id-token: "write"
      contents: "read"
    steps:
      - uses: flakestry/flakestry-publish@main
        with:
          version: "${{ inputs.tag || github.ref_name }}"
  flakehub-publish:
    runs-on: "ubuntu-latest"
    permissions:
      id-token: "write"
      contents: "read"
    steps:
      - uses: "actions/checkout@v4"
        with:
          ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
      - uses: "DeterminateSystems/nix-installer-action@main"
      - uses: "DeterminateSystems/flakehub-push@main"
        with:
          visibility: "public"
          tag: "${{ inputs.tag }}"

@@ -6,15 +6,13 @@ on:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
   pull_request:
     paths:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
9 .github/workflows/python-lint.yml vendored
@@ -1,6 +1,13 @@
 name: flake8 Lint

-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
40 .github/workflows/python-type-check.yml vendored Normal file
@@ -0,0 +1,40 @@ (new file)
name: Python Type-Check

on:
  push:
    paths:
      - '.github/workflows/python-type-check.yml'
      - 'pyrightconfig.json'
      - '**.py'
      - '**/requirements*.txt'
  pull_request:
    paths:
      - '.github/workflows/python-type-check.yml'
      - 'pyrightconfig.json'
      - '**.py'
      - '**/requirements*.txt'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-type-check:
    runs-on: ubuntu-latest
    name: pyright type-check
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install Python dependencies
        # TODO: use a venv
        run: pip install -r requirements/requirements-all.txt
      - name: Type-check with Pyright
        uses: jakebailey/pyright-action@v2
        with:
          version: 1.1.382
          level: warning
          warnings: true
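A rough local equivalent of this check, assuming the pinned pyright version is also available from PyPI (the action may invoke pyright slightly differently than shown here):

    pip install -r requirements/requirements-all.txt
    pip install pyright==1.1.382
    pyright   # picks up pyrightconfig.json from the repository root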
.github/workflows/server.yml (110 changes, vendored)

@@ -20,6 +20,12 @@ on:
     types: [opened, synchronize, reopened]
     paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
 
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true

@@ -30,7 +36,7 @@ jobs:
 
     strategy:
       matrix:
-        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
         build_type: [RelWithDebInfo]
         include:
           - build_type: Release

@@ -70,46 +76,108 @@
       run: |
         pip install -r examples/server/tests/requirements.txt
 
-    - name: Verify server deps
-      id: verify_server_deps
+    # Setup nodejs (to be used for verifying bundled index.html)
+    - uses: actions/setup-node@v4
+      with:
+        node-version: '22.11.0'
+
+    - name: WebUI - Install dependencies
+      id: webui_lint
+      run: |
+        cd examples/server/webui
+        npm ci
+
+    - name: WebUI - Check code format
+      id: webui_format
       run: |
         git config --global --add safe.directory $(realpath .)
-        cd examples/server
-        git ls-files --others --modified
+        cd examples/server/webui
         git status
-        ./deps.sh
+        npm run format
         git status
-        not_ignored_files="$(git ls-files --others --modified)"
-        echo "Modified files: ${not_ignored_files}"
-        if [ -n "${not_ignored_files}" ]; then
-          echo "Repository is dirty or server deps are not built as expected"
-          echo "${not_ignored_files}"
+        modified_files="$(git status -s)"
+        echo "Modified files: ${modified_files}"
+        if [ -n "${modified_files}" ]; then
+          echo "Files do not follow coding style. To fix: npm run format"
+          echo "${modified_files}"
           exit 1
         fi
 
-    - name: Build
-      id: cmake_build
+    - name: Verify bundled index.html
+      id: verify_server_index_html
+      run: |
+        git config --global --add safe.directory $(realpath .)
+        cd examples/server/webui
+        git status
+
+        npm run build
+        git status
+        modified_files="$(git status -s)"
+        echo "Modified files: ${modified_files}"
+        if [ -n "${modified_files}" ]; then
+          echo "Repository is dirty or server/webui is not built as expected"
+          echo "Hint: You may need to follow Web UI build guide in server/README.md"
+          echo "${modified_files}"
+          exit 1
+        fi
+
+    - name: Build (no OpenMP)
+      id: cmake_build_no_openmp
+      if: ${{ matrix.sanitizer == 'THREAD' }}
       run: |
         cmake -B build \
-          -DLLAMA_NATIVE=OFF \
+          -DGGML_NATIVE=OFF \
+          -DLLAMA_BUILD_SERVER=ON \
+          -DLLAMA_CURL=ON \
+          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+          -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+          -DGGML_OPENMP=OFF ;
+        cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+    - name: Build (sanitizers)
+      id: cmake_build_sanitizers
+      if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
+      run: |
+        cmake -B build \
+          -DGGML_NATIVE=OFF \
           -DLLAMA_BUILD_SERVER=ON \
           -DLLAMA_CURL=ON \
           -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
           -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
-        cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server
+        cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+    - name: Build (sanitizers)
+      id: cmake_build
+      if: ${{ matrix.sanitizer == '' }}
+      run: |
+        cmake -B build \
+          -DGGML_NATIVE=OFF \
+          -DLLAMA_BUILD_SERVER=ON \
+          -DLLAMA_CURL=ON \
+          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
+        cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
 
     - name: Tests
       id: server_integration_tests
+      if: ${{ matrix.sanitizer == '' }}
       run: |
         cd examples/server/tests
-        PORT=8888 ./tests.sh
+        ./tests.sh
+
+    - name: Tests (sanitizers)
+      id: server_integration_tests_sanitizers
+      if: ${{ matrix.sanitizer != '' }}
+      run: |
+        cd examples/server/tests
+        LLAMA_SANITIZE=1 ./tests.sh
 
     - name: Slow tests
       id: server_integration_tests_slow
       if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
       run: |
         cd examples/server/tests
-        PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+        SLOW_TESTS=1 ./tests.sh
 
 
   server-windows:

@@ -136,7 +204,7 @@ jobs:
       id: cmake_build
       run: |
        cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
-       cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+       cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
 
     - name: Python setup
       id: setup_python

@@ -159,11 +227,13 @@ jobs:
      if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
      run: |
        cd examples/server/tests
-       behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+       $env:PYTHONIOENCODING = ":replace"
+       pytest -v -x -m "not slow"
 
    - name: Slow tests
      id: server_integration_tests_slow
      if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
      run: |
        cd examples/server/tests
-       behave.exe --stop --no-skipped --no-capture --tags slow
+       $env:SLOW_TESTS = "1"
+       pytest -v -x
.gitignore (178 changes, vendored)

@@ -1,129 +1,145 @@
-*.o
+# Extensions
+
 *.a
-*.so
+*.bat
+*.bin
+*.d
+*.dll
+*.dot
+*.etag
+*.exe
+*.gcda
+*.gcno
+*.gcov
 *.gguf
 *.gguf.json
-*.bin
-*.exe
-*.dll
-*.log
-*.gcov
-*.gcno
-*.gcda
-*.dot
-*.bat
-*.tmp
-*.metallib
-*.etag
 *.lastModified
-.DS_Store
-.build/
+*.log
+*.metallib
+*.o
+*.so
+*.swp
+*.tmp
+
+# IDE / OS
+
 .cache/
 .ccls-cache/
 .direnv/
+.DS_Store
 .envrc
+.idea/
 .swiftpm
-.venv
-.clang-tidy
 .vs/
 .vscode/
-.idea/
+nppBackup
 
-ggml-metal-embed.metal
-
-lcov-report/
+# Coverage
 
 gcovr-report/
+lcov-report/
+
+# Build Artifacts
+
 tags
+.build/
 build*
+!build-info.cmake
+!build-info.cpp.in
+!build-info.sh
 !build.zig
-cmake-build-*
+!docs/build.md
+/libllama.so
+/llama-*
+/vulkan-shaders-gen
 android-ndk-*
+arm_neon.h
+cmake-build-*
+CMakeSettings.json
+compile_commands.json
+ggml-metal-embed.metal
+llama-batched-swift
+/rpc-server
 out/
 tmp/
+autogen-*.md
+
+# Deprecated
+
+/main
+/server
+
+# CI
+
+!.github/workflows/*.yml
+
+# Models
+
 models/*
 models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
 
-/Pipfile
-/baby-llama
-/beam-search
-/benchmark-matmult
-/convert-llama2c-to-ggml
-/embd-input-test
-/embedding
-/eval-callback
-/gguf
-/gguf-llama-simple
-/gguf-split
-/gritlm
-/imatrix
-/infill
-/libllama.so
-/llama-bench
-/llava-cli
-/lookahead
-/lookup
-/lookup-create
-/lookup-merge
-/lookup-stats
-/main
-/metal
-/passkey
-/perplexity
-/q8dot
-/quantize
-/quantize-stats
-/result
-/save-load-state
-/server
-/simple
-/batched
-/batched-bench
-/export-lora
-/finetune
-/retrieval
-/speculative
-/parallel
-/train-text-from-scratch
-/tokenize
-/vdot
-/common/build-info.cpp
-arm_neon.h
-compile_commands.json
-CMakeSettings.json
-
-__pycache__
-dist
-
+# Zig
+
 zig-out/
 zig-cache/
+
+# Logs
+
 ppl-*.txt
 qnt-*.txt
 perf-*.txt
 
+# Examples
+
 examples/jeopardy/results.txt
+examples/server/*.css.hpp
 examples/server/*.html.hpp
 examples/server/*.js.hpp
 examples/server/*.mjs.hpp
-examples/server/*.css.hpp
+!build_64.sh
+!examples/*.bat
+!examples/*/*.kts
+!examples/*/*/*.kts
+!examples/sycl/*.bat
+!examples/sycl/*.sh
 
-poetry.lock
+# Server Web UI temporary files
+node_modules
+examples/server/webui/dist
+
+# Python
+
+/.venv
+__pycache__/
+*/poetry.lock
 poetry.toml
-nppBackup
+
+# Nix
+/result
 
 # Test binaries
-/tests/test-grammar-parser
-/tests/test-llama-grammar
+/tests/test-backend-ops
 /tests/test-double-float
 /tests/test-grad0
+/tests/test-grammar-parser
+/tests/test-llama-grammar
 /tests/test-opt
 /tests/test-quantize-fns
 /tests/test-quantize-perf
+/tests/test-rope
 /tests/test-sampling
 /tests/test-tokenizer-0
-/tests/test-tokenizer-1-spm
 /tests/test-tokenizer-1-bpe
-/tests/test-rope
-/tests/test-backend-ops
+/tests/test-tokenizer-1-spm
+
+# Scripts
+!/scripts/install-oneapi.bat
+
+# Test models for lora adapters
+/lora-tests
+
+# Local scripts
+/run-vim.sh
+/run-chat.sh
.gitmodules (2 changes, vendored)

@@ -1,3 +1,3 @@
 [submodule "kompute"]
-	path = kompute
+	path = ggml/src/ggml-kompute/kompute
 	url = https://github.com/nomic-ai/kompute.git
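Note (not part of the diff): because the submodule path moves, existing checkouts need their submodule configuration refreshed. A minimal sketch using standard git commands:

```sh
# run from the repository root after pulling the change
git submodule sync                        # pick up the new path from .gitmodules
git submodule update --init --recursive   # re-checkout kompute under ggml/src/ggml-kompute/
```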
CMakeLists.txt (1408 changes) — file diff suppressed because it is too large
CMakePresets.json

@@ -11,15 +11,37 @@
             "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
         }
     },
+    {
+        "name": "sycl-base",
+        "hidden": true,
+        "generator": "Ninja",
+        "binaryDir": "${sourceDir}/build-${presetName}",
+        "cacheVariables": {
+            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+            "CMAKE_CXX_COMPILER": "icx",
+            "CMAKE_C_COMPILER": "cl",
+            "GGML_SYCL": "ON",
+            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        }
+    },
+    { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
+    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+    { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+    { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
+    { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
+    { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
 
-    { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
-    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
-    { "name": "static", "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },
+    {
+        "name": "x64-windows-llvm", "hidden": true,
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
+        }
+    },
 
     {
         "name": "arm64-windows-msvc", "hidden": true,
         "architecture": { "value": "arm64", "strategy": "external" },
-        "toolset": { "value": "host=x86_64", "strategy": "external" },
+        "toolset": { "value": "host=x64", "strategy": "external" },
         "cacheVariables": {
             "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
         }

@@ -27,23 +49,49 @@
 
     {
         "name": "arm64-windows-llvm", "hidden": true,
         "architecture": { "value": "arm64", "strategy": "external" },
-        "toolset": { "value": "host=x86_64", "strategy": "external" },
+        "toolset": { "value": "host=x64", "strategy": "external" },
         "cacheVariables": {
             "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
         }
     },
 
-    { "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
-    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "release" ] },
-    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "release", "static" ] },
+    {
+        "name": "arm64-apple-clang", "hidden": true,
+        "architecture": { "value": "arm64", "strategy": "external" },
+        "toolset": { "value": "host=x64", "strategy": "external" },
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
+        }
+    },
 
-    { "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
-    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "release" ] },
-    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "release", "static" ] },
+    { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
+    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
+    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
 
-    { "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
-    { "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] },
-    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] }
+    { "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
+    { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
+    { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
+
+    { "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
+    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
+    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
+
+    { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
+    { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
+    { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
+    { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
+
+    { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
+    { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+
+    { "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
+    { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
+    { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
+    { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
+
+    { "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
+    { "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
   ]
 }
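Usage note (not part of the diff): configure presets like the ones added above are normally consumed with `cmake --preset`. A minimal sketch, assuming the inherited `base` preset places build output in `build-<presetName>` the way the `sycl-base` preset shown here does:

```sh
# configure with one of the presets listed in the diff, then build the configured tree
cmake --preset x64-windows-llvm-release
cmake --build build-x64-windows-llvm-release
```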
CODEOWNERS (new file, 11 lines)

@@ -0,0 +1,11 @@
+# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
+
+/ci/ @ggerganov
+/.devops/*.Dockerfile @ngxson
+/examples/server/ @ngxson
+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
+/ggml/src/ggml-opt.cpp @JohannesGaessler
+/ggml/src/gguf.cpp @JohannesGaessler
CONTRIBUTING.md (131 changes)

@@ -1,14 +1,125 @@
-# Contributing Guidelines
+# Pull requests (for contributors)
 
-## Checklist
+- Test your changes:
+  - Execute [the full CI locally on your machine](ci/README.md) before publishing
+  - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`)
+  - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
+  - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
+- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
+- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
 
-* Make sure your PR follows the [coding guidelines](https://github.com/ggerganov/llama.cpp/blob/master/README.md#coding-guidelines)
-* Test your changes using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
-* Execute [the full CI locally on your machine](ci/README.md) before publishing
+# Pull requests (for collaborators)
 
-## PR formatting
+- Squash-merge PRs
+- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
+- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
 
-* Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
-- The PR template has a series of review complexity checkboxes `[ ]` that you can mark as `[X]` for your conveience. Refer to [About task lists](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) for more information.
-* If the pull request only contains documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times.
-* When squashing multiple commits on merge, use the following format for your commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : Fix typo in utils.py (#1234)`
+# Coding guidelines
+
+- Avoid adding third-party dependencies, extra files, extra headers, etc.
+- Always consider cross-compatibility with other operating systems and architectures
+- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- Vertical alignment makes things more readable and easier to batch edit
+- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets
+- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo`
+- In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary
+  ```cpp
+  // OK
+  llama_context * ctx;
+  const llama_rope_type rope_type;
+
+  // not OK
+  struct llama_context * ctx;
+  const enum llama_rope_type rope_type;
+  ```
+
+  _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_
+
+- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code
+- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
+- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+
+  (figure: matrix multiplication illustration)
+
+# Naming guidelines
+
+- Use `snake_case` for function, variable and type names
+- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
+
+  ```cpp
+  // not OK
+  int small_number;
+  int big_number;
+
+  // OK
+  int number_small;
+  int number_big;
+  ```
+
+- Enum values are always in upper case and prefixed with the enum name
+
+  ```cpp
+  enum llama_vocab_type {
+      LLAMA_VOCAB_TYPE_NONE = 0,
+      LLAMA_VOCAB_TYPE_SPM  = 1,
+      LLAMA_VOCAB_TYPE_BPE  = 2,
+      LLAMA_VOCAB_TYPE_WPM  = 3,
+      LLAMA_VOCAB_TYPE_UGM  = 4,
+      LLAMA_VOCAB_TYPE_RWKV = 5,
+  };
+  ```
+
+- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>`
+
+  ```cpp
+  llama_model_init();           // class: "llama_model",         method: "init"
+  llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove"
+  llama_sampler_get_seed();     // class: "llama_sampler",       method: "get_seed"
+  llama_set_embeddings();       // class: "llama_context",       method: "set_embeddings"
+  llama_n_threads();            // class: "llama_context",       method: "n_threads"
+  llama_adapter_lora_free();    // class: "llama_adapter_lora",  method: "free"
+  ```
+
+- The `get` `<action>` can be omitted
+- The `<noun>` can be omitted if not necessary
+- The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed
+- Use `init`/`free` for constructor/destructor `<action>`
+
+- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else
+
+  ```cpp
+  typedef struct llama_context * llama_context_t;
+
+  enum llama_pooling_type llama_pooling_type(const llama_context_t ctx);
+  ```
+
+  _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_
+
+- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension
+- Python filenames are all lowercase with underscores
+
+- _(TODO: abbreviations usage)_
+
+# Preprocessor directives
+
+- _(TODO: add guidelines with examples and apply them to the codebase)_
+
+  ```cpp
+  #ifdef FOO
+  #endif // FOO
+  ```
+
+# Documentation
+
+- Documentation is a community effort
+- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference
+- When you notice incorrect or outdated documentation, please update it
+
+# Resources
+
+The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects:
+
+https://github.com/ggerganov/llama.cpp/projects
Package.swift

@@ -2,45 +2,6 @@
 
 import PackageDescription
 
-var sources = [
-    "ggml.c",
-    "sgemm.cpp",
-    "llama.cpp",
-    "unicode.cpp",
-    "unicode-data.cpp",
-    "ggml-alloc.c",
-    "ggml-backend.c",
-    "ggml-quants.c",
-]
-
-var resources: [Resource] = []
-var linkerSettings: [LinkerSetting] = []
-var cSettings: [CSetting] = [
-    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-    .unsafeFlags(["-fno-objc-arc"]),
-    // NOTE: NEW_LAPACK will required iOS version 16.4+
-    // We should consider add this in the future when we drop support for iOS 14
-    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-    // .define("ACCELERATE_NEW_LAPACK"),
-    // .define("ACCELERATE_LAPACK_ILP64")
-]
-
-#if canImport(Darwin)
-sources.append("ggml-metal.m")
-resources.append(.process("ggml-metal.metal"))
-linkerSettings.append(.linkedFramework("Accelerate"))
-cSettings.append(
-    contentsOf: [
-        .define("GGML_USE_ACCELERATE"),
-        .define("GGML_USE_METAL")
-    ]
-)
-#endif
-
-#if os(Linux)
-    cSettings.append(.define("_GNU_SOURCE"))
-#endif
-
 let package = Package(
     name: "llama",
     platforms: [

@@ -53,26 +14,6 @@ let package = Package(
         .library(name: "llama", targets: ["llama"]),
     ],
     targets: [
-        .target(
-            name: "llama",
-            path: ".",
-            exclude: [
-               "cmake",
-               "examples",
-               "scripts",
-               "models",
-               "tests",
-               "CMakeLists.txt",
-               "ggml-cuda.cu",
-               "ggml-cuda.h",
-               "Makefile"
-            ],
-            sources: sources,
-            resources: resources,
-            publicHeadersPath: "spm-headers",
-            cSettings: cSettings,
-            linkerSettings: linkerSettings
-        )
-    ],
-    cxxLanguageStandard: .cxx11
+        .systemLibrary(name: "llama", pkgConfig: "llama"),
+    ]
 )
Sources/llama/llama.h (new file, 4 lines)

@@ -0,0 +1,4 @@
+#pragma once
+
+#include <llama.h>
+
Sources/llama/module.modulemap (new file, 5 lines)

@@ -0,0 +1,5 @@
+module llama [system] {
+    header "llama.h"
+    link "llama"
+    export *
+}
ci/run.sh (363 changes)

@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
 #
 # sample usage:
 #

@@ -13,6 +13,9 @@
 # # with SYCL support
 # GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 #
+# # with VULKAN support
+# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
 
 if [ -z "$2" ]; then
     echo "usage: $0 <output-dir> <mnt-dir>"

@@ -36,11 +39,11 @@ SRC=`pwd`
 CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"
 
 if [ ! -z ${GG_BUILD_METAL} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_METAL_SHADER_DEBUG=ON"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON"
 fi
 
 if [ ! -z ${GG_BUILD_CUDA} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_CUDA=1"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
 fi
 
 if [ ! -z ${GG_BUILD_SYCL} ]; then

@@ -50,7 +53,11 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
         exit 1
     fi
 
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_SYCL=1 DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
+fi
+
+if [ ! -z ${GG_BUILD_VULKAN} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
 fi
 
 ## helpers

@@ -103,8 +110,11 @@ function gg_run_ctest_debug {
 
     set -e
 
+    # Check cmake, make and ctest are installed
+    gg_check_build_requirements
+
     (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
 
     (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log

@@ -131,8 +141,11 @@ function gg_run_ctest_release {
 
     set -e
 
+    # Check cmake, make and ctest are installed
+    gg_check_build_requirements
+
     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
 
     if [ -z ${GG_BUILD_LOW_PERF} ]; then
         (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log

@@ -260,7 +273,6 @@ function gg_sum_ctest_with_model_release {
 }
 
 # open_llama_7b_v2
-# requires: GG_BUILD_CUDA
 
 function gg_run_open_llama_7b_v2 {
     cd ${SRC}

@@ -284,10 +296,10 @@ function gg_run_open_llama_7b_v2 {
 
     set -e
 
-    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
 
-    python3 ../examples/convert-legacy-llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+    python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
 
     model_f16="${path_models}/ggml-model-f16.gguf"
     model_q8_0="${path_models}/ggml-model-q8_0.gguf"

@@ -303,47 +315,47 @@ function gg_run_open_llama_7b_v2 {
 
     wiki_test="${path_wiki}/wiki.test.raw"
 
-    ./bin/quantize ${model_f16} ${model_q8_0} q8_0
-    ./bin/quantize ${model_f16} ${model_q4_0} q4_0
-    ./bin/quantize ${model_f16} ${model_q4_1} q4_1
-    ./bin/quantize ${model_f16} ${model_q5_0} q5_0
-    ./bin/quantize ${model_f16} ${model_q5_1} q5_1
-    ./bin/quantize ${model_f16} ${model_q2_k} q2_k
-    ./bin/quantize ${model_f16} ${model_q3_k} q3_k
-    ./bin/quantize ${model_f16} ${model_q4_k} q4_k
-    ./bin/quantize ${model_f16} ${model_q5_k} q5_k
-    ./bin/quantize ${model_f16} ${model_q6_k} q6_k
+    ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
+    ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
+    ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
+    ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
+    ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
+    ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
+    ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
+    ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
+    ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
+    ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
 
-    (time ./bin/main --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/main --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/main --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
-    (time ./bin/main --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
-    (time ./bin/main --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
-    (time ./bin/main --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
-    (time ./bin/main --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
-    (time ./bin/main --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
-    (time ./bin/main --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
-    (time ./bin/main --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
-    (time ./bin/main --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
 
-    (time ./bin/perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
-    (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
-    (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
-    (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
-    (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
-    (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
-    (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
-    (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
-    (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+    (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
+    (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
+    (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
+    (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
+    (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
+    (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
+    (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
+    (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
 
-    (time ./bin/imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
+    (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
 
-    (time ./bin/save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-    (time ./bin/save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-    (time ./bin/save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-    (time ./bin/save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
 
     function check_ppl {
         qnt="$1"

@@ -419,9 +431,9 @@ function gg_run_pythia_1_4b {
     set -e
 
     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
 
-    python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+    python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
 
     model_f16="${path_models}/ggml-model-f16.gguf"
     model_q8_0="${path_models}/ggml-model-q8_0.gguf"

@@ -437,45 +449,45 @@ function gg_run_pythia_1_4b {
 
     wiki_test_60="${path_wiki}/wiki.test-60.raw"
 
-    ./bin/quantize ${model_f16} ${model_q8_0} q8_0
-    ./bin/quantize ${model_f16} ${model_q4_0} q4_0
-    ./bin/quantize ${model_f16} ${model_q4_1} q4_1
-    ./bin/quantize ${model_f16} ${model_q5_0} q5_0
-    ./bin/quantize ${model_f16} ${model_q5_1} q5_1
-    ./bin/quantize ${model_f16} ${model_q2_k} q2_k
-    ./bin/quantize ${model_f16} ${model_q3_k} q3_k
-    ./bin/quantize ${model_f16} ${model_q4_k} q4_k
-    ./bin/quantize ${model_f16} ${model_q5_k} q5_k
-    ./bin/quantize ${model_f16} ${model_q6_k} q6_k
+    ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
+    ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
+    ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
+    ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
+    ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
+    ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
+    ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
+    ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
+    ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
+    ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
 
-    (time ./bin/main --model ${model_f16} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
-    (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
-    (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
-    (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
-    (time ./bin/main --model ${model_q2_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
-    (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
-    (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
-    (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
-    (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
 
-    (time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
-    (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
-    (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
-    (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
-    (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
-    (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
-    (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
-    (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
-    (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+    (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
+    (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
+    (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
+    (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
+    (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
+    (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
+    (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
+    (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
 
-    (time ./bin/imatrix --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
+    (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
 
-    (time ./bin/save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-    (time ./bin/save-load-state -fa --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
 
     function check_ppl {
         qnt="$1"

@@ -529,7 +541,6 @@ function gg_sum_pythia_1_4b {
 }
 
 # pythia_2_8b
-# requires: GG_BUILD_CUDA
 
 function gg_run_pythia_2_8b {
     cd ${SRC}

@@ -550,10 +561,10 @@ function gg_run_pythia_2_8b {
 
     set -e
 
-    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
 
-    python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+    python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
 
     model_f16="${path_models}/ggml-model-f16.gguf"
     model_q8_0="${path_models}/ggml-model-q8_0.gguf"

@@ -569,47 +580,47 @@ function gg_run_pythia_2_8b {
 
     wiki_test="${path_wiki}/wiki.test.raw"
 
-    ./bin/quantize ${model_f16} ${model_q8_0} q8_0
-    ./bin/quantize ${model_f16} ${model_q4_0} q4_0
-    ./bin/quantize ${model_f16} ${model_q4_1} q4_1
-    ./bin/quantize ${model_f16} ${model_q5_0} q5_0
-    ./bin/quantize ${model_f16} ${model_q5_1} q5_1
-    ./bin/quantize ${model_f16} ${model_q2_k} q2_k
-    ./bin/quantize ${model_f16} ${model_q3_k} q3_k
-    ./bin/quantize ${model_f16} ${model_q4_k} q4_k
-    ./bin/quantize ${model_f16} ${model_q5_k} q5_k
-    ./bin/quantize ${model_f16} ${model_q6_k} q6_k
+    ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
+    ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
+    ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
+    ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
+    ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
+    ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
+    ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
+    ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
+    ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
+    ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
 
-    (time ./bin/main --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/main --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/main --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
-    (time ./bin/main --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
-    (time ./bin/main --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
-    (time ./bin/main --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
-    (time ./bin/main --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
-    (time ./bin/main --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
-    (time ./bin/main --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
-    (time ./bin/main --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
-    (time ./bin/main --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
+    (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
 
-    (time ./bin/perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||||
|
|
||||||
(time ./bin/save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
(time ./bin/save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
(time ./bin/save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
(time ./bin/save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
|
||||||
function check_ppl {
|
function check_ppl {
|
||||||
qnt="$1"
|
qnt="$1"
|
||||||
|
@ -686,17 +697,17 @@ function gg_run_embd_bge_small {
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||||
|
|
||||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||||
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||||
|
|
||||||
./bin/quantize ${model_f16} ${model_q8_0} q8_0
|
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
|
||||||
|
|
||||||
(time ./bin/embedding --model ${model_f16} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/embedding --model ${model_q8_0} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
|
|
||||||
set +e
|
set +e
|
||||||
}
|
}
|
||||||
|
@ -710,8 +721,92 @@ function gg_sum_embd_bge_small {
|
||||||
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# rerank_tiny
|
||||||
|
|
||||||
|
function gg_run_rerank_tiny {
|
||||||
|
cd ${SRC}
|
||||||
|
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json
|
||||||
|
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
|
||||||
|
|
||||||
|
gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json
|
||||||
|
|
||||||
|
path_models="../models-mnt/rerank-tiny"
|
||||||
|
|
||||||
|
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
|
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
|
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||||
|
|
||||||
|
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||||
|
|
||||||
|
# for this model, the SEP token is "</s>"
|
||||||
|
(time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
|
||||||
|
|
||||||
|
# sample output
|
||||||
|
# rerank score 0: 0.029
|
||||||
|
# rerank score 1: 0.029
|
||||||
|
# rerank score 2: 0.135
|
||||||
|
|
||||||
|
# check that the score is in the range [$3, $4]
|
||||||
|
function check_score {
|
||||||
|
qnt="$1"
|
||||||
|
score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
||||||
|
|
||||||
|
if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then
|
||||||
|
printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4"
|
||||||
|
return 20
|
||||||
|
fi
|
||||||
|
|
||||||
|
printf ' - %s @ %s OK\n' "$qnt" "$score"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
||||||
|
check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
||||||
|
check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
|
||||||
|
|
||||||
|
set +e
|
||||||
|
}
|
||||||
|
|
||||||
|
function gg_sum_rerank_tiny {
|
||||||
|
gg_printf '### %s\n\n' "${ci}"
|
||||||
|
|
||||||
|
gg_printf 'Rerank Tiny (Jina):\n'
|
||||||
|
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||||
|
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)"
|
||||||
|
}
|
||||||
|
|
||||||
|
function gg_check_build_requirements {
|
||||||
|
if ! command -v cmake &> /dev/null; then
|
||||||
|
gg_printf 'cmake not found, please install'
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v make &> /dev/null; then
|
||||||
|
gg_printf 'make not found, please install'
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v ctest &> /dev/null; then
|
||||||
|
gg_printf 'ctest not found, please install'
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
## main
|
## main
|
||||||
|
|
||||||
|
export LLAMA_LOG_PREFIX=1
|
||||||
|
export LLAMA_LOG_TIMESTAMPS=1
|
||||||
|
|
||||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||||
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
|
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
|
||||||
rm -rf ${SRC}/models-mnt
|
rm -rf ${SRC}/models-mnt
|
||||||
|
@ -720,7 +815,10 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||||
ln -sfn ${mnt_models} ${SRC}/models-mnt
|
ln -sfn ${mnt_models} ${SRC}/models-mnt
|
||||||
|
|
||||||
# Create a fresh python3 venv and enter it
|
# Create a fresh python3 venv and enter it
|
||||||
python3 -m venv "$MNT/venv"
|
if ! python3 -m venv "$MNT/venv"; then
|
||||||
|
echo "Error: Failed to create Python virtual environment at $MNT/venv."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
source "$MNT/venv/bin/activate"
|
source "$MNT/venv/bin/activate"
|
||||||
|
|
||||||
pip install -r ${SRC}/requirements.txt --disable-pip-version-check
|
pip install -r ${SRC}/requirements.txt --disable-pip-version-check
|
||||||
|
@ -734,6 +832,7 @@ test $ret -eq 0 && gg_run ctest_release
|
||||||
|
|
||||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||||
test $ret -eq 0 && gg_run embd_bge_small
|
test $ret -eq 0 && gg_run embd_bge_small
|
||||||
|
test $ret -eq 0 && gg_run rerank_tiny
|
||||||
|
|
||||||
if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
|
if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
|
||||||
test $ret -eq 0 && gg_run test_scripts_debug
|
test $ret -eq 0 && gg_run test_scripts_debug
|
||||||
|
@ -741,7 +840,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
|
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
|
||||||
if [ -z ${GG_BUILD_CUDA} ]; then
|
if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then
|
||||||
test $ret -eq 0 && gg_run pythia_1_4b
|
test $ret -eq 0 && gg_run pythia_1_4b
|
||||||
else
|
else
|
||||||
test $ret -eq 0 && gg_run pythia_2_8b
|
test $ret -eq 0 && gg_run pythia_2_8b
|
||||||
|
|
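The new check_score helper above gates the CI run on the reranker's output. A minimal standalone sketch of the same idea (the input line here is a hypothetical example, not part of the diff): grep pulls the last decimal number out of the log line and bc performs the floating-point comparison that plain bash cannot.

# sketch: the range check used by check_score, on a hypothetical log line
line="rerank score 2: 0.135"
score=$(echo "$line" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)  # last decimal number on the line
lo="0.10"; hi="0.30"
if [ $(echo "$score < $lo" | bc) -eq 1 ] || [ $(echo "$score > $hi" | bc) -eq 1 ]; then
    echo "FAIL: score $score not in range [$lo, $hi]"
else
    echo "OK: score $score"
fi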
cmake/arm64-apple-clang.cmake (new file, 16 lines)

set( CMAKE_SYSTEM_NAME Darwin )
set( CMAKE_SYSTEM_PROCESSOR arm64 )

set( target arm64-apple-darwin-macho )

set( CMAKE_C_COMPILER clang )
set( CMAKE_CXX_COMPILER clang++ )

set( CMAKE_C_COMPILER_TARGET ${target} )
set( CMAKE_CXX_COMPILER_TARGET ${target} )

set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )

set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
@@ -44,7 +44,7 @@ if(MSVC)
     set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
 else()
     execute_process(
-        COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
+        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
         OUTPUT_VARIABLE OUT
         OUTPUT_STRIP_TRAILING_WHITESPACE
     )
cmake/common.cmake (new file, 33 lines)

function(llama_add_compile_flags)
    if (LLAMA_FATAL_WARNINGS)
        if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
            list(APPEND C_FLAGS -Werror)
            list(APPEND CXX_FLAGS -Werror)
        elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
            add_compile_options(/WX)
        endif()
    endif()

    if (LLAMA_ALL_WARNINGS)
        if (NOT MSVC)
            list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
                                -Werror=implicit-int -Werror=implicit-function-declaration)

            list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)

            list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)

            list(APPEND C_FLAGS   ${WARNING_FLAGS})
            list(APPEND CXX_FLAGS ${WARNING_FLAGS})

            ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})

            add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
                                "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
        else()
            # todo : msvc
            set(C_FLAGS   "" PARENT_SCOPE)
            set(CXX_FLAGS "" PARENT_SCOPE)
        endif()
    endif()
endfunction()
cmake/git-vars.cmake (new file, 22 lines)

find_package(Git)

# the commit's SHA1
execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the date of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the subject of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%s
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
cmake/llama-config.cmake.in (new file, 30 lines)

set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)
set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)

@PACKAGE_INIT@

set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")

find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)

find_library(llama_LIBRARY llama
    REQUIRED
    HINTS ${LLAMA_LIB_DIR}
    NO_CMAKE_FIND_ROOT_PATH
)

add_library(llama UNKNOWN IMPORTED)
set_target_properties(llama
    PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
        INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
        IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
        IMPORTED_LOCATION "${llama_LIBRARY}"
        INTERFACE_COMPILE_FEATURES c_std_90
        POSITION_INDEPENDENT_CODE ON)

check_required_components(Llama)
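For context, a consumer project would pick up this config package through CMake's find_package machinery. A minimal sketch, assuming llama.cpp has already been built and installed to a hypothetical prefix (the prefix and the my_app project are illustrative, not part of the diff):

# sketch: install llama.cpp, then point a consumer build at the install prefix so
# that find_package(llama) resolves the file generated from llama-config.cmake.in
cmake --install build-ci-release --prefix "$HOME/opt/llama.cpp"
cmake -S my_app -B my_app/build -DCMAKE_PREFIX_PATH="$HOME/opt/llama.cpp"
cmake --build my_app/build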
@@ -1,10 +1,10 @@
 prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=${prefix}
-libdir=${exec_prefix}/lib
-includedir=${prefix}/include
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
 
 Name: llama
 Description: Port of Facebook's LLaMA model in C/C++
-Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lllama
+Version: @LLAMA_INSTALL_VERSION@
+Libs: -L${libdir} -lggml -lggml-base -lllama
 Cflags: -I${includedir}
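The pkg-config template now substitutes absolute install directories and links the split ggml libraries explicitly alongside llama. A usage sketch, assuming the generated .pc file has been installed to a prefix on PKG_CONFIG_PATH (the prefix and source file names are hypothetical):

# sketch: compile a consumer program against the installed library via pkg-config
export PKG_CONFIG_PATH="$HOME/opt/llama.cpp/lib/pkgconfig:$PKG_CONFIG_PATH"
cc -o my_app my_app.c $(pkg-config --cflags --libs llama)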
cmake/x64-windows-llvm.cmake (new file, 11 lines)

set( CMAKE_SYSTEM_NAME Windows )
set( CMAKE_SYSTEM_PROCESSOR x86_64 )

set( CMAKE_C_COMPILER clang )
set( CMAKE_CXX_COMPILER clang++ )

set( arch_c_flags "-march=native" )

set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )
codecov.yml (deleted, 14 lines)

comment: off

coverage:
  status:
    project:
      default:
        target: auto
        threshold: 0
        base: auto
    patch:
      default:
        target: auto
        threshold: 0
        base: auto
@@ -1,5 +1,8 @@
 # common
 
+find_package(Threads REQUIRED)
+
+llama_add_compile_flags()
+
 # Build info header
 #

@@ -36,7 +39,7 @@ add_custom_command(
     COMMENT "Generating build details from Git"
     COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
             -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake"
+            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
     WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
     DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
     VERBATIM

@@ -50,21 +53,28 @@ endif()
 set(TARGET common)
 
 add_library(${TARGET} STATIC
-    base64.hpp
-    common.h
-    common.cpp
-    sampling.h
-    sampling.cpp
-    console.h
-    console.cpp
-    grammar-parser.h
-    grammar-parser.cpp
-    json.hpp
-    json-schema-to-grammar.cpp
-    train.h
-    train.cpp
-    ngram-cache.h
-    ngram-cache.cpp
+    arg.cpp
+    arg.h
+    base64.hpp
+    chat.cpp
+    chat.hpp
+    chat-template.hpp
+    common.cpp
+    common.h
+    console.cpp
+    console.h
+    json-schema-to-grammar.cpp
+    json.hpp
+    llguidance.cpp
+    log.cpp
+    log.h
+    minja.hpp
+    ngram-cache.cpp
+    ngram-cache.h
+    sampling.cpp
+    sampling.h
+    speculative.cpp
+    speculative.h
     )
 
 if (BUILD_SHARED_LIBS)

@@ -76,12 +86,39 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info)
 # Use curl to download model url
 if (LLAMA_CURL)
     find_package(CURL REQUIRED)
-    add_definitions(-DLLAMA_USE_CURL)
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
     include_directories(${CURL_INCLUDE_DIRS})
     find_library(CURL_LIBRARY curl REQUIRED)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
 endif ()
 
+if (LLAMA_LLGUIDANCE)
+    include(ExternalProject)
+    set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
+    set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
+    ExternalProject_Add(llguidance_ext
+        GIT_REPOSITORY https://github.com/guidance-ai/llguidance
+        # v0.6.12:
+        GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09
+        PREFIX ${CMAKE_BINARY_DIR}/llguidance
+        SOURCE_DIR ${LLGUIDANCE_SRC}
+        BUILD_IN_SOURCE TRUE
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND cargo build --release
+        INSTALL_COMMAND ""
+        BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h
+        UPDATE_COMMAND ""
+    )
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
+
+    add_library(llguidance STATIC IMPORTED)
+    set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a)
+    add_dependencies(llguidance llguidance_ext)
+
+    target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
+endif ()
+
 target_include_directories(${TARGET} PUBLIC .)
-target_compile_features(${TARGET} PUBLIC cxx_std_11)
-target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
+target_compile_features (${TARGET} PUBLIC cxx_std_17)
+target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
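The new LLAMA_LLGUIDANCE option above pulls llguidance in as an ExternalProject built with cargo, so enabling it requires a Rust toolchain on the build machine. A minimal configure and build sketch under that assumption:

# sketch: enable the optional llguidance backend added above
# (requires cargo/rustc on PATH, since the external project runs `cargo build --release`)
cmake -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_LLGUIDANCE=ON
cmake --build build -j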
common/arg.cpp (new file, 2370 lines)

File diff suppressed because it is too large
common/arg.h (new file, 80 lines)

#pragma once

#include "common.h"

#include <set>
#include <string>
#include <vector>

//
// CLI argument parsing
//

struct common_arg {
    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
    std::set<enum llama_example> excludes = {};
    std::vector<const char *> args;
    const char * value_hint   = nullptr; // help text or example for arg value
    const char * value_hint_2 = nullptr; // for second arg value
    const char * env          = nullptr;
    std::string help;
    bool is_sparam = false; // is current arg a sampling param?
    void (*handler_void)    (common_params & params) = nullptr;
    void (*handler_string)  (common_params & params, const std::string &) = nullptr;
    void (*handler_str_str) (common_params & params, const std::string &, const std::string &) = nullptr;
    void (*handler_int)     (common_params & params, int) = nullptr;

    common_arg(
        const std::initializer_list<const char *> & args,
        const char * value_hint,
        const std::string & help,
        void (*handler)(common_params & params, const std::string &)
    ) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}

    common_arg(
        const std::initializer_list<const char *> & args,
        const char * value_hint,
        const std::string & help,
        void (*handler)(common_params & params, int)
    ) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}

    common_arg(
        const std::initializer_list<const char *> & args,
        const std::string & help,
        void (*handler)(common_params & params)
    ) : args(args), help(help), handler_void(handler) {}

    // support 2 values for arg
    common_arg(
        const std::initializer_list<const char *> & args,
        const char * value_hint,
        const char * value_hint_2,
        const std::string & help,
        void (*handler)(common_params & params, const std::string &, const std::string &)
    ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}

    common_arg & set_examples(std::initializer_list<enum llama_example> examples);
    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
    common_arg & set_env(const char * env);
    common_arg & set_sparam();
    bool in_example(enum llama_example ex);
    bool is_exclude(enum llama_example ex);
    bool get_value_from_env(std::string & output);
    bool has_value_from_env();
    std::string to_string();
};

struct common_params_context {
    enum llama_example ex = LLAMA_EXAMPLE_COMMON;
    common_params & params;
    std::vector<common_arg> options;
    void(*print_usage)(int, char **) = nullptr;
    common_params_context(common_params & params) : params(params) {}
};

// parse input arguments from CLI
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);

// function to be used by test-arg-parser
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
common/chat-template.hpp (new file, 529 lines)

/*
    Copyright 2024 Google LLC

    Use of this source code is governed by an MIT-style
    license that can be found in the LICENSE file or at
    https://opensource.org/licenses/MIT.
*/
// SPDX-License-Identifier: MIT
#pragma once

#include "minja.hpp"
#include <json.hpp>
#include <string>
#include <vector>

using json = nlohmann::ordered_json;

namespace minja {

struct chat_template_caps {
    bool supports_tools = false;
    bool supports_tool_calls = false;
    bool supports_tool_responses = false;
    bool supports_system_role = false;
    bool supports_parallel_tool_calls = false;
    bool supports_tool_call_id = false;
    // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object.
    // Most other templates (and OpenAI's API) expect the arguments object to be stringified.
    bool requires_object_arguments = false;
    // CohereForAI/c4ai-command-r-plus simple variant
    bool requires_non_null_content = false;
    // MiniMaxAI/MiniMax-Text-01 special
    bool requires_typed_content = false;
};

struct chat_template_inputs {
    nlohmann::ordered_json messages;
    nlohmann::ordered_json tools;
    bool add_generation_prompt = true;
    nlohmann::ordered_json extra_context;
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
};

struct chat_template_options {
    bool apply_polyfills = true;
    bool use_bos_token = true;
    bool use_eos_token = true;
    bool define_strftime_now = true;

    bool polyfill_tools = true;
    bool polyfill_tool_call_examples = true;
    bool polyfill_tool_calls = true;
    bool polyfill_tool_responses = true;
    bool polyfill_system_role = true;
    bool polyfill_object_arguments = true;
    bool polyfill_typed_content = true;
};

class chat_template {

  private:
    chat_template_caps caps_;
    std::string source_;
    std::string bos_token_;
    std::string eos_token_;
    std::shared_ptr<minja::TemplateNode> template_root_;
    std::string tool_call_example_;

    std::string try_raw_render(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
    {
        try {
            chat_template_inputs inputs;
            inputs.messages = messages;
            inputs.tools = tools;
            inputs.add_generation_prompt = add_generation_prompt;
            inputs.extra_context = extra_context;
            // Use fixed date for tests
            inputs.now = std::chrono::system_clock::from_time_t(0);

            chat_template_options opts;
            opts.apply_polyfills = false;

            auto prompt = apply(inputs, opts);
            // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
            return prompt;
        } catch (const std::exception & e) {
            // fprintf(stderr, "try_raw_render error: %s\n", e.what());
            return "";
        }
    }

  public:

    chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
        : source_(source), bos_token_(bos_token), eos_token_(eos_token)
    {
        template_root_ = minja::Parser::parse(source_, {
            /* .trim_blocks = */ true,
            /* .lstrip_blocks = */ true,
            /* .keep_trailing_newline = */ false,
        });

        auto contains = [](const std::string & haystack, const std::string & needle) {
            return haystack.find(needle) != std::string::npos;
        };

        const std::string user_needle = "<User Needle>";
        const std::string sys_needle = "<System Needle>";
        const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
        const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};

        caps_.requires_typed_content =
            !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
            && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);

        const auto dummy_user_msg = caps_.requires_typed_content
            ? dummy_typed_user_msg
            : dummy_str_user_msg;
        const json needle_system_msg = {
            {"role", "system"},
            {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
        };

        caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);

        auto out = try_raw_render(json::array({
            dummy_user_msg
        }), json::array({
            {
                {"name", "some_tool"},
                {"type", "function"},
                {"function", {
                    {"name", "some_tool"},
                    {"description", "Some tool."},
                    {"parameters", {
                        {"type", "object"},
                        {"properties", {
                            {"arg", {
                                {"type", "string"},
                                {"description", "Some argument."},
                            }},
                        }},
                        {"required", json::array({ "arg" })},
                    }},
                }},
            },
        }), false);
        caps_.supports_tools = contains(out, "some_tool");

        auto make_tool_calls_msg = [&](const json & tool_calls) {
            return json {
                {"role", "assistant"},
                {"content", nullptr},
                {"tool_calls", tool_calls},
            };
        };
        auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
            return json {
                {"id", "call_1___"},
                {"type", "function"},
                {"function", {
                    {"arguments", arguments},
                    {"name", tool_name},
                }},
            };
        };
        const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};

        // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
        }), {}, false);
        auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
        }), {}, false);
        auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");

        caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
        caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
        auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
        auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
        caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);

        if (caps_.supports_tool_calls) {
            auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
            auto tc1 = make_tool_call("test_tool1", dummy_args);
            auto tc2 = make_tool_call("test_tool2", dummy_args);
            auto out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1, tc2})),
            }), {}, false);
            caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");

            out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1})),
                {
                    {"role", "tool"},
                    {"name", "test_tool1"},
                    {"content", "Some response!"},
                    {"tool_call_id", "call_911_"},
                }
            }), {}, false);
            caps_.supports_tool_responses = contains(out, "Some response!");
            caps_.supports_tool_call_id = contains(out, "call_911_");
        }

        try {
            if (!caps_.supports_tools) {
                const json user_msg {
                    {"role", "user"},
                    {"content", "Hey"},
                };
                const json args {
                    {"arg1", "some_value"},
                };
                const json tool_call_msg {
                    {"role", "assistant"},
                    {"content", nullptr},
                    {"tool_calls", json::array({
                        {
                            // TODO: detect if requires numerical id or fixed length == 6 like Nemo
                            {"id", "call_1___"},
                            {"type", "function"},
                            {"function", {
                                {"name", "tool_name"},
                                {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
                            }},
                        },
                    })},
                };
                std::string prefix, full;
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg});
                    inputs.add_generation_prompt = true;
                    prefix = apply(inputs);
                }
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg, tool_call_msg});
                    inputs.add_generation_prompt = false;
                    full = apply(inputs);
                }
                auto eos_pos_last = full.rfind(eos_token_);
                if (eos_pos_last == prefix.size() - eos_token_.size() ||
                        (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
                    full = full.substr(0, eos_pos_last);
                }
                size_t common_prefix_length = 0;
                for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
                    if (prefix[i] != full[i]) {
                        break;
                    }
                    if (prefix[i] == '<') {
                        // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
                        // but it removes thinking tags for past messages.
                        // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
                        continue;
                    }
                    common_prefix_length = i + 1;
                }
                auto example = full.substr(common_prefix_length);
                if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
                    fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
                } else {
                    tool_call_example_ = example;
                }
            }
        } catch (const std::exception & e) {
            fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
        }
    }

    const std::string & source() const { return source_; }
    const std::string & bos_token() const { return bos_token_; }
    const std::string & eos_token() const { return eos_token_; }
    const chat_template_caps & original_caps() const { return caps_; }

    // Deprecated, please use the form with chat_template_inputs and chat_template_options
    std::string apply(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
        bool apply_polyfills = true)
    {
        fprintf(stderr, "[%s] Deprecated!\n", __func__);
        chat_template_inputs inputs;
        inputs.messages = messages;
        inputs.tools = tools;
        inputs.add_generation_prompt = add_generation_prompt;
        inputs.extra_context = extra_context;
        inputs.now = std::chrono::system_clock::now();

        chat_template_options opts;
        opts.apply_polyfills = apply_polyfills;

        return apply(inputs, opts);
    }

    std::string apply(
        const chat_template_inputs & inputs,
        const chat_template_options & opts = chat_template_options()) const
    {
        json actual_messages;

        auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
        auto has_tool_calls = false;
        auto has_tool_responses = false;
        auto has_string_content = false;
        for (const auto & message : inputs.messages) {
            if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
                has_tool_calls = true;
            }
            if (message.contains("role") && message["role"] == "tool") {
                has_tool_responses = true;
            }
            if (message.contains("content") && message["content"].is_string()) {
                has_string_content = true;
            }
        }

        auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
        auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
        auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
        auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
        auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
        auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
        auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;

        auto needs_polyfills = opts.apply_polyfills && (false
            || polyfill_system_role
            || polyfill_tools
            || polyfill_tool_calls
            || polyfill_tool_responses
            || polyfill_object_arguments
            || polyfill_typed_content
        );

        if (needs_polyfills) {
            actual_messages = json::array();

            auto add_message = [&](const json & msg) {
                if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
                    actual_messages.push_back({
                        {"role", msg.at("role")},
                        {"content", {{
                            {"type", "text"},
                            {"text", msg.at("content")},
                        }}},
                    });
                } else {
                    actual_messages.push_back(msg);
                }
            };

            std::string pending_system;
            auto flush_sys = [&]() {
                if (!pending_system.empty()) {
                    add_message({
                        {"role", "user"},
                        {"content", pending_system},
                    });
                    pending_system.clear();
                }
            };

            json adjusted_messages;
            if (polyfill_tools) {
                adjusted_messages = add_system(inputs.messages,
                    "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
                    (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
            } else {
                adjusted_messages = inputs.messages;
            }

            for (const auto & message_ : adjusted_messages) {
                auto message = message_;
                if (!message.contains("role") || !message.contains("content")) {
                    throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
                }
                std::string role = message.at("role");

                if (message.contains("tool_calls")) {
                    if (polyfill_object_arguments || polyfill_tool_calls) {
                        for (auto & tool_call : message.at("tool_calls")) {
                            if (tool_call["type"] == "function") {
                                auto & function = tool_call.at("function");
                                auto & arguments = function.at("arguments");
                                if (arguments.is_string()) {
                                    try {
                                        arguments = json::parse(arguments.get<std::string>());
                                    } catch (const std::exception & ecvt) {
                                        fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
                                    }
                                }
                            }
                        }
                    }
                    if (polyfill_tool_calls) {
                        auto content = message.at("content");
                        auto tool_calls = json::array();
                        for (const auto & tool_call : message.at("tool_calls")) {
                            if (tool_call.at("type") != "function") {
                                continue;
                            }
                            const auto & function = tool_call.at("function");
                            auto tc = json {
                                {"name", function.at("name")},
                                {"arguments", function.at("arguments")},
                            };
                            if (tool_call.contains("id")) {
                                tc["id"] = tool_call["id"];
                            }
                            tool_calls.push_back(tc);
                        }
                        auto obj = json {
                            {"tool_calls", tool_calls},
                        };
                        if (!content.is_null() && content != "") {
                            obj["content"] = content;
                        }
                        message["content"] = obj.dump(2);
                        message.erase("tool_calls");
                    }
                }
                if (polyfill_tool_responses && role == "tool") {
                    message["role"] = "user";
                    auto obj = json {
                        {"tool_response", {
                            {"content", message.at("content")},
                        }},
                    };
                    if (message.contains("name")) {
                        obj["tool_response"]["name"] = message.at("name");
                    }
                    if (message.contains("tool_call_id")) {
                        obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
                    }
                    message["content"] = obj.dump(2);
                    message.erase("name");
                }

                if (!message["content"].is_null() && polyfill_system_role) {
                    std::string content = message.at("content");
                    if (role == "system") {
                        if (!pending_system.empty()) pending_system += "\n";
                        pending_system += content;
                        continue;
                    } else {
                        if (role == "user") {
                            if (!pending_system.empty()) {
                                message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
                                pending_system.clear();
                            }
                        } else {
                            flush_sys();
                        }
                    }
                }
                add_message(message);
            }
            flush_sys();
        } else {
            actual_messages = inputs.messages;
        }

        auto context = minja::Context::make(json({
            {"messages", actual_messages},
            {"add_generation_prompt", inputs.add_generation_prompt},
        }));
        context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
        context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
        if (opts.define_strftime_now) {
            auto now = inputs.now;
            context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
                args.expectArgs("strftime_now", {1, 1}, {0, 0});
                auto format = args.args[0].get<std::string>();

                auto time = std::chrono::system_clock::to_time_t(now);
                auto local_time = *std::localtime(&time);
                std::ostringstream ss;
                ss << std::put_time(&local_time, format.c_str());
                return ss.str();
            }));
        }
        if (!inputs.tools.is_null()) {
            context->set("tools", minja::Value(inputs.tools));
        }
        if (!inputs.extra_context.is_null()) {
            for (auto & kv : inputs.extra_context.items()) {
                context->set(kv.key(), minja::Value(kv.value()));
            }
        }

        auto ret = template_root_->render(context);
        // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
        // fprintf(stderr, "apply: %s\n\n", ret.c_str());
        return ret;
    }

    static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
        json messages_with_system = messages;

        if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
            std::string existing_system = messages_with_system.at(0).at("content");
            messages_with_system[0] = json {
                {"role", "system"},
                {"content", existing_system + "\n\n" + system_prompt},
            };
        } else {
            messages_with_system.insert(messages_with_system.begin(), json {
                {"role", "system"},
                {"content", system_prompt},
            });
        }
        return messages_with_system;
    }
};

}  // namespace minja
966
common/chat.cpp
Normal file
966
common/chat.cpp
Normal file
|
@ -0,0 +1,966 @@
|
||||||
|
#include "chat.hpp"
|
||||||
|
#include "chat-template.hpp"
|
||||||
|
#include "json-schema-to-grammar.h"
|
||||||
|
#include "log.h"
|
||||||
|
#include "minja.hpp"
|
||||||
|
|
||||||
|
std::string common_chat_format_name(common_chat_format format) {
|
||||||
|
switch (format) {
|
||||||
|
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
|
||||||
|
case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
|
||||||
|
case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
|
||||||
|
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
|
||||||
|
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
|
||||||
|
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
|
||||||
|
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
||||||
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
||||||
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
||||||
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
||||||
|
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
||||||
|
default:
|
||||||
|
throw std::runtime_error("Unknown chat format");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const common_grammar_options grammar_options {
|
||||||
|
/* .dotall = */ false,
|
||||||
|
/* .compact_spaces = */ false,
|
||||||
|
// /* .compact_spaces = */ true,
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) {
|
||||||
|
// // https://json.nlohmann.me/features/parsing/sax_interface/
|
||||||
|
struct json_error_locator : public nlohmann::json_sax<json> {
|
||||||
|
std::size_t position;
|
||||||
|
bool found_error;
|
||||||
|
|
||||||
|
json_error_locator() : position(0), found_error(false) {}
|
||||||
|
|
||||||
|
bool parse_error(std::size_t position, const std::string &, const json::exception &) override {
|
||||||
|
this->position = position - 1;
|
||||||
|
this->found_error = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool null() override { return true; }
|
||||||
|
bool boolean(bool) override { return true; }
|
||||||
|
bool number_integer(number_integer_t) override { return true; }
|
||||||
|
bool number_unsigned(number_unsigned_t) override { return true; }
|
||||||
|
bool number_float(number_float_t, const string_t &) override { return true; }
|
||||||
|
bool string(string_t &) override { return true; }
|
||||||
|
bool binary(binary_t &) override { return true; }
|
||||||
|
bool start_object(std::size_t) override { return true; }
|
||||||
|
bool key(string_t &) override { return true; }
|
||||||
|
bool end_object() override { return true; }
|
||||||
|
bool start_array(std::size_t) override { return true; }
|
||||||
|
bool end_array() override { return true; }
|
||||||
|
};
|
||||||
|
json_error_locator err_loc;
|
||||||
|
json::sax_parse(it, end, &err_loc);
|
||||||
|
|
||||||
|
std::string::const_iterator temptative_end;
|
||||||
|
if (err_loc.found_error) {
|
||||||
|
temptative_end = it + err_loc.position;
|
||||||
|
} else {
|
||||||
|
temptative_end = end;
|
||||||
|
}
|
||||||
|
std::string json_sub {it, temptative_end};
|
||||||
|
try {
|
||||||
|
out = json::parse(json_sub);
|
||||||
|
it = temptative_end;
|
||||||
|
return true;
|
||||||
|
} catch (const std::exception &) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}

/**
 * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
 * Aggregates the prefix, suffix and in-between text into the content.
 */
static common_chat_msg parse_json_tool_calls(
    const std::string& input,
    const std::optional<std::regex> & trigger_opt,
    const std::regex & function_regex,
    const std::regex & close_regex) {
    std::smatch match;

    common_chat_msg result;
    result.role = "assistant";

    auto end = input.end();
    auto it = input.begin();

    if (trigger_opt) {
        if (!std::regex_search(it, end, match, *trigger_opt)) {
            result.content = input;
            return result;
        }
        result.content = match.prefix().str();
        it = match.suffix().first;
    }

    while (it != end) {
        std::sregex_iterator rend;
        std::sregex_iterator rit(it, end, function_regex);
        if (rit == rend) {
            fprintf(stderr, "No more tool calls found\n");
            result.content += std::string(it, end);
            break;
        }
        auto name = rit->str(1);
        result.content += std::string(it, rit->prefix().second);
        it = rit->suffix().first;

        json arguments;
        if (!parse_json(it, end, arguments)) {
            throw std::runtime_error("Failed to parse json tool call arguments");
        }
        if (!std::regex_search(it, end, match, close_regex)) {
            throw std::runtime_error("Malformed input, missing closing pattern");
        }
        it = match.suffix().first;
        result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
    }
    return result;
}
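
// Usage sketch (illustrative only): with a function_regex such as R"(<function=(\w+)>)" and
// a close_regex of R"(</function>)" (the pair used by the Functionary v3.1 parser further
// down), a hypothetical model output like
//
//     Let me look that up.<function=get_weather>{"city": "Paris"}</function>
//
// yields content "Let me look that up." plus one tool call named "get_weather" with
// arguments {"city": "Paris"}.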

static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
    auto content_end = input.find(prefix);
    size_t tc_start = std::string::npos;

    common_chat_msg result;
    result.role = "assistant";
    const auto process_tool_calls = [&](const json & tool_calls) {
        for (const auto & tool_call : tool_calls) {
            const auto & arguments = tool_call["arguments"];
            result.tool_calls.push_back({
                tool_call["name"],
                arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
                tool_call.contains("id") ? tool_call["id"] : "",
            });
        }
    };
    if (content_end == std::string::npos) {
        result.content = input;
    } else {
        tc_start = content_end + prefix.size() - rstrip_prefix;
        result.content = input.substr(0, content_end);
        auto tool_calls = json::parse(input.substr(tc_start));
        process_tool_calls(tool_calls);
    }
    return result;
}

static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
    for (const auto & tool : tools) {
        if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) {
            LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str());
            continue;
        }
        fn(tool);
    }
}

static std::string apply(
    const common_chat_template & tmpl,
    const nlohmann::ordered_json & messages,
    const nlohmann::ordered_json & tools,
    bool add_generation_prompt,
    const nlohmann::ordered_json & extra_context = nlohmann::ordered_json())
{
    minja::chat_template_inputs tmpl_inputs;
    tmpl_inputs.messages = messages;
    tmpl_inputs.tools = tools;
    tmpl_inputs.add_generation_prompt = add_generation_prompt;
    tmpl_inputs.extra_context = extra_context;
    // TODO: add flag to control date/time, if only for testing purposes.
    // tmpl_inputs.now = std::chrono::system_clock::now();

    minja::chat_template_options tmpl_opts;
    tmpl_opts.use_bos_token = false;
    tmpl_opts.use_eos_token = false;

    return tmpl.apply(tmpl_inputs, tmpl_opts);
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
common_chat_params data;
|
||||||
|
|
||||||
|
auto tool_call_schemas = json::array();
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
auto tool_schema = json {
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", {
|
||||||
|
{"name", {
|
||||||
|
{"type", "string"},
|
||||||
|
{"const", function["name"]},
|
||||||
|
}},
|
||||||
|
{"arguments", function["parameters"]},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"name", "arguments"})},
|
||||||
|
};
|
||||||
|
if (function.contains("description")) {
|
||||||
|
tool_schema["description"] = function["description"];
|
||||||
|
}
|
||||||
|
if (inputs.parallel_tool_calls) {
|
||||||
|
tool_schema["properties"]["id"] = {
|
||||||
|
{"type", "string"},
|
||||||
|
{"minLength", 4},
|
||||||
|
};
|
||||||
|
tool_schema["required"].push_back("id");
|
||||||
|
}
|
||||||
|
tool_call_schemas.emplace_back(tool_schema);
|
||||||
|
});
|
||||||
|
const auto tool_call =
|
||||||
|
inputs.parallel_tool_calls
|
||||||
|
? json {
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", {
|
||||||
|
{"tool_calls", {
|
||||||
|
{"type", "array"},
|
||||||
|
{"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
|
||||||
|
{"anyOf", tool_call_schemas},
|
||||||
|
}},
|
||||||
|
{"minItems", 1},
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"tool_calls"})},
|
||||||
|
}
|
||||||
|
: json {
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", {
|
||||||
|
{"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
|
||||||
|
{"anyOf", tool_call_schemas},
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"tool_call"})},
|
||||||
|
};
|
||||||
|
const auto schema =
|
||||||
|
inputs.tool_choice != "required"
|
||||||
|
? json {
|
||||||
|
{"anyOf", json::array({
|
||||||
|
tool_call,
|
||||||
|
{
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", {
|
||||||
|
{"response", inputs.json_schema.is_null()
|
||||||
|
? json {{"type", "string"}}
|
||||||
|
: inputs.json_schema
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"response"})},
|
||||||
|
},
|
||||||
|
})}
|
||||||
|
}
|
||||||
|
: tool_call;
|
||||||
|
|
||||||
|
data.grammar_lazy = false;
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
builder.add_schema("root", schema);
|
||||||
|
}, grammar_options);
|
||||||
|
|
||||||
|
auto tweaked_messages = common_chat_template::add_system(
|
||||||
|
inputs.messages,
|
||||||
|
"Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
|
||||||
|
|
||||||
|
data.prompt = apply(tmpl, tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
data.format = COMMON_CHAT_FORMAT_GENERIC;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
static common_chat_msg common_chat_parse_generic(const std::string & input) {
|
||||||
|
json data = json::parse(input);
|
||||||
|
common_chat_msg result;
|
||||||
|
result.role = "assistant";
|
||||||
|
if (data.contains("tool_calls")) {
|
||||||
|
for (const auto & tool_call : data["tool_calls"]) {
|
||||||
|
result.tool_calls.push_back({
|
||||||
|
tool_call["name"],
|
||||||
|
tool_call["arguments"].dump(),
|
||||||
|
tool_call.contains("id") ? tool_call["id"] : "",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if (data.contains("tool_call")) {
|
||||||
|
result.tool_calls.push_back({
|
||||||
|
data["tool_call"]["name"],
|
||||||
|
data["tool_call"]["arguments"].dump(),
|
||||||
|
/* id= */ "",
|
||||||
|
});
|
||||||
|
} else if (data.contains("response")) {
|
||||||
|
const auto & response = data["response"];
|
||||||
|
result.content = response.is_string() ? response.get<std::string>() : response.dump(2);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
common_chat_params data;
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
auto schemas = json::array();
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
schemas.push_back({
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", {
|
||||||
|
// Important note: the model is probably trained to take a JSON stringified arguments value.
|
||||||
|
// It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
|
||||||
|
{"name", {
|
||||||
|
{"type", "string"},
|
||||||
|
{"const", function["name"]},
|
||||||
|
}},
|
||||||
|
{"arguments", function["parameters"]},
|
||||||
|
{"id", {
|
||||||
|
{"type", "string"},
|
||||||
|
// Nemo's template expects a 9-character alphanumeric ID.
|
||||||
|
{"pattern", "^[a-zA-Z0-9]{9}$"},
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"name", "arguments", "id"})},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
auto schema = json {
|
||||||
|
{"type", "array"},
|
||||||
|
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
||||||
|
{"minItems", 1},
|
||||||
|
};
|
||||||
|
if (!inputs.parallel_tool_calls) {
|
||||||
|
schema["maxItems"] = 1;
|
||||||
|
}
|
||||||
|
builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
|
||||||
|
}, grammar_options);
|
||||||
|
data.grammar_triggers.push_back({"[TOOL_CALLS]", /* .at_start = */ true});
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) {
|
||||||
|
return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]");
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
common_chat_params data;
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
auto schemas = json::array();
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
schemas.push_back({
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", {
|
||||||
|
{"tool_call_id", {
|
||||||
|
{"type", "string"},
|
||||||
|
// Command-R's template expects an integer string.
|
||||||
|
{"pattern", "^[0-9]{1,10}$"},
|
||||||
|
}},
|
||||||
|
{"tool_name", {
|
||||||
|
{"type", "string"},
|
||||||
|
{"const", function["name"]},
|
||||||
|
}},
|
||||||
|
{"parameters", function["parameters"]},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"tool_call_id", "tool_name", "parameters"})},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
auto schema = json {
|
||||||
|
{"type", "array"},
|
||||||
|
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
||||||
|
{"minItems", 1},
|
||||||
|
};
|
||||||
|
if (!inputs.parallel_tool_calls) {
|
||||||
|
schema["maxItems"] = 1;
|
||||||
|
}
|
||||||
|
builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
|
||||||
|
}, grammar_options);
|
||||||
|
data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false});
|
||||||
|
data.preserved_tokens = {
|
||||||
|
"<|START_RESPONSE|>",
|
||||||
|
"<|END_RESPONSE|>",
|
||||||
|
"<|START_THINKING|>",
|
||||||
|
"<|END_THINKING|>",
|
||||||
|
"<|END_ACTION|>",
|
||||||
|
};
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
|
||||||
|
static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
|
||||||
|
static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
|
||||||
|
std::smatch match;
|
||||||
|
|
||||||
|
common_chat_msg result;
|
||||||
|
result.role = "assistant";
|
||||||
|
if (std::regex_match(input, match, response_regex)) {
|
||||||
|
result.content = match[1].str();
|
||||||
|
} else if (std::regex_match(input, match, thought_action_regex)) {
|
||||||
|
result.tool_plan = match[1].str();
|
||||||
|
auto actions_str = match[2].str();
|
||||||
|
auto actions = json::parse(actions_str);
|
||||||
|
for (const auto & action : actions) {
|
||||||
|
result.tool_calls.push_back({
|
||||||
|
/* .name = */ action["tool_name"],
|
||||||
|
/* .arguments = */ action["parameters"].dump(),
|
||||||
|
/* .id = */ action["tool_call_id"],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG_ERR("Failed to parse command_r output");
|
||||||
|
result.content = input;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
|
||||||
|
if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
|
||||||
|
throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
|
||||||
|
}
|
||||||
|
const auto & parameters_properties = parameters.at("properties");
|
||||||
|
const auto & parameters_required = parameters.at("required");
|
||||||
|
for (const auto & prop : expected_properties) {
|
||||||
|
if (!parameters_properties.contains(prop)) {
|
||||||
|
throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop);
|
||||||
|
}
|
||||||
|
if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
|
||||||
|
throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (parameters_properties.size() != expected_properties.size()) {
|
||||||
|
throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, bool allow_python_tag_builtin_tools) {
|
||||||
|
auto builtin_tools = json::array();
|
||||||
|
common_chat_params data;
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
std::vector<std::string> tool_rules;
|
||||||
|
|
||||||
|
auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
|
||||||
|
if (name == "wolfram_alpha") {
|
||||||
|
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
|
||||||
|
expect_tool_parameters(name, parameters, {"query"});
|
||||||
|
} else if (name == "web_search" || name == "brave_search") {
|
||||||
|
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
|
||||||
|
expect_tool_parameters(name, parameters, {"query"});
|
||||||
|
} else if (name == "python" || name == "code_interpreter") {
|
||||||
|
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
|
||||||
|
expect_tool_parameters(name, parameters, {"code"});
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> kvs;
|
||||||
|
for (const auto & [key, value] : parameters.at("properties").items()) {
|
||||||
|
kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value));
|
||||||
|
}
|
||||||
|
|
||||||
|
tool_rules.push_back(
|
||||||
|
builder.add_rule(
|
||||||
|
name + "-call",
|
||||||
|
"\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
|
||||||
|
builtin_tools.push_back(name);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
std::string name = function["name"];
|
||||||
|
auto parameters = function["parameters"];
|
||||||
|
builder.resolve_refs(parameters);
|
||||||
|
|
||||||
|
// https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
|
||||||
|
if (allow_python_tag_builtin_tools) {
|
||||||
|
handle_builtin_tool(name, parameters);
|
||||||
|
}
|
||||||
|
tool_rules.push_back(
|
||||||
|
builder.add_rule(
|
||||||
|
name + "-call",
|
||||||
|
"\"{\" space "
|
||||||
|
"( \"\\\"type\\\":\" space \"\\\"function\\\",\" space )? "
|
||||||
|
"\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
|
||||||
|
builder.add_schema(name + "-args", parameters) +
|
||||||
|
" \"}\""));
|
||||||
|
data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true});
|
||||||
|
});
|
||||||
|
data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true});
|
||||||
|
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
|
||||||
|
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
|
||||||
|
data.grammar_triggers.push_back({"{\"type\": \"function\"", /* .at_start = */ true});
|
||||||
|
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
|
||||||
|
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
|
||||||
|
if (!builtin_tools.empty()) {
|
||||||
|
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
|
||||||
|
}
|
||||||
|
builder.add_rule("root", string_join(tool_rules, " | "));
|
||||||
|
}, grammar_options);
|
||||||
|
data.additional_stops.push_back("<|eom_id|>");
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
|
||||||
|
{"tools_in_user_message", false},
|
||||||
|
{"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
|
||||||
|
});
|
||||||
|
data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
|
||||||
|
? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
|
||||||
|
: COMMON_CHAT_FORMAT_LLAMA_3_X;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
|
||||||
|
// TODO: tighten & simplify the parser, don't accept leading text context.
|
||||||
|
static std::regex function_regex("\\{[\\s\\n\\r]*(?:\"type\"[\\s\\n\\r]*:[\\s\\n\\r]*\"function\"[\\s\\n\\r]*,[\\s\\n\\r]*|[\\s\\n\\r]*)\"name\"[\\s\\n\\r]*:[\\s\\n\\r]*\"([^\"]+)\"[\\s\\n\\r]*,[\\s\\n\\r]*\"parameters\": ");
|
||||||
|
static std::regex close_regex("\\}");
|
||||||
|
static std::regex builtin_call_regex("<\\|python_tag\\|>([^.(]+)\\.call\\((.*)\\)");
|
||||||
|
|
||||||
|
if (with_builtin_tools) {
|
||||||
|
std::smatch match;
|
||||||
|
if (std::regex_match(input, match, builtin_call_regex)) {
|
||||||
|
auto name = match[1].str();
|
||||||
|
auto raw_args = match[2].str();
|
||||||
|
|
||||||
|
// TODO: if/when builtin tools start accepting more than 1 argument, use parse_json for real parsing.
|
||||||
|
auto it_eq = raw_args.find('=');
|
||||||
|
auto arg_name = raw_args.substr(0, it_eq);
|
||||||
|
auto arg_value_str = raw_args.substr(it_eq + 1);
|
||||||
|
auto arg_value = json::parse(arg_value_str);
|
||||||
|
|
||||||
|
return {
|
||||||
|
/* .role = */ "assistant",
|
||||||
|
/* .content = */ match.prefix().str(),
|
||||||
|
/* .tool_calls = */ {
|
||||||
|
{
|
||||||
|
/* .name = */ match[1],
|
||||||
|
/* .arguments = */ (json {
|
||||||
|
{arg_name, arg_value},
|
||||||
|
}).dump(),
|
||||||
|
/* .id = */ "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
common_chat_params data;
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
std::vector<std::string> tool_rules;
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
std::string name = function["name"];
|
||||||
|
auto parameters = function["parameters"];
|
||||||
|
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||||
|
tool_rules.push_back(builder.add_rule(name + "-call",
|
||||||
|
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
|
||||||
|
});
|
||||||
|
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
|
||||||
|
data.preserved_tokens = {
|
||||||
|
"<|tool▁sep|>",
|
||||||
|
"<|tool▁call▁end|>",
|
||||||
|
};
|
||||||
|
builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space");
|
||||||
|
}, grammar_options);
|
||||||
|
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
data.prompt = prompt;
|
||||||
|
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
|
||||||
|
static std::regex trigger_regex("<|tool▁calls▁begin|>");
|
||||||
|
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
||||||
|
static std::regex close_regex("```<|tool▁call▁end|>");
|
||||||
|
return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
fprintf(stderr, "%s\n", __func__);
|
||||||
|
common_chat_params data;
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
|
||||||
|
{"datetime", "Jan 29 2025 13:00:00 GMT"},
|
||||||
|
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
||||||
|
});
|
||||||
|
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
auto schemas = json::array();
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
schemas.push_back({
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", {
|
||||||
|
{"name", {
|
||||||
|
{"type", "string"},
|
||||||
|
{"const", function["name"]},
|
||||||
|
}},
|
||||||
|
{"arguments", function["parameters"]},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"name", "arguments", "id"})},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
auto schema = json {
|
||||||
|
{"type", "array"},
|
||||||
|
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
||||||
|
{"minItems", 1},
|
||||||
|
};
|
||||||
|
if (!inputs.parallel_tool_calls) {
|
||||||
|
schema["maxItems"] = 1;
|
||||||
|
}
|
||||||
|
builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
|
||||||
|
}, grammar_options);
|
||||||
|
data.grammar_triggers.push_back({" functools[", /* .at_start = */ false});
|
||||||
|
data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
|
||||||
|
} else {
|
||||||
|
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) {
|
||||||
|
return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
// >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
|
||||||
|
// Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
|
||||||
|
common_chat_params data;
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
|
||||||
|
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
std::vector<std::string> first_tool_rules;
|
||||||
|
std::vector<std::string> subsequent_tool_rules;
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
std::string name = function["name"];
|
||||||
|
auto parameters = function["parameters"];
|
||||||
|
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||||
|
first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
|
||||||
|
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
|
||||||
|
data.grammar_triggers.push_back({name, /* .at_start = */ true});
|
||||||
|
data.grammar_triggers.push_back({">>>" + name, /* .at_start = */ false});
|
||||||
|
});
|
||||||
|
auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
|
||||||
|
if (inputs.parallel_tool_calls) {
|
||||||
|
auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
|
||||||
|
builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
|
||||||
|
} else {
|
||||||
|
builder.add_rule("root", first_rule);
|
||||||
|
}
|
||||||
|
|
||||||
|
}, grammar_options);
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool consume(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
|
||||||
|
auto expected_it = expected.begin();
|
||||||
|
auto tmp_it = it;
|
||||||
|
while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
|
||||||
|
++tmp_it;
|
||||||
|
++expected_it;
|
||||||
|
}
|
||||||
|
if (expected_it == expected.end()) {
|
||||||
|
it = tmp_it;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
|
||||||
|
static std::regex function_regex(R"((?:>>>)?(\w+)\n)");
|
||||||
|
static std::regex close_regex(R"($|(?=>>>))");
|
||||||
|
|
||||||
|
std::string content;
|
||||||
|
auto it = input.begin();
|
||||||
|
const auto end = input.end();
|
||||||
|
|
||||||
|
if (consume(it, end, "all\n")) {
|
||||||
|
std::smatch match;
|
||||||
|
if (std::regex_search(it, end, match, function_regex)) {
|
||||||
|
auto fun_it = match.prefix().second;
|
||||||
|
content = std::string(it, fun_it);
|
||||||
|
it = fun_it;
|
||||||
|
} else {
|
||||||
|
common_chat_msg res;
|
||||||
|
res.role = "assistant";
|
||||||
|
res.content = std::string(it, end);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// TODO: tighten & simplify.
|
||||||
|
try {
|
||||||
|
auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex);
|
||||||
|
res.content = content + res.content;
|
||||||
|
return res;
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what());
|
||||||
|
common_chat_msg res;
|
||||||
|
res.role = "assistant";
|
||||||
|
res.content = input;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
// https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
|
||||||
|
common_chat_params data;
|
||||||
|
json tools = inputs.tools.is_null() ? inputs.tools : json::array();
|
||||||
|
std::string python_code_argument_name;
|
||||||
|
auto has_raw_python = false;
|
||||||
|
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
std::vector<std::string> tool_rules;
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
const auto & parameters = function["parameters"];
|
||||||
|
std::string name = function["name"];
|
||||||
|
if (name == "python" || name == "ipython") {
|
||||||
|
if (!parameters.contains("type")) {
|
||||||
|
throw std::runtime_error("Missing type in python tool");
|
||||||
|
}
|
||||||
|
has_raw_python = true;
|
||||||
|
auto type = parameters.at("type");
|
||||||
|
if (type == "object") {
|
||||||
|
auto properties = parameters.at("properties");
|
||||||
|
for (auto it = properties.begin(); it != properties.end(); ++it) {
|
||||||
|
if (it.value().at("type") == "string") {
|
||||||
|
if (!python_code_argument_name.empty()) {
|
||||||
|
throw std::runtime_error("Multiple string arguments found in python tool");
|
||||||
|
}
|
||||||
|
python_code_argument_name = it.key();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (python_code_argument_name.empty()) {
|
||||||
|
throw std::runtime_error("No string argument found in python tool");
|
||||||
|
}
|
||||||
|
} else if (type != "string") {
|
||||||
|
throw std::runtime_error("Invalid type in python tool: " + type.dump());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
|
||||||
|
});
|
||||||
|
if (has_raw_python) {
|
||||||
|
tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
|
||||||
|
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
|
||||||
|
}
|
||||||
|
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
|
||||||
|
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
||||||
|
data.grammar_triggers.push_back({"<function=", /* .at_start = */ false});
|
||||||
|
}, grammar_options);
|
||||||
|
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
// TODO: if (has_raw_python)
|
||||||
|
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
|
||||||
|
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
||||||
|
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
||||||
|
std::smatch match;
|
||||||
|
if (std::regex_search(input, match, python_tag_regex)) {
|
||||||
|
auto code = match[1].str();
|
||||||
|
return {
|
||||||
|
/* .role = */ "assistant",
|
||||||
|
/* .content = */ match.prefix().str(),
|
||||||
|
/* .tool_calls = */ {
|
||||||
|
{
|
||||||
|
/* .name = */ "python",
|
||||||
|
/* .arguments = */ (json {{"code", code}}).dump(),
|
||||||
|
/* .id = */ "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
static std::regex function_regex(R"(<function=(\w+)>)");
|
||||||
|
static std::regex close_regex(R"(</function>)");
|
||||||
|
// TODO: tighten & simplify.
|
||||||
|
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
common_chat_params data;
|
||||||
|
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
|
||||||
|
data.grammar_lazy = inputs.tool_choice != "required";
|
||||||
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||||
|
std::vector<std::string> tool_rules;
|
||||||
|
foreach_function(inputs.tools, [&](const json & tool) {
|
||||||
|
const auto & function = tool["function"];
|
||||||
|
std::string name = function["name"];
|
||||||
|
auto parameters = function["parameters"];
|
||||||
|
builder.resolve_refs(parameters);
|
||||||
|
tool_rules.push_back(builder.add_schema(name + "-call", {
|
||||||
|
{"type", "object"},
|
||||||
|
{"properties", json {
|
||||||
|
{"name", json {{"const", name}}},
|
||||||
|
{"arguments", parameters},
|
||||||
|
}},
|
||||||
|
{"required", json::array({"name", "arguments"})},
|
||||||
|
}));
|
||||||
|
});
|
||||||
|
auto tool_call = "\"<tool_call>\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"</tool_call>\" space";
|
||||||
|
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
||||||
|
data.grammar_triggers.push_back({"<tool_call>", /* .at_start = */ false});
|
||||||
|
data.preserved_tokens = { "</tool_call>" };
|
||||||
|
}, grammar_options);
|
||||||
|
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
||||||
|
return data;
|
||||||
|
}
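// Usage sketch (illustrative only): the layout that the grammar above constrains and that
// common_chat_parse_hermes_2_pro() below expects is optional free-form content followed by
// one or more <tool_call> blocks; the tool name and arguments here are hypothetical:
//
//     I'll check the weather for you.
//     <tool_call>
//     {"name": "get_weather", "arguments": {"city": "Paris"}}
//     </tool_call>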
|
||||||
|
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string & input) {
|
||||||
|
try {
|
||||||
|
std::regex start_pattern(R"([\n\s]*<tool_call>)");
|
||||||
|
std::regex middle_pattern(R"([\n\s]*</tool_call>[\n\s]*<tool_call>)");
|
||||||
|
std::regex end_pattern(R"([\n\s]*</tool_call>[\n\s]*$)");
|
||||||
|
|
||||||
|
auto end = input.end();
|
||||||
|
std::sregex_iterator rend;
|
||||||
|
std::sregex_iterator rit(input.begin(), end, start_pattern);
|
||||||
|
if (rit == rend) {
|
||||||
|
return {
|
||||||
|
/* .role = */ "assistant",
|
||||||
|
/* .content = */ input,
|
||||||
|
/* .tool_calls = */ {},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
common_chat_msg result;
|
||||||
|
result.role = "assistant";
|
||||||
|
result.content = rit->prefix();
|
||||||
|
|
||||||
|
auto it = rit->suffix().first;
|
||||||
|
while (it != end) {
|
||||||
|
json call;
|
||||||
|
if (!parse_json(it, end, call)) {
|
||||||
|
throw std::runtime_error("Failed to parse json tool call");
|
||||||
|
}
|
||||||
|
const auto & arguments = call["arguments"];
|
||||||
|
result.tool_calls.push_back({
|
||||||
|
call["name"],
|
||||||
|
arguments.dump(),
|
||||||
|
// arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
|
||||||
|
/* id= */ "",
|
||||||
|
});
|
||||||
|
rit = {it, end, middle_pattern};
|
||||||
|
if (rit != rend) {
|
||||||
|
it = rit->suffix().first;
|
||||||
|
} else {
|
||||||
|
rit = {it, end, end_pattern};
|
||||||
|
if (rit == rend) {
|
||||||
|
throw std::runtime_error("Malformed input, missing </tool_call>");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
return {
|
||||||
|
/* .role = */ "assistant",
|
||||||
|
/* .content = */ input,
|
||||||
|
/* .tool_calls = */ {},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||||
|
common_chat_params data;
|
||||||
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
|
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||||
|
data.grammar_lazy = false;
|
||||||
|
if (!inputs.json_schema.is_null()) {
|
||||||
|
if (!inputs.grammar.empty()) {
|
||||||
|
throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
|
||||||
|
}
|
||||||
|
data.grammar = json_schema_to_grammar(inputs.json_schema);
|
||||||
|
} else {
|
||||||
|
data.grammar = inputs.grammar;
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}

common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
    auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
    LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");

    if (has_tools && !inputs.grammar.empty()) {
        throw std::runtime_error("Cannot specify grammar with tools");
    }

    const auto & src = tmpl.source();
    if (src.find(">>>all") != std::string::npos) {
        // Functionary prepends "all\n" to plain content outputs, so we use its parser even when no tools are requested.
        return common_chat_params_init_functionary_v3_2(tmpl, inputs);
    }
    if (src.find(" functools[") != std::string::npos) {
        // Firefunction v2 requires datetime and functions in the context, even w/o tools.
        return common_chat_params_init_firefunction_v2(tmpl, inputs);
    }

    if (!has_tools) {
        return common_chat_params_init_without_tools(tmpl, inputs);
    }

    if (src.find("<tool_call>") != std::string::npos) {
        return common_chat_params_init_hermes_2_pro(tmpl, inputs);
    }
    if (src.find("<|start_header_id|>") != std::string::npos
        && src.find("<function=") != std::string::npos) {
        return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs);
    }
    if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
        auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
        return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
    }
    if (src.find("<|tool▁calls▁begin|>") != std::string::npos) {
        return common_chat_params_init_deepseek_r1(tmpl, inputs);
    }
    if (src.find("[TOOL_CALLS]") != std::string::npos) {
        return common_chat_params_init_mistral_nemo(tmpl, inputs);
    }
    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) {
        return common_chat_params_init_command_r7b(tmpl, inputs);
    }
    return common_chat_params_init_generic(tmpl, inputs);
}

static common_chat_msg common_chat_parse_content_only(const std::string & input) {
    return {
        /* .role = */ "assistant",
        /* .content = */ input,
        /* .tool_calls = */ {},
    };
}

common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) {
    switch (format) {
        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
            return common_chat_parse_content_only(input);
        case COMMON_CHAT_FORMAT_GENERIC:
            return common_chat_parse_generic(input);
        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
            return common_chat_parse_mistral_nemo(input);
        case COMMON_CHAT_FORMAT_LLAMA_3_X:
            return common_chat_parse_llama_3_1(input);
        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
            return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
            return common_chat_parse_deepseek_r1(input);
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
            return common_chat_parse_functionary_v3_2(input);
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
            return common_chat_parse_functionary_v3_1_llama_3_1(input);
        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
            return common_chat_parse_hermes_2_pro(input);
        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
            return common_chat_parse_firefunction_v2(input);
        case COMMON_CHAT_FORMAT_COMMAND_R7B:
            return common_chat_parse_command_r7b(input);
        default:
            throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
    }
}
52	common/chat.hpp (new file)
@@ -0,0 +1,52 @@
// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.

#pragma once

#include "common.h"
#include <json.hpp>
#include <optional>
#include <string>
#include <vector>

using json = nlohmann::ordered_json;

struct common_chat_inputs {
    json messages;
    json tools;
    json tool_choice;
    json json_schema;
    bool parallel_tool_calls;
    bool stream;
    std::string grammar;
    bool add_generation_prompt = true;
};

enum common_chat_format {
    COMMON_CHAT_FORMAT_CONTENT_ONLY,
    COMMON_CHAT_FORMAT_GENERIC,
    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
    COMMON_CHAT_FORMAT_LLAMA_3_X,
    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
    COMMON_CHAT_FORMAT_HERMES_2_PRO,
    COMMON_CHAT_FORMAT_COMMAND_R7B,

    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};

struct common_chat_params {
    common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    json prompt;
    std::string grammar;
    bool grammar_lazy = false;
    std::vector<common_grammar_trigger> grammar_triggers;
    std::vector<std::string> preserved_tokens;
    std::vector<std::string> additional_stops;
};

struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params);
std::string common_chat_format_name(common_chat_format format);
common_chat_msg common_chat_parse(const std::string & input, common_chat_format format);
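
Usage sketch for the new API (illustrative only; `chat_template_source`, `tools_json` and `output` below are hypothetical placeholders): common_chat_params_init turns a chat template plus request inputs into a prompt, grammar and format id, and common_chat_parse maps the raw model output back to a common_chat_msg using that same format id.

// Sketch: wiring common_chat_params_init / common_chat_parse together.
common_chat_template tmpl(chat_template_source, /* bos_token= */ "", /* eos_token= */ "");

common_chat_inputs inputs;
inputs.messages              = json::array({{{"role", "user"}, {"content", "What's the weather in Paris?"}}});
inputs.tools                 = tools_json;   // OpenAI-style tool/function definitions
inputs.tool_choice           = "auto";
inputs.parallel_tool_calls   = false;
inputs.add_generation_prompt = true;

common_chat_params params = common_chat_params_init(tmpl, inputs);
// params.prompt / params.grammar / params.grammar_triggers drive generation; once the model
// has produced `output`, parse it with the format chosen above:
common_chat_msg msg = common_chat_parse(output, params.format);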
@@ -1,7 +1,7 @@
-include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 
 set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in")
 set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp")
 
 # Only write the build info if it changed
 if(EXISTS ${OUTPUT_FILE})
3429	common/common.cpp
File diff suppressed because it is too large
549	common/common.h
|
@ -2,20 +2,12 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "llama.h"
|
#include "llama-cpp.h"
|
||||||
|
|
||||||
#include "sampling.h"
|
#include <set>
|
||||||
|
|
||||||
#define LOG_NO_FILE_LINE_FUNCTION
|
|
||||||
#include "log.h"
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <random>
|
#include <sstream>
|
||||||
#include <thread>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <tuple>
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define DIRECTORY_SEPARATOR '\\'
|
#define DIRECTORY_SEPARATOR '\\'
|
||||||
|
@ -33,47 +25,192 @@
|
||||||
|
|
||||||
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
||||||
|
|
||||||
|
struct common_adapter_lora_info {
|
||||||
|
std::string path;
|
||||||
|
float scale;
|
||||||
|
|
||||||
|
struct llama_adapter_lora * ptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
using llama_tokens = std::vector<llama_token>;
|
||||||
|
|
||||||
// build info
|
// build info
|
||||||
extern int LLAMA_BUILD_NUMBER;
|
extern int LLAMA_BUILD_NUMBER;
|
||||||
extern char const * LLAMA_COMMIT;
|
extern const char * LLAMA_COMMIT;
|
||||||
extern char const * LLAMA_COMPILER;
|
extern const char * LLAMA_COMPILER;
|
||||||
extern char const * LLAMA_BUILD_TARGET;
|
extern const char * LLAMA_BUILD_TARGET;
|
||||||
|
|
||||||
struct llama_control_vector_load_info;
|
struct common_control_vector_load_info;
|
||||||
|
|
||||||
//
|
//
|
||||||
// CPU utils
|
// CPU utils
|
||||||
//
|
//
|
||||||
|
|
||||||
|
struct cpu_params {
|
||||||
|
int n_threads = -1;
|
||||||
|
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
|
||||||
|
bool mask_valid = false; // Default: any CPU
|
||||||
|
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
||||||
|
bool strict_cpu = false; // Use strict CPU placement
|
||||||
|
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
|
||||||
|
};
|
||||||
|
|
||||||
int32_t cpu_get_num_physical_cores();
|
int32_t cpu_get_num_physical_cores();
|
||||||
int32_t cpu_get_num_math();
|
int32_t cpu_get_num_math();
|
||||||
|
|
||||||
//
|
//
|
||||||
// CLI argument parsing
|
// Common params
|
||||||
//
|
//
|
||||||
|
|
||||||
struct gpt_params {
|
enum llama_example {
|
||||||
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
|
LLAMA_EXAMPLE_COMMON,
|
||||||
|
LLAMA_EXAMPLE_SPECULATIVE,
|
||||||
|
LLAMA_EXAMPLE_MAIN,
|
||||||
|
LLAMA_EXAMPLE_INFILL,
|
||||||
|
LLAMA_EXAMPLE_EMBEDDING,
|
||||||
|
LLAMA_EXAMPLE_PERPLEXITY,
|
||||||
|
LLAMA_EXAMPLE_RETRIEVAL,
|
||||||
|
LLAMA_EXAMPLE_PASSKEY,
|
||||||
|
LLAMA_EXAMPLE_IMATRIX,
|
||||||
|
LLAMA_EXAMPLE_BENCH,
|
||||||
|
LLAMA_EXAMPLE_SERVER,
|
||||||
|
LLAMA_EXAMPLE_CVECTOR_GENERATOR,
|
||||||
|
LLAMA_EXAMPLE_EXPORT_LORA,
|
||||||
|
LLAMA_EXAMPLE_LLAVA,
|
||||||
|
LLAMA_EXAMPLE_LOOKUP,
|
||||||
|
LLAMA_EXAMPLE_PARALLEL,
|
||||||
|
LLAMA_EXAMPLE_TTS,
|
||||||
|
|
||||||
int32_t n_threads = cpu_get_num_math();
|
LLAMA_EXAMPLE_COUNT,
|
||||||
int32_t n_threads_draft = -1;
|
};
|
||||||
int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
|
|
||||||
int32_t n_threads_batch_draft = -1;
|
enum common_sampler_type {
|
||||||
|
COMMON_SAMPLER_TYPE_NONE = 0,
|
||||||
|
COMMON_SAMPLER_TYPE_DRY = 1,
|
||||||
|
COMMON_SAMPLER_TYPE_TOP_K = 2,
|
||||||
|
COMMON_SAMPLER_TYPE_TOP_P = 3,
|
||||||
|
COMMON_SAMPLER_TYPE_MIN_P = 4,
|
||||||
|
//COMMON_SAMPLER_TYPE_TFS_Z = 5,
|
||||||
|
COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
|
||||||
|
COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
|
||||||
|
COMMON_SAMPLER_TYPE_XTC = 8,
|
||||||
|
COMMON_SAMPLER_TYPE_INFILL = 9,
|
||||||
|
COMMON_SAMPLER_TYPE_PENALTIES = 10,
|
||||||
|
};
|
||||||
|
|
||||||
|
// dimensionality reduction methods, used by cvector-generator
|
||||||
|
enum dimre_method {
|
||||||
|
DIMRE_METHOD_PCA,
|
||||||
|
DIMRE_METHOD_MEAN,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum common_conversation_mode {
|
||||||
|
COMMON_CONVERSATION_MODE_DISABLED = 0,
|
||||||
|
COMMON_CONVERSATION_MODE_ENABLED = 1,
|
||||||
|
COMMON_CONVERSATION_MODE_AUTO = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct common_grammar_trigger {
|
||||||
|
std::string word;
|
||||||
|
bool at_start;
|
||||||
|
};
|
||||||
|
|
||||||
|
// sampling parameters
|
||||||
|
struct common_params_sampling {
|
||||||
|
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
|
||||||
|
|
||||||
|
int32_t n_prev = 64; // number of previous tokens to remember
|
||||||
|
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
||||||
|
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
|
||||||
|
int32_t top_k = 40; // <= 0 to use vocab size
|
||||||
|
float top_p = 0.95f; // 1.0 = disabled
|
||||||
|
float min_p = 0.05f; // 0.0 = disabled
|
||||||
|
float xtc_probability = 0.00f; // 0.0 = disabled
|
||||||
|
float xtc_threshold = 0.10f; // > 0.5 disables XTC
|
||||||
|
float typ_p = 1.00f; // typical_p, 1.0 = disabled
|
||||||
|
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
|
||||||
|
float dynatemp_range = 0.00f; // 0.0 = disabled
|
||||||
|
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
||||||
|
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||||
|
float penalty_repeat = 1.00f; // 1.0 = disabled
|
||||||
|
float penalty_freq = 0.00f; // 0.0 = disabled
|
||||||
|
float penalty_present = 0.00f; // 0.0 = disabled
|
||||||
|
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
|
||||||
|
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
|
||||||
|
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
|
||||||
|
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
||||||
|
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||||
|
float mirostat_tau = 5.00f; // target entropy
|
||||||
|
float mirostat_eta = 0.10f; // learning rate
|
||||||
|
bool ignore_eos = false;
|
||||||
|
bool no_perf = false; // disable performance metrics
|
||||||
|
bool timing_per_token = false;
|
||||||
|
|
||||||
|
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<enum common_sampler_type> samplers = {
|
||||||
|
COMMON_SAMPLER_TYPE_PENALTIES,
|
||||||
|
COMMON_SAMPLER_TYPE_DRY,
|
||||||
|
COMMON_SAMPLER_TYPE_TOP_K,
|
||||||
|
COMMON_SAMPLER_TYPE_TYPICAL_P,
|
||||||
|
COMMON_SAMPLER_TYPE_TOP_P,
|
||||||
|
COMMON_SAMPLER_TYPE_MIN_P,
|
||||||
|
COMMON_SAMPLER_TYPE_XTC,
|
||||||
|
COMMON_SAMPLER_TYPE_TEMPERATURE,
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string grammar; // optional BNF-like grammar to constrain sampling
|
||||||
|
bool grammar_lazy = false;
|
||||||
|
std::vector<common_grammar_trigger> grammar_trigger_words; // optional trigger words to trigger lazy grammar
|
||||||
|
std::vector<llama_token> grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.
|
||||||
|
std::set<llama_token> preserved_tokens;
|
||||||
|
|
||||||
|
std::vector<llama_logit_bias> logit_bias; // logit biases to apply
|
||||||
|
|
||||||
|
// print the parameters into a string
|
||||||
|
std::string print() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct common_params_speculative {
|
||||||
|
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
|
||||||
|
|
||||||
|
int32_t n_ctx = 0; // draft context size
|
||||||
|
int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
|
||||||
|
int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding
|
||||||
|
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
||||||
|
float p_split = 0.1f; // speculative decoding split probability
|
||||||
|
float p_min = 0.9f; // minimum speculative decoding probability (greedy)
|
||||||
|
|
||||||
|
struct cpu_params cpuparams;
|
||||||
|
struct cpu_params cpuparams_batch;
|
||||||
|
|
||||||
|
std::string hf_repo = ""; // HF repo // NOLINT
|
||||||
|
std::string hf_file = ""; // HF file // NOLINT
|
||||||
|
|
||||||
|
std::string model = ""; // draft model for speculative decoding // NOLINT
|
||||||
|
std::string model_url = ""; // model url to download // NOLINT
|
||||||
|
};
|
||||||
|
|
||||||
|
struct common_params_vocoder {
|
||||||
|
std::string hf_repo = ""; // HF repo // NOLINT
|
||||||
|
std::string hf_file = ""; // HF file // NOLINT
|
||||||
|
|
||||||
|
std::string model = ""; // model path // NOLINT
|
||||||
|
std::string model_url = ""; // model url to download // NOLINT
|
||||||
|
|
||||||
|
bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT
|
||||||
|
};
|
||||||
|
|
||||||
|
struct common_params {
|
||||||
int32_t n_predict = -1; // new tokens to predict
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
int32_t n_ctx = 0; // context size
|
int32_t n_ctx = 4096; // context size
|
||||||
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||||
int32_t n_draft = 5; // number of tokens to draft during speculative decoding
|
|
||||||
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
||||||
int32_t n_parallel = 1; // number of parallel sequences to decode
|
int32_t n_parallel = 1; // number of parallel sequences to decode
|
||||||
int32_t n_sequences = 1; // number of sequences to decode
|
int32_t n_sequences = 1; // number of sequences to decode
|
||||||
float p_split = 0.1f; // speculative decoding split probability
|
|
||||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
|
|
||||||
int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
|
||||||
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
|
||||||
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
|
|
||||||
int32_t n_beams = 0; // if non-zero then use beam search of given width.
|
|
||||||
int32_t grp_attn_n = 1; // group-attention factor
|
int32_t grp_attn_n = 1; // group-attention factor
|
||||||
int32_t grp_attn_w = 512; // group-attention width
|
int32_t grp_attn_w = 512; // group-attention width
|
||||||
int32_t n_print = -1; // print token count every n tokens (-1 = disabled)
|
int32_t n_print = -1; // print token count every n tokens (-1 = disabled)
|
||||||
|
@@ -84,46 +221,56 @@ struct gpt_params {
    float   yarn_beta_fast = 32.0f; // YaRN low correction dim
    float   yarn_beta_slow =  1.0f; // YaRN high correction dim
    int32_t yarn_orig_ctx  =     0; // YaRN original context length
-   float   defrag_thold   = -1.0f; // KV cache defragmentation threshold
+   float   defrag_thold   =  0.1f; // KV cache defragmentation threshold
+
+   // offload params
+   std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
+
+   int32_t n_gpu_layers      = -1;  // number of layers to store in VRAM (-1 - use default)
+   int32_t main_gpu          =  0;  // the GPU that is used for scratch and small tensors
+   float   tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
+
+   enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
+
+   struct cpu_params cpuparams;
+   struct cpu_params cpuparams_batch;
    ggml_backend_sched_eval_callback cb_eval = nullptr;
    void * cb_eval_user_data                 = nullptr;

    ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;

-   enum llama_split_mode        split_mode        = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
    enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
    enum llama_pooling_type      pooling_type      = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
+   enum llama_attention_type    attention_type    = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings

-   // // sampling parameters
-   struct llama_sampling_params sparams;
+   struct common_params_sampling    sampling;
+   struct common_params_speculative speculative;
+   struct common_params_vocoder     vocoder;

-   std::string model                = "";        // model path
-   std::string model_draft          = "";        // draft model for speculative decoding
-   std::string model_alias          = "unknown"; // model alias
-   std::string model_url            = "";        // model url to download
-   std::string hf_repo              = "";        // HF repo
-   std::string hf_file              = "";        // HF file
-   std::string prompt               = "";
-   std::string prompt_file          = "";        // store the external prompt file name
-   std::string path_prompt_cache    = "";        // path to file for saving/loading prompt eval state
-   std::string input_prefix         = "";        // string to prefix user inputs with
-   std::string input_suffix         = "";        // string to suffix user inputs with
-   std::string logdir               = "";        // directory in which to save YAML log files
-   std::string lookup_cache_static  = "";        // path of static ngram cache file for lookup decoding
-   std::string lookup_cache_dynamic = "";        // path of dynamic ngram cache file for lookup decoding
-   std::string logits_file          = "";        // file for saving *all* logits
-   std::string rpc_servers          = "";        // comma separated list of RPC servers
+   std::string model                = ""; // model path                                          // NOLINT
+   std::string model_alias          = ""; // model alias                                         // NOLINT
+   std::string model_url            = ""; // model url to download                               // NOLINT
+   std::string hf_token             = ""; // HF token                                            // NOLINT
+   std::string hf_repo              = ""; // HF repo                                             // NOLINT
+   std::string hf_file              = ""; // HF file                                             // NOLINT
+   std::string prompt               = "";                                                        // NOLINT
+   std::string prompt_file          = ""; // store the external prompt file name                 // NOLINT
+   std::string path_prompt_cache    = ""; // path to file for saving/loading prompt eval state   // NOLINT
+   std::string input_prefix         = ""; // string to prefix user inputs with                   // NOLINT
+   std::string input_suffix         = ""; // string to suffix user inputs with                   // NOLINT
+   std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding // NOLINT
+   std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
+   std::string logits_file          = ""; // file for saving *all* logits                        // NOLINT

    std::vector<std::string> in_files;   // all input files
    std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
    std::vector<llama_model_kv_override> kv_overrides;

-   // TODO: avoid tuple, use struct
-   std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
-   std::string lora_base = "";                                // base model path for the lora adapter
+   bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
+   std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale

-   std::vector<llama_control_vector_load_info>  control_vectors; // control vector with user defined scale
+   std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale

    int32_t verbosity                  =  0;
    int32_t control_vector_layer_start = -1; // layer range for control vector
@@ -149,54 +296,66 @@ struct gpt_params {
    bool special           = false; // enable special token output
    bool interactive       = false; // interactive mode
    bool interactive_first = false; // wait for user input immediately
-   bool conversation      = false; // conversation mode (does not print special tokens and suffix/prefix)
    bool prompt_cache_all  = false; // save user input and generations to prompt cache
    bool prompt_cache_ro   = false; // open the prompt cache read-only and do not update it

-   bool embedding         = false; // get only sentence embedding
    bool escape            = true;  // escape "\n", "\r", "\t", "\'", "\"", and "\\"
    bool multiline_input   = false; // reverse the usage of `\`
    bool simple_io         = false; // improves compatibility with subprocesses and limited consoles
    bool cont_batching     = true;  // insert new sequences for decoding on-the-fly
    bool flash_attn        = false; // flash attention
+   bool no_perf           = false; // disable performance metrics
+   bool ctx_shift         = true;  // context shift on inifinite text generation

    bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
-   bool ignore_eos        = false; // ignore generated EOS tokens
    bool logits_all        = false; // return logits for all tokens in the batch
    bool use_mmap          = true;  // use mmap for faster loads
    bool use_mlock         = false; // use mlock to keep model in memory
    bool verbose_prompt    = false; // print prompt tokens before generation
    bool display_prompt    = true;  // print prompt before generation
-   bool infill            = false; // use infill mode
    bool dump_kv_cache     = false; // dump the KV cache contents for debugging purposes
    bool no_kv_offload     = false; // disable KV offloading
    bool warmup            = true;  // warmup run
    bool check_tensors     = false; // validate tensor data

-   std::string cache_type_k = "f16";       // KV cache data type for the K
-   std::string cache_type_v = "f16";       // KV cache data type for the V
+   ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
+   ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
+
+   common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
    // multimodal models (see examples/llava)
-   std::string mmproj = "";        // path to multimodal projector
+   std::string mmproj = "";        // path to multimodal projector // NOLINT
    std::vector<std::string> image; // path to image file(s)

+   // embedding
+   bool embedding         = false; // get only sentence embedding
+   int32_t embd_normalize = 2;     // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
+   std::string embd_out   = "";    // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
+   std::string embd_sep   = "\n";  // separator of embeddings
+   bool reranking         = false; // enable reranking support on server

    // server params
    int32_t port           = 8080;         // server listens on this network port
    int32_t timeout_read   = 600;          // http read timeout in seconds
    int32_t timeout_write  = timeout_read; // http write timeout in seconds
-   int32_t n_threads_http = -1;           // number of threads to process HTTP requests
+   int32_t n_threads_http = -1;           // number of threads to process HTTP requests (TODO: support threadpool)
+   int32_t n_cache_reuse  = 0;            // min chunk size to reuse from the cache via KV shifting

    std::string hostname      = "127.0.0.1";
-   std::string public_path   = "";
-   std::string chat_template = "";
-   std::string system_prompt = "";
+   std::string public_path   = "";   // NOLINT
+   std::string chat_template = "";   // NOLINT
+   bool use_jinja            = false; // NOLINT
+   bool enable_chat_template = true;

    std::vector<std::string> api_keys;

-   std::string ssl_file_key  = "";
-   std::string ssl_file_cert = "";
+   std::string ssl_file_key  = ""; // NOLINT
+   std::string ssl_file_cert = ""; // NOLINT

-   bool endpoint_slots   = true;
+   // "advanced" endpoints are disabled by default for better security
+   bool webui            = true;
+   bool endpoint_slots   = false;
+   bool endpoint_props   = false; // only control POST requests, not GET
    bool endpoint_metrics = false;

    bool log_json = false;
@@ -232,28 +391,63 @@ struct gpt_params {
    bool process_output = false; // collect data for the output tensor
    bool compute_ppl    = true;  // whether to compute perplexity
+
+   // cvector-generator params
+   int n_pca_batch      = 100;
+   int n_pca_iterations = 1000;
+   dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
+   std::string cvector_outfile       = "control_vector.gguf";
+   std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
+   std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
+
+   bool spm_infill = false; // suffix/prefix/middle pattern for infill
+
+   std::string lora_outfile = "ggml-lora-merged-f16.gguf";
+
+   // batched-bench params
+   bool batched_bench_output_jsonl = false;
};
-void gpt_params_handle_model_default(gpt_params & params);
-
-bool gpt_params_parse_ex   (int argc, char ** argv, gpt_params & params);
-bool gpt_params_parse      (int argc, char ** argv, gpt_params & params);
-bool gpt_params_find_arg   (int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
-void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
-
-std::string gpt_params_get_system_info(const gpt_params & params);
+// call once at the start of a program if it uses libcommon
+// initializes the logging system and prints info about the build
+void common_init();
+
+std::string common_params_get_system_info(const common_params & params);
+
+bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]);
+bool parse_cpu_mask (const std::string & mask,  bool(&boolmask)[GGML_MAX_N_THREADS]);
+void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr);
+bool set_process_priority(enum ggml_sched_priority prio);
//
// String utils
//

-std::vector<std::string> string_split(std::string input, char separator);
+#ifdef __GNUC__
+#    ifdef __MINGW32__
+#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    else
+#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#    endif
+#else
+#    define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
+#endif
+
+LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
+std::string string_format(const char * fmt, ...);

std::string string_strip(const std::string & str);
std::string string_get_sortable_timestamp();

+std::string string_join(const std::vector<std::string> & values, const std::string & separator);
+std::vector<std::string> string_split(const std::string & str, const std::string & delimiter);
+std::string string_repeat(const std::string & str, size_t n);
+
+void string_replace_all(std::string & s, const std::string & search, const std::string & replace);

template<class T>
static std::vector<T> string_split(const std::string & str, char delim) {
+   static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
    std::vector<T> values;
    std::istringstream str_stream(str);
    std::string token;
@@ -266,9 +460,40 @@ static std::vector<T> string_split(const std::string & str, char delim) {
    return values;
}

+template<>
+std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
+{
+    std::vector<std::string> parts;
+    size_t begin_pos     = 0;
+    size_t separator_pos = input.find(separator);
+    while (separator_pos != std::string::npos) {
+        std::string part = input.substr(begin_pos, separator_pos - begin_pos);
+        parts.emplace_back(part);
+        begin_pos     = separator_pos + 1;
+        separator_pos = input.find(separator, begin_pos);
+    }
+    parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos));
+    return parts;
+}
+
+static bool string_starts_with(const std::string & str,
+                               const std::string & prefix) { // While we wait for C++20's std::string::starts_with...
+    return str.rfind(prefix, 0) == 0;
+}
+
+static bool string_ends_with(const std::string & str,
+                             const std::string & suffix) { // While we wait for C++20's std::string::ends_with...
+    return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
+}
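As a rough usage sketch of the split helpers above (assuming this header is available as "common.h"; the input strings and delimiters are made up for illustration):

#include "common.h"

#include <cstdio>

int main() {
    // template overload: parse a comma-separated list of integers
    std::vector<int> gpu_layers = string_split<int>("8,16,32", ',');

    // std::string specialization: keeps empty fields -> {"a", "", "b"}
    std::vector<std::string> parts = string_split<std::string>("a::b", ':');

    if (string_starts_with("ggml-model-q4_0.gguf", "ggml-") &&
        string_ends_with  ("ggml-model-q4_0.gguf", ".gguf")) {
        printf("%zu ints, %zu parts\n", gpu_layers.size(), parts.size());
    }
    return 0;
}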
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
void string_process_escapes(std::string & input);

+std::string string_from(bool value);
+std::string string_from(const std::vector<int> & values);
+std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
+std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch);

//
// Filesystem utils
//

@@ -283,108 +508,193 @@ std::string fs_get_cache_file(const std::string & filename);
// Model utils
//

-// TODO: avoid tuplue, use struct
-std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params);
-
-struct llama_model_params   llama_model_params_from_gpt_params  (const gpt_params & params);
-struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
-
-struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params);
-struct llama_model * llama_load_model_from_hf (const char * repo, const char * file, const char * path_model, const struct llama_model_params & params);
+// note: defines object's lifetime
+struct common_init_result {
+    llama_model_ptr   model;
+    llama_context_ptr context;
+
+    std::vector<llama_adapter_lora_ptr> lora;
+};
+
+struct common_init_result common_init_from_params(common_params & params);
+
+struct llama_model_params     common_model_params_to_llama  (      common_params & params);
+struct llama_context_params   common_context_params_to_llama(const common_params & params);
+struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
+
+struct llama_model * common_load_model_from_url(
+    const std::string & model_url,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);
+
+struct llama_model * common_load_model_from_hf(
+    const std::string & repo,
+    const std::string & remote_path,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);
+
+std::pair<std::string, std::string> common_get_hf_file(
+    const std::string & hf_repo_with_tag,
+    const std::string & hf_token);
+
+// clear LoRA adapters from context, then apply new list of adapters
+void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
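A minimal sketch of the intended call sequence for the init helpers above (the model path and context size are illustrative; error handling kept to the bare minimum):

#include "common.h"

#include <cstdio>

int main() {
    common_params params;
    params.model = "models/7B/ggml-model-q4_0.gguf"; // hypothetical local path
    params.n_ctx = 4096;

    common_init(); // set up logging and print build info

    common_init_result llama_init = common_init_from_params(params);
    llama_model   * model = llama_init.model.get();
    llama_context * ctx   = llama_init.context.get();
    if (model == nullptr || ctx == nullptr) {
        fprintf(stderr, "failed to load model '%s'\n", params.model.c_str());
        return 1;
    }

    // ... run inference with model/ctx; the smart pointers held by common_init_result
    //     release both objects when llama_init goes out of scope
    return 0;
}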
//
// Batch utils
//

-void llama_batch_clear(struct llama_batch & batch);
+void common_batch_clear(struct llama_batch & batch);

-void llama_batch_add(
+void common_batch_add(
    struct llama_batch & batch,
    llama_token id,
    llama_pos pos,
    const std::vector<llama_seq_id> & seq_ids,
    bool logits);

+//
+// Token utils
+//
+
+// longest common prefix
+size_t common_lcp(const llama_tokens & a, const llama_tokens & b);
+
+// longet common subsequence
+size_t common_lcs(const llama_tokens & a, const llama_tokens & b);
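For orientation, a sketch of how these batch helpers are typically combined to fill a prompt batch on sequence id 0, with logits requested only for the last token (llama_batch_init/llama_batch_free are assumed from llama.h):

#include "common.h"

static llama_batch make_prompt_batch(const std::vector<llama_token> & prompt_tokens) {
    llama_batch batch = llama_batch_init((int32_t) prompt_tokens.size(), 0, 1);

    common_batch_clear(batch);
    for (size_t i = 0; i < prompt_tokens.size(); ++i) {
        const bool need_logits = (i + 1 == prompt_tokens.size()); // logits only for the last prompt token
        common_batch_add(batch, prompt_tokens[i], (llama_pos) i, { 0 }, need_logits);
    }

    return batch; // caller is expected to llama_batch_free(batch) after decoding
}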
//
// Vocab utils
//

// tokenizes a string into a vector of tokens
// should work similar to Python's `tokenizer.encode`
-std::vector<llama_token> llama_tokenize(
+std::vector<llama_token> common_tokenize(
    const struct llama_context * ctx,
    const std::string & text,
    bool add_special,
    bool parse_special = false);

-std::vector<llama_token> llama_tokenize(
-    const struct llama_model * model,
+std::vector<llama_token> common_tokenize(
+    const struct llama_vocab * vocab,
    const std::string & text,
    bool add_special,
    bool parse_special = false);

// tokenizes a token into a piece, optionally renders special/control tokens
// should work similar to Python's `tokenizer.id_to_piece`
-std::string llama_token_to_piece(
+std::string common_token_to_piece(
    const struct llama_context * ctx,
    llama_token token,
    bool special = true);

+std::string common_token_to_piece(
+    const struct llama_vocab * vocab,
+    llama_token token,
+    bool special = true);

-// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
-// that takes into account the tokenizer type and decides how to handle the leading space
-//
-// detokenizes a vector of tokens into a string
-// should work similar to Python's `tokenizer.decode`
-// removes the leading space from the first non-BOS token
-std::string llama_detokenize_spm(
-    llama_context * ctx,
-    const std::vector<llama_token> & tokens);

// detokenizes a vector of tokens into a string
// should work similar to Python's `tokenizer.decode`
-std::string llama_detokenize_bpe(
-    llama_context * ctx,
-    const std::vector<llama_token> & tokens);
+// optionally renders special/control tokens
+std::string common_detokenize(
+    const struct llama_context * ctx,
+    const std::vector<llama_token> & tokens,
+    bool special = true);

-// Uses the value from the model metadata if possible, otherwise
-// defaults to true when model type is SPM, otherwise false.
-bool llama_should_add_bos_token(const llama_model * model);
+std::string common_detokenize(
+    const struct llama_vocab * vocab,
+    const std::vector<llama_token> & tokens,
+    bool special = true);
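A small sketch of a tokenize/detokenize round trip with the declarations above (ctx is assumed to come from common_init_from_params):

#include "common.h"

#include <cstdio>

// print each token id with its rendered piece, then re-assemble the text
static void dump_tokens(const struct llama_context * ctx, const std::string & text) {
    std::vector<llama_token> tokens = common_tokenize(ctx, text, /*add_special*/ true, /*parse_special*/ true);

    for (llama_token tok : tokens) {
        printf("%6d -> '%s'\n", tok, common_token_to_piece(ctx, tok).c_str());
    }

    printf("detokenized: '%s'\n", common_detokenize(ctx, tokens, /*special*/ false).c_str());
}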
//
// Chat template utils
//

+struct common_tool_call {
+    std::string name;
+    std::string arguments;
+    std::string id;
+};
+
+// same with llama_chat_message, but uses std::string
+struct common_chat_msg {
+    std::string role;
+    std::string content;
+    std::vector<common_tool_call> tool_calls;
+    std::string tool_plan = "";
+};

// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
-bool llama_chat_verify_template(const std::string & tmpl);
+bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);

+namespace minja {
+    class chat_template;
+}
+
+typedef minja::chat_template common_chat_template;
+
+struct common_chat_templates {
+    bool has_explicit_template; // Model had builtin template or template overridde was specified.
+    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+    std::unique_ptr<common_chat_template> template_tool_use;
+};
+
+// CPP wrapper for llama_chat_apply_template
+// If the built-in template is not supported, we default to chatml
+// If the custom "tmpl" is not supported, we throw an error
+std::string common_chat_apply_template(
+    const common_chat_template & tmpl,
+    const std::vector<common_chat_msg> & chat,
+    bool add_ass,
+    bool use_jinja);
+
+// Format single message, while taking into account the position of that message in chat history
+std::string common_chat_format_single(
+    const common_chat_template & tmpl,
+    const std::vector<common_chat_msg> & past_msg,
+    const common_chat_msg & new_msg,
+    bool add_ass,
+    bool use_jinja);
+
+// Returns an example of formatted chat
+std::string common_chat_format_example(
+    const common_chat_template & tmpl, bool use_jinja);
+
+common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
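A sketch of rendering a two-message chat with the model's built-in template, using only the names declared above (the system prompt is illustrative):

#include "common.h"

static std::string render_prompt(const struct llama_model * model, const std::string & user_text) {
    common_chat_templates tmpls = common_chat_templates_from_model(model, /*chat_template_override*/ "");

    common_chat_msg sys_msg;
    sys_msg.role    = "system";
    sys_msg.content = "You are a helpful assistant."; // illustrative system prompt

    common_chat_msg usr_msg;
    usr_msg.role    = "user";
    usr_msg.content = user_text;

    std::vector<common_chat_msg> chat = { sys_msg, usr_msg };

    // add_ass = true appends the assistant prefix so generation can continue from it
    return common_chat_apply_template(*tmpls.template_default, chat, /*add_ass*/ true, /*use_jinja*/ false);
}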
//
// KV cache utils
//

// Dump the KV cache view with the number of sequences per cell.
-void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
+void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);

// Dump the KV cache view showing individual sequences in each cell (long output).
-void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
+void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);

//
// Embedding utils
//

-void llama_embd_normalize(const float * inp, float * out, int n);
+// TODO: repace embd_norm with an enum
+void common_embd_normalize(const float * inp, float * out, int n, int embd_norm);

-float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);
+float common_embd_similarity_cos(const float * embd1, const float * embd2, int n);
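A minimal sketch combining the two embedding helpers above (both inputs are assumed to have equal length):

#include "common.h"

#include <vector>

// cosine similarity between two raw embeddings after euclidean normalization
static float embd_cosine(const std::vector<float> & a, const std::vector<float> & b) {
    const int n = (int) a.size();

    std::vector<float> a_norm(n);
    std::vector<float> b_norm(n);
    common_embd_normalize(a.data(), a_norm.data(), n, /*embd_norm*/ 2); // 2 = euclidean, matching the common_params default
    common_embd_normalize(b.data(), b_norm.data(), n, /*embd_norm*/ 2);

    return common_embd_similarity_cos(a_norm.data(), b_norm.data(), n);
}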
//
// Control vector utils
//

-struct llama_control_vector_data {
+struct common_control_vector_data {
    int n_embd;

    // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
    std::vector<float> data;
};

-struct llama_control_vector_load_info {
+struct common_control_vector_load_info {
    float strength;

    std::string fname;
@@ -392,25 +702,16 @@ struct llama_control_vector_load_info {

// Load control vectors, scale each by strength, and add them together.
// On error, returns {-1, empty}
-llama_control_vector_data   llama_control_vector_load (const std::vector<llama_control_vector_load_info>  & load_infos);
+common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos);
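A sketch of merging two control vectors with user-defined strengths via the loader above (the file names are hypothetical):

#include "common.h"

#include <cstdio>
#include <vector>

int main() {
    std::vector<common_control_vector_load_info> load_infos = {
        {  0.8f, "control_vector_a.gguf" },
        { -0.4f, "control_vector_b.gguf" },
    };

    common_control_vector_data cvec = common_control_vector_load(load_infos);
    if (cvec.n_embd == -1) {
        fprintf(stderr, "failed to load control vectors\n"); // error case documented above
        return 1;
    }

    // cvec.data holds n_layer * n_embd floats covering layers [1, n_layer]
    printf("n_embd = %d, n_layer = %zu\n", cvec.n_embd, cvec.data.size() / cvec.n_embd);
    return 0;
}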
//
// Split utils
//

-static const char * const LLM_KV_SPLIT_NO            = "split.no";
-static const char * const LLM_KV_SPLIT_COUNT         = "split.count";
-static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
-
-//
-// YAML utils
-//
-
-void yaml_dump_vector_float    (FILE * stream, const char * prop_name, const std::vector<float> & data);
-void yaml_dump_vector_int      (FILE * stream, const char * prop_name, const std::vector<int> & data);
-void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
-
-void yaml_dump_non_result_info(
-    FILE * stream, const gpt_params & params, const llama_context * lctx,
-    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
+namespace {
+
+const char * const LLM_KV_SPLIT_NO            = "split.no";
+const char * const LLM_KV_SPLIT_COUNT         = "split.count";
+const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
+
+}
@@ -94,6 +94,9 @@ namespace console {
                simple_io = true;
            }
        }
+       if (simple_io) {
+           _setmode(_fileno(stdin), _O_U8TEXT);
+       }
#else
    // POSIX-specific console initialization
    if (!simple_io) {
@@ -1,536 +0,0 @@
-#include "grammar-parser.h"
-
-#include <cstdint>
-#include <cwchar>
-#include <string>
-#include <utility>
-#include <stdexcept>
-#include <exception>
-
namespace grammar_parser {
|
|
||||||
// NOTE: assumes valid utf8 (but checks for overrun)
|
|
||||||
// copied from llama.cpp
|
|
||||||
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
|
||||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
|
||||||
uint8_t first_byte = static_cast<uint8_t>(*src);
|
|
||||||
uint8_t highbits = first_byte >> 4;
|
|
||||||
int len = lookup[highbits];
|
|
||||||
uint8_t mask = (1 << (8 - len)) - 1;
|
|
||||||
uint32_t value = first_byte & mask;
|
|
||||||
const char * end = src + len; // may overrun!
|
|
||||||
const char * pos = src + 1;
|
|
||||||
for ( ; pos < end && *pos; pos++) {
|
|
||||||
value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
|
|
||||||
}
|
|
||||||
return std::make_pair(value, pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
|
||||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
|
||||||
auto result = state.symbol_ids.emplace(std::string(src, len), next_id);
|
|
||||||
return result.first->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
|
||||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
|
||||||
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
|
||||||
return next_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void add_rule(
|
|
||||||
parse_state & state,
|
|
||||||
uint32_t rule_id,
|
|
||||||
const std::vector<llama_grammar_element> & rule) {
|
|
||||||
if (state.rules.size() <= rule_id) {
|
|
||||||
state.rules.resize(rule_id + 1);
|
|
||||||
}
|
|
||||||
state.rules[rule_id] = rule;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool is_digit_char(char c) {
|
|
||||||
return '0' <= c && c <= '9';
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool is_word_char(char c) {
|
|
||||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || is_digit_char(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
|
||||||
const char * pos = src;
|
|
||||||
const char * end = src + size;
|
|
||||||
uint32_t value = 0;
|
|
||||||
for ( ; pos < end && *pos; pos++) {
|
|
||||||
value <<= 4;
|
|
||||||
char c = *pos;
|
|
||||||
if ('a' <= c && c <= 'f') {
|
|
||||||
value += c - 'a' + 10;
|
|
||||||
} else if ('A' <= c && c <= 'F') {
|
|
||||||
value += c - 'A' + 10;
|
|
||||||
} else if ('0' <= c && c <= '9') {
|
|
||||||
value += c - '0';
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (pos != end) {
|
|
||||||
throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
|
|
||||||
}
|
|
||||||
return std::make_pair(value, pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char * parse_space(const char * src, bool newline_ok) {
|
|
||||||
const char * pos = src;
|
|
||||||
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
|
||||||
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
|
||||||
if (*pos == '#') {
|
|
||||||
while (*pos && *pos != '\r' && *pos != '\n') {
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char * parse_name(const char * src) {
|
|
||||||
const char * pos = src;
|
|
||||||
while (is_word_char(*pos)) {
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
if (pos == src) {
|
|
||||||
throw std::runtime_error(std::string("expecting name at ") + src);
|
|
||||||
}
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char * parse_int(const char * src) {
|
|
||||||
const char * pos = src;
|
|
||||||
while (is_digit_char(*pos)) {
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
if (pos == src) {
|
|
||||||
throw std::runtime_error(std::string("expecting integer at ") + src);
|
|
||||||
}
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::pair<uint32_t, const char *> parse_char(const char * src) {
|
|
||||||
if (*src == '\\') {
|
|
||||||
switch (src[1]) {
|
|
||||||
case 'x': return parse_hex(src + 2, 2);
|
|
||||||
case 'u': return parse_hex(src + 2, 4);
|
|
||||||
case 'U': return parse_hex(src + 2, 8);
|
|
||||||
case 't': return std::make_pair('\t', src + 2);
|
|
||||||
case 'r': return std::make_pair('\r', src + 2);
|
|
||||||
case 'n': return std::make_pair('\n', src + 2);
|
|
||||||
case '\\':
|
|
||||||
case '"':
|
|
||||||
case '[':
|
|
||||||
case ']':
|
|
||||||
return std::make_pair(src[1], src + 2);
|
|
||||||
default:
|
|
||||||
throw std::runtime_error(std::string("unknown escape at ") + src);
|
|
||||||
}
|
|
||||||
} else if (*src) {
|
|
||||||
return decode_utf8(src);
|
|
||||||
}
|
|
||||||
throw std::runtime_error("unexpected end of input");
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * parse_alternates(
|
|
||||||
parse_state & state,
|
|
||||||
const char * src,
|
|
||||||
const std::string & rule_name,
|
|
||||||
uint32_t rule_id,
|
|
||||||
bool is_nested);
|
|
||||||
|
|
||||||
static const char * parse_sequence(
|
|
||||||
parse_state & state,
|
|
||||||
const char * src,
|
|
||||||
const std::string & rule_name,
|
|
||||||
std::vector<llama_grammar_element> & out_elements,
|
|
||||||
bool is_nested) {
|
|
||||||
size_t last_sym_start = out_elements.size();
|
|
||||||
const char * pos = src;
|
|
||||||
|
|
||||||
auto handle_repetitions = [&](int min_times, int max_times) {
|
|
||||||
|
|
||||||
if (last_sym_start == out_elements.size()) {
|
|
||||||
throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
// apply transformation to previous symbol (last_sym_start to end) according to
|
|
||||||
// the following rewrite rules:
|
|
||||||
// S{m,n} --> S S S (m times) S'(n-m)
|
|
||||||
// S'(x) ::= S S'(x-1) |
|
|
||||||
// (... n-m definitions of these S' rules ...)
|
|
||||||
// S'(1) ::= S |
|
|
||||||
// S{m,} --> S S S (m times) S'
|
|
||||||
// S' ::= S S' |
|
|
||||||
// S* --> S{0,}
|
|
||||||
// --> S' ::= S S' |
|
|
||||||
// S+ --> S{1,}
|
|
||||||
// --> S S'
|
|
||||||
// S' ::= S S' |
|
|
||||||
// S? --> S{0,1}
|
|
||||||
// --> S'
|
|
||||||
// S' ::= S |
|
|
||||||
|
|
||||||
std::vector<llama_grammar_element> previous_elements(out_elements.begin() + last_sym_start, out_elements.end());
|
|
||||||
if (min_times == 0) {
|
|
||||||
out_elements.resize(last_sym_start);
|
|
||||||
} else {
|
|
||||||
// Repeat the previous elements (min_times - 1) times
|
|
||||||
for (int i = 1; i < min_times; i++) {
|
|
||||||
out_elements.insert(out_elements.end(), previous_elements.begin(), previous_elements.end());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t last_rec_rule_id = 0;
|
|
||||||
auto n_opt = max_times < 0 ? 1 : max_times - min_times;
|
|
||||||
|
|
||||||
std::vector<llama_grammar_element> rec_rule(previous_elements);
|
|
||||||
for (int i = 0; i < n_opt; i++) {
|
|
||||||
rec_rule.resize(previous_elements.size());
|
|
||||||
uint32_t rec_rule_id = generate_symbol_id(state, rule_name);
|
|
||||||
if (i > 0 || max_times < 0) {
|
|
||||||
rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
|
|
||||||
}
|
|
||||||
rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
|
|
||||||
rec_rule.push_back({LLAMA_GRETYPE_END, 0});
|
|
||||||
add_rule(state, rec_rule_id, rec_rule);
|
|
||||||
last_rec_rule_id = rec_rule_id;
|
|
||||||
}
|
|
||||||
if (n_opt > 0) {
|
|
||||||
out_elements.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
while (*pos) {
|
|
||||||
if (*pos == '"') { // literal string
|
|
||||||
pos++;
|
|
||||||
last_sym_start = out_elements.size();
|
|
||||||
while (*pos != '"') {
|
|
||||||
if (!*pos) {
|
|
||||||
throw std::runtime_error("unexpected end of input");
|
|
||||||
}
|
|
||||||
auto char_pair = parse_char(pos);
|
|
||||||
pos = char_pair.second;
|
|
||||||
out_elements.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
|
|
||||||
}
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
} else if (*pos == '[') { // char range(s)
|
|
||||||
pos++;
|
|
||||||
enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
|
|
||||||
if (*pos == '^') {
|
|
||||||
pos++;
|
|
||||||
start_type = LLAMA_GRETYPE_CHAR_NOT;
|
|
||||||
}
|
|
||||||
last_sym_start = out_elements.size();
|
|
||||||
while (*pos != ']') {
|
|
||||||
if (!*pos) {
|
|
||||||
throw std::runtime_error("unexpected end of input");
|
|
||||||
}
|
|
||||||
auto char_pair = parse_char(pos);
|
|
||||||
pos = char_pair.second;
|
|
||||||
enum llama_gretype type = last_sym_start < out_elements.size()
|
|
||||||
? LLAMA_GRETYPE_CHAR_ALT
|
|
||||||
: start_type;
|
|
||||||
|
|
||||||
out_elements.push_back({type, char_pair.first});
|
|
||||||
if (pos[0] == '-' && pos[1] != ']') {
|
|
||||||
if (!pos[1]) {
|
|
||||||
throw std::runtime_error("unexpected end of input");
|
|
||||||
}
|
|
||||||
auto endchar_pair = parse_char(pos + 1);
|
|
||||||
pos = endchar_pair.second;
|
|
||||||
out_elements.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
} else if (is_word_char(*pos)) { // rule reference
|
|
||||||
const char * name_end = parse_name(pos);
|
|
||||||
uint32_t ref_rule_id = get_symbol_id(state, pos, name_end - pos);
|
|
||||||
pos = parse_space(name_end, is_nested);
|
|
||||||
last_sym_start = out_elements.size();
|
|
||||||
out_elements.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
|
|
||||||
} else if (*pos == '(') { // grouping
|
|
||||||
// parse nested alternates into synthesized rule
|
|
||||||
pos = parse_space(pos + 1, true);
|
|
||||||
uint32_t sub_rule_id = generate_symbol_id(state, rule_name);
|
|
||||||
pos = parse_alternates(state, pos, rule_name, sub_rule_id, true);
|
|
||||||
last_sym_start = out_elements.size();
|
|
||||||
// output reference to synthesized rule
|
|
||||||
out_elements.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
|
|
||||||
if (*pos != ')') {
|
|
||||||
throw std::runtime_error(std::string("expecting ')' at ") + pos);
|
|
||||||
}
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
} else if (*pos == '.') { // any char
|
|
||||||
last_sym_start = out_elements.size();
|
|
||||||
out_elements.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
} else if (*pos == '*') {
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
handle_repetitions(0, -1);
|
|
||||||
} else if (*pos == '+') {
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
handle_repetitions(1, -1);
|
|
||||||
} else if (*pos == '?') {
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
handle_repetitions(0, 1);
|
|
||||||
} else if (*pos == '{') {
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
|
|
||||||
if (!is_digit_char(*pos)) {
|
|
||||||
throw std::runtime_error(std::string("expecting an int at ") + pos);
|
|
||||||
}
|
|
||||||
const char * int_end = parse_int(pos);
|
|
||||||
int min_times = std::stoul(std::string(pos, int_end - pos));
|
|
||||||
pos = parse_space(int_end, is_nested);
|
|
||||||
|
|
||||||
int max_times = -1;
|
|
||||||
|
|
||||||
if (*pos == '}') {
|
|
||||||
max_times = min_times;
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
} else if (*pos == ',') {
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
|
|
||||||
if (is_digit_char(*pos)) {
|
|
||||||
const char * int_end = parse_int(pos);
|
|
||||||
max_times = std::stoul(std::string(pos, int_end - pos));
|
|
||||||
pos = parse_space(int_end, is_nested);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*pos != '}') {
|
|
||||||
throw std::runtime_error(std::string("expecting '}' at ") + pos);
|
|
||||||
}
|
|
||||||
pos = parse_space(pos + 1, is_nested);
|
|
||||||
} else {
|
|
||||||
throw std::runtime_error(std::string("expecting ',' at ") + pos);
|
|
||||||
}
|
|
||||||
handle_repetitions(min_times, max_times);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * parse_alternates(
|
|
||||||
parse_state & state,
|
|
||||||
const char * src,
|
|
||||||
const std::string & rule_name,
|
|
||||||
uint32_t rule_id,
|
|
||||||
bool is_nested) {
|
|
||||||
std::vector<llama_grammar_element> rule;
|
|
||||||
const char * pos = parse_sequence(state, src, rule_name, rule, is_nested);
|
|
||||||
while (*pos == '|') {
|
|
||||||
rule.push_back({LLAMA_GRETYPE_ALT, 0});
|
|
||||||
pos = parse_space(pos + 1, true);
|
|
||||||
pos = parse_sequence(state, pos, rule_name, rule, is_nested);
|
|
||||||
}
|
|
||||||
rule.push_back({LLAMA_GRETYPE_END, 0});
|
|
||||||
add_rule(state, rule_id, rule);
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char * parse_rule(parse_state & state, const char * src) {
|
|
||||||
const char * name_end = parse_name(src);
|
|
||||||
const char * pos = parse_space(name_end, false);
|
|
||||||
size_t name_len = name_end - src;
|
|
||||||
uint32_t rule_id = get_symbol_id(state, src, name_len);
|
|
||||||
const std::string name(src, name_len);
|
|
||||||
|
|
||||||
if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
|
|
||||||
throw std::runtime_error(std::string("expecting ::= at ") + pos);
|
|
||||||
}
|
|
||||||
pos = parse_space(pos + 3, true);
|
|
||||||
|
|
||||||
pos = parse_alternates(state, pos, name, rule_id, false);
|
|
||||||
|
|
||||||
if (*pos == '\r') {
|
|
||||||
pos += pos[1] == '\n' ? 2 : 1;
|
|
||||||
} else if (*pos == '\n') {
|
|
||||||
pos++;
|
|
||||||
} else if (*pos) {
|
|
||||||
throw std::runtime_error(std::string("expecting newline or end at ") + pos);
|
|
||||||
}
|
|
||||||
return parse_space(pos, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
parse_state parse(const char * src) {
|
|
||||||
try {
|
|
||||||
parse_state state;
|
|
||||||
const char * pos = parse_space(src, true);
|
|
||||||
while (*pos) {
|
|
||||||
pos = parse_rule(state, pos);
|
|
||||||
}
|
|
||||||
// Validate the state to ensure that all rules are defined
|
|
||||||
for (const auto & rule : state.rules) {
|
|
||||||
for (const auto & elem : rule) {
|
|
||||||
if (elem.type == LLAMA_GRETYPE_RULE_REF) {
|
|
||||||
// Ensure that the rule at that location exists
|
|
||||||
if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) {
|
|
||||||
// Get the name of the rule that is missing
|
|
||||||
for (const auto & kv : state.symbol_ids) {
|
|
||||||
if (kv.second == elem.value) {
|
|
||||||
throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return state;
|
|
||||||
} catch (const std::exception & err) {
|
|
||||||
fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
|
|
||||||
return parse_state();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void print_grammar_char(FILE * file, uint32_t c) {
|
|
||||||
if (0x20 <= c && c <= 0x7f) {
|
|
||||||
fprintf(file, "%c", static_cast<char>(c));
|
|
||||||
} else {
|
|
||||||
// cop out of encoding UTF-8
|
|
||||||
fprintf(file, "<U+%04X>", c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool is_char_element(llama_grammar_element elem) {
|
|
||||||
switch (elem.type) {
|
|
||||||
case LLAMA_GRETYPE_CHAR: return true;
|
|
||||||
case LLAMA_GRETYPE_CHAR_NOT: return true;
|
|
||||||
case LLAMA_GRETYPE_CHAR_ALT: return true;
|
|
||||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
|
|
||||||
case LLAMA_GRETYPE_CHAR_ANY: return true;
|
|
||||||
default: return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
|
|
||||||
for (auto elem : rule) {
|
|
||||||
switch (elem.type) {
|
|
||||||
case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
|
|
||||||
case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
|
|
||||||
case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
|
|
||||||
case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
|
|
||||||
}
|
|
||||||
switch (elem.type) {
|
|
||||||
case LLAMA_GRETYPE_END:
|
|
||||||
case LLAMA_GRETYPE_ALT:
|
|
||||||
case LLAMA_GRETYPE_RULE_REF:
|
|
||||||
fprintf(file, "(%u) ", elem.value);
|
|
||||||
break;
|
|
||||||
case LLAMA_GRETYPE_CHAR:
|
|
||||||
case LLAMA_GRETYPE_CHAR_NOT:
|
|
||||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
|
|
||||||
case LLAMA_GRETYPE_CHAR_ALT:
|
|
||||||
case LLAMA_GRETYPE_CHAR_ANY:
|
|
||||||
fprintf(file, "(\"");
|
|
||||||
print_grammar_char(file, elem.value);
|
|
||||||
fprintf(file, "\") ");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fprintf(file, "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void print_rule(
|
|
||||||
FILE * file,
|
|
||||||
uint32_t rule_id,
|
|
||||||
const std::vector<llama_grammar_element> & rule,
|
|
||||||
const std::map<uint32_t, std::string> & symbol_id_names) {
|
|
||||||
if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
|
|
||||||
throw std::runtime_error(
|
|
||||||
"malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
|
|
||||||
}
|
|
||||||
fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
|
|
||||||
for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
|
|
||||||
llama_grammar_element elem = rule[i];
|
|
||||||
switch (elem.type) {
|
|
||||||
case LLAMA_GRETYPE_END:
|
|
||||||
throw std::runtime_error(
|
|
||||||
"unexpected end of rule: " + std::to_string(rule_id) + "," +
|
|
||||||
std::to_string(i));
|
|
||||||
case LLAMA_GRETYPE_ALT:
|
|
||||||
fprintf(file, "| ");
|
|
||||||
break;
|
|
||||||
case LLAMA_GRETYPE_RULE_REF:
|
|
||||||
fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
|
|
||||||
break;
|
|
||||||
case LLAMA_GRETYPE_CHAR:
|
|
||||||
fprintf(file, "[");
|
|
||||||
print_grammar_char(file, elem.value);
|
|
||||||
break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_NOT:
|
|
||||||
fprintf(file, "[^");
|
|
||||||
print_grammar_char(file, elem.value);
|
|
||||||
break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
|
|
||||||
if (i == 0 || !is_char_element(rule[i - 1])) {
|
|
||||||
throw std::runtime_error(
|
|
||||||
"LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
|
|
||||||
std::to_string(rule_id) + "," + std::to_string(i));
|
|
||||||
}
|
|
||||||
fprintf(file, "-");
|
|
||||||
print_grammar_char(file, elem.value);
|
|
||||||
break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_ALT:
|
|
||||||
if (i == 0 || !is_char_element(rule[i - 1])) {
|
|
||||||
throw std::runtime_error(
|
|
||||||
"LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
|
|
||||||
std::to_string(rule_id) + "," + std::to_string(i));
|
|
||||||
}
|
|
||||||
print_grammar_char(file, elem.value);
|
|
||||||
break;
|
|
||||||
case LLAMA_GRETYPE_CHAR_ANY:
|
|
||||||
fprintf(file, ".");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (is_char_element(elem)) {
|
|
||||||
switch (rule[i + 1].type) {
|
|
||||||
case LLAMA_GRETYPE_CHAR_ALT:
|
|
||||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
|
|
||||||
case LLAMA_GRETYPE_CHAR_ANY:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fprintf(file, "] ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fprintf(file, "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_grammar(FILE * file, const parse_state & state) {
|
|
||||||
try {
|
|
||||||
std::map<uint32_t, std::string> symbol_id_names;
|
|
||||||
for (const auto & kv : state.symbol_ids) {
|
|
||||||
symbol_id_names[kv.second] = kv.first;
|
|
||||||
}
|
|
||||||
for (size_t i = 0, end = state.rules.size(); i < end; i++) {
|
|
||||||
// fprintf(file, "%zu: ", i);
|
|
||||||
// print_rule_binary(file, state.rules[i]);
|
|
||||||
print_rule(file, uint32_t(i), state.rules[i], symbol_id_names);
|
|
||||||
// fprintf(file, "\n");
|
|
||||||
}
|
|
||||||
} catch (const std::exception & err) {
|
|
||||||
fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<const llama_grammar_element *> parse_state::c_rules() {
|
|
||||||
std::vector<const llama_grammar_element *> ret;
|
|
||||||
ret.reserve(rules.size());
|
|
||||||
for (const auto & rule : rules) {
|
|
||||||
ret.push_back(rule.data());
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -1,29 +0,0 @@
-// Implements a parser for an extended Backus-Naur form (BNF), producing the
-// binary context-free grammar format specified by llama.h. Supports character
-// ranges, grouping, and repetition operators. As an example, a grammar for
-// arithmetic might look like:
-//
-// root  ::= expr
-// expr  ::= term ([-+*/] term)*
-// term  ::= num | "(" space expr ")" space
-// num   ::= [0-9]+ space
-// space ::= [ \t\n]*
-
-#pragma once
-#include "llama.h"
-#include <vector>
-#include <map>
-#include <cstdint>
-#include <string>
-
-namespace grammar_parser {
-    struct parse_state {
-        std::map<std::string, uint32_t>                 symbol_ids;
-        std::vector<std::vector<llama_grammar_element>> rules;
-
-        std::vector<const llama_grammar_element *> c_rules();
-    };
-
-    parse_state parse(const char * src);
-    void print_grammar(FILE * file, const parse_state & state);
-}
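For readers tracking this removal: the deleted header above was the whole public surface of the old parser. A sketch of how callers typically used it before the removal (the grammar text is illustrative; the compiled rules were then handed on to the llama grammar API):

#include "grammar-parser.h"

#include <cstdio>
#include <vector>

int main() {
    const char * gbnf = "root ::= \"yes\" | \"no\"";

    grammar_parser::parse_state state = grammar_parser::parse(gbnf);
    if (state.rules.empty()) {
        // parse() catches its own exceptions and returns an empty state on error
        fprintf(stderr, "failed to parse grammar\n");
        return 1;
    }

    grammar_parser::print_grammar(stderr, state); // dump the compiled rules in GBNF-like form

    // raw rule pointers, previously passed on to the llama grammar constructor
    std::vector<const llama_grammar_element *> rules = state.c_rules();
    (void) rules;
    return 0;
}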
@@ -1,4 +1,6 @@
#include "json-schema-to-grammar.h"
+#include "common.h"

#include <algorithm>
#include <fstream>
#include <map>
@@ -11,11 +13,6 @@

using json = nlohmann::ordered_json;

-template <typename Iterator>
-static std::string join(Iterator begin, Iterator end, const std::string & separator);
-
-static std::string repeat(const std::string & str, size_t n);

static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
    auto has_max = max_items != std::numeric_limits<int>::max();
@@ -40,6 +37,233 @@ static std::string build_repetition(const std::string & item_rule, int min_items
    return result;
}
/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
|
||||||
|
class string_view {
|
||||||
|
const std::string & _str;
|
||||||
|
const size_t _start;
|
||||||
|
const size_t _end;
|
||||||
|
public:
|
||||||
|
string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
|
||||||
|
|
||||||
|
size_t size() const {
|
||||||
|
return _end - _start;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t length() const {
|
||||||
|
return size();
|
||||||
|
}
|
||||||
|
|
||||||
|
operator std::string() const {
|
||||||
|
return str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string str() const {
|
||||||
|
return _str.substr(_start, _end - _start);
|
||||||
|
}
|
||||||
|
|
||||||
|
string_view substr(size_t pos, size_t len = std::string::npos) const {
|
||||||
|
return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
|
||||||
|
}
|
||||||
|
|
||||||
|
char operator[](size_t pos) const {
|
||||||
|
auto index = _start + pos;
|
||||||
|
if (index >= _end) {
|
||||||
|
throw std::out_of_range("string_view index out of range");
|
||||||
|
}
|
||||||
|
return _str[_start + pos];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const string_view & other) const {
|
||||||
|
std::string this_str = *this;
|
||||||
|
std::string other_str = other;
|
||||||
|
return this_str == other_str;
|
||||||
|
}
|
||||||
|
};
|
||||||
+
+static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
+    auto has_min = min_value != std::numeric_limits<int>::min();
+    auto has_max = max_value != std::numeric_limits<int>::max();
+
+    auto digit_range = [&](char from, char to) {
+        out << "[";
+        if (from == to) {
+            out << from;
+        } else {
+            out << from << "-" << to;
+        }
+        out << "]";
+    };
+    auto more_digits = [&](int min_digits, int max_digits) {
+        out << "[0-9]";
+        if (min_digits == max_digits && min_digits == 1) {
+            return;
+        }
+        out << "{";
+        out << min_digits;
+        if (max_digits != min_digits) {
+            out << ",";
+            if (max_digits != std::numeric_limits<int>::max()) {
+                out << max_digits;
+            }
+        }
+        out << "}";
+    };
+    std::function<void(const string_view &, const string_view &)> uniform_range =
+        [&](const string_view & from, const string_view & to) {
+            size_t i = 0;
+            while (i < from.length() && i < to.length() && from[i] == to[i]) {
+                i++;
+            }
+            if (i > 0) {
+                out << "\"" << from.substr(0, i).str() << "\"";
+            }
+            if (i < from.length() && i < to.length()) {
+                if (i > 0) {
+                    out << " ";
+                }
+                auto sub_len = from.length() - i - 1;
+                if (sub_len > 0) {
+                    auto from_sub = from.substr(i + 1);
+                    auto to_sub = to.substr(i + 1);
+                    auto sub_zeros = string_repeat("0", sub_len);
+                    auto sub_nines = string_repeat("9", sub_len);
+
+                    auto to_reached = false;
+                    out << "(";
+                    if (from_sub == sub_zeros) {
+                        digit_range(from[i], to[i] - 1);
+                        out << " ";
+                        more_digits(sub_len, sub_len);
+                    } else {
+                        out << "[" << from[i] << "] ";
+                        out << "(";
+                        uniform_range(from_sub, sub_nines);
+                        out << ")";
+                        if (from[i] < to[i] - 1) {
+                            out << " | ";
+                            if (to_sub == sub_nines) {
+                                digit_range(from[i] + 1, to[i]);
+                                to_reached = true;
+                            } else {
+                                digit_range(from[i] + 1, to[i] - 1);
+                            }
+                            out << " ";
+                            more_digits(sub_len, sub_len);
+                        }
+                    }
+                    if (!to_reached) {
+                        out << " | ";
+                        digit_range(to[i], to[i]);
+                        out << " ";
+                        uniform_range(sub_zeros, to_sub);
+                    }
+                    out << ")";
+                } else {
+                    out << "[" << from[i] << "-" << to[i] << "]";
+                }
+            }
+        };
+
+    if (has_min && has_max) {
+        if (min_value < 0 && max_value < 0) {
+            out << "\"-\" (";
+            _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
+            out << ")";
+            return;
+        }
+
+        if (min_value < 0) {
+            out << "\"-\" (";
+            _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
+            out << ") | ";
+            min_value = 0;
+        }
+
+        auto min_s = std::to_string(min_value);
+        auto max_s = std::to_string(max_value);
+        auto min_digits = min_s.length();
+        auto max_digits = max_s.length();
+
+        for (auto digits = min_digits; digits < max_digits; digits++) {
+            uniform_range(min_s, string_repeat("9", digits));
+            min_s = "1" + string_repeat("0", digits);
+            out << " | ";
+        }
+        uniform_range(min_s, max_s);
+        return;
+    }
+
+    auto less_decimals = std::max(decimals_left - 1, 1);
+
+    if (has_min) {
+        if (min_value < 0) {
+            out << "\"-\" (";
+            _build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
+            out << ") | [0] | [1-9] ";
+            more_digits(0, decimals_left - 1);
+        } else if (min_value == 0) {
+            if (top_level) {
+                out << "[0] | [1-9] ";
+                more_digits(0, less_decimals);
+            } else {
+                more_digits(1, decimals_left);
+            }
+        } else if (min_value <= 9) {
+            char c = '0' + min_value;
+            auto range_start = top_level ? '1' : '0';
+            if (c > range_start) {
+                digit_range(range_start, c - 1);
+                out << " ";
+                more_digits(1, less_decimals);
+                out << " | ";
+            }
+            digit_range(c, '9');
+            out << " ";
+            more_digits(0, less_decimals);
+        } else {
+            auto min_s = std::to_string(min_value);
+            auto len = min_s.length();
+            auto c = min_s[0];
+
+            if (c > '1') {
+                digit_range(top_level ? '1' : '0', c - 1);
+                out << " ";
+                more_digits(len, less_decimals);
+                out << " | ";
+            }
+            digit_range(c, c);
+            out << " (";
+            _build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
+            out << ")";
+            if (c < '9') {
+                out << " | ";
+                digit_range(c + 1, '9');
+                out << " ";
+                more_digits(len - 1, less_decimals);
+            }
+        }
+        return;
+    }
+
+    if (has_max) {
+        if (max_value >= 0) {
+            if (top_level) {
+                out << "\"-\" [1-9] ";
+                more_digits(0, less_decimals);
+                out << " | ";
+            }
+            _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
+        } else {
+            out << "\"-\" (";
+            _build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
+            out << ")";
+        }
+        return;
+    }
+
+    throw std::runtime_error("At least one of min_value or max_value must be set");
+}
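A hedged usage sketch of how the helper above is driven (it mirrors the integer branch added later in this diff; the exact grammar text produced is not reproduced here):

    std::stringstream out;
    out << "(";
    _build_min_max_int(/* min_value = */ 1, /* max_value = */ 42, out);
    out << ") space";
    // out.str() now holds a GBNF alternation of digit ranges accepting the integers 1..42.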

 const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";

 struct BuiltinRule {
@@ -89,50 +313,7 @@ std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
 };

 std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
-std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
+std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};

-template <typename Iterator>
-std::string join(Iterator begin, Iterator end, const std::string & separator) {
-    std::ostringstream result;
-    if (begin != end) {
-        result << *begin;
-        for (Iterator it = begin + 1; it != end; ++it) {
-            result << separator << *it;
-        }
-    }
-    return result.str();
-}
-
-static std::vector<std::string> split(const std::string & str, const std::string & delimiter) {
-    std::vector<std::string> tokens;
-    size_t start = 0;
-    size_t end = str.find(delimiter);
-
-    while (end != std::string::npos) {
-        tokens.push_back(str.substr(start, end - start));
-        start = end + delimiter.length();
-        end = str.find(delimiter, start);
-    }
-
-    tokens.push_back(str.substr(start));
-
-    return tokens;
-}
-
-static std::string repeat(const std::string & str, size_t n) {
-    if (n == 0) {
-        return "";
-    }
-
-    std::string result;
-    result.reserve(str.length() * n);
-
-    for (size_t i = 0; i < n; ++i) {
-        result += str;
-    }
-
-    return result;
-}
-
 static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
     std::smatch match;
@@ -160,9 +341,9 @@ static std::string format_literal(const std::string & literal) {
     return "\"" + escaped + "\"";
 }


 class SchemaConverter {
 private:
+    friend std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options);
     std::function<json(const std::string &)> _fetch_json;
     bool _dotall;
     std::map<std::string, std::string> _rules;
@@ -192,7 +373,7 @@ private:
         for (size_t i = 0; i < alt_schemas.size(); i++) {
             rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
         }
-        return join(rules.begin(), rules.end(), " | ");
+        return string_join(rules, " | ");
     }

     std::string _visit_pattern(const std::string & pattern, const std::string & name) {
@@ -255,7 +436,7 @@ private:
             for (const auto & item : ret) {
                 results.push_back(to_rule(item));
             }
-            return std::make_pair(join(results.begin(), results.end(), " "), false);
+            return std::make_pair(string_join(results, " "), false);
         };

         while (i < length) {
@@ -313,7 +494,7 @@ private:
                 }
                 curly_brackets += '}';
                 i++;
-                auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
+                auto nums = string_split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
                 int min_times = 0;
                 int max_times = std::numeric_limits<int>::max();
                 try {
@@ -385,7 +566,76 @@ private:
         }
             return join_seq();
         };
-        return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
+        return _add_rule(name, "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space");
     }

+    /*
+        Returns a rule that matches a JSON string that is none of the provided strings
+
+        not_strings({"a"})
+            -> ["] ( [a] char+ | [^"a] char* )? ["] space
+        not_strings({"and", "also"})
+            -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
+    */
+    std::string _not_strings(const std::vector<std::string> & strings) {
+
+        struct TrieNode {
+            std::map<char, TrieNode> children;
+            bool is_end_of_string;
+
+            TrieNode() : is_end_of_string(false) {}
+
+            void insert(const std::string & string) {
+                auto node = this;
+                for (char c : string) {
+                    node = &node->children[c];
+                }
+                node->is_end_of_string = true;
+            }
+        };
+
+        TrieNode trie;
+        for (const auto & s : strings) {
+            trie.insert(s);
+        }
+
+        std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
+        std::ostringstream out;
+        out << "[\"] ( ";
+        std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) {
+            std::ostringstream rejects;
+            auto first = true;
+            for (const auto & kv : node.children) {
+                rejects << kv.first;
+                if (first) {
+                    first = false;
+                } else {
+                    out << " | ";
+                }
+                out << "[" << kv.first << "]";
+                if (!kv.second.children.empty()) {
+                    out << " (";
+                    visit(kv.second);
+                    out << ")";
+                } else if (kv.second.is_end_of_string) {
+                    out << " " << char_rule << "+";
+                }
+            }
+            if (!node.children.empty()) {
+                if (!first) {
+                    out << " | ";
+                }
+                out << "[^\"" << rejects.str() << "] " << char_rule << "*";
+            }
+        };
+        visit(trie);
+
+        out << " )";
+        if (!trie.is_end_of_string) {
+            out << "?";
+        }
+        out << " [\"] space";
+        return out.str();
+    }

     std::string _resolve_ref(const std::string & ref) {
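A hedged call-site sketch of what _not_strings is for (the additionalProperties change further down uses exactly this pattern; the rule name here is made up for illustration):

    // Key rule for additional properties that must not collide with declared property names.
    std::string key_rule = _add_rule("my-object-additional-k", _not_strings({"id", "name"}));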
@@ -408,6 +658,7 @@ private:
         std::vector<std::string> required_props;
         std::vector<std::string> optional_props;
         std::unordered_map<std::string, std::string> prop_kv_rule_names;
+        std::vector<std::string> prop_names;
         for (const auto & kv : properties) {
             const auto &prop_name = kv.first;
             const auto &prop_schema = kv.second;
@@ -422,11 +673,18 @@ private:
             } else {
                 optional_props.push_back(prop_name);
             }
+            prop_names.push_back(prop_name);
         }
-        if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
+        if ((additional_properties.is_boolean() && additional_properties.get<bool>()) || additional_properties.is_object()) {
             std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
-            std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
-            std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
+            std::string value_rule =
+                additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
+                                                  : _add_primitive("value", PRIMITIVE_RULES.at("value"));
+
+            auto key_rule =
+                prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string"))
+                                   : _add_rule(sub_name + "-k", _not_strings(prop_names));
+            std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
             prop_kv_rule_names["*"] = kv_rule;
             optional_props.push_back("*");
         }
@@ -452,15 +710,11 @@ private:
             }
             std::string k = ks[0];
             std::string kv_rule_name = prop_kv_rule_names[k];
-            if (k == "*") {
-                res = _add_rule(
-                    name + (name.empty() ? "" : "-") + "additional-kvs",
-                    kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
-                );
-            } else if (first_is_optional) {
-                res = "( \",\" space " + kv_rule_name + " )?";
+            std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
+            if (first_is_optional) {
+                res = comma_ref + (k == "*" ? "*" : "?");
             } else {
-                res = kv_rule_name;
+                res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
             }
             if (ks.size() > 1) {
                 res += " " + _add_rule(
@@ -510,10 +764,11 @@ private:
 public:
     SchemaConverter(
         const std::function<json(const std::string &)> & fetch_json,
-        bool dotall)
+        bool dotall,
+        bool compact_spaces)
         : _fetch_json(fetch_json), _dotall(dotall)
     {
-        _rules["space"] = SPACE_RULE;
+        _rules["space"] = compact_spaces ? "\" \"?" : SPACE_RULE;
     }

     void resolve_refs(json & schema, const std::string & url) {
@@ -555,7 +810,7 @@ public:
                 return;
             }
             std::string pointer = ref.substr(ref.find('#') + 1);
-            std::vector<std::string> tokens = split(pointer, "/");
+            std::vector<std::string> tokens = string_split(pointer, "/");
             for (size_t i = 1; i < tokens.size(); ++i) {
                 std::string sel = tokens[i];
                 if (target.is_null() || !target.contains(sel)) {
@@ -594,17 +849,19 @@ public:
         } else if (schema_type.is_array()) {
             std::vector<json> schema_types;
             for (const auto & t : schema_type) {
-                schema_types.push_back({{"type", t}});
+                json schema_copy(schema);
+                schema_copy["type"] = t;
+                schema_types.push_back(schema_copy);
             }
             return _add_rule(rule_name, _generate_union_rule(name, schema_types));
         } else if (schema.contains("const")) {
-            return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
+            return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
         } else if (schema.contains("enum")) {
             std::vector<std::string> enum_values;
             for (const auto & v : schema["enum"]) {
                 enum_values.push_back(_generate_constant_rule(v));
             }
-            return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
+            return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
         } else if ((schema_type.is_null() || schema_type == "object")
                 && (schema.contains("properties") ||
                     (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
@@ -686,6 +943,24 @@ public:
             int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
             int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
             return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
+        } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
+            int min_value = std::numeric_limits<int>::min();
+            int max_value = std::numeric_limits<int>::max();
+            if (schema.contains("minimum")) {
+                min_value = schema["minimum"].get<int>();
+            } else if (schema.contains("exclusiveMinimum")) {
+                min_value = schema["exclusiveMinimum"].get<int>() + 1;
+            }
+            if (schema.contains("maximum")) {
+                max_value = schema["maximum"].get<int>();
+            } else if (schema.contains("exclusiveMaximum")) {
+                max_value = schema["exclusiveMaximum"].get<int>() - 1;
+            }
+            std::stringstream out;
+            out << "(";
+            _build_min_max_int(min_value, max_value, out);
+            out << ") space";
+            return _add_rule(rule_name, out.str());
         } else if (schema.empty() || schema_type == "object") {
             return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
         } else {
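A hedged illustration of input that takes the new integer branch above (the bounds end up as min_value = 1 and max_value = 100 before being expanded by _build_min_max_int):

    auto schema = nlohmann::ordered_json::parse(
        R"({ "type": "integer", "minimum": 1, "maximum": 100 })");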
@@ -700,10 +975,10 @@ public:

     void check_errors() {
         if (!_errors.empty()) {
-            throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
+            throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
         }
         if (!_warnings.empty()) {
-            fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", join(_warnings.begin(), _warnings.end(), "; ").c_str());
+            fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
         }
     }

@@ -716,11 +991,35 @@ public:
     }
 };

-std::string json_schema_to_grammar(const json & schema) {
-    SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
-    auto copy = schema;
-    converter.resolve_refs(copy, "input");
-    converter.visit(copy, "");
+std::string json_schema_to_grammar(const json & schema, bool force_gbnf) {
+#ifdef LLAMA_USE_LLGUIDANCE
+    if (!force_gbnf) {
+        return "%llguidance {}\nstart: %json " + schema.dump();
+    }
+#else
+    (void)force_gbnf;
+#endif // LLAMA_USE_LLGUIDANCE
+    return build_grammar([&](const common_grammar_builder & callbacks) {
+        auto copy = schema;
+        callbacks.resolve_refs(copy);
+        callbacks.add_schema("", copy);
+    });
+}
+
+std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options) {
+    SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.compact_spaces);
+    common_grammar_builder builder {
+        /* .add_rule = */ [&](const std::string & name, const std::string & rule) {
+            return converter._add_rule(name, rule);
+        },
+        /* .add_schema = */ [&](const std::string & name, const nlohmann::ordered_json & schema) {
+            return converter.visit(schema, name == "root" ? "" : name);
+        },
+        /* .resolve_refs = */ [&](nlohmann::ordered_json & schema) {
+            converter.resolve_refs(schema, "");
+        }
+    };
+    cb(builder);
     converter.check_errors();
     return converter.format_grammar();
 }
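A caller-side sketch (hypothetical, assuming only the declarations in this diff) of the updated entry point:

    nlohmann::ordered_json schema = nlohmann::ordered_json::parse(R"({
        "type": "object",
        "properties": { "age": { "type": "integer", "minimum": 0, "maximum": 120 } },
        "required": ["age"]
    })");
    std::string gbnf = json_schema_to_grammar(schema, /* force_gbnf= */ true);
    // When built with LLAMA_USE_LLGUIDANCE and force_gbnf == false, the same call
    // returns the llguidance stub "%llguidance {}\nstart: %json " + schema.dump() instead.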
@@ -5,4 +5,18 @@
 #define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"

-std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
+std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
+    bool force_gbnf = false);
+
+struct common_grammar_builder {
+    std::function<std::string(const std::string &, const std::string &)> add_rule;
+    std::function<std::string(const std::string &, const nlohmann::ordered_json &)> add_schema;
+    std::function<void(nlohmann::ordered_json &)> resolve_refs;
+};
+
+struct common_grammar_options {
+    bool dotall = false;
+    bool compact_spaces = false;
+};
+
+std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options = {});
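A hedged sketch of the callback API declared above (the rule text is made up for illustration):

    common_grammar_options opts;
    opts.compact_spaces = true;

    std::string grammar = build_grammar([](const common_grammar_builder & builder) {
        builder.add_rule("root", "\"yes\" | \"no\"");
    }, opts);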
common/llguidance.cpp (new file, 270 lines)
@@ -0,0 +1,270 @@
#include "sampling.h"
#include "log.h"

#ifdef LLAMA_USE_LLGUIDANCE

# include "llguidance.h"
# include <cmath>

struct llama_sampler_llg {
    const llama_vocab * vocab;
    std::string grammar_kind;
    std::string grammar_data;
    LlgTokenizer * tokenizer;
    LlgConstraint * grammar;
    LlgMaskResult llg_res;
    bool has_llg_res;
};

static LlgConstraint * llama_sampler_llg_new(LlgTokenizer * tokenizer, const char * grammar_kind,
                                             const char * grammar_data) {
    LlgConstraintInit cinit;
    llg_constraint_init_set_defaults(&cinit, tokenizer);
    const char * log_level = getenv("LLGUIDANCE_LOG_LEVEL");
    if (log_level && *log_level) {
        cinit.log_stderr_level = atoi(log_level);
    }
    auto c = llg_new_constraint_any(&cinit, grammar_kind, grammar_data);
    if (llg_get_error(c)) {
        LOG_ERR("llg error: %s\n", llg_get_error(c));
        llg_free_constraint(c);
        return nullptr;
    }
    return c;
}

static const char * llama_sampler_llg_name(const llama_sampler * /*smpl*/) {
    return "llguidance";
}

static void llama_sampler_llg_accept_impl(llama_sampler * smpl, llama_token token) {
    auto * ctx = (llama_sampler_llg *) smpl->ctx;
    if (ctx->grammar) {
        LlgCommitResult res;
        llg_commit_token(ctx->grammar, token, &res);
        ctx->has_llg_res = false;
    }
}

static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array * cur_p) {
    auto * ctx = (llama_sampler_llg *) smpl->ctx;
    if (ctx->grammar) {
        if (!ctx->has_llg_res) {
            if (llg_compute_mask(ctx->grammar, &ctx->llg_res) == 0) {
                ctx->has_llg_res = true;
            } else {
                LOG_ERR("llg error: %s\n", llg_get_error(ctx->grammar));
                llg_free_constraint(ctx->grammar);
                ctx->grammar = nullptr;
            }
        }
        if (ctx->has_llg_res) {
            if (ctx->llg_res.is_stop) {
                for (size_t i = 0; i < cur_p->size; ++i) {
                    if (!llama_vocab_is_eog(ctx->vocab, cur_p->data[i].id)) {
                        cur_p->data[i].logit = -INFINITY;
                    }
                }
            } else {
                const uint32_t * mask = ctx->llg_res.sample_mask;
                for (size_t i = 0; i < cur_p->size; ++i) {
                    auto token = cur_p->data[i].id;
                    if ((mask[token / 32] & (1 << (token % 32))) == 0) {
                        cur_p->data[i].logit = -INFINITY;
                    }
                }
            }
        }
    }
}

static void llama_sampler_llg_reset(llama_sampler * smpl) {
    auto * ctx = (llama_sampler_llg *) smpl->ctx;
    if (!ctx->grammar) {
        return;
    }

    auto * grammar_new = llama_sampler_llg_new(ctx->tokenizer, ctx->grammar_kind.c_str(), ctx->grammar_data.c_str());
    llg_free_constraint(ctx->grammar);
    ctx->grammar = grammar_new;
    ctx->has_llg_res = false;
}

static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) {
    const auto * ctx = (const llama_sampler_llg *) smpl->ctx;

    auto * result = llama_sampler_init_llg(ctx->vocab, nullptr, nullptr);

    // copy the state
    {
        auto * result_ctx = (llama_sampler_llg *) result->ctx;

        if (ctx->grammar) {
            result_ctx->grammar_kind = ctx->grammar_kind;
            result_ctx->grammar_data = ctx->grammar_data;
            result_ctx->grammar = llg_clone_constraint(ctx->grammar);
            result_ctx->tokenizer = llg_clone_tokenizer(ctx->tokenizer);
        }
    }

    return result;
}

static void llama_sampler_llg_free(llama_sampler * smpl) {
    const auto * ctx = (llama_sampler_llg *) smpl->ctx;

    if (ctx->grammar) {
        llg_free_constraint(ctx->grammar);
        llg_free_tokenizer(ctx->tokenizer);
    }

    delete ctx;
}

static llama_sampler_i llama_sampler_llg_i = {
    /* .name = */ llama_sampler_llg_name,
    /* .accept = */ llama_sampler_llg_accept_impl,
    /* .apply = */ llama_sampler_llg_apply,
    /* .reset = */ llama_sampler_llg_reset,
    /* .clone = */ llama_sampler_llg_clone,
    /* .free = */ llama_sampler_llg_free,
};

static size_t llama_sampler_llg_tokenize_fn(const void * user_data, const uint8_t * bytes, size_t bytes_len,
                                            uint32_t * output_tokens, size_t output_tokens_len) {
    const llama_vocab * vocab = (const llama_vocab *) user_data;
    int r = 0;
    try {
        r = llama_tokenize(vocab, (const char *) bytes, bytes_len, (int32_t *) output_tokens, output_tokens_len, false,
                           true);
    } catch (const std::exception & e) {
        GGML_ABORT("llama_tokenize failed: %s\n", e.what());
    }
    if (r < 0) {
        return -r;
    }
    return r;
}

static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab) {
    // TODO store the tokenizer in the vocab somehow
    static const llama_vocab * vocab_cache;
    static LlgTokenizer * tokenizer_cache;

    if (vocab_cache == vocab) {
        return llg_clone_tokenizer(tokenizer_cache);
    }

    auto tok_eos = llama_vocab_eot(vocab);
    if (tok_eos == LLAMA_TOKEN_NULL) {
        tok_eos = llama_vocab_eos(vocab);
    }

    size_t vocab_size = llama_vocab_n_tokens(vocab);

    auto token_lens = new uint32_t[vocab_size];
    // we typically have ~7 bytes per token; let's go on the safe side here
    auto token_bytes_size = vocab_size * 16 + 1024 * 1024;
    auto token_bytes = new uint8_t[token_bytes_size];

    size_t offset = 0;
    for (size_t i = 0; i < vocab_size; i++) {
        size_t max_token = 1024;
        if (token_bytes_size - offset < max_token) {
            GGML_ABORT("token_bytes buffer too small\n");
        }

        llama_token token = i;
        auto dp = (char *) token_bytes + offset;
        auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false);
        if (size < 0) {
            GGML_ABORT("llama_detokenize failed\n");
        }
        if (size == 0) {
            size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true);
            if (size < 0) {
                GGML_ABORT("llama_detokenize failed\n");
            }
            if (size != 0) {
                *dp = '\xff'; // special token prefix marker
                size += 1;
            }
        }

        token_lens[i] = size;
        offset += size;
    }

    LlgTokenizerInit tinit = {
        /* .vocab_size = */ (uint32_t) vocab_size,
        /* .tok_eos = */ (uint32_t) tok_eos,
        /* .token_lens = */ token_lens,
        /* .token_bytes = */ token_bytes,
        /* .tokenizer_json = */ nullptr,
        /* .tokenize_assumes_string = */ true,
        /* .tokenize_fn = */ llama_sampler_llg_tokenize_fn,
        /* .use_approximate_greedy_tokenize_fn = */ false,
        /* .tokenize_user_data = */ vocab,
    };

    char error_buffer[1024];
    LlgTokenizer * tokenizer = llg_new_tokenizer(&tinit, error_buffer, sizeof(error_buffer));

    delete[] token_bytes;
    delete[] token_lens;

    if (tokenizer == nullptr) {
        LOG_ERR("llg tokenizer error: %s\n", error_buffer);
        return tokenizer;
    }

    if (tokenizer_cache) {
        llg_free_tokenizer(tokenizer_cache);
    }
    vocab_cache = vocab;
    tokenizer_cache = tokenizer;

    return llg_clone_tokenizer(tokenizer_cache);
}

llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * grammar_kind,
                                       const char * grammar_data) {
    auto * ctx = new llama_sampler_llg;

    if (grammar_kind != nullptr && grammar_kind[0] != '\0') {
        auto tokenizer = llama_sampler_llg_new_tokenizer(vocab);
        *ctx = {
            /* .vocab = */ vocab,
            /* .grammar_kind = */ grammar_kind,
            /* .grammar_data = */ grammar_data,
            /* .tokenizer = */ tokenizer,
            /* .grammar = */ llama_sampler_llg_new(tokenizer, grammar_kind, grammar_data),
            /* .llg_res = */ {},
            /* .has_llg_res = */ false,
        };
    } else {
        *ctx = {
            /* .vocab = */ vocab,
            /* .grammar_kind = */ {},
            /* .grammar_data = */ {},
            /* .tokenizer = */ nullptr,
            /* .grammar = */ nullptr,
            /* .llg_res = */ {},
            /* .has_llg_res = */ false,
        };
    }

    return llama_sampler_init(
        /* .iface = */ &llama_sampler_llg_i,
        /* .ctx = */ ctx
    );
}

#else

llama_sampler * llama_sampler_init_llg(const llama_vocab *, const char *, const char *) {
    LOG_WRN("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
    return nullptr;
}

#endif // LLAMA_USE_LLGUIDANCE
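A hedged wiring sketch for the new sampler. It assumes a loaded llama_model * model; grammar_kind and grammar_data are placeholders whose accepted values are defined by llguidance through llg_new_constraint_any, not by this diff:

    const llama_vocab * vocab = llama_model_get_vocab(model);
    llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, llama_sampler_init_llg(vocab, grammar_kind, grammar_data));
    llama_sampler_chain_add(chain, llama_sampler_init_greedy());
    // tokens drawn from the chain are committed back into the constraint via the sampler's accept callback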
Some files were not shown because too many files have changed in this diff.