Compare commits: gg/flash-a...master
2645 commits
[Commit table omitted: the compare view listed 2645 commits by SHA1 only, from d7b31a9d84 through 1cfb5372cf; the Author and Date columns were empty in this capture.]
1192 changed files with 318276 additions and 200859 deletions
.clang-format (new file, 161 lines)
@ -0,0 +1,161 @@
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
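With this style file at the repository root, the style is typically applied with clang-format directly. A minimal sketch, assuming clang-format is installed; the source path is illustrative, not taken from this diff:

    # format one source file in place using the nearest .clang-format
    clang-format -i src/llama.cpp

    # report violations without rewriting the file (non-zero exit on any diff)
    clang-format --dry-run --Werror src/llama.cpp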
.clang-tidy
@ -12,12 +12,15 @@ Checks: >
     -readability-implicit-bool-conversion,
     -readability-magic-numbers,
     -readability-uppercase-literal-suffix,
+    -readability-simplify-boolean-expr,
     clang-analyzer-*,
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
     portability-*,
+    -portability-simd-intrinsics,
     misc-*,
     -misc-const-correctness,
     -misc-non-private-member-variables-in-classes,
     -misc-no-recursion,
+    -misc-use-anonymous-namespace,
 FormatStyle: none
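The added entries extend the clang-tidy check list in the `Checks:` block above. A minimal sketch of running the checks locally, assuming a CMake build tree and an illustrative file path:

    # export a compile database, then run clang-tidy on one translation unit
    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    clang-tidy -p build src/llama.cpp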
@ -15,7 +15,7 @@ node('x86_runner1'){ // Running on x86 runner containing latest vecto
     stage('Running llama.cpp'){
         sh'''#!/bin/bash
                 module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
-                qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./main -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
+                qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
                 cat llama_log.txt # Printing results
         '''
     }
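The only change in this pipeline hunk is the binary rename from ./main to ./llama-cli; the flags are unchanged. The equivalent invocation outside the CI pipeline would be roughly as follows, with an illustrative model path:

    # run the renamed CLI binary directly (model path is illustrative)
    ./llama-cli -m models/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9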
.devops/cpu.Dockerfile (new file, 92 lines)
@ -0,0 +1,92 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    elif [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j $(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
|
.devops/cuda.Dockerfile (new file, 94 lines)
@@ -0,0 +1,94 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

WORKDIR /app

COPY . .

RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]

@@ -1,34 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV LLAMA_CUBLAS=1

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]

@@ -1,45 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]

@@ -1,22 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

RUN make

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/intel.Dockerfile (new file, 91 lines)
@@ -0,0 +1,91 @@
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

## Build Image

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

### Full
FROM base AS full

COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]

.devops/llama-cli-cann.Dockerfile (new file, 44 lines)
@@ -0,0 +1,44 @@
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# find libascend_hal.so, because the driver hasn't been mounted.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT ["/llama-cli" ]

@@ -1,84 +0,0 @@ (file deleted)
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name: llama.cpp-clblast
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: OpenCL Inference of LLaMA model in C/C++
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
Requires: clblast
URL: https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j LLAMA_CLBLAST=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamaclblast
cp -p server %{buildroot}%{_bindir}/llamaclblastserver
cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamaclblast
%{_bindir}/llamaclblastserver
%{_bindir}/llamaclblastsimple
/usr/lib/systemd/system/llamaclblast.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog

@@ -1,5 +1,5 @@
 # SRPM for building from source and packaging an RPM for RPM-based distros.
-# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
+# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
 # Built and maintained by John Boero - boeroboy@gmail.com
 # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

@@ -12,7 +12,7 @@
 # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
 #    It is up to the user to install the correct vendor-specific support.

-Name: llama.cpp-cublas
+Name: llama.cpp-cuda
 Version: %( date "+%%Y%%m%%d" )
 Release: 1%{?dist}
 Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)

@@ -32,16 +32,16 @@ CPU inference for Meta's Lllama2 models using default options.
 %setup -n llama.cpp-master

 %build
-make -j LLAMA_CUBLAS=1
+make -j GGML_CUDA=1

 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llamacppcublas
+cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
-cp -p server %{buildroot}%{_bindir}/llamacppcublasserver
+cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
-cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple
+cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

 mkdir -p %{buildroot}/usr/lib/systemd/system
-%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacublas.service
+%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
 [Unit]
 Description=Llama.cpp server, CPU only (no GPU support in this build).
 After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

@@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
 [Service]
 Type=simple
 EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llamacppcublasserver $LLAMA_ARGS
+ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
 ExecReload=/bin/kill -s HUP $MAINPID
 Restart=never

@@ -67,10 +67,10 @@ rm -rf %{buildroot}
 rm -rf %{_builddir}/*

 %files
-%{_bindir}/llamacppcublas
+%{_bindir}/llama-cuda-cli
-%{_bindir}/llamacppcublasserver
+%{_bindir}/llama-cuda-server
-%{_bindir}/llamacppcublassimple
+%{_bindir}/llama-cuda-simple
-/usr/lib/systemd/system/llamacublas.service
+/usr/lib/systemd/system/llamacuda.service
 %config /etc/sysconfig/llama

 %pre

@@ -1,5 +1,5 @@
 # SRPM for building from source and packaging an RPM for RPM-based distros.
-# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
+# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
 # Built and maintained by John Boero - boeroboy@gmail.com
 # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

@@ -38,9 +38,9 @@ make -j
 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llama
+cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
-cp -p server %{buildroot}%{_bindir}/llamaserver
+cp -p llama-server %{buildroot}%{_bindir}/llama-server
-cp -p simple %{buildroot}%{_bindir}/llamasimple
+cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

 mkdir -p %{buildroot}/usr/lib/systemd/system
 %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service

@@ -51,7 +51,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
 [Service]
 Type=simple
 EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
+ExecStart=/usr/bin/llama-server $LLAMA_ARGS
 ExecReload=/bin/kill -s HUP $MAINPID
 Restart=never

@@ -69,9 +69,9 @@ rm -rf %{buildroot}
 rm -rf %{_builddir}/*

 %files
-%{_bindir}/llama
+%{_bindir}/llama-cli
-%{_bindir}/llamaserver
+%{_bindir}/llama-server
-%{_bindir}/llamasimple
+%{_bindir}/llama-simple
 /usr/lib/systemd/system/llama.service
 %config /etc/sysconfig/llama

@@ -1,32 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV LLAMA_CUBLAS=1

RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

COPY --from=build /app/main /main

ENTRYPOINT [ "/main" ]

@@ -1,26 +0,0 @@ (file deleted)
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
ARG UBUNTU_VERSION=22.04

FROM intel/hpckit:$ONEAPI_VERSION as build

RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance
RUN mkdir build && \
    cd build && \
    cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
    cmake --build . --config Release --target main server

FROM ubuntu:$UBUNTU_VERSION as runtime

COPY --from=build /app/build/bin/main /main
COPY --from=build /app/build/bin/server /server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]

@@ -1,45 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make

ENTRYPOINT [ "/app/main" ]

@@ -1,20 +0,0 @@ (file deleted)
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make

FROM ubuntu:$UBUNTU_VERSION as runtime

COPY --from=build /app/main /main

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]

.devops/musa.Dockerfile (new file, 108 lines)
@@ -0,0 +1,108 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y \
        build-essential \
        cmake \
        python3 \
        python3-pip \
        git \
        libcurl4-openssl-dev \
        libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Use the default MUSA archs if not specified
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_MUSA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]

@@ -6,11 +6,10 @@
 let
   inherit (config.packages) default;
   binaries = [
-    "llama"
+    "llama-cli"
     "llama-embedding"
     "llama-server"
-    "quantize"
+    "llama-quantize"
-    "train-text-from-scratch"
   ];
   mkApp = name: {
     type = "app";

@@ -1,13 +1,52 @@
+{ inputs, ... }:
+
 {
   perSystem =
-    { config, lib, ... }:
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
     {
       devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+        let
+          pkgs = import inputs.nixpkgs { inherit system; };
+          stdenv = pkgs.stdenv;
+          scripts = config.packages.python-scripts;
+        in
+        lib.pipe (config.packages) [
+          (lib.concatMapAttrs (
+            name: package: {
+              ${name} = pkgs.mkShell {
+                name = "${name}";
+                inputsFrom = [ package ];
+                shellHook = ''
+                  echo "Entering ${name} devShell"
+                '';
+              };
+              "${name}-extra" =
+                if (name == "python-scripts") then
+                  null
+                else
+                  pkgs.mkShell {
+                    name = "${name}-extra";
+                    inputsFrom = [
+                      package
+                      scripts
+                    ];
+                    # Extra packages that *may* be used by some scripts
+                    packages = [
+                      pkgs.python3Packages.tiktoken
+                    ];
+                    shellHook = ''
+                      echo "Entering ${name} devShell"
+                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                    '';
+                  };
+            }
+          ))
+          (lib.filterAttrs (name: value: value != null))
+        ];
     };
 }

.devops/nix/docker.nix (new file, 37 lines)
@@ -0,0 +1,37 @@
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
# $ nix build .#llamaPackages.docker
# $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";

  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}

@@ -26,16 +26,14 @@
       config.cudaSupport = true;
       config.allowUnfreePredicate =
         p:
-        builtins.all
-          (
-            license:
-            license.free
-            || builtins.elem license.shortName [
-              "CUDA EULA"
-              "cuDNN EULA"
-            ]
-          )
-          (p.meta.licenses or [ p.meta.license ]);
+        builtins.all (
+          license:
+          license.free
+          || builtins.elem license.shortName [
+            "CUDA EULA"
+            "cuDNN EULA"
+          ]
+        ) (p.meta.licenses or [ p.meta.license ]);
     };
     # Ensure dependencies use ROCm consistently
     pkgsRocm = import inputs.nixpkgs {

.devops/nix/package-gguf-py.nix (new file, 36 lines)
@@ -0,0 +1,36 @@
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}

@@ -1,32 +1,47 @@
 {
   lib,
+  glibc,
   config,
   stdenv,
-  mkShell,
+  runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
-  python3,
   mpi,
-  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
+  blas,
   cudaPackages,
+  autoAddDriverRunpath,
   darwin,
   rocmPackages,
-  clblast,
-  useBlas ? builtins.all (x: !x) [
-    useCuda
-    useMetalKit
-    useOpenCL
-    useRocm
-  ],
+  vulkan-headers,
+  vulkan-loader,
+  curl,
+  shaderc,
+  useBlas ?
+    builtins.all (x: !x) [
+      useCuda
+      useMetalKit
+      useRocm
+      useVulkan
+    ]
+    && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
-  useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
+  # Increases the runtime closure size by ~700M
+  useMpi ? false,
   useRocm ? config.rocmSupport,
+  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
+  enableCurl ? true,
+  useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
-}@inputs:
+
+  # It's necessary to consistently use backendStdenv when building with CUDA support,
+  # otherwise we get libstdc++ errors downstream.
+  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
+  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
+  precompileMetalShaders ? false,
+}:

 let
   inherit (lib)
@@ -34,50 +49,29 @@ let
     cmakeFeature
     optionals
     strings
-    versionOlder
     ;

-  # It's necessary to consistently use backendStdenv when building with CUDA support,
-  # otherwise we get libstdc++ errors downstream.
   stdenv = throw "Use effectiveStdenv instead";
-  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

   suffices =
     lib.optionals useBlas [ "BLAS" ]
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
-    ++ lib.optionals useOpenCL [ "OpenCL" ]
-    ++ lib.optionals useRocm [ "ROCm" ];
+    ++ lib.optionals useRocm [ "ROCm" ]
+    ++ lib.optionals useVulkan [ "Vulkan" ];

   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix =
-    strings.optionalString (suffices != [ ])
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
+  descriptionSuffix = strings.optionalString (
+    suffices != [ ]
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

-  # TODO: package the Python in this repository in a Nix-like way.
-  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
-  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
-  # https://peps.python.org/pep-0517/
-  llama-python = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-    ]
-  );
-
-  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      ps.tiktoken
-      ps.torchWithoutCuda
-      ps.transformers
-    ]
-  );
+  xcrunHost = runCommand "xcrunHost" { } ''
+    mkdir -p $out/bin
+    ln -s /usr/bin/xcrun $out/bin
+  '';

   # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
   # separately
@@ -91,16 +85,9 @@ let
     ++ optionals useMetalKit [ MetalKit ];

   cudaBuildInputs = with cudaPackages; [
-    cuda_cccl.dev # <nv/target>
-
-    # A temporary hack for reducing the closure size, remove once cudaPackages
-    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    libcublas.dev
-    libcublas.lib
-    libcublas.static
+    cuda_cudart
+    cuda_cccl # <nv/target>
+    libcublas
   ];

   rocmBuildInputs = with rocmPackages; [
@@ -108,170 +95,153 @@ let
     hipblas
     rocblas
   ];

+  vulkanBuildInputs = [
+    vulkan-headers
+    vulkan-loader
+    shaderc
+  ];
 in

-effectiveStdenv.mkDerivation (
-  finalAttrs: {
-    pname = "llama-cpp${pnameSuffix}";
-    version = llamaVersion;
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;

   # Note: none of the files discarded here are visible in the sandbox or
   # affect the output hash. This also means they can be modified without
   # triggering a rebuild.
   src = lib.cleanSourceWith {
     filter =
       name: type:
       let
         noneOf = builtins.all (x: !x);
         baseName = baseNameOf name;
       in
       noneOf [
         (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
         (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
         (lib.hasPrefix "." baseName) # Skip hidden files and directories
         (baseName == "flake.lock")
       ];
     src = lib.cleanSource ../../.;
   };

-  postPatch = ''
-    substituteInPlace ./ggml-metal.m \
-      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-
-    # TODO: Package up each Python script or service appropriately.
-    # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
-    # we could make those *.py into setuptools' entrypoints
-    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
-  '';
-
-  nativeBuildInputs =
-    [
-      cmake
-      ninja
-      pkg-config
-      git
-    ]
-    ++ optionals useCuda [
-      cudaPackages.cuda_nvcc
-
-      # TODO: Replace with autoAddDriverRunpath
-      # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
-      cudaPackages.autoAddOpenGLRunpathHook
-    ];
-
-  buildInputs =
-    optionals effectiveStdenv.isDarwin darwinBuildInputs
-    ++ optionals useCuda cudaBuildInputs
-    ++ optionals useMpi [ mpi ]
-    ++ optionals useOpenCL [ clblast ]
-    ++ optionals useRocm rocmBuildInputs;
-
-  cmakeFlags =
-    [
-      (cmakeBool "LLAMA_NATIVE" false)
-      (cmakeBool "LLAMA_BUILD_SERVER" true)
-      (cmakeBool "BUILD_SHARED_LIBS" true)
-      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-      (cmakeBool "LLAMA_BLAS" useBlas)
-      (cmakeBool "LLAMA_CLBLAST" useOpenCL)
-      (cmakeBool "LLAMA_CUBLAS" useCuda)
-      (cmakeBool "LLAMA_HIPBLAS" useRocm)
-      (cmakeBool "LLAMA_METAL" useMetalKit)
-      (cmakeBool "LLAMA_MPI" useMpi)
-    ]
-    ++ optionals useCuda [
-      (
-        with cudaPackages.flags;
-        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
-          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
-        )
-      )
-    ]
-    ++ optionals useRocm [
-      (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
-      (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
-
-      # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
-      # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
-      # and select the line that matches the current nixpkgs version of rocBLAS.
-      # Should likely use `rocmPackages.clr.gpuTargets`.
-      "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
-    ]
-    ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
-    ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
+  postPatch = ''
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+  '';
+
+  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+  # `default.metallib` may be compiled with Metal compiler from XCode
+  # and we need to escape sandbox on MacOS to access Metal compiler.
+  # `xcrun` is used find the path of the Metal compiler, which is varible
+  # and not on $PATH
+  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
+  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
+
+  nativeBuildInputs =
+    [
+      cmake
+      ninja
+      pkg-config
+      git
+    ]
+    ++ optionals useCuda [
+      cudaPackages.cuda_nvcc
+
+      autoAddDriverRunpath
+    ]
+    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
+
+  buildInputs =
+    optionals effectiveStdenv.isDarwin darwinBuildInputs
+    ++ optionals useCuda cudaBuildInputs
+    ++ optionals useMpi [ mpi ]
+    ++ optionals useRocm rocmBuildInputs
+    ++ optionals useBlas [ blas ]
+    ++ optionals useVulkan vulkanBuildInputs
+    ++ optionals enableCurl [ curl ];
+
+  cmakeFlags =
+    [
+      (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_CURL" enableCurl)
+      (cmakeBool "GGML_NATIVE" false)
+      (cmakeBool "GGML_BLAS" useBlas)
+      (cmakeBool "GGML_CUDA" useCuda)
+      (cmakeBool "GGML_HIP" useRocm)
+      (cmakeBool "GGML_METAL" useMetalKit)
+      (cmakeBool "GGML_VULKAN" useVulkan)
+      (cmakeBool "GGML_STATIC" enableStatic)
+    ]
+    ++ optionals useCuda [
+      (
+        with cudaPackages.flags;
+        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+        )
+      )
+    ]
+    ++ optionals useRocm [
+      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
+    ]
+    ++ optionals useMetalKit [
+      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+    ];
+
+  # Environment variables needed for ROCm
+  env = optionals useRocm {
+    ROCM_PATH = "${rocmPackages.clr}";
+    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+  };

   # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
   # if they haven't been added yet.
   postInstall = ''
-    mv $out/bin/main $out/bin/llama
-    mv $out/bin/server $out/bin/llama-server
-    mkdir -p $out/include
-    cp $src/llama.h $out/include/
+    mkdir -p $out/include
+    cp $src/include/llama.h $out/include/
   '';

-  # Define the shells here, but don't add in the inputsFrom to avoid recursion.
-  passthru = {
-    inherit
-      useBlas
-      useCuda
-      useMetalKit
-      useMpi
-      useOpenCL
-      useRocm
-      ;
-
-    shell = mkShell {
-      name = "shell-${finalAttrs.finalPackage.name}";
-      description = "contains numpy and sentencepiece";
-      buildInputs = [ llama-python ];
-      inputsFrom = [ finalAttrs.finalPackage ];
-      shellHook = ''
-        addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-      '';
-    };
-
-    shell-extra = mkShell {
-      name = "shell-extra-${finalAttrs.finalPackage.name}";
-      description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-      buildInputs = [ llama-python-extra ];
-      inputsFrom = [ finalAttrs.finalPackage ];
-    };
-  };
-
-  meta = {
-    # Configurations we don't want even the CI to evaluate. Results in the
-    # "unsupported platform" messages. This is mostly a no-op, because
-    # cudaPackages would've refused to evaluate anyway.
-    badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
-
-    # Configurations that are known to result in build failures. Can be
-    # overridden by importing Nixpkgs with `allowBroken = true`.
-    broken = (useMetalKit && !effectiveStdenv.isDarwin);
-
-    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-    homepage = "https://github.com/ggerganov/llama.cpp/";
-    license = lib.licenses.mit;
-
-    # Accommodates `nix run` and `lib.getExe`
-    mainProgram = "llama";
-
-    # These people might respond, on the best effort basis, if you ping them
-    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
-    # Consider adding yourself to this list if you want to ensure this flake
-    # stays maintained and you're willing to invest your time. Do not add
-    # other people without their consent. Consider removing people after
-    # they've been unreachable for long periods of time.
-
-    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
-    # an attrset following the same format as in
-    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
-    maintainers = with lib.maintainers; [
-      philiptaron
-      SomeoneSerge
-    ];
-
-    # Extend `badPlatforms` instead
-    platforms = lib.platforms.all;
-  };
-  }
-)
+  meta = {
+    # Configurations we don't want even the CI to evaluate. Results in the
+    # "unsupported platform" messages. This is mostly a no-op, because
+    # cudaPackages would've refused to evaluate anyway.
+    badPlatforms = optionals useCuda lib.platforms.darwin;
+
+    # Configurations that are known to result in build failures. Can be
+    # overridden by importing Nixpkgs with `allowBroken = true`.
+    broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    homepage = "https://github.com/ggerganov/llama.cpp/";
+    license = lib.licenses.mit;
+
+    # Accommodates `nix run` and `lib.getExe`
+    mainProgram = "llama-cli";
+
+    # These people might respond, on the best effort basis, if you ping them
+    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+    # Consider adding yourself to this list if you want to ensure this flake
+    # stays maintained and you're willing to invest your time. Do not add
+    # other people without their consent. Consider removing people after
+    # they've been unreachable for long periods of time.
+
+    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+    # an attrset following the same format as in
+    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+    maintainers = with lib.maintainers; [
+      philiptaron
+      SomeoneSerge
+    ];
+
+    # Extend `badPlatforms` instead
+    platforms = lib.platforms.all;
+  };
+})

.devops/nix/python-scripts.nix (new file, 66 lines)
@@ -0,0 +1,66 @@
{
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
}@inputs:

let
  llama-python-deps = with python3Packages; [
    numpy
    sentencepiece
    transformers
    protobuf
    torchWithoutCuda
    gguf-py
    tqdm

    # for scripts/compare-llama-bench.py
    gitpython
    tabulate

    # for examples/pydantic-models-to-grammar-examples.py
    docstring-parser
    pydantic
  ];

  llama-python-test-deps = with python3Packages; [
    # Server bench
    matplotlib

    # server tests
    openai
    pytest
    prometheus-client
  ];
in

buildPythonPackage ({
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        any = builtins.any (x: x);
        baseName = builtins.baseNameOf name;
      in
      any [
        (lib.hasSuffix ".py" name)
        (baseName == "README.md")
        (baseName == "pyproject.toml")
      ];
    src = lib.cleanSource ../../.;
  };
  nativeBuildInputs = [ poetry-core ];
  nativeCheckInputs = llama-python-test-deps;
  dependencies = llama-python-deps;
})

@@ -1,16 +1,41 @@
 {
   lib,
   newScope,
+  python3,
   llamaVersion ? "0.0.0",
 }:
 
+let
+  pythonPackages = python3.pkgs;
+  buildPythonPackage = pythonPackages.buildPythonPackage;
+  numpy = pythonPackages.numpy;
+  tqdm = pythonPackages.tqdm;
+  sentencepiece = pythonPackages.sentencepiece;
+  pyyaml = pythonPackages.pyyaml;
+  poetry-core = pythonPackages.poetry-core;
+  pytestCheckHook = pythonPackages.pytestCheckHook;
+in
+
 # We're using `makeScope` instead of just writing out an attrset
 # because it allows users to apply overlays later using `overrideScope'`.
 # Cf. https://noogle.dev/f/lib/makeScope
 
-lib.makeScope newScope (
-  self: {
-    inherit llamaVersion;
-    llama-cpp = self.callPackage ./package.nix { };
-  }
-)
+lib.makeScope newScope (self: {
+  inherit llamaVersion;
+  gguf-py = self.callPackage ./package-gguf-py.nix {
+    inherit
+      buildPythonPackage
+      numpy
+      tqdm
+      sentencepiece
+      poetry-core
+      pyyaml
+      pytestCheckHook
+      ;
+  };
+  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+  llama-cpp = self.callPackage ./package.nix { };
+  docker = self.callPackage ./docker.nix { };
+  docker-min = self.callPackage ./docker.nix { interactive = false; };
+  sif = self.callPackage ./sif.nix { };
+})
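The `makeScope` comment above is the point of this structure: a consumer can swap out or tweak any package in the scope without forking the expression. A minimal sketch of what that looks like, assuming the scope is exposed to the consumer under the name `llamaPackages` (that attribute name is an assumption for illustration, not something this diff pins down):

    # Hypothetical consumer-side override; `llamaPackages` stands in for
    # however the flake exposes the scope built by the file above.
    llamaPackages.overrideScope' (final: prev: {
      # Rebuild llama-cpp from the same scope, but from a local checkout.
      llama-cpp = prev.llama-cpp.overrideAttrs (old: {
        src = /path/to/local/llama.cpp;
      });
    })

Because `docker`, `docker-min` and `sif` are built from `final.llama-cpp` inside the same scope, they pick up such an override automatically.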
.devops/nix/sif.nix (new file, 27 lines)
@@ -0,0 +1,27 @@
{
  lib,
  singularity-tools,
  llama-cpp,
  bashInteractive,
  interactive ? false,
}:

let
  optionalInt = cond: x: if cond then x else 0;
in
singularity-tools.buildImage rec {
  inherit (llama-cpp) name;
  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];

  # These are excessive (but safe) for most variants. Building singularity
  # images requires superuser privileges, so we build them inside a VM in a
  # writable image of pre-determined size.
  #
  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
  #
  # Expected image sizes:
  # - cpu/blas: 150M,
  # - cuda, all gencodes: 560M,
  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
  memSize = diskSize;
}
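Since `singularity-tools.buildImage` does the privileged work inside a VM, the only user-facing step is the Nix build itself. A hedged usage sketch; the `.#llamaPackages.sif` attribute path and the way the resulting image is invoked are assumptions, not something this diff specifies:

    # Attribute path assumed; adjust to however the flake exposes the scope.
    nix build .#llamaPackages.sif
    # ./result points at the image produced by singularity-tools.buildImage;
    # the exact run script and arguments depend on the derivation.
    apptainer run ./result --help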
.devops/rocm.Dockerfile (new file, 113 lines)
@@ -0,0 +1,113 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.3
ARG AMDGPU_VERSION=6.3

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

### Build image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html

#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
ARG ROCM_DOCKER_ARCH=gfx1100

# Set nvcc architectured
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# ENV CC=/opt/rocm/llvm/bin/clang
# ENV CXX=/opt/rocm/llvm/bin/clang++

RUN apt-get update \
    && apt-get install -y \
    build-essential \
    cmake \
    git \
    libcurl4-openssl-dev \
    curl \
    libgomp1

WORKDIR /app

COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
    && cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
    && find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_ROCM_DEV_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3-pip \
    python3 \
    python3-wheel\
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
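The new Dockerfile is multi-target (`full`, `light`, `server`), so the image flavour is picked at build time with `--target`, and the architecture list can be widened from the `gfx1100` default through the `ROCM_DOCKER_ARCH` build argument. A usage sketch; the image tag is a placeholder and the architecture list is only an example:

    # Server-only ROCm image for two example architectures (tag is a placeholder)
    docker build -f .devops/rocm.Dockerfile \
      --build-arg ROCM_DOCKER_ARCH='gfx1030,gfx1100' \
      --target server -t llama-cpp-rocm-server .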
(deleted file)
@@ -1,32 +0,0 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV LLAMA_CUBLAS=1

RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

COPY --from=build /app/server /server

ENTRYPOINT [ "/server" ]
(deleted file)
@@ -1,25 +0,0 @@
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
ARG UBUNTU_VERSION=22.04

FROM intel/hpckit:$ONEAPI_VERSION as build

RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance
RUN mkdir build && \
    cd build && \
    cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
    cmake --build . --config Release --target main server

FROM ubuntu:$UBUNTU_VERSION as runtime

COPY --from=build /app/build/bin/server /server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
(deleted file)
@@ -1,45 +0,0 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make

ENTRYPOINT [ "/app/server" ]
(deleted file)
@@ -1,20 +0,0 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make

FROM ubuntu:$UBUNTU_VERSION as runtime

COPY --from=build /app/server /server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
@@ -8,36 +8,40 @@ arg1="$1"
 shift
 
 if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
-    python3 ./convert.py "$@"
+    exec python3 ./convert_hf_to_gguf.py "$@"
 elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
-    ./quantize "$@"
+    exec ./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
-    ./main "$@"
+    exec ./llama-cli "$@"
-elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
-    ./finetune "$@"
+elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+    exec ./llama-bench "$@"
+elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+    exec ./llama-perplexity "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
-    for i in `ls $1/$2/ggml-model-f16.bin*`; do
+    for i in $(ls $1/$2/ggml-model-f16.bin*); do
         if [ -f "${i/f16/q4_0}" ]; then
             echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
         else
             echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
-            ./quantize "$i" "${i/f16/q4_0}" q4_0
+            exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
         fi
     done
 elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
-    ./server "$@"
+    exec ./llama-server "$@"
 else
     echo "Unknown command: $arg1"
     echo "Available commands: "
     echo "  --run (-r): Run a model previously converted into ggml"
     echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+    echo "              ex: -m model.gguf"
+    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+    echo "              ex: -m model.gguf -f file.txt"
     echo "  --convert (-c): Convert a llama model into ggml"
     echo "              ex: --outtype f16 \"/models/7B/\" "
     echo "  --quantize (-q): Optimize with quantization process ggml"
     echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
-    echo "  --finetune (-f): Run finetune command to create a lora finetune of the model"
-    echo "              See documentation for finetune for command-line parameters"
     echo "  --all-in-one (-a): Execute --convert & --quantize"
     echo "              ex: \"/models/\" 7B"
     echo "  --server (-s): Run a model on the server"
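This script is the entrypoint of the `full` images above, so the flags it parses map directly onto `docker run` arguments. A usage sketch; the image tag is a placeholder for whatever the full image was built or pulled as:

    # Run the CLI through the tools.sh entrypoint of a "full" image (tag is a placeholder)
    docker run -v /path/to/models:/models llama-cpp-full \
      --run -m /models/7B/ggml-model-q4_0.gguf \
      -p "Building a website can be done in 10 simple steps:" -n 512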
.devops/vulkan.Dockerfile (new file, 89 lines)
@@ -0,0 +1,89 @@
ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl libvulkan-dev \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
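The Vulkan image follows the same base/full/light/server layout; at run time the container additionally needs access to the host GPU device nodes. A sketch, with the tag and the device path as assumptions that depend on the host setup:

    # Build the CLI-only Vulkan image and run it with the host's DRM devices passed through
    docker build -f .devops/vulkan.Dockerfile --target light -t llama-cpp-vulkan-light .
    docker run --device /dev/dri -v /path/to/models:/models llama-cpp-vulkan-light \
      -m /models/model.gguf -p "Hello" -ngl 99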
@@ -1,7 +1,7 @@
 *.o
 *.a
 .cache/
-.git/
+# Do not ignore .git directory, otherwise the reported build number will always be 0
 .github/
 .gitignore
 .vs/
@@ -12,8 +12,8 @@ build*/
 
 models/*
 
-/main
-/quantize
+/llama-cli
+/llama-quantize
 
 arm_neon.h
 compile_commands.json
.ecrc
@@ -1,5 +1,5 @@
 {
-    "Exclude": ["^\\.gitmodules$"],
+    "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
     "Disable": {
         "IndentSize": true
     }
@@ -24,5 +24,27 @@ insert_final_newline = unset
 [examples/server/public/*]
 indent_size = 2
 
+[examples/server/public/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/server/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
+
+[examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
+insert_final_newline = unset
+
+[models/templates/*.jinja]
+indent_style = unset
+indent_size = unset
+end_of_line = unset
+charset = unset
+trim_trailing_whitespace = unset
+insert_final_newline = unset
.flake8
@@ -1,2 +1,17 @@
 [flake8]
 max-line-length = 125
+ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
+exclude =
+    # Do not traverse examples
+    examples,
+    # Do not include package initializers
+    __init__.py,
+    # No need to traverse our git directory
+    .git,
+    # There's no value in checking cache directories
+    __pycache__,
+    # No need to include the build path
+    build,
+    # This contains builds that we don't want to check
+    dist  # This is generated with `python build .` for package releases
+# max-complexity = 10
.github/ISSUE_TEMPLATE/010-bug-compilation.yml (new file, 87 lines)
@@ -0,0 +1,87 @@
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the compilation of llama.cpp fails.
        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
        If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
        by clearing `~/.cache/ccache` (on Linux).
  - type: textarea
    id: commit
    attributes:
      label: Git commit
      description: Which commit are you trying to compile?
      placeholder: |
        $git rev-parse HEAD
        84a07a17b1b08cf2b9747c633a2372782848a27f
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
      placeholder: >
        I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Compile command
      description: >
        Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
.github/ISSUE_TEMPLATE/011-bug-results.yml (new file, 101 lines)
@@ -0,0 +1,101 @@
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the model evaluation results
        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
        The `llama-cli` binary can be used for simple and reproducible model inference.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: hardware
    attributes:
      label: Hardware
      description: Which CPUs/GPUs are you using?
      placeholder: >
        e.g. Ryzen 5950X + 2x RTX 4090
    validations:
      required: true
  - type: textarea
    id: model
    attributes:
      label: Models
      description: >
        Which model(s) at which quantization were you using when encountering the bug?
        If you downloaded a GGUF file off of Huggingface, please provide a link.
      placeholder: >
        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
        that information would be very much appreciated by us.
      placeholder: >
        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
        When I use -ngl 0 it works correctly.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including the command that you entered and any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
.github/ISSUE_TEMPLATE/019-bug-misc.yml (new file, 91 lines)
@@ -0,0 +1,91 @@
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for miscellaneous bugs that don't fit into any other category.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software is affected? (You can use `--version` to get a version string.)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: dropdown
    id: module
    attributes:
      label: Which llama.cpp modules do you know to be affected?
      multiple: true
      options:
        - Documentation/Github
        - libllama (core library)
        - llama-cli
        - llama-server
        - llama-bench
        - llama-quantize
        - Python/Bash scripts
        - Test code
        - Other (Please specify in the next section)
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Command line
      description: >
        Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it (if applicable).
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        If applicable, please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
.github/ISSUE_TEMPLATE/020-enhancement.yml (new file, 51 lines)
@@ -0,0 +1,51 @@
name: Enhancement
description: Used to request enhancements for llama.cpp.
title: "Feature Request: "
labels: ["enhancement"]
body:
  - type: markdown
    attributes:
      value: |
        [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)

  - type: checkboxes
    id: prerequisites
    attributes:
      label: Prerequisites
      description: Please confirm the following before submitting your enhancement request.
      options:
        - label: I am running the latest code. Mention the version if possible as well.
          required: true
        - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
          required: true
        - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
          required: true
        - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
          required: true

  - type: textarea
    id: feature-description
    attributes:
      label: Feature Description
      description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
      placeholder: Detailed description of the enhancement
    validations:
      required: true

  - type: textarea
    id: motivation
    attributes:
      label: Motivation
      description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
      placeholder: Explanation of why this feature is needed and its benefits
    validations:
      required: true

  - type: textarea
    id: possible-implementation
    attributes:
      label: Possible Implementation
      description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
      placeholder: Detailed description of potential implementation
    validations:
      required: false
.github/ISSUE_TEMPLATE/030-research.yml (new file, 52 lines)
@@ -0,0 +1,52 @@
name: Research
description: Track new technical research area.
title: "Research: "
labels: ["research 🔬"]
body:
  - type: markdown
    attributes:
      value: |
        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)

  - type: checkboxes
    id: research-stage
    attributes:
      label: Research Stage
      description: Track general state of this research ticket
      options:
        - label: Background Research (Let's try to avoid reinventing the wheel)
        - label: Hypothesis Formed (How do you think this will work and it's effect?)
        - label: Strategy / Implementation Forming
        - label: Analysis of results
        - label: Debrief / Documentation (So people in the future can learn from us)

  - type: textarea
    id: background
    attributes:
      label: Previous existing literature and research
      description: Whats the current state of the art and whats the motivation for this research?

  - type: textarea
    id: hypothesis
    attributes:
      label: Hypothesis
      description: How do you think this will work and it's effect?

  - type: textarea
    id: implementation
    attributes:
      label: Implementation
      description: Got an approach? e.g. a PR ready to go?

  - type: textarea
    id: analysis
    attributes:
      label: Analysis
      description: How does the proposed implementation behave?

  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
.github/ISSUE_TEMPLATE/040-refactor.yml (new file, 28 lines)
@@ -0,0 +1,28 @@
name: Refactor (Maintainers)
description: Used to track refactoring opportunities.
title: "Refactor: "
labels: ["refactor"]
body:
  - type: markdown
    attributes:
      value: |
        Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
        Also you may want to check [Pull request refactor label as well](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.

  - type: textarea
    id: background-description
    attributes:
      label: Background Description
      description: Please provide a detailed written description of the pain points you are trying to solve.
      placeholder: Detailed description behind your motivation to request refactor
    validations:
      required: true

  - type: textarea
    id: possible-approaches
    attributes:
      label: Possible Refactor Approaches
      description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
      placeholder: Your idea of possible refactoring opportunity/approaches
    validations:
      required: false
.github/ISSUE_TEMPLATE/bug.md (deleted, 9 lines)
@@ -1,9 +0,0 @@
---
name: Bug template
about: Used to report bugs in llama.cpp
labels: ["bug-unconfirmed"]
assignees: ''

---

Please include information about your system, the steps to reproduce the bug, and the version of llama.cpp that you are using. If possible, please provide a minimal code example that reproduces the bug.
.github/ISSUE_TEMPLATE/config.yml (new file, 11 lines)
@@ -0,0 +1,11 @@
blank_issues_enabled: true
contact_links:
  - name: Got an idea?
    url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
    about: Pop it there. It may then become an enhancement ticket.
  - name: Got a question?
    url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
    about: Ask a question there!
  - name: Want to contribute?
    url: https://github.com/ggerganov/llama.cpp/wiki/contribute
    about: Head to the contribution guide page of the wiki for areas you can help with
.github/ISSUE_TEMPLATE/enhancement.md (deleted, 28 lines)
@@ -1,28 +0,0 @@
---
name: Enhancement template
about: Used to request enhancements for llama.cpp
labels: ["enhancement"]
assignees: ''

---

# Prerequisites

Please answer the following questions for yourself before submitting an issue.

- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now.
- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed).
- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share.

# Feature Description

Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.

# Motivation

Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.

# Possible Implementation

If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
.github/labeler.yml (new file, 86 lines)
@@ -0,0 +1,86 @@
# https://github.com/actions/labeler
Kompute:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-kompute.h
          - ggml/src/ggml-kompute/**
          - README-kompute.md
Apple Metal:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-metal.h
          - ggml/src/ggml-metal/**
          - README-metal.md
SYCL:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-sycl.h
          - ggml/src/ggml-sycl/**
          - docs/backend/SYCL.md
          - examples/sycl/**
Nvidia GPU:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-cuda.h
          - ggml/src/ggml-cuda/**
Vulkan:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-vulkan.h
          - ggml/src/ggml-vulkan/**
documentation:
  - changed-files:
      - any-glob-to-any-file:
          - docs/**
          - media/**
testing:
  - changed-files:
      - any-glob-to-any-file:
          - tests/**
build:
  - changed-files:
      - any-glob-to-any-file:
          - cmake/**
          - CMakeLists.txt
          - CMakePresets.json
examples:
  - changed-files:
      - any-glob-to-any-file: examples/**
devops:
  - changed-files:
      - any-glob-to-any-file:
          - .devops/**
          - .github/**
          - ci/**
python:
  - changed-files:
      - any-glob-to-any-file:
          - "**/*.py"
          - requirements/**
          - gguf-py/**
          - .flake8
script:
  - changed-files:
      - any-glob-to-any-file:
          - scripts/**
android:
  - changed-files:
      - any-glob-to-any-file:
          - examples/llama.android/**
server:
  - changed-files:
      - any-glob-to-any-file:
          - examples/server/**
ggml:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/**
nix:
  - changed-files:
      - any-glob-to-any-file:
          - "**/*.nix"
          - .github/workflows/nix-*.yml
          - .devops/nix/nixpkgs-instances.nix
embedding:
  - changed-files:
      - any-glob-to-any-file: examples/embedding/
.github/pull_request_template.md (new file, 1 line)
@@ -0,0 +1 @@
*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
.github/workflows/bench.yml.disabled (new file, 315 lines)
@@ -0,0 +1,315 @@
# TODO: there have been some issues with the workflow, so disabling for now
# https://github.com/ggerganov/llama.cpp/issues/7893
#
# Benchmark
name: Benchmark

on:
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC80adis_H100_v5
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      duration:
        description: 'Duration of the bench'
        type: string
        default: 10m

  push:
    branches:
      - master
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  pull_request_target:
    types: [opened, synchronize, reopened]
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  schedule:
    - cron: '04 2 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
  cancel-in-progress: true

jobs:
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
      N_USERS: 8
      DURATION: 10m

    strategy:
      matrix:
        model: [phi-2]
        ftype: [q4_0, q8_0, f16]
        include:
          - model: phi-2
            ftype: q4_0
            pr_comment_enabled: "true"

    if: |
      inputs.gpu-series == 'Standard_NC4as_T4_v3'
      || (
        github.event_name == 'schedule'
        && github.ref_name == 'master'
        && github.repository_owner == 'ggerganov'
      )
      || github.event_name == 'pull_request_target'
      || (
        github.event_name == 'push'
        && github.event.ref == 'refs/heads/master'
        && github.repository_owner == 'ggerganov'
      )
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          while ! nc -z localhost 9090; do
            sleep 0.1
          done

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.21'

      - name: Install k6 and xk6-sse
        id: k6_installation
        run: |
          cd examples/server/bench
          go install go.k6.io/xk6/cmd/xk6@latest
          xk6 build master \
            --with github.com/phymbert/xk6-sse

      - name: Build
        id: cmake_build
        run: |
          set -eux
          cmake -B build \
              -DGGML_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DLLAMA_CUBLAS=ON \
              -DCUDAToolkit_ROOT=/usr/local/cuda \
              -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
              -DCMAKE_CUDA_ARCHITECTURES=75 \
              -DLLAMA_FATAL_WARNINGS=OFF \
              -DLLAMA_ALL_WARNINGS=OFF \
              -DCMAKE_BUILD_TYPE=Release;
          cmake --build build --config Release -j $(nproc) --target llama-server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Server bench
        id: server_bench
        env:
          HEAD_REF: ${{ github.head_ref || github.ref_name }}
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
              --branch $HEAD_REF \
              --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
              --scenario script.js \
              --duration ${{ github.event.inputs.duration || env.DURATION }} \
              --hf-repo ggml-org/models \
              --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
              --model-path-prefix /models \
              --parallel ${{ env.N_USERS }} \
              -ngl 33 \
              --batch-size 2048 \
              --ubatch-size 256 \
              --ctx-size 16384 \
              --n-prompts 1000 \
              --max-prompt-tokens 1024 \
              --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV

          # Remove dataset as we do not want it in the artefact
          rm ShareGPT_V3_unfiltered_cleaned_split.json

      - uses: actions/upload-artifact@v4
        with:
          name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          compression-level: 9
          path: |
            examples/server/bench/*.jpg
            examples/server/bench/*.json
            examples/server/bench/*.log

      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{secrets.GITHUB_TOKEN}}
          sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
          context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'

      - name: Upload benchmark images
        uses: devicons/public-upload-to-imgur@v2.2.2
        continue-on-error: true # Important as it looks unstable: 503
        id: imgur_step
        with:
          client_id: ${{secrets.IMGUR_CLIENT_ID}}
          path: |
            examples/server/bench/prompt_tokens_seconds.jpg
            examples/server/bench/predicted_tokens_seconds.jpg
            examples/server/bench/kv_cache_usage_ratio.jpg
            examples/server/bench/requests_processing.jpg

      - name: Extract mermaid
        id: set_mermaid
        run: |
          set -eux

          cd examples/server/bench
          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      - name: Extract image url
        id: extract_image_url
        continue-on-error: true
        run: |
          set -eux

          echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
          echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
          echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
          echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV

      - name: Comment PR
        uses: mshick/add-pr-comment@v2
        id: comment_pr
        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
        with:
          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          message: |
            <p align="center">

            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀

            </p>

            <details>

            <summary>Expand details for performance related PR only</summary>

            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
            - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
            - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
            - ${{ env.BENCH_GRAPH_XLABEL }}


            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.PROMPT_TOKENS_SECONDS }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.PREDICTED_TOKENS_SECONDS }}
            ```

            </details>

            </p>

            <details>

            <summary>Details</summary>

            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.KV_CACHE_USAGE_RATIO }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.REQUESTS_PROCESSING }}
            ```

            </details>

            </p>
            </details>
            </details>
1248 .github/workflows/build.yml (vendored)
File diff suppressed because it is too large
28 .github/workflows/close-issue.yml (vendored, new file)
@@ -0,0 +1,28 @@
name: Close inactive issues
on:
schedule:
- cron: "42 0 * * *"

# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
issues: write

jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- uses: actions/stale@v5
with:
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
days-before-issue-stale: 30
days-before-issue-close: 14
stale-issue-label: "stale"
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
days-before-pr-stale: -1
days-before-pr-close: -1
operations-per-run: 10000
repo-token: ${{ secrets.GITHUB_TOKEN }}
36 .github/workflows/code-coverage.yml (vendored)
@@ -1,36 +0,0 @@
name: Code Coverage
on: [push, pull_request]

env:
GGML_NLOOP: 3
GGML_N_THREADS: 1

jobs:
run:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential gcc-8 lcov

- name: Build
run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests

- name: Run tests
run: CC=gcc-8 make test

- name: Generate coverage report
run: |
make coverage
make lcov-report

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
files: lcov-report/coverage.info
158 .github/workflows/docker.yml (vendored)
@@ -10,45 +10,50 @@
 name: Publish Docker image

 on:
-pull_request:
+workflow_dispatch: # allows manual triggering
-push:
+schedule:
-branches:
+# Rebuild daily rather than on every push because it is expensive
-- master
+- cron: '12 4 * * *'

+concurrency:
+group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+cancel-in-progress: true
+
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+packages: write

 jobs:
 push_to_registry:
 name: Push Docker image to Docker Hub
-if: github.event.pull_request.draft == false

-runs-on: ubuntu-latest
+runs-on: ubuntu-22.04
 env:
 COMMIT_SHA: ${{ github.sha }}
 strategy:
+fail-fast: false
 matrix:
 config:
-- { tag: "light", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+# Multi-stage build
-- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
-- { tag: "server", dockerfile: ".devops/server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-# NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
+- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-# have disabled them for now until the reason why
+- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-# is understood.
+- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-- { tag: "light-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" }
+# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
-- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
+#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
-- { tag: "server-cuda", dockerfile: ".devops/server-cuda.Dockerfile", platforms: "linux/amd64" }
-- { tag: "light-rocm", dockerfile: ".devops/main-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-- { tag: "server-rocm", dockerfile: ".devops/server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-- { tag: "light-intel", dockerfile: ".devops/main-intel.Dockerfile", platforms: "linux/amd64" }
-- { tag: "server-intel", dockerfile: ".devops/server-intel.Dockerfile", platforms: "linux/amd64" }
 steps:
 - name: Check out the repo
-uses: actions/checkout@v3
+uses: actions/checkout@v4
+with:
+fetch-depth: 0 # preserve git history, so we can determine the build number

 - name: Set up QEMU
-uses: docker/setup-qemu-action@v2
+uses: docker/setup-qemu-action@v3

 - name: Set up Docker Buildx
-uses: docker/setup-buildx-action@v2
+uses: docker/setup-buildx-action@v3

 - name: Log in to Docker Hub
 uses: docker/login-action@v2
@@ -57,9 +62,45 @@ jobs:
 username: ${{ github.repository_owner }}
 password: ${{ secrets.GITHUB_TOKEN }}

-# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
+- name: Determine tag name
+id: tag
+shell: bash
+run: |
+BUILD_NUMBER="$(git rev-list --count HEAD)"
+SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
+REPO_NAME="${{ github.event.repository.name }}"
+
+# determine tag name postfix (build number, commit hash)
+if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
+TAG_POSTFIX="-b${BUILD_NUMBER}"
+else
+SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
+TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
+fi
+# list all tags possible
+if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
+TYPE=""
+else
+TYPE="-${{ matrix.config.tag }}"
+fi
+PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
+FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
+LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
+SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
+echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
+echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
+echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
+echo "full_output_tags=$FULLTAGS" # print out for debugging
+echo "light_output_tags=$LIGHTTAGS" # print out for debugging
+echo "server_output_tags=$SERVERTAGS" # print out for debugging
+env:
+GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'

 - name: Free Disk Space (Ubuntu)
-uses: jlumbroso/free-disk-space@main
+if: ${{ matrix.config.free_disk_space == true }}
+uses: ggml-org/free-disk-space@v1.3.1
 with:
 # this might remove tools that are actually needed,
 # if set to "true" but frees about 6 GB
@@ -74,34 +115,59 @@ jobs:
 docker-images: true
 swap-storage: true

-- name: Determine tag name
+- name: Build and push Full Docker image (tagged + versioned)
-id: tag
+if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
-shell: bash
+uses: docker/build-push-action@v6
-run: |
-BUILD_NUMBER="$(git rev-list --count HEAD)"
-SHORT_HASH="$(git rev-parse --short=7 HEAD)"
-if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
-echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
-else
-SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
-echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
-fi

-- name: Build and push Docker image (versioned)
-if: github.event_name == 'push'
-uses: docker/build-push-action@v4
 with:
 context: .
 push: true
 platforms: ${{ matrix.config.platforms }}
-tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
+# tag list is generated from step above
+tags: ${{ steps.tag.outputs.full_output_tags }}
 file: ${{ matrix.config.dockerfile }}
+target: full
+provenance: false
+# using github experimental cache
+cache-from: type=gha
+cache-to: type=gha,mode=max
+# return to this if the experimental github cache is having issues
+#cache-to: type=local,dest=/tmp/.buildx-cache
+#cache-from: type=local,src=/tmp/.buildx-cache

-- name: Build and push Docker image (tagged)
+- name: Build and push Light Docker image (tagged + versioned)
-uses: docker/build-push-action@v4
+if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
+uses: docker/build-push-action@v6
 with:
 context: .
-push: ${{ github.event_name == 'push' }}
+push: true
 platforms: ${{ matrix.config.platforms }}
-tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
+# tag list is generated from step above
+tags: ${{ steps.tag.outputs.light_output_tags }}
 file: ${{ matrix.config.dockerfile }}
+target: light
+provenance: false
+# using github experimental cache
+cache-from: type=gha
+cache-to: type=gha,mode=max
+# return to this if the experimental github cache is having issues
+#cache-to: type=local,dest=/tmp/.buildx-cache
+#cache-from: type=local,src=/tmp/.buildx-cache

+- name: Build and push Server Docker image (tagged + versioned)
+if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
+uses: docker/build-push-action@v6
+with:
+context: .
+push: true
+platforms: ${{ matrix.config.platforms }}
+# tag list is generated from step above
+tags: ${{ steps.tag.outputs.server_output_tags }}
+file: ${{ matrix.config.dockerfile }}
+target: server
+provenance: false
+# using github experimental cache
+cache-from: type=gha
+cache-to: type=gha,mode=max
+# return to this if the experimental github cache is having issues
+#cache-to: type=local,dest=/tmp/.buildx-cache
+#cache-from: type=local,src=/tmp/.buildx-cache
10 .github/workflows/editorconfig.yml (vendored)
@@ -14,10 +14,16 @@ on:
 branches:
 - master

+concurrency:
+group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+cancel-in-progress: true

 jobs:
 editorconfig:
 runs-on: ubuntu-latest
 steps:
-- uses: actions/checkout@v3
+- uses: actions/checkout@v4
-- uses: editorconfig-checker/action-editorconfig-checker@main
+- uses: editorconfig-checker/action-editorconfig-checker@v2
+with:
+version: v3.0.3
 - run: editorconfig-checker
4 .github/workflows/gguf-publish.yml (vendored)
@@ -24,9 +24,9 @@ jobs:
 runs-on: ubuntu-latest

 steps:
-- uses: actions/checkout@v3
+- uses: actions/checkout@v4
 - name: Set up Python
-uses: actions/setup-python@v2
+uses: actions/setup-python@v5
 with:
 python-version: '3.9.x'
 - name: Install dependencies
17 .github/workflows/labeler.yml (vendored, new file)
@@ -0,0 +1,17 @@
name: "Pull Request Labeler"
on:
- pull_request_target

jobs:
labeler:
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
repository: "ggerganov/llama.cpp"
- uses: actions/labeler@v5
with:
configuration-path: '.github/labeler.yml'
62 .github/workflows/nix-ci-aarch64.yml (vendored)
@@ -1,62 +0,0 @@
name: Nix aarch64 builds

on:
workflow_dispatch: # allows manual triggering
schedule:
# Rebuild daily rather than on every push because QEMU is expensive (e.g.
# 1.5h instead of minutes with the cold cache).
#
# randint(0, 59), randint(0, 23)
- cron: '26 12 * * *'
# But also rebuild if we touched any of the Nix expressions:
push:
branches:
- master
paths: ['**/*.nix', 'flake.lock']
pull_request:
types: [opened, synchronize, reopened]
paths: ['**/*.nix', 'flake.lock']

jobs:
nix-build-aarch64:
if: ${{ vars.CACHIX_NAME != '' }}
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install QEMU
# Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
run: |
sudo apt-get update
sudo apt-get install -y qemu-user-static qemu-system-aarch64
sudo usermod -a -G kvm $USER
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@v9
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
extra-conf: |
extra-platforms = aarch64-linux
extra-system-features = nixos-test kvm
extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- uses: DeterminateSystems/magic-nix-cache-action@v2
with:
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- name: Set-up cachix to push the results to
uses: cachix/cachix-action@v13
with:
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
name: ${{ vars.CACHIX_NAME }}
- name: Show all output paths
run: >
nix run github:nix-community/nix-eval-jobs
-- --gc-roots-dir gcroot
--flake
".#packages.aarch64-linux"
- name: Build
run: >
nix run github:Mic92/nix-fast-build
-- --skip-cached --no-nom
--systems aarch64-linux
--flake
".#checks.aarch64-linux"
69 .github/workflows/nix-ci.yml (vendored)
@@ -1,69 +0,0 @@
name: Nix CI

on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
pull_request:
types: [opened, synchronize, reopened]

jobs:
nix-eval:
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@v9
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
extra-conf: |
extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- uses: DeterminateSystems/magic-nix-cache-action@v2
with:
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- name: List all flake outputs
run: nix flake show --all-systems
- name: Show all output paths
run: >
nix run github:nix-community/nix-eval-jobs
-- --gc-roots-dir gcroot
--flake
".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
nix-build:
if: ${{ vars.CACHIX_NAME != '' }}
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@v9
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
extra-conf: |
extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- uses: DeterminateSystems/magic-nix-cache-action@v2
with:
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- name: Set-up cachix to push the results to
uses: cachix/cachix-action@v13
with:
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
name: ${{ vars.CACHIX_NAME }}
- name: Build
run: >
nix run github:Mic92/nix-fast-build
-- --skip-cached --no-nom
--flake
".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
22 .github/workflows/nix-flake-update.yml (vendored)
@@ -1,22 +0,0 @@
name: update-flake-lock
on:
workflow_dispatch:
schedule:
- cron: '0 0 * * 0' # runs weekly on Sunday at 00:00

jobs:
lockfile:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@main
- name: Update flake.lock
uses: DeterminateSystems/update-flake-lock@main
with:
pr-title: "nix: update flake.lock"
pr-labels: |
nix
pr-reviewers: philiptaron,SomeoneSerge
token: ${{ secrets.FLAKE_TOKEN }}
36 .github/workflows/nix-publish-flake.yml (vendored)
@@ -1,36 +0,0 @@
# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
name: "Publish a flake to flakestry & flakehub"
on:
push:
tags:
- "*"
workflow_dispatch:
inputs:
tag:
description: "The existing tag to publish"
type: "string"
required: true
jobs:
flakestry-publish:
runs-on: ubuntu-latest
permissions:
id-token: "write"
contents: "read"
steps:
- uses: flakestry/flakestry-publish@main
with:
version: "${{ inputs.tag || github.ref_name }}"
flakehub-publish:
runs-on: "ubuntu-latest"
permissions:
id-token: "write"
contents: "read"
steps:
- uses: "actions/checkout@v4"
with:
ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
- uses: "DeterminateSystems/nix-installer-action@main"
- uses: "DeterminateSystems/flakehub-push@main"
with:
visibility: "public"
tag: "${{ inputs.tag }}"
18 .github/workflows/python-check-requirements.yml (vendored)
@@ -3,16 +3,20 @@ name: Python check requirements.txt
 on:
 push:
 paths:
+- '.github/workflows/python-check-requirements.yml'
 - 'scripts/check-requirements.sh'
 - 'convert*.py'
-- 'requirements.txt'
+- '**/requirements*.txt'
-- 'requirements/*.txt'
 pull_request:
 paths:
+- '.github/workflows/python-check-requirements.yml'
 - 'scripts/check-requirements.sh'
 - 'convert*.py'
-- 'requirements.txt'
+- '**/requirements*.txt'
-- 'requirements/*.txt'

+concurrency:
+group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+cancel-in-progress: true

 jobs:
 python-check-requirements:
@@ -20,10 +24,10 @@
 name: check-requirements
 steps:
 - name: Check out source repository
-uses: actions/checkout@v3
+uses: actions/checkout@v4
 - name: Set up Python environment
-uses: actions/setup-python@v4
+uses: actions/setup-python@v5
 with:
 python-version: "3.11"
 - name: Run check-requirements.sh script
-run: bash scripts/check-requirements.sh nocleanup
+run: bash scripts/check-requirements.sh
20 .github/workflows/python-lint.yml (vendored)
@@ -1,6 +1,17 @@
 name: flake8 Lint

-on: [push, pull_request]
+on:
+push:
+branches:
+- master
+paths: ['.github/workflows/python-lint.yml', '**/*.py']
+pull_request:
+types: [opened, synchronize, reopened]
+paths: ['.github/workflows/python-lint.yml', '**/*.py']

+concurrency:
+group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+cancel-in-progress: true

 jobs:
 flake8-lint:
@@ -8,13 +19,12 @@
 name: Lint
 steps:
 - name: Check out source repository
-uses: actions/checkout@v3
+uses: actions/checkout@v4
 - name: Set up Python environment
-uses: actions/setup-python@v4
+uses: actions/setup-python@v5
 with:
 python-version: "3.11"
 - name: flake8 Lint
 uses: py-actions/flake8@v2
 with:
-ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704"
+plugins: "flake8-no-print"
-exclude: "examples/*,examples/*/**,*/**/__init__.py"
40 .github/workflows/python-type-check.yml (vendored, new file)
@@ -0,0 +1,40 @@
name: Python Type-Check

on:
push:
paths:
- '.github/workflows/python-type-check.yml'
- 'pyrightconfig.json'
- '**.py'
- '**/requirements*.txt'
pull_request:
paths:
- '.github/workflows/python-type-check.yml'
- 'pyrightconfig.json'
- '**.py'
- '**/requirements*.txt'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true

jobs:
python-type-check:
runs-on: ubuntu-latest
name: pyright type-check
steps:
- name: Check out source repository
uses: actions/checkout@v4
- name: Set up Python environment
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Python dependencies
# TODO: use a venv
run: pip install -r requirements/requirements-all.txt
- name: Type-check with Pyright
uses: jakebailey/pyright-action@v2
with:
version: 1.1.382
level: warning
warnings: true
239 .github/workflows/server.yml (vendored, new file)
@@ -0,0 +1,239 @@
|
||||||
|
# Server build and tests
|
||||||
|
name: Server
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
inputs:
|
||||||
|
sha:
|
||||||
|
description: 'Commit SHA1 to build'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
slow_tests:
|
||||||
|
description: 'Run slow tests'
|
||||||
|
required: true
|
||||||
|
type: boolean
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
|
||||||
|
|
||||||
|
env:
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
LLAMA_LOG_VERBOSITY: 10
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
server:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
|
||||||
|
build_type: [RelWithDebInfo]
|
||||||
|
include:
|
||||||
|
- build_type: Release
|
||||||
|
sanitizer: ""
|
||||||
|
fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get -y install \
|
||||||
|
build-essential \
|
||||||
|
xxd \
|
||||||
|
git \
|
||||||
|
cmake \
|
||||||
|
curl \
|
||||||
|
wget \
|
||||||
|
language-pack-en \
|
||||||
|
libcurl4-openssl-dev
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
||||||
|
|
||||||
|
- name: Python setup
|
||||||
|
id: setup_python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.11'
|
||||||
|
|
||||||
|
- name: Tests dependencies
|
||||||
|
id: test_dependencies
|
||||||
|
run: |
|
||||||
|
pip install -r examples/server/tests/requirements.txt
|
||||||
|
|
||||||
|
# Setup nodejs (to be used for verifying bundled index.html)
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '22.11.0'
|
||||||
|
|
||||||
|
- name: WebUI - Install dependencies
|
||||||
|
id: webui_lint
|
||||||
|
run: |
|
||||||
|
cd examples/server/webui
|
||||||
|
npm ci
|
||||||
|
|
||||||
|
- name: WebUI - Check code format
|
||||||
|
id: webui_format
|
||||||
|
run: |
|
||||||
|
git config --global --add safe.directory $(realpath .)
|
||||||
|
cd examples/server/webui
|
||||||
|
git status
|
||||||
|
|
||||||
|
npm run format
|
||||||
|
git status
|
||||||
|
modified_files="$(git status -s)"
|
||||||
|
echo "Modified files: ${modified_files}"
|
||||||
|
if [ -n "${modified_files}" ]; then
|
||||||
|
echo "Files do not follow coding style. To fix: npm run format"
|
||||||
|
echo "${modified_files}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Verify bundled index.html
|
||||||
|
id: verify_server_index_html
|
||||||
|
run: |
|
||||||
|
git config --global --add safe.directory $(realpath .)
|
||||||
|
cd examples/server/webui
|
||||||
|
git status
|
||||||
|
|
||||||
|
npm run build
|
||||||
|
git status
|
||||||
|
modified_files="$(git status -s)"
|
||||||
|
echo "Modified files: ${modified_files}"
|
||||||
|
if [ -n "${modified_files}" ]; then
|
||||||
|
echo "Repository is dirty or server/webui is not built as expected"
|
||||||
|
echo "Hint: You may need to follow Web UI build guide in server/README.md"
|
||||||
|
echo "${modified_files}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Build (no OpenMP)
|
||||||
|
id: cmake_build_no_openmp
|
||||||
|
if: ${{ matrix.sanitizer == 'THREAD' }}
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DGGML_NATIVE=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=ON \
|
||||||
|
-DLLAMA_CURL=ON \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||||
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||||
|
-DGGML_OPENMP=OFF ;
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||||
|
|
||||||
|
- name: Build (sanitizers)
|
||||||
|
id: cmake_build_sanitizers
|
||||||
|
if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DGGML_NATIVE=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=ON \
|
||||||
|
-DLLAMA_CURL=ON \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||||
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||||
|
|
||||||
|
- name: Build (sanitizers)
|
||||||
|
id: cmake_build
|
||||||
|
if: ${{ matrix.sanitizer == '' }}
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DGGML_NATIVE=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=ON \
|
||||||
|
-DLLAMA_CURL=ON \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||||
|
|
||||||
|
- name: Tests
|
||||||
|
id: server_integration_tests
|
||||||
|
if: ${{ matrix.sanitizer == '' }}
|
||||||
|
run: |
|
||||||
|
cd examples/server/tests
|
||||||
|
./tests.sh
|
||||||
|
|
||||||
|
- name: Tests (sanitizers)
|
||||||
|
id: server_integration_tests_sanitizers
|
||||||
|
if: ${{ matrix.sanitizer != '' }}
|
||||||
|
run: |
|
||||||
|
cd examples/server/tests
|
||||||
|
LLAMA_SANITIZE=1 ./tests.sh
|
||||||
|
|
||||||
|
- name: Slow tests
|
||||||
|
id: server_integration_tests_slow
|
||||||
|
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
||||||
|
run: |
|
||||||
|
cd examples/server/tests
|
||||||
|
SLOW_TESTS=1 ./tests.sh
|
||||||
|
|
||||||
|
|
||||||
|
server-windows:
|
||||||
|
runs-on: windows-2019
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
||||||
|
|
||||||
|
- name: libCURL
|
||||||
|
id: get_libcurl
|
||||||
|
env:
|
||||||
|
CURL_VERSION: 8.6.0_6
|
||||||
|
run: |
|
||||||
|
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
|
||||||
|
mkdir $env:RUNNER_TEMP/libcurl
|
||||||
|
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
|
||||||
|
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
|
||||||
|
|
||||||
|
- name: Python setup
|
||||||
|
id: setup_python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.11'
|
||||||
|
|
||||||
|
- name: Tests dependencies
|
||||||
|
id: test_dependencies
|
||||||
|
run: |
|
||||||
|
pip install -r examples/server/tests/requirements.txt
|
||||||
|
|
||||||
|
- name: Copy Libcurl
|
||||||
|
id: prepare_libcurl
|
||||||
|
run: |
|
||||||
|
cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
|
||||||
|
|
||||||
|
- name: Tests
|
||||||
|
id: server_integration_tests
|
||||||
|
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
|
||||||
|
run: |
|
||||||
|
cd examples/server/tests
|
||||||
|
$env:PYTHONIOENCODING = ":replace"
|
||||||
|
pytest -v -x -m "not slow"
|
||||||
|
|
||||||
|
- name: Slow tests
|
||||||
|
id: server_integration_tests_slow
|
||||||
|
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
||||||
|
run: |
|
||||||
|
cd examples/server/tests
|
||||||
|
$env:SLOW_TESTS = "1"
|
||||||
|
pytest -v -x
|
20 .github/workflows/tidy-post.yml (vendored)
@@ -1,20 +0,0 @@
name: clang-tidy review post comments

on:
workflow_dispatch:
workflows: ["clang-tidy-review"]
types:
- completed

jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: ZedThree/clang-tidy-review/post@v0.13.0
# lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup
with:
# adjust options as necessary
lgtm_comment_body: ''
annotations: false
max_comments: 25
23 .github/workflows/tidy-review.yml (vendored)
@@ -1,23 +0,0 @@
name: clang-tidy-review

on:
pull_request:
branches:
- master

jobs:
clang-tidy-review:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3

- uses: ZedThree/clang-tidy-review@v0.13.0
id: review
with:
lgtm_comment_body: ''
build_dir: build
cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on
split_workflow: true

- uses: ZedThree/clang-tidy-review/upload@v0.13.0
25 .github/workflows/zig-build.yml (vendored)
@@ -1,25 +0,0 @@
name: Zig CI

on:
pull_request:
push:
branches:
- master

jobs:
build:
strategy:
fail-fast: false
matrix:
runs-on: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.runs-on }}
steps:
- uses: actions/checkout@v3
with:
submodules: recursive
fetch-depth: 0
- uses: goto-bus-stop/setup-zig@v2
with:
version: 0.11.0
- name: Build Summary
run: zig build --summary all -freference-trace
179 .gitignore (vendored)
@@ -1,92 +1,145 @@
|
||||||
*.o
|
# Extensions
|
||||||
|
|
||||||
*.a
|
*.a
|
||||||
*.so
|
|
||||||
*.gguf
|
|
||||||
*.bin
|
|
||||||
*.exe
|
|
||||||
*.dll
|
|
||||||
*.log
|
|
||||||
*.gcov
|
|
||||||
*.gcno
|
|
||||||
*.gcda
|
|
||||||
*.dot
|
|
||||||
*.bat
|
*.bat
|
||||||
|
*.bin
|
||||||
|
*.d
|
||||||
|
*.dll
|
||||||
|
*.dot
|
||||||
|
*.etag
|
||||||
|
*.exe
|
||||||
|
*.gcda
|
||||||
|
*.gcno
|
||||||
|
*.gcov
|
||||||
|
*.gguf
|
||||||
|
*.gguf.json
|
||||||
|
*.lastModified
|
||||||
|
*.log
|
||||||
*.metallib
|
*.metallib
|
||||||
.DS_Store
|
*.o
|
||||||
.build/
|
*.so
|
||||||
|
*.swp
|
||||||
|
*.tmp
|
||||||
|
|
||||||
|
# IDE / OS
|
||||||
|
|
||||||
.cache/
|
.cache/
|
||||||
.ccls-cache/
|
.ccls-cache/
|
||||||
.direnv/
|
.direnv/
|
||||||
|
.DS_Store
|
||||||
.envrc
|
.envrc
|
||||||
|
.idea/
|
||||||
.swiftpm
|
.swiftpm
|
||||||
.venv
|
|
||||||
.clang-tidy
|
|
||||||
.vs/
|
.vs/
|
||||||
.vscode/
|
.vscode/
|
||||||
|
nppBackup
|
||||||
|
|
||||||
|
|
||||||
|
# Coverage
|
||||||
|
|
||||||
lcov-report/
|
|
||||||
gcovr-report/
|
gcovr-report/
|
||||||
|
lcov-report/
|
||||||
|
|
||||||
|
# Build Artifacts
|
||||||
|
|
||||||
|
tags
|
||||||
|
.build/
|
||||||
build*
|
build*
|
||||||
|
!build-info.cmake
|
||||||
|
!build-info.cpp.in
|
||||||
|
!build-info.sh
|
||||||
|
!build.zig
|
||||||
|
!docs/build.md
|
||||||
|
/libllama.so
|
||||||
|
/llama-*
|
||||||
|
/vulkan-shaders-gen
|
||||||
|
android-ndk-*
|
||||||
|
arm_neon.h
|
||||||
|
cmake-build-*
|
||||||
|
CMakeSettings.json
|
||||||
|
compile_commands.json
|
||||||
|
ggml-metal-embed.metal
|
||||||
|
llama-batched-swift
|
||||||
|
/rpc-server
|
||||||
out/
|
out/
|
||||||
tmp/
|
tmp/
|
||||||
|
autogen-*.md
|
||||||
|
|
||||||
|
# Deprecated
|
||||||
|
|
||||||
|
/main
|
||||||
|
/server
|
||||||
|
|
||||||
|
# CI
|
||||||
|
|
||||||
|
!.github/workflows/*.yml
|
||||||
|
|
||||||
|
# Models
|
||||||
|
|
||||||
models/*
|
models/*
|
||||||
models-mnt
|
models-mnt
|
||||||
|
!models/.editorconfig
|
||||||
|
!models/ggml-vocab-*.gguf*
|
||||||
|
|
||||||
/Pipfile
|
# Zig
|
||||||
/baby-llama
|
|
||||||
/beam-search
|
|
||||||
/benchmark-matmult
|
|
||||||
/convert-llama2c-to-ggml
|
|
||||||
/embd-input-test
|
|
||||||
/embedding
|
|
||||||
/gguf
|
|
||||||
/gguf-llama-simple
|
|
||||||
/imatrix
|
|
||||||
/infill
|
|
||||||
/libllama.so
|
|
||||||
/llama-bench
|
|
||||||
/llava-cli
|
|
||||||
/lookahead
|
|
||||||
/lookup
|
|
||||||
/main
|
|
||||||
/metal
|
|
||||||
/passkey
|
|
||||||
/perplexity
|
|
||||||
/q8dot
|
|
||||||
/quantize
|
|
||||||
/quantize-stats
|
|
||||||
/result
|
|
||||||
/save-load-state
|
|
||||||
/server
|
|
||||||
/simple
|
|
||||||
/batched
|
|
||||||
/batched-bench
|
|
||||||
/export-lora
|
|
||||||
/finetune
|
|
||||||
/speculative
|
|
||||||
/parallel
|
|
||||||
/train-text-from-scratch
|
|
||||||
/tokenize
|
|
||||||
/vdot
|
|
||||||
/common/build-info.cpp
|
|
||||||
arm_neon.h
|
|
||||||
compile_commands.json
|
|
||||||
CMakeSettings.json
|
|
||||||
|
|
||||||
__pycache__
|
|
||||||
dist
|
|
||||||
|
|
||||||
zig-out/
|
zig-out/
|
||||||
zig-cache/
|
zig-cache/
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
|
||||||
ppl-*.txt
|
ppl-*.txt
|
||||||
qnt-*.txt
|
qnt-*.txt
|
||||||
perf-*.txt
|
perf-*.txt
|
||||||
|
|
||||||
examples/jeopardy/results.txt
|
# Examples
|
||||||
|
|
||||||
poetry.lock
|
examples/jeopardy/results.txt
|
||||||
|
examples/server/*.css.hpp
|
||||||
|
examples/server/*.html.hpp
|
||||||
|
examples/server/*.js.hpp
|
||||||
|
examples/server/*.mjs.hpp
|
||||||
|
!build_64.sh
|
||||||
|
!examples/*.bat
|
||||||
|
!examples/*/*.kts
|
||||||
|
!examples/*/*/*.kts
|
||||||
|
!examples/sycl/*.bat
|
||||||
|
!examples/sycl/*.sh
|
||||||
|
|
||||||
|
# Server Web UI temporary files
|
||||||
|
node_modules
|
||||||
|
examples/server/webui/dist
|
||||||
|
|
||||||
|
# Python
|
||||||
|
|
||||||
|
/.venv
|
||||||
|
__pycache__/
|
||||||
|
*/poetry.lock
|
||||||
poetry.toml
|
poetry.toml
|
||||||
nppBackup
|
|
||||||
|
# Nix
|
||||||
|
/result
|
||||||
|
|
||||||
|
# Test binaries
|
||||||
|
/tests/test-backend-ops
|
||||||
|
/tests/test-double-float
|
||||||
|
/tests/test-grad0
|
||||||
|
/tests/test-grammar-parser
|
||||||
|
/tests/test-llama-grammar
|
||||||
|
/tests/test-opt
|
||||||
|
/tests/test-quantize-fns
|
||||||
|
/tests/test-quantize-perf
|
||||||
|
/tests/test-rope
|
||||||
|
/tests/test-sampling
|
||||||
|
/tests/test-tokenizer-0
|
||||||
|
/tests/test-tokenizer-1-bpe
|
||||||
|
/tests/test-tokenizer-1-spm
|
||||||
|
|
||||||
|
# Scripts
|
||||||
|
!/scripts/install-oneapi.bat
|
||||||
|
|
||||||
|
# Test models for lora adapters
|
||||||
|
/lora-tests
|
||||||
|
|
||||||
|
# Local scripts
|
||||||
|
/run-vim.sh
|
||||||
|
/run-chat.sh
|
||||||
|
|
2 .gitmodules (vendored)
@@ -1,3 +1,3 @@
 [submodule "kompute"]
-path = kompute
+path = ggml/src/ggml-kompute/kompute
 url = https://github.com/nomic-ai/kompute.git
@@ -3,13 +3,14 @@
 exclude: prompts/.*.txt
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-rev: v3.2.0
+rev: v4.6.0
 hooks:
 - id: trailing-whitespace
 - id: end-of-file-fixer
 - id: check-yaml
 - id: check-added-large-files
 - repo: https://github.com/PyCQA/flake8
-rev: 6.0.0
+rev: 7.0.0
 hooks:
 - id: flake8
+additional_dependencies: [flake8-no-print]
1199 CMakeLists.txt
File diff suppressed because it is too large
97 CMakePresets.json (new file)
@@ -0,0 +1,97 @@
|
||||||
|
{
|
||||||
|
"version": 4,
|
||||||
|
"configurePresets": [
|
||||||
|
{
|
||||||
|
"name": "base",
|
||||||
|
"hidden": true,
|
||||||
|
"generator": "Ninja",
|
||||||
|
"binaryDir": "${sourceDir}/build-${presetName}",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
|
||||||
|
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "sycl-base",
|
||||||
|
"hidden": true,
|
||||||
|
"generator": "Ninja",
|
||||||
|
"binaryDir": "${sourceDir}/build-${presetName}",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
|
||||||
|
"CMAKE_CXX_COMPILER": "icx",
|
||||||
|
"CMAKE_C_COMPILER": "cl",
|
||||||
|
"GGML_SYCL": "ON",
|
||||||
|
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
|
||||||
|
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
|
||||||
|
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
|
||||||
|
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
|
||||||
|
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
|
||||||
|
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "x64-windows-llvm", "hidden": true,
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "arm64-windows-msvc", "hidden": true,
|
||||||
|
"architecture": { "value": "arm64", "strategy": "external" },
|
||||||
|
"toolset": { "value": "host=x64", "strategy": "external" },
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "arm64-windows-llvm", "hidden": true,
|
||||||
|
"architecture": { "value": "arm64", "strategy": "external" },
|
||||||
|
"toolset": { "value": "host=x64", "strategy": "external" },
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "arm64-apple-clang", "hidden": true,
|
||||||
|
"architecture": { "value": "arm64", "strategy": "external" },
|
||||||
|
"toolset": { "value": "host=x64", "strategy": "external" },
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
{ "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
|
||||||
|
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
|
||||||
|
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
|
||||||
|
|
||||||
|
{ "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
|
||||||
|
{ "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
|
||||||
|
{ "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
|
||||||
|
|
||||||
|
{ "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
|
||||||
|
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
|
||||||
|
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
|
||||||
|
|
||||||
|
{ "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
|
||||||
|
{ "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
|
||||||
|
{ "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
|
||||||
|
{ "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
|
||||||
|
|
||||||
|
{ "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
|
||||||
|
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
|
||||||
|
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
|
||||||
|
|
||||||
|
{ "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
|
||||||
|
{ "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
|
||||||
|
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
|
||||||
|
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
|
||||||
|
|
||||||
|
{ "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
|
||||||
|
{ "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
|
||||||
|
]
|
||||||
|
}
|
11 CODEOWNERS (new file)
@@ -0,0 +1,11 @@
# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs

/ci/ @ggerganov
/.devops/*.Dockerfile @ngxson
/examples/server/ @ngxson
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
/ggml/src/ggml-opt.cpp @JohannesGaessler
/ggml/src/gguf.cpp @JohannesGaessler
125 CONTRIBUTING.md (new file)
@@ -0,0 +1,125 @@
|
||||||
|
# Pull requests (for contributors)
|
||||||
|
|
||||||
|
- Test your changes:
|
||||||
|
- Execute [the full CI locally on your machine](ci/README.md) before publishing
|
||||||
|
- Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`)
|
||||||
|
- If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
|
||||||
|
- If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
|
||||||
|
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
|
||||||
|
- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
|
||||||
|
|
||||||
|
# Pull requests (for collaborators)
|
||||||
|
|
||||||
|
- Squash-merge PRs
|
||||||
|
- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
|
||||||
|
- Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
|
||||||
|
- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
|
||||||
|
|
||||||
|
# Coding guidelines
|
||||||
|
|
||||||
|
- Avoid adding third-party dependencies, extra files, extra headers, etc.
|
||||||
|
- Always consider cross-compatibility with other operating systems and architectures
|
||||||
|
- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
|
||||||
|
- Vertical alignment makes things more readable and easier to batch edit
|
||||||
|
- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
|
||||||
|
- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets
|
||||||
|
- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo`
|
||||||
|
- In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary
|
||||||
|
```cpp
|
||||||
|
// OK
|
||||||
|
llama_context * ctx;
|
||||||
|
const llama_rope_type rope_type;
|
||||||
|
|
||||||
|
// not OK
|
||||||
|
struct llama_context * ctx;
|
||||||
|
const enum llama_rope_type rope_type;
|
||||||
|
```
|
||||||
|
|
||||||
|
_(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_
|
||||||
|
|
||||||
|
- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code
|
||||||
|
- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
|
||||||
|
- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
|
||||||
|
- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
# Naming guidelines

- Use `snake_case` for function, variable and type names
- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)

```cpp
// not OK
int small_number;
int big_number;

// OK
int number_small;
int number_big;
```

- Enum values are always in upper case and prefixed with the enum name

```cpp
enum llama_vocab_type {
    LLAMA_VOCAB_TYPE_NONE = 0,
    LLAMA_VOCAB_TYPE_SPM  = 1,
    LLAMA_VOCAB_TYPE_BPE  = 2,
    LLAMA_VOCAB_TYPE_WPM  = 3,
    LLAMA_VOCAB_TYPE_UGM  = 4,
    LLAMA_VOCAB_TYPE_RWKV = 5,
};
```

- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>`

```cpp
llama_model_init();           // class: "llama_model",         method: "init"
llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove"
llama_sampler_get_seed();     // class: "llama_sampler",       method: "get_seed"
llama_set_embeddings();       // class: "llama_context",       method: "set_embeddings"
llama_n_threads();            // class: "llama_context",       method: "n_threads"
llama_adapter_lora_free();    // class: "llama_adapter_lora",  method: "free"
```

- The `get` `<action>` can be omitted
- The `<noun>` can be omitted if not necessary
- The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed
- Use `init`/`free` for constructor/destructor `<action>`
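As an additional illustration of these rules (a sketch only; treat the exact symbols as hypothetical rather than a guaranteed part of the API):

```cpp
llama_n_ctx();      // "get" omitted and "_context" dropped from the class: reads as "llama_context: n_ctx"
llama_model_free(); // "free" used as the destructor <action>, no <noun> needed
```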
- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else

```cpp
typedef struct llama_context * llama_context_t;

enum llama_pooling_type llama_pooling_type(const llama_context_t ctx);
```

_(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_

- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension
- Python filenames are all lowercase with underscores

- _(TODO: abbreviations usage)_

# Preprocessor directives

- _(TODO: add guidelines with examples and apply them to the codebase)_

```cpp
#ifdef FOO
#endif // FOO
```
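As an illustrative sketch only (the guideline itself is still marked TODO), the same closing-comment style extends naturally to longer conditionals:

```cpp
#ifdef FOO
    // code path used when FOO is defined
#else
    // fallback path
#endif // FOO
```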
# Documentation

- Documentation is a community effort
- When you need to look into the source code to figure out how to use an API, consider adding a short summary to the header file for future reference
- When you notice incorrect or outdated documentation, please update it
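For instance, a minimal sketch of such a summary in a header (the function below is hypothetical and only meant to show the level of detail):

```cpp
// (hypothetical function, shown only to illustrate the style of a short summary)
// Copies the embeddings of the last evaluated batch into the provided buffer.
// The buffer must hold at least n_embd * n_tokens floats.
int32_t llama_get_batch_embeddings(llama_context * ctx, float * dst);
```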
# Resources

The GitHub issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from GitHub projects:

https://github.com/ggerganov/llama.cpp/projects
LICENSE

```diff
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2023 Georgi Gerganov
+Copyright (c) 2023-2024 The ggml authors

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
```
Package.swift

```diff
@@ -13,34 +13,7 @@ let package = Package(
     products: [
         .library(name: "llama", targets: ["llama"]),
     ],
-    dependencies: [
-        .package(url: "https://github.com/ggerganov/ggml.git", .branch("release"))
-    ],
     targets: [
-        .target(
-            name: "llama",
-            dependencies: ["ggml"],
-            path: ".",
-            exclude: ["ggml-metal.metal"],
-            sources: [
-                "llama.cpp",
-            ],
-            publicHeadersPath: "spm-headers",
-            cSettings: [
-                .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-                .define("GGML_USE_ACCELERATE"),
-                .unsafeFlags(["-fno-objc-arc"]),
-                .define("GGML_USE_METAL"),
-                // NOTE: NEW_LAPACK will required iOS version 16.4+
-                // We should consider add this in the future when we drop support for iOS 14
-                // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-                // .define("ACCELERATE_NEW_LAPACK"),
-                // .define("ACCELERATE_LAPACK_ILP64")
-            ],
-            linkerSettings: [
-                .linkedFramework("Accelerate")
-            ]
-        )
-    ],
-    cxxLanguageStandard: .cxx11
+        .systemLibrary(name: "llama", pkgConfig: "llama"),
+    ]
 )
```
README-sycl.md (removed)

# llama.cpp for SYCL

[Background](#background)

[OS](#os)

[Intel GPU](#intel-gpu)

[Linux](#linux)

[Windows](#windows)

[Environment Variable](#environment-variable)

[Known Issue](#known-issue)

[Q&A](#q&a)

[Todo](#todo)

## Background

SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators, such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.

oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.

Intel uses SYCL as the direct programming language to support CPUs, GPUs and FPGAs.

To avoid reinventing the wheel, this code follows the other backend code paths in llama.cpp (such as OpenBLAS, cuBLAS, CLBlast) and uses the open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate the code to SYCL.

llama.cpp for SYCL is used to support Intel GPUs.

For Intel CPUs, we recommend using llama.cpp for x86 instead (built with Intel MKL).

## OS

| OS      | Status  | Verified     |
|---------|---------|--------------|
| Linux   | Support | Ubuntu 22.04 |
| Windows | Support | Windows 11   |

## Intel GPU

| Intel GPU                     | Status  | Verified Model                  |
|-------------------------------|---------|---------------------------------|
| Intel Data Center Max Series  | Support | Max 1550                        |
| Intel Data Center Flex Series | Support | Flex 170                        |
| Intel Arc Series              | Support | Arc 770, 730M                   |
| Intel built-in Arc GPU        | Support | built-in Arc GPU in Meteor Lake |
| Intel iGPU                    | Support | iGPU in i5-1250P, i7-1165G7     |

## Linux

### Setup Environment

1. Install the Intel GPU driver.

a. Install the Intel GPU driver following the official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).

Note: for an iGPU, install the client GPU driver.

b. Add your user to the `video` and `render` groups:

```
sudo usermod -aG render username
sudo usermod -aG video username
```

Note: re-login for the change to take effect.

c. Check:

```
sudo apt install clinfo
sudo clinfo -l
```

Output (example):

```
Platform #0: Intel(R) OpenCL Graphics
 `-- Device #0: Intel(R) Arc(TM) A770 Graphics

Platform #0: Intel(R) OpenCL HD Graphics
 `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
```

2. Install the Intel® oneAPI Base Toolkit.

a. Follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).

We recommend installing to the default folder: **/opt/intel/oneapi**. The following guide uses the default folder as an example; if you use another folder, adjust the paths accordingly.

b. Check:

```
source /opt/intel/oneapi/setvars.sh

sycl-ls
```

There should be one or more level-zero devices, like **[ext_oneapi_level_zero:gpu:0]**.

Output (example):
```
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
```

3. Build locally:

```
mkdir -p build
cd build
source /opt/intel/oneapi/setvars.sh

# for FP16
#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference

# for FP32
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

# build example/main only
#cmake --build . --config Release --target main

# build all binaries
cmake --build . --config Release -v

cd ..
```

or

```
./examples/sycl/build.sh
```

Note:

- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only.

### Run

1. Put the model file into the **models** folder.

2. Enable the oneAPI running environment:

```
source /opt/intel/oneapi/setvars.sh
```

3. List the device IDs.

Run without parameters:

```
./build/bin/ls-sycl-device

or

./build/bin/main
```

Check the IDs in the startup log, like:

```
found 4 SYCL devices:
  Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
  Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
    max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
  Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
    max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
  Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
```

| Attribute              | Note                                                  |
|------------------------|-------------------------------------------------------|
| compute capability 1.3 | Level-zero runtime, recommended                       |
| compute capability 3.0 | OpenCL runtime, slower than level-zero in most cases  |

4. Set the device ID and execute llama.cpp.

Set device ID = 0 with **GGML_SYCL_DEVICE=0**:

```
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33
```

or run by script:

```
./examples/sycl/run-llama2.sh
```

Note:

- By default, mmap is used to read the model file. In some cases this leads to a hang. We recommend adding the **--no-mmap** parameter to disable mmap() and avoid this issue.

5. Check the device ID in the output, like:

```
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
```

## Windows

### Setup Environment

1. Install the Intel GPU driver.

Install the Intel GPU driver following the official guide: [Install GPU Drivers](https://www.intel.com/content/www/us/en/products/docs/discrete-gpus/arc/software/drivers.html).

2. Install the Intel® oneAPI Base Toolkit.

a. Follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).

We recommend installing to the default folder. The following guide uses the default folder as an example; if you use another folder, adjust the paths accordingly.

b. Enable the oneAPI running environment:

- In Search, input 'oneAPI'. Search & open "Intel oneAPI command prompt for Intel 64 for Visual Studio 2022".

- Or, in a CMD window, run:

```
"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64
```

c. Check the GPU.

In the oneAPI command line:

```
sycl-ls
```

There should be one or more level-zero devices, like **[ext_oneapi_level_zero:gpu:0]**.

Output (example):
```
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
[opencl:cpu:1] Intel(R) OpenCL, 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Iris(R) Xe Graphics OpenCL 3.0 NEO [31.0.101.5186]
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Iris(R) Xe Graphics 1.3 [1.3.28044]
```

3. Install cmake & make.

a. Download & install cmake for Windows: https://cmake.org/download/

b. Download & install make for Windows provided by mingw-w64: https://www.mingw-w64.org/downloads/

### Build locally

In the oneAPI command line window:

```
mkdir -p build
cd build
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force

:: for FP16
:: faster for long-prompt inference
:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON

:: for FP32
cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release

:: build example/main only
:: make main

:: build all binaries
make -j
cd ..
```

or

```
.\examples\sycl\win-build-sycl.bat
```

Note:

- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only.

### Run

1. Put the model file into the **models** folder.

2. Enable the oneAPI running environment:

- In Search, input 'oneAPI'. Search & open "Intel oneAPI command prompt for Intel 64 for Visual Studio 2022".

- Or, in a CMD window, run:

```
"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64
```

3. List the device IDs.

Run without parameters:

```
build\bin\ls-sycl-device.exe

or

build\bin\main.exe
```

Check the IDs in the startup log, like:

```
found 4 SYCL devices:
  Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
  Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
    max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
  Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
    max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
  Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
```

| Attribute              | Note                                                  |
|------------------------|-------------------------------------------------------|
| compute capability 1.3 | Level-zero runtime, recommended                       |
| compute capability 3.0 | OpenCL runtime, slower than level-zero in most cases  |

4. Set the device ID and execute llama.cpp.

Set device ID = 0 with **set GGML_SYCL_DEVICE=0**:

```
set GGML_SYCL_DEVICE=0
build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0
```

or run by script:

```
.\examples\sycl\win-run-llama2.bat
```

Note:

- By default, mmap is used to read the model file. In some cases this leads to a hang. We recommend adding the **--no-mmap** parameter to disable mmap() and avoid this issue.

5. Check the device ID in the output, like:

```
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
```

## Environment Variable

#### Build

| Name               | Value                       | Function                                                                                                  |
|--------------------|-----------------------------|-----------------------------------------------------------------------------------------------------------|
| LLAMA_SYCL         | ON (mandatory)              | Enable the build with the SYCL code path. For FP32/FP16, LLAMA_SYCL=ON is mandatory.                       |
| LLAMA_SYCL_F16     | ON (optional)               | Enable the FP16 build with the SYCL code path. Faster for long-prompt inference. Do not set it for FP32.   |
| CMAKE_C_COMPILER   | icx                         | Use the icx compiler for the SYCL code path                                                                |
| CMAKE_CXX_COMPILER | icpx (Linux), icx (Windows) | Use icpx/icx for the SYCL code path                                                                        |

#### Running

| Name             | Value            | Function                                                                  |
|------------------|------------------|---------------------------------------------------------------------------|
| GGML_SYCL_DEVICE | 0 (default) or 1 | Set the device ID used. Check the device IDs in the default run output    |
| GGML_SYCL_DEBUG  | 0 (default) or 1 | Enable the log function via the macro GGML_SYCL_DEBUG                     |

## Known Issue

- Hang during startup

  llama.cpp uses mmap by default to read the model file and copy it to the GPU. On some systems the memcpy is abnormal and blocks.

  Solution: add **--no-mmap**.

## Q&A

- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.

  The oneAPI running environment is not enabled. Install the oneAPI base toolkit and enable it with: `source /opt/intel/oneapi/setvars.sh`.

- On Windows: no output, and no error.

  The oneAPI running environment is not enabled.

## Todo

- Support building on Windows.

- Support multiple cards.
SECURITY.md (new file)

# Security Policy

- [**Using llama.cpp securely**](#using-llamacpp-securely)
  - [Untrusted models](#untrusted-models)
  - [Untrusted inputs](#untrusted-inputs)
  - [Data privacy](#data-privacy)
  - [Untrusted environments or networks](#untrusted-environments-or-networks)
  - [Multi-Tenant environments](#multi-tenant-environments)
- [**Reporting a vulnerability**](#reporting-a-vulnerability)

## Using llama.cpp securely

### Untrusted models
Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources.

*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code.

> [!NOTE]
> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance.

### Untrusted inputs

Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks.

For maximum security when handling untrusted inputs, you may need to employ the following:

* Sandboxing: Isolate the environment where the inference happens.
* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics.
* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches.
* Input Sanitization: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as:
  * Validation: Enforce strict rules on allowed characters and data types.
  * Filtering: Remove potentially malicious scripts or code fragments.
  * Encoding: Convert special characters into safe representations.
  * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)).

### Data privacy

To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors.

### Untrusted environments or networks

If you can't run your models in a secure and isolated environment, or if they must be exposed to an untrusted network, make sure to take the following security precautions:
* Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value.
* Encrypt your data if sending it over the network.

### Multi-Tenant environments

If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.

1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and prevents malicious users from sending graphs to execute under another tenant's identity.

2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring.

3. Model Sharing: In a multi-tenant model-sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk.

4. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side-channel attacks on GPUs are possible, which can leak data from other models or processes running on the same system at the same time.

## Reporting a vulnerability

Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++.

<!-- normal version -->
However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.

Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new).

This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure.
40
SHA256SUMS
40
SHA256SUMS
|
@ -1,40 +0,0 @@
|
||||||
700df0d3013b703a806d2ae7f1bfb8e59814e3d06ae78be0c66368a50059f33d models/7B/consolidated.00.pth
|
|
||||||
666a4bb533b303bdaf89e1b6a3b6f93535d868de31d903afdc20983dc526c847 models/7B/ggml-model-f16.bin
|
|
||||||
ec2f2d1f0dfb73b72a4cbac7fa121abbe04c37ab327125a38248f930c0f09ddf models/7B/ggml-model-q4_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q4_1.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_1.bin
|
|
||||||
7e89e242ddc0dd6f060b43ca219ce8b3e8f08959a72cb3c0855df8bb04d46265 models/7B/params.json
|
|
||||||
745bf4e29a4dd6f411e72976d92b452da1b49168a4f41c951cfcc8051823cf08 models/13B/consolidated.00.pth
|
|
||||||
d5ccbcc465c71c0de439a5aeffebe8344c68a519bce70bc7f9f92654ee567085 models/13B/consolidated.01.pth
|
|
||||||
2b206e9b21fb1076f11cafc624e2af97c9e48ea09312a0962153acc20d45f808 models/13B/ggml-model-f16.bin
|
|
||||||
fad169e6f0f575402cf75945961cb4a8ecd824ba4da6be2af831f320c4348fa5 models/13B/ggml-model-q4_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q4_1.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_1.bin
|
|
||||||
4ab77bec4d4405ccb66a97b282574c89a94417e3c32e5f68f37e2876fc21322f models/13B/params.json
|
|
||||||
e23294a58552d8cdec5b7e8abb87993b97ea6eced4178ff2697c02472539d067 models/30B/consolidated.00.pth
|
|
||||||
4e077b7136c7ae2302e954860cf64930458d3076fcde9443f4d0e939e95903ff models/30B/consolidated.01.pth
|
|
||||||
24a87f01028cbd3a12de551dcedb712346c0b5cbdeff1454e0ddf2df9b675378 models/30B/consolidated.02.pth
|
|
||||||
1adfcef71420886119544949767f6a56cb6339b4d5fcde755d80fe68b49de93b models/30B/consolidated.03.pth
|
|
||||||
7e1b524061a9f4b27c22a12d6d2a5bf13b8ebbea73e99f218809351ed9cf7d37 models/30B/ggml-model-f16.bin
|
|
||||||
d2a441403944819492ec8c2002cc36fa38468149bfb4b7b4c52afc7bd9a7166d models/30B/ggml-model-q4_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q4_1.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_1.bin
|
|
||||||
2c07118ea98d69dbe7810d88520e30288fa994751b337f8fca02b171955f44cb models/30B/params.json
|
|
||||||
135c563f6b3938114458183afb01adc9a63bef3d8ff7cccc3977e5d3664ecafe models/65B/consolidated.00.pth
|
|
||||||
9a600b37b19d38c7e43809485f70d17d1dc12206c07efa83bc72bb498a568bde models/65B/consolidated.01.pth
|
|
||||||
e7babf7c5606f165a3756f527cb0fedc4f83e67ef1290391e52fb1cce5f26770 models/65B/consolidated.02.pth
|
|
||||||
73176ffb426b40482f2aa67ae1217ef79fbbd1fff5482bae5060cdc5a24ab70e models/65B/consolidated.03.pth
|
|
||||||
882e6431d0b08a8bc66261a0d3607da21cbaeafa96a24e7e59777632dbdac225 models/65B/consolidated.04.pth
|
|
||||||
a287c0dfe49081626567c7fe87f74cce5831f58e459b427b5e05567641f47b78 models/65B/consolidated.05.pth
|
|
||||||
72b4eba67a1a3b18cb67a85b70f8f1640caae9b40033ea943fb166bd80a7b36b models/65B/consolidated.06.pth
|
|
||||||
d27f5b0677d7ff129ceacd73fd461c4d06910ad7787cf217b249948c3f3bc638 models/65B/consolidated.07.pth
|
|
||||||
60758f2384d74e423dffddfd020ffed9d3bb186ebc54506f9c4a787d0f5367b0 models/65B/ggml-model-f16.bin
|
|
||||||
cde053439fa4910ae454407e2717cc46cc2c2b4995c00c93297a2b52e790fa92 models/65B/ggml-model-q4_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q4_1.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_0.bin
|
|
||||||
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_1.bin
|
|
||||||
999ed1659b469ccc2a941714c0a9656fa571d17c9f7c8c7589817ca90edef51b models/65B/params.json
|
|
||||||
9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 models/tokenizer.model
|
|
Sources/llama/llama.h (new file)

```cpp
#pragma once

#include <llama.h>
```

Sources/llama/module.modulemap (new file)

```
module llama [system] {
    header "llama.h"
    link "llama"
    export *
}
```
awq-py/README.md (removed)

# AWQ: Activation-aware Weight Quantization for LLM - version applied to llama.cpp
[[Paper](https://arxiv.org/abs/2306.00978)][[Original Repo](https://github.com/mit-han-lab/llm-awq)][[Easy-to-use Repo](https://github.com/casper-hansen/AutoAWQ)]

**Supported models:**

- [X] LLaMA
- [x] LLaMA 2
- [X] MPT
- [X] Mistral AI v0.1
- [ ] Bloom
- [ ] Mixtral MoE

**TODO:**
- [x] Update version to work with both MPT and MPT-AWQ models
- [ ] Add OPT model
- [ ] Add Bloom model
- [ ] Add Mixtral MoE
- [ ] Support w3, w2

## Contents

- [Install](#install)
- [Convert](#convert)
- [Quantize](#quantize)
- [Test](#test)
- [Benchmark](#benchmark)
- [Results](#results)

## Install
Install the requirements:
```bash
pip install -r requirements.txt
```
Get the pre-computed AWQ search results for multiple model families, including LLaMA, LLaMA2, MPT, OPT:
```bash
git clone https://huggingface.co/datasets/mit-han-lab/awq-model-zoo awq_cache
```

## Convert
Example for a llama model:
```bash
# For llama7b and llama2 models
python convert.py models/llama-7b/ --awq-path awq_cache/llama-7b-w4-g128.pt --outfile models/llama_7b_fp16.gguf
# For mistral and mpt models
python convert-hf-to-gguf.py models/mpt-7b/ --awq-path awq_cache/mpt-7b-w4-g128.pt --outfile models/mpt_7b_fp16.gguf
```

## Quantize
```bash
# We only benchmark and confirm the results on the q4_0, q4_1, and q2_k types.
./quantize models/llama_7b_fp16.gguf models/llama_7b_q4_0.gguf q4_0
```

## Test
```bash
# For all models.
./build/bin/main -m models/llama_7b_q4_0.gguf -n 128 --prompt "Once upon a time"
```

## Benchmark
The perplexity measurements in the tables below are done against the `wikitext2` test dataset (https://paperswithcode.com/dataset/wikitext-2), with a context length of 512.
```bash
# For llama, llama2, and mistral models.
./perplexity -m models/llama_7b_q4_0.gguf -f datasets/wikitext-2-raw/wiki.test.raw
```

## Results
Results are run on OpenBLAS (CPU) and cuBLAS (GPU) for fair comparison.
We use three llama.cpp quantization types to work with our version: q4_0, q4_1, and q2_k.

### Llama 7B (Build with OpenBLAS)

| Model        | Measure     | F16    | Q4_0   | Q4_1   | Q2_K   |
|-------------:|-------------|-------:|-------:|-------:|-------:|
| Llama 7B     | perplexity  | 5.9066 | 6.1214 | 6.0643 | 6.5808 |
| Llama 7B     | file size   | 12.9G  | 3.5G   | 3.9G   | 2.7G   |
| Llama 7B     | bits/weight | 16.0   | 4.5    | 5.0    | 2.6    |
| AWQ-LLama 7B | perplexity  | 5.9175 | 6.0252 | 5.9987 | 6.3692 |
| AWQ-LLama 7B | file size   | 12.9G  | 3.5G   | 3.9G   | 2.7G   |
| AWQ-LLama 7B | bits/weight | 16.0   | 4.5    | 5.0    | 2.6    |

### Llama2 7B (Build with cuBLAS)

| Model         | Measure     | F16    | Q4_0   | Q4_1   | Q2_K   |
|--------------:|-------------|-------:|-------:|-------:|-------:|
| Llama2 7B     | perplexity  | 5.8664 | 6.0260 | 6.0656 | 6.4496 |
| Llama2 7B     | file size   | 12.9G  | 3.5G   | 3.9G   | 2.7G   |
| Llama2 7B     | bits/weight | 16.0   | 4.5    | 5.0    | 2.6    |
| AWQ-LLama2 7B | perplexity  | 5.8801 | 6.0054 | 5.9849 | 6.3650 |
| AWQ-LLama2 7B | file size   | 12.9G  | 3.5G   | 3.9G   | 2.7G   |
| AWQ-LLama2 7B | bits/weight | 16.0   | 4.5    | 5.0    | 2.6    |

### Mistral 7B v0.1 (Build with cuBLAS)

| Model          | Measure     | F16    | Q4_0   | Q4_1   | Q2_K   |
|---------------:|-------------|-------:|-------:|-------:|-------:|
| Mistral 7B     | perplexity  | 5.6931 | 5.8202 | 5.8268 | 6.1645 |
| Mistral 7B     | file size   | 14.5G  | 4.1G   | 4.5G   | 3.1G   |
| Mistral 7B     | bits/weight | 16.0   | 4.5    | 5.0    | 2.6    |
| AWQ-Mistral 7B | perplexity  | 5.6934 | 5.8020 | 5.7691 | 6.0426 |
| AWQ-Mistral 7B | file size   | 14.5G  | 4.1G   | 4.5G   | 3.1G   |
| AWQ-Mistral 7B | bits/weight | 16.0   | 4.5    | 5.0    | 2.6    |

### MPT 7B (Build with OpenBLAS)

| Model      | Measure     | F16    | Q4_0   | Q4_1   | Q2_K    |
|-----------:|-------------|-------:|-------:|-------:|--------:|
| MPT 7B     | perplexity  | 8.4369 | 8.7956 | 8.6265 | 11.4913 |
| MPT 7B     | file size   | 13.7G  | 3.9G   | 4.3G   | 2.8G    |
| MPT 7B     | bits/weight | 16.0   | 4.5    | 5.0    | 2.6     |
| AWQ-MPT 7B | perplexity  | 8.4944 | 8.7053 | 8.6750 | 10.2873 |
| AWQ-MPT 7B | file size   | 13.7G  | 3.9G   | 4.3G   | 2.8G    |
| AWQ-MPT 7B | bits/weight | 16.0   | 4.5    | 5.0    | 2.6     |
|
@ -1,254 +0,0 @@
|
||||||
"""
|
|
||||||
Implements the AWQ for llama.cpp use cases.
|
|
||||||
Original paper: https://arxiv.org/abs/2306.00978
|
|
||||||
|
|
||||||
This code is based on versions of the AWQ implementation found in the following repositories:
|
|
||||||
* https://github.com/mit-han-lab/llm-awq
|
|
||||||
* https://github.com/casper-hansen/AutoAWQ
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
|
|
||||||
from transformers import AutoModelForCausalLM, AutoConfig
|
|
||||||
from transformers.models.bloom.modeling_bloom import BloomGelu
|
|
||||||
from transformers.models.llama.modeling_llama import LlamaRMSNorm
|
|
||||||
from transformers.activations import GELUActivation
|
|
||||||
|
|
||||||
|
|
||||||
class ScaledActivation(nn.Module):
|
|
||||||
"""
|
|
||||||
ScaledActivation module wraps an existing activation function and applies a
|
|
||||||
scale factor to its output.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
module (nn.Module): The activation function to be scaled.
|
|
||||||
scales (torch.Tensor): A tensor of size (num_features,) containing the initial
|
|
||||||
scale factors for each feature.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
torch.Tensor: The scaled output of the activation function.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, module, scales):
|
|
||||||
super().__init__()
|
|
||||||
self.act = module
|
|
||||||
self.scales = nn.Parameter(scales.data)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
return self.act(x) / self.scales.view(1, 1, -1).to(x.device)
|
|
||||||
|
|
||||||
|
|
||||||
def set_op_by_name(layer, name, new_module):
|
|
||||||
"""
|
|
||||||
Set the new module for given module's name.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
layer (nn.Module): The layer in which to replace the submodule.
|
|
||||||
name (str): The path to the submodule to be replaced, using dot notation
|
|
||||||
to access nested modules.
|
|
||||||
new_module (nn.Module): The new module to replace the existing one.
|
|
||||||
"""
|
|
||||||
levels = name.split(".")
|
|
||||||
if len(levels) > 1:
|
|
||||||
mod_ = layer
|
|
||||||
for l_idx in range(len(levels) - 1):
|
|
||||||
if levels[l_idx].isdigit():
|
|
||||||
mod_ = mod_[int(levels[l_idx])]
|
|
||||||
else:
|
|
||||||
mod_ = getattr(mod_, levels[l_idx])
|
|
||||||
setattr(mod_, levels[-1], new_module)
|
|
||||||
else:
|
|
||||||
setattr(layer, name, new_module)
|
|
||||||
|
|
||||||
|
|
||||||
def get_op_by_name(module, op_name):
|
|
||||||
"""
|
|
||||||
Retrieves a submodule within a given layer based on its name.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
module (nn.Module): The layer containing the submodule to find.
|
|
||||||
op_name (str): The name of the submodule.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
nn.Module: The requested submodule found within the given layer.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If the specified submodule cannot be found within the layer.
|
|
||||||
"""
|
|
||||||
for name, m in module.named_modules():
|
|
||||||
if name == op_name:
|
|
||||||
return m
|
|
||||||
raise ValueError(f"Cannot find op {op_name} in module {module}")
|
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
|
|
||||||
def scale_ln_fcs(ln, fcs, scales):
|
|
||||||
"""
|
|
||||||
Scales the weights of a LayerNorm and a list of fully-connected layers proportionally.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ln (nn.LayerNorm): The LayerNorm module to be scaled.
|
|
||||||
fcs (List[nn.Linear]): A list of fully-connected layers to be scaled.
|
|
||||||
scales (torch.Tensor): A 1D tensor of size (num_features,).
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not isinstance(fcs, list):
|
|
||||||
fcs = [fcs]
|
|
||||||
|
|
||||||
scales = scales.to(ln.weight.device)
|
|
||||||
|
|
||||||
ln.weight.div_(scales)
|
|
||||||
if hasattr(ln, "bias") and ln.bias is not None:
|
|
||||||
ln.bias.div_(scales)
|
|
||||||
|
|
||||||
for fc in fcs:
|
|
||||||
fc.weight.mul_(scales.view(1, -1))
|
|
||||||
|
|
||||||
for p in ln.parameters():
|
|
||||||
assert torch.isnan(p).sum() == 0
|
|
||||||
for fc in fcs:
|
|
||||||
for p in fc.parameters():
|
|
||||||
assert torch.isnan(p).sum() == 0
|
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
|
|
||||||
def scale_fc_fc(fc1, fc2, scales):
|
|
||||||
"""
|
|
||||||
Scales the weights of two fully-connected layers in a specific pattern.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
fc1 (nn.Linear): The first fully-connected layer to be scaled.
|
|
||||||
fc2 (nn.Linear): The second fully-connected layer to be scaled.
|
|
||||||
scales (torch.Tensor): A 1D tensor of size (num_features,).
|
|
||||||
"""
|
|
||||||
assert isinstance(fc1, nn.Linear)
|
|
||||||
assert isinstance(fc2, nn.Linear)
|
|
||||||
|
|
||||||
scales = scales.to(fc1.weight.device)
|
|
||||||
|
|
||||||
fc1.weight[-scales.size(0):].div_(scales.view(-1, 1))
|
|
||||||
if fc1.bias is not None:
|
|
||||||
fc1.bias.div_(scales.view(-1))
|
|
||||||
|
|
||||||
fc2.weight.mul_(scales.view(1, -1))
|
|
||||||
|
|
||||||
for p in fc1.parameters():
|
|
||||||
assert torch.isnan(p).sum() == 0
|
|
||||||
for p in fc2.parameters():
|
|
||||||
assert torch.isnan(p).sum() == 0
|
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
|
|
||||||
def scale_gelu_fc(gelu, fc, scales):
|
|
||||||
"""
|
|
||||||
Scales the weight of a GELU activation and a fully-connected layer proportionally.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
gelu (Union[nn.GELU, BloomGelu, GELUActivation]): The GELU activation module to be scaled.
|
|
||||||
fc (nn.Linear): The fully-connected layer to be scaled.
|
|
||||||
scales (torch.Tensor): A 1D tensor of size (num_features,).
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
TypeError: If the `gelu` module is not of type `nn.GELU`, `BloomGelu`, or `GELUActivation`.
|
|
||||||
TypeError: If the `fc` module is not of type `nn.Linear`.
|
|
||||||
"""
|
|
||||||
assert isinstance(gelu, (nn.GELU, BloomGelu, GELUActivation))
|
|
||||||
assert isinstance(fc, nn.Linear)
|
|
||||||
|
|
||||||
fc.weight.mul_(scales.view(1, -1).to(fc.weight.device))
|
|
||||||
|
|
||||||
for p in fc.parameters():
|
|
||||||
assert torch.isnan(p).sum() == 0
|
|
||||||
|
|
||||||
|
|
||||||
def apply_scale(module, scales_list, input_feat_dict=None):
|
|
||||||
"""
|
|
||||||
Applies different scaling strategies to layers based on their type and hierarchy within a given module.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
module (nn.Module): The module containing the layers to be scaled.
|
|
||||||
scales_list (List[Tuple[str, List[str], torch.Tensor]]): A list of tuples containing:
|
|
||||||
* prev_op_name (str): The name of the preceding operation or module,
|
|
||||||
relative to which the layers to be scaled are located.
|
|
||||||
* layer_names (List[str]): A list of names of the layers to be scaled, relative to the preceding operation.
|
|
||||||
* scales (torch.Tensor): A 1D tensor of size (num_features,) containing the scaling factors for each feature.
|
|
||||||
input_feat_dict (Optional[Dict[str, torch.Tensor]]): A dictionary mapping layer names to their corresponding
|
|
||||||
input features (optional).
|
|
||||||
"""
|
|
||||||
for prev_op_name, layer_names, scales in scales_list:
|
|
||||||
prev_op = get_op_by_name(module, prev_op_name)
|
|
||||||
layers = [get_op_by_name(module, name) for name in layer_names]
|
|
||||||
|
|
||||||
prev_op.cuda()
|
|
||||||
for layer in layers:
|
|
||||||
layer.cuda()
|
|
||||||
scales.cuda()
|
|
||||||
|
|
||||||
if isinstance(prev_op, nn.Linear):
|
|
||||||
assert len(layers) == 1
|
|
||||||
scale_fc_fc(prev_op, layers[0], scales)
|
|
||||||
elif isinstance(prev_op, (nn.LayerNorm, LlamaRMSNorm)) or "rmsnorm" in str(prev_op.__class__).lower():
|
|
||||||
scale_ln_fcs(prev_op, layers, scales)
|
|
||||||
elif isinstance(prev_op, (nn.GELU, BloomGelu, GELUActivation)):
|
|
||||||
new_module = ScaledActivation(prev_op, scales)
|
|
||||||
set_op_by_name(module, prev_op_name, new_module)
|
|
||||||
scale_gelu_fc(prev_op, layers[0], scales)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(f"prev_op {type(prev_op)} not supported yet!")
|
|
||||||
|
|
||||||
# apply the scaling to input feat if given; prepare it for clipping
|
|
||||||
if input_feat_dict is not None:
|
|
||||||
for layer_name in layer_names:
|
|
||||||
inp = input_feat_dict[layer_name]
|
|
||||||
inp.div_(scales.view(1, -1).to(inp.device))
|
|
||||||
|
|
||||||
prev_op.cpu()
|
|
||||||
for layer in layers:
|
|
||||||
layer.cpu()
|
|
||||||
scales.cpu()
|
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
|
|
||||||
def apply_clip(module, clip_list):
|
|
||||||
"""
|
|
||||||
Applies element-wise clipping to the weight of a specific layer within a given module.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
module (nn.Module): The module containing the layer to be clipped.
|
|
||||||
clip_list (List[Tuple[str, torch.Tensor]]): A list of tuples containing:
|
|
||||||
* name (str): The name of the layer to be clipped, relative to the root of the module.
|
|
||||||
* max_val (torch.Tensor): A 1D or 2D tensor defining the upper bound for each element of the layer's weight.
|
|
||||||
"""
|
|
||||||
for name, max_val in clip_list:
|
|
||||||
layer = get_op_by_name(module, name)
|
|
||||||
layer.cuda()
|
|
||||||
max_val = max_val.to(layer.weight.device)
|
|
||||||
org_shape = layer.weight.shape
|
|
||||||
layer.weight.data = layer.weight.data.reshape(*max_val.shape[:2], -1)
|
|
||||||
layer.weight.data = torch.clamp(layer.weight.data, -max_val, max_val)
|
|
||||||
layer.weight.data = layer.weight.data.reshape(org_shape)
|
|
||||||
layer.cpu()
|
|
||||||
|
|
||||||
|
|
||||||
def add_scale_weights(model_path, scale_path, tmp_path):
|
|
||||||
"""
|
|
||||||
Adds pre-computed Activation Weight Quantization (AWQ) results to a model,
|
|
||||||
including scaling factors and clipping bounds.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_path (str): Path to the pre-trained model to be equipped with AWQ.
|
|
||||||
scale_path (str): Path to the AWQ scale factors (.pt file).
|
|
||||||
tmp_path (str): Path to the temporary directory where the equipped model will be saved.
|
|
||||||
"""
|
|
||||||
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
|
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
|
||||||
model_path, config=config, trust_remote_code=True
|
|
||||||
)
|
|
||||||
model.eval()
|
|
||||||
awq_results = torch.load(str(scale_path), map_location="cpu")
|
|
||||||
apply_scale(model, awq_results["scale"])
|
|
||||||
apply_clip(model, awq_results["clip"])
|
|
||||||
model.save_pretrained(str(tmp_path))
|
|
||||||
os.system(f"cp {str(model_path)}/tokenizer* {str(tmp_path)}")
|
|
|
@ -1,2 +0,0 @@
|
||||||
torch>=2.1.1
|
|
||||||
transformers>=4.32.0
|
|
138
build.zig
138
build.zig
|
@ -1,138 +0,0 @@
|
||||||
// Compatible with Zig Version 0.11.0
|
|
||||||
const std = @import("std");
|
|
||||||
const ArrayList = std.ArrayList;
|
|
||||||
const Compile = std.Build.Step.Compile;
|
|
||||||
const ConfigHeader = std.Build.Step.ConfigHeader;
|
|
||||||
const Mode = std.builtin.Mode;
|
|
||||||
const CrossTarget = std.zig.CrossTarget;
|
|
||||||
|
|
||||||
const Maker = struct {
|
|
||||||
builder: *std.build.Builder,
|
|
||||||
target: CrossTarget,
|
|
||||||
optimize: Mode,
|
|
||||||
enable_lto: bool,
|
|
||||||
|
|
||||||
include_dirs: ArrayList([]const u8),
|
|
||||||
cflags: ArrayList([]const u8),
|
|
||||||
cxxflags: ArrayList([]const u8),
|
|
||||||
objs: ArrayList(*Compile),
|
|
||||||
|
|
||||||
fn addInclude(m: *Maker, dir: []const u8) !void {
|
|
||||||
try m.include_dirs.append(dir);
|
|
||||||
}
|
|
||||||
fn addProjectInclude(m: *Maker, path: []const []const u8) !void {
|
|
||||||
try m.addInclude(try m.builder.build_root.join(m.builder.allocator, path));
|
|
||||||
}
|
|
||||||
fn addCFlag(m: *Maker, flag: []const u8) !void {
|
|
||||||
try m.cflags.append(flag);
|
|
||||||
}
|
|
||||||
fn addCxxFlag(m: *Maker, flag: []const u8) !void {
|
|
||||||
try m.cxxflags.append(flag);
|
|
||||||
}
|
|
||||||
fn addFlag(m: *Maker, flag: []const u8) !void {
|
|
||||||
try m.addCFlag(flag);
|
|
||||||
try m.addCxxFlag(flag);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn init(builder: *std.build.Builder) !Maker {
|
|
||||||
const target = builder.standardTargetOptions(.{});
|
|
||||||
const zig_version = @import("builtin").zig_version_string;
|
|
||||||
const commit_hash = try std.ChildProcess.exec(
|
|
||||||
.{ .allocator = builder.allocator, .argv = &.{ "git", "rev-parse", "HEAD" } },
|
|
||||||
);
|
|
||||||
try std.fs.cwd().writeFile("common/build-info.cpp", builder.fmt(
|
|
||||||
\\int LLAMA_BUILD_NUMBER = {};
|
|
||||||
\\char const *LLAMA_COMMIT = "{s}";
|
|
||||||
\\char const *LLAMA_COMPILER = "Zig {s}";
|
|
||||||
\\char const *LLAMA_BUILD_TARGET = "{s}";
|
|
||||||
\\
|
|
||||||
, .{ 0, commit_hash.stdout[0 .. commit_hash.stdout.len - 1], zig_version, try target.allocDescription(builder.allocator) }));
|
|
||||||
var m = Maker{
|
|
||||||
.builder = builder,
|
|
||||||
.target = target,
|
|
||||||
.optimize = builder.standardOptimizeOption(.{}),
|
|
||||||
.enable_lto = false,
|
|
||||||
.include_dirs = ArrayList([]const u8).init(builder.allocator),
|
|
||||||
.cflags = ArrayList([]const u8).init(builder.allocator),
|
|
||||||
.cxxflags = ArrayList([]const u8).init(builder.allocator),
|
|
||||||
.objs = ArrayList(*Compile).init(builder.allocator),
|
|
||||||
};
|
|
||||||
|
|
||||||
try m.addCFlag("-std=c11");
|
|
||||||
try m.addCxxFlag("-std=c++11");
|
|
||||||
try m.addProjectInclude(&.{});
|
|
||||||
try m.addProjectInclude(&.{"common"});
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn obj(m: *const Maker, name: []const u8, src: []const u8) *Compile {
|
|
||||||
const o = m.builder.addObject(.{ .name = name, .target = m.target, .optimize = m.optimize });
|
|
||||||
if (o.target.getAbi() != .msvc)
|
|
||||||
o.defineCMacro("_GNU_SOURCE", null);
|
|
||||||
|
|
||||||
if (std.mem.endsWith(u8, src, ".c")) {
|
|
||||||
o.addCSourceFiles(&.{src}, m.cflags.items);
|
|
||||||
o.linkLibC();
|
|
||||||
} else {
|
|
||||||
o.addCSourceFiles(&.{src}, m.cxxflags.items);
|
|
||||||
if (o.target.getAbi() == .msvc) {
|
|
||||||
o.linkLibC(); // need winsdk + crt
|
|
||||||
} else {
|
|
||||||
// linkLibCpp already add (libc++ + libunwind + libc)
|
|
||||||
o.linkLibCpp();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (m.include_dirs.items) |i| o.addIncludePath(.{ .path = i });
|
|
||||||
o.want_lto = m.enable_lto;
|
|
||||||
return o;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn exe(m: *const Maker, name: []const u8, src: []const u8, deps: []const *Compile) *Compile {
|
|
||||||
const e = m.builder.addExecutable(.{ .name = name, .target = m.target, .optimize = m.optimize });
|
|
||||||
e.addCSourceFiles(&.{src}, m.cxxflags.items);
|
|
||||||
for (deps) |d| e.addObject(d);
|
|
||||||
for (m.objs.items) |o| e.addObject(o);
|
|
||||||
for (m.include_dirs.items) |i| e.addIncludePath(.{ .path = i });
|
|
||||||
|
|
||||||
// https://github.com/ziglang/zig/issues/15448
|
|
||||||
if (e.target.getAbi() == .msvc) {
|
|
||||||
e.linkLibC(); // need winsdk + crt
|
|
||||||
} else {
|
|
||||||
// linkLibCpp already add (libc++ + libunwind + libc)
|
|
||||||
e.linkLibCpp();
|
|
||||||
}
|
|
||||||
m.builder.installArtifact(e);
|
|
||||||
e.want_lto = m.enable_lto;
|
|
||||||
return e;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
pub fn build(b: *std.build.Builder) !void {
|
|
||||||
var make = try Maker.init(b);
|
|
||||||
make.enable_lto = b.option(bool, "lto", "Enable LTO optimization, (default: false)") orelse false;
|
|
||||||
|
|
||||||
const ggml = make.obj("ggml", "ggml.c");
|
|
||||||
const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
|
|
||||||
const ggml_backend = make.obj("ggml-backend", "ggml-backend.c");
|
|
||||||
const ggml_quants = make.obj("ggml-quants", "ggml-quants.c");
|
|
||||||
const llama = make.obj("llama", "llama.cpp");
|
|
||||||
const buildinfo = make.obj("common", "common/build-info.cpp");
|
|
||||||
const common = make.obj("common", "common/common.cpp");
|
|
||||||
const console = make.obj("console", "common/console.cpp");
|
|
||||||
const sampling = make.obj("sampling", "common/sampling.cpp");
|
|
||||||
const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp");
|
|
||||||
const train = make.obj("train", "common/train.cpp");
|
|
||||||
const clip = make.obj("clip", "examples/llava/clip.cpp");
|
|
||||||
|
|
||||||
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser });
|
|
||||||
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
|
|
||||||
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
|
|
||||||
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
|
|
||||||
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
|
|
||||||
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
|
|
||||||
|
|
||||||
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip });
|
|
||||||
if (server.target.isWindows()) {
|
|
||||||
server.linkSystemLibrary("ws2_32");
|
|
||||||
}
|
|
||||||
}
|
|
817
ci/run.sh
817
ci/run.sh
|
@ -1,4 +1,4 @@
|
||||||
#/bin/bash
|
#!/bin/bash
|
||||||
#
|
#
|
||||||
# sample usage:
|
# sample usage:
|
||||||
#
|
#
|
||||||
|
@ -13,6 +13,9 @@
|
||||||
# # with SYCL support
|
# # with SYCL support
|
||||||
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
#
|
#
|
||||||
|
# # with VULKAN support
|
||||||
|
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
|
#
|
||||||
|
|
||||||
if [ -z "$2" ]; then
|
if [ -z "$2" ]; then
|
||||||
echo "usage: $0 <output-dir> <mnt-dir>"
|
echo "usage: $0 <output-dir> <mnt-dir>"
|
||||||
|
@ -33,23 +36,28 @@ sd=`dirname $0`
|
||||||
cd $sd/../
|
cd $sd/../
|
||||||
SRC=`pwd`
|
SRC=`pwd`
|
||||||
|
|
||||||
CMAKE_EXTRA=""
|
CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_METAL} ]; then
|
if [ ! -z ${GG_BUILD_METAL} ]; then
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_METAL_SHADER_DEBUG=ON"
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_CUBLAS=1"
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
||||||
if [ -z ${ONEAPI_ROOT} ]; then
|
if [ -z ${ONEAPI_ROOT} ]; then
|
||||||
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:\n source /opt/intel/oneapi/setvars.sh"
|
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
|
||||||
|
echo "source /opt/intel/oneapi/setvars.sh"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_SYCL=1 DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON"
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -z ${GG_BUILD_VULKAN} ]; then
|
||||||
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
|
||||||
fi
|
fi
|
||||||
## helpers
|
## helpers
|
||||||
|
|
||||||
|
@ -102,8 +110,11 @@ function gg_run_ctest_debug {
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
# Check cmake, make and ctest are installed
|
||||||
|
gg_check_build_requirements
|
||||||
|
|
||||||
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
||||||
|
|
||||||
|
@@ -130,8 +141,11 @@ function gg_run_ctest_release {

     set -e

+    # Check cmake, make and ctest are installed
+    gg_check_build_requirements
+
     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log

     if [ -z ${GG_BUILD_LOW_PERF} ]; then
         (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
@@ -152,13 +166,64 @@ function gg_sum_ctest_release {
     gg_printf '```\n'
 }

+# test_scripts_debug
+
+function gg_run_test_scripts_debug {
+    cd ${SRC}
+
+    set -e
+
+    (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+    (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+
+    set +e
+}
+
+function gg_sum_test_scripts_debug {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs test scripts in debug mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
+    gg_printf '```\n'
+    gg_printf '\n'
+}
+
+# test_scripts_release
+
+function gg_run_test_scripts_release {
+    cd ${SRC}
+
+    set -e
+
+    (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+    (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+
+    set +e
+}
+
+function gg_sum_test_scripts_release {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs test scripts in release mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
+    gg_printf '```\n'
+    gg_printf '\n'
+}
+
 function gg_get_model {
-    local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf"
-    local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
-    if [[ -s $gguf_3b ]]; then
-        echo -n "$gguf_3b"
-    elif [[ -s $gguf_7b ]]; then
-        echo -n "$gguf_7b"
+    local gguf_0="$MNT/models/pythia/1.4B/ggml-model-f16.gguf"
+    local gguf_1="$MNT/models/pythia/2.8B/ggml-model-f16.gguf"
+    local gguf_2="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
+    if [[ -s $gguf_0 ]]; then
+        echo -n "$gguf_0"
+    elif [[ -s $gguf_1 ]]; then
+        echo -n "$gguf_1"
+    elif [[ -s $gguf_2 ]]; then
+        echo -n "$gguf_2"
     else
         echo >&2 "No model found. Can't run gg_run_ctest_with_model."
         exit 1
@@ -207,187 +272,7 @@ function gg_sum_ctest_with_model_release {
|
||||||
gg_printf '```\n'
|
gg_printf '```\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
# open_llama_3b_v2
|
|
||||||
|
|
||||||
function gg_run_open_llama_3b_v2 {
|
|
||||||
cd ${SRC}
|
|
||||||
|
|
||||||
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
|
|
||||||
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
|
|
||||||
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
|
|
||||||
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
|
|
||||||
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
|
|
||||||
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
|
|
||||||
|
|
||||||
gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
|
|
||||||
unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
|
|
||||||
head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
|
|
||||||
|
|
||||||
path_models="../models-mnt/open-llama/3B-v2"
|
|
||||||
path_wiki="../models-mnt/wikitext/wikitext-2-raw"
|
|
||||||
|
|
||||||
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
|
||||||
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
|
||||||
|
|
||||||
python3 ../convert.py ${path_models}
|
|
||||||
|
|
||||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
|
||||||
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
|
||||||
model_q4_0="${path_models}/ggml-model-q4_0.gguf"
|
|
||||||
model_q4_1="${path_models}/ggml-model-q4_1.gguf"
|
|
||||||
model_q5_0="${path_models}/ggml-model-q5_0.gguf"
|
|
||||||
model_q5_1="${path_models}/ggml-model-q5_1.gguf"
|
|
||||||
model_q2_k="${path_models}/ggml-model-q2_k.gguf"
|
|
||||||
model_q3_k="${path_models}/ggml-model-q3_k.gguf"
|
|
||||||
model_q4_k="${path_models}/ggml-model-q4_k.gguf"
|
|
||||||
model_q5_k="${path_models}/ggml-model-q5_k.gguf"
|
|
||||||
model_q6_k="${path_models}/ggml-model-q6_k.gguf"
|
|
||||||
|
|
||||||
wiki_test_60="${path_wiki}/wiki.test-60.raw"
|
|
||||||
|
|
||||||
./bin/quantize ${model_f16} ${model_q8_0} q8_0
|
|
||||||
./bin/quantize ${model_f16} ${model_q4_0} q4_0
|
|
||||||
./bin/quantize ${model_f16} ${model_q4_1} q4_1
|
|
||||||
./bin/quantize ${model_f16} ${model_q5_0} q5_0
|
|
||||||
./bin/quantize ${model_f16} ${model_q5_1} q5_1
|
|
||||||
./bin/quantize ${model_f16} ${model_q2_k} q2_k
|
|
||||||
./bin/quantize ${model_f16} ${model_q3_k} q3_k
|
|
||||||
./bin/quantize ${model_f16} ${model_q4_k} q4_k
|
|
||||||
./bin/quantize ${model_f16} ${model_q5_k} q5_k
|
|
||||||
./bin/quantize ${model_f16} ${model_q6_k} q6_k
|
|
||||||
|
|
||||||
(time ./bin/main --model ${model_f16} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
|
||||||
(time ./bin/main --model ${model_q8_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
|
||||||
(time ./bin/main --model ${model_q4_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
|
||||||
(time ./bin/main --model ${model_q4_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
|
||||||
(time ./bin/main --model ${model_q5_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
|
||||||
(time ./bin/main --model ${model_q5_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
|
||||||
(time ./bin/main --model ${model_q2_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
|
||||||
(time ./bin/main --model ${model_q3_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
|
||||||
(time ./bin/main --model ${model_q4_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
|
||||||
(time ./bin/main --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
|
||||||
(time ./bin/main --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
|
||||||
|
|
||||||
(time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
|
||||||
(time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
|
||||||
(time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
|
||||||
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
|
||||||
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
|
||||||
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
|
||||||
(time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
|
||||||
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
|
||||||
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
|
||||||
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
|
||||||
(time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
|
||||||
|
|
||||||
(time ./bin/imatrix --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
|
||||||
|
|
||||||
(time ./bin/save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
|
||||||
|
|
||||||
function check_ppl {
|
|
||||||
qnt="$1"
|
|
||||||
ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
|
||||||
|
|
||||||
if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
|
|
||||||
printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
|
|
||||||
return 20
|
|
||||||
fi
|
|
||||||
|
|
||||||
printf ' - %s @ %s OK\n' "$qnt" "$ppl"
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
|
||||||
|
|
||||||
cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
|
|
||||||
|
|
||||||
# lora
|
|
||||||
function compare_ppl {
|
|
||||||
qnt="$1"
|
|
||||||
ppl1=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
|
||||||
ppl2=$(echo "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
|
||||||
|
|
||||||
if [ $(echo "$ppl1 < $ppl2" | bc) -eq 1 ]; then
|
|
||||||
printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl" "$ppl1" "$ppl2"
|
|
||||||
return 20
|
|
||||||
fi
|
|
||||||
|
|
||||||
printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
path_lora="../models-mnt/open-llama/3B-v2/lora"
|
|
||||||
path_shakespeare="../models-mnt/shakespeare"
|
|
||||||
|
|
||||||
shakespeare="${path_shakespeare}/shakespeare.txt"
|
|
||||||
lora_shakespeare="${path_lora}/ggml-adapter-model.bin"
|
|
||||||
|
|
||||||
gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
|
|
||||||
gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
|
|
||||||
gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt
|
|
||||||
|
|
||||||
python3 ../convert-lora-to-ggml.py ${path_lora}
|
|
||||||
|
|
||||||
# f16
|
|
||||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-f16.log
|
|
||||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-f16.log
|
|
||||||
compare_ppl "f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-f16.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
|
||||||
|
|
||||||
# q8_0
|
|
||||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
|
|
||||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
|
|
||||||
compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
|
||||||
|
|
||||||
# q8_0 + f16 lora-base
|
|
||||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
|
|
||||||
compare_ppl "q8_0 / f16 base shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
|
||||||
|
|
||||||
set +e
|
|
||||||
}
|
|
||||||
|
|
||||||
function gg_sum_open_llama_3b_v2 {
|
|
||||||
gg_printf '### %s\n\n' "${ci}"
|
|
||||||
|
|
||||||
gg_printf 'OpenLLaMA 3B-v2:\n'
|
|
||||||
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
|
||||||
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
|
|
||||||
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
|
|
||||||
gg_printf '- lora:\n%s\n' "$(cat $OUT/${ci}-lora-ppl.log)"
|
|
||||||
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
|
|
||||||
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
|
||||||
gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
|
|
||||||
gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
|
|
||||||
gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
|
|
||||||
gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
|
|
||||||
gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
|
|
||||||
gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
|
|
||||||
gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
|
|
||||||
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
|
|
||||||
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
|
|
||||||
gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
|
|
||||||
gg_printf '- shakespeare (f16):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-f16.log)"
|
|
||||||
gg_printf '- shakespeare (f16 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log)"
|
|
||||||
gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)"
|
|
||||||
gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)"
|
|
||||||
gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
|
|
||||||
}
|
|
||||||
|
|
||||||
# open_llama_7b_v2
|
# open_llama_7b_v2
|
||||||
# requires: GG_BUILD_CUDA
|
|
||||||
|
|
||||||
function gg_run_open_llama_7b_v2 {
|
function gg_run_open_llama_7b_v2 {
|
||||||
cd ${SRC}
|
cd ${SRC}
|
||||||
|
@@ -401,7 +286,7 @@ function gg_run_open_llama_7b_v2 {
     gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
     gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json

-    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
+    gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
     unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/

     path_models="../models-mnt/open-llama/7B-v2"
@@ -411,10 +296,10 @@ function gg_run_open_llama_7b_v2 {

     set -e

-    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_CUBLAS=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log

-    python3 ../convert.py ${path_models}
+    python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

     model_f16="${path_models}/ggml-model-f16.gguf"
     model_q8_0="${path_models}/ggml-model-q8_0.gguf"
@@ -430,44 +315,47 @@ function gg_run_open_llama_7b_v2 {
|
||||||
|
|
||||||
wiki_test="${path_wiki}/wiki.test.raw"
|
wiki_test="${path_wiki}/wiki.test.raw"
|
||||||
|
|
||||||
./bin/quantize ${model_f16} ${model_q8_0} q8_0
|
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
|
||||||
./bin/quantize ${model_f16} ${model_q4_0} q4_0
|
./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
|
||||||
./bin/quantize ${model_f16} ${model_q4_1} q4_1
|
./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
|
||||||
./bin/quantize ${model_f16} ${model_q5_0} q5_0
|
./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
|
||||||
./bin/quantize ${model_f16} ${model_q5_1} q5_1
|
./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
|
||||||
./bin/quantize ${model_f16} ${model_q2_k} q2_k
|
./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
|
||||||
./bin/quantize ${model_f16} ${model_q3_k} q3_k
|
./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
|
||||||
./bin/quantize ${model_f16} ${model_q4_k} q4_k
|
./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
|
||||||
./bin/quantize ${model_f16} ${model_q5_k} q5_k
|
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||||
./bin/quantize ${model_f16} ${model_q6_k} q6_k
|
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||||
|
|
||||||
(time ./bin/main --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/main --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/main --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/main --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/main --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/main --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/main --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/main --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/main --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/main --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/main --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||||
|
|
||||||
-    (time ./bin/save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
|
||||||
function check_ppl {
|
function check_ppl {
|
||||||
qnt="$1"
|
qnt="$1"
|
||||||
|
@@ -496,48 +384,6 @@ function gg_run_open_llama_7b_v2 {
|
||||||
|
|
||||||
cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
|
cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
|
||||||
|
|
||||||
# lora
|
|
||||||
function compare_ppl {
|
|
||||||
qnt="$1"
|
|
||||||
ppl1=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
|
||||||
ppl2=$(echo "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
|
||||||
|
|
||||||
if [ $(echo "$ppl1 < $ppl2" | bc) -eq 1 ]; then
|
|
||||||
printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl" "$ppl1" "$ppl2"
|
|
||||||
return 20
|
|
||||||
fi
|
|
||||||
|
|
||||||
printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
path_lora="../models-mnt/open-llama/7B-v2/lora"
|
|
||||||
path_shakespeare="../models-mnt/shakespeare"
|
|
||||||
|
|
||||||
shakespeare="${path_shakespeare}/shakespeare.txt"
|
|
||||||
lora_shakespeare="${path_lora}/ggml-adapter-model.bin"
|
|
||||||
|
|
||||||
gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_config.json
|
|
||||||
gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_model.bin
|
|
||||||
gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/shakespeare.txt
|
|
||||||
|
|
||||||
python3 ../convert-lora-to-ggml.py ${path_lora}
|
|
||||||
|
|
||||||
# f16
|
|
||||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-f16.log
|
|
||||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-f16.log
|
|
||||||
compare_ppl "f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-f16.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
|
||||||
|
|
||||||
# currently not supported by the CUDA backend
|
|
||||||
# q8_0
|
|
||||||
#(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
|
|
||||||
#(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
|
|
||||||
#compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
|
||||||
|
|
||||||
# q8_0 + f16 lora-base
|
|
||||||
#(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
|
|
||||||
#compare_ppl "q8_0 / f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
|
||||||
|
|
||||||
set +e
|
set +e
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -548,7 +394,6 @@ function gg_sum_open_llama_7b_v2 {
     gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
     gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
     gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
-    gg_printf '- lora:\n%s\n' "$(cat $OUT/${ci}-lora-ppl.log)"
     gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
     gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
     gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
@@ -561,15 +406,407 @@ function gg_sum_open_llama_7b_v2 {
|
||||||
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
|
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
|
||||||
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
|
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
|
||||||
gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
|
gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
|
||||||
gg_printf '- shakespeare (f16):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-f16.log)"
|
}
|
||||||
gg_printf '- shakespeare (f16 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log)"
|
|
||||||
#gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)"
|
# pythia_1.4b
|
||||||
#gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)"
|
|
||||||
#gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
|
function gg_run_pythia_1_4b {
|
||||||
|
cd ${SRC}
|
||||||
|
|
||||||
|
gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/config.json
|
||||||
|
gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer.json
|
||||||
|
gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer_config.json
|
||||||
|
gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/special_tokens_map.json
|
||||||
|
gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/resolve/main/pytorch_model.bin
|
||||||
|
|
||||||
|
gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
|
||||||
|
unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
|
||||||
|
head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
|
||||||
|
|
||||||
|
path_models="../models-mnt/pythia/1.4B"
|
||||||
|
path_wiki="../models-mnt/wikitext/wikitext-2-raw"
|
||||||
|
|
||||||
|
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
|
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
|
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||||
|
|
||||||
|
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||||
|
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||||
|
model_q4_0="${path_models}/ggml-model-q4_0.gguf"
|
||||||
|
model_q4_1="${path_models}/ggml-model-q4_1.gguf"
|
||||||
|
model_q5_0="${path_models}/ggml-model-q5_0.gguf"
|
||||||
|
model_q5_1="${path_models}/ggml-model-q5_1.gguf"
|
||||||
|
model_q2_k="${path_models}/ggml-model-q2_k.gguf"
|
||||||
|
model_q3_k="${path_models}/ggml-model-q3_k.gguf"
|
||||||
|
model_q4_k="${path_models}/ggml-model-q4_k.gguf"
|
||||||
|
model_q5_k="${path_models}/ggml-model-q5_k.gguf"
|
||||||
|
model_q6_k="${path_models}/ggml-model-q6_k.gguf"
|
||||||
|
|
||||||
|
wiki_test_60="${path_wiki}/wiki.test-60.raw"
|
||||||
|
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||||
|
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
|
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
|
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||||
|
|
||||||
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
|
||||||
|
function check_ppl {
|
||||||
|
qnt="$1"
|
||||||
|
ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
||||||
|
|
||||||
|
if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
|
||||||
|
printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
|
||||||
|
return 20
|
||||||
|
fi
|
||||||
|
|
||||||
|
printf ' - %s @ %s OK\n' "$qnt" "$ppl"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
#check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model
|
||||||
|
check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
|
||||||
|
cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
|
||||||
|
|
||||||
|
set +e
|
||||||
|
}
|
||||||
|
|
||||||
|
function gg_sum_pythia_1_4b {
|
||||||
|
gg_printf '### %s\n\n' "${ci}"
|
||||||
|
|
||||||
|
gg_printf 'Pythia 1.4B:\n'
|
||||||
|
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||||
|
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
|
||||||
|
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
|
||||||
|
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
|
||||||
|
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
||||||
|
gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
|
||||||
|
gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
|
||||||
|
gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
|
||||||
|
gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
|
||||||
|
gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
|
||||||
|
gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
|
||||||
|
gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
|
||||||
|
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
|
||||||
|
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
|
||||||
|
gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
|
||||||
|
}
|
||||||
|
|
||||||
|
# pythia_2_8b
|
||||||
|
|
||||||
|
function gg_run_pythia_2_8b {
|
||||||
|
cd ${SRC}
|
||||||
|
|
||||||
|
gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/config.json
|
||||||
|
gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer.json
|
||||||
|
gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer_config.json
|
||||||
|
gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/special_tokens_map.json
|
||||||
|
gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/resolve/main/pytorch_model.bin
|
||||||
|
|
||||||
|
gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
|
||||||
|
unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
|
||||||
|
|
||||||
|
path_models="../models-mnt/pythia/2.8B"
|
||||||
|
path_wiki="../models-mnt/wikitext/wikitext-2-raw"
|
||||||
|
|
||||||
|
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
|
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
|
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||||
|
|
||||||
|
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||||
|
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||||
|
model_q4_0="${path_models}/ggml-model-q4_0.gguf"
|
||||||
|
model_q4_1="${path_models}/ggml-model-q4_1.gguf"
|
||||||
|
model_q5_0="${path_models}/ggml-model-q5_0.gguf"
|
||||||
|
model_q5_1="${path_models}/ggml-model-q5_1.gguf"
|
||||||
|
model_q2_k="${path_models}/ggml-model-q2_k.gguf"
|
||||||
|
model_q3_k="${path_models}/ggml-model-q3_k.gguf"
|
||||||
|
model_q4_k="${path_models}/ggml-model-q4_k.gguf"
|
||||||
|
model_q5_k="${path_models}/ggml-model-q5_k.gguf"
|
||||||
|
model_q6_k="${path_models}/ggml-model-q6_k.gguf"
|
||||||
|
|
||||||
|
wiki_test="${path_wiki}/wiki.test.raw"
|
||||||
|
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||||
|
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
|
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
|
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
|
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
|
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||||
|
|
||||||
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
|
||||||
|
function check_ppl {
|
||||||
|
qnt="$1"
|
||||||
|
ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
||||||
|
|
||||||
|
if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
|
||||||
|
printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
|
||||||
|
return 20
|
||||||
|
fi
|
||||||
|
|
||||||
|
printf ' - %s @ %s OK\n' "$qnt" "$ppl"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
#check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model
|
||||||
|
check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
||||||
|
|
||||||
|
cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
|
||||||
|
|
||||||
|
set +e
|
||||||
|
}
|
||||||
|
|
||||||
|
function gg_sum_pythia_2_8b {
|
||||||
|
gg_printf '### %s\n\n' "${ci}"
|
||||||
|
|
||||||
|
gg_printf 'Pythia 2.8B:\n'
|
||||||
|
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||||
|
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
|
||||||
|
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
|
||||||
|
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
|
||||||
|
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
||||||
|
gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
|
||||||
|
gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
|
||||||
|
gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
|
||||||
|
gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
|
||||||
|
gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
|
||||||
|
gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
|
||||||
|
gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
|
||||||
|
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
|
||||||
|
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
|
||||||
|
gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
|
||||||
|
}
|
||||||
|
|
||||||
|
# bge-small
|
||||||
|
|
||||||
|
function gg_run_embd_bge_small {
|
||||||
|
cd ${SRC}
|
||||||
|
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json
|
||||||
|
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
|
||||||
|
|
||||||
|
gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json
|
||||||
|
|
||||||
|
path_models="../models-mnt/bge-small"
|
||||||
|
|
||||||
|
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
|
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
|
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
||||||
|
|
||||||
|
model_f16="${path_models}/ggml-model-f16.gguf"
|
||||||
|
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
||||||
|
|
||||||
|
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
|
||||||
|
|
||||||
|
(time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
|
(time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
|
|
||||||
|
set +e
|
||||||
|
}
|
||||||
|
|
||||||
|
function gg_sum_embd_bge_small {
|
||||||
|
gg_printf '### %s\n\n' "${ci}"
|
||||||
|
|
||||||
|
gg_printf 'BGE Small (BERT):\n'
|
||||||
|
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||||
|
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
|
||||||
|
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
||||||
|
}

# rerank_tiny

function gg_run_rerank_tiny {
    cd ${SRC}

    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json
    gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json

    gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json

    path_models="../models-mnt/rerank-tiny"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log

    python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

    model_f16="${path_models}/ggml-model-f16.gguf"

    # for this model, the SEP token is "</s>"
    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log

    # sample output
    # rerank score 0: 0.029
    # rerank score 1: 0.029
    # rerank score 2: 0.135

    # check that the score is in the range [$3, $4]
    function check_score {
        qnt="$1"
        score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)

        if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then
            printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4"
            return 20
        fi

        printf ' - %s @ %s OK\n' "$qnt" "$score"
        return 0
    }

    check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
    check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log

    set +e
}
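
Each line of the -p prompt above is one query</s></s>document pair, so three scores come back and only the pair containing the real panda description is expected to land clearly higher. check_score then pulls the last decimal number out of the matching log line and compares it with bc, since bash arithmetic is integer-only. A standalone snippet (not part of the CI script) showing the extraction against the sample output:

# standalone illustration of the extraction used by check_score
line="rerank score 2: 0.135"
score=$(echo "$line" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
echo "$score"                  # prints 0.135
echo "$score > 0.10" | bc      # prints 1, so the lower bound of the expected range holds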

function gg_sum_rerank_tiny {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Rerank Tiny (Jina):\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)"
}

function gg_check_build_requirements {
    if ! command -v cmake &> /dev/null; then
        gg_printf 'cmake not found, please install'
    fi

    if ! command -v make &> /dev/null; then
        gg_printf 'make not found, please install'
    fi

    if ! command -v ctest &> /dev/null; then
        gg_printf 'ctest not found, please install'
    fi
}

## main

export LLAMA_LOG_PREFIX=1
export LLAMA_LOG_TIMESTAMPS=1

if [ -z ${GG_BUILD_LOW_PERF} ]; then
    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
    rm -rf ${SRC}/models-mnt

@@ -578,7 +815,10 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
    ln -sfn ${mnt_models} ${SRC}/models-mnt

    # Create a fresh python3 venv and enter it
-   python3 -m venv "$MNT/venv"
+   if ! python3 -m venv "$MNT/venv"; then
+       echo "Error: Failed to create Python virtual environment at $MNT/venv."
+       exit 1
+   fi
    source "$MNT/venv/bin/activate"

    pip install -r ${SRC}/requirements.txt --disable-pip-version-check

@@ -591,11 +831,20 @@ test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release

if [ -z ${GG_BUILD_LOW_PERF} ]; then
    test $ret -eq 0 && gg_run embd_bge_small
    test $ret -eq 0 && gg_run rerank_tiny

    if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
        test $ret -eq 0 && gg_run test_scripts_debug
        test $ret -eq 0 && gg_run test_scripts_release
    fi

    if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
-       if [ -z ${GG_BUILD_CUDA} ]; then
-           test $ret -eq 0 && gg_run open_llama_3b_v2
+       if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then
+           test $ret -eq 0 && gg_run pythia_1_4b
        else
-           test $ret -eq 0 && gg_run open_llama_7b_v2
+           test $ret -eq 0 && gg_run pythia_2_8b
+           #test $ret -eq 0 && gg_run open_llama_7b_v2
        fi

        test $ret -eq 0 && gg_run ctest_with_model_debug
        test $ret -eq 0 && gg_run ctest_with_model_release
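
For context, the CI script is normally driven end to end rather than stage by stage. A typical local invocation looks like the following (the output and mount directories are just example paths), with backend-specific coverage opted into through the GG_BUILD_* environment variables referenced above:

# run the full CI locally, writing results to ./tmp/results and caching models in ./tmp/mnt
mkdir -p tmp

# CPU-only run
bash ./ci/run.sh ./tmp/results ./tmp/mnt

# run with CUDA support enabled
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt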

cmake/arm64-apple-clang.cmake (new file, 16 lines)
@@ -0,0 +1,16 @@
set( CMAKE_SYSTEM_NAME Darwin )
set( CMAKE_SYSTEM_PROCESSOR arm64 )

set( target arm64-apple-darwin-macho )

set( CMAKE_C_COMPILER clang )
set( CMAKE_CXX_COMPILER clang++ )

set( CMAKE_C_COMPILER_TARGET ${target} )
set( CMAKE_CXX_COMPILER_TARGET ${target} )

set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )

set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
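
These toolchain files are meant to be passed to CMake at configure time via the standard CMAKE_TOOLCHAIN_FILE variable; a minimal cross-configure sketch (the build directory name is arbitrary):

# configure and build using the arm64 Apple clang toolchain file
cmake -B build-arm64-apple \
      -DCMAKE_TOOLCHAIN_FILE=cmake/arm64-apple-clang.cmake \
      -DCMAKE_BUILD_TYPE=Release
cmake --build build-arm64-apple --config Release

The Windows variants below are used the same way, only with their respective toolchain files.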

cmake/arm64-windows-llvm.cmake (new file, 16 lines)
@@ -0,0 +1,16 @@
set( CMAKE_SYSTEM_NAME Windows )
set( CMAKE_SYSTEM_PROCESSOR arm64 )

set( target arm64-pc-windows-msvc )

set( CMAKE_C_COMPILER clang )
set( CMAKE_CXX_COMPILER clang++ )

set( CMAKE_C_COMPILER_TARGET ${target} )
set( CMAKE_CXX_COMPILER_TARGET ${target} )

set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )

set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )

cmake/arm64-windows-msvc.cmake (new file, 6 lines)
@@ -0,0 +1,6 @@
set( CMAKE_SYSTEM_NAME Windows )
set( CMAKE_SYSTEM_PROCESSOR arm64 )

set( target arm64-pc-windows-msvc )
set( CMAKE_C_COMPILER_TARGET ${target} )
set( CMAKE_CXX_COMPILER_TARGET ${target} )

@@ -44,7 +44,7 @@ if(MSVC)
     set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
 else()
     execute_process(
-        COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
+        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
         OUTPUT_VARIABLE OUT
         OUTPUT_STRIP_TRAILING_WHITESPACE
     )
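
The only change in this hunk is the added quoting around $@, so a compiler path containing spaces survives the extra sh -c round trip instead of being word-split. A standalone illustration of the difference, using a made-up path:

# '$@' unquoted word-splits a compiler path that contains spaces
sh -c '$@ --version | head -1' _ "/opt/my tools/clang"     # tries to run "/opt/my"
sh -c '"$@" --version | head -1' _ "/opt/my tools/clang"   # runs "/opt/my tools/clang --version"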

cmake/common.cmake (new file, 33 lines)
@@ -0,0 +1,33 @@
function(llama_add_compile_flags)
    if (LLAMA_FATAL_WARNINGS)
        if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
            list(APPEND C_FLAGS -Werror)
            list(APPEND CXX_FLAGS -Werror)
        elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
            add_compile_options(/WX)
        endif()
    endif()

    if (LLAMA_ALL_WARNINGS)
        if (NOT MSVC)
            list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
                                -Werror=implicit-int -Werror=implicit-function-declaration)

            list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)

            list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)

            list(APPEND C_FLAGS   ${WARNING_FLAGS})
            list(APPEND CXX_FLAGS ${WARNING_FLAGS})

            ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})

            add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
                                "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
        else()
            # todo : msvc
            set(C_FLAGS   "" PARENT_SCOPE)
            set(CXX_FLAGS "" PARENT_SCOPE)
        endif()
    endif()
endfunction()
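
llama_add_compile_flags() only acts on the LLAMA_FATAL_WARNINGS and LLAMA_ALL_WARNINGS options read above, so the warning behaviour is chosen at configure time. For example:

# opt in to the full warning set and treat warnings as errors
cmake -B build -DLLAMA_ALL_WARNINGS=ON -DLLAMA_FATAL_WARNINGS=ON -DCMAKE_BUILD_TYPE=Release
cmake --build build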

cmake/git-vars.cmake (new file, 22 lines)
@@ -0,0 +1,22 @@
find_package(Git)

# the commit's SHA1
execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the date of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the subject of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%s
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
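
These variables mirror three plain git invocations (the NeVeRmAtCh pattern never matches a tag, so describe falls back to the bare abbreviated commit hash), and the values CMake will pick up can be checked directly from a shell in the source tree:

# the same queries git-vars.cmake runs, executed by hand
git describe --match=NeVeRmAtCh --always --abbrev=8   # -> GIT_SHA1
git log -1 --format=%ad --date=local                  # -> GIT_DATE
git log -1 --format=%s                                # -> GIT_COMMIT_SUBJECT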

cmake/llama-config.cmake.in (new file, 30 lines)
@@ -0,0 +1,30 @@
set(LLAMA_VERSION      @LLAMA_INSTALL_VERSION@)
set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
set(LLAMA_SHARED_LIB   @BUILD_SHARED_LIBS@)

@PACKAGE_INIT@

set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
set_and_check(LLAMA_LIB_DIR     "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
set_and_check(LLAMA_BIN_DIR     "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")

find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)

find_library(llama_LIBRARY llama
    REQUIRED
    HINTS ${LLAMA_LIB_DIR}
    NO_CMAKE_FIND_ROOT_PATH
)

add_library(llama UNKNOWN IMPORTED)
set_target_properties(llama
    PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
        INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
        IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
        IMPORTED_LOCATION "${llama_LIBRARY}"
        INTERFACE_COMPILE_FEATURES c_std_90
        POSITION_INDEPENDENT_CODE ON)

check_required_components(Llama)
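
With this package config installed, a downstream CMake project can pick up the imported llama target through find_package. A minimal sketch of such a consumer (install prefix, project name, and main.cpp are illustrative placeholders, not part of the repo):

# sketch: consume an installed llama.cpp from a separate project
cmake --install build --prefix "$HOME/.local/llama"

cat > CMakeLists.txt <<'EOF'
cmake_minimum_required(VERSION 3.14)
project(app CXX)
find_package(llama REQUIRED)
add_executable(app main.cpp)
target_link_libraries(app PRIVATE llama)
EOF

cmake -B build-app -DCMAKE_PREFIX_PATH="$HOME/.local/llama"
cmake --build build-app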

cmake/llama.pc.in (new file, 10 lines)
@@ -0,0 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@

Name: llama
Description: Port of Facebook's LLaMA model in C/C++
Version: @LLAMA_INSTALL_VERSION@
Libs: -L${libdir} -lggml -lggml-base -lllama
Cflags: -I${includedir}
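
The pkg-config file exposes the same include and link line to non-CMake builds; once llama.cpp is installed, a program can be compiled against it like this (main.cpp is a placeholder source file):

# compile against an installed llama.cpp via pkg-config
c++ main.cpp $(pkg-config --cflags --libs llama) -o main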

cmake/x64-windows-llvm.cmake (new file, 11 lines)
@@ -0,0 +1,11 @@
set( CMAKE_SYSTEM_NAME Windows )
set( CMAKE_SYSTEM_PROCESSOR x86_64 )

set( CMAKE_C_COMPILER clang )
set( CMAKE_CXX_COMPILER clang++ )

set( arch_c_flags "-march=native" )

set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )

codecov.yml (deleted, 14 lines)
@@ -1,14 +0,0 @@
comment: off

coverage:
  status:
    project:
      default:
        target: auto
        threshold: 0
        base: auto
    patch:
      default:
        target: auto
        threshold: 0
        base: auto
Some files were not shown because too many files have changed in this diff.