Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d117d4dc5d 
								
							 
						 
						
							
							
								
								llama : print tensor meta for debugging  
							
							
							
						 
						
							2024-01-07 09:51:12 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Alex Azarov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3418c03ecc 
								
							 
						 
						
							
							
								
								llama.swiftui : add visionOS target ( #4805 )  
							
							
							
						 
						
							2024-01-07 09:46:55 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Konstantin Zhuravlyov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								63ee677efd 
								
							 
						 
						
							
							
								
								ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 ( #4787 )  
							
							
							
						 
						
							2024-01-07 08:52:42 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								67984921a7 
								
							 
						 
						
							
							
								
								server : fix n_predict check ( #4798 )  
							
							
							
						 
						
							2024-01-07 08:45:26 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Daniel Illescas Romero 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c75ca5d96f 
								
							 
						 
						
							
							
								
								llama.swiftui : use correct pointer for llama_token_eos ( #4797 )  
							
							
							
						 
						
							2024-01-06 17:12:59 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								96e80dabc6 
								
							 
						 
						
							
							
								
								examples : improve base-translate.sh script ( #4783 )  
							
							
							
						 
						
							2024-01-06 11:40:24 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									a-n-n-a-l-e-e 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								eec22a1c63 
								
							 
						 
						
							
							
								
								cmake : check for openblas64 ( #4134 )  
							
							... 
							
							
							
							openblas v0.3.22 64-bit pkg-config file is named openblas64.pc
https://github.com/OpenMathLib/OpenBLAS/issues/3790  
							
						 
						
							2024-01-05 18:04:40 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ikko Eltociear Ashimine 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								be36bb946a 
								
							 
						 
						
							
							
								
								flake.nix : fix typo ( #4700 )  
							
							... 
							
							
							
							betwen -> between 
							
						 
						
							2024-01-05 18:02:44 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
							
							
								
							
							
								91d38876df 
								
							 
						 
						
							
							
								
								metal : switch back to default.metallib (ggml/681)  
							
							... 
							
							
							
							ggml-ci 
							
						 
						
							2024-01-05 18:02:06 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
							
							
								
							
							
								d061bf9405 
								
							 
						 
						
							
							
								
								ggml : fix q2_k bpw in comments (ggml/680)  
							
							
							
						 
						
							2024-01-05 18:02:06 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Finn Voorhees 
								
							 
						 
						
							
							
							
							
								
							
							
								1bf681f90e 
								
							 
						 
						
							
							
								
								ggml : add error handling to graph_compute (whisper/1714)  
							
							
							
						 
						
							2024-01-05 18:02:06 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c1d7cb28d3 
								
							 
						 
						
							
							
								
								ggml : do not sched_yield when calling BLAS ( #4761 )  
							
							... 
							
							
							
							* ggml : do not sched_yield when calling BLAS
ggml-ci
* ggml : fix do_yield logic
ggml-ci
* ggml : simplify do_yield logic
ggml-ci 
							
						 
						
							2024-01-05 15:18:21 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3681f22443 
								
							 
						 
						
							
							
								
								examples : add few-shot translation example ( #4783 )  
							
							
							
						 
						
							2024-01-05 15:11:10 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Daniel Bevenius 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b3a7c20b5c 
								
							 
						 
						
							
							
								
								finetune : remove unused includes ( #4756 )  
							
							... 
							
							
							
							This commit removes unused includes from finetune.cpp.
Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com> 
							
						 
						
							2024-01-04 21:45:37 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								012cf349ae 
								
							 
						 
						
							
							
								
								server : send token probs for "stream == false" ( #4714 )  
							
							
							
						 
						
							2024-01-04 19:56:33 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Johannes Gäßler 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a91928014f 
								
							 
						 
						
							
							
								
								Print backend name on test-backend-ops failure ( #4751 )  
							
							
							
						 
						
							2024-01-04 09:43:23 +01:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									singularity 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3c0b585561 
								
							 
						 
						
							
							
								
								llama.swiftui : support loading custom model from file picker ( #4767 )  
							
							... 
							
							
							
							* swiftui: support load model from file picker
* swiftui: remove trailing whitespace 
							
						 
						
							2024-01-04 10:22:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Michael Coppola 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e5804313a1 
								
							 
						 
						
							
							
								
								server : fix options in README.md ( #4765 )  
							
							... 
							
							
							
							* fix examples/server/README.md
* minor : fix whitespace
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 
							
						 
						
							2024-01-04 10:17:09 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dc891b7f7a 
								
							 
						 
						
							
							
								
								ggml : include stdlib.h before intrin.h ( #4736 )  
							
							
							
						 
						
							2024-01-04 10:12:26 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									singularity 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								46cea79e1f 
								
							 
						 
						
							
							
								
								llama.swiftui : fix build of ggml.metallib ( #4754 )  
							
							... 
							
							
							
							* metal: fix metal backend init failure in swiftui
* metal: build ggml.metallib instead of copy src
* llama.swift : remove debug flags from metallib build
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 
							
						 
						
							2024-01-04 09:58:16 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Daniel Bevenius 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cb1e2818e0 
								
							 
						 
						
							
							
								
								train : fix typo in overlapping-samples help msg ( #4758 )  
							
							... 
							
							
							
							This commit fixes a typo in the help message for the
--overlapping-samples option.
Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com> 
							
						 
						
							2024-01-03 19:53:40 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashraful Islam 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ece9a45e8f 
								
							 
						 
						
							
							
								
								swift : update Package.swift to use ggml as dependency ( #4691 )  
							
							... 
							
							
							
							* updates the package.swift to use ggml as dependency
* changes the ggml package url src to ggerganov 
							
						 
						
							2024-01-03 19:30:02 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
							
							
								
							
							
								7bed7eba35 
								
							 
						 
						
							
							
								
								cuda : simplify expression  
							
							... 
							
							
							
							Co-authored-by: slaren <slarengh@gmail.com> 
							
						 
						
							2024-01-03 14:38:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
							
							
								
							
							
								d55356d3ba 
								
							 
						 
						
							
							
								
								cuda : mark I16 and I32 ops as unsupported  
							
							... 
							
							
							
							ggml-ci 
							
						 
						
							2024-01-03 14:38:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
							
							
								
							
							
								75e3fd8581 
								
							 
						 
						
							
							
								
								sync : ggml  
							
							... 
							
							
							
							ggml-ci 
							
						 
						
							2024-01-03 14:38:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
							
							
								
							
							
								289313716f 
								
							 
						 
						
							
							
								
								metal : add kernel_get_rows_i32  
							
							... 
							
							
							
							ggml-ci 
							
						 
						
							2024-01-03 14:38:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
							
							
								
							
							
								ab62fc3e55 
								
							 
						 
						
							
							
								
								scripts : fix sync order + metal sed  
							
							
							
						 
						
							2024-01-03 14:38:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Guillaume Wenzek 
								
							 
						 
						
							
							
							
							
								
							
							
								5f66ebca9c 
								
							 
						 
						
							
							
								
								ggml : extend ggml_get_rows, ggml_repeat, ggml_concat (ggml/639)  
							
							... 
							
							
							
							* add more int ops
* ggml_compute_forward_dup_bytes
* add tests
* PR comments
* tests : minor indentations
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 
							
						 
						
							2024-01-03 14:38:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Justin Parker 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f2eb19bd8b 
								
							 
						 
						
							
							
								
								server : throw an error when slot unavailable ( #4741 )  
							
							
							
						 
						
							2024-01-03 10:43:19 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f3f62f0d83 
								
							 
						 
						
							
							
								
								metal : optimize ggml_mul_mat_id (faster Mixtral PP) ( #4725 )  
							
							... 
							
							
							
							* ggml : disable fast-math for Metal (cmake build only)
ggml-ci
* metal : fix Metal API debug warnings
* cmake : add -fno-inline for Metal build (#4545)
* metal : fix API debug warnings
* metal : fix compile warnings
* metal : use uint64_t for strides
* cmake : rename option to LLAMA_METAL_SHADER_DEBUG
* metal : fix mat-vec Q8_0 kernel for BS > 1
* metal : normalize mat-vec kernel signatures
* cmake : respect LLAMA_QKK_64 option
* metal : fix mat-vec Q4_K kernel for QK_K == 64
* metal : optimizing ggml_mul_mat_id (wip)
* metal : minor fix
* metal : opt mul_mm_id 
							
						 
						
							2024-01-02 21:07:47 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Phil H 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0ef3ca2ac6 
								
							 
						 
						
							
							
								
								server : add token counts to html footer ( #4738 )  
							
							... 
							
							
							
							* server: add token counts to stats
* server: generate hpp
---------
Co-authored-by: phiharri <ph@got-root.co.uk> 
							
						 
						
							2024-01-02 17:48:49 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								540938f890 
								
							 
						 
						
							
							
								
								llama : llama_model_desc print number of experts  
							
							
							
						 
						
							2024-01-02 16:26:45 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Marcus Dunn 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0040d42eeb 
								
							 
						 
						
							
							
								
								llama : replace all API facing int's with int32_t ( #4577 )  
							
							... 
							
							
							
							* replaced all API facing `int`'s with `int32_t`
* formatting and missed `int` in `llama_token_to_piece` 
							
						 
						
							2024-01-02 16:15:16 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									postmasters 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								83e633c27e 
								
							 
						 
						
							
							
								
								llama : differentiate the KV dims in the attention ( #4657 )  
							
							... 
							
							
							
							* Add n_key_dim and n_value_dim
Some models use values that are not derived from `n_embd`.
Also remove `n_embd_head` and `n_embd_gqa` because it is not clear
which "head" is referred to (key or value).
Fix issue #4648.
* Fix `llm_build_kqv` to use `n_value_gqa`
* Rebase
* Rename variables
* Fix llm_build_kqv to be more generic wrt n_embd_head_k
* Update default values for n_embd_head_k and n_embd_head_v
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Fix llm_load_tensors: the asserts were not backcompat
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 
							
						 
						
							2024-01-02 13:51:28 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								32866c5edd 
								
							 
						 
						
							
							
								
								editorconfig : fix whitespace and indentation  #4710  
							
							
							
						 
						
							2024-01-02 13:28:15 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									minarchist 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5d7002d437 
								
							 
						 
						
							
							
								
								server : add --override-kv parameter ( #4710 )  
							
							... 
							
							
							
							* Changes to server to allow metadata override
* documentation
* flake.nix: expose full scope in legacyPackages
* flake.nix: rocm not yet supported on aarch64, so hide the output
* flake.nix: expose checks
* workflows: nix-ci: init; build flake outputs
* workflows: nix-ci: add a job for eval
* workflows: weekly `nix flake update`
* workflows: nix-flakestry: drop tag filters
...and add a job for flakehub.com
* workflows: nix-ci: add a qemu job for jetsons
* flake.nix: suggest the binary caches
* flake.lock: update
to a commit recently cached by nixpkgs-cuda-ci
---------
Co-authored-by: John <john@jLap.lan>
Co-authored-by: Someone Serge <sergei.kozlukov@aalto.fi> 
							
						 
						
							2024-01-02 12:38:15 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Nam D. Tran 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								26f3071d71 
								
							 
						 
						
							
							
								
								py : re-enable mmap in convert hf ( #4732 )  
							
							... 
							
							
							
							* update: awq support llama-7b model
* update: change order
* update: benchmark results for llama2-7b
* update: mistral 7b v1 benchmark
* update: support 4 models
* fix: Readme
* update: ready for PR
* update: readme
* fix: readme
* update: change order import
* black
* format code
* update: work for both mpt and awqmpt
* update: readme
* Rename to llm_build_ffn_mpt_awq
* Formatted other files
* Fixed params count
* fix: remove code
* update: more detail for mpt
* fix: readme
* fix: readme
* update: change folder architecture
* fix: common.cpp
* fix: readme
* fix: remove ggml_repeat
* update: cicd
* update: cicd
* update: remove use_awq arg
* update: readme
* llama : adapt plamo to new ffn
ggml-ci
* fix: update torch version
---------
Co-authored-by: Trần Đức Nam <v.namtd12@vinai.io>
Co-authored-by: Le Hoang Anh <v.anhlh33@vinai.io>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 
							
						 
						
							2024-01-02 11:23:38 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Daniel Bevenius 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								775ac8712a 
								
							 
						 
						
							
							
								
								finetune: fix typo in README.md ( #4733 )  
							
							... 
							
							
							
							Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com> 
							
						 
						
							2024-01-02 10:16:55 +01:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								58ba655af0 
								
							 
						 
						
							
							
								
								metal : enable shader debugging (cmake option) ( #4705 )  
							
							... 
							
							
							
							* ggml : disable fast-math for Metal (cmake build only)
ggml-ci
* metal : fix Metal API debug warnings
* cmake : add -fno-inline for Metal build (#4545)
* metal : fix API debug warnings
* metal : fix compile warnings
* metal : use uint64_t for strides
* cmake : rename option to LLAMA_METAL_SHADER_DEBUG
* metal : fix mat-vec Q8_0 kernel for BS > 1
* metal : normalize mat-vec kernel signatures
* cmake : respect LLAMA_QKK_64 option
* metal : fix mat-vec Q4_K kernel for QK_K == 64
ggml-ci 
							
						 
						
							2024-01-02 10:57:44 +02:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								edd1ab7bc3 
								
							 
						 
						
							
							
								
								flake.lock: update  
							
							... 
							
							
							
							to a commit recently cached by nixpkgs-cuda-ci 
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								198ed7ebfc 
								
							 
						 
						
							
							
								
								flake.nix: suggest the binary caches  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								d836174731 
								
							 
						 
						
							
							
								
								workflows: nix-ci: add a qemu job for jetsons  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								06f2a5d190 
								
							 
						 
						
							
							
								
								workflows: nix-flakestry: drop tag filters  
							
							... 
							
							
							
							...and add a job for flakehub.com 
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								c5239944ba 
								
							 
						 
						
							
							
								
								workflows: weekly nix flake update  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								1e9ae54cf2 
								
							 
						 
						
							
							
								
								workflows: nix-ci: add a job for eval  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								7adedecbe3 
								
							 
						 
						
							
							
								
								workflows: nix-ci: init; build flake outputs  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								356ea17e0f 
								
							 
						 
						
							
							
								
								flake.nix: expose checks  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								a5c088d8c6 
								
							 
						 
						
							
							
								
								flake.nix: rocm not yet supported on aarch64, so hide the output  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Someone Serge 
								
							 
						 
						
							
							
							
							
								
							
							
								1e3900ebac 
								
							 
						 
						
							
							
								
								flake.nix: expose full scope in legacyPackages  
							
							
							
						 
						
							2023-12-31 13:14:58 -08:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Georgi Gerganov 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e39106c055 
								
							 
						 
						
							
							
								
								ggml : add ggml_vdotq_s32 alias ( #4715 )  
							
							... 
							
							
							
							ggml-ci 
							
						 
						
							2023-12-31 11:43:31 +02:00