From af02c947989fffab4f87494533bb5b07e2919bc7 Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin"
Date: Fri, 17 Mar 2023 04:49:41 -0700
Subject: [PATCH] add easy Windows install instructions to the readme

Also fix a typo of LLaMA's casing in the chat.cpp file, and add cmake's
generated files to the gitignore
---
 .gitignore | 13 +++++++++++++
 README.md  | 23 ++++++++++++++++++++++-
 chat.cpp   |  2 +-
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5eb1ff1b8..699f76c4a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,9 +15,22 @@ build-sanitize-addr/
 build-sanitize-thread/
 
 models/*
+*.bin
 
 /main
 /quantize
 
 arm_neon.h
 compile_commands.json
+
+# Windows CMake files
+*.vcxproj
+*.filters
+*.cmake
+*.sln
+x64/
+Debug/
+Release/
+CMakeFiles/
+CMakeCache.txt
+*.dir/
diff --git a/README.md b/README.md
index 14b294f1a..55786bb9b 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ This combines the [LLaMA foundation model](https://github.com/facebookresearch/l
 
 ## Get started
 
-```
+```sh
 git clone https://github.com/antimatter15/alpaca.cpp
 cd alpaca.cpp
 
@@ -34,6 +34,27 @@ Save the `ggml-alpaca-7b-q4.bin` file in the same directory as your `./chat` exe
 
 The weights are based on the published fine-tunes from `alpaca-lora`, converted back into a pytorch checkpoint with a [modified script](https://github.com/tloen/alpaca-lora/pull/19) and then quantized with llama.cpp the regular way.
 
+## Windows Setup
+
+- Download and install CMake: <https://cmake.org/download/>
+- Download and install `git`. If you've never used git before, consider a GUI client like <https://desktop.github.com/>
+- Clone this repo using your git client of choice (for GitHub Desktop, go to File -> Clone repository -> From URL and paste `https://github.com/antimatter15/alpaca.cpp` in as the URL)
+- Open a Windows Terminal inside the folder you cloned the repository to
+- Run the following commands one by one:
+
+```ps1
+cmake .
+cmake --build . --config Release
+```
+
+- Download the weights via any of the links in "Get started" above, and save the file as `ggml-alpaca-7b-q4.bin` in the main Alpaca directory.
+- In the terminal window, run this command:
+```ps1
+.\Release\chat.exe
+```
+- (You can add other launch options like `--n 8` as preferred onto the same line)
+- You can now type to the AI in the terminal and it will reply. Enjoy!
+
 ## Credit
 
 This combines [Facebook's LLaMA](https://github.com/facebookresearch/llama), [Stanford Alpaca](https://crfm.stanford.edu/2023/03/13/alpaca.html), [alpaca-lora](https://github.com/tloen/alpaca-lora) and [corresponding weights](https://huggingface.co/tloen/alpaca-lora-7b/tree/main) by Eric Wang (which uses [Jason Phang's implementation of LLaMA](https://github.com/huggingface/transformers/pull/21955) on top of Hugging Face Transformers), and [llama.cpp](https://github.com/ggerganov/llama.cpp) by Georgi Gerganov. The chat implementation is based on Matvey Soloviev's [Interactive Mode](https://github.com/ggerganov/llama.cpp/pull/61) for llama.cpp. Inspired by [Simon Willison's](https://til.simonwillison.net/llms/llama-7b-m2) getting started guide for LLaMA.
diff --git a/chat.cpp b/chat.cpp
index 885d1f69a..5acb2bc95 100644
--- a/chat.cpp
+++ b/chat.cpp
@@ -915,7 +915,7 @@ int main(int argc, char ** argv) {
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
                    " - Press Ctrl+C to interject at any time.\n"
 #endif
-                   " - Press Return to return control to LLaMa.\n"
+                   " - Press Return to return control to LLaMA.\n"
                    " - If you want to submit another line, end your input in '\\'.\n");
         }
 
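To try this change locally, a minimal sketch of applying the mailbox-format patch above onto a fresh checkout (the saved filename is hypothetical; `git am` is the standard tool for `git format-patch` emails):

```sh
# Save this email to a file, then apply it as a commit.
git clone https://github.com/antimatter15/alpaca.cpp
cd alpaca.cpp
git am 0001-add-easy-windows-install-instructions.patch  # hypothetical filename

# Confirm the commit touched the three files listed in the diffstat.
git show --stat HEAD
```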