Added readme for server example
This commit is contained in:
parent
f01c6cbc7e
commit
197bb66339
2 changed files with 101 additions and 1 deletion
100  examples/server/README.md  Normal file
@@ -0,0 +1,100 @@
## llama.cpp/example/server

This example lets you run a llama.cpp HTTP server that you can interact with from a web page or consume as an API.

It doesn't require external dependencies.

## Limitations

* Only tested on Windows and Linux.
* Only CMake builds are supported.
* Only one context at a time.
* Only Vicuna is supported for interaction.

## Endpoints

You can interact with the following API endpoints:

`POST hostname:port/setting-context`

`POST hostname:port/set-message`

`GET hostname:port/completion`

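The typical flow is to set the conversation context first, then submit the user message, and finally request the completion. The sketch below shows that order in minimal form; it is based on the fuller Node.js example later in this README, and the local host, port, and parameter values are assumptions for a default run.

```javascript
// Minimal sketch of the endpoint call order (see the full example below).
// Assumes the server is running locally on port 8080; parameter and field
// names follow the Node.js example further down in this README.
const axios = require('axios');

async function quickTest() {
    // 1. Define the conversation context and sampling parameters.
    await axios.post("http://127.0.0.1:8080/setting-context", {
        context: [
            { role: "system", content: "You are a helpful assistant." }
        ],
        temperature: 0.2,
        n_predict: 256
    });

    // 2. Send the user message to be answered.
    const msg = await axios.post("http://127.0.0.1:8080/set-message", {
        message: " Hello, what can you do?"
    });

    // 3. Fetch the completion (non-streaming; pass stream=true to stream tokens).
    if (msg.data.can_inference) {
        const result = await axios.get("http://127.0.0.1:8080/completion");
        console.log(result.data.content);
    }
}

quickTest();
```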
## Usage

### Get Code

```bash
git clone https://github.com/FSSRepo/llama.cpp.git
cd llama.cpp
```

### Build

```bash
mkdir build
cd build
cmake ..
cmake --build . --config Release
```

### Run

Model tested: [Vicuna](https://huggingface.co/chharlesonfire/ggml-vicuna-7b-4bit/blob/main/ggml-vicuna-7b-q4_0.bin)

```bash
server -m ggml-vicuna-7b-q4_0.bin --keep -1 --ctx_size 2048
```
### Test the endpoints with Node.js

You need to have [Node.js](https://nodejs.org/en) installed.

```bash
mkdir llama-client
cd llama-client
npm init
npm install axios
```

Create an `index.js` file and put the following inside it:
```javascript
const axios = require('axios');

async function Test() {
    let result = await axios.post("http://127.0.0.1:8080/setting-context", {
        context: [
            { role: "system", content: "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions." },
            { role: "user", content: "Hello, Assistant." },
            { role: "assistant", content: "Hello. How may I help you today?" },
            { role: "user", content: "Please tell me the largest city in Europe." },
            { role: "assistant", content: "Sure. The largest city in Europe is Moscow, the capital of Russia." }
        ],
        batch_size: 64,
        temperature: 0.2,
        top_k: 40,
        top_p: 0.9,
        n_predict: 2048,
        threads: 5
    });
    result = await axios.post("http://127.0.0.1:8080/set-message", {
        message: ' What is linux?'
    });
    if (result.data.can_inference) {
        result = await axios.get("http://127.0.0.1:8080/completion?stream=true", { responseType: 'stream' });
        result.data.on('data', (data) => {
            // token by token completion
            let dat = JSON.parse(data.toString());
            process.stdout.write(dat.content);
        });

        /*
        Wait for the entire completion instead (takes a long time to respond):

        result = await axios.get("http://127.0.0.1:8080/completion");
        console.log(result.data.content);
        */
    }
}

Test();
```
And run it:

```bash
node .
```
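For reference, the non-streaming `GET /completion` call appears to return a JSON body with the generated text and a token count; the field names can be seen in the server-side change at the bottom of this commit. A rough sketch of the shape, with placeholder values rather than real output:

```javascript
// Illustrative sketch only: approximate shape of the non-streaming
// GET /completion response body. Field names come from the server code
// changed in this commit; the values here are placeholders.
const exampleCompletionResponse = {
    content: "...the generated completion text...", // same field the streaming chunks carry
    total_tokens: 0                                 // number of tokens predicted
};
```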
@@ -730,7 +730,7 @@ int main(int argc, char ** argv) {
             { "content", completion.c_str() },
             { "total_tokens", llama->tokens_completion }
         };
-        printf("\nCompletion finished: %i tokens predicted.\n", llama->tokens_completion);
+        printf("\rCompletion finished: %i tokens predicted.\n", llama->tokens_completion);
         res.set_content(data.dump(), "application/json");
     }
 });