add handler

This commit is contained in:
Robert Washbourne 2023-11-24 00:41:33 -05:00
parent 8ddd5cb916
commit 4468d96aec
2 changed files with 36 additions and 15 deletions

View file

@ -2,24 +2,45 @@ import subprocess
import runpod
import os
import time
import aiohttp
import json
# HTTP headers passed along with the POST to the local completion endpoint.
headers = {
    # Ask for the response as a server-sent event stream.
    'Accept': 'text/event-stream',
    'Connection': 'keep-alive',
    # The request body is a JSON document.
    'Content-Type': 'application/json',
    # Browser-style origin/fetch metadata pointing at the local server.
    'Origin': 'http://127.0.0.1:8080',
    'Referer': 'http://127.0.0.1:8080/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
}
llama_cmd = os.environ.get('LLAMA_CMD', "/server --host 0.0.0.0 --threads 8 -ngl 999 -np 8 -cb -m model.gguf -c 16384")
subprocess.Popen(llama_cmd.split(' '))
sub = subprocess.Popen(llama_cmd.split(' '))
## load your model(s) into vram here
def handler(event):
print(event)
time_slept = 0
while time_slept < sleep_time:
print("working, I promise")
time_slept += 1
time.sleep(1)
# do the things
return "Hello World"
url = "http://0.0.0.0:8080/completion"
async def handler(event):
    """Stream a completion from the local server for the prompt in *event*.

    Args:
        event: Job payload; the prompt is read from
            ``event["input"]["prompt"]``.

    Yields:
        Each line of the completion endpoint's response body, forwarded
        exactly as received (undecoded ``bytes``).

    Raises:
        KeyError: If ``"input"`` or ``"prompt"`` is missing (assuming
            *event* is a dict).
    """
    print(event)
    prompt = event["input"]["prompt"]

    # Build the request body up front. ``json.dumps`` takes the object as its
    # first positional argument; handing it over as a keyword raises
    # ``TypeError`` before the request is ever issued.
    payload = {
        'stream': True,
        'n_predict': 2048,
        'temperature': 0.2,
        'stop': [
            '</s>',
            'Llama:',
            'User:',
        ],
        'prompt': prompt,
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(
            url, data=json.dumps(payload), headers=headers
        ) as response:
            # NOTE(review): lines are yielded as raw bytes -- confirm the
            # consumer of this generator can serialize them.
            async for line in response.content:
                yield line
runpod.serverless.start({
"handler": handler
"handler": handler,
"return_aggregate_stream": True # Optional, results available via /run
})

View file

@ -38,7 +38,7 @@ RUN wget $MODEL_URL -O /model.gguf
WORKDIR /install
RUN apt-get install -y python3 python3-pip
RUN pip install --prefix /install runpod
RUN pip install --prefix /install runpod aiohttp
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime