add handler
This commit is contained in:
parent
8ddd5cb916
commit
4468d96aec
2 changed files with 36 additions and 15 deletions
|
@@ -2,24 +2,45 @@ import subprocess
|
||||||
import runpod
|
import runpod
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import aiohttp
|
||||||
|
import json
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'Accept': 'text/event-stream',
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Origin': 'http://127.0.0.1:8080',
|
||||||
|
'Referer': 'http://127.0.0.1:8080/',
|
||||||
|
'Sec-Fetch-Dest': 'empty',
|
||||||
|
'Sec-Fetch-Mode': 'cors',
|
||||||
|
'Sec-Fetch-Site': 'same-origin',
|
||||||
|
}
|
||||||
|
|
||||||
llama_cmd = os.environ.get('LLAMA_CMD', "/server --host 0.0.0.0 --threads 8 -ngl 999 -np 8 -cb -m model.gguf -c 16384")
|
llama_cmd = os.environ.get('LLAMA_CMD', "/server --host 0.0.0.0 --threads 8 -ngl 999 -np 8 -cb -m model.gguf -c 16384")
|
||||||
subprocess.Popen(llama_cmd.split(' '))
|
sub = subprocess.Popen(llama_cmd.split(' '))
|
||||||
|
|
||||||
## load your model(s) into vram here
|
## load your model(s) into vram here
|
||||||
|
|
||||||
def handler(event):
|
url = "http://0.0.0.0:8080/completion"
|
||||||
print(event)
|
async def handler(event):
|
||||||
time_slept = 0
|
print(event)
|
||||||
while time_slept < sleep_time:
|
prompt = event["input"]["prompt"]
|
||||||
print("working, I promise")
|
async with aiohttp.ClientSession() as session:
|
||||||
time_slept += 1
|
async with session.post(url, data=json.dumps(json_data = {
|
||||||
time.sleep(1)
|
'stream': True,
|
||||||
# do the things
|
'n_predict': 2048,
|
||||||
|
'temperature': 0.2,
|
||||||
return "Hello World"
|
'stop': [
|
||||||
|
'</s>',
|
||||||
|
'Llama:',
|
||||||
|
'User:',
|
||||||
|
],
|
||||||
|
'prompt': prompt,
|
||||||
|
}), headers=headers) as response:
|
||||||
|
async for line in response.content:
|
||||||
|
yield line
|
||||||
|
|
||||||
runpod.serverless.start({
|
runpod.serverless.start({
|
||||||
"handler": handler
|
"handler": handler,
|
||||||
|
"return_aggregate_stream": True # Optional, results available via /run
|
||||||
})
|
})
|
||||||
|
|
|
@@ -38,7 +38,7 @@ RUN wget $MODEL_URL -O /model.gguf
|
||||||
|
|
||||||
WORKDIR /install
|
WORKDIR /install
|
||||||
RUN apt-get install -y python3 python3-pip
|
RUN apt-get install -y python3 python3-pip
|
||||||
RUN pip install --prefix /install runpod
|
RUN pip install --prefix /install runpod aiohttp
|
||||||
|
|
||||||
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
|
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue