server : better security control for public deployments (#9776)
* server : more explicit endpoint access settings
* protect /props endpoint
* fix tests
* update server docs
* fix typo
* fix tests
This commit is contained in:
parent
fa42aa6d89
commit
458367a906
8 changed files with 126 additions and 116 deletions
|
@ -5,7 +5,7 @@ Feature: Security
|
|||
Background: Server startup with an api key defined
|
||||
Given a server listening on localhost:8080
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
And a server api key llama.cpp
|
||||
And a server api key THIS_IS_THE_KEY
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
|
||||
|
@ -16,11 +16,11 @@ Feature: Security
|
|||
And a completion request with <api_error> api error
|
||||
|
||||
Examples: Prompts
|
||||
| api_key | api_error |
|
||||
| llama.cpp | no |
|
||||
| llama.cpp | no |
|
||||
| hackeme | raised |
|
||||
| | raised |
|
||||
| api_key | api_error |
|
||||
| THIS_IS_THE_KEY | no |
|
||||
| THIS_IS_THE_KEY | no |
|
||||
| hackeme | raised |
|
||||
| | raised |
|
||||
|
||||
Scenario Outline: OAI Compatibility
|
||||
Given a system prompt test
|
||||
|
@ -32,10 +32,10 @@ Feature: Security
|
|||
Given an OAI compatible chat completions request with <api_error> api error
|
||||
|
||||
Examples: Prompts
|
||||
| api_key | api_error |
|
||||
| llama.cpp | no |
|
||||
| llama.cpp | no |
|
||||
| hackme | raised |
|
||||
| api_key | api_error |
|
||||
| THIS_IS_THE_KEY | no |
|
||||
| THIS_IS_THE_KEY | no |
|
||||
| hackme | raised |
|
||||
|
||||
Scenario Outline: OAI Compatibility (invalid response formats)
|
||||
Given a system prompt test
|
||||
|
@ -55,7 +55,7 @@ Feature: Security
|
|||
|
||||
|
||||
Scenario Outline: CORS Options
|
||||
Given a user api key llama.cpp
|
||||
Given a user api key THIS_IS_THE_KEY
|
||||
When an OPTIONS request is sent from <origin>
|
||||
Then CORS header <cors_header> is set to <cors_header_value>
|
||||
|
||||
|
|
|
@ -1299,7 +1299,8 @@ async def wait_for_slots_status(context,
|
|||
|
||||
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
|
||||
while True:
|
||||
async with await session.get(f'{base_url}/slots', params=params) as slots_response:
|
||||
headers = {'Authorization': f'Bearer {context.server_api_key}'}
|
||||
async with await session.get(f'{base_url}/slots', params=params, headers=headers) as slots_response:
|
||||
status_code = slots_response.status
|
||||
slots = await slots_response.json()
|
||||
if context.debug:
|
||||
|
@ -1387,6 +1388,7 @@ def start_server_background(context):
|
|||
context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
|
||||
server_listen_addr = context.server_fqdn
|
||||
server_args = [
|
||||
'--slots', # requires to get slot status via /slots endpoint
|
||||
'--host', server_listen_addr,
|
||||
'--port', context.server_port,
|
||||
]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue