agent: isolate tools container + log its outgoing HTTP & HTTPS traffic w/ docker compose + self-signed squid proxy

This commit is contained in:
ochafik 2024-10-24 05:38:54 +01:00
parent 414f6f1b30
commit 267e630c14
8 changed files with 174 additions and 69 deletions

3
examples/agent/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
# Runtime state generated by serve_tools_inside_docker.sh / the squid container.
squid/ssl_cert/
squid/ssl_db/
squid/cache/
# Proxy access & cache logs are bind-mounted here by docker compose.
squid/logs/

View file

@ -0,0 +1,8 @@
# Image for the `outgoing_proxy` service: Debian with the `squid-openssl` package.
FROM debian:latest

ENV SQUID_CACHE_DIR=/var/spool/squid \
    SQUID_LOG_DIR=/var/log/squid

# Use `apt-get` (the `apt` front-end warns that its CLI is not stable for scripts),
# and drop the package lists in the same layer so they don't bloat the image.
RUN apt-get update && \
    apt-get install -y squid-openssl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

View file

@ -0,0 +1,17 @@
# Image for the `siloed_tools` service: Python tools server that trusts the squid proxy's CA.
FROM python:3.12-slim

# No apt packages are installed in this image, so there is no apt cache to clean;
# --no-cache-dir keeps pip's download cache out of the layers instead.
RUN python -m pip install --no-cache-dir --upgrade pip

WORKDIR /root
COPY requirements.txt /root/
RUN pip install --no-cache-dir -r requirements.txt

COPY ./*.py /root/
COPY ./tools/*.py /root/tools/

# Add the proxy's self-signed CA to the system trust store.
COPY ./squid/ssl_cert/squidCA.crt /usr/local/share/ca-certificates/squidCA.crt
RUN chmod 644 /usr/local/share/ca-certificates/squidCA.crt && update-ca-certificates

# HTTP clients that ship their own CA bundle (e.g. requests via certifi) ignore the system
# store unless pointed at it explicitly; without this they would reject the proxy's certificates.
ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
    REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt

# ENTRYPOINT [ "python" ]
# CMD ["serve_tools.py"]

View file

@ -0,0 +1,74 @@
services:

  # Forwards tool calls to the `siloed_tools` container.
  # This is the only service that publishes the tools port (8088) on the host.
  tools_endpoint:
    container_name: tools_endpoint
    depends_on:
      - siloed_tools
    image: alpine/socat:latest
    networks:
      - private_net
      - external_net
    ports:
      - "8088:8088"
    command: TCP-LISTEN:8088,fork,bind=tools_endpoint TCP-CONNECT:siloed_tools:8088

  # Runs tools w/o direct internet access.
  #
  # All outgoing tool traffic must go through outgoing_proxy, which will log even HTTPS requests
  # (the proxy's self-signed cert is added to this container's root CAs).
  #
  # Even if you trust your agents (which you shouldn't), please verify the kind of traffic they emit.
  siloed_tools:
    container_name: siloed_tools
    depends_on:
      - outgoing_proxy
    image: local/llama.cpp:isolated-tools
    build:
      context: .
      dockerfile: Dockerfile.tools
    # Deliberately no `ports:` here: host port 8088 is already published by `tools_endpoint`
    # (publishing it twice conflicts), and this container must only be reachable via private_net.
    expose:
      - "8088"
    networks:
      - private_net
    environment:
      - PORT=8088
      # `:-` avoids a compose warning when the key is not set on the host.
      - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
      - http_proxy=http://outgoing_proxy:3128
      - https_proxy=http://outgoing_proxy:3128
    # serve_tools.py only defines the FastAPI `app` (it no longer starts a server itself),
    # so it has to be launched through uvicorn; `python serve_tools.py` would exit immediately.
    entrypoint: uvicorn
    command: serve_tools:app --host 0.0.0.0 --port 8088
    # entrypoint: /usr/bin/bash
    # command: ["-c", "pip install --upgrade gguf && apt update && apt install -y curl && curl https://ochafik.com && pip install gguf"]

  # Logs all outgoing traffic, and caches pip & apt packages.
  outgoing_proxy:
    container_name: outgoing_proxy
    image: local/llama.cpp:squid
    build:
      context: .
      dockerfile: Dockerfile.squid
    volumes:
      - ./squid/conf/squid.conf:/etc/squid/squid.conf:ro
      - ./squid/cache:/var/spool/squid
      - ./squid/logs:/var/log/squid
      - ./squid/ssl_cert:/etc/squid/ssl_cert:ro
      - ./squid/ssl_db:/var/spool/squid/ssl_db
    extra_hosts:
      - host.docker.internal:host-gateway
    networks:
      - private_net
      - external_net
    ports:
      - "3128:3128"
    restart: unless-stopped
    entrypoint: /usr/bin/bash
    # Runs `squid -N -z` first, creates the certificate database if it is missing,
    # then runs squid in the foreground (-N).
    command:
      - -c
      - >-
        squid -N -z &&
        ( test -d /var/spool/squid/ssl_db/db ||
        /usr/lib/squid/security_file_certgen -c -s /var/spool/squid/ssl_db/db -M 20MB ) &&
        /usr/sbin/squid -N -d 1 -s

networks:
  # Internal-only network: containers attached solely to it have no direct route outside.
  private_net:
    driver: bridge
    internal: true
  external_net:
    driver: bridge

View file

@ -1,6 +1,5 @@
aiohttp aiohttp
fastapi fastapi
ipython ipython
pydantic pyppeteer
typer
uvicorn uvicorn

View file

@ -1,17 +1,3 @@
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "aiohttp",
# "beautifulsoup4",
# "fastapi",
# "html2text",
# "ipython",
# "pyppeteer",
# "requests",
# "typer",
# "uvicorn",
# ]
# ///
''' '''
Runs simple tools as a FastAPI server. Runs simple tools as a FastAPI server.
@ -28,12 +14,9 @@
''' '''
import logging import logging
import re import re
from typing import Optional
import fastapi import fastapi
import os import os
import sys import sys
import typer
import uvicorn
sys.path.insert(0, os.path.dirname(__file__)) sys.path.insert(0, os.path.dirname(__file__))
@ -42,6 +25,12 @@ from tools.search import brave_search
from tools.python import python, python_tools from tools.python import python, python_tools
verbose = os.environ.get('VERBOSE', '0') == '1'
include = os.environ.get('INCLUDE_TOOLS')
exclude = os.environ.get('EXCLUDE_TOOLS')
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
ALL_TOOLS = { ALL_TOOLS = {
fn.__name__: fn fn.__name__: fn
for fn in [ for fn in [
@ -51,26 +40,12 @@ ALL_TOOLS = {
] ]
} }
app = fastapi.FastAPI()
def main(host: str = '0.0.0.0', port: int = 8000, verbose: bool = False, include: Optional[str] = None, exclude: Optional[str] = None): for name, fn in ALL_TOOLS.items():
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) if include and not re.match(include, fn.__name__):
continue
def accept_tool(name): if exclude and re.match(exclude, fn.__name__):
if include and not re.match(include, name): continue
return False app.post(f'/{name}')(fn)
if exclude and re.match(exclude, name): if name != 'python':
return False python_tools[name] = fn
return True
app = fastapi.FastAPI()
for name, fn in ALL_TOOLS.items():
if accept_tool(name):
app.post(f'/{name}')(fn)
if name != 'python':
python_tools[name] = fn
uvicorn.run(app, host=host, port=port)
if __name__ == '__main__':
typer.run(main)

View file

@ -1,37 +1,30 @@
#!/bin/bash #!/bin/bash
# #
# Serves tools inside a docker container # Serves tools inside a docker container.
#
# All outgoing HTTP *and* HTTPS traffic will be logged to `examples/agent/squid/logs/access.log`.
# Direct traffic to the host machine will be ~blocked, but clever AIs may find a way around it:
# make sure to have proper firewall rules in place.
#
# Take a look at `examples/agent/squid/conf/squid.conf` if you want tools to access your local llama-server(s).
# #
# Usage: # Usage:
# examples/agent/serve_tools_inside_docker.sh [--verbose] [--include="tool1|tool2|..."] [--exclude="tool1|tool2|..."] # examples/agent/serve_tools_inside_docker.sh
# #
set -euo pipefail set -euo pipefail
PORT=${PORT:-8088} cd examples/agent
BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
DATA_DIR=${DATA_DIR:-$HOME/.llama.cpp/agent/tools/data}
UV_CACHE_DIR=${UV_CACHE_DIR:-$HOME/.llama.cpp/agent/tools/uv_cache}
mkdir -p "$DATA_DIR" mkdir -p squid/{cache,logs,ssl_cert,ssl_db}
mkdir -p "$UV_CACHE_DIR" rm -f squid/logs/{access,cache}.log
args=( --port $PORT "$@" ) # Generate a self-signed certificate for the outgoing proxy.
echo "# Warming up the uv cache" # Tools can only reach out to HTTPS endpoints through that proxy, which they are told to trust blindly.
docker run \ openssl req -new -newkey rsa:4096 -days 3650 -nodes -x509 \
-w /src \ -keyout squid/ssl_cert/squidCA.pem \
-v $PWD/examples/agent:/src \ -out squid/ssl_cert/squidCA.pem \
-v "$UV_CACHE_DIR":/root/.cache/uv:rw \ -subj "/C=US/ST=State/L=City/O=Organization/OU=Org Unit/CN=outgoing_proxy"
--rm -it ghcr.io/astral-sh/uv:python3.12-alpine \
uv run serve_tools.py --help
echo "# Running inside docker: serve_tools.py ${args[*]}" openssl x509 -outform PEM -in squid/ssl_cert/squidCA.pem -out squid/ssl_cert/squidCA.crt
docker run \
-p $PORT:$PORT \ docker compose up --detach --build
-w /src \
-v $PWD/examples/agent:/src \
-v "$UV_CACHE_DIR":/root/.cache/uv \
-v "$DATA_DIR":/data:rw \
--env "MEMORY_SQLITE_DB=/data/memory.db" \
--env "BRAVE_SEARCH_API_KEY=$BRAVE_SEARCH_API_KEY" \
--rm -it ghcr.io/astral-sh/uv:python3.12-alpine \
uv run serve_tools.py "${args[@]}"

View file

@ -0,0 +1,36 @@
# Squid proxy w/ logging of both HTTP *and* HTTPS requests.
# We set up SSL Bump so that the http_proxy & https_proxy environment variables can be set to
# `http://<this_host>:3128` on any client that trusts the CA certificate.
http_port 3128 ssl-bump cert=/etc/squid/ssl_cert/squidCA.pem tls-cafile=/etc/squid/ssl_cert/squidCA.crt
# Certificate generation helper (`security_file_certgen`) and the database path it is given via -s.
# NOTE(review): -M 20MB is presumably the maximum size of that database; confirm against squid docs.
sslcrtd_program /usr/lib/squid/security_file_certgen -s /var/spool/squid/ssl_db/db -M 20MB
sslcrtd_children 5
# Peek during the SslBump1 step, then bump everything.
acl step1 at_step SslBump1
ssl_bump peek step1
ssl_bump bump all
# Forbid access to the host.
# If you want to allow tools to call llama-server on the host (e.g. embeddings, or recursive thoughts),
# you can comment out the next two lines.
# NOTE(review): this ACL lists host *names* only; requests addressed to the host's IP are presumably
# not matched by it, so keep proper firewall rules in place as well.
acl blocked_sites dstdomain host.docker.internal host-gateway
http_access deny blocked_sites
# Allow all other traffic (you may want to restrict this in a production environment)
http_access allow all
# Cache Python packages.
# The host name must be preceded by `/` (as in `https://pypi.org/...`) or by `.` (a subdomain);
# the previous `($|\.)` prefix could never match a URL whose host is exactly one of these names.
# Plain `.*` is used because POSIX extended regexes have no lazy `.*?` quantifier
# (the trailing `$` anchor makes the two equivalent here).
refresh_pattern -i [/.](files\.pythonhosted\.org|pypi\.org)/.*\.(whl|zip|tar\.gz)$ 10080 90% 43200 reload-into-ims
# Cache Debian packages.
# (The closing parenthesis of the extension group was missing, which made the regex invalid.)
refresh_pattern \.debian\.org/.*\.(deb|udeb|tar\.(gz|xz|bz2))$ 129600 100% 129600
# Configure cache.
# maximum_object_size is declared *before* cache_dir: it provides the default max-size for the
# cache_dir lines that follow it, so declaring it afterwards leaves the disk cache with the
# (much smaller) built-in default and large packages would not be stored.
maximum_object_size 1024 MB
cache_dir ufs /var/spool/squid 10000 16 256
cache_mem 200 MB
# Configure logs (the directory is bind-mounted to ./squid/logs by docker compose).
cache_log /var/log/squid/cache.log
access_log /var/log/squid/access.log squid
cache_store_log none