From 267e630c14307fde01eeaaedfb039a8a2c826086 Mon Sep 17 00:00:00 2001
From: ochafik
Date: Thu, 24 Oct 2024 05:38:54 +0100
Subject: [PATCH] `agent`: isolate tools container + log its outgoing HTTP & HTTPS traffic w/ docker compose + self-signed squid proxy

---
 examples/agent/.gitignore                   |  3 +
 examples/agent/Dockerfile.squid             | 11 +++
 examples/agent/Dockerfile.tools             | 16 +++++
 examples/agent/docker-compose.yml           | 76 +++++++++++++++++++++
 examples/agent/requirements.txt             |  3 +-
 examples/agent/serve_tools.py               | 55 +++++----------
 examples/agent/serve_tools_inside_docker.sh | 47 ++++++-------
 examples/agent/squid/conf/squid.conf        | 36 ++++++++++
 8 files changed, 178 insertions(+), 69 deletions(-)
 create mode 100644 examples/agent/.gitignore
 create mode 100644 examples/agent/Dockerfile.squid
 create mode 100644 examples/agent/Dockerfile.tools
 create mode 100644 examples/agent/docker-compose.yml
 create mode 100755 examples/agent/squid/conf/squid.conf

diff --git a/examples/agent/.gitignore b/examples/agent/.gitignore
new file mode 100644
index 000000000..29dcca836
--- /dev/null
+++ b/examples/agent/.gitignore
@@ -0,0 +1,3 @@
+squid/ssl_cert/
+squid/ssl_db/
+squid/cache/
\ No newline at end of file
diff --git a/examples/agent/Dockerfile.squid b/examples/agent/Dockerfile.squid
new file mode 100644
index 000000000..240d8197c
--- /dev/null
+++ b/examples/agent/Dockerfile.squid
@@ -0,0 +1,11 @@
+# Pinned to a named Debian release (instead of `latest`) so rebuilds don't silently change the squid version.
+FROM debian:bookworm
+
+ENV SQUID_CACHE_DIR=/var/spool/squid \
+    SQUID_LOG_DIR=/var/log/squid
+
+# apt-get is the script-stable front end (apt warns when used non-interactively).
+# The package lists are removed in the same layer so they don't end up in the image.
+RUN apt-get update && \
+    apt-get install -y squid-openssl && \
+    rm -rf /var/lib/apt/lists/*
diff --git a/examples/agent/Dockerfile.tools b/examples/agent/Dockerfile.tools
new file mode 100644
index 000000000..a26244f4b
--- /dev/null
+++ b/examples/agent/Dockerfile.tools
@@ -0,0 +1,16 @@
+FROM python:3.12-slim
+
+RUN python -m pip install --no-cache-dir --upgrade pip
+
+COPY requirements.txt /root/
+WORKDIR /root
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY ./*.py /root/
+COPY ./tools/*.py /root/tools/
+
+COPY ./squid/ssl_cert/squidCA.crt /usr/local/share/ca-certificates/squidCA.crt
+RUN chmod 644 /usr/local/share/ca-certificates/squidCA.crt && update-ca-certificates
+
+# ENTRYPOINT [ "python" ]
+# CMD ["serve_tools.py"]
diff --git a/examples/agent/docker-compose.yml b/examples/agent/docker-compose.yml
new file mode 100644
index 000000000..df04b1fc2
--- /dev/null
+++ b/examples/agent/docker-compose.yml
@@ -0,0 +1,76 @@
+services:
+
+  # Forwards tool calls to the `siloed_tools` container.
+  tools_endpoint:
+    container_name: tools_endpoint
+    depends_on:
+      - siloed_tools
+    image: alpine/socat:latest
+    networks:
+      - private_net
+      - external_net
+    ports:
+      - 8088:8088
+    command: TCP-LISTEN:8088,fork,bind=tools_endpoint TCP-CONNECT:siloed_tools:8088
+
+  # Runs tools w/o direct internet access.
+  #
+  # All outgoing tool traffic must go through outgoing_proxy, which will log even HTTPS requests
+  # (the proxy's self-signed cert is added to this container's root CAs).
+  #
+  # Even if you trust your agents (which you shouldn't), please verify the kind of traffic they emit.
+  siloed_tools:
+    container_name: siloed_tools
+    depends_on:
+      - outgoing_proxy
+    image: local/llama.cpp:isolated-tools
+    build:
+      context: .
+      dockerfile: Dockerfile.tools
+    ports:
+      - 8088:8088
+    networks:
+      - private_net
+    environment:
+      - PORT=8088
+      - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY}
+      - http_proxy=http://outgoing_proxy:3128
+      - https_proxy=http://outgoing_proxy:3128
+    # serve_tools.py only defines the FastAPI `app` (it no longer calls uvicorn.run itself),
+    # so the ASGI server has to be started explicitly; it must bind 0.0.0.0 for tools_endpoint to reach it.
+    entrypoint: uvicorn
+    command: serve_tools:app --host 0.0.0.0 --port 8088
+
+    # entrypoint: /usr/bin/bash
+    # command: ["-c", "pip install --upgrade gguf && apt update && apt install -y curl && curl https://ochafik.com && pip install gguf"]
+
+  # Logs all outgoing traffic, and caches pip & apt packages.
+  outgoing_proxy:
+    container_name: outgoing_proxy
+    image: local/llama.cpp:squid
+    build:
+      context: .
+      dockerfile: Dockerfile.squid
+    volumes:
+      - ./squid/conf/squid.conf:/etc/squid/squid.conf:ro
+      - ./squid/cache:/var/spool/squid
+      - ./squid/logs:/var/log/squid
+      - ./squid/ssl_cert:/etc/squid/ssl_cert:ro
+      - ./squid/ssl_db:/var/spool/squid/ssl_db
+    extra_hosts:
+      - host.docker.internal:host-gateway
+    networks:
+      - private_net
+      - external_net
+    ports:
+      - "3128:3128"
+    restart: unless-stopped
+    entrypoint: /usr/bin/bash
+    command: -c "squid -N -z && ( test -d /var/spool/squid/ssl_db/db || /usr/lib/squid/security_file_certgen -c -s /var/spool/squid/ssl_db/db -M 20MB ) && /usr/sbin/squid -N -d 1 -s"
+
+networks:
+  private_net:
+    driver: bridge
+    internal: true
+  external_net:
+    driver: bridge
diff --git a/examples/agent/requirements.txt b/examples/agent/requirements.txt
index a24d50fb1..a1aae803c 100644
--- a/examples/agent/requirements.txt
+++ b/examples/agent/requirements.txt
@@ -1,6 +1,5 @@
 aiohttp
 fastapi
 ipython
-pydantic
-typer
+pyppeteer
 uvicorn
diff --git a/examples/agent/serve_tools.py b/examples/agent/serve_tools.py
index 197944073..70c4b0225 100644
--- a/examples/agent/serve_tools.py
+++ b/examples/agent/serve_tools.py
@@ -1,17 +1,3 @@
-# /// script
-# requires-python = ">=3.11"
-# dependencies = [
-#     "aiohttp",
-#     "beautifulsoup4",
-#     "fastapi",
-#     "html2text",
-#     "ipython",
-#     "pyppeteer",
-#     "requests",
-#     "typer",
-#     "uvicorn",
-# ]
-# ///
 '''
     Runs simple tools as a FastAPI server.
 
@@ -28,12 +14,9 @@
 '''
 import logging
 import re
-from typing import Optional
 import fastapi
 import os
 import sys
-import typer
-import uvicorn
 
 sys.path.insert(0, os.path.dirname(__file__))
 
@@ -42,6 +25,12 @@
 from tools.search import brave_search
 from tools.python import python, python_tools
 
+verbose = os.environ.get('VERBOSE', '0') == '1'
+include = os.environ.get('INCLUDE_TOOLS')
+exclude = os.environ.get('EXCLUDE_TOOLS')
+
+logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
+
 ALL_TOOLS = {
     fn.__name__: fn
     for fn in [
@@ -51,26 +40,12 @@ ALL_TOOLS = {
     ]
 }
 
-
-def main(host: str = '0.0.0.0', port: int = 8000, verbose: bool = False, include: Optional[str] = None, exclude: Optional[str] = None):
-    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
-
-    def accept_tool(name):
-        if include and not re.match(include, name):
-            return False
-        if exclude and re.match(exclude, name):
-            return False
-        return True
-
-    app = fastapi.FastAPI()
-    for name, fn in ALL_TOOLS.items():
-        if accept_tool(name):
-            app.post(f'/{name}')(fn)
-            if name != 'python':
-                python_tools[name] = fn
-
-    uvicorn.run(app, host=host, port=port)
-
-
-if __name__ == '__main__':
-    typer.run(main)
+app = fastapi.FastAPI()
+for name, fn in ALL_TOOLS.items():
+    if include and not re.match(include, fn.__name__):
+        continue
+    if exclude and re.match(exclude, fn.__name__):
+        continue
+    app.post(f'/{name}')(fn)
+    if name != 'python':
+        python_tools[name] = fn
diff --git a/examples/agent/serve_tools_inside_docker.sh b/examples/agent/serve_tools_inside_docker.sh
index 5fca28edc..8cdf81e76 100755
--- a/examples/agent/serve_tools_inside_docker.sh
+++ b/examples/agent/serve_tools_inside_docker.sh
@@ -1,37 +1,30 @@
 #!/bin/bash
 #
-# Serves tools inside a docker container
+# Serves tools inside a docker container.
+#
+# All outgoing HTTP *and* HTTPS traffic will be logged to `examples/agent/squid/logs/access.log`.
+# Direct traffic to the host machine will be ~blocked, but clever AIs may find a way around it:
+# make sure to have proper firewall rules in place.
+#
+# Take a look at `examples/agent/squid/conf/squid.conf` if you want tools to access your local llama-server(s).
 #
 # Usage:
-#   examples/agent/serve_tools_inside_docker.sh [--verbose] [--include="tool1|tool2|..."] [--exclude="tool1|tool2|..."]
+#   examples/agent/serve_tools_inside_docker.sh
 #
 set -euo pipefail
 
-PORT=${PORT:-8088}
-BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
-DATA_DIR=${DATA_DIR:-$HOME/.llama.cpp/agent/tools/data}
-UV_CACHE_DIR=${UV_CACHE_DIR:-$HOME/.llama.cpp/agent/tools/uv_cache}
+cd examples/agent
 
-mkdir -p "$DATA_DIR"
-mkdir -p "$UV_CACHE_DIR"
+mkdir -p squid/{cache,logs,ssl_cert,ssl_db}
+rm -f squid/logs/{access,cache}.log
 
-args=( --port $PORT "$@" )
-echo "# Warming up the uv cache"
-docker run \
-    -w /src \
-    -v $PWD/examples/agent:/src \
-    -v "$UV_CACHE_DIR":/root/.cache/uv:rw \
-    --rm -it ghcr.io/astral-sh/uv:python3.12-alpine \
-    uv run serve_tools.py --help
+# Generate a self-signed certificate for the outgoing proxy.
+# Tools can only reach out to HTTPS endpoints through that proxy, which they are told to trust blindly.
+openssl req -new -newkey rsa:4096 -days 3650 -nodes -x509 \
+    -keyout squid/ssl_cert/squidCA.pem \
+    -out squid/ssl_cert/squidCA.pem \
+    -subj "/C=US/ST=State/L=City/O=Organization/OU=Org Unit/CN=outgoing_proxy"
 
-echo "# Running inside docker: serve_tools.py ${args[*]}"
-docker run \
-    -p $PORT:$PORT \
-    -w /src \
-    -v $PWD/examples/agent:/src \
-    -v "$UV_CACHE_DIR":/root/.cache/uv \
-    -v "$DATA_DIR":/data:rw \
-    --env "MEMORY_SQLITE_DB=/data/memory.db" \
-    --env "BRAVE_SEARCH_API_KEY=$BRAVE_SEARCH_API_KEY" \
-    --rm -it ghcr.io/astral-sh/uv:python3.12-alpine \
-    uv run serve_tools.py "${args[@]}"
+openssl x509 -outform PEM -in squid/ssl_cert/squidCA.pem -out squid/ssl_cert/squidCA.crt
+
+docker compose up --detach --build
diff --git a/examples/agent/squid/conf/squid.conf b/examples/agent/squid/conf/squid.conf
new file mode 100755
index 000000000..ce649e10a
--- /dev/null
+++ b/examples/agent/squid/conf/squid.conf
@@ -0,0 +1,36 @@
+# Squid Proxy w/ logging of both HTTP *and* HTTPS requests.
+# We set up SSL Bump so the http_proxy & https_proxy environment variables can be set to
+# `http://PROXY_HOST:3128` (e.g. `http://outgoing_proxy:3128`) on any client that trusts the CA certificate.
+
+http_port 3128 ssl-bump cert=/etc/squid/ssl_cert/squidCA.pem tls-cafile=/etc/squid/ssl_cert/squidCA.crt
+
+sslcrtd_program /usr/lib/squid/security_file_certgen -s /var/spool/squid/ssl_db/db -M 20MB
+sslcrtd_children 5
+acl step1 at_step SslBump1
+ssl_bump peek step1
+ssl_bump bump all
+
+# Forbid access to the host.
+# If you want to allow tools to call llama-server on the host (e.g. embeddings, or recursive thoughts),
+# you can comment out the next two lines.
+acl blocked_sites dstdomain host.docker.internal host-gateway
+http_access deny blocked_sites
+
+# Allow all other traffic (you may want to restrict this in a production environment)
+http_access allow all
+
+# Cache Python packages (the pattern is matched against the URL, so the host name follows a `/` or a `.`)
+refresh_pattern -i (/|\.)(files\.pythonhosted\.org|pypi\.org)/.*?\.(whl|zip|tar\.gz)$ 10080 90% 43200 reload-into-ims
+
+# Cache Debian packages (.deb / .udeb and source tarballs)
+refresh_pattern \.debian\.org/.*?\.(deb|udeb|tar\.(gz|xz|bz2))$ 129600 100% 129600
+
+# Configure cache
+cache_dir ufs /var/spool/squid 10000 16 256
+cache_mem 200 MB
+maximum_object_size 1024 MB
+
+# Configure logs
+cache_log /var/log/squid/cache.log
+access_log /var/log/squid/access.log squid
+cache_store_log none