From c059aecd37f5122f812f26c785c8a0fb961e28fb Mon Sep 17 00:00:00 2001
From: ochafik
Date: Sat, 9 Nov 2024 18:25:34 +0000
Subject: [PATCH] `agent`: memorize, search_memory (sqlite-vec + sqlite-lembed), fetch + docling (pdf -> markdown), sparql for dbpedia and wikidata

---
 examples/agent/Dockerfile.tools             |   6 +-
 examples/agent/README.md                    |  10 +-
 examples/agent/docker-compose.yml           |  15 +-
 examples/agent/requirements.txt             |  10 +-
 examples/agent/serve_tools_inside_docker.sh |   2 +-
 examples/agent/tools/__init__.py            |  30 +--
 examples/agent/tools/fetch.py               |  50 +----
 examples/agent/tools/memory.py              | 198 ++++++++++++++++++++
 examples/agent/tools/sparql.py              |  28 +++
 9 files changed, 282 insertions(+), 67 deletions(-)
 create mode 100644 examples/agent/tools/memory.py
 create mode 100644 examples/agent/tools/sparql.py

diff --git a/examples/agent/Dockerfile.tools b/examples/agent/Dockerfile.tools
index 641f77a72..826cd4e95 100644
--- a/examples/agent/Dockerfile.tools
+++ b/examples/agent/Dockerfile.tools
@@ -1,15 +1,19 @@
 FROM python:3.12-slim

 RUN python -m pip install --upgrade pip && \
+    apt update && apt install -y wget && \
     apt clean cache

 COPY requirements.txt /root/
 COPY tools /root/tools

 WORKDIR /root
-RUN pip install -r requirements.txt
+RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu && \
+    pip install -r requirements.txt

 COPY ./squid/ssl_cert/squidCA.crt /usr/local/share/ca-certificates/squidCA.crt
 RUN chmod 644 /usr/local/share/ca-certificates/squidCA.crt && update-ca-certificates

+RUN wget https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.Q4_K_M.gguf -O /root/nomic-embed-text-v1.5.Q4_K_M.gguf
+
 ENTRYPOINT [ "uvicorn" ]
 CMD ["tools:app", "--host", "0.0.0.0", "--port", "8088"]
diff --git a/examples/agent/README.md b/examples/agent/README.md
index 627f15634..aee17fa2f 100644
--- a/examples/agent/README.md
+++ b/examples/agent/README.md
@@ -22,22 +22,22 @@ Here's how to run an agent w/ local tool call:
   # (otherwise they'd use the generic tool call support, which may be less efficient
   # and consume more tokens)

-  ./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+  ./llama-server --jinja -fa --verbose \
     -hfr bartowski/Qwen2.5-7B-Instruct-GGUF -hff Qwen2.5-7B-Instruct-Q4_K_M.gguf

-  ./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+  ./llama-server --jinja -fa --verbose \
    -hfr NousResearch/Hermes-3-Llama-3.1-8B-GGUF -hff Hermes-3-Llama-3.1-8B.Q4_K_M.gguf \
     --chat-template-file <( python scripts/get_hf_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )

-  ./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+  ./llama-server --jinja -fa --verbose \
     -hfr meetkai/functionary-small-v3.2-GGUF -hff functionary-small-v3.2.Q8_0.gguf \
     --chat-template-file <( python scripts/get_hf_chat_template.py meetkai/functionary-medium-v3.2 )

-  ./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+  ./llama-server --jinja -fa --verbose \
     -hfr lmstudio-community/Llama-3.2-3B-Instruct-GGUF -hff Llama-3.2-3B-Instruct-Q6_K.gguf \
     --chat-template-file <( python scripts/get_hf_chat_template.py meta-llama/Llama-3.2-3B-Instruct )

-  ./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+  ./llama-server --jinja -fa --verbose \
     -hfr bartowski/Mistral-Nemo-Instruct-2407-GGUF -hff Mistral-Nemo-Instruct-2407-Q8_0.gguf \
     --chat-template-file <( python scripts/get_hf_chat_template.py mistralai/Mistral-Nemo-Instruct-2407 )

diff --git a/examples/agent/docker-compose.yml b/examples/agent/docker-compose.yml
index fbbe005da..440d13ecc 100644
--- a/examples/agent/docker-compose.yml
+++ b/examples/agent/docker-compose.yml
@@ -13,7 +13,7 @@ services:
       - 8088:8088
     command: TCP-LISTEN:8088,fork,bind=tools_endpoint TCP-CONNECT:siloed_tools:8088

-  # Runs tools w/o direct internet access.
+  # Runs tools w/o *direct* internet access.
   #
   # All outgoing tool traffic must go through outgoing_proxy, which will log even HTTPS requests
   # (the proxy's self-signed cert is added to this container's root CAs).
@@ -22,19 +22,30 @@
   siloed_tools:
     container_name: siloed_tools
     depends_on:
+      # - embeddings_server
       - outgoing_proxy
     image: local/llama.cpp:isolated-tools
+    # sqlite-vec isn't compiled for linux/arm64, so to run virtualized on a Mac we force this platform to x86_64
+    platform: linux/amd64
     build:
       context: .
       dockerfile: Dockerfile.tools
     ports:
       - 8088:8088
+    volumes:
+      - ./data:/data:rw
     networks:
       - private_net
     environment:
-      - VERBOSE=1
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY}
+      - EMBEDDINGS_DIMS=768
+      - EMBEDDINGS_MODEL_FILE=/models/nomic-embed-text-v1.5.Q4_K_M.gguf
+      # - EMBEDDINGS_ENDPOINT=http://embeddings_server:8081/v1/embeddings
+      - EXCLUDE_TOOLS=${EXCLUDE_TOOLS:-}
+      - INCLUDE_TOOLS=${INCLUDE_TOOLS:-}
+      - MEMORY_SQLITE_DB=/data/memory.db
       - REQUESTS_CA_BUNDLE=/usr/local/share/ca-certificates/squidCA.crt
+      - VERBOSE=1
       - http_proxy=http://outgoing_proxy:3128
       - https_proxy=http://outgoing_proxy:3128
diff --git a/examples/agent/requirements.txt b/examples/agent/requirements.txt
index 8e2d735fe..b1a312940 100644
--- a/examples/agent/requirements.txt
+++ b/examples/agent/requirements.txt
@@ -1,7 +1,11 @@
-aiohttp
+aiosqlite
+docling
 fastapi[standard]
+# html2text
 ipython
-html2text
 requests
-pyppeteer
+sparqlwrapper
+sqlite-lembed
+sqlite-rembed
+sqlite-vec
 uvicorn
diff --git a/examples/agent/serve_tools_inside_docker.sh b/examples/agent/serve_tools_inside_docker.sh
index fdba83ce3..2d37004a4 100755
--- a/examples/agent/serve_tools_inside_docker.sh
+++ b/examples/agent/serve_tools_inside_docker.sh
@@ -27,4 +27,4 @@ openssl req -new -newkey rsa:4096 -days 3650 -nodes -x509 \

 openssl x509 -outform PEM -in squid/ssl_cert/squidCA.pem -out squid/ssl_cert/squidCA.crt

-docker compose up --build "$@"
+docker compose --verbose up --build "$@"
diff --git a/examples/agent/tools/__init__.py b/examples/agent/tools/__init__.py
index 56e3e9681..f8b2abf0b 100644
--- a/examples/agent/tools/__init__.py
+++ b/examples/agent/tools/__init__.py
@@ -1,27 +1,29 @@
-'''
-    Runs simple tools as a FastAPI server.
+# '''
+#     Runs simple tools as a FastAPI server.

-    Usage (docker isolation - with network access):
+#     Usage (docker isolation - with network access):

-        export BRAVE_SEARCH_API_KEY=...
-        ./examples/agent/serve_tools_inside_docker.sh
+#         export BRAVE_SEARCH_API_KEY=...
+#         ./examples/agent/serve_tools_inside_docker.sh

-    Usage (non-siloed, DANGEROUS):
+#     Usage (non-siloed, DANGEROUS):

-        pip install -r examples/agent/requirements.txt
-        fastapi dev examples/agent/tools/__init__.py --port 8088
-'''
+#         pip install -r examples/agent/requirements.txt
+#         fastapi dev examples/agent/tools/__init__.py --port 8088
+# '''
 import logging
-import re
 import fastapi
 import os
+import re
 import sys

 sys.path.insert(0, os.path.dirname(__file__))

-from .fetch import fetch_page
+from .fetch import fetch
 from .search import brave_search
 from .python import python, python_tools_registry
+from .memory import memorize, search_memory
+from .sparql import wikidata_sparql, dbpedia_sparql

 verbose = os.environ.get('VERBOSE', '0') == '1'
 include = os.environ.get('INCLUDE_TOOLS')
@@ -33,8 +35,12 @@
 ALL_TOOLS = {
     fn.__name__: fn
     for fn in [
         python,
-        fetch_page,
+        fetch,
         brave_search,
+        memorize,
+        search_memory,
+        wikidata_sparql,
+        dbpedia_sparql,
     ]
 }
diff --git a/examples/agent/tools/fetch.py b/examples/agent/tools/fetch.py
index 89cd423b7..4aac1021e 100644
--- a/examples/agent/tools/fetch.py
+++ b/examples/agent/tools/fetch.py
@@ -1,49 +1,13 @@
-import html2text
 import logging
-import requests
+from docling.document_converter import DocumentConverter


-async def fetch_page(url: str):
+def fetch(url: str) -> str:
     '''
-    Fetch a web page (convert it to markdown if possible), using aiohttp.
+    Fetch a document at the provided URL and convert it to Markdown.
     '''
-    try:
-        logging.debug(f'[fetch_page] Fetching %s', url)
-        response = requests.get(url)
-        response.raise_for_status()
-        content = response.text
-    except requests.exceptions.RequestException as e:
-        raise Exception(f'Failed to fetch {url}: {e}')
-
-    # NOTE: Pyppeteer doesn't work great in docker, short of installing a bunch of dependencies
-    # from pyppeteer import launch
-    # from pyppeteer.errors import TimeoutError, NetworkError
-    # browser = await launch()
-    # try:
-    #     page = await browser.newPage()
-    #     response = await page.goto(url)
-
-    #     if not response.ok:
-    #         return FetchResult(error=f'HTTP {response.status} {response.statusText}')
-
-    #     content=await page.content()
-    # except TimeoutError:
-    #     return FetchResult(error='Page load timed out')
-    # except NetworkError:
-    #     return FetchResult(error='Network error occurred')
-    # except Exception as e:
-    #     return FetchResult(error=str(e))
-    # finally:
-    #     await browser.close()
-
-    try:
-        h = html2text.HTML2Text()
-        h.ignore_links = False
-        h.ignore_images = False
-        h.ignore_emphasis = False
-        markdown = h.handle(content)
-        return markdown
-    except Exception as e:
-        logging.warning('[fetch_page] Failed to convert HTML of %s to markdown: %s', url, e)
-        return content
+    logging.debug('[fetch] Fetching %s', url)
+    converter = DocumentConverter()
+    result = converter.convert(url)
+    return result.document.export_to_markdown()
diff --git a/examples/agent/tools/memory.py b/examples/agent/tools/memory.py
new file mode 100644
index 000000000..3a3e87ce9
--- /dev/null
+++ b/examples/agent/tools/memory.py
@@ -0,0 +1,198 @@
+'''
+    Memory tools that use sqlite-vec as a vector database (combined w/ sqlite-lembed or sqlite-rembed for embeddings).
+
+    Note: it's best to run this in a silo w/:
+
+        ./examples/agent/serve_tools_inside_docker.sh
+
+    # Run w/o other tools:
+
+    ## Prerequisites:
+
+        pip install aiosqlite "fastapi[standard]" sqlite-lembed sqlite-rembed sqlite-vec uvicorn
+
+    ## Usage w/ sqlite-rembed:
+
+        ./llama-server --port 8081 -fa -c 0 --embeddings --rope-freq-scale 0.75 \
+            -hfr nomic-ai/nomic-embed-text-v1.5-GGUF -hff nomic-embed-text-v1.5.Q4_K_M.gguf
+        MEMORY_SQLITE_DB=memory_rembed.db \
+        EMBEDDINGS_DIMS=768 \
+        EMBEDDINGS_ENDPOINT=http://localhost:8081/v1/embeddings \
+            python examples/agent/tools/memory.py
+
+    ## Usage w/ sqlite-lembed:
+
+        MEMORY_SQLITE_DB=memory_lembed.db \
+        EMBEDDINGS_DIMS=768 \
+        EMBEDDINGS_MODEL_FILE=~/Library/Caches/llama.cpp/nomic-embed-text-v1.5.Q4_K_M.gguf \
+            python examples/agent/tools/memory.py
+
+    ## Test:
+
+        curl -X POST "http://localhost:8000/memorize" -H "Content-Type: application/json" -d '["User is Olivier Chafik", "User is a Software Engineer"]'
+        curl -X POST "http://localhost:8000/search_memory?text=What%20do%20we%20do%3F"
+'''
+
+import logging
+import aiosqlite
+import fastapi
+import os
+import sqlite_lembed
+import sqlite_rembed
+import sqlite_vec
+
+verbose = os.environ.get('VERBOSE', '0') == '1'
+db_path = os.environ['MEMORY_SQLITE_DB']
+
+
+# Embeddings configuration:
+# Can either provide an embeddings model file (to be loaded locally by sqlite-lembed)
+# or an embeddings endpoint w/ optional api key (to be queried remotely by sqlite-rembed).
+embeddings_dims = int(os.environ['EMBEDDINGS_DIMS'])
+if 'EMBEDDINGS_MODEL_FILE' in os.environ:
+    local = True
+    embed_fn = 'lembed'
+    embeddings_model_file = os.environ['EMBEDDINGS_MODEL_FILE']
+    logging.info(f'Using local embeddings model: {embeddings_model_file}')
+elif 'EMBEDDINGS_ENDPOINT' in os.environ:
+    local = False
+    embed_fn = 'rembed'
+    embeddings_endpoint = os.environ['EMBEDDINGS_ENDPOINT']
+    embeddings_api_key = os.environ.get('EMBEDDINGS_API_KEY')
+    logging.info(f'Using remote embeddings endpoint: {embeddings_endpoint}')
+else:
+    raise ValueError('Either EMBEDDINGS_MODEL_FILE or EMBEDDINGS_ENDPOINT must be set')
+
+
+async def setup_db(db: aiosqlite.Connection):
+
+    await db.enable_load_extension(True)
+    await db.load_extension(sqlite_vec.loadable_path())
+    if local:
+        await db.load_extension(sqlite_lembed.loadable_path())
+    else:
+        await db.load_extension(sqlite_rembed.loadable_path())
+    await db.enable_load_extension(False)
+
+    client_name = 'default'
+
+    if local:
+        await db.execute(f'''
+            INSERT INTO lembed_models(name, model) VALUES (
+                '{client_name}', lembed_model_from_file(?)
+            );
+        ''', (embeddings_model_file,))
+    else:
+        await db.execute(f'''
+            INSERT INTO rembed_clients(name, options) VALUES (
+                '{client_name}', rembed_client_options('format', 'llamafile', 'url', ?, 'key', ?)
+            );
+        ''', (embeddings_endpoint, embeddings_api_key))
+
+    async def create_vector_index(table_name, text_column, embedding_column):
+        '''
+        Create an sqlite-vec virtual table w/ an embedding column
+        kept in sync with a source table's text column.
+        '''
+
+        await db.execute(f'''
+            CREATE VIRTUAL TABLE IF NOT EXISTS {table_name}_{embedding_column} USING vec0(
+                {embedding_column} float[{embeddings_dims}]
+            )
+        ''')
+        await db.execute(f'''
+            CREATE TRIGGER IF NOT EXISTS insert_{table_name}_{embedding_column}
+            AFTER INSERT ON {table_name}
+            BEGIN
+                INSERT INTO {table_name}_{embedding_column} (rowid, {embedding_column})
+                VALUES (NEW.rowid, {embed_fn}('{client_name}', NEW.{text_column}));
+            END;
+        ''')
+        await db.execute(f'''
+            CREATE TRIGGER IF NOT EXISTS update_{table_name}_{embedding_column}
+            AFTER UPDATE OF {text_column} ON {table_name}
+            BEGIN
+                UPDATE {table_name}_{embedding_column}
+                SET {embedding_column} = {embed_fn}('{client_name}', NEW.{text_column})
+                WHERE rowid = NEW.rowid;
+            END;
+        ''')
+        await db.execute(f'''
+            CREATE TRIGGER IF NOT EXISTS delete_{table_name}_{embedding_column}
+            AFTER DELETE ON {table_name}
+            BEGIN
+                DELETE FROM {table_name}_{embedding_column}
+                WHERE rowid = OLD.rowid;
+            END;
+        ''')
+        def search(text: str, top_n: int, columns: list[str] = ['rowid', text_column]):
+            '''
+            Search the vector index for the embedding of the provided text and return
+            the distance of the top_n nearest matches + their corresponding original table's columns.
+            '''
+
+            col_seq = ', '.join(['distance', *(f"{table_name}.{c}" for c in columns)])
+            return db.execute(
+                f'''
+                SELECT {col_seq}
+                FROM (
+                    SELECT rowid, distance
+                    FROM {table_name}_{embedding_column}
+                    WHERE {table_name}_{embedding_column}.{embedding_column} MATCH {embed_fn}('{client_name}', ?)
+                    ORDER BY distance
+                    LIMIT ?
+                )
+                JOIN {table_name} USING (rowid)
+                ''',
+                (text, top_n)
+            )
+        return search
+
+    await db.execute('''
+        CREATE TABLE IF NOT EXISTS facts (
+            rowid INTEGER PRIMARY KEY AUTOINCREMENT,
+            content TEXT NOT NULL
+        )
+    ''')
+    facts_search = await create_vector_index('facts', 'content', 'embedding')
+
+    await db.commit()
+
+    return dict(
+        facts_search=facts_search,
+    )
+
+
+async def memorize(facts: list[str]):
+    'Memorize a set of statements / facts.'
+
+    async with aiosqlite.connect(db_path) as db:
+        await setup_db(db)
+        await db.executemany(
+            'INSERT INTO facts (content) VALUES (?)',
+            [(fact,) for fact in facts]
+        )
+        await db.commit()
+
+
+async def search_memory(text: str, top_n: int = 10):
+    'Search the memory for the information closest to the provided text (return only the top_n best matches).'
+
+    async with aiosqlite.connect(db_path) as db:
+        db_functions = await setup_db(db)
+        async with db_functions['facts_search'](text, top_n) as cursor:
+            # Return a json array of objects w/ columns
+            results = await cursor.fetchall()
+            cols = [c[0] for c in cursor.description]
+            return [dict(zip(cols, row)) for row in results]
+
+
+# This main entry point is just here for easy debugging
+if __name__ == '__main__':
+    import uvicorn
+
+    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
+    app = fastapi.FastAPI()
+    app.post('/memorize')(memorize)
+    app.post('/search_memory')(search_memory)
+    uvicorn.run(app)
diff --git a/examples/agent/tools/sparql.py b/examples/agent/tools/sparql.py
new file mode 100644
index 000000000..657b81f93
--- /dev/null
+++ b/examples/agent/tools/sparql.py
@@ -0,0 +1,28 @@
+import json
+import logging
+from SPARQLWrapper import JSON, SPARQLWrapper
+
+
+def execute_sparql(endpoint: str, query: str) -> str:
+    '''
+    Execute a SPARQL query on a given endpoint
+    '''
+
+    logging.debug('[sparql] Executing on %s:\n%s', endpoint, query)
+    sparql = SPARQLWrapper(endpoint)
+    sparql.setQuery(query)
+    sparql.setReturnFormat(JSON)
+    return json.dumps(sparql.query().convert(), indent=2)
+
+
+def wikidata_sparql(query: str) -> str:
+    'Execute a SPARQL query on Wikidata'
+
+    return execute_sparql("https://query.wikidata.org/sparql", query)
+
+
+def dbpedia_sparql(query: str) -> str:
+    'Execute a SPARQL query on DBpedia'
+
+    return execute_sparql("https://dbpedia.org/sparql", query)
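
Note (not part of the patch): a minimal sketch of exercising the new docling-based fetch tool on its own, assuming docling is installed (see requirements.txt / Dockerfile.tools) and the snippet is run from examples/agent/tools/; the URL is only an illustrative public page:

    # Assumes: run from examples/agent/tools/, with docling installed.
    from fetch import fetch

    # Any http(s) URL to an HTML page or PDF should work; docling converts it to Markdown.
    print(fetch('https://example.com')[:500])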
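Note (not part of the patch): a minimal sketch of calling memorize / search_memory directly, bypassing the FastAPI server. The environment variables and the GGUF path below are assumptions, and they must be set before the import because memory.py reads them at module load time:

    # Assumes: run from examples/agent/tools/, deps from requirements.txt installed,
    # and a local nomic-embed-text GGUF at the (hypothetical) path below.
    import asyncio, os

    os.environ.setdefault('MEMORY_SQLITE_DB', 'memory_lembed.db')
    os.environ.setdefault('EMBEDDINGS_DIMS', '768')
    os.environ.setdefault('EMBEDDINGS_MODEL_FILE', 'nomic-embed-text-v1.5.Q4_K_M.gguf')

    from memory import memorize, search_memory  # env vars above must be set before this import

    async def main():
        await memorize(['User is Olivier Chafik', 'User is a Software Engineer'])
        # Each row carries the vector distance plus the matching fact's rowid and content.
        for row in await search_memory('What does the user do for a living?', top_n=2):
            print(row)

    asyncio.run(main())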
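Note (not part of the patch): a minimal sketch of calling the new wikidata_sparql tool outside the agent loop, assuming sparqlwrapper is installed, the snippet is run from examples/agent/tools/, and outbound access to query.wikidata.org is available; the query itself is only an illustration:

    from sparql import wikidata_sparql

    # Illustrative query: the three most populous items recorded as cities (wd:Q515) on Wikidata.
    query = '''
    SELECT ?cityLabel ?population WHERE {
      ?city wdt:P31 wd:Q515 ;
            wdt:P1082 ?population .
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    ORDER BY DESC(?population)
    LIMIT 3
    '''

    print(wikidata_sparql(query))  # JSON result bindings, pretty-printed by execute_sparql()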