agent: memorize, search_memory (sqlite-vec + sqlite-lembed), fetch + docling (pdf -> markdown), sparql for dbpedia and wikidata

ochafik 2024-11-09 18:25:34 +00:00
parent bc52c0a4f0
commit c059aecd37
9 changed files with 282 additions and 67 deletions
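
Taken together, the change turns the dockerized FastAPI server on port 8088 into the agent's full toolbox. A hedged smoke test once `./examples/agent/serve_tools_inside_docker.sh` is up (the per-tool POST routes are an assumption, extrapolated from the `app.post(...)` pattern in `tools/memory.py`):

```python
# Hedged smoke test against the siloed tools server (port 8088 per the
# compose file below); the per-tool POST routes are assumed, not confirmed.
import requests

base = 'http://localhost:8088'
requests.post(f'{base}/memorize', json=['User is Olivier Chafik']).raise_for_status()
print(requests.post(f'{base}/search_memory', params={'text': 'Who is the user?'}).json())
```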

examples/agent/Dockerfile.tools

@@ -1,15 +1,19 @@
 FROM python:3.12-slim
 RUN python -m pip install --upgrade pip && \
+    apt install -y wget && \
+    apt clean cache
 COPY requirements.txt /root/
 COPY tools /root/tools
 WORKDIR /root
-RUN pip install -r requirements.txt
+RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu && \
+    pip install -r requirements.txt
 COPY ./squid/ssl_cert/squidCA.crt /usr/local/share/ca-certificates/squidCA.crt
 RUN chmod 644 /usr/local/share/ca-certificates/squidCA.crt && update-ca-certificates
+RUN wget https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.Q4_K_M.gguf -O /root/nomic-embed-text-v1.5.Q4_K_M.gguf
 ENTRYPOINT [ "uvicorn" ]
 CMD ["tools:app", "--host", "0.0.0.0", "--port", "8088"]

examples/agent/README.md

@@ -22,22 +22,22 @@ Here's how to run an agent w/ local tool call:
 # (otherwise they'd use the generic tool call support, which may be less efficient
 # and consume more tokens)
-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr bartowski/Qwen2.5-7B-Instruct-GGUF -hff Qwen2.5-7B-Instruct-Q4_K_M.gguf
-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr NousResearch/Hermes-3-Llama-3.1-8B-GGUF -hff Hermes-3-Llama-3.1-8B.Q4_K_M.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )
-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr meetkai/functionary-small-v3.2-GGUF -hff functionary-small-v3.2.Q8_0.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py meetkai/functionary-medium-v3.2 )
-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr lmstudio-community/Llama-3.2-3B-Instruct-GGUF -hff Llama-3.2-3B-Instruct-Q6_K.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py meta-llama/Llama-3.2-3B-Instruct )
-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr bartowski/Mistral-Nemo-Instruct-2407-GGUF -hff Mistral-Nemo-Instruct-2407-Q8_0.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py mistralai/Mistral-Nemo-Instruct-2407 )
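
With any of the servers above running (default port 8080), tool calls ride the standard OpenAI-style chat API. A minimal sketch of such a request; the `fetch` tool schema here is illustrative, not the schema the agent actually sends:

```python
# Minimal tool-enabled chat request against llama-server (assumed on
# localhost:8080); the 'fetch' tool schema is illustrative.
import json
import requests

resp = requests.post('http://localhost:8080/v1/chat/completions', json={
    'messages': [{'role': 'user', 'content': 'Summarize https://example.com'}],
    'tools': [{
        'type': 'function',
        'function': {
            'name': 'fetch',
            'description': 'Fetch a document at the provided URL and convert it to Markdown.',
            'parameters': {
                'type': 'object',
                'properties': {'url': {'type': 'string'}},
                'required': ['url'],
            },
        },
    }],
})
print(json.dumps(resp.json()['choices'][0]['message'], indent=2))
```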

examples/agent/docker-compose.yml

@@ -13,7 +13,7 @@ services:
       - 8088:8088
     command: TCP-LISTEN:8088,fork,bind=tools_endpoint TCP-CONNECT:siloed_tools:8088

-  # Runs tools w/o direct internet access.
+  # Runs tools w/o *direct* internet access.
   #
   # All outgoing tool traffic must go through outgoing_proxy, which will log even HTTPS requests
   # (the proxy's self-signed cert is added to this container's root CAs).
@@ -22,19 +22,30 @@ services:
   siloed_tools:
     container_name: siloed_tools
     depends_on:
+      # - embeddings_server
       - outgoing_proxy
     image: local/llama.cpp:isolated-tools
+    # sqlite-vec isn't compiled for linux/arm64 so to virtualize on Mac we force this to be x86_64
+    platform: linux/amd64
     build:
       context: .
       dockerfile: Dockerfile.tools
     ports:
       - 8088:8088
+    volumes:
+      - ./data:/data:rw
     networks:
       - private_net
     environment:
-      - VERBOSE=1
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY}
+      - EMBEDDINGS_DIMS=768
+      - EMBEDDINGS_MODEL_FILE=/models/nomic-embed-text-v1.5.Q4_K_M.gguf
+      # - EMBEDDINGS_ENDPOINT=http://embeddings_server:8081/v1/embeddings
+      - EXCLUDE_TOOLS=${EXCLUDE_TOOLS:-}
+      - INCLUDE_TOOLS=${INCLUDE_TOOLS:-}
+      - MEMORY_SQLITE_DB=/data/memory.db
       - REQUESTS_CA_BUNDLE=/usr/local/share/ca-certificates/squidCA.crt
+      - VERBOSE=1
       - http_proxy=http://outgoing_proxy:3128
       - https_proxy=http://outgoing_proxy:3128
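
The environment block is what keeps the silo transparent to the tools: `requests` honors `http_proxy`/`https_proxy` and validates the proxy's re-signed certificates via `REQUESTS_CA_BUNDLE`, so no tool code needs proxy awareness. A sketch of a check from inside the container (URL illustrative):

```python
# Inside siloed_tools, plain requests calls transit the logging proxy:
# requests reads http(s)_proxy and REQUESTS_CA_BUNDLE from the environment.
import os
import requests

print(os.environ['https_proxy'])         # http://outgoing_proxy:3128
r = requests.get('https://example.com')  # proxied and re-signed by squidCA
print(r.status_code)
```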

examples/agent/requirements.txt

@@ -1,7 +1,11 @@
 aiohttp
+aiosqlite
+docling
 fastapi[standard]
+# html2text
-ipython
-html2text
 requests
-pyppeteer
+sparqlwrapper
+sqlite-lembed
+sqlite-rembed
+sqlite-vec
 uvicorn

examples/agent/serve_tools_inside_docker.sh

@@ -27,4 +27,4 @@ openssl req -new -newkey rsa:4096 -days 3650 -nodes -x509 \
 openssl x509 -outform PEM -in squid/ssl_cert/squidCA.pem -out squid/ssl_cert/squidCA.crt

-docker compose up --build "$@"
+docker compose --verbose up --build "$@"

examples/agent/tools/__init__.py

@@ -1,27 +1,29 @@
-'''
-Runs simple tools as a FastAPI server.
+# '''
+# Runs simple tools as a FastAPI server.
-Usage (docker isolation - with network access):
+# Usage (docker isolation - with network access):
-export BRAVE_SEARCH_API_KEY=...
-./examples/agent/serve_tools_inside_docker.sh
+# export BRAVE_SEARCH_API_KEY=...
+# ./examples/agent/serve_tools_inside_docker.sh
-Usage (non-siloed, DANGEROUS):
+# Usage (non-siloed, DANGEROUS):
-pip install -r examples/agent/requirements.txt
-fastapi dev examples/agent/tools/__init__.py --port 8088
-'''
+# pip install -r examples/agent/requirements.txt
+# fastapi dev examples/agent/tools/__init__.py --port 8088
+# '''
 import logging
-import re
 import fastapi
 import os
+import re
 import sys

 sys.path.insert(0, os.path.dirname(__file__))

-from .fetch import fetch_page
+from .fetch import fetch
 from .search import brave_search
 from .python import python, python_tools_registry
+from .memory import memorize, search_memory
+from .sparql import wikidata_sparql, dbpedia_sparql

 verbose = os.environ.get('VERBOSE', '0') == '1'
 include = os.environ.get('INCLUDE_TOOLS')
@@ -33,8 +35,12 @@ ALL_TOOLS = {
     fn.__name__: fn
     for fn in [
         python,
-        fetch_page,
+        fetch,
         brave_search,
+        memorize,
+        search_memory,
+        wikidata_sparql,
+        dbpedia_sparql,
     ]
 }
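
`ALL_TOOLS` is then presumably narrowed by the `INCLUDE_TOOLS`/`EXCLUDE_TOOLS` regexes read above. That filtering code sits outside this hunk; a plausible sketch, assuming an `exclude = os.environ.get('EXCLUDE_TOOLS')` counterpart to `include`:

```python
# Plausible sketch of tool filtering (the real code is outside this hunk).
TOOLS = {
    name: fn
    for name, fn in ALL_TOOLS.items()
    if (include is None or re.match(include, name))
    and not (exclude and re.match(exclude, name))
}
```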

examples/agent/tools/fetch.py

@@ -1,49 +1,13 @@
-import html2text
 import logging
-import requests
+from docling.document_converter import DocumentConverter

-async def fetch_page(url: str):
+def fetch(url: str) -> str:
     '''
-    Fetch a web page (convert it to markdown if possible), using aiohttp.
+    Fetch a document at the provided URL and convert it to Markdown.
     '''
-    try:
-        logging.debug(f'[fetch_page] Fetching %s', url)
-        response = requests.get(url)
-        response.raise_for_status()
-        content = response.text
-    except requests.exceptions.RequestException as e:
-        raise Exception(f'Failed to fetch {url}: {e}')
-
-    # NOTE: Pyppeteer doesn't work great in docker, short of installing a bunch of dependencies
-    # from pyppeteer import launch
-    # from pyppeteer.errors import TimeoutError, NetworkError
-    # browser = await launch()
-    # try:
-    #     page = await browser.newPage()
-    #     response = await page.goto(url)
-    #     if not response.ok:
-    #         return FetchResult(error=f'HTTP {response.status} {response.statusText}')
-    #     content=await page.content()
-    # except TimeoutError:
-    #     return FetchResult(error='Page load timed out')
-    # except NetworkError:
-    #     return FetchResult(error='Network error occurred')
-    # except Exception as e:
-    #     return FetchResult(error=str(e))
-    # finally:
-    #     await browser.close()
-
-    try:
-        h = html2text.HTML2Text()
-        h.ignore_links = False
-        h.ignore_images = False
-        h.ignore_emphasis = False
-        markdown = h.handle(content)
-        return markdown
-    except Exception as e:
-        logging.warning('[fetch_page] Failed to convert HTML of %s to markdown: %s', url, e)
-        return content
+    logging.debug('[fetch] Fetching %s', url)
+    converter = DocumentConverter()
+    result = converter.convert(url)
+    return result.document.export_to_markdown()
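
Docling accepts HTML as well as PDFs and office formats, which is what the commit title's `pdf -> markdown` refers to. A quick manual check (the import path and URL are illustrative; the first call downloads docling's layout models, so it is slow):

```python
# Manual check of the docling-based fetch; import path and URL illustrative.
from tools.fetch import fetch

md = fetch('https://arxiv.org/pdf/2408.09869')  # a PDF (docling's tech report)
print(md[:500])
```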

examples/agent/tools/memory.py

@@ -0,0 +1,198 @@
'''
Memory tools that use sqlite-vec as a vector database (combined w/ sqlite-lembed or sqlite-rembed for embeddings).

Note: it's best to run this in a silo w/:

    ./examples/agent/serve_tools_inside_docker.sh

# Run w/o other tools:

## Prerequisites:

    pip install aiosqlite "fastapi[standard]" sqlite-lembed sqlite-rembed sqlite-vec uvicorn

## Usage w/ sqlite-rembed:

    ./llama-server --port 8081 -fa -c 0 --embeddings --rope-freq-scale 0.75 \
        -hfr nomic-ai/nomic-embed-text-v1.5-GGUF -hff nomic-embed-text-v1.5.Q4_K_M.gguf

    MEMORY_SQLITE_DB=memory_rembed.db \
    EMBEDDINGS_DIMS=768 \
    EMBEDDINGS_ENDPOINT=http://localhost:8081/v1/embeddings \
    python examples/agent/tools/memory.py

## Usage w/ sqlite-lembed:

    MEMORY_SQLITE_DB=memory_lembed.db \
    EMBEDDINGS_DIMS=768 \
    EMBEDDINGS_MODEL_FILE=~/Library/Caches/llama.cpp/nomic-embed-text-v1.5.Q4_K_M.gguf \
    python examples/agent/tools/memory.py

## Test:

    curl -X POST "http://localhost:8000/memorize" -H "Content-Type: application/json" -d '["User is Olivier Chafik", "User is a Software Engineer"]'
    curl -X POST "http://localhost:8000/search_memory?text=What%20do%20we%20do%3F"
'''
import logging
import aiosqlite
import fastapi
import os
import sqlite_lembed
import sqlite_rembed
import sqlite_vec
verbose = os.environ.get('VERBOSE', '0') == '1'
db_path = os.environ['MEMORY_SQLITE_DB']
# Embeddings configuration:
# Can either provide an embeddings model file (to be loaded locally by sqlite-lembed)
# or an embeddings endpoint w/ optional api key (to be queried remotely by sqlite-rembed).
embeddings_dims = int(os.environ['EMBEDDINGS_DIMS'])
if 'EMBEDDINGS_MODEL_FILE' in os.environ:
    local = True
    embed_fn = 'lembed'
    embeddings_model_file = os.environ['EMBEDDINGS_MODEL_FILE']
    logging.info(f'Using local embeddings model: {embeddings_model_file}')
elif 'EMBEDDINGS_ENDPOINT' in os.environ:
    local = False
    embed_fn = 'rembed'
    embeddings_endpoint = os.environ['EMBEDDINGS_ENDPOINT']
    embeddings_api_key = os.environ.get('EMBEDDINGS_API_KEY')
    logging.info(f'Using remote embeddings endpoint: {embeddings_endpoint}')
else:
    raise ValueError('Either EMBEDDINGS_MODEL_FILE or EMBEDDINGS_ENDPOINT must be set')
async def setup_db(db: aiosqlite.Connection):
    await db.enable_load_extension(True)
    await db.load_extension(sqlite_vec.loadable_path())
    if local:
        await db.load_extension(sqlite_lembed.loadable_path())
    else:
        await db.load_extension(sqlite_rembed.loadable_path())
    await db.enable_load_extension(False)

    client_name = 'default'
    if local:
        await db.execute(f'''
            INSERT INTO lembed_models(name, model) VALUES (
                '{client_name}', lembed_model_from_file(?)
            );
        ''', (embeddings_model_file,))
    else:
        await db.execute(f'''
            INSERT INTO rembed_clients(name, options) VALUES (
                '{client_name}', rembed_client_options('format', 'llamafile', 'url', ?, 'key', ?)
            );
        ''', (embeddings_endpoint, embeddings_api_key))

    async def create_vector_index(table_name, text_column, embedding_column):
        '''
        Create an sqlite-vec virtual table w/ an embedding column
        kept in sync with a source table's text column.
        '''
        await db.execute(f'''
            CREATE VIRTUAL TABLE IF NOT EXISTS {table_name}_{embedding_column} USING vec0(
                {embedding_column} float[{embeddings_dims}]
            )
        ''')
        await db.execute(f'''
            CREATE TRIGGER IF NOT EXISTS insert_{table_name}_{embedding_column}
            AFTER INSERT ON {table_name}
            BEGIN
                INSERT INTO {table_name}_{embedding_column} (rowid, {embedding_column})
                VALUES (NEW.rowid, {embed_fn}('{client_name}', NEW.{text_column}));
            END;
        ''')
        await db.execute(f'''
            CREATE TRIGGER IF NOT EXISTS update_{table_name}_{embedding_column}
            AFTER UPDATE OF {text_column} ON {table_name}
            BEGIN
                UPDATE {table_name}_{embedding_column}
                SET {embedding_column} = {embed_fn}('{client_name}', NEW.{text_column})
                WHERE rowid = NEW.rowid;
            END;
        ''')
        await db.execute(f'''
            CREATE TRIGGER IF NOT EXISTS delete_{table_name}_{embedding_column}
            AFTER DELETE ON {table_name}
            BEGIN
                DELETE FROM {table_name}_{embedding_column}
                WHERE rowid = OLD.rowid;
            END;
        ''')

        def search(text: str, top_n: int, columns: list[str] = ['rowid', text_column]):
            '''
            Search the vector index for the embedding of the provided text and return
            the distance of the top_n nearest matches, along with the corresponding
            columns of the original table.
            '''
            col_seq = ', '.join(['distance', *(f"{table_name}.{c}" for c in columns)])
            return db.execute(
                f'''
                    SELECT {col_seq}
                    FROM (
                        SELECT rowid, distance
                        FROM {table_name}_{embedding_column}
                        WHERE {table_name}_{embedding_column}.{embedding_column} MATCH {embed_fn}('{client_name}', ?)
                        ORDER BY distance
                        LIMIT ?
                    )
                    JOIN {table_name} USING (rowid)
                ''',
                (text, top_n)
            )
        return search

    await db.execute('''
        CREATE TABLE IF NOT EXISTS facts (
            rowid INTEGER PRIMARY KEY AUTOINCREMENT,
            content TEXT NOT NULL
        )
    ''')
    facts_search = await create_vector_index('facts', 'content', 'embedding')

    await db.commit()

    return dict(
        facts_search=facts_search,
    )
async def memorize(facts: list[str]):
    'Memorize a set of statements / facts.'
    async with aiosqlite.connect(db_path) as db:
        await setup_db(db)
        await db.executemany(
            'INSERT INTO facts (content) VALUES (?)',
            [(fact,) for fact in facts]
        )
        await db.commit()
async def search_memory(text: str, top_n: int = 10):
    'Search the memory for the information closest to the provided text (returns only the top_n best matches).'
    async with aiosqlite.connect(db_path) as db:
        db_functions = await setup_db(db)
        async with db_functions['facts_search'](text, top_n) as cursor:
            # Return a JSON-friendly list of objects keyed by column name
            results = await cursor.fetchall()
            cols = [c[0] for c in cursor.description]
            return [dict(zip(cols, row)) for row in results]
# This main entry point is just here for easy debugging
if __name__ == '__main__':
    import uvicorn

    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    app = fastapi.FastAPI()
    app.post('/memorize')(memorize)
    app.post('/search_memory')(search_memory)
    uvicorn.run(app)
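
For quick iteration without HTTP, the two tools can also be driven directly, assuming the same env vars as the docstring examples (`MEMORY_SQLITE_DB`, `EMBEDDINGS_DIMS`, and `EMBEDDINGS_MODEL_FILE` or `EMBEDDINGS_ENDPOINT`):

```python
# Direct (non-HTTP) usage sketch; requires the env vars described above.
import asyncio

asyncio.run(memorize(['User is Olivier Chafik', 'User is a Software Engineer']))
print(asyncio.run(search_memory('What does the user do?', top_n=2)))
```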

examples/agent/tools/sparql.py

@@ -0,0 +1,28 @@
import json
import logging

from SPARQLWrapper import JSON, SPARQLWrapper


def execute_sparql(endpoint: str, query: str) -> str:
    '''
    Execute a SPARQL query on a given endpoint
    '''
    logging.debug('[sparql] Executing on %s:\n%s', endpoint, query)
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return json.dumps(sparql.query().convert(), indent=2)


def wikidata_sparql(query: str) -> str:
    'Execute a SPARQL query on Wikidata'
    return execute_sparql("https://query.wikidata.org/sparql", query)


def dbpedia_sparql(query: str) -> str:
    'Execute a SPARQL query on DBpedia'
    return execute_sparql("https://dbpedia.org/sparql", query)