agent : memorize, search_memory (sqlite-vec + sqlite-lembed), fetch + docling (pdf -> markdown), sparql for dbpedia and wikidata

parent bc52c0a4f0
commit c059aecd37

9 changed files with 282 additions and 67 deletions
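At a glance, the new tools end up served by FastAPI on port 8088 (see the Dockerfile CMD below). A minimal smoke test, assuming the per-tool POST routes follow the same layout as memory.py's debug entry point further down:

    import requests

    # Hypothetical smoke test against the siloed tools server (port 8088 per the
    # Dockerfile below; per-tool POST routes are an assumption based on memory.py).
    base = 'http://localhost:8088'
    requests.post(f'{base}/memorize', json=['User is Olivier Chafik']).raise_for_status()
    print(requests.post(f'{base}/search_memory', params={'text': 'Who is the user?'}).json())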
Dockerfile.tools
@@ -1,15 +1,19 @@
 FROM python:3.12-slim

 RUN python -m pip install --upgrade pip && \
     apt install -y wget && \
     apt clean cache

 COPY requirements.txt /root/
 COPY tools /root/tools
 WORKDIR /root
-RUN pip install -r requirements.txt
+RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu && \
+    pip install -r requirements.txt

+COPY ./squid/ssl_cert/squidCA.crt /usr/local/share/ca-certificates/squidCA.crt
+RUN chmod 644 /usr/local/share/ca-certificates/squidCA.crt && update-ca-certificates

+RUN wget https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.Q4_K_M.gguf -O /root/nomic-embed-text-v1.5.Q4_K_M.gguf

 ENTRYPOINT [ "uvicorn" ]
 CMD ["tools:app", "--host", "0.0.0.0", "--port", "8088"]
@@ -22,22 +22,22 @@ Here's how to run an agent w/ local tool call:
 # (otherwise they'd use the generic tool call support, which may be less efficient
 # and consume more tokens)

-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr bartowski/Qwen2.5-7B-Instruct-GGUF -hff Qwen2.5-7B-Instruct-Q4_K_M.gguf

-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr NousResearch/Hermes-3-Llama-3.1-8B-GGUF -hff Hermes-3-Llama-3.1-8B.Q4_K_M.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )

-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr meetkai/functionary-small-v3.2-GGUF -hff functionary-small-v3.2.Q8_0.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py meetkai/functionary-medium-v3.2 )

-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr lmstudio-community/Llama-3.2-3B-Instruct-GGUF -hff Llama-3.2-3B-Instruct-Q6_K.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py meta-llama/Llama-3.2-3B-Instruct )

-./llama-server --jinja -fa -ctk q4_0 -ctv q4_0 --verbose \
+./llama-server --jinja -fa --verbose \
   -hfr bartowski/Mistral-Nemo-Instruct-2407-GGUF -hff Mistral-Nemo-Instruct-2407-Q8_0.gguf \
   --chat-template-file <( python scripts/get_hf_chat_template.py mistralai/Mistral-Nemo-Instruct-2407 )
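Once one of these servers is up, tool calls arrive through llama-server's OpenAI-compatible chat completions API. A hedged sketch of such a request (the endpoint and schema are the standard OpenAI-compatible API; the weather tool is purely illustrative):

    import requests

    # Ask the local llama-server for a tool call (default port 8080 assumed).
    resp = requests.post('http://localhost:8080/v1/chat/completions', json={
        'model': 'local',  # model name is not used for routing by llama-server
        'messages': [{'role': 'user', 'content': 'What is the weather in Paris?'}],
        'tools': [{
            'type': 'function',
            'function': {
                'name': 'get_weather',
                'description': 'Get the current weather for a city',
                'parameters': {
                    'type': 'object',
                    'properties': {'city': {'type': 'string'}},
                    'required': ['city'],
                },
            },
        }],
    })
    # With --jinja + a tool-call-aware template, the reply carries tool_calls:
    print(resp.json()['choices'][0]['message'].get('tool_calls'))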
@@ -13,7 +13,7 @@ services:
       - 8088:8088
     command: TCP-LISTEN:8088,fork,bind=tools_endpoint TCP-CONNECT:siloed_tools:8088

-  # Runs tools w/o direct internet access.
+  # Runs tools w/o **direct** internet access.
   #
   # All outgoing tool traffic must go through outgoing_proxy, which will log even HTTPS requests
   # (the proxy's self-signed cert is added to this container's root CAs).
@@ -22,19 +22,30 @@ services:
   siloed_tools:
     container_name: siloed_tools
     depends_on:
+      # - embeddings_server
       - outgoing_proxy
     image: local/llama.cpp:isolated-tools
+    # sqlite-vec isn't compiled for linux/arm64 so to virtualize on Mac we force this to be x86_64
+    platform: linux/amd64
+    build:
+      context: .
+      dockerfile: Dockerfile.tools
     ports:
       - 8088:8088
     volumes:
       - ./data:/data:rw
     networks:
       - private_net
     environment:
-      - VERBOSE=1
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY}
+      - EMBEDDINGS_DIMS=768
+      - EMBEDDINGS_MODEL_FILE=/models/nomic-embed-text-v1.5.Q4_K_M.gguf
+      # - EMBEDDINGS_ENDPOINT=http://embeddings_server:8081/v1/embeddings
       - EXCLUDE_TOOLS=${EXCLUDE_TOOLS:-}
       - INCLUDE_TOOLS=${INCLUDE_TOOLS:-}
+      - MEMORY_SQLITE_DB=/data/memory.db
+      - REQUESTS_CA_BUNDLE=/usr/local/share/ca-certificates/squidCA.crt
+      - VERBOSE=1
       - http_proxy=http://outgoing_proxy:3128
      - https_proxy=http://outgoing_proxy:3128
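This environment is what makes the silo work: anything honoring `http_proxy`/`https_proxy` and `REQUESTS_CA_BUNDLE` is forced through the logging squid proxy. A small sketch of what tool code sees inside the container (illustrative, not part of the commit):

    import os
    import requests

    # Inside siloed_tools: both vars are injected by the compose file above.
    print(os.environ['https_proxy'])          # http://outgoing_proxy:3128
    print(os.environ['REQUESTS_CA_BUNDLE'])   # squid's CA cert, so MITM'd TLS still verifies

    # requests picks these up automatically, so this HTTPS call is proxied and logged:
    print(requests.get('https://example.com').status_code)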
examples/agent/requirements.txt
@@ -1,7 +1,11 @@
 aiohttp
+aiosqlite
+docling
 fastapi[standard]
+# html2text
-ipython
-html2text
 requests
-pyppeteer
+sparqlwrapper
+sqlite-lembed
+sqlite-rembed
+sqlite-vec
 uvicorn
examples/agent/serve_tools_inside_docker.sh
@@ -27,4 +27,4 @@ openssl req -new -newkey rsa:4096 -days 3650 -nodes -x509 \

 openssl x509 -outform PEM -in squid/ssl_cert/squidCA.pem -out squid/ssl_cert/squidCA.crt

-docker compose up --build "$@"
+docker compose --verbose up --build "$@"
examples/agent/tools/__init__.py
@@ -1,27 +1,29 @@
-'''
-Runs simple tools as a FastAPI server.
+# '''
+# Runs simple tools as a FastAPI server.

-Usage (docker isolation - with network access):
+# Usage (docker isolation - with network access):

-    export BRAVE_SEARCH_API_KEY=...
-    ./examples/agent/serve_tools_inside_docker.sh
+#     export BRAVE_SEARCH_API_KEY=...
+#     ./examples/agent/serve_tools_inside_docker.sh

-Usage (non-siloed, DANGEROUS):
+# Usage (non-siloed, DANGEROUS):

-    pip install -r examples/agent/requirements.txt
-    fastapi dev examples/agent/tools/__init__.py --port 8088
-'''
+#     pip install -r examples/agent/requirements.txt
+#     fastapi dev examples/agent/tools/__init__.py --port 8088
+# '''
 import logging
-import re
 import fastapi
 import os
+import re
 import sys

 sys.path.insert(0, os.path.dirname(__file__))

-from .fetch import fetch_page
+from .fetch import fetch
 from .search import brave_search
 from .python import python, python_tools_registry
+from .memory import memorize, search_memory
+from .sparql import wikidata_sparql, dbpedia_sparql

 verbose = os.environ.get('VERBOSE', '0') == '1'
 include = os.environ.get('INCLUDE_TOOLS')
@@ -33,8 +35,12 @@ ALL_TOOLS = {
     fn.__name__: fn
     for fn in [
         python,
-        fetch_page,
+        fetch,
         brave_search,
+        memorize,
+        search_memory,
+        wikidata_sparql,
+        dbpedia_sparql,
     ]
 }
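The `include` variable read above feeds tool filtering via INCLUDE_TOOLS / EXCLUDE_TOOLS; the actual filter is outside this hunk, but a plausible reconstruction (the regex semantics and the TOOLS name are assumptions) looks like:

    import os
    import re

    # Hypothetical reconstruction of the filtering (not shown in this diff):
    include = os.environ.get('INCLUDE_TOOLS')
    exclude = os.environ.get('EXCLUDE_TOOLS')
    TOOLS = {
        name: fn
        for name, fn in ALL_TOOLS.items()
        if (not include or re.match(include, name))
        and (not exclude or not re.match(exclude, name))
    }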
examples/agent/tools/fetch.py
@@ -1,49 +1,13 @@
-import html2text
 import logging
-import requests
+from docling.document_converter import DocumentConverter


-async def fetch_page(url: str):
+def fetch(url: str) -> str:
     '''
-    Fetch a web page (convert it to markdown if possible), using aiohttp.
+    Fetch a document at the provided URL and convert it to Markdown.
     '''
-    try:
-        logging.debug(f'[fetch_page] Fetching %s', url)
-        response = requests.get(url)
-        response.raise_for_status()
-        content = response.text
-    except requests.exceptions.RequestException as e:
-        raise Exception(f'Failed to fetch {url}: {e}')
-
-    # NOTE: Pyppeteer doesn't work great in docker, short of installing a bunch of dependencies
-    # from pyppeteer import launch
-    # from pyppeteer.errors import TimeoutError, NetworkError
-    # browser = await launch()
-    # try:
-    #     page = await browser.newPage()
-    #     response = await page.goto(url)
-
-    #     if not response.ok:
-    #         return FetchResult(error=f'HTTP {response.status} {response.statusText}')
-
-    #     content = await page.content()
-    # except TimeoutError:
-    #     return FetchResult(error='Page load timed out')
-    # except NetworkError:
-    #     return FetchResult(error='Network error occurred')
-    # except Exception as e:
-    #     return FetchResult(error=str(e))
-    # finally:
-    #     await browser.close()
-
-    try:
-        h = html2text.HTML2Text()
-        h.ignore_links = False
-        h.ignore_images = False
-        h.ignore_emphasis = False
-        markdown = h.handle(content)
-        return markdown
-    except Exception as e:
-        logging.warning('[fetch_page] Failed to convert HTML of %s to markdown: %s', url, e)
-        return content
+    logging.debug(f'[fetch] Fetching %s', url)
+    converter = DocumentConverter()
+    result = converter.convert(url)
+    return result.document.export_to_markdown()
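The new fetch() delegates everything (HTML, PDF, etc.) to docling instead of requests + html2text. A minimal sketch of the same conversion outside the tool, using the exact calls from the diff (the sample URL is just an assumption):

    from docling.document_converter import DocumentConverter

    # Convert a remote PDF to Markdown, exactly as fetch() does (sample URL assumed).
    converter = DocumentConverter()
    result = converter.convert('https://arxiv.org/pdf/2408.09869')
    print(result.document.export_to_markdown()[:500])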
examples/agent/tools/memory.py  (new file, 198 lines)
@@ -0,0 +1,198 @@
'''
Memory tools that use sqlite-vec as a vector database (combined w/ sqlite-lembed or sqlite-rembed for embeddings).

Note: it's best to run this in a silo w/:

    ./examples/agent/serve_tools_inside_docker.sh

# Run w/o other tools:

## Prerequisites:

    pip install aiosqlite "fastapi[standard]" sqlite-lembed sqlite-rembed sqlite-vec uvicorn

## Usage w/ sqlite-rembed:

    ./llama-server --port 8081 -fa -c 0 --embeddings --rope-freq-scale 0.75 \
        -hfr nomic-ai/nomic-embed-text-v1.5-GGUF -hff nomic-embed-text-v1.5.Q4_K_M.gguf
    MEMORY_SQLITE_DB=memory_rembed.db \
        EMBEDDINGS_DIMS=768 \
        EMBEDDINGS_ENDPOINT=http://localhost:8081/v1/embeddings \
        python examples/agent/tools/memory.py

## Usage w/ sqlite-lembed:

    MEMORY_SQLITE_DB=memory_lembed.db \
        EMBEDDINGS_DIMS=768 \
        EMBEDDINGS_MODEL_FILE=~/Library/Caches/llama.cpp/nomic-embed-text-v1.5.Q4_K_M.gguf \
        python examples/agent/tools/memory.py

## Test:

    curl -X POST "http://localhost:8000/memorize" -H "Content-Type: application/json" -d '["User is Olivier Chafik", "User is a Software Engineer"]'
    curl -X POST "http://localhost:8000/search_memory?text=What%20do%20we%20do%3F"
'''
import logging
import aiosqlite
import fastapi
import os
import sqlite_lembed
import sqlite_rembed
import sqlite_vec

verbose = os.environ.get('VERBOSE', '0') == '1'
db_path = os.environ['MEMORY_SQLITE_DB']


# Embeddings configuration:
# Can either provide an embeddings model file (to be loaded locally by sqlite-lembed)
# or an embeddings endpoint w/ optional api key (to be queried remotely by sqlite-rembed).
embeddings_dims = int(os.environ['EMBEDDINGS_DIMS'])
if 'EMBEDDINGS_MODEL_FILE' in os.environ:
    local = True
    embed_fn = 'lembed'
    embeddings_model_file = os.environ['EMBEDDINGS_MODEL_FILE']
    logging.info(f'Using local embeddings model: {embeddings_model_file}')
elif 'EMBEDDINGS_ENDPOINT' in os.environ:
    local = False
    embed_fn = 'rembed'
    embeddings_endpoint = os.environ['EMBEDDINGS_ENDPOINT']
    embeddings_api_key = os.environ.get('EMBEDDINGS_API_KEY')
    logging.info(f'Using remote embeddings endpoint: {embeddings_endpoint}')
else:
    raise ValueError('Either EMBEDDINGS_MODEL_FILE or EMBEDDINGS_ENDPOINT must be set')


async def setup_db(db: aiosqlite.Connection):
    await db.enable_load_extension(True)
    await db.load_extension(sqlite_vec.loadable_path())
    if local:
        await db.load_extension(sqlite_lembed.loadable_path())
    else:
        await db.load_extension(sqlite_rembed.loadable_path())
    await db.enable_load_extension(False)

    client_name = 'default'

    if local:
        await db.execute(f'''
            INSERT INTO lembed_models(name, model) VALUES (
                '{client_name}', lembed_model_from_file(?)
            );
        ''', (embeddings_model_file,))
    else:
        await db.execute(f'''
            INSERT INTO rembed_clients(name, options) VALUES (
                '{client_name}', rembed_client_options('format', 'llamafile', 'url', ?, 'key', ?)
            );
        ''', (embeddings_endpoint, embeddings_api_key))

    async def create_vector_index(table_name, text_column, embedding_column):
        '''
        Create an sqlite-vec virtual table w/ an embedding column
        kept in sync with a source table's text column.
        '''
        await db.execute(f'''
            CREATE VIRTUAL TABLE IF NOT EXISTS {table_name}_{embedding_column} USING vec0(
                {embedding_column} float[{embeddings_dims}]
            )
        ''')
        await db.execute(f'''
            CREATE TRIGGER IF NOT EXISTS insert_{table_name}_{embedding_column}
            AFTER INSERT ON {table_name}
            BEGIN
                INSERT INTO {table_name}_{embedding_column} (rowid, {embedding_column})
                VALUES (NEW.rowid, {embed_fn}('{client_name}', NEW.{text_column}));
            END;
        ''')
        await db.execute(f'''
            CREATE TRIGGER IF NOT EXISTS update_{table_name}_{embedding_column}
            AFTER UPDATE OF {text_column} ON {table_name}
            BEGIN
                UPDATE {table_name}_{embedding_column}
                SET {embedding_column} = {embed_fn}('{client_name}', NEW.{text_column})
                WHERE rowid = NEW.rowid;
            END;
        ''')
        await db.execute(f'''
            CREATE TRIGGER IF NOT EXISTS delete_{table_name}_{embedding_column}
            AFTER DELETE ON {table_name}
            BEGIN
                DELETE FROM {table_name}_{embedding_column}
                WHERE rowid = OLD.rowid;
            END;
        ''')

        def search(text: str, top_n: int, columns: list[str] = ['rowid', text_column]):
            '''
            Search the vector index for the embedding of the provided text and return
            the distance of the top_n nearest matches + their corresponding original table's columns.
            '''
            col_seq = ', '.join(['distance', *(f"{table_name}.{c}" for c in columns)])
            return db.execute(
                f'''
                SELECT {col_seq}
                FROM (
                    SELECT rowid, distance
                    FROM {table_name}_{embedding_column}
                    WHERE {table_name}_{embedding_column}.{embedding_column} MATCH {embed_fn}('{client_name}', ?)
                    ORDER BY distance
                    LIMIT ?
                )
                JOIN {table_name} USING (rowid)
                ''',
                (text, top_n)
            )

        return search

    await db.execute('''
        CREATE TABLE IF NOT EXISTS facts (
            rowid INTEGER PRIMARY KEY AUTOINCREMENT,
            content TEXT NOT NULL
        )
    ''')
    facts_search = await create_vector_index('facts', 'content', 'embedding')

    await db.commit()

    return dict(
        facts_search=facts_search,
    )


async def memorize(facts: list[str]):
    'Memorize a set of statements / facts.'
    async with aiosqlite.connect(db_path) as db:
        await setup_db(db)
        await db.executemany(
            'INSERT INTO facts (content) VALUES (?)',
            [(fact,) for fact in facts]
        )
        await db.commit()


async def search_memory(text: str, top_n: int = 10):
    'Search the memory for the closest information to the provided text (return only the top_n best matches).'
    async with aiosqlite.connect(db_path) as db:
        db_functions = await setup_db(db)
        async with db_functions['facts_search'](text, top_n) as cursor:
            # Return a json array of objects w/ columns
            results = await cursor.fetchall()
            cols = [c[0] for c in cursor.description]
            return [dict(zip(cols, row)) for row in results]


# This main entry point is just here for easy debugging
if __name__ == '__main__':
    import uvicorn

    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    app = fastapi.FastAPI()
    app.post('/memorize')(memorize)
    app.post('/search_memory')(search_memory)
    uvicorn.run(app)
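Beyond the HTTP surface, the two coroutines can be driven directly. A minimal sketch (it assumes the env vars from the module docstring are set, and that you run it from examples/agent/tools so the import below resolves):

    import asyncio
    from memory import memorize, search_memory  # import path is an assumption

    async def main():
        await memorize(['The sky is blue', 'llama.cpp runs GGUF models'])
        # The vec0 triggers embed each fact on insert; search embeds the query
        # and returns the top_n nearest rows w/ their distances.
        print(await search_memory('What does llama.cpp run?', top_n=1))

    asyncio.run(main())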
examples/agent/tools/sparql.py  (new file, 28 lines)
@@ -0,0 +1,28 @@
import json
import logging
from SPARQLWrapper import JSON, SPARQLWrapper


def execute_sparql(endpoint: str, query: str) -> str:
    '''
    Execute a SPARQL query on a given endpoint
    '''
    logging.debug(f'[sparql] Executing on %s:\n%s', endpoint, query)
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return json.dumps(sparql.query().convert(), indent=2)


def wikidata_sparql(query: str) -> str:
    'Execute a SPARQL query on Wikidata'
    return execute_sparql("https://query.wikidata.org/sparql", query)


def dbpedia_sparql(query: str) -> str:
    'Execute a SPARQL query on DBpedia'
    return execute_sparql("https://dbpedia.org/sparql", query)
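As a usage illustration (the query itself is not part of the commit), asking Wikidata for the capital of France through the new tool:

    # Q142 = France, P36 = capital; the label service resolves English names.
    print(wikidata_sparql('''
        SELECT ?capitalLabel WHERE {
            wd:Q142 wdt:P36 ?capital.
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
        }
    '''))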