Full text support in peewee
Adds support for full text search in peewee with the creation of two new field types: `FullIndexedCharField` and `FullIndexedTextField`. Note that this change depends upon https://github.com/zzzeek/sqlalchemy/pull/339 [Delivers #137453279] [Delivers #137453317]
This commit is contained in:
parent
048f932094
commit
d89c79b92d
4 changed files with 105 additions and 2 deletions
|
@ -22,6 +22,7 @@ from sqlalchemy.engine.url import make_url
|
||||||
import resumablehashlib
|
import resumablehashlib
|
||||||
|
|
||||||
from data.fields import ResumableSHA256Field, ResumableSHA1Field, JSONField, Base64BinaryField
|
from data.fields import ResumableSHA256Field, ResumableSHA1Field, JSONField, Base64BinaryField
|
||||||
|
from data.text import match_mysql, match_like
|
||||||
from data.read_slave import ReadSlaveModel
|
from data.read_slave import ReadSlaveModel
|
||||||
from util.names import urn_generator
|
from util.names import urn_generator
|
||||||
|
|
||||||
|
@ -42,6 +43,14 @@ _SCHEME_DRIVERS = {
|
||||||
'postgresql+psycopg2': PostgresqlDatabase,
|
'postgresql+psycopg2': PostgresqlDatabase,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SCHEME_MATCH_FUNCTION = {
|
||||||
|
'mysql': match_mysql,
|
||||||
|
'mysql+pymysql': match_mysql,
|
||||||
|
'sqlite': match_like,
|
||||||
|
'postgresql': match_like,
|
||||||
|
'postgresql+psycopg2': match_like,
|
||||||
|
}
|
||||||
|
|
||||||
SCHEME_RANDOM_FUNCTION = {
|
SCHEME_RANDOM_FUNCTION = {
|
||||||
'mysql': fn.Rand,
|
'mysql': fn.Rand,
|
||||||
'mysql+pymysql': fn.Rand,
|
'mysql+pymysql': fn.Rand,
|
||||||
|
@ -211,6 +220,7 @@ class TupleSelector(object):
|
||||||
db = Proxy()
|
db = Proxy()
|
||||||
read_slave = Proxy()
|
read_slave = Proxy()
|
||||||
db_random_func = CallableProxy()
|
db_random_func = CallableProxy()
|
||||||
|
db_match_func = CallableProxy()
|
||||||
db_for_update = CallableProxy()
|
db_for_update = CallableProxy()
|
||||||
db_transaction = CallableProxy()
|
db_transaction = CallableProxy()
|
||||||
db_concat_func = CallableProxy()
|
db_concat_func = CallableProxy()
|
||||||
|
@ -257,6 +267,7 @@ def configure(config_object):
|
||||||
|
|
||||||
parsed_write_uri = make_url(write_db_uri)
|
parsed_write_uri = make_url(write_db_uri)
|
||||||
db_random_func.initialize(SCHEME_RANDOM_FUNCTION[parsed_write_uri.drivername])
|
db_random_func.initialize(SCHEME_RANDOM_FUNCTION[parsed_write_uri.drivername])
|
||||||
|
db_match_func.initialize(SCHEME_MATCH_FUNCTION[parsed_write_uri.drivername])
|
||||||
db_for_update.initialize(SCHEME_SPECIALIZED_FOR_UPDATE.get(parsed_write_uri.drivername,
|
db_for_update.initialize(SCHEME_SPECIALIZED_FOR_UPDATE.get(parsed_write_uri.drivername,
|
||||||
real_for_update))
|
real_for_update))
|
||||||
db_concat_func.initialize(SCHEME_SPECIALIZED_CONCAT.get(parsed_write_uri.drivername,
|
db_concat_func.initialize(SCHEME_SPECIALIZED_CONCAT.get(parsed_write_uri.drivername,
|
||||||
|
|
|
@ -2,7 +2,8 @@ import base64
|
||||||
import resumablehashlib
|
import resumablehashlib
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from peewee import TextField
|
from peewee import TextField, CharField, Clause
|
||||||
|
from data.text import prefix_search
|
||||||
|
|
||||||
|
|
||||||
class _ResumableSHAField(TextField):
|
class _ResumableSHAField(TextField):
|
||||||
|
@ -64,3 +65,44 @@ class Base64BinaryField(TextField):
|
||||||
if value is None:
|
if value is None:
|
||||||
return None
|
return None
|
||||||
return base64.b64decode(value)
|
return base64.b64decode(value)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_fulltext(field_class):
|
||||||
|
""" Adds support for full text indexing and lookup to the given field class. """
|
||||||
|
class indexed_class(field_class):
|
||||||
|
# Marker used by SQLAlchemy translation layer to add the proper index for full text searching.
|
||||||
|
__fulltext__ = True
|
||||||
|
|
||||||
|
def __init__(self, match_function, *args, **kwargs):
|
||||||
|
field_class.__init__(self, *args, **kwargs)
|
||||||
|
self.match_function = match_function
|
||||||
|
|
||||||
|
def match(self, query):
|
||||||
|
return self.match_function(self, query)
|
||||||
|
|
||||||
|
def match_prefix(self, query):
|
||||||
|
return prefix_search(self, query)
|
||||||
|
|
||||||
|
def __mod__(self, _):
|
||||||
|
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||||
|
|
||||||
|
def __pow__(self, _):
|
||||||
|
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||||
|
|
||||||
|
def __contains__(self, _):
|
||||||
|
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||||
|
|
||||||
|
def contains(self, _):
|
||||||
|
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||||
|
|
||||||
|
def startswith(self, _):
|
||||||
|
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||||
|
|
||||||
|
def endswith(self, _):
|
||||||
|
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||||
|
|
||||||
|
return indexed_class
|
||||||
|
|
||||||
|
|
||||||
|
FullIndexedCharField = _add_fulltext(CharField)
|
||||||
|
FullIndexedTextField = _add_fulltext(TextField)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
|
from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
|
||||||
DateTime, Date, BigInteger, Index)
|
DateTime, Date, BigInteger, Index, text)
|
||||||
from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
|
from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
|
||||||
ForeignKeyField, BigIntegerField, IntegerField, DateField)
|
ForeignKeyField, BigIntegerField, IntegerField, DateField)
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
||||||
meta = model._meta
|
meta = model._meta
|
||||||
|
|
||||||
all_indexes = set(meta.indexes)
|
all_indexes = set(meta.indexes)
|
||||||
|
fulltext_indexes = []
|
||||||
|
|
||||||
columns = []
|
columns = []
|
||||||
for field in meta.sorted_fields:
|
for field in meta.sorted_fields:
|
||||||
|
@ -60,6 +61,10 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('Unknown column type: %s' % field)
|
raise RuntimeError('Unknown column type: %s' % field)
|
||||||
|
|
||||||
|
if hasattr(field, '__fulltext__'):
|
||||||
|
# Add the fulltext index for the field, based on whether we are under MySQL or Postgres.
|
||||||
|
fulltext_indexes.append(field.name)
|
||||||
|
|
||||||
for option_name in OPTIONS_TO_COPY:
|
for option_name in OPTIONS_TO_COPY:
|
||||||
alchemy_option_name = (OPTION_TRANSLATIONS[option_name]
|
alchemy_option_name = (OPTION_TRANSLATIONS[option_name]
|
||||||
if option_name in OPTION_TRANSLATIONS else option_name)
|
if option_name in OPTION_TRANSLATIONS else option_name)
|
||||||
|
@ -81,4 +86,11 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
||||||
col_refs = [getattr(new_table.c, col_name) for col_name in col_names]
|
col_refs = [getattr(new_table.c, col_name) for col_name in col_names]
|
||||||
Index(index_name, *col_refs, unique=unique)
|
Index(index_name, *col_refs, unique=unique)
|
||||||
|
|
||||||
|
for col_field_name in fulltext_indexes:
|
||||||
|
index_name = '%s_%s__fulltext' % (meta.db_table, col_field_name)
|
||||||
|
col_ref = getattr(new_table.c, col_field_name)
|
||||||
|
Index(index_name, col_ref, postgresql_ops={col_field_name: 'gin_trgm_ops'},
|
||||||
|
postgresql_using='gin',
|
||||||
|
mysql_prefix='FULLTEXT')
|
||||||
|
|
||||||
return metadata
|
return metadata
|
||||||
|
|
38
data/text.py
Normal file
38
data/text.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
from peewee import Clause, SQL, fn, TextField, Field
|
||||||
|
|
||||||
|
def _escape_wildcard(search_query):
|
||||||
|
""" Escapes the wildcards found in the given search query so that they are treated as *characters*
|
||||||
|
rather than wildcards when passed to a LIKE or ILIKE clause with an ESCAPE '!'.
|
||||||
|
"""
|
||||||
|
search_query = (search_query
|
||||||
|
.replace('!', '!!')
|
||||||
|
.replace('%', '!%')
|
||||||
|
.replace('_', '!_')
|
||||||
|
.replace('[', '!['))
|
||||||
|
return search_query
|
||||||
|
|
||||||
|
|
||||||
|
def prefix_search(field, prefix_query):
    """ Returns the wildcard match on the given field for values starting with the prefix query.

        Wildcard characters inside the prefix query are escaped, so only the trailing `%` added
        here acts as a wildcard.
    """
    escaped_prefix = _escape_wildcard(prefix_query)

    # The ESCAPE clause names the same `!` character that _escape_wildcard inserts.
    pattern = Clause(escaped_prefix + '%', SQL("ESCAPE '!'"))
    return Field.__pow__(field, pattern)
|
||||||
|
|
||||||
|
|
||||||
|
def match_mysql(field, search_query):
    """ Generates a full-text match query using a Match operation, which is needed for MySQL.

        The field name is interpolated into the SQL text as a backtick-quoted identifier, while
        the search query is passed to `SQL` as a separate argument rather than being formatted
        into the statement text.

        Raises an Exception if the field name contains a backtick, since such a name would break
        out of the identifier quoting.
    """
    if '`' in field.name:  # Just to be safe.
        raise Exception("How did field name '%s' end up containing a backtick?" % field.name)

    match_column = fn.MATCH(SQL("`%s`" % field.name))
    against_query = fn.AGAINST(SQL('%s', search_query))
    return Clause(match_column, against_query, parens=True)
|
||||||
|
|
||||||
|
|
||||||
|
def match_like(field, search_query):
    """ Generates a full-text match query using an ILIKE operation, which is needed for SQLite and
        Postgres.

        Wildcard characters inside the search query are escaped, so only the leading and trailing
        `%` added here act as wildcards.
    """
    pattern = '%' + _escape_wildcard(search_query) + '%'

    # The ESCAPE clause names the same `!` character that _escape_wildcard inserts.
    return Field.__pow__(field, Clause(pattern, SQL("ESCAPE '!'")))
|
Reference in a new issue