Full text support in peewee
Adds support for full text search in peewee with the creation of two new field types: `FullIndexedCharField` and `FullIndexedTextField`. Note that this change depends upon https://github.com/zzzeek/sqlalchemy/pull/339 [Delivers #137453279] [Delivers #137453317]
This commit is contained in:
parent
048f932094
commit
d89c79b92d
4 changed files with 105 additions and 2 deletions
|
@ -22,6 +22,7 @@ from sqlalchemy.engine.url import make_url
|
|||
import resumablehashlib
|
||||
|
||||
from data.fields import ResumableSHA256Field, ResumableSHA1Field, JSONField, Base64BinaryField
|
||||
from data.text import match_mysql, match_like
|
||||
from data.read_slave import ReadSlaveModel
|
||||
from util.names import urn_generator
|
||||
|
||||
|
@ -42,6 +43,14 @@ _SCHEME_DRIVERS = {
|
|||
'postgresql+psycopg2': PostgresqlDatabase,
|
||||
}
|
||||
|
||||
SCHEME_MATCH_FUNCTION = {
|
||||
'mysql': match_mysql,
|
||||
'mysql+pymysql': match_mysql,
|
||||
'sqlite': match_like,
|
||||
'postgresql': match_like,
|
||||
'postgresql+psycopg2': match_like,
|
||||
}
|
||||
|
||||
SCHEME_RANDOM_FUNCTION = {
|
||||
'mysql': fn.Rand,
|
||||
'mysql+pymysql': fn.Rand,
|
||||
|
@ -211,6 +220,7 @@ class TupleSelector(object):
|
|||
db = Proxy()
|
||||
read_slave = Proxy()
|
||||
db_random_func = CallableProxy()
|
||||
db_match_func = CallableProxy()
|
||||
db_for_update = CallableProxy()
|
||||
db_transaction = CallableProxy()
|
||||
db_concat_func = CallableProxy()
|
||||
|
@ -257,6 +267,7 @@ def configure(config_object):
|
|||
|
||||
parsed_write_uri = make_url(write_db_uri)
|
||||
db_random_func.initialize(SCHEME_RANDOM_FUNCTION[parsed_write_uri.drivername])
|
||||
db_match_func.initialize(SCHEME_MATCH_FUNCTION[parsed_write_uri.drivername])
|
||||
db_for_update.initialize(SCHEME_SPECIALIZED_FOR_UPDATE.get(parsed_write_uri.drivername,
|
||||
real_for_update))
|
||||
db_concat_func.initialize(SCHEME_SPECIALIZED_CONCAT.get(parsed_write_uri.drivername,
|
||||
|
|
|
@ -2,7 +2,8 @@ import base64
|
|||
import resumablehashlib
|
||||
import json
|
||||
|
||||
from peewee import TextField
|
||||
from peewee import TextField, CharField, Clause
|
||||
from data.text import prefix_search
|
||||
|
||||
|
||||
class _ResumableSHAField(TextField):
|
||||
|
@ -64,3 +65,44 @@ class Base64BinaryField(TextField):
|
|||
if value is None:
|
||||
return None
|
||||
return base64.b64decode(value)
|
||||
|
||||
|
||||
def _add_fulltext(field_class):
|
||||
""" Adds support for full text indexing and lookup to the given field class. """
|
||||
class indexed_class(field_class):
|
||||
# Marker used by SQLAlchemy translation layer to add the proper index for full text searching.
|
||||
__fulltext__ = True
|
||||
|
||||
def __init__(self, match_function, *args, **kwargs):
|
||||
field_class.__init__(self, *args, **kwargs)
|
||||
self.match_function = match_function
|
||||
|
||||
def match(self, query):
|
||||
return self.match_function(self, query)
|
||||
|
||||
def match_prefix(self, query):
|
||||
return prefix_search(self, query)
|
||||
|
||||
def __mod__(self, _):
|
||||
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||
|
||||
def __pow__(self, _):
|
||||
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||
|
||||
def __contains__(self, _):
|
||||
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||
|
||||
def contains(self, _):
|
||||
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||
|
||||
def startswith(self, _):
|
||||
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||
|
||||
def endswith(self, _):
|
||||
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
|
||||
|
||||
return indexed_class
|
||||
|
||||
|
||||
FullIndexedCharField = _add_fulltext(CharField)
|
||||
FullIndexedTextField = _add_fulltext(TextField)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
|
||||
DateTime, Date, BigInteger, Index)
|
||||
DateTime, Date, BigInteger, Index, text)
|
||||
from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
|
||||
ForeignKeyField, BigIntegerField, IntegerField, DateField)
|
||||
|
||||
|
@ -28,6 +28,7 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
|||
meta = model._meta
|
||||
|
||||
all_indexes = set(meta.indexes)
|
||||
fulltext_indexes = []
|
||||
|
||||
columns = []
|
||||
for field in meta.sorted_fields:
|
||||
|
@ -60,6 +61,10 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
|||
else:
|
||||
raise RuntimeError('Unknown column type: %s' % field)
|
||||
|
||||
if hasattr(field, '__fulltext__'):
|
||||
# Add the fulltext index for the field, based on whether we are under MySQL or Postgres.
|
||||
fulltext_indexes.append(field.name)
|
||||
|
||||
for option_name in OPTIONS_TO_COPY:
|
||||
alchemy_option_name = (OPTION_TRANSLATIONS[option_name]
|
||||
if option_name in OPTION_TRANSLATIONS else option_name)
|
||||
|
@ -81,4 +86,11 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
|||
col_refs = [getattr(new_table.c, col_name) for col_name in col_names]
|
||||
Index(index_name, *col_refs, unique=unique)
|
||||
|
||||
for col_field_name in fulltext_indexes:
|
||||
index_name = '%s_%s__fulltext' % (meta.db_table, col_field_name)
|
||||
col_ref = getattr(new_table.c, col_field_name)
|
||||
Index(index_name, col_ref, postgresql_ops={col_field_name: 'gin_trgm_ops'},
|
||||
postgresql_using='gin',
|
||||
mysql_prefix='FULLTEXT')
|
||||
|
||||
return metadata
|
||||
|
|
38
data/text.py
Normal file
38
data/text.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
from peewee import Clause, SQL, fn, TextField, Field
|
||||
|
||||
def _escape_wildcard(search_query):
|
||||
""" Escapes the wildcards found in the given search query so that they are treated as *characters*
|
||||
rather than wildcards when passed to a LIKE or ILIKE clause with an ESCAPE '!'.
|
||||
"""
|
||||
search_query = (search_query
|
||||
.replace('!', '!!')
|
||||
.replace('%', '!%')
|
||||
.replace('_', '!_')
|
||||
.replace('[', '!['))
|
||||
return search_query
|
||||
|
||||
|
||||
def prefix_search(field, prefix_query):
|
||||
""" Returns the wildcard match for searching for the given prefix query. """
|
||||
# Escape the known wildcard characters.
|
||||
prefix_query = _escape_wildcard(prefix_query)
|
||||
return Field.__pow__(field, Clause(prefix_query + '%', SQL("ESCAPE '!'")))
|
||||
|
||||
|
||||
def match_mysql(field, search_query):
|
||||
""" Generates a full-text match query using a Match operation, which is needed for MySQL.
|
||||
"""
|
||||
if field.name.find('`') >= 0: # Just to be safe.
|
||||
raise Exception("How did field name '%s' end up containing a backtick?" % field.name)
|
||||
|
||||
return Clause(fn.MATCH(SQL("`%s`" % field.name)), fn.AGAINST(SQL('%s', search_query)),
|
||||
parens=True)
|
||||
|
||||
|
||||
def match_like(field, search_query):
|
||||
""" Generates a full-text match query using an ILIKE operation, which is needed for SQLite and
|
||||
Postgres.
|
||||
"""
|
||||
escaped_query = _escape_wildcard(search_query)
|
||||
clause = Clause('%' + escaped_query + '%', SQL("ESCAPE '!'"))
|
||||
return Field.__pow__(field, clause)
|
Reference in a new issue