Full text search for repository name and description
Adds support for full-text search against the name and description of a repository [Delivers #134867401]
This commit is contained in:
parent
d65d32b284
commit
973a110ac7
5 changed files with 73 additions and 12 deletions
|
@ -21,7 +21,8 @@ from sqlalchemy.engine.url import make_url
|
||||||
|
|
||||||
import resumablehashlib
|
import resumablehashlib
|
||||||
|
|
||||||
from data.fields import ResumableSHA256Field, ResumableSHA1Field, JSONField, Base64BinaryField
|
from data.fields import (ResumableSHA256Field, ResumableSHA1Field, JSONField, Base64BinaryField,
|
||||||
|
FullIndexedTextField, FullIndexedCharField)
|
||||||
from data.text import match_mysql, match_like
|
from data.text import match_mysql, match_like
|
||||||
from data.read_slave import ReadSlaveModel
|
from data.read_slave import ReadSlaveModel
|
||||||
from util.names import urn_generator
|
from util.names import urn_generator
|
||||||
|
@ -31,10 +32,12 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
DEFAULT_DB_CONNECT_TIMEOUT = 10 # seconds
|
DEFAULT_DB_CONNECT_TIMEOUT = 10 # seconds
|
||||||
|
|
||||||
|
|
||||||
# IMAGE_NOT_SCANNED_ENGINE_VERSION is the version found in security_indexed_engine when the
|
# IMAGE_NOT_SCANNED_ENGINE_VERSION is the version found in security_indexed_engine when the
|
||||||
# image has not yet been scanned.
|
# image has not yet been scanned.
|
||||||
IMAGE_NOT_SCANNED_ENGINE_VERSION = -1
|
IMAGE_NOT_SCANNED_ENGINE_VERSION = -1
|
||||||
|
|
||||||
|
|
||||||
_SCHEME_DRIVERS = {
|
_SCHEME_DRIVERS = {
|
||||||
'mysql': MySQLDatabase,
|
'mysql': MySQLDatabase,
|
||||||
'mysql+pymysql': MySQLDatabase,
|
'mysql+pymysql': MySQLDatabase,
|
||||||
|
@ -43,6 +46,7 @@ _SCHEME_DRIVERS = {
|
||||||
'postgresql+psycopg2': PostgresqlDatabase,
|
'postgresql+psycopg2': PostgresqlDatabase,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SCHEME_MATCH_FUNCTION = {
|
SCHEME_MATCH_FUNCTION = {
|
||||||
'mysql': match_mysql,
|
'mysql': match_mysql,
|
||||||
'mysql+pymysql': match_mysql,
|
'mysql+pymysql': match_mysql,
|
||||||
|
@ -51,6 +55,7 @@ SCHEME_MATCH_FUNCTION = {
|
||||||
'postgresql+psycopg2': match_like,
|
'postgresql+psycopg2': match_like,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SCHEME_RANDOM_FUNCTION = {
|
SCHEME_RANDOM_FUNCTION = {
|
||||||
'mysql': fn.Rand,
|
'mysql': fn.Rand,
|
||||||
'mysql+pymysql': fn.Rand,
|
'mysql+pymysql': fn.Rand,
|
||||||
|
@ -59,6 +64,7 @@ SCHEME_RANDOM_FUNCTION = {
|
||||||
'postgresql+psycopg2': fn.Random,
|
'postgresql+psycopg2': fn.Random,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def pipes_concat(arg1, arg2, *extra_args):
|
def pipes_concat(arg1, arg2, *extra_args):
|
||||||
""" Concat function for sqlite, since it doesn't support fn.Concat.
|
""" Concat function for sqlite, since it doesn't support fn.Concat.
|
||||||
Concatenates clauses with || characters.
|
Concatenates clauses with || characters.
|
||||||
|
@ -482,9 +488,9 @@ class Visibility(BaseModel):
|
||||||
|
|
||||||
class Repository(BaseModel):
|
class Repository(BaseModel):
|
||||||
namespace_user = QuayUserField(null=True)
|
namespace_user = QuayUserField(null=True)
|
||||||
name = CharField()
|
name = FullIndexedCharField(match_function=db_match_func)
|
||||||
visibility = ForeignKeyField(Visibility)
|
visibility = ForeignKeyField(Visibility)
|
||||||
description = TextField(null=True)
|
description = FullIndexedTextField(match_function=db_match_func, null=True)
|
||||||
badge_token = CharField(default=uuid_generator)
|
badge_token = CharField(default=uuid_generator)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
"""Add full text search indexing for repo name and description
|
||||||
|
|
||||||
|
Revision ID: e2894a3a3c19
|
||||||
|
Revises: 45fd8b9869d4
|
||||||
|
Create Date: 2017-01-11 13:55:54.890774
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'e2894a3a3c19'
|
||||||
|
down_revision = '45fd8b9869d4'
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects import mysql
|
||||||
|
|
||||||
|
def upgrade(tables):
    """Create the full-text indexes on `repository.description` and `repository.name`.

    On PostgreSQL the `pg_trgm` extension is created first (if it is not already
    present), since the indexes below are declared as GIN indexes using the
    `gin_trgm_ops` operator class. For MySQL the indexes are declared with the
    `FULLTEXT` prefix.

    `tables` is accepted for the migration entry-point signature and is not used here.
    """
    if op.get_bind().engine.name == 'postgresql':
        op.execute('CREATE EXTENSION IF NOT EXISTS pg_trgm')

    # (index name, indexed column) pairs, created in this order.
    fulltext_indexes = (
        ('repository_description__fulltext', 'description'),
        ('repository_name__fulltext', 'name'),
    )
    for index_name, column in fulltext_indexes:
        op.create_index(index_name, 'repository', [column], unique=False,
                        postgresql_using='gin',
                        postgresql_ops={column: 'gin_trgm_ops'},
                        mysql_prefix='FULLTEXT')
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade(tables):
    """Drop the full-text indexes on `repository.name` and `repository.description`.

    Only the two indexes are removed; no statement here drops the `pg_trgm`
    extension. `tables` is accepted for the migration entry-point signature and
    is not used here.
    """
    # Dropped in the reverse of the order in which upgrade() creates them.
    for index_name in ('repository_name__fulltext', 'repository_description__fulltext'):
        op.drop_index(index_name, table_name='repository')
|
|
@ -319,8 +319,8 @@ def get_visible_repositories(username, namespace=None, include_public=False, sta
|
||||||
return query
|
return query
|
||||||
|
|
||||||
|
|
||||||
def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
def get_sorted_matching_repositories(lookup_value, only_public, checker, limit=10):
|
||||||
""" Returns repositories matching the given prefix string and passing the given checker
|
""" Returns repositories matching the given lookup string and passing the given checker
|
||||||
function.
|
function.
|
||||||
"""
|
"""
|
||||||
last_week = datetime.now() - timedelta(weeks=1)
|
last_week = datetime.now() - timedelta(weeks=1)
|
||||||
|
@ -371,14 +371,16 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
||||||
results.append(result)
|
results.append(result)
|
||||||
existing_ids.append(result.id)
|
existing_ids.append(result.id)
|
||||||
|
|
||||||
# For performance reasons, we conduct the repo name and repo namespace searches on their
|
# For performance reasons, we conduct each set of searches on its own. This also affords us the
|
||||||
# own. This also affords us the ability to give higher precedence to repository names matching
|
# ability to easily define an order of precedence.
|
||||||
# over namespaces, which is semantically correct.
|
get_search_results(Repository.name.match(lookup_value), with_count=True)
|
||||||
get_search_results(_basequery.prefix_search(Repository.name, prefix), with_count=True)
|
get_search_results(Repository.name.match(lookup_value), with_count=False)
|
||||||
get_search_results(_basequery.prefix_search(Repository.name, prefix), with_count=False)
|
|
||||||
|
|
||||||
get_search_results(_basequery.prefix_search(Namespace.username, prefix), with_count=True)
|
get_search_results(Repository.description.match(lookup_value), with_count=True)
|
||||||
get_search_results(_basequery.prefix_search(Namespace.username, prefix), with_count=False)
|
get_search_results(Repository.description.match(lookup_value), with_count=False)
|
||||||
|
|
||||||
|
get_search_results(prefix_search(Namespace.username, lookup_value), with_count=True)
|
||||||
|
get_search_results(prefix_search(Namespace.username, lookup_value), with_count=False)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
|
@ -568,6 +568,11 @@ def populate_database(minimal=False, with_storage=False):
|
||||||
[(new_user_2, 'write'), (reader, 'read')],
|
[(new_user_2, 'write'), (reader, 'read')],
|
||||||
(5, [], 'latest'))
|
(5, [], 'latest'))
|
||||||
|
|
||||||
|
__generate_repository(with_storage, new_user_1, 'text-full-repo',
|
||||||
|
'This is a repository for testing text search', False,
|
||||||
|
[(new_user_2, 'write'), (reader, 'read')],
|
||||||
|
(5, [], 'latest'))
|
||||||
|
|
||||||
building = __generate_repository(with_storage, new_user_1, 'building',
|
building = __generate_repository(with_storage, new_user_1, 'building',
|
||||||
'Empty repository which is building.',
|
'Empty repository which is building.',
|
||||||
False, [], (0, [], None))
|
False, [], (0, [], None))
|
||||||
|
|
|
@ -1001,6 +1001,23 @@ class TestConductSearch(ApiTestCase):
|
||||||
self.assertEquals(json['results'][0]['name'], 'shared')
|
self.assertEquals(json['results'][0]['name'], 'shared')
|
||||||
|
|
||||||
|
|
||||||
|
def test_full_text(self):
    """Searching for `full` and for `text search` each yields exactly one result:
    the repository named `text-full-repo`.
    """
    self.login(ADMIN_ACCESS_USER)

    # Make sure the repository is found via `full` and `text search`.
    for lookup in ('full', 'text search'):
        response = self.getJsonResponse(ConductSearch,
                                        params=dict(query=lookup))
        self.assertEquals(1, len(response['results']))

        match = response['results'][0]
        self.assertEquals(match['kind'], 'repository')
        self.assertEquals(match['name'], 'text-full-repo')
|
||||||
|
|
||||||
|
|
||||||
class TestGetMatchingEntities(ApiTestCase):
|
class TestGetMatchingEntities(ApiTestCase):
|
||||||
def test_simple_lookup(self):
|
def test_simple_lookup(self):
|
||||||
self.login(ADMIN_ACCESS_USER)
|
self.login(ADMIN_ACCESS_USER)
|
||||||
|
|
Reference in a new issue