Add a RepositorySearchScore table and calculation to the RAC worker

This will be used in a followup PR to order search results instead of the RAC join. Currently, the join with the RAC table in search results in a lookup of ~600K rows, which causes searching to take ~6s. This PR denormalizes the data we need, as well as allowing us to score based on a wider band (6 months vs the current 1 week).
This commit is contained in:
Joseph Schorr 2017-03-17 13:51:45 -04:00
parent 1bfca871ec
commit df3f47c79a
10 changed files with 243 additions and 50 deletions

View file

@ -12,7 +12,7 @@ from data.database import (Repository, Namespace, RepositoryTag, Star, Image, Im
Visibility, RepositoryPermission, RepositoryActionCount,
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
db_random_func, db_concat_func)
db_random_func, db_concat_func, RepositorySearchScore)
from data.text import prefix_search
from util.itertoolrecipes import take
@ -42,6 +42,7 @@ def create_repository(namespace, name, creating_user, visibility='private', repo
yesterday = datetime.now() - timedelta(days=1)
RepositoryActionCount.create(repository=repo, count=0, date=yesterday)
RepositorySearchScore.create(repository=repo, score=0)
if creating_user and not creating_user.organization:
RepositoryPermission.create(user=creating_user, repository=repo, role=admin)