Add a RepositorySearchScore table and calculation to the RAC worker

This will be used in a follow-up PR to order search results, replacing the current join against the RAC table. Today, that join forces a lookup of ~600K rows, which makes a search take ~6s. This PR denormalizes the data we need into a single per-repository score, and also lets us score over a wider band of activity (6 months vs. the current 1 week).
Joseph Schorr 2017-03-17 13:51:45 -04:00
parent 1bfca871ec
commit df3f47c79a
10 changed files with 243 additions and 50 deletions
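
For context before the diff: a minimal sketch of what the new RepositorySearchScore table could look like as a peewee model, assuming the BaseModel and Repository definitions already present in data/database.py. The field names and types here are an assumption for illustration, not taken from this diff.

# Hypothetical sketch only; the real model is added to data/database.py by this PR.
from peewee import BigIntegerField, DateTimeField, ForeignKeyField

from data.database import BaseModel, Repository  # assumed existing Quay model base and table


class RepositorySearchScore(BaseModel):
  # One denormalized score row per repository, indexed so search can ORDER BY it cheaply.
  repository = ForeignKeyField(Repository, unique=True)
  score = BigIntegerField(index=True)
  last_updated = DateTimeField(null=True)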

@@ -0,0 +1,38 @@
from datetime import date, timedelta

import pytest

from data.database import RepositoryActionCount, RepositorySearchScore
from data.model.repository import create_repository
from data.model.repositoryactioncount import update_repository_score, SEARCH_BUCKETS
from test.fixtures import database_uri, init_db_path, sqlitedb_file


@pytest.mark.parametrize('bucket_sums,expected_score', [
  ((0, 0, 0, 0), 0),
  ((1, 6, 24, 152), 100),
  ((2, 6, 24, 152), 101),
  ((1, 6, 24, 304), 171),

  ((100, 480, 24, 152), 703),
  ((1, 6, 24, 15200), 7131),

  ((300, 500, 1000, 0), 1733),
  ((5000, 0, 0, 0), 5434),
])
def test_update_repository_score(bucket_sums, expected_score, database_uri):
  # Create a new repository.
  repo = create_repository('devtable', 'somenewrepo', None, repo_kind='image')

  # Delete the RAC rows created as a side effect of create_repository.
  RepositoryActionCount.delete().where(RepositoryActionCount.repository == repo).execute()

  # Add RAC rows for each of the buckets, spreading each bucket's total evenly over its days.
  for index, bucket in enumerate(SEARCH_BUCKETS):
    for day in range(0, bucket.days):
      RepositoryActionCount.create(repository=repo,
                                   count=(bucket_sums[index] / bucket.days * 1.0),
                                   date=date.today() - bucket.delta + timedelta(days=day))

  assert update_repository_score(repo)
  assert RepositorySearchScore.get(repository=repo).score == expected_score
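
The scoring code itself (data/model/repositoryactioncount.py) is not shown in this excerpt. Based only on what the test exercises, each SEARCH_BUCKETS entry carries a delta and a days count, and update_repository_score reduces the RAC rows in each bucket to a weighted total and writes it to RepositorySearchScore. A rough sketch of that shape follows; the bucket weights, the exact window boundaries, and the upsert are illustrative assumptions and will not reproduce the expected scores in the test above.

# Illustrative sketch only; not the implementation from this PR. The bucket
# weights below are invented for demonstration and do not match the test's
# expected scores.
from collections import namedtuple
from datetime import date, timedelta

from peewee import fn

from data.database import RepositoryActionCount, RepositorySearchScore

SearchBucket = namedtuple('SearchBucket', ['delta', 'days', 'weight'])

# Four windows reaching back ~6 months; recent activity is weighted more heavily.
# The delta/days fields are what the test above relies on.
SEARCH_BUCKETS = [
  SearchBucket(timedelta(days=1), 1, 1.0),
  SearchBucket(timedelta(days=7), 6, 0.6),
  SearchBucket(timedelta(days=31), 24, 0.3),
  SearchBucket(timedelta(days=183), 152, 0.1),
]


def update_repository_score(repo):
  """ Recomputes the denormalized search score for `repo` from its
      RepositoryActionCount rows and upserts it into RepositorySearchScore. """
  today = date.today()
  total_score = 0.0
  previous_delta = timedelta(days=0)

  for bucket in SEARCH_BUCKETS:
    # Each bucket covers the window from its own delta back up to the end of
    # the previous (more recent) bucket.
    start_date = today - bucket.delta
    end_date = today - previous_delta
    previous_delta = bucket.delta

    bucket_sum = (RepositoryActionCount
                  .select(fn.Sum(RepositoryActionCount.count))
                  .where(RepositoryActionCount.repository == repo,
                         RepositoryActionCount.date >= start_date,
                         RepositoryActionCount.date < end_date)
                  .scalar()) or 0
    total_score += bucket_sum * bucket.weight

  # Upsert the per-repository score row.
  updated = (RepositorySearchScore
             .update(score=int(total_score), last_updated=today)
             .where(RepositorySearchScore.repository == repo)
             .execute())
  if not updated:
    RepositorySearchScore.create(repository=repo, score=int(total_score), last_updated=today)

  return True

Whatever the real weights are, the point of the denormalization is the same: the follow-up PR can sort search results with a simple ORDER BY on the indexed score column instead of aggregating ~600K RAC rows at query time.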