Add a RepositorySearchScore table and calculation to the RAC worker
This will be used in a follow-up PR to order search results instead of the RAC join. Currently, joining against the RAC table during search requires a lookup of ~600K rows, which causes searching to take ~6s. This PR denormalizes the data we need and also allows us to score based on a wider band (6 months vs. the current 1 week).
This commit is contained in:
parent
1bfca871ec
commit
df3f47c79a
10 changed files with 243 additions and 50 deletions
38
data/model/test/test_repositoryactioncount.py
Normal file
38
data/model/test/test_repositoryactioncount.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
from datetime import date, timedelta
|
||||
|
||||
import pytest
|
||||
|
||||
from data.database import RepositoryActionCount, RepositorySearchScore
|
||||
from data.model.repository import create_repository
|
||||
from data.model.repositoryactioncount import update_repository_score, SEARCH_BUCKETS
|
||||
from test.fixtures import database_uri, init_db_path, sqlitedb_file
|
||||
|
||||
@pytest.mark.parametrize('bucket_sums,expected_score', [
    ((0, 0, 0, 0), 0),

    ((1, 6, 24, 152), 100),
    ((2, 6, 24, 152), 101),
    ((1, 6, 24, 304), 171),

    ((100, 480, 24, 152), 703),
    ((1, 6, 24, 15200), 7131),

    ((300, 500, 1000, 0), 1733),
    ((5000, 0, 0, 0), 5434),
])
def test_update_repository_score(bucket_sums, expected_score, database_uri):
    """Check the search score computed from per-bucket action counts.

    For every entry in SEARCH_BUCKETS, one RepositoryActionCount row is
    written per day of the bucket, starting at today minus the bucket's
    delta. Each row carries the bucket's sum divided by the bucket's number
    of days. The score stored by update_repository_score must then equal
    ``expected_score``.
    """
    # Start from a brand-new repository.
    repo = create_repository('devtable', 'somenewrepo', None, repo_kind='image')

    # Drop the RAC row that create_repository adds, so that only the rows
    # written below contribute to the score.
    purge_query = RepositoryActionCount.delete().where(RepositoryActionCount.repository == repo)
    purge_query.execute()

    # Populate one RAC row per day for each search bucket.
    for position, bucket in enumerate(SEARCH_BUCKETS):
        for offset in range(bucket.days):
            # NOTE(review): the division runs before the `* 1.0`, so on an
            # interpreter with integer `/` the quotient is truncated first.
            # The expected scores above may depend on that -- confirm before
            # changing this expression.
            daily_count = bucket_sums[position] / bucket.days * 1.0
            entry_date = date.today() - bucket.delta + timedelta(days=offset)
            RepositoryActionCount.create(repository=repo,
                                         count=daily_count,
                                         date=entry_date)

    # The update must report success and persist the expected score.
    assert update_repository_score(repo)
    stored_row = RepositorySearchScore.get(repository=repo)
    assert stored_row.score == expected_score
|
Reference in a new issue