Add a RepositoryActionCount table so we can use it (instead of LogEntry) when scoring repo search results
This commit is contained in:
parent
703f48f194
commit
3f1e8f3c27
8 changed files with 137 additions and 19 deletions
2
conf/init/service/repositoryactioncounter/log/run
Executable file
2
conf/init/service/repositoryactioncounter/log/run
Executable file
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/sh
# runit log service: forward the worker's stdout to syslog,
# tagged (-t) so its lines can be filtered, with the PID included (-i).
exec logger -i -t repositoryactioncounter
|
8
conf/init/service/repositoryactioncounter/run
Executable file
8
conf/init/service/repositoryactioncounter/run
Executable file
|
@ -0,0 +1,8 @@
|
||||||
|
#! /bin/bash
# runit service script: launch the repository action count worker.

echo 'Starting repository action count worker'

# The worker is run as a module, so it must start from the application root.
cd /
# Merge stderr into stdout so everything reaches the companion log service.
venv/bin/python -m workers.repositoryactioncounter 2>&1

# Reached only if the worker process exits; runit will restart the service.
echo 'Repository action worker exited'
|
|
@ -299,7 +299,7 @@ class Repository(BaseModel):
|
||||||
# Therefore, we define our own deletion order here and use the dependency system to verify it.
|
# Therefore, we define our own deletion order here and use the dependency system to verify it.
|
||||||
ordered_dependencies = [RepositoryAuthorizedEmail, RepositoryTag, Image, LogEntry,
|
ordered_dependencies = [RepositoryAuthorizedEmail, RepositoryTag, Image, LogEntry,
|
||||||
RepositoryBuild, RepositoryBuildTrigger, RepositoryNotification,
|
RepositoryBuild, RepositoryBuildTrigger, RepositoryNotification,
|
||||||
RepositoryPermission, AccessToken, Star]
|
RepositoryPermission, AccessToken, Star, RepositoryActionCount]
|
||||||
|
|
||||||
for query, fk in self.dependencies(search_nullable=True):
|
for query, fk in self.dependencies(search_nullable=True):
|
||||||
model = fk.model_class
|
model = fk.model_class
|
||||||
|
@ -560,6 +560,20 @@ class LogEntry(BaseModel):
|
||||||
metadata_json = TextField(default='{}')
|
metadata_json = TextField(default='{}')
|
||||||
|
|
||||||
|
|
||||||
|
class RepositoryActionCount(BaseModel):
  # One row per (repository, day): the number of logged actions taken against
  # the repository on that date. Maintained by the repositoryactioncounter
  # worker and used (instead of scanning LogEntry) when scoring repository
  # search results.
  repository = ForeignKeyField(Repository, index=True)
  count = IntegerField()
  date = DateField(index=True)

  class Meta:
    database = db
    read_slaves = (read_slave,)
    indexes = (
      # create a unique index on repository and date
      (('repository', 'date'), True),
    )
|
||||||
|
|
||||||
|
|
||||||
class OAuthApplication(BaseModel):
|
class OAuthApplication(BaseModel):
|
||||||
client_id = CharField(index=True, default=random_string_generator(length=20))
|
client_id = CharField(index=True, default=random_string_generator(length=20))
|
||||||
client_secret = CharField(default=random_string_generator(length=40))
|
client_secret = CharField(default=random_string_generator(length=40))
|
||||||
|
@ -645,4 +659,4 @@ all_models = [User, Repository, Image, AccessToken, Role, RepositoryPermission,
|
||||||
ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification,
|
ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification,
|
||||||
RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage,
|
RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage,
|
||||||
TeamMemberInvite, ImageStorageSignature, ImageStorageSignatureKind,
|
TeamMemberInvite, ImageStorageSignature, ImageStorageSignatureKind,
|
||||||
AccessTokenKind, Star]
|
AccessTokenKind, Star, RepositoryActionCount]
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
"""Add RepositoryActionCount table
|
||||||
|
|
||||||
|
Revision ID: 30c044b75632
|
||||||
|
Revises: 2b4dc0818a5e
|
||||||
|
Create Date: 2015-04-13 13:21:18.159602
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '30c044b75632'
|
||||||
|
down_revision = '2b4dc0818a5e'
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade(tables):
  """ Create the repositoryactioncount table with its foreign key to repository
      and its indexes, including the unique (repository_id, date) index that
      enforces at most one count row per repository per day.
  """
  ### commands auto generated by Alembic - please adjust! ###
  op.create_table('repositoryactioncount',
      sa.Column('id', sa.Integer(), nullable=False),
      sa.Column('repository_id', sa.Integer(), nullable=False),
      sa.Column('count', sa.Integer(), nullable=False),
      sa.Column('date', sa.Date(), nullable=False),
      sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_repositoryactioncount_repository_id_repository')),
      sa.PrimaryKeyConstraint('id', name=op.f('pk_repositoryactioncount'))
  )
  # Secondary indexes mirroring the peewee model: lookups by date, by
  # repository, and the unique per-repository-per-day constraint.
  op.create_index('repositoryactioncount_date', 'repositoryactioncount', ['date'], unique=False)
  op.create_index('repositoryactioncount_repository_id', 'repositoryactioncount', ['repository_id'], unique=False)
  op.create_index('repositoryactioncount_repository_id_date', 'repositoryactioncount', ['repository_id', 'date'], unique=True)
  ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade(tables):
  """ Drop the repositoryactioncount table; its indexes are removed with it. """
  ### commands auto generated by Alembic - please adjust! ###
  op.drop_table('repositoryactioncount')
  ### end Alembic commands ###
|
|
@ -18,7 +18,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
|
||||||
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
|
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
|
||||||
db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
|
db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
|
||||||
ImageStorageSignatureKind, validate_database_url, db_for_update,
|
ImageStorageSignatureKind, validate_database_url, db_for_update,
|
||||||
AccessTokenKind, Star, get_epoch_timestamp)
|
AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
|
||||||
from peewee import JOIN_LEFT_OUTER, fn
|
from peewee import JOIN_LEFT_OUTER, fn
|
||||||
from util.validation import (validate_username, validate_email, validate_password,
|
from util.validation import (validate_username, validate_email, validate_password,
|
||||||
INVALID_PASSWORD_MESSAGE)
|
INVALID_PASSWORD_MESSAGE)
|
||||||
|
@ -995,20 +995,19 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
||||||
""" Returns repositories matching the given prefix string and passing the given checker
|
""" Returns repositories matching the given prefix string and passing the given checker
|
||||||
function.
|
function.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
last_week = datetime.now() - timedelta(weeks=1)
|
last_week = datetime.now() - timedelta(weeks=1)
|
||||||
results = []
|
results = []
|
||||||
existing_ids = []
|
existing_ids = []
|
||||||
|
|
||||||
def get_search_results(search_clause, with_count):
|
def get_search_results(search_clause, with_count=False):
|
||||||
if len(results) >= limit:
|
if len(results) >= limit:
|
||||||
return
|
return
|
||||||
|
|
||||||
selected = [Repository, Namespace]
|
select_items = [Repository, Namespace]
|
||||||
if with_count:
|
if with_count:
|
||||||
selected.append(fn.Count(LogEntry.id).alias('count'))
|
select_items.append(fn.Sum(RepositoryActionCount.count).alias('count'))
|
||||||
|
|
||||||
query = (Repository.select(*selected)
|
query = (Repository.select(*select_items)
|
||||||
.join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
|
.join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
|
||||||
.switch(Repository)
|
.switch(Repository)
|
||||||
.where(search_clause)
|
.where(search_clause)
|
||||||
|
@ -1021,9 +1020,10 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
||||||
query = query.where(~(Repository.id << existing_ids))
|
query = query.where(~(Repository.id << existing_ids))
|
||||||
|
|
||||||
if with_count:
|
if with_count:
|
||||||
query = (query.join(LogEntry, JOIN_LEFT_OUTER)
|
query = (query.switch(Repository)
|
||||||
.where(LogEntry.datetime >= last_week)
|
.join(RepositoryActionCount)
|
||||||
.order_by(fn.Count(LogEntry.id).desc()))
|
.where(RepositoryActionCount.date >= last_week)
|
||||||
|
.order_by(fn.Sum(RepositoryActionCount.count).desc()))
|
||||||
|
|
||||||
for result in query:
|
for result in query:
|
||||||
if len(results) >= limit:
|
if len(results) >= limit:
|
||||||
|
@ -1042,13 +1042,13 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
||||||
existing_ids.append(result.id)
|
existing_ids.append(result.id)
|
||||||
|
|
||||||
# For performance reasons, we conduct the repo name and repo namespace searches on their
|
# For performance reasons, we conduct the repo name and repo namespace searches on their
|
||||||
# own, and with and without counts on their own. This also affords us the ability to give
|
# own. This also affords us the ability to give higher precedence to repository names matching
|
||||||
# higher precedence to repository names matching over namespaces, which is semantically correct.
|
# over namespaces, which is semantically correct.
|
||||||
get_search_results((Repository.name ** (prefix + '%')), with_count=True)
|
get_search_results(Repository.name ** (prefix + '%'), with_count=True)
|
||||||
get_search_results((Repository.name ** (prefix + '%')), with_count=False)
|
get_search_results(Repository.name ** (prefix + '%'), with_count=False)
|
||||||
|
|
||||||
get_search_results((Namespace.username ** (prefix + '%')), with_count=True)
|
get_search_results(Namespace.username ** (prefix + '%'), with_count=True)
|
||||||
get_search_results((Namespace.username ** (prefix + '%')), with_count=False)
|
get_search_results(Namespace.username ** (prefix + '%'), with_count=False)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
|
from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
|
||||||
DateTime, BigInteger, Index)
|
DateTime, Date, BigInteger, Index)
|
||||||
from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
|
from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
|
||||||
ForeignKeyField, BigIntegerField, IntegerField)
|
ForeignKeyField, BigIntegerField, IntegerField, DateField)
|
||||||
|
|
||||||
|
|
||||||
OPTIONS_TO_COPY = [
|
OPTIONS_TO_COPY = [
|
||||||
|
@ -42,6 +42,8 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
||||||
alchemy_type = Boolean
|
alchemy_type = Boolean
|
||||||
elif isinstance(field, DateTimeField):
|
elif isinstance(field, DateTimeField):
|
||||||
alchemy_type = DateTime
|
alchemy_type = DateTime
|
||||||
|
elif isinstance(field, DateField):
|
||||||
|
alchemy_type = Date
|
||||||
elif isinstance(field, TextField):
|
elif isinstance(field, TextField):
|
||||||
alchemy_type = Text
|
alchemy_type = Text
|
||||||
elif isinstance(field, ForeignKeyField):
|
elif isinstance(field, ForeignKeyField):
|
||||||
|
|
|
@ -16,6 +16,8 @@ from data import model
|
||||||
from data.model import oauth
|
from data.model import oauth
|
||||||
from app import app, storage as store
|
from app import app, storage as store
|
||||||
|
|
||||||
|
from workers import repositoryactioncounter
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -582,6 +584,9 @@ def populate_database():
|
||||||
'trigger_id': trigger.uuid, 'config': json.loads(trigger.config),
|
'trigger_id': trigger.uuid, 'config': json.loads(trigger.config),
|
||||||
'service': trigger.service.name})
|
'service': trigger.service.name})
|
||||||
|
|
||||||
|
while repositoryactioncounter.count_repository_actions():
|
||||||
|
pass
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
log_level = getattr(logging, app.config['LOGGING_LEVEL'])
|
log_level = getattr(logging, app.config['LOGGING_LEVEL'])
|
||||||
logging.basicConfig(level=log_level)
|
logging.basicConfig(level=log_level)
|
||||||
|
|
51
workers/repositoryactioncounter.py
Normal file
51
workers/repositoryactioncounter.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||||
|
|
||||||
|
from data.database import Repository, LogEntry, RepositoryActionCount, db_random_func, fn
|
||||||
|
from datetime import date, datetime, timedelta
|
||||||
|
|
||||||
|
POLL_PERIOD_SECONDS = 30
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
sched = BlockingScheduler()
|
||||||
|
|
||||||
|
@sched.scheduled_job(trigger='interval', seconds=10)
def count_repository_actions():
  """ Counts yesterday's LogEntry actions for one randomly chosen repository
      that does not yet have a RepositoryActionCount row for that day.

      Returns True if a repository was processed (even if writing the count
      row failed), or False once every repository already has a row for
      yesterday, letting callers loop until all repositories are counted.
  """
  try:
    today = date.today()
    yesterday = today - timedelta(days=1)

    # Repositories that already have a count recorded for yesterday.
    has_yesterday_actions = (RepositoryActionCount.select(RepositoryActionCount.repository)
                             .where(RepositoryActionCount.date == yesterday))

    # Pick a random not-yet-counted repository. `.get()` raises
    # Repository.DoesNotExist when every repository has been counted,
    # which is handled below as the normal termination condition.
    to_count = (Repository.select()
                .where(~(Repository.id << (has_yesterday_actions)))
                .order_by(db_random_func()).get())

    logger.debug('Counting: %s', to_count.id)

    # Number of log entries against this repository during yesterday
    # (inclusive of midnight yesterday, exclusive of midnight today).
    actions = (LogEntry.select()
               .where(LogEntry.repository == to_count,
                      LogEntry.datetime >= yesterday,
                      LogEntry.datetime < today)
               .count())

    # Create the row. A concurrent worker may have inserted one already (the
    # unique (repository, date) index rejects duplicates), so log and move on
    # rather than aborting the worker. Narrowed from a bare `except:`, which
    # also swallowed SystemExit/KeyboardInterrupt and blocked clean shutdown.
    try:
      RepositoryActionCount.create(repository=to_count, date=yesterday, count=actions)
    except Exception:
      logger.exception('Exception when writing count')

    return True

  except Repository.DoesNotExist:
    logger.debug('No further repositories to count')
    return False
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
sched.start()
|
Reference in a new issue