Add LogEntry3 table without the extra indexes and switch to writing to it

This commit is contained in:
Joseph Schorr 2019-01-03 13:50:43 -05:00
parent b6db002729
commit cdb49dbfd3
12 changed files with 114 additions and 49 deletions

View file

@ -7,7 +7,8 @@ from datetime import datetime, timedelta
from cachetools import lru_cache
import data
from data.database import LogEntry, LogEntryKind, User, RepositoryActionCount, db, LogEntry2
from data.database import (LogEntry, LogEntryKind, User, RepositoryActionCount, db, LogEntry3,
LogEntry3)
from data.model import config, user, DataModelException
logger = logging.getLogger(__name__)
@ -16,9 +17,9 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo']
def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None,
namespace=None, ignore=None, model=LogEntry2, id_range=None):
namespace=None, ignore=None, model=LogEntry3, id_range=None):
""" Returns a query for selecting logs from the table, with various options and filters. """
# TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
# TODO(LogMigrate): Remove the branch once we're back on a single table.
assert (start_time is not None and end_time is not None) or (id_range is not None)
joined = (model.select(*selections).switch(model))
@ -64,9 +65,9 @@ def _get_log_entry_kind(name):
def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None,
ignore=None, model=LogEntry2):
ignore=None, model=LogEntry3):
""" Returns the count of logs, by kind and day, for the logs matching the given filters. """
# TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
# TODO(LogMigrate): Remove the branch once we're back on a single table.
date = db.extract_date('day', model.datetime)
selections = [model.kind, date.alias('day'), fn.Count(model.id).alias('count')]
query = _logs_query(selections, start_time, end_time, performer, repository, namespace, ignore,
@ -75,9 +76,9 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n
def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None,
ignore=None, model=LogEntry2, id_range=None):
ignore=None, model=LogEntry3, id_range=None):
""" Returns the logs matching the given filters. """
# TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
# TODO(LogMigrate): Remove the branch once we're back on a single table.
Performer = User.alias()
Account = User.alias()
selections = [model, Performer]
@ -137,7 +138,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository=
}
try:
LogEntry2.create(**log_data)
LogEntry3.create(**log_data)
except PeeweeException as ex:
strict_logging_disabled = config.app_config.get('ALLOW_PULLS_WITHOUT_STRICT_LOGGING')
if strict_logging_disabled and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING:
@ -148,7 +149,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository=
def get_stale_logs_start_id(model):
""" Gets the oldest log entry. """
# TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
# TODO(LogMigrate): Remove the branch once we're back on a single table.
try:
return (model.select(model.id).order_by(model.id).limit(1).tuples())[0][0]
except IndexError:
@ -157,7 +158,7 @@ def get_stale_logs_start_id(model):
def get_stale_logs_cutoff_id(cutoff_date, model):
""" Gets the most recent ID created before the cutoff_date. """
# TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
# TODO(LogMigrate): Remove the branch once we're back on a single table.
try:
return (model.select(fn.Max(model.id)).where(model.datetime <= cutoff_date)
.tuples())[0][0]
@ -167,13 +168,13 @@ def get_stale_logs_cutoff_id(cutoff_date, model):
def get_stale_logs(start_id, end_id, model):
  """ Selects every row of `model` whose ID falls in the closed range
      [start_id, end_id]. The filtered select is handed back to the caller
      without being iterated here.
  """
  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  stale_rows = model.select()
  at_or_after_start = model.id >= start_id
  at_or_before_end = model.id <= end_id
  return stale_rows.where(at_or_after_start, at_or_before_end)
def delete_stale_logs(start_id, end_id, model):
  """ Removes every row of `model` whose ID falls in the closed range
      [start_id, end_id]. Nothing is returned to the caller.
  """
  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  deletion = model.delete()
  in_range = (model.id >= start_id, model.id <= end_id)
  deletion.where(*in_range).execute()
@ -205,7 +206,7 @@ def get_repositories_action_sums(repository_ids):
return action_count_map
def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry2):
def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry3):
""" Returns the minimum ID for logs matching the given repository or namespace in
the logs table, starting at the given start time.
"""
@ -221,7 +222,7 @@ def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, m
model=model)
def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry2):
def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry3):
""" Returns the maximum ID for logs matching the given repository or namespace in
the logs table, ending at the given end time.
"""
@ -238,7 +239,7 @@ def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, mod
def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None,
model=LogEntry2):
model=LogEntry3):
assert (namespace_id is not None) or (repository_id is not None)
query = (model
.select(fn(model.id))

View file

@ -4,7 +4,7 @@ from collections import namedtuple
from peewee import IntegrityError
from datetime import date, timedelta, datetime
from data.database import (Repository, LogEntry, LogEntry2, RepositoryActionCount,
from data.database import (Repository, LogEntry, LogEntry2, LogEntry3, RepositoryActionCount,
RepositorySearchScore, db_random_func, fn)
logger = logging.getLogger(__name__)
@ -52,7 +52,7 @@ def count_repository_actions(to_count):
today = date.today()
yesterday = today - timedelta(days=1)
# TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
# TODO(LogMigrate): Remove the branch once we're back on a single table.
def lookup_action_count(model):
return (model
.select()
@ -61,7 +61,8 @@ def count_repository_actions(to_count):
model.datetime < today)
.count())
actions = lookup_action_count(LogEntry) + lookup_action_count(LogEntry2)
actions = (lookup_action_count(LogEntry3) + lookup_action_count(LogEntry2) +
lookup_action_count(LogEntry))
try:
RepositoryActionCount.create(repository=to_count, date=yesterday, count=actions)
return True

View file

@ -1,6 +1,6 @@
import pytest
from data.database import LogEntry2, User
from data.database import LogEntry3, User
from data.model import config as _config
from data.model.log import log_action
@ -21,8 +21,8 @@ def logentry_kind():
@pytest.fixture()
def logentry(logentry_kind):
# Patches `create` on the log-entry model (with spec=True) and yields the model
# class while that patch is active.
# `logentry_kind` is requested as a fixture dependency but is not referenced in
# the body.
with patch('data.database.LogEntry2.create', spec=True):
yield LogEntry2
with patch('data.database.LogEntry3.create', spec=True):
yield LogEntry3
@pytest.fixture()
def user():