Fix references to the LogEntry model used and add support for a maximum number of result pages on the logs API

Joseph Schorr 2019-01-02 15:57:55 -05:00
parent 204eb74c4f
commit b6db002729
9 changed files with 48 additions and 31 deletions

View file

@@ -559,3 +559,6 @@ class DefaultConfig(ImmutableConfig):
   # Feature Flag: Whether to support log exporting.
   FEATURE_LOG_EXPORT = True
 
+  # Maximum number of action logs pages that can be returned via the API.
+  ACTION_LOG_MAX_PAGE = None
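
The new setting defaults to `None`, i.e. unbounded pagination. A minimal sketch of capping it in a deployment, assuming a subclass-style config override (the class name and the value `10` are illustrative, not part of this commit):

```python
# Hypothetical deployment override: serve at most 10 pages of action logs
# through the API. The shipped default of None leaves pagination unbounded.
class ProductionConfig(DefaultConfig):
  ACTION_LOG_MAX_PAGE = 10
```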

View file

@@ -16,7 +16,7 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo']
 def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None,
-                namespace=None, ignore=None, model=LogEntry, id_range=None):
+                namespace=None, ignore=None, model=LogEntry2, id_range=None):
   """ Returns a query for selecting logs from the table, with various options and filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   assert (start_time is not None and end_time is not None) or (id_range is not None)
@@ -64,7 +64,7 @@ def _get_log_entry_kind(name):
 def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None,
-                        ignore=None, model=LogEntry):
+                        ignore=None, model=LogEntry2):
   """ Returns the count of logs, by kind and day, for the logs matching the given filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   date = db.extract_date('day', model.datetime)
@@ -75,7 +75,7 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n
 def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None,
-                   ignore=None, model=LogEntry, id_range=None):
+                   ignore=None, model=LogEntry2, id_range=None):
   """ Returns the logs matching the given filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   Performer = User.alias()
@@ -205,49 +205,52 @@ def get_repositories_action_sums(repository_ids):
   return action_count_map
 
 
-def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None):
+def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry2):
   """ Returns the minimum ID for logs matching the given repository or namespace in
       the logs table, starting at the given start time.
   """
   # First try bounded by a day. Most repositories will meet this criteria, and therefore
   # can make a much faster query.
   day_after = start_time + timedelta(days=1)
-  result = _get_bounded_id(fn.Min, LogEntry.datetime >= start_time,
-                           repository_id, namespace_id, LogEntry.datetime < day_after)
+  result = _get_bounded_id(fn.Min, model.datetime >= start_time,
+                           repository_id, namespace_id, model.datetime < day_after, model=model)
   if result is not None:
     return result
 
-  return _get_bounded_id(fn.Min, LogEntry.datetime >= start_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Min, model.datetime >= start_time, repository_id, namespace_id,
+                         model=model)
 
 
-def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None):
+def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry2):
   """ Returns the maximum ID for logs matching the given repository or namespace in
       the logs table, ending at the given end time.
   """
   # First try bounded by a day. Most repositories will meet this criteria, and therefore
   # can make a much faster query.
   day_before = end_time - timedelta(days=1)
-  result = _get_bounded_id(fn.Max, LogEntry.datetime <= end_time,
-                           repository_id, namespace_id, LogEntry.datetime > day_before)
+  result = _get_bounded_id(fn.Max, model.datetime <= end_time,
+                           repository_id, namespace_id, model.datetime > day_before, model=model)
   if result is not None:
     return result
 
-  return _get_bounded_id(fn.Max, LogEntry.datetime <= end_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Max, model.datetime <= end_time, repository_id, namespace_id,
+                         model=model)
 
 
-def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None):
+def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None,
+                    model=LogEntry2):
   assert (namespace_id is not None) or (repository_id is not None)
-  query = (LogEntry
-           .select(fn(LogEntry.id))
+  query = (model
+           .select(fn(model.id))
            .where(filter_clause))
 
   if reduction_clause is not None:
     query = query.where(reduction_clause)
 
   if repository_id is not None:
-    query = query.where(LogEntry.repository == repository_id)
+    query = query.where(model.repository == repository_id)
   else:
-    query = query.where(LogEntry.account == namespace_id)
+    query = query.where(model.account == namespace_id)
 
   row = query.tuples()[0]
   if not row:
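
Since LogEntry and LogEntry2 share the same schema, threading a `model` parameter through these helpers lets one code path query either table. A minimal sketch of calling the updated helpers during the migration window (the timestamps and repository ID are illustrative placeholders, not from this commit):

```python
from datetime import datetime, timedelta

from data.database import LogEntry
from data.model.log import get_minimum_id_for_logs

start_time = datetime.utcnow() - timedelta(days=7)

# New default: the helpers now hit LogEntry2 unless told otherwise.
min_id = get_minimum_id_for_logs(start_time, repository_id=1234)

# Explicitly target the old table while it still holds historical rows.
old_min_id = get_minimum_id_for_logs(start_time, repository_id=1234, model=LogEntry)
```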

View file

@@ -1,6 +1,7 @@
 from peewee import SQL
 
-def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None):
+def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None,
+             max_page=None):
   """ Paginates the given query using an ID range, starting at the optional page_token.
       Returns a *list* of matching results along with an unencrypted page_token for the
       next page, if any. If descending is set to True, orders by the ID descending rather
@@ -27,7 +28,12 @@ def paginate(query, model, descending=False, page_token=None, limit=50, id_alias
       query = query.where(model.id >= start_id)
 
   query = query.limit(limit + 1)
-  return paginate_query(query, limit=limit, id_alias=id_alias)
+
+  page_number = (page_token.get('page_number') or None) if page_token else None
+  if page_number is not None and max_page is not None and page_number > max_page:
+    return [], None
+
+  return paginate_query(query, limit=limit, id_alias=id_alias, page_number=page_number)
 
 
 def pagination_start(page_token=None):
@@ -38,7 +44,7 @@ def pagination_start(page_token=None):
   return None
 
 
-def paginate_query(query, limit=50, id_alias=None):
+def paginate_query(query, limit=50, id_alias=None, page_number=None):
   """ Executes the given query and returns a page's worth of results, as well as the page token
       for the next page (if any).
   """
@@ -47,7 +53,8 @@ def paginate_query(query, limit=50, id_alias=None):
   if len(results) > limit:
     start_id = getattr(results[limit], id_alias or 'id')
     page_token = {
-      'start_id': start_id
+      'start_id': start_id,
+      'page_number': page_number + 1 if page_number else 1,
     }
 
   return results[0:limit], page_token
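
Taken together, these two hunks give the page token a running `page_number`: the first response hands back `page_number = 1`, each later response increments it, and a request whose token's `page_number` exceeds `max_page` short-circuits to an empty page with no further token. A minimal sketch of a caller draining pages under this contract (`fetch_all_pages` and `make_query` are illustrative names, not part of the commit):

```python
def fetch_all_pages(make_query, model, max_page=None):
  """ Illustrative driver: keeps requesting pages until paginate() stops
      returning a token. make_query must build a fresh peewee query on each
      iteration, because paginate() attaches where/limit clauses to the
      query it is given. """
  token = None
  while True:
    results, token = paginate(make_query(), model, page_token=token,
                              limit=50, max_page=max_page)
    for result in results:
      yield result
    if token is None:
      # Either the data ran out or the max_page cap was hit.
      break
```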

View file

@@ -1,6 +1,6 @@
 import pytest
 
-from data.database import LogEntry, User
+from data.database import LogEntry2, User
 from data.model import config as _config
 from data.model.log import log_action
@@ -21,8 +21,8 @@ def logentry_kind():
 
 @pytest.fixture()
 def logentry(logentry_kind):
-  with patch('data.database.LogEntry.create', spec=True):
-    yield LogEntry
+  with patch('data.database.LogEntry2.create', spec=True):
+    yield LogEntry2
 
 @pytest.fixture()
 def user():

View file

@@ -1,5 +1,6 @@
 import itertools
 
+from app import app
 from data import model, database
 from endpoints.api.logs_models_interface import LogEntryDataInterface, LogEntryPage, LogEntry, AggregatedLogEntry
@@ -57,17 +58,18 @@ class PreOCIModel(LogEntryDataInterface):
       logs, next_page_token = model.modelutil.paginate(logs_query, m,
                                                        descending=True, page_token=page_token,
-                                                       limit=20)
+                                                       limit=20,
+                                                       max_page=app.config['ACTION_LOG_MAX_PAGE'])
       return LogEntryPage([create_log(log) for log in logs], next_page_token)
 
-    # First check the LogEntry table for the most recent logs, unless we've been expressly told
+    # First check the LogEntry2 table for the most recent logs, unless we've been expressly told
     # to look inside the "second" table.
     TOKEN_TABLE_KEY2 = 'ttk2'
     is_temp_table = page_token is not None and page_token.get(TOKEN_TABLE_KEY2) == 1
     if is_temp_table:
-      page_result = get_logs(database.LogEntry2)
-    else:
       page_result = get_logs(database.LogEntry)
+    else:
+      page_result = get_logs(database.LogEntry2)
 
     if page_result.next_page_token is None and not is_temp_table:
       page_result = page_result._replace(next_page_token={TOKEN_TABLE_KEY2: 1})
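
After this change LogEntry2 is the primary read target and the legacy LogEntry table becomes the "second" one. Consolidated, the flow this hunk implements looks roughly like the sketch below (`next_logs_page` is an illustrative name; `get_logs` stands in for the inner helper from the hunk that paginates a single table):

```python
from data import database

TOKEN_TABLE_KEY2 = 'ttk2'

def next_logs_page(page_token, get_logs):
  """ Sketch of the cross-table pagination handshake. """
  is_temp_table = page_token is not None and page_token.get(TOKEN_TABLE_KEY2) == 1
  if is_temp_table:
    # The token says LogEntry2 was already exhausted: read the legacy table.
    return get_logs(database.LogEntry)

  page = get_logs(database.LogEntry2)
  if page.next_page_token is None:
    # LogEntry2 ran dry; flag the token so the next request falls through
    # to the older LogEntry table instead of ending pagination here.
    page = page._replace(next_page_token={TOKEN_TABLE_KEY2: 1})
  return page
```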

View file

@@ -920,7 +920,7 @@ def populate_database(minimal=False, with_storage=False):
     model.repositoryactioncount.update_repository_score(to_count)
 
 
-WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry2', 'ManifestChild',
+WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry', 'ManifestChild',
                             'NamespaceGeoRestriction']
 
 
 def find_models_missing_data():

View file

@@ -3,7 +3,7 @@ import time
 import socket
 
 from contextlib import contextmanager
-from data.database import LogEntryKind, LogEntry
+from data.database import LogEntryKind, LogEntry2
 
 class assert_action_logged(object):
   """ Specialized assertion for ensuring that a log entry of a particular kind was added under the
@@ -14,7 +14,7 @@ class assert_action_logged(object):
     self.existing_count = 0
 
   def _get_log_count(self):
-    return LogEntry.select().where(LogEntry.kind == LogEntryKind.get(name=self.log_kind)).count()
+    return LogEntry2.select().where(LogEntry2.kind == LogEntryKind.get(name=self.log_kind)).count()
 
   def __enter__(self):
     self.existing_count = self._get_log_count()
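
For illustration, a test wraps the audited operation in this context manager; the manager snapshots the LogEntry2 row count for the given kind on entry and asserts it grew on exit. The fixture name and `log_action` arguments below are assumptions, not part of the commit:

```python
from data.model.log import log_action
from test.helpers import assert_action_logged

def test_pull_is_audited(initialized_db):
  # Fails on __exit__ unless a 'pull_repo' row was written inside the block.
  # The log_action arguments here are illustrative placeholders.
  with assert_action_logged('pull_repo'):
    log_action('pull_repo', 'devtable', metadata={'repo': 'simple'})
```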

View file

@@ -9,6 +9,8 @@ INTERNAL_ONLY_PROPERTIES = {
   'TESTING',
   'SEND_FILE_MAX_AGE_DEFAULT',
 
+  'ACTION_LOG_MAX_PAGE',
+
   'REPLICATION_QUEUE_NAME',
   'DOCKERFILE_BUILD_QUEUE_NAME',
   'CHUNK_CLEANUP_QUEUE_NAME',

View file

@@ -59,7 +59,7 @@ def test_process_queue_item(namespace, repo_name, expects_logs, app):
   created = storage.get_content(storage.preferred_locations, 'exportedactionlogs/' + storage_id)
   created_json = json.loads(created)
 
-  expected_count = database.LogEntry.select().where(database.LogEntry.repository == repo).count()
+  expected_count = database.LogEntry2.select().where(database.LogEntry2.repository == repo).count()
 
   assert (expected_count > 1) == expects_logs
   assert created_json['export_id'] == 'someid'