Fix references to LogEntry model used and add support for a maximum page of results on the logs API

Joseph Schorr 2019-01-02 15:57:55 -05:00
parent 204eb74c4f
commit b6db002729
9 changed files with 48 additions and 31 deletions

View file

@@ -559,3 +559,6 @@ class DefaultConfig(ImmutableConfig):
   # Feature Flag: Whether to support log exporting.
   FEATURE_LOG_EXPORT = True
+
+  # Maximum number of action logs pages that can be returned via the API.
+  ACTION_LOG_MAX_PAGE = None
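
The flag defaults to None, which leaves pagination unbounded. A minimal sketch of a deployment override, assuming a subclass-style config (real deployments may load overrides from a config file instead; the class name and value are illustrative):

class ProdConfig(DefaultConfig):
  # Cap the logs API at 10 pages; None (the default) means no cap.
  ACTION_LOG_MAX_PAGE = 10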

View file

@@ -16,7 +16,7 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo']
 def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None,
-                namespace=None, ignore=None, model=LogEntry, id_range=None):
+                namespace=None, ignore=None, model=LogEntry2, id_range=None):
   """ Returns a query for selecting logs from the table, with various options and filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   assert (start_time is not None and end_time is not None) or (id_range is not None)
@@ -64,7 +64,7 @@ def _get_log_entry_kind(name):
 def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None,
-                        ignore=None, model=LogEntry):
+                        ignore=None, model=LogEntry2):
   """ Returns the count of logs, by kind and day, for the logs matching the given filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   date = db.extract_date('day', model.datetime)
@@ -75,7 +75,7 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n
 def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None,
-                   ignore=None, model=LogEntry, id_range=None):
+                   ignore=None, model=LogEntry2, id_range=None):
   """ Returns the logs matching the given filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   Performer = User.alias()
@@ -205,49 +205,52 @@ def get_repositories_action_sums(repository_ids):
   return action_count_map


-def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None):
+def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry2):
   """ Returns the minimum ID for logs matching the given repository or namespace in
       the logs table, starting at the given start time.
   """
   # First try bounded by a day. Most repositories will meet this criteria, and therefore
   # can make a much faster query.
   day_after = start_time + timedelta(days=1)
-  result = _get_bounded_id(fn.Min, LogEntry.datetime >= start_time,
-                           repository_id, namespace_id, LogEntry.datetime < day_after)
+  result = _get_bounded_id(fn.Min, model.datetime >= start_time,
+                           repository_id, namespace_id, model.datetime < day_after, model=model)

   if result is not None:
     return result

-  return _get_bounded_id(fn.Min, LogEntry.datetime >= start_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Min, model.datetime >= start_time, repository_id, namespace_id,
+                         model=model)


-def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None):
+def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry2):
   """ Returns the maximum ID for logs matching the given repository or namespace in
       the logs table, ending at the given end time.
   """
   # First try bounded by a day. Most repositories will meet this criteria, and therefore
   # can make a much faster query.
   day_before = end_time - timedelta(days=1)
-  result = _get_bounded_id(fn.Max, LogEntry.datetime <= end_time,
-                           repository_id, namespace_id, LogEntry.datetime > day_before)
+  result = _get_bounded_id(fn.Max, model.datetime <= end_time,
+                           repository_id, namespace_id, model.datetime > day_before, model=model)

   if result is not None:
     return result

-  return _get_bounded_id(fn.Max, LogEntry.datetime <= end_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Max, model.datetime <= end_time, repository_id, namespace_id,
+                         model=model)


-def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None):
+def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None,
+                    model=LogEntry2):
   assert (namespace_id is not None) or (repository_id is not None)
-  query = (LogEntry
-           .select(fn(LogEntry.id))
+  query = (model
+           .select(fn(model.id))
            .where(filter_clause))

   if reduction_clause is not None:
     query = query.where(reduction_clause)

   if repository_id is not None:
-    query = query.where(LogEntry.repository == repository_id)
+    query = query.where(model.repository == repository_id)
   else:
-    query = query.where(LogEntry.account == namespace_id)
+    query = query.where(model.account == namespace_id)

   row = query.tuples()[0]
   if not row:
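
The pattern behind get_minimum_id_for_logs and get_maximum_id_for_logs is a two-step probe: try a cheap query bounded to a one-day window first, and fall back to the unbounded scan only when that window comes up empty. A self-contained sketch of the same idea over an in-memory list (illustrative data, not Quay code):

from datetime import datetime, timedelta

# Illustrative stand-in for log rows: (id, datetime) pairs, one row every 6 hours.
LOGS = [(i, datetime(2019, 1, 1) + timedelta(hours=6 * i)) for i in range(100)]

def minimum_id_for_logs(start_time):
  # First try bounded by a day: most lookups land in recent, dense data, so a
  # narrow datetime window keeps the (real database's) index scan cheap.
  day_after = start_time + timedelta(days=1)
  bounded = [log_id for log_id, dt in LOGS if start_time <= dt < day_after]
  if bounded:
    return min(bounded)

  # Fall back to the unbounded, slower scan only when the window was empty.
  unbounded = [log_id for log_id, dt in LOGS if dt >= start_time]
  return min(unbounded) if unbounded else None

print(minimum_id_for_logs(datetime(2019, 1, 5)))  # -> 16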

View file

@@ -1,6 +1,7 @@
 from peewee import SQL


-def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None):
+def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None,
+             max_page=None):
   """ Paginates the given query using an ID range, starting at the optional page_token.
       Returns a *list* of matching results along with an unencrypted page_token for the
       next page, if any. If descending is set to True, orders by the ID descending rather
@@ -27,7 +28,12 @@ def paginate(query, model, descending=False, page_token=None, limit=50, id_alias
       query = query.where(model.id >= start_id)

   query = query.limit(limit + 1)
-  return paginate_query(query, limit=limit, id_alias=id_alias)
+
+  page_number = (page_token.get('page_number') or None) if page_token else None
+  if page_number is not None and max_page is not None and page_number > max_page:
+    return [], None
+
+  return paginate_query(query, limit=limit, id_alias=id_alias, page_number=page_number)


 def pagination_start(page_token=None):
@@ -38,7 +44,7 @@ def pagination_start(page_token=None):
   return None


-def paginate_query(query, limit=50, id_alias=None):
+def paginate_query(query, limit=50, id_alias=None, page_number=None):
   """ Executes the given query and returns a page's worth of results, as well as the page token
       for the next page (if any).
   """
@@ -47,7 +53,8 @@ def paginate_query(query, limit=50, id_alias=None):
   if len(results) > limit:
     start_id = getattr(results[limit], id_alias or 'id')
     page_token = {
-      'start_id': start_id
+      'start_id': start_id,
+      'page_number': page_number + 1 if page_number else 1,
     }

   return results[0:limit], page_token
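
Taken together, these two changes make the page number ride along inside the (unencrypted) page token: paginate() reads it back out and short-circuits once max_page is exceeded, and paginate_query() increments it when building the next token. A standalone sketch of that token flow over a plain list, independent of peewee (illustrative only):

ROWS = list(range(1, 106))  # illustrative dataset: ids 1..105

def paginate(rows, page_token=None, limit=50, max_page=None):
  # Honour the cap before doing any work, mirroring the early return above.
  page_number = (page_token.get('page_number') or None) if page_token else None
  if page_number is not None and max_page is not None and page_number > max_page:
    return [], None

  start_id = page_token['start_id'] if page_token else rows[0]
  results = [r for r in rows if r >= start_id][:limit + 1]
  if len(results) <= limit:
    return results, None  # final page: no token
  return results[:limit], {
      'start_id': results[limit],
      'page_number': page_number + 1 if page_number else 1,
  }

pages, token = [], None
while True:
  page, token = paginate(ROWS, page_token=token, max_page=1)
  pages.append(len(page))
  if token is None:
    break
print(pages)  # -> [50, 50, 0]: the page numbered 2 is cut off by max_page=1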

View file

@@ -1,6 +1,6 @@
 import pytest

-from data.database import LogEntry, User
+from data.database import LogEntry2, User
 from data.model import config as _config
 from data.model.log import log_action
@@ -21,8 +21,8 @@ def logentry_kind():
 @pytest.fixture()
 def logentry(logentry_kind):
-  with patch('data.database.LogEntry.create', spec=True):
-    yield LogEntry
+  with patch('data.database.LogEntry2.create', spec=True):
+    yield LogEntry2

 @pytest.fixture()
 def user():
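
The fixture swaps out LogEntry2.create so log_action() can run without touching the database. A minimal, self-contained sketch of the same patching pattern (the class here is a hypothetical stand-in, not the real model):

from unittest.mock import patch

class LogEntry2(object):
  @classmethod
  def create(cls, **kwargs):
    raise RuntimeError('would hit the database')

# spec=True shapes the mock after the real create(), so the test cannot
# drift into calling an attribute that does not exist on the model.
with patch.object(LogEntry2, 'create', spec=True) as mock_create:
  LogEntry2.create(kind='push_repo')  # no database needed inside the patch
  mock_create.assert_called_once()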

View file

@@ -1,5 +1,6 @@
 import itertools

+from app import app
 from data import model, database
 from endpoints.api.logs_models_interface import LogEntryDataInterface, LogEntryPage, LogEntry, AggregatedLogEntry
@@ -57,17 +58,18 @@ class PreOCIModel(LogEntryDataInterface):
       logs, next_page_token = model.modelutil.paginate(logs_query, m,
                                                        descending=True, page_token=page_token,
-                                                       limit=20)
+                                                       limit=20,
+                                                       max_page=app.config['ACTION_LOG_MAX_PAGE'])
       return LogEntryPage([create_log(log) for log in logs], next_page_token)

-    # First check the LogEntry table for the most recent logs, unless we've been expressly told
+    # First check the LogEntry2 table for the most recent logs, unless we've been expressly told
     # to look inside the "second" table.
     TOKEN_TABLE_KEY2 = 'ttk2'
     is_temp_table = page_token is not None and page_token.get(TOKEN_TABLE_KEY2) == 1
     if is_temp_table:
-      page_result = get_logs(database.LogEntry2)
-    else:
       page_result = get_logs(database.LogEntry)
+    else:
+      page_result = get_logs(database.LogEntry2)

     if page_result.next_page_token is None and not is_temp_table:
       page_result = page_result._replace(next_page_token={TOKEN_TABLE_KEY2: 1})
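
Around this hunk, the method reads the newer LogEntry2 table first and, once it is exhausted, returns a token flagged with ttk2 so the next request continues in the older LogEntry table. A standalone sketch of that handshake (toy data, not the real interface):

TOKEN_TABLE_KEY2 = 'ttk2'

# Illustrative stand-ins for the two log tables; the newer one is read first.
LOG_ENTRY2 = ['new-1', 'new-2']
LOG_ENTRY = ['old-1']

def get_page(page_token=None):
  # A ttk2 token means the newer table was already exhausted by a prior request.
  is_temp_table = page_token is not None and page_token.get(TOKEN_TABLE_KEY2) == 1
  rows = LOG_ENTRY if is_temp_table else LOG_ENTRY2
  next_token = None  # each toy table fits in a single page here

  # When the newer table runs out, hand back a token pointing at the older one.
  if next_token is None and not is_temp_table:
    next_token = {TOKEN_TABLE_KEY2: 1}
  return rows, next_token

page1, token = get_page()
page2, token = get_page(token)
print(page1 + page2)  # -> ['new-1', 'new-2', 'old-1']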

View file

@@ -920,7 +920,7 @@ def populate_database(minimal=False, with_storage=False):
   model.repositoryactioncount.update_repository_score(to_count)


-WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry2', 'ManifestChild',
+WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry', 'ManifestChild',
                             'NamespaceGeoRestriction']


 def find_models_missing_data():

View file

@@ -3,7 +3,7 @@ import time
 import socket

 from contextlib import contextmanager
-from data.database import LogEntryKind, LogEntry
+from data.database import LogEntryKind, LogEntry2


 class assert_action_logged(object):
   """ Specialized assertion for ensuring that a log entry of a particular kind was added under the
@@ -14,7 +14,7 @@ class assert_action_logged(object):
     self.existing_count = 0

   def _get_log_count(self):
-    return LogEntry.select().where(LogEntry.kind == LogEntryKind.get(name=self.log_kind)).count()
+    return LogEntry2.select().where(LogEntry2.kind == LogEntryKind.get(name=self.log_kind)).count()

   def __enter__(self):
     self.existing_count = self._get_log_count()
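
assert_action_logged snapshots the row count for a log kind on entry and requires it to have grown on exit. The same count-delta pattern as a self-contained sketch (a plain list stands in for the table):

LOGS = []  # illustrative stand-in for the log table

class assert_logged(object):
  """ Toy version of the count-delta assertion: the count must grow by the
      end of the with-block, proving the body wrote a log entry. """
  def __enter__(self):
    self.existing_count = len(LOGS)
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    if exc_type is None:
      assert len(LOGS) > self.existing_count, 'Missing expected log entry'

with assert_logged():
  LOGS.append('push_repo')  # the action under test would write this row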

View file

@@ -9,6 +9,8 @@ INTERNAL_ONLY_PROPERTIES = {
   'TESTING',
   'SEND_FILE_MAX_AGE_DEFAULT',
+  'ACTION_LOG_MAX_PAGE',
   'REPLICATION_QUEUE_NAME',
   'DOCKERFILE_BUILD_QUEUE_NAME',
   'CHUNK_CLEANUP_QUEUE_NAME',

View file

@@ -59,7 +59,7 @@ def test_process_queue_item(namespace, repo_name, expects_logs, app):
   created = storage.get_content(storage.preferred_locations, 'exportedactionlogs/' + storage_id)
   created_json = json.loads(created)

-  expected_count = database.LogEntry.select().where(database.LogEntry.repository == repo).count()
+  expected_count = database.LogEntry2.select().where(database.LogEntry2.repository == repo).count()
   assert (expected_count > 1) == expects_logs

   assert created_json['export_id'] == 'someid'