diff --git a/config.py b/config.py
index 5d138e6a0..a23e3cb30 100644
--- a/config.py
+++ b/config.py
@@ -559,3 +559,6 @@ class DefaultConfig(ImmutableConfig):
 
   # Feature Flag: Whether to support log exporting.
   FEATURE_LOG_EXPORT = True
+
+  # Maximum number of action logs pages that can be returned via the API.
+  ACTION_LOG_MAX_PAGE = None
diff --git a/data/model/log.py b/data/model/log.py
index 339a87332..77f5dfd3e 100644
--- a/data/model/log.py
+++ b/data/model/log.py
@@ -16,7 +16,7 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo']
 
 
 def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None,
-                namespace=None, ignore=None, model=LogEntry, id_range=None):
+                namespace=None, ignore=None, model=LogEntry2, id_range=None):
   """ Returns a query for selecting logs from the table, with various options and filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   assert (start_time is not None and end_time is not None) or (id_range is not None)
@@ -64,7 +64,7 @@ def _get_log_entry_kind(name):
 
 
 def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None,
-                        ignore=None, model=LogEntry):
+                        ignore=None, model=LogEntry2):
   """ Returns the count of logs, by kind and day, for the logs matching the given filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   date = db.extract_date('day', model.datetime)
@@ -75,7 +75,7 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n
 
 
 def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None,
-                   ignore=None, model=LogEntry, id_range=None):
+                   ignore=None, model=LogEntry2, id_range=None):
   """ Returns the logs matching the given filters. """
   # TODO(LogMigrate): Remove the branch once we're back on LogEntry only.
   Performer = User.alias()
@@ -205,49 +205,52 @@ def get_repositories_action_sums(repository_ids):
   return action_count_map
 
 
-def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None):
+def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry2):
   """ Returns the minimum ID for logs matching the given repository or namespace in
       the logs table, starting at the given start time.
   """
   # First try bounded by a day. Most repositories will meet this criteria, and therefore
   # can make a much faster query.
   day_after = start_time + timedelta(days=1)
-  result = _get_bounded_id(fn.Min, LogEntry.datetime >= start_time,
-                           repository_id, namespace_id, LogEntry.datetime < day_after)
+  result = _get_bounded_id(fn.Min, model.datetime >= start_time,
+                           repository_id, namespace_id, model.datetime < day_after, model=model)
   if result is not None:
     return result
 
-  return _get_bounded_id(fn.Min, LogEntry.datetime >= start_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Min, model.datetime >= start_time, repository_id, namespace_id,
+                         model=model)
 
 
-def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None):
+def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry2):
   """ Returns the maximum ID for logs matching the given repository or namespace in
       the logs table, ending at the given end time.
   """
   # First try bounded by a day. Most repositories will meet this criteria, and therefore
   # can make a much faster query.
   day_before = end_time - timedelta(days=1)
-  result = _get_bounded_id(fn.Max, LogEntry.datetime <= end_time,
-                           repository_id, namespace_id, LogEntry.datetime > day_before)
+  result = _get_bounded_id(fn.Max, model.datetime <= end_time,
+                           repository_id, namespace_id, model.datetime > day_before, model=model)
   if result is not None:
     return result
 
-  return _get_bounded_id(fn.Max, LogEntry.datetime <= end_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Max, model.datetime <= end_time, repository_id, namespace_id,
+                         model=model)
 
 
-def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None):
+def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None,
+                    model=LogEntry2):
   assert (namespace_id is not None) or (repository_id is not None)
-  query = (LogEntry
-           .select(fn(LogEntry.id))
+  query = (model
+           .select(fn(model.id))
            .where(filter_clause))
 
   if reduction_clause is not None:
     query = query.where(reduction_clause)
 
   if repository_id is not None:
-    query = query.where(LogEntry.repository == repository_id)
+    query = query.where(model.repository == repository_id)
   else:
-    query = query.where(LogEntry.account == namespace_id)
+    query = query.where(model.account == namespace_id)
 
   row = query.tuples()[0]
   if not row:
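The log.py changes above flip every read helper's default table to LogEntry2 while keeping the table an explicit `model` parameter. Below is a minimal runnable sketch of that pattern, using a throwaway in-memory peewee schema (the `kind`/`datetime` fields, `BaseModel`, and the SQLite setup are illustrative stand-ins, not Quay's real models):

```python
# Sketch of the model-parameterized query pattern: the same helper reads from
# whichever generation of the logs table the caller selects.
from datetime import datetime, timedelta

from peewee import CharField, DateTimeField, Model, SqliteDatabase, fn

db = SqliteDatabase(':memory:')


class BaseModel(Model):
  class Meta:
    database = db


class LogEntry(BaseModel):   # toy stand-in for the old table
  kind = CharField()
  datetime = DateTimeField()


class LogEntry2(BaseModel):  # toy stand-in for the new table
  kind = CharField()
  datetime = DateTimeField()


def get_minimum_id_for_logs(start_time, model=LogEntry2):
  # The table is a parameter, so flipping the default (as the diff does) moves
  # all callers to the new table without touching the query logic.
  return (model
          .select(fn.Min(model.id))
          .where(model.datetime >= start_time)
          .scalar())


db.create_tables([LogEntry, LogEntry2])
LogEntry2.create(kind='push_repo', datetime=datetime.utcnow())
print(get_minimum_id_for_logs(datetime.utcnow() - timedelta(days=1)))  # -> 1
```

Because callers pass a model class instead of naming a table, the eventual move back to LogEntry (per the TODO(LogMigrate) comments) should again only require changing the defaults.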
diff --git a/data/model/modelutil.py b/data/model/modelutil.py
index ac8bcd804..b1e6cd62b 100644
--- a/data/model/modelutil.py
+++ b/data/model/modelutil.py
@@ -1,6 +1,7 @@
 from peewee import SQL
 
-def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None):
+def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None,
+             max_page=None):
   """ Paginates the given query using an ID range, starting at the optional page_token.
       Returns a *list* of matching results along with an unencrypted page_token for the
       next page, if any. If descending is set to True, orders by the ID descending rather
@@ -27,7 +28,12 @@ def paginate(query, model, descending=False, page_token=None, limit=50, id_alias
       query = query.where(model.id >= start_id)
 
   query = query.limit(limit + 1)
-  return paginate_query(query, limit=limit, id_alias=id_alias)
+
+  page_number = (page_token.get('page_number') or None) if page_token else None
+  if page_number is not None and max_page is not None and page_number > max_page:
+    return [], None
+
+  return paginate_query(query, limit=limit, id_alias=id_alias, page_number=page_number)
 
 def pagination_start(page_token=None):
   """ Returns the start index for pagination for the given page token. Will return None if None. """
@@ -38,7 +44,7 @@ def pagination_start(page_token=None):
   return None
 
 
-def paginate_query(query, limit=50, id_alias=None):
+def paginate_query(query, limit=50, id_alias=None, page_number=None):
   """ Executes the given query and returns a page's worth of results, as well as the page token
       for the next page (if any).
   """
@@ -47,7 +53,8 @@ def paginate_query(query, limit=50, id_alias=None):
   if len(results) > limit:
     start_id = getattr(results[limit], id_alias or 'id')
     page_token = {
-      'start_id': start_id
+      'start_id': start_id,
+      'page_number': page_number + 1 if page_number else 1,
     }
 
   return results[0:limit], page_token
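The modelutil.py change threads an optional page_number through the unencrypted page token and lets paginate refuse to go past max_page. Here is a database-free sketch of the token flow; the `fetch` callable and the integer rows are hypothetical stand-ins, not Quay code:

```python
# Standalone sketch of how page_number rides the page token and how max_page
# cuts pagination off; names mirror the diff, the data source does not.
def paginate(fetch, page_token=None, limit=50, max_page=None):
  page_number = (page_token.get('page_number') or None) if page_token else None
  if page_number is not None and max_page is not None and page_number > max_page:
    return [], None  # past the cap: an empty page and no continuation token

  start_id = page_token['start_id'] if page_token else 0
  results = fetch(start_id, limit + 1)  # over-fetch by one to detect a next page
  if len(results) > limit:
    # The first page carries no page_number, so its successor token starts at 1.
    next_token = {'start_id': results[limit],
                  'page_number': page_number + 1 if page_number else 1}
    return results[:limit], next_token
  return results, None


rows = list(range(1000))
fetch = lambda start, count: rows[start:start + count]  # rows[i] == i, so a
                                                        # value doubles as the
                                                        # next start index here
token, pages = None, 0
while True:
  page, token = paginate(fetch, page_token=token, limit=20, max_page=2)
  if page:
    pages += 1
  if token is None:
    break
print(pages)  # -> 3
```

Note that in this sketch the first page is unnumbered, so a cap of max_page admits max_page + 1 non-empty pages before the walk ends with an empty page and no continuation token.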
""" @@ -47,7 +53,8 @@ def paginate_query(query, limit=50, id_alias=None): if len(results) > limit: start_id = getattr(results[limit], id_alias or 'id') page_token = { - 'start_id': start_id + 'start_id': start_id, + 'page_number': page_number + 1 if page_number else 1, } return results[0:limit], page_token diff --git a/data/model/test/test_log.py b/data/model/test/test_log.py index f1d9fe3e5..93a6dc3c0 100644 --- a/data/model/test/test_log.py +++ b/data/model/test/test_log.py @@ -1,6 +1,6 @@ import pytest -from data.database import LogEntry, User +from data.database import LogEntry2, User from data.model import config as _config from data.model.log import log_action @@ -21,8 +21,8 @@ def logentry_kind(): @pytest.fixture() def logentry(logentry_kind): - with patch('data.database.LogEntry.create', spec=True): - yield LogEntry + with patch('data.database.LogEntry2.create', spec=True): + yield LogEntry2 @pytest.fixture() def user(): diff --git a/endpoints/api/logs_models_pre_oci.py b/endpoints/api/logs_models_pre_oci.py index ec66f6e4b..34314b75c 100644 --- a/endpoints/api/logs_models_pre_oci.py +++ b/endpoints/api/logs_models_pre_oci.py @@ -1,5 +1,6 @@ import itertools +from app import app from data import model, database from endpoints.api.logs_models_interface import LogEntryDataInterface, LogEntryPage, LogEntry, AggregatedLogEntry @@ -57,17 +58,18 @@ class PreOCIModel(LogEntryDataInterface): logs, next_page_token = model.modelutil.paginate(logs_query, m, descending=True, page_token=page_token, - limit=20) + limit=20, + max_page=app.config['ACTION_LOG_MAX_PAGE']) return LogEntryPage([create_log(log) for log in logs], next_page_token) - # First check the LogEntry table for the most recent logs, unless we've been expressly told + # First check the LogEntry2 table for the most recent logs, unless we've been expressly told # to look inside the "second" table. 
diff --git a/initdb.py b/initdb.py
index 3b9358980..91e315d4e 100644
--- a/initdb.py
+++ b/initdb.py
@@ -920,7 +920,7 @@ def populate_database(minimal=False, with_storage=False):
   model.repositoryactioncount.update_repository_score(to_count)
 
 
-WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry2', 'ManifestChild',
+WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry', 'ManifestChild',
                             'NamespaceGeoRestriction']
 
 def find_models_missing_data():
diff --git a/test/helpers.py b/test/helpers.py
index 2adfcc936..29872c234 100644
--- a/test/helpers.py
+++ b/test/helpers.py
@@ -3,7 +3,7 @@ import time
 import socket
 from contextlib import contextmanager
-from data.database import LogEntryKind, LogEntry
+from data.database import LogEntryKind, LogEntry2
 
 
 class assert_action_logged(object):
   """ Specialized assertion for ensuring that a log entry of a particular kind was added under the
@@ -14,7 +14,7 @@ class assert_action_logged(object):
     self.existing_count = 0
 
   def _get_log_count(self):
-    return LogEntry.select().where(LogEntry.kind == LogEntryKind.get(name=self.log_kind)).count()
+    return LogEntry2.select().where(LogEntry2.kind == LogEntryKind.get(name=self.log_kind)).count()
 
   def __enter__(self):
     self.existing_count = self._get_log_count()
diff --git a/util/config/schema.py b/util/config/schema.py
index 844359008..ac2103e2e 100644
--- a/util/config/schema.py
+++ b/util/config/schema.py
@@ -9,6 +9,8 @@ INTERNAL_ONLY_PROPERTIES = {
   'TESTING',
   'SEND_FILE_MAX_AGE_DEFAULT',
 
+  'ACTION_LOG_MAX_PAGE',
+
   'REPLICATION_QUEUE_NAME',
   'DOCKERFILE_BUILD_QUEUE_NAME',
   'CHUNK_CLEANUP_QUEUE_NAME',
diff --git a/workers/test/test_exportactionlogsworker.py b/workers/test/test_exportactionlogsworker.py
index 8e3f3df61..8ec370622 100644
--- a/workers/test/test_exportactionlogsworker.py
+++ b/workers/test/test_exportactionlogsworker.py
@@ -59,7 +59,7 @@ def test_process_queue_item(namespace, repo_name, expects_logs, app):
   created = storage.get_content(storage.preferred_locations, 'exportedactionlogs/' + storage_id)
   created_json = json.loads(created)
 
-  expected_count = database.LogEntry.select().where(database.LogEntry.repository == repo).count()
+  expected_count = database.LogEntry2.select().where(database.LogEntry2.repository == repo).count()
   assert (expected_count > 1) == expects_logs
 
   assert created_json['export_id'] == 'someid'
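Finally, note that ACTION_LOG_MAX_PAGE defaults to None (no cap) and is registered in INTERNAL_ONLY_PROPERTIES, so it never surfaces in the user-facing config schema. A deployment wanting to bound the cost of deep log listings could override it in its own config; the value below is purely illustrative:

```python
# Hypothetical operator override, e.g. in a deployment-specific config module.
# The logs endpoints above paginate with limit=20, so capping at 500 pages
# bounds any single listing walk to roughly 500 * 20 = 10,000 rows per table.
ACTION_LOG_MAX_PAGE = 500
```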