From 58d7dd07b914edfb44a7b7149e49e645674b887e Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 2 Jan 2019 13:29:35 -0500 Subject: [PATCH 1/9] Revert the changes to drop LogEntry2 table definition --- data/database.py | 21 ++++++++++++++++ data/model/log.py | 7 ++++++ data/model/repositoryactioncount.py | 14 +++++++---- endpoints/api/logs_models_pre_oci.py | 25 ++++++++++++++++--- endpoints/api/superuser.py | 4 ++- .../api/test/test_logs_models_pre_oci.py | 5 ++-- initdb.py | 3 ++- workers/logrotateworker.py | 7 ++++-- 8 files changed, 72 insertions(+), 14 deletions(-) diff --git a/data/database.py b/data/database.py index 2a47ee60d..accb42a6e 100644 --- a/data/database.py +++ b/data/database.py @@ -1018,6 +1018,27 @@ class LogEntry(BaseModel): ) +class LogEntry2(BaseModel): + """ TEMP FOR QUAY.IO ONLY. DO NOT RELEASE INTO QUAY ENTERPRISE. """ + kind = ForeignKeyField(LogEntryKind) + account = IntegerField(index=True, db_column='account_id') + performer = IntegerField(index=True, null=True, db_column='performer_id') + repository = IntegerField(index=True, null=True, db_column='repository_id') + datetime = DateTimeField(default=datetime.now, index=True) + ip = CharField(null=True) + metadata_json = TextField(default='{}') + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('account', 'datetime'), False), + (('performer', 'datetime'), False), + (('repository', 'datetime'), False), + (('repository', 'datetime', 'kind'), False), + ) + + class RepositoryActionCount(BaseModel): repository = ForeignKeyField(Repository) count = IntegerField() diff --git a/data/model/log.py b/data/model/log.py index 40f76c7a9..5e4afd25f 100644 --- a/data/model/log.py +++ b/data/model/log.py @@ -18,6 +18,7 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo'] def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None, namespace=None, ignore=None, model=LogEntry, id_range=None): """ Returns a query for selecting logs from the table, with various options and filters. """ + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. assert (start_time is not None and end_time is not None) or (id_range is not None) joined = (model.select(*selections).switch(model)) @@ -65,6 +66,7 @@ def _get_log_entry_kind(name): def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None, ignore=None, model=LogEntry): """ Returns the count of logs, by kind and day, for the logs matching the given filters. """ + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. date = db.extract_date('day', model.datetime) selections = [model.kind, date.alias('day'), fn.Count(model.id).alias('count')] query = _logs_query(selections, start_time, end_time, performer, repository, namespace, ignore, @@ -75,6 +77,7 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None, ignore=None, model=LogEntry, id_range=None): """ Returns the logs matching the given filters. """ + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. Performer = User.alias() Account = User.alias() selections = [model, Performer] @@ -145,6 +148,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository= def get_stale_logs_start_id(model): """ Gets the oldest log entry. """ + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. 
try: return (model.select(model.id).order_by(model.id).limit(1).tuples())[0][0] except IndexError: @@ -153,6 +157,7 @@ def get_stale_logs_start_id(model): def get_stale_logs_cutoff_id(cutoff_date, model): """ Gets the most recent ID created before the cutoff_date. """ + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. try: return (model.select(fn.Max(model.id)).where(model.datetime <= cutoff_date) .tuples())[0][0] @@ -162,11 +167,13 @@ def get_stale_logs_cutoff_id(cutoff_date, model): def get_stale_logs(start_id, end_id, model): """ Returns all the logs with IDs between start_id and end_id inclusively. """ + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. return model.select().where((model.id >= start_id), (model.id <= end_id)) def delete_stale_logs(start_id, end_id, model): """ Deletes all the logs with IDs between start_id and end_id. """ + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. model.delete().where((model.id >= start_id), (model.id <= end_id)).execute() diff --git a/data/model/repositoryactioncount.py b/data/model/repositoryactioncount.py index da30e81ad..d57e47620 100644 --- a/data/model/repositoryactioncount.py +++ b/data/model/repositoryactioncount.py @@ -4,7 +4,7 @@ from collections import namedtuple from peewee import IntegrityError from datetime import date, timedelta, datetime -from data.database import (Repository, LogEntry, RepositoryActionCount, +from data.database import (Repository, LogEntry, LogEntry2, RepositoryActionCount, RepositorySearchScore, db_random_func, fn) logger = logging.getLogger(__name__) @@ -52,12 +52,16 @@ def count_repository_actions(to_count): today = date.today() yesterday = today - timedelta(days=1) - actions = (LogEntry + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + def lookup_action_count(model): + return (model .select() - .where(LogEntry.repository == to_count, - LogEntry.datetime >= yesterday, - LogEntry.datetime < today) + .where(model.repository == to_count, + model.datetime >= yesterday, + model.datetime < today) .count()) + + actions = lookup_action_count(LogEntry) + lookup_action_count(LogEntry2) try: RepositoryActionCount.create(repository=to_count, date=yesterday, count=actions) return True diff --git a/endpoints/api/logs_models_pre_oci.py b/endpoints/api/logs_models_pre_oci.py index ba46a931b..ec66f6e4b 100644 --- a/endpoints/api/logs_models_pre_oci.py +++ b/endpoints/api/logs_models_pre_oci.py @@ -48,7 +48,8 @@ class PreOCIModel(LogEntryDataInterface): performer = None if performer_name: performer = model.user.get_user(performer_name) - + + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. def get_logs(m): logs_query = model.log.get_logs_query(start_time, end_time, performer=performer, repository=repo, namespace=namespace_name, @@ -59,7 +60,21 @@ class PreOCIModel(LogEntryDataInterface): limit=20) return LogEntryPage([create_log(log) for log in logs], next_page_token) - return get_logs(database.LogEntry) + # First check the LogEntry table for the most recent logs, unless we've been expressly told + # to look inside the "second" table. 
+ TOKEN_TABLE_KEY2 = 'ttk2' + is_temp_table = page_token is not None and page_token.get(TOKEN_TABLE_KEY2) == 1 + if is_temp_table: + page_result = get_logs(database.LogEntry2) + else: + page_result = get_logs(database.LogEntry) + + if page_result.next_page_token is None and not is_temp_table: + page_result = page_result._replace(next_page_token={TOKEN_TABLE_KEY2: 1}) + elif is_temp_table and page_result.next_page_token is not None: + page_result.next_page_token[TOKEN_TABLE_KEY2] = 1 + + return page_result def get_log_entry_kinds(self): return model.log.get_log_entry_kinds() @@ -80,12 +95,16 @@ class PreOCIModel(LogEntryDataInterface): if performer_name: performer = model.user.get_user(performer_name) + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. aggregated_logs = model.log.get_aggregated_logs(start_time, end_time, performer=performer, repository=repo, namespace=namespace_name, ignore=ignore, model=database.LogEntry) + aggregated_logs_2 = model.log.get_aggregated_logs(start_time, end_time, performer=performer, + repository=repo, namespace=namespace_name, + ignore=ignore, model=database.LogEntry2) entries = {} - for log in aggregated_logs: + for log in itertools.chain(aggregated_logs, aggregated_logs_2): key = '%s-%s' % (log.kind_id, log.day) if key in entries: entries[key] = AggregatedLogEntry(log.count + entries[key].count, log.kind_id, log.day) diff --git a/endpoints/api/superuser.py b/endpoints/api/superuser.py index f9db1b2dd..99a4fbc45 100644 --- a/endpoints/api/superuser.py +++ b/endpoints/api/superuser.py @@ -135,6 +135,7 @@ class SuperUserAggregateLogs(ApiResource): (start_time, end_time) = _validate_logs_arguments(parsed_args['starttime'], parsed_args['endtime']) + # TODO(LogMigrate): Change to a unified log lookup util lib once we're back on LogEntry only. aggregated_logs = log_model.get_aggregated_logs(start_time, end_time) kinds = log_model.get_log_entry_kinds() return { @@ -165,8 +166,9 @@ class SuperUserLogs(ApiResource): if SuperUserPermission().can(): start_time = parsed_args['starttime'] end_time = parsed_args['endtime'] - (start_time, end_time) = _validate_logs_arguments(start_time, end_time) + + # TODO(LogMigrate): Change to a unified log lookup util lib once we're back on LogEntry only. 
log_page = log_model.get_logs_query(start_time, end_time, page_token=page_token) kinds = log_model.get_log_entry_kinds() return { diff --git a/endpoints/api/test/test_logs_models_pre_oci.py b/endpoints/api/test/test_logs_models_pre_oci.py index dd8fb4951..fb575b063 100644 --- a/endpoints/api/test/test_logs_models_pre_oci.py +++ b/endpoints/api/test/test_logs_models_pre_oci.py @@ -110,7 +110,8 @@ def test_does_repo_exist_returns_true(monkeypatch): def test_get_aggregated_logs(monkeypatch): get_aggregated_logs_mock = Mock() - get_aggregated_logs_mock.side_effect = [[AttrDict({'day': '1', 'kind_id': 4, 'count': 6})]] + get_aggregated_logs_mock.side_effect = [[AttrDict({'day': '1', 'kind_id': 4, 'count': 6})], + [AttrDict({'day': '1', 'kind_id': 4, 'count': 12})]] monkeypatch.setattr(model.log, 'get_aggregated_logs', get_aggregated_logs_mock) repo_mock = Mock() @@ -126,4 +127,4 @@ def test_get_aggregated_logs(monkeypatch): actual = pre_oci_model.get_aggregated_logs('start_time', 'end_time', 'performer_name', 'repository_name', 'namespace_name', set()) - assert actual == [AggregatedLogEntry(6, 4, '1')] + assert actual == [AggregatedLogEntry(18, 4, '1')] diff --git a/initdb.py b/initdb.py index 63fcf9132..3b9358980 100644 --- a/initdb.py +++ b/initdb.py @@ -920,7 +920,8 @@ def populate_database(minimal=False, with_storage=False): model.repositoryactioncount.update_repository_score(to_count) -WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'ManifestChild', 'NamespaceGeoRestriction'] +WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry2', 'ManifestChild', + 'NamespaceGeoRestriction'] def find_models_missing_data(): # As a sanity check we are going to make sure that all db tables have some data, unless explicitly diff --git a/workers/logrotateworker.py b/workers/logrotateworker.py index 723ded3a5..e8486403c 100644 --- a/workers/logrotateworker.py +++ b/workers/logrotateworker.py @@ -8,7 +8,7 @@ from tempfile import SpooledTemporaryFile import features from app import app, storage -from data.database import UseThenDisconnect, LogEntry +from data.database import UseThenDisconnect, LogEntry, LogEntry2 from data.model.log import (get_stale_logs, get_stale_logs_start_id, get_stale_logs_cutoff_id, delete_stale_logs) from data.userfiles import DelegateUserfiles @@ -36,7 +36,10 @@ class LogRotateWorker(Worker): self.add_operation(self._archive_logs, WORKER_FREQUENCY) def _archive_logs(self): - self._archive_logs_for_model(LogEntry) + # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. 
+ models = [LogEntry, LogEntry2] + for model in models: + self._archive_logs_for_model(model) def _archive_logs_for_model(self, model): logger.debug('Attempting to rotate log entries') From a6b5b4d6e3989a490b19fadc4eb3450163ca4325 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 2 Jan 2019 13:29:52 -0500 Subject: [PATCH 2/9] Switch to writing logs to LogEntry2 table --- data/model/log.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/model/log.py b/data/model/log.py index 5e4afd25f..339a87332 100644 --- a/data/model/log.py +++ b/data/model/log.py @@ -7,7 +7,7 @@ from datetime import datetime, timedelta from cachetools import lru_cache import data -from data.database import LogEntry, LogEntryKind, User, RepositoryActionCount, db +from data.database import LogEntry, LogEntryKind, User, RepositoryActionCount, db, LogEntry2 from data.model import config, user, DataModelException logger = logging.getLogger(__name__) @@ -137,7 +137,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository= } try: - LogEntry.create(**log_data) + LogEntry2.create(**log_data) except PeeweeException as ex: strict_logging_disabled = config.app_config.get('ALLOW_PULLS_WITHOUT_STRICT_LOGGING') if strict_logging_disabled and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING: From 4ba4d9141b60432f3882351d2df32d2fff1ddbbf Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 2 Jan 2019 13:46:28 -0500 Subject: [PATCH 3/9] Put user last accessed updating behind a feature flag --- config.py | 3 +++ data/model/_basequery.py | 3 +++ endpoints/api/robot_models_pre_oci.py | 8 ++++++-- static/directives/robots-manager.html | 5 +++-- util/config/schema.py | 7 +++++++ 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/config.py b/config.py index 856bb20d5..4d78b1abf 100644 --- a/config.py +++ b/config.py @@ -550,3 +550,6 @@ class DefaultConfig(ImmutableConfig): # Defines the maximum number of pages the user can paginate before they are limited SEARCH_MAX_RESULT_PAGE_COUNT = 10 + + # Feature Flag: Whether to record when users were last accessed. + FEATURE_USER_LAST_ACCESSED = True diff --git a/data/model/_basequery.py b/data/model/_basequery.py index a0b0c2d8a..0e20a516c 100644 --- a/data/model/_basequery.py +++ b/data/model/_basequery.py @@ -162,6 +162,9 @@ def calculate_image_aggregate_size(ancestors_str, image_size, parent_image): def update_last_accessed(token_or_user): """ Updates the `last_accessed` field on the given token or user. If the existing field's value is within the configured threshold, the update is skipped. 
""" + if not config.app_config.get('FEATURE_USER_LAST_ACCESSED'): + return + threshold = timedelta(seconds=config.app_config.get('LAST_ACCESSED_UPDATE_THRESHOLD_S', 120)) if (token_or_user.last_accessed is not None and datetime.utcnow() - token_or_user.last_accessed < threshold): diff --git a/endpoints/api/robot_models_pre_oci.py b/endpoints/api/robot_models_pre_oci.py index 6f5396440..20cd83bf1 100644 --- a/endpoints/api/robot_models_pre_oci.py +++ b/endpoints/api/robot_models_pre_oci.py @@ -1,3 +1,5 @@ +import features + from app import avatar from data import model from data.database import User, FederatedLogin, Team as TeamTable, Repository, RobotAccountMetadata @@ -25,7 +27,8 @@ class RobotPreOCIModel(RobotInterface): 'name': robot_name, 'token': robot_tuple.get(FederatedLogin.service_ident) if include_token else None, 'created': robot_tuple.get(User.creation_date), - 'last_accessed': robot_tuple.get(User.last_accessed), + 'last_accessed': (robot_tuple.get(User.last_accessed) + if features.USER_LAST_ACCESSED else None), 'description': robot_tuple.get(RobotAccountMetadata.description), 'unstructured_metadata': robot_tuple.get(RobotAccountMetadata.unstructured_json), } @@ -58,7 +61,8 @@ class RobotPreOCIModel(RobotInterface): robot_dict['repositories'].append(repository_name) robots[robot_name] = RobotWithPermissions(robot_dict['name'], robot_dict['token'], robot_dict['created'], - robot_dict['last_accessed'], + (robot_dict['last_accessed'] + if features.USER_LAST_ACCESSED else None), robot_dict['teams'], robot_dict['repositories'], robot_dict['description']) diff --git a/static/directives/robots-manager.html b/static/directives/robots-manager.html index 87f041afe..d2a265247 100644 --- a/static/directives/robots-manager.html +++ b/static/directives/robots-manager.html @@ -44,7 +44,8 @@ Created - + Last Accessed @@ -92,7 +93,7 @@ - + diff --git a/util/config/schema.py b/util/config/schema.py index 684d07d17..64226fc4e 100644 --- a/util/config/schema.py +++ b/util/config/schema.py @@ -784,6 +784,13 @@ CONFIG_SCHEMA = { 'pattern': '^[0-9]+(w|m|d|h|s)$', }, + # Feature Flag: User last accessed. + 'FEATURE_USER_LAST_ACCESSED': { + 'type': 'boolean', + 'description': 'Whether to record the last time a user was accessed. Defaults to True', + 'x-example': True, + }, + # Feature Flag: Permanent Sessions. 'FEATURE_PERMANENT_SESSIONS': { 'type': 'boolean', From 204eb74c4f83b913b0a095dfe53b98212a8cfd31 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 2 Jan 2019 14:17:40 -0500 Subject: [PATCH 4/9] Put aggregated log query and log exports behind feature flags --- config.py | 6 ++++++ endpoints/api/logs.py | 10 ++++++++-- static/directives/logs-view.html | 8 +++++--- static/js/directives/ui/logs-view.js | 30 ++++++++++++++++------------ util/config/schema.py | 14 +++++++++++++ workers/exportactionlogsworker.py | 8 ++++++++ workers/logrotateworker.py | 4 ++-- 7 files changed, 60 insertions(+), 20 deletions(-) diff --git a/config.py b/config.py index 4d78b1abf..5d138e6a0 100644 --- a/config.py +++ b/config.py @@ -553,3 +553,9 @@ class DefaultConfig(ImmutableConfig): # Feature Flag: Whether to record when users were last accessed. FEATURE_USER_LAST_ACCESSED = True + + # Feature Flag: Whether to allow users to retrieve aggregated log counts. + FEATURE_AGGREGATED_LOG_COUNT_RETRIEVAL = True + + # Feature Flag: Whether to support log exporting. 
+ FEATURE_LOG_EXPORT = True diff --git a/endpoints/api/logs.py b/endpoints/api/logs.py index 4c3928c43..0cae6b095 100644 --- a/endpoints/api/logs.py +++ b/endpoints/api/logs.py @@ -6,11 +6,13 @@ from datetime import datetime, timedelta from flask import request +import features + from app import export_action_logs_queue from endpoints.api import (resource, nickname, ApiResource, query_param, parse_args, RepositoryParamResource, require_repo_admin, related_user_resource, format_date, require_user_admin, path_param, require_scope, page_support, - validate_json_request, InvalidRequest) + validate_json_request, InvalidRequest, show_if) from data import model as data_model from endpoints.api.logs_models_pre_oci import pre_oci_model as model from endpoints.exception import Unauthorized, NotFound @@ -150,6 +152,7 @@ class OrgLogs(ApiResource): @resource('/v1/repository//aggregatelogs') +@show_if(features.AGGREGATED_LOG_COUNT_RETRIEVAL) @path_param('repository', 'The full path of the repository. e.g. namespace/name') class RepositoryAggregateLogs(RepositoryParamResource): """ Resource for fetching aggregated logs for the specific repository. """ @@ -170,6 +173,7 @@ class RepositoryAggregateLogs(RepositoryParamResource): @resource('/v1/user/aggregatelogs') +@show_if(features.AGGREGATED_LOG_COUNT_RETRIEVAL) class UserAggregateLogs(ApiResource): """ Resource for fetching aggregated logs for the current user. """ @@ -191,6 +195,7 @@ class UserAggregateLogs(ApiResource): @resource('/v1/organization//aggregatelogs') +@show_if(features.AGGREGATED_LOG_COUNT_RETRIEVAL) @path_param('orgname', 'The name of the organization') @related_user_resource(UserLogs) class OrgAggregateLogs(ApiResource): @@ -314,6 +319,7 @@ class ExportUserLogs(ApiResource): @resource('/v1/organization//exportlogs') +@show_if(features.LOG_EXPORT) @path_param('orgname', 'The name of the organization') @related_user_resource(ExportUserLogs) class ExportOrgLogs(ApiResource): @@ -329,7 +335,7 @@ class ExportOrgLogs(ApiResource): @require_scope(scopes.ORG_ADMIN) @validate_json_request('ExportLogs') def post(self, orgname, parsed_args): - """ Gets the aggregated logs for the specified organization. """ + """ Exports the logs for the specified organization. """ permission = AdministerOrganizationPermission(orgname) if permission.can(): start_time = parsed_args['starttime'] diff --git a/static/directives/logs-view.html b/static/directives/logs-view.html index 79ce3d7a2..9cd1cfee5 100644 --- a/static/directives/logs-view.html +++ b/static/directives/logs-view.html @@ -22,14 +22,16 @@
-
+
diff --git a/static/js/directives/ui/logs-view.js b/static/js/directives/ui/logs-view.js index a73e9f5f6..ab79ec045 100644 --- a/static/js/directives/ui/logs-view.js +++ b/static/js/directives/ui/logs-view.js @@ -19,7 +19,9 @@ angular.module('quay').directive('logsView', function () { 'allLogs': '@allLogs' }, controller: function($scope, $element, $sce, Restangular, ApiService, TriggerService, - StringBuilderService, ExternalNotificationData, UtilService) { + StringBuilderService, ExternalNotificationData, UtilService, + Features) { + $scope.Features = Features; $scope.loading = true; $scope.loadCounter = -1; $scope.logs = null; @@ -405,20 +407,22 @@ angular.module('quay').directive('logsView', function () { return; } - $scope.chartLoading = true; + if (Features.AGGREGATED_LOG_COUNT_RETRIEVAL) { + $scope.chartLoading = true; - var aggregateUrl = getUrl('aggregatelogs').toString(); - var loadAggregate = Restangular.one(aggregateUrl); - loadAggregate.customGET().then(function(resp) { - $scope.chart = new LogUsageChart(logKinds); - $($scope.chart).bind('filteringChanged', function(e) { - $scope.$apply(function() { $scope.kindsAllowed = e.allowed; }); + var aggregateUrl = getUrl('aggregatelogs').toString(); + var loadAggregate = Restangular.one(aggregateUrl); + loadAggregate.customGET().then(function(resp) { + $scope.chart = new LogUsageChart(logKinds); + $($scope.chart).bind('filteringChanged', function(e) { + $scope.$apply(function() { $scope.kindsAllowed = e.allowed; }); + }); + + $scope.chart.draw('bar-chart', resp.aggregated, $scope.options.logStartDate, + $scope.options.logEndDate); + $scope.chartLoading = false; }); - - $scope.chart.draw('bar-chart', resp.aggregated, $scope.options.logStartDate, - $scope.options.logEndDate); - $scope.chartLoading = false; - }); + } $scope.nextPageToken = null; $scope.hasAdditional = true; diff --git a/util/config/schema.py b/util/config/schema.py index 64226fc4e..844359008 100644 --- a/util/config/schema.py +++ b/util/config/schema.py @@ -784,6 +784,20 @@ CONFIG_SCHEMA = { 'pattern': '^[0-9]+(w|m|d|h|s)$', }, + # Feature Flag: Aggregated log retrieval. + 'FEATURE_AGGREGATED_LOG_COUNT_RETRIEVAL': { + 'type': 'boolean', + 'description': 'Whether to allow retrieval of aggregated log counts. Defaults to True', + 'x-example': True, + }, + + # Feature Flag: Log export. + 'FEATURE_LOG_EXPORT': { + 'type': 'boolean', + 'description': 'Whether to allow exporting of action logs. Defaults to True', + 'x-example': True, + }, + # Feature Flag: User last accessed. 
'FEATURE_USER_LAST_ACCESSED': { 'type': 'boolean', diff --git a/workers/exportactionlogsworker.py b/workers/exportactionlogsworker.py index 3c14ec006..857bdd094 100644 --- a/workers/exportactionlogsworker.py +++ b/workers/exportactionlogsworker.py @@ -1,6 +1,7 @@ import logging import os.path import json +import time import uuid from datetime import datetime, timedelta @@ -8,6 +9,8 @@ from io import BytesIO from enum import Enum, unique +import features + from app import app, export_action_logs_queue, storage, get_app_url from data import model from endpoints.api import format_date @@ -277,6 +280,11 @@ def _run_and_time(fn): if __name__ == "__main__": logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False) + if not features.LOG_EXPORT: + logger.debug('Log export not enabled; skipping') + while True: + time.sleep(100000) + logger.debug('Starting export action logs worker') worker = ExportActionLogsWorker(export_action_logs_queue, poll_period_seconds=POLL_PERIOD_SECONDS) diff --git a/workers/logrotateworker.py b/workers/logrotateworker.py index e8486403c..a0ad7d314 100644 --- a/workers/logrotateworker.py +++ b/workers/logrotateworker.py @@ -129,8 +129,8 @@ def log_dict(log): def main(): logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False) - if not features.ACTION_LOG_ROTATION or None in [SAVE_PATH, SAVE_LOCATION]: - logger.debug('Action log rotation worker not enabled; skipping') + if not features.LOG_EXPORT: + logger.debug('Log export not enabled; skipping') while True: time.sleep(100000) From b6db0027290352c7d31d071c6aec94e676d1539e Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 2 Jan 2019 15:57:55 -0500 Subject: [PATCH 5/9] Fix references to LogEntry model used and add support for a maximum page of results on the logs API --- config.py | 3 ++ data/model/log.py | 35 +++++++++++---------- data/model/modelutil.py | 15 ++++++--- data/model/test/test_log.py | 6 ++-- endpoints/api/logs_models_pre_oci.py | 10 +++--- initdb.py | 2 +- test/helpers.py | 4 +-- util/config/schema.py | 2 ++ workers/test/test_exportactionlogsworker.py | 2 +- 9 files changed, 48 insertions(+), 31 deletions(-) diff --git a/config.py b/config.py index 5d138e6a0..a23e3cb30 100644 --- a/config.py +++ b/config.py @@ -559,3 +559,6 @@ class DefaultConfig(ImmutableConfig): # Feature Flag: Whether to support log exporting. FEATURE_LOG_EXPORT = True + + # Maximum number of action logs pages that can be returned via the API. + ACTION_LOG_MAX_PAGE = None diff --git a/data/model/log.py b/data/model/log.py index 339a87332..77f5dfd3e 100644 --- a/data/model/log.py +++ b/data/model/log.py @@ -16,7 +16,7 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo'] def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None, - namespace=None, ignore=None, model=LogEntry, id_range=None): + namespace=None, ignore=None, model=LogEntry2, id_range=None): """ Returns a query for selecting logs from the table, with various options and filters. """ # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. assert (start_time is not None and end_time is not None) or (id_range is not None) @@ -64,7 +64,7 @@ def _get_log_entry_kind(name): def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None, - ignore=None, model=LogEntry): + ignore=None, model=LogEntry2): """ Returns the count of logs, by kind and day, for the logs matching the given filters. 
""" # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. date = db.extract_date('day', model.datetime) @@ -75,7 +75,7 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None, - ignore=None, model=LogEntry, id_range=None): + ignore=None, model=LogEntry2, id_range=None): """ Returns the logs matching the given filters. """ # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. Performer = User.alias() @@ -205,49 +205,52 @@ def get_repositories_action_sums(repository_ids): return action_count_map -def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None): +def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry2): """ Returns the minimum ID for logs matching the given repository or namespace in the logs table, starting at the given start time. """ # First try bounded by a day. Most repositories will meet this criteria, and therefore # can make a much faster query. day_after = start_time + timedelta(days=1) - result = _get_bounded_id(fn.Min, LogEntry.datetime >= start_time, - repository_id, namespace_id, LogEntry.datetime < day_after) + result = _get_bounded_id(fn.Min, model.datetime >= start_time, + repository_id, namespace_id, model.datetime < day_after, model=model) if result is not None: return result - return _get_bounded_id(fn.Min, LogEntry.datetime >= start_time, repository_id, namespace_id) + return _get_bounded_id(fn.Min, model.datetime >= start_time, repository_id, namespace_id, + model=model) -def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None): +def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry2): """ Returns the maximum ID for logs matching the given repository or namespace in the logs table, ending at the given end time. """ # First try bounded by a day. Most repositories will meet this criteria, and therefore # can make a much faster query. 
day_before = end_time - timedelta(days=1) - result = _get_bounded_id(fn.Max, LogEntry.datetime <= end_time, - repository_id, namespace_id, LogEntry.datetime > day_before) + result = _get_bounded_id(fn.Max, model.datetime <= end_time, + repository_id, namespace_id, model.datetime > day_before, model=model) if result is not None: return result - return _get_bounded_id(fn.Max, LogEntry.datetime <= end_time, repository_id, namespace_id) + return _get_bounded_id(fn.Max, model.datetime <= end_time, repository_id, namespace_id, + model=model) -def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None): +def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None, + model=LogEntry2): assert (namespace_id is not None) or (repository_id is not None) - query = (LogEntry - .select(fn(LogEntry.id)) + query = (model + .select(fn(model.id)) .where(filter_clause)) if reduction_clause is not None: query = query.where(reduction_clause) if repository_id is not None: - query = query.where(LogEntry.repository == repository_id) + query = query.where(model.repository == repository_id) else: - query = query.where(LogEntry.account == namespace_id) + query = query.where(model.account == namespace_id) row = query.tuples()[0] if not row: diff --git a/data/model/modelutil.py b/data/model/modelutil.py index ac8bcd804..b1e6cd62b 100644 --- a/data/model/modelutil.py +++ b/data/model/modelutil.py @@ -1,6 +1,7 @@ from peewee import SQL -def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None): +def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None, + max_page=None): """ Paginates the given query using an ID range, starting at the optional page_token. Returns a *list* of matching results along with an unencrypted page_token for the next page, if any. If descending is set to True, orders by the ID descending rather @@ -27,7 +28,12 @@ def paginate(query, model, descending=False, page_token=None, limit=50, id_alias query = query.where(model.id >= start_id) query = query.limit(limit + 1) - return paginate_query(query, limit=limit, id_alias=id_alias) + + page_number = (page_token.get('page_number') or None) if page_token else None + if page_number is not None and max_page is not None and page_number > max_page: + return [], None + + return paginate_query(query, limit=limit, id_alias=id_alias, page_number=page_number) def pagination_start(page_token=None): @@ -38,7 +44,7 @@ def pagination_start(page_token=None): return None -def paginate_query(query, limit=50, id_alias=None): +def paginate_query(query, limit=50, id_alias=None, page_number=None): """ Executes the given query and returns a page's worth of results, as well as the page token for the next page (if any). 
""" @@ -47,7 +53,8 @@ def paginate_query(query, limit=50, id_alias=None): if len(results) > limit: start_id = getattr(results[limit], id_alias or 'id') page_token = { - 'start_id': start_id + 'start_id': start_id, + 'page_number': page_number + 1 if page_number else 1, } return results[0:limit], page_token diff --git a/data/model/test/test_log.py b/data/model/test/test_log.py index f1d9fe3e5..93a6dc3c0 100644 --- a/data/model/test/test_log.py +++ b/data/model/test/test_log.py @@ -1,6 +1,6 @@ import pytest -from data.database import LogEntry, User +from data.database import LogEntry2, User from data.model import config as _config from data.model.log import log_action @@ -21,8 +21,8 @@ def logentry_kind(): @pytest.fixture() def logentry(logentry_kind): - with patch('data.database.LogEntry.create', spec=True): - yield LogEntry + with patch('data.database.LogEntry2.create', spec=True): + yield LogEntry2 @pytest.fixture() def user(): diff --git a/endpoints/api/logs_models_pre_oci.py b/endpoints/api/logs_models_pre_oci.py index ec66f6e4b..34314b75c 100644 --- a/endpoints/api/logs_models_pre_oci.py +++ b/endpoints/api/logs_models_pre_oci.py @@ -1,5 +1,6 @@ import itertools +from app import app from data import model, database from endpoints.api.logs_models_interface import LogEntryDataInterface, LogEntryPage, LogEntry, AggregatedLogEntry @@ -57,17 +58,18 @@ class PreOCIModel(LogEntryDataInterface): logs, next_page_token = model.modelutil.paginate(logs_query, m, descending=True, page_token=page_token, - limit=20) + limit=20, + max_page=app.config['ACTION_LOG_MAX_PAGE']) return LogEntryPage([create_log(log) for log in logs], next_page_token) - # First check the LogEntry table for the most recent logs, unless we've been expressly told + # First check the LogEntry2 table for the most recent logs, unless we've been expressly told # to look inside the "second" table. 
TOKEN_TABLE_KEY2 = 'ttk2' is_temp_table = page_token is not None and page_token.get(TOKEN_TABLE_KEY2) == 1 if is_temp_table: - page_result = get_logs(database.LogEntry2) - else: page_result = get_logs(database.LogEntry) + else: + page_result = get_logs(database.LogEntry2) if page_result.next_page_token is None and not is_temp_table: page_result = page_result._replace(next_page_token={TOKEN_TABLE_KEY2: 1}) diff --git a/initdb.py b/initdb.py index 3b9358980..91e315d4e 100644 --- a/initdb.py +++ b/initdb.py @@ -920,7 +920,7 @@ def populate_database(minimal=False, with_storage=False): model.repositoryactioncount.update_repository_score(to_count) -WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry2', 'ManifestChild', +WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry', 'ManifestChild', 'NamespaceGeoRestriction'] def find_models_missing_data(): diff --git a/test/helpers.py b/test/helpers.py index 2adfcc936..29872c234 100644 --- a/test/helpers.py +++ b/test/helpers.py @@ -3,7 +3,7 @@ import time import socket from contextlib import contextmanager -from data.database import LogEntryKind, LogEntry +from data.database import LogEntryKind, LogEntry2 class assert_action_logged(object): """ Specialized assertion for ensuring that a log entry of a particular kind was added under the @@ -14,7 +14,7 @@ class assert_action_logged(object): self.existing_count = 0 def _get_log_count(self): - return LogEntry.select().where(LogEntry.kind == LogEntryKind.get(name=self.log_kind)).count() + return LogEntry2.select().where(LogEntry2.kind == LogEntryKind.get(name=self.log_kind)).count() def __enter__(self): self.existing_count = self._get_log_count() diff --git a/util/config/schema.py b/util/config/schema.py index 844359008..ac2103e2e 100644 --- a/util/config/schema.py +++ b/util/config/schema.py @@ -9,6 +9,8 @@ INTERNAL_ONLY_PROPERTIES = { 'TESTING', 'SEND_FILE_MAX_AGE_DEFAULT', + 'ACTION_LOG_MAX_PAGE', + 'REPLICATION_QUEUE_NAME', 'DOCKERFILE_BUILD_QUEUE_NAME', 'CHUNK_CLEANUP_QUEUE_NAME', diff --git a/workers/test/test_exportactionlogsworker.py b/workers/test/test_exportactionlogsworker.py index 8e3f3df61..8ec370622 100644 --- a/workers/test/test_exportactionlogsworker.py +++ b/workers/test/test_exportactionlogsworker.py @@ -59,7 +59,7 @@ def test_process_queue_item(namespace, repo_name, expects_logs, app): created = storage.get_content(storage.preferred_locations, 'exportedactionlogs/' + storage_id) created_json = json.loads(created) - expected_count = database.LogEntry.select().where(database.LogEntry.repository == repo).count() + expected_count = database.LogEntry2.select().where(database.LogEntry2.repository == repo).count() assert (expected_count > 1) == expects_logs assert created_json['export_id'] == 'someid' From cdb49dbfd3366792cbbe83fc0152aeb61ec6712a Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 3 Jan 2019 13:50:43 -0500 Subject: [PATCH 6/9] Add LogEntry3 table without the extra indexes and switch to writing to it --- data/database.py | 20 +++++++++ .../6ec8726c0ace_add_logentry3_table.py | 42 +++++++++++++++++++ data/model/log.py | 31 +++++++------- data/model/repositoryactioncount.py | 7 ++-- data/model/test/test_log.py | 6 +-- endpoints/api/logs_models_pre_oci.py | 33 ++++++++------- endpoints/api/superuser.py | 5 +-- .../api/test/test_logs_models_pre_oci.py | 5 ++- initdb.py | 2 +- test/helpers.py | 4 +- workers/logrotateworker.py | 6 +-- workers/test/test_exportactionlogsworker.py | 2 +- 12 files changed, 114 insertions(+), 49 deletions(-) create mode 100644 
data/migrations/versions/6ec8726c0ace_add_logentry3_table.py diff --git a/data/database.py b/data/database.py index accb42a6e..883d82664 100644 --- a/data/database.py +++ b/data/database.py @@ -1039,6 +1039,26 @@ class LogEntry2(BaseModel): ) +class LogEntry3(BaseModel): + id = BigAutoField() + kind = ForeignKeyField(LogEntryKind) + account = IntegerField(db_column='account_id') + performer = IntegerField(null=True, db_column='performer_id') + repository = IntegerField(null=True, db_column='repository_id') + datetime = DateTimeField(default=datetime.now, index=True) + ip = CharField(null=True) + metadata_json = TextField(default='{}') + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('account', 'datetime'), False), + (('performer', 'datetime'), False), + (('repository', 'datetime', 'kind'), False), + ) + + class RepositoryActionCount(BaseModel): repository = ForeignKeyField(Repository) count = IntegerField() diff --git a/data/migrations/versions/6ec8726c0ace_add_logentry3_table.py b/data/migrations/versions/6ec8726c0ace_add_logentry3_table.py new file mode 100644 index 000000000..c2310fdd1 --- /dev/null +++ b/data/migrations/versions/6ec8726c0ace_add_logentry3_table.py @@ -0,0 +1,42 @@ +"""Add LogEntry3 table + +Revision ID: 6ec8726c0ace +Revises: 54492a68a3cf +Create Date: 2019-01-03 13:41:02.897957 + +""" + +# revision identifiers, used by Alembic. +revision = '6ec8726c0ace' +down_revision = '54492a68a3cf' + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + +def upgrade(tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('logentry3', + sa.Column('id', sa.BigInteger(), nullable=False), + sa.Column('kind_id', sa.Integer(), nullable=False), + sa.Column('account_id', sa.Integer(), nullable=False), + sa.Column('performer_id', sa.Integer(), nullable=True), + sa.Column('repository_id', sa.Integer(), nullable=True), + sa.Column('datetime', sa.DateTime(), nullable=False), + sa.Column('ip', sa.String(length=255), nullable=True), + sa.Column('metadata_json', sa.Text(), nullable=False), + sa.ForeignKeyConstraint(['kind_id'], ['logentrykind.id'], name=op.f('fk_logentry3_kind_id_logentrykind')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_logentry3')) + ) + op.create_index('logentry3_account_id_datetime', 'logentry3', ['account_id', 'datetime'], unique=False) + op.create_index('logentry3_datetime', 'logentry3', ['datetime'], unique=False) + op.create_index('logentry3_kind_id', 'logentry3', ['kind_id'], unique=False) + op.create_index('logentry3_performer_id_datetime', 'logentry3', ['performer_id', 'datetime'], unique=False) + op.create_index('logentry3_repository_id_datetime_kind_id', 'logentry3', ['repository_id', 'datetime', 'kind_id'], unique=False) + # ### end Alembic commands ### + + +def downgrade(tables, tester): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('logentry3') + # ### end Alembic commands ### diff --git a/data/model/log.py b/data/model/log.py index 77f5dfd3e..9e445c5ee 100644 --- a/data/model/log.py +++ b/data/model/log.py @@ -7,7 +7,8 @@ from datetime import datetime, timedelta from cachetools import lru_cache import data -from data.database import LogEntry, LogEntryKind, User, RepositoryActionCount, db, LogEntry2 +from data.database import (LogEntry, LogEntryKind, User, RepositoryActionCount, db, LogEntry3, + LogEntry3) from data.model import config, user, DataModelException logger = logging.getLogger(__name__) @@ -16,9 +17,9 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo'] def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None, - namespace=None, ignore=None, model=LogEntry2, id_range=None): + namespace=None, ignore=None, model=LogEntry3, id_range=None): """ Returns a query for selecting logs from the table, with various options and filters. """ - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. assert (start_time is not None and end_time is not None) or (id_range is not None) joined = (model.select(*selections).switch(model)) @@ -64,9 +65,9 @@ def _get_log_entry_kind(name): def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None, - ignore=None, model=LogEntry2): + ignore=None, model=LogEntry3): """ Returns the count of logs, by kind and day, for the logs matching the given filters. """ - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. date = db.extract_date('day', model.datetime) selections = [model.kind, date.alias('day'), fn.Count(model.id).alias('count')] query = _logs_query(selections, start_time, end_time, performer, repository, namespace, ignore, @@ -75,9 +76,9 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None, - ignore=None, model=LogEntry2, id_range=None): + ignore=None, model=LogEntry3, id_range=None): """ Returns the logs matching the given filters. """ - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. Performer = User.alias() Account = User.alias() selections = [model, Performer] @@ -137,7 +138,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository= } try: - LogEntry2.create(**log_data) + LogEntry3.create(**log_data) except PeeweeException as ex: strict_logging_disabled = config.app_config.get('ALLOW_PULLS_WITHOUT_STRICT_LOGGING') if strict_logging_disabled and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING: @@ -148,7 +149,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository= def get_stale_logs_start_id(model): """ Gets the oldest log entry. """ - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. try: return (model.select(model.id).order_by(model.id).limit(1).tuples())[0][0] except IndexError: @@ -157,7 +158,7 @@ def get_stale_logs_start_id(model): def get_stale_logs_cutoff_id(cutoff_date, model): """ Gets the most recent ID created before the cutoff_date. """ - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. 
+ # TODO(LogMigrate): Remove the branch once we're back on a single table. try: return (model.select(fn.Max(model.id)).where(model.datetime <= cutoff_date) .tuples())[0][0] @@ -167,13 +168,13 @@ def get_stale_logs_cutoff_id(cutoff_date, model): def get_stale_logs(start_id, end_id, model): """ Returns all the logs with IDs between start_id and end_id inclusively. """ - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. return model.select().where((model.id >= start_id), (model.id <= end_id)) def delete_stale_logs(start_id, end_id, model): """ Deletes all the logs with IDs between start_id and end_id. """ - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. model.delete().where((model.id >= start_id), (model.id <= end_id)).execute() @@ -205,7 +206,7 @@ def get_repositories_action_sums(repository_ids): return action_count_map -def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry2): +def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry3): """ Returns the minimum ID for logs matching the given repository or namespace in the logs table, starting at the given start time. """ @@ -221,7 +222,7 @@ def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, m model=model) -def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry2): +def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry3): """ Returns the maximum ID for logs matching the given repository or namespace in the logs table, ending at the given end time. """ @@ -238,7 +239,7 @@ def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, mod def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None, - model=LogEntry2): + model=LogEntry3): assert (namespace_id is not None) or (repository_id is not None) query = (model .select(fn(model.id)) diff --git a/data/model/repositoryactioncount.py b/data/model/repositoryactioncount.py index d57e47620..b9bd76b21 100644 --- a/data/model/repositoryactioncount.py +++ b/data/model/repositoryactioncount.py @@ -4,7 +4,7 @@ from collections import namedtuple from peewee import IntegrityError from datetime import date, timedelta, datetime -from data.database import (Repository, LogEntry, LogEntry2, RepositoryActionCount, +from data.database import (Repository, LogEntry, LogEntry2, LogEntry3, RepositoryActionCount, RepositorySearchScore, db_random_func, fn) logger = logging.getLogger(__name__) @@ -52,7 +52,7 @@ def count_repository_actions(to_count): today = date.today() yesterday = today - timedelta(days=1) - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. 
def lookup_action_count(model): return (model .select() @@ -61,7 +61,8 @@ def count_repository_actions(to_count): model.datetime < today) .count()) - actions = lookup_action_count(LogEntry) + lookup_action_count(LogEntry2) + actions = (lookup_action_count(LogEntry3) + lookup_action_count(LogEntry2) + + lookup_action_count(LogEntry)) try: RepositoryActionCount.create(repository=to_count, date=yesterday, count=actions) return True diff --git a/data/model/test/test_log.py b/data/model/test/test_log.py index 93a6dc3c0..7ced0bb91 100644 --- a/data/model/test/test_log.py +++ b/data/model/test/test_log.py @@ -1,6 +1,6 @@ import pytest -from data.database import LogEntry2, User +from data.database import LogEntry3, User from data.model import config as _config from data.model.log import log_action @@ -21,8 +21,8 @@ def logentry_kind(): @pytest.fixture() def logentry(logentry_kind): - with patch('data.database.LogEntry2.create', spec=True): - yield LogEntry2 + with patch('data.database.LogEntry3.create', spec=True): + yield LogEntry3 @pytest.fixture() def user(): diff --git a/endpoints/api/logs_models_pre_oci.py b/endpoints/api/logs_models_pre_oci.py index 34314b75c..b4dbbb060 100644 --- a/endpoints/api/logs_models_pre_oci.py +++ b/endpoints/api/logs_models_pre_oci.py @@ -50,7 +50,7 @@ class PreOCIModel(LogEntryDataInterface): if performer_name: performer = model.user.get_user(performer_name) - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. def get_logs(m): logs_query = model.log.get_logs_query(start_time, end_time, performer=performer, repository=repo, namespace=namespace_name, @@ -62,19 +62,19 @@ class PreOCIModel(LogEntryDataInterface): max_page=app.config['ACTION_LOG_MAX_PAGE']) return LogEntryPage([create_log(log) for log in logs], next_page_token) - # First check the LogEntry2 table for the most recent logs, unless we've been expressly told - # to look inside the "second" table. - TOKEN_TABLE_KEY2 = 'ttk2' - is_temp_table = page_token is not None and page_token.get(TOKEN_TABLE_KEY2) == 1 - if is_temp_table: - page_result = get_logs(database.LogEntry) - else: - page_result = get_logs(database.LogEntry2) + # First check the LogEntry3 table for the most recent logs, unless we've been expressly told + # to look inside the other tables. + TOKEN_TABLE_ID = 'tti' + tables = [database.LogEntry3, database.LogEntry2, database.LogEntry] - if page_result.next_page_token is None and not is_temp_table: - page_result = page_result._replace(next_page_token={TOKEN_TABLE_KEY2: 1}) - elif is_temp_table and page_result.next_page_token is not None: - page_result.next_page_token[TOKEN_TABLE_KEY2] = 1 + table_index = 0 + table_specified = page_token is not None and page_token.get(TOKEN_TABLE_ID) is not None + if table_specified: + table_index = page_token.get(TOKEN_TABLE_ID) + + page_result = get_logs(tables[table_index]) + if page_result.next_page_token is None and table_index < len(tables) - 1: + page_result = page_result._replace(next_page_token={TOKEN_TABLE_ID: table_index + 1}) return page_result @@ -97,16 +97,19 @@ class PreOCIModel(LogEntryDataInterface): if performer_name: performer = model.user.get_user(performer_name) - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. + # TODO(LogMigrate): Remove the branch once we're back on a single table. 
aggregated_logs = model.log.get_aggregated_logs(start_time, end_time, performer=performer, repository=repo, namespace=namespace_name, ignore=ignore, model=database.LogEntry) aggregated_logs_2 = model.log.get_aggregated_logs(start_time, end_time, performer=performer, repository=repo, namespace=namespace_name, ignore=ignore, model=database.LogEntry2) + aggregated_logs_3 = model.log.get_aggregated_logs(start_time, end_time, performer=performer, + repository=repo, namespace=namespace_name, + ignore=ignore, model=database.LogEntry3) entries = {} - for log in itertools.chain(aggregated_logs, aggregated_logs_2): + for log in itertools.chain(aggregated_logs, aggregated_logs_2, aggregated_logs_3): key = '%s-%s' % (log.kind_id, log.day) if key in entries: entries[key] = AggregatedLogEntry(log.count + entries[key].count, log.kind_id, log.day) diff --git a/endpoints/api/superuser.py b/endpoints/api/superuser.py index 99a4fbc45..40e3b4e12 100644 --- a/endpoints/api/superuser.py +++ b/endpoints/api/superuser.py @@ -134,8 +134,6 @@ class SuperUserAggregateLogs(ApiResource): if SuperUserPermission().can(): (start_time, end_time) = _validate_logs_arguments(parsed_args['starttime'], parsed_args['endtime']) - - # TODO(LogMigrate): Change to a unified log lookup util lib once we're back on LogEntry only. aggregated_logs = log_model.get_aggregated_logs(start_time, end_time) kinds = log_model.get_log_entry_kinds() return { @@ -166,9 +164,8 @@ class SuperUserLogs(ApiResource): if SuperUserPermission().can(): start_time = parsed_args['starttime'] end_time = parsed_args['endtime'] - (start_time, end_time) = _validate_logs_arguments(start_time, end_time) - # TODO(LogMigrate): Change to a unified log lookup util lib once we're back on LogEntry only. + (start_time, end_time) = _validate_logs_arguments(start_time, end_time) log_page = log_model.get_logs_query(start_time, end_time, page_token=page_token) kinds = log_model.get_log_entry_kinds() return { diff --git a/endpoints/api/test/test_logs_models_pre_oci.py b/endpoints/api/test/test_logs_models_pre_oci.py index fb575b063..f9a40b235 100644 --- a/endpoints/api/test/test_logs_models_pre_oci.py +++ b/endpoints/api/test/test_logs_models_pre_oci.py @@ -111,7 +111,8 @@ def test_does_repo_exist_returns_true(monkeypatch): def test_get_aggregated_logs(monkeypatch): get_aggregated_logs_mock = Mock() get_aggregated_logs_mock.side_effect = [[AttrDict({'day': '1', 'kind_id': 4, 'count': 6})], - [AttrDict({'day': '1', 'kind_id': 4, 'count': 12})]] + [AttrDict({'day': '1', 'kind_id': 4, 'count': 12})], + [AttrDict({'day': '1', 'kind_id': 4, 'count': 3})]] monkeypatch.setattr(model.log, 'get_aggregated_logs', get_aggregated_logs_mock) repo_mock = Mock() @@ -127,4 +128,4 @@ def test_get_aggregated_logs(monkeypatch): actual = pre_oci_model.get_aggregated_logs('start_time', 'end_time', 'performer_name', 'repository_name', 'namespace_name', set()) - assert actual == [AggregatedLogEntry(18, 4, '1')] + assert actual == [AggregatedLogEntry(21, 4, '1')] diff --git a/initdb.py b/initdb.py index 91e315d4e..08e7c377d 100644 --- a/initdb.py +++ b/initdb.py @@ -920,7 +920,7 @@ def populate_database(minimal=False, with_storage=False): model.repositoryactioncount.update_repository_score(to_count) -WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry', 'ManifestChild', +WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry', 'LogEntry2', 'ManifestChild', 'NamespaceGeoRestriction'] def find_models_missing_data(): diff --git a/test/helpers.py b/test/helpers.py index 29872c234..3493ed645 
100644 --- a/test/helpers.py +++ b/test/helpers.py @@ -3,7 +3,7 @@ import time import socket from contextlib import contextmanager -from data.database import LogEntryKind, LogEntry2 +from data.database import LogEntryKind, LogEntry3 class assert_action_logged(object): """ Specialized assertion for ensuring that a log entry of a particular kind was added under the @@ -14,7 +14,7 @@ class assert_action_logged(object): self.existing_count = 0 def _get_log_count(self): - return LogEntry2.select().where(LogEntry2.kind == LogEntryKind.get(name=self.log_kind)).count() + return LogEntry3.select().where(LogEntry3.kind == LogEntryKind.get(name=self.log_kind)).count() def __enter__(self): self.existing_count = self._get_log_count() diff --git a/workers/logrotateworker.py b/workers/logrotateworker.py index a0ad7d314..d9b60e7b9 100644 --- a/workers/logrotateworker.py +++ b/workers/logrotateworker.py @@ -8,7 +8,7 @@ from tempfile import SpooledTemporaryFile import features from app import app, storage -from data.database import UseThenDisconnect, LogEntry, LogEntry2 +from data.database import UseThenDisconnect, LogEntry, LogEntry2, LogEntry3 from data.model.log import (get_stale_logs, get_stale_logs_start_id, get_stale_logs_cutoff_id, delete_stale_logs) from data.userfiles import DelegateUserfiles @@ -36,8 +36,8 @@ class LogRotateWorker(Worker): self.add_operation(self._archive_logs, WORKER_FREQUENCY) def _archive_logs(self): - # TODO(LogMigrate): Remove the branch once we're back on LogEntry only. - models = [LogEntry, LogEntry2] + # TODO(LogMigrate): Remove the branch once we're back on a single table. + models = [LogEntry, LogEntry2, LogEntry3] for model in models: self._archive_logs_for_model(model) diff --git a/workers/test/test_exportactionlogsworker.py b/workers/test/test_exportactionlogsworker.py index 8ec370622..9eba55179 100644 --- a/workers/test/test_exportactionlogsworker.py +++ b/workers/test/test_exportactionlogsworker.py @@ -59,7 +59,7 @@ def test_process_queue_item(namespace, repo_name, expects_logs, app): created = storage.get_content(storage.preferred_locations, 'exportedactionlogs/' + storage_id) created_json = json.loads(created) - expected_count = database.LogEntry2.select().where(database.LogEntry2.repository == repo).count() + expected_count = database.LogEntry3.select().where(database.LogEntry3.repository == repo).count() assert (expected_count > 1) == expects_logs assert created_json['export_id'] == 'someid' From e3095087760bf68759726ddd8f05a6b9a0d1c8c4 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 3 Jan 2019 14:59:24 -0500 Subject: [PATCH 7/9] Change lookup of blobs to not use a subquery, which is murder on MySQL --- data/model/blob.py | 40 +++++++++++++++++----------------- data/model/oci/blob.py | 20 ++++++++--------- data/model/storage.py | 9 -------- endpoints/v2/test/test_blob.py | 2 +- 4 files changed, 31 insertions(+), 40 deletions(-) diff --git a/data/model/blob.py b/data/model/blob.py index 7c09c1334..2c087f754 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -16,16 +16,16 @@ def get_repository_blob_by_digest(repository, blob_digest): """ Find the content-addressable blob linked to the specified repository. 
""" try: - storage_id_query = (ImageStorage - .select(ImageStorage.id) - .join(Image) - .where(Image.repository == repository, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False) - .limit(1)) + storage = (ImageStorage + .select(ImageStorage.uuid) + .join(Image) + .where(Image.repository == repository, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False) + .get()) - return storage_model.get_storage_by_subquery(storage_id_query) - except InvalidImageException: + return storage_model.get_storage_by_uuid(storage.uuid) + except (ImageStorage.DoesNotExist, InvalidImageException): raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) @@ -33,18 +33,18 @@ def get_repo_blob_by_digest(namespace, repo_name, blob_digest): """ Find the content-addressable blob linked to the specified repository. """ try: - storage_id_query = (ImageStorage - .select(ImageStorage.id) - .join(Image) - .join(Repository) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)) - .where(Repository.name == repo_name, Namespace.username == namespace, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False) - .limit(1)) + storage = (ImageStorage + .select(ImageStorage.uuid) + .join(Image) + .join(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) + .where(Repository.name == repo_name, Namespace.username == namespace, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False) + .get()) - return storage_model.get_storage_by_subquery(storage_id_query) - except InvalidImageException: + return storage_model.get_storage_by_uuid(storage.uuid) + except (ImageStorage.DoesNotExist, InvalidImageException): raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) diff --git a/data/model/oci/blob.py b/data/model/oci/blob.py index 435d8fefb..b631491df 100644 --- a/data/model/oci/blob.py +++ b/data/model/oci/blob.py @@ -1,6 +1,6 @@ from data.database import ImageStorage, ManifestBlob from data.model import BlobDoesNotExist -from data.model.storage import get_storage_by_subquery, InvalidImageException +from data.model.storage import get_storage_by_uuid, InvalidImageException from data.model.blob import get_repository_blob_by_digest as legacy_get def get_repository_blob_by_digest(repository, blob_digest): @@ -8,16 +8,16 @@ def get_repository_blob_by_digest(repository, blob_digest): returns it or None if none. """ try: - storage_id_query = (ImageStorage - .select(ImageStorage.id) - .join(ManifestBlob) - .where(ManifestBlob.repository == repository, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False) - .limit(1)) + storage = (ImageStorage + .select(ImageStorage.uuid) + .join(ManifestBlob) + .where(ManifestBlob.repository == repository, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False) + .get()) - return get_storage_by_subquery(storage_id_query) - except InvalidImageException: + return get_storage_by_uuid(storage.uuid) + except (ImageStorage.DoesNotExist, InvalidImageException): # TODO(jschorr): Remove once we are no longer using the legacy tables. # Try the legacy call. 
try: diff --git a/data/model/storage.py b/data/model/storage.py index d8f3798a8..04f8b965b 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -233,15 +233,6 @@ def _get_storage(query_modifier): return found -def get_storage_by_subquery(subquery): - """ Returns the storage (and its locations) for the storage id returned by the subquery. The - subquery must return at most 1 result, which is a storage ID. """ - def filter_by_subquery(query): - return query.where(ImageStorage.id == subquery) - - return _get_storage(filter_by_subquery) - - def get_storage_by_uuid(storage_uuid): def filter_to_uuid(query): return query.where(ImageStorage.uuid == storage_uuid) diff --git a/endpoints/v2/test/test_blob.py b/endpoints/v2/test/test_blob.py index 7551bfb1c..86c324552 100644 --- a/endpoints/v2/test/test_blob.py +++ b/endpoints/v2/test/test_blob.py @@ -50,7 +50,7 @@ def test_blob_caching(method, endpoint, client, app): with patch('endpoints.v2.blob.model_cache', InMemoryDataModelCache()): # First request should make a DB query to retrieve the blob. - with assert_query_count(3): + with assert_query_count(4): conduct_call(client, 'v2.' + endpoint, url_for, method, params, expected_code=200, headers=headers) From c510759df3c7510d3a2bb5928a76f03d758f4284 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 4 Jan 2019 13:35:14 -0500 Subject: [PATCH 8/9] Remove the foreign key constraint from the kind field on the logentry3 table --- data/database.py | 2 +- data/migrations/versions/6ec8726c0ace_add_logentry3_table.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/data/database.py b/data/database.py index 883d82664..120aee6f3 100644 --- a/data/database.py +++ b/data/database.py @@ -1041,7 +1041,7 @@ class LogEntry2(BaseModel): class LogEntry3(BaseModel): id = BigAutoField() - kind = ForeignKeyField(LogEntryKind) + kind = IntegerField(db_column='kind_id') account = IntegerField(db_column='account_id') performer = IntegerField(null=True, db_column='performer_id') repository = IntegerField(null=True, db_column='repository_id') diff --git a/data/migrations/versions/6ec8726c0ace_add_logentry3_table.py b/data/migrations/versions/6ec8726c0ace_add_logentry3_table.py index c2310fdd1..ce0165e36 100644 --- a/data/migrations/versions/6ec8726c0ace_add_logentry3_table.py +++ b/data/migrations/versions/6ec8726c0ace_add_logentry3_table.py @@ -25,12 +25,10 @@ def upgrade(tables, tester): sa.Column('datetime', sa.DateTime(), nullable=False), sa.Column('ip', sa.String(length=255), nullable=True), sa.Column('metadata_json', sa.Text(), nullable=False), - sa.ForeignKeyConstraint(['kind_id'], ['logentrykind.id'], name=op.f('fk_logentry3_kind_id_logentrykind')), sa.PrimaryKeyConstraint('id', name=op.f('pk_logentry3')) ) op.create_index('logentry3_account_id_datetime', 'logentry3', ['account_id', 'datetime'], unique=False) op.create_index('logentry3_datetime', 'logentry3', ['datetime'], unique=False) - op.create_index('logentry3_kind_id', 'logentry3', ['kind_id'], unique=False) op.create_index('logentry3_performer_id_datetime', 'logentry3', ['performer_id', 'datetime'], unique=False) op.create_index('logentry3_repository_id_datetime_kind_id', 'logentry3', ['repository_id', 'datetime', 'kind_id'], unique=False) # ### end Alembic commands ### From 636f65cb8171b3317695cd4ba938d3b8178d9b01 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 4 Jan 2019 15:35:00 -0500 Subject: [PATCH 9/9] Disable flaky test on MySQL --- endpoints/api/test/test_logs.py | 3 +++ 1 file changed, 3 insertions(+) diff 
--git a/endpoints/api/test/test_logs.py b/endpoints/api/test/test_logs.py index 3720d3c95..a73561bfa 100644 --- a/endpoints/api/test/test_logs.py +++ b/endpoints/api/test/test_logs.py @@ -1,3 +1,4 @@ +import os import time from mock import patch @@ -9,6 +10,8 @@ from endpoints.test.shared import client_with_identity from test.fixtures import * +@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0, + reason="Queue code is very sensitive to times on MySQL, making this flaky") def test_export_logs(client): with client_with_identity('devtable', client) as cl: assert export_action_logs_queue.get() is None
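
Patches 1 and 6 page log reads across LogEntry3, LogEntry2, and LogEntry by carrying a table index (the 'tti' key) in the pagination token: the newest table is queried first, and once it stops yielding a next page the token advances to the next-older table. Below is a minimal, self-contained sketch of that chaining pattern, assuming plain Python lists and a toy paginate_list helper in place of the Peewee models and data.model.modelutil.paginate; the helper and function names here are illustrative, not Quay's actual API.

TOKEN_TABLE_ID = 'tti'  # same token key the patch uses
PAGE_SIZE = 3

def paginate_list(entries, page_token, limit):
    """Toy stand-in for modelutil.paginate over an in-memory list."""
    start = page_token.get('start', 0) if page_token else 0
    page = entries[start:start + limit]
    next_token = {'start': start + limit} if start + limit < len(entries) else None
    return page, next_token

def get_logs_page(tables, page_token=None):
    """Read the table named in the token (default: newest). When that table is
    exhausted, hand back a token pointing at the next-older table."""
    table_index = (page_token or {}).get(TOKEN_TABLE_ID, 0)
    page, next_token = paginate_list(tables[table_index], page_token, PAGE_SIZE)

    if next_token is None and table_index < len(tables) - 1:
        next_token = {TOKEN_TABLE_ID: table_index + 1}  # continue in the older table
    elif next_token is not None:
        next_token[TOKEN_TABLE_ID] = table_index        # stay in this table

    return page, next_token

if __name__ == '__main__':
    log_entry3 = [{'id': i, 'table': 3} for i in range(5, 0, -1)]
    log_entry2 = [{'id': i, 'table': 2} for i in range(4, 0, -1)]
    log_entry1 = [{'id': i, 'table': 1} for i in range(2, 0, -1)]
    token = None
    while True:
        page, token = get_logs_page([log_entry3, log_entry2, log_entry1], token)
        print(page)
        if token is None:
            break

Each call drains one page, so callers see a single continuous stream of results even though the rows live in three separate tables.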