Merge pull request #3318 from quay/prod-fire-cleanup

Prod fire cleanup fixes
Joseph Schorr 2019-01-07 14:11:42 -05:00 committed by GitHub
commit a156c91962
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 297 additions and 107 deletions

View file

@@ -550,3 +550,15 @@ class DefaultConfig(ImmutableConfig):
  # Defines the maximum number of pages the user can paginate before they are limited
  SEARCH_MAX_RESULT_PAGE_COUNT = 10

+  # Feature Flag: Whether to record when users were last accessed.
+  FEATURE_USER_LAST_ACCESSED = True

+  # Feature Flag: Whether to allow users to retrieve aggregated log counts.
+  FEATURE_AGGREGATED_LOG_COUNT_RETRIEVAL = True

+  # Feature Flag: Whether to support log exporting.
+  FEATURE_LOG_EXPORT = True

+  # Maximum number of action logs pages that can be returned via the API.
+  ACTION_LOG_MAX_PAGE = None

View file

@@ -1018,6 +1018,47 @@ class LogEntry(BaseModel):
  )

+class LogEntry2(BaseModel):
+  """ TEMP FOR QUAY.IO ONLY. DO NOT RELEASE INTO QUAY ENTERPRISE. """
+  kind = ForeignKeyField(LogEntryKind)
+  account = IntegerField(index=True, db_column='account_id')
+  performer = IntegerField(index=True, null=True, db_column='performer_id')
+  repository = IntegerField(index=True, null=True, db_column='repository_id')
+  datetime = DateTimeField(default=datetime.now, index=True)
+  ip = CharField(null=True)
+  metadata_json = TextField(default='{}')

+  class Meta:
+    database = db
+    read_slaves = (read_slave,)
+    indexes = (
+      (('account', 'datetime'), False),
+      (('performer', 'datetime'), False),
+      (('repository', 'datetime'), False),
+      (('repository', 'datetime', 'kind'), False),
+    )

+class LogEntry3(BaseModel):
+  id = BigAutoField()
+  kind = IntegerField(db_column='kind_id')
+  account = IntegerField(db_column='account_id')
+  performer = IntegerField(null=True, db_column='performer_id')
+  repository = IntegerField(null=True, db_column='repository_id')
+  datetime = DateTimeField(default=datetime.now, index=True)
+  ip = CharField(null=True)
+  metadata_json = TextField(default='{}')

+  class Meta:
+    database = db
+    read_slaves = (read_slave,)
+    indexes = (
+      (('account', 'datetime'), False),
+      (('performer', 'datetime'), False),
+      (('repository', 'datetime', 'kind'), False),
+    )

class RepositoryActionCount(BaseModel):
  repository = ForeignKeyField(Repository)
  count = IntegerField()

View file

@@ -0,0 +1,40 @@
"""Add LogEntry3 table

Revision ID: 6ec8726c0ace
Revises: 54492a68a3cf
Create Date: 2019-01-03 13:41:02.897957

"""

# revision identifiers, used by Alembic.
revision = '6ec8726c0ace'
down_revision = '54492a68a3cf'

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql


def upgrade(tables, tester):
  # ### commands auto generated by Alembic - please adjust! ###
  op.create_table('logentry3',
                  sa.Column('id', sa.BigInteger(), nullable=False),
                  sa.Column('kind_id', sa.Integer(), nullable=False),
                  sa.Column('account_id', sa.Integer(), nullable=False),
                  sa.Column('performer_id', sa.Integer(), nullable=True),
                  sa.Column('repository_id', sa.Integer(), nullable=True),
                  sa.Column('datetime', sa.DateTime(), nullable=False),
                  sa.Column('ip', sa.String(length=255), nullable=True),
                  sa.Column('metadata_json', sa.Text(), nullable=False),
                  sa.PrimaryKeyConstraint('id', name=op.f('pk_logentry3')))
  op.create_index('logentry3_account_id_datetime', 'logentry3', ['account_id', 'datetime'], unique=False)
  op.create_index('logentry3_datetime', 'logentry3', ['datetime'], unique=False)
  op.create_index('logentry3_performer_id_datetime', 'logentry3', ['performer_id', 'datetime'], unique=False)
  op.create_index('logentry3_repository_id_datetime_kind_id', 'logentry3', ['repository_id', 'datetime', 'kind_id'], unique=False)
  # ### end Alembic commands ###


def downgrade(tables, tester):
  # ### commands auto generated by Alembic - please adjust! ###
  op.drop_table('logentry3')
  # ### end Alembic commands ###

View file

@@ -162,6 +162,9 @@ def calculate_image_aggregate_size(ancestors_str, image_size, parent_image):
def update_last_accessed(token_or_user):
  """ Updates the `last_accessed` field on the given token or user. If the existing field's value
      is within the configured threshold, the update is skipped. """
+  if not config.app_config.get('FEATURE_USER_LAST_ACCESSED'):
+    return

  threshold = timedelta(seconds=config.app_config.get('LAST_ACCESSED_UPDATE_THRESHOLD_S', 120))
  if (token_or_user.last_accessed is not None and
      datetime.utcnow() - token_or_user.last_accessed < threshold):

View file

@@ -16,16 +16,16 @@ def get_repository_blob_by_digest(repository, blob_digest):
  """ Find the content-addressable blob linked to the specified repository.
  """
  try:
-    storage_id_query = (ImageStorage
-                        .select(ImageStorage.id)
+    storage = (ImageStorage
+               .select(ImageStorage.uuid)
               .join(Image)
               .where(Image.repository == repository,
                      ImageStorage.content_checksum == blob_digest,
                      ImageStorage.uploading == False)
-               .limit(1))
-    return storage_model.get_storage_by_subquery(storage_id_query)
-  except InvalidImageException:
+               .get())
+    return storage_model.get_storage_by_uuid(storage.uuid)
+  except (ImageStorage.DoesNotExist, InvalidImageException):
    raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))

@@ -33,18 +33,18 @@ def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
  """ Find the content-addressable blob linked to the specified repository.
  """
  try:
-    storage_id_query = (ImageStorage
-                        .select(ImageStorage.id)
+    storage = (ImageStorage
+               .select(ImageStorage.uuid)
               .join(Image)
               .join(Repository)
               .join(Namespace, on=(Namespace.id == Repository.namespace_user))
               .where(Repository.name == repo_name, Namespace.username == namespace,
                      ImageStorage.content_checksum == blob_digest,
                      ImageStorage.uploading == False)
-               .limit(1))
-    return storage_model.get_storage_by_subquery(storage_id_query)
-  except InvalidImageException:
+               .get())
+    return storage_model.get_storage_by_uuid(storage.uuid)
+  except (ImageStorage.DoesNotExist, InvalidImageException):
    raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))
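Note: the change above swaps a subquery handed to the now-removed get_storage_by_subquery for a direct peewee .get() plus a DoesNotExist handler. A minimal, self-contained sketch of that lookup pattern against an in-memory SQLite database; the BlobStorage model, field set, and digest value are stand-ins for illustration, not Quay's actual schema:

from peewee import SqliteDatabase, Model, CharField, BooleanField

db = SqliteDatabase(':memory:')

class BlobStorage(Model):
    """Stand-in for ImageStorage: just enough fields for the lookup."""
    uuid = CharField()
    content_checksum = CharField(index=True)
    uploading = BooleanField(default=False)

    class Meta:
        database = db

db.connect()
db.create_tables([BlobStorage])
BlobStorage.create(uuid='abc-123', content_checksum='sha256:deadbeef')

def get_blob_uuid(digest):
    # .get() returns a single row or raises <Model>.DoesNotExist,
    # which is simpler to reason about than passing a subquery around.
    try:
        row = (BlobStorage
               .select(BlobStorage.uuid)
               .where(BlobStorage.content_checksum == digest,
                      BlobStorage.uploading == False)
               .get())
        return row.uuid
    except BlobStorage.DoesNotExist:
        return None

assert get_blob_uuid('sha256:deadbeef') == 'abc-123'
assert get_blob_uuid('sha256:missing') is None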

View file

@@ -7,7 +7,8 @@ from datetime import datetime, timedelta
from cachetools import lru_cache

import data
-from data.database import LogEntry, LogEntryKind, User, RepositoryActionCount, db
+from data.database import (LogEntry, LogEntryKind, User, RepositoryActionCount, db, LogEntry3,
+                           LogEntry3)
from data.model import config, user, DataModelException

logger = logging.getLogger(__name__)

@@ -16,8 +17,9 @@ ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo']

def _logs_query(selections, start_time=None, end_time=None, performer=None, repository=None,
-                namespace=None, ignore=None, model=LogEntry, id_range=None):
+                namespace=None, ignore=None, model=LogEntry3, id_range=None):
  """ Returns a query for selecting logs from the table, with various options and filters. """
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  assert (start_time is not None and end_time is not None) or (id_range is not None)
  joined = (model.select(*selections).switch(model))

@@ -63,8 +65,9 @@ def _get_log_entry_kind(name):

def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None,
-                        ignore=None, model=LogEntry):
+                        ignore=None, model=LogEntry3):
  """ Returns the count of logs, by kind and day, for the logs matching the given filters. """
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  date = db.extract_date('day', model.datetime)
  selections = [model.kind, date.alias('day'), fn.Count(model.id).alias('count')]
  query = _logs_query(selections, start_time, end_time, performer, repository, namespace, ignore,

@@ -73,8 +76,9 @@ def get_aggregated_logs(start_time, end_time, performer=None, repository=None, n

def get_logs_query(start_time=None, end_time=None, performer=None, repository=None, namespace=None,
-                   ignore=None, model=LogEntry, id_range=None):
+                   ignore=None, model=LogEntry3, id_range=None):
  """ Returns the logs matching the given filters. """
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  Performer = User.alias()
  Account = User.alias()
  selections = [model, Performer]

@@ -134,7 +138,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository=
  }

  try:
-    LogEntry.create(**log_data)
+    LogEntry3.create(**log_data)
  except PeeweeException as ex:
    strict_logging_disabled = config.app_config.get('ALLOW_PULLS_WITHOUT_STRICT_LOGGING')
    if strict_logging_disabled and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING:

@@ -145,6 +149,7 @@ def log_action(kind_name, user_or_organization_name, performer=None, repository=

def get_stale_logs_start_id(model):
  """ Gets the oldest log entry. """
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  try:
    return (model.select(model.id).order_by(model.id).limit(1).tuples())[0][0]
  except IndexError:

@@ -153,6 +158,7 @@ def get_stale_logs_start_id(model):

def get_stale_logs_cutoff_id(cutoff_date, model):
  """ Gets the most recent ID created before the cutoff_date. """
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  try:
    return (model.select(fn.Max(model.id)).where(model.datetime <= cutoff_date)
            .tuples())[0][0]

@@ -162,11 +168,13 @@ def get_stale_logs_cutoff_id(cutoff_date, model):

def get_stale_logs(start_id, end_id, model):
  """ Returns all the logs with IDs between start_id and end_id inclusively. """
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  return model.select().where((model.id >= start_id), (model.id <= end_id))


def delete_stale_logs(start_id, end_id, model):
  """ Deletes all the logs with IDs between start_id and end_id. """
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
  model.delete().where((model.id >= start_id), (model.id <= end_id)).execute()
@@ -198,49 +206,52 @@ def get_repositories_action_sums(repository_ids):
  return action_count_map


-def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None):
+def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry3):
  """ Returns the minimum ID for logs matching the given repository or namespace in
      the logs table, starting at the given start time.
  """
  # First try bounded by a day. Most repositories will meet this criteria, and therefore
  # can make a much faster query.
  day_after = start_time + timedelta(days=1)
-  result = _get_bounded_id(fn.Min, LogEntry.datetime >= start_time,
-                           repository_id, namespace_id, LogEntry.datetime < day_after)
+  result = _get_bounded_id(fn.Min, model.datetime >= start_time,
+                           repository_id, namespace_id, model.datetime < day_after, model=model)
  if result is not None:
    return result

-  return _get_bounded_id(fn.Min, LogEntry.datetime >= start_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Min, model.datetime >= start_time, repository_id, namespace_id,
+                         model=model)


-def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None):
+def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry3):
  """ Returns the maximum ID for logs matching the given repository or namespace in
      the logs table, ending at the given end time.
  """
  # First try bounded by a day. Most repositories will meet this criteria, and therefore
  # can make a much faster query.
  day_before = end_time - timedelta(days=1)
-  result = _get_bounded_id(fn.Max, LogEntry.datetime <= end_time,
-                           repository_id, namespace_id, LogEntry.datetime > day_before)
+  result = _get_bounded_id(fn.Max, model.datetime <= end_time,
+                           repository_id, namespace_id, model.datetime > day_before, model=model)
  if result is not None:
    return result

-  return _get_bounded_id(fn.Max, LogEntry.datetime <= end_time, repository_id, namespace_id)
+  return _get_bounded_id(fn.Max, model.datetime <= end_time, repository_id, namespace_id,
+                         model=model)


-def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None):
+def _get_bounded_id(fn, filter_clause, repository_id, namespace_id, reduction_clause=None,
+                    model=LogEntry3):
  assert (namespace_id is not None) or (repository_id is not None)
-  query = (LogEntry
-           .select(fn(LogEntry.id))
+  query = (model
+           .select(fn(model.id))
           .where(filter_clause))

  if reduction_clause is not None:
    query = query.where(reduction_clause)

  if repository_id is not None:
-    query = query.where(LogEntry.repository == repository_id)
+    query = query.where(model.repository == repository_id)
  else:
-    query = query.where(LogEntry.account == namespace_id)
+    query = query.where(model.account == namespace_id)

  row = query.tuples()[0]
  if not row:

View file

@@ -1,6 +1,7 @@
from peewee import SQL

-def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None):
+def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None,
+             max_page=None):
  """ Paginates the given query using an ID range, starting at the optional page_token.
      Returns a *list* of matching results along with an unencrypted page_token for the
      next page, if any. If descending is set to True, orders by the ID descending rather

@@ -27,7 +28,12 @@ def paginate(query, model, descending=False, page_token=None, limit=50, id_alias
    query = query.where(model.id >= start_id)

  query = query.limit(limit + 1)
-  return paginate_query(query, limit=limit, id_alias=id_alias)
+
+  page_number = (page_token.get('page_number') or None) if page_token else None
+  if page_number is not None and max_page is not None and page_number > max_page:
+    return [], None
+
+  return paginate_query(query, limit=limit, id_alias=id_alias, page_number=page_number)


def pagination_start(page_token=None):

@@ -38,7 +44,7 @@ def pagination_start(page_token=None):
  return None


-def paginate_query(query, limit=50, id_alias=None):
+def paginate_query(query, limit=50, id_alias=None, page_number=None):
  """ Executes the given query and returns a page's worth of results, as well as the page token
      for the next page (if any).
  """

@@ -47,7 +53,8 @@ def paginate_query(query, limit=50, id_alias=None):
  if len(results) > limit:
    start_id = getattr(results[limit], id_alias or 'id')
    page_token = {
-      'start_id': start_id
+      'start_id': start_id,
+      'page_number': page_number + 1 if page_number else 1,
    }

  return results[0:limit], page_token
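Note: the new page_number/max_page plumbing is what lets ACTION_LOG_MAX_PAGE cap how deep a client can page. A stripped-down, runnable sketch of that behavior with the database query replaced by a plain list; the names mirror the diff where possible, but start_index and the paginate body here are illustrative stand-ins, not the actual modelutil code:

def paginate(items, page_token=None, limit=3, max_page=None):
    """Return (page, next_page_token), refusing to go past max_page."""
    page_number = (page_token.get('page_number') or None) if page_token else None
    if page_number is not None and max_page is not None and page_number > max_page:
        return [], None  # Hard cutoff: pretend there is nothing further.

    start = page_token['start_index'] if page_token else 0
    results = items[start:start + limit + 1]  # Fetch one extra row to detect a next page.

    next_token = None
    if len(results) > limit:
        next_token = {
            'start_index': start + limit,
            'page_number': page_number + 1 if page_number else 1,
        }
    return results[:limit], next_token

# Walk 10 items, 3 per page, but stop after page 2 (an ACTION_LOG_MAX_PAGE-style cap).
items, token, pages = list(range(10)), None, []
while True:
    page, token = paginate(items, page_token=token, limit=3, max_page=2)
    if page:
        pages.append(page)
    if token is None:
        break
print(pages)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8]] -- the fourth page is cut off by max_page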

View file

@@ -1,6 +1,6 @@
from data.database import ImageStorage, ManifestBlob
from data.model import BlobDoesNotExist
-from data.model.storage import get_storage_by_subquery, InvalidImageException
+from data.model.storage import get_storage_by_uuid, InvalidImageException
from data.model.blob import get_repository_blob_by_digest as legacy_get


def get_repository_blob_by_digest(repository, blob_digest):

@@ -8,16 +8,16 @@ def get_repository_blob_by_digest(repository, blob_digest):
      returns it or None if none.
  """
  try:
-    storage_id_query = (ImageStorage
-                        .select(ImageStorage.id)
+    storage = (ImageStorage
+               .select(ImageStorage.uuid)
               .join(ManifestBlob)
               .where(ManifestBlob.repository == repository,
                      ImageStorage.content_checksum == blob_digest,
                      ImageStorage.uploading == False)
-               .limit(1))
-    return get_storage_by_subquery(storage_id_query)
-  except InvalidImageException:
+               .get())
+    return get_storage_by_uuid(storage.uuid)
+  except (ImageStorage.DoesNotExist, InvalidImageException):
    # TODO(jschorr): Remove once we are no longer using the legacy tables.
    # Try the legacy call.
    try:

View file

@@ -4,7 +4,7 @@ from collections import namedtuple
from peewee import IntegrityError

from datetime import date, timedelta, datetime
-from data.database import (Repository, LogEntry, RepositoryActionCount,
+from data.database import (Repository, LogEntry, LogEntry2, LogEntry3, RepositoryActionCount,
                           RepositorySearchScore, db_random_func, fn)

logger = logging.getLogger(__name__)

@@ -52,12 +52,17 @@ def count_repository_actions(to_count):
  today = date.today()
  yesterday = today - timedelta(days=1)

-  actions = (LogEntry
-             .select()
-             .where(LogEntry.repository == to_count,
-                    LogEntry.datetime >= yesterday,
-                    LogEntry.datetime < today)
-             .count())
+  # TODO(LogMigrate): Remove the branch once we're back on a single table.
+  def lookup_action_count(model):
+    return (model
+            .select()
+            .where(model.repository == to_count,
+                   model.datetime >= yesterday,
+                   model.datetime < today)
+            .count())
+
+  actions = (lookup_action_count(LogEntry3) + lookup_action_count(LogEntry2) +
+             lookup_action_count(LogEntry))

  try:
    RepositoryActionCount.create(repository=to_count, date=yesterday, count=actions)
    return True
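Note: because new writes land in LogEntry3 while older rows remain in LogEntry2 and LogEntry, yesterday's action count is only correct if all three tables are consulted. A tiny standalone sketch of that summing pattern, with in-memory lists standing in for the tables (the data here is invented purely for illustration):

from datetime import date, timedelta

# Stand-ins for the three log tables: each entry is (repository_id, day).
LOG_ENTRY   = [(1, date(2019, 1, 6))]                          # oldest table
LOG_ENTRY_2 = [(1, date(2019, 1, 6)), (2, date(2019, 1, 6))]
LOG_ENTRY_3 = [(1, date(2019, 1, 6)), (1, date(2019, 1, 7))]   # current write target

def count_repository_actions(repository_id, yesterday):
    def lookup_action_count(table):
        # Same shape as the peewee query in the diff: filter by repo and day, then count.
        return sum(1 for repo, day in table if repo == repository_id and day == yesterday)

    # Sum across every table that may still hold rows for that day.
    return (lookup_action_count(LOG_ENTRY_3) +
            lookup_action_count(LOG_ENTRY_2) +
            lookup_action_count(LOG_ENTRY))

yesterday = date(2019, 1, 7) - timedelta(days=1)
print(count_repository_actions(1, yesterday))  # 3: one matching row in each table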

View file

@@ -233,15 +233,6 @@ def _get_storage(query_modifier):
  return found


-def get_storage_by_subquery(subquery):
-  """ Returns the storage (and its locations) for the storage id returned by the subquery. The
-      subquery must return at most 1 result, which is a storage ID. """
-  def filter_by_subquery(query):
-    return query.where(ImageStorage.id == subquery)
-
-  return _get_storage(filter_by_subquery)
-
-
def get_storage_by_uuid(storage_uuid):
  def filter_to_uuid(query):
    return query.where(ImageStorage.uuid == storage_uuid)

View file

@@ -1,6 +1,6 @@
import pytest

-from data.database import LogEntry, User
+from data.database import LogEntry3, User
from data.model import config as _config
from data.model.log import log_action

@@ -21,8 +21,8 @@ def logentry_kind():

@pytest.fixture()
def logentry(logentry_kind):
-  with patch('data.database.LogEntry.create', spec=True):
-    yield LogEntry
+  with patch('data.database.LogEntry3.create', spec=True):
+    yield LogEntry3

@pytest.fixture()
def user():

View file

@@ -6,11 +6,13 @@ from datetime import datetime, timedelta

from flask import request

+import features
+
from app import export_action_logs_queue
from endpoints.api import (resource, nickname, ApiResource, query_param, parse_args,
                           RepositoryParamResource, require_repo_admin, related_user_resource,
                           format_date, require_user_admin, path_param, require_scope, page_support,
-                           validate_json_request, InvalidRequest)
+                           validate_json_request, InvalidRequest, show_if)
from data import model as data_model
from endpoints.api.logs_models_pre_oci import pre_oci_model as model
from endpoints.exception import Unauthorized, NotFound

@@ -150,6 +152,7 @@ class OrgLogs(ApiResource):

@resource('/v1/repository/<apirepopath:repository>/aggregatelogs')
+@show_if(features.AGGREGATED_LOG_COUNT_RETRIEVAL)
@path_param('repository', 'The full path of the repository. e.g. namespace/name')
class RepositoryAggregateLogs(RepositoryParamResource):
  """ Resource for fetching aggregated logs for the specific repository. """

@@ -170,6 +173,7 @@ class RepositoryAggregateLogs(RepositoryParamResource):

@resource('/v1/user/aggregatelogs')
+@show_if(features.AGGREGATED_LOG_COUNT_RETRIEVAL)
class UserAggregateLogs(ApiResource):
  """ Resource for fetching aggregated logs for the current user. """

@@ -191,6 +195,7 @@ class UserAggregateLogs(ApiResource):

@resource('/v1/organization/<orgname>/aggregatelogs')
+@show_if(features.AGGREGATED_LOG_COUNT_RETRIEVAL)
@path_param('orgname', 'The name of the organization')
@related_user_resource(UserLogs)
class OrgAggregateLogs(ApiResource):

@@ -314,6 +319,7 @@ class ExportUserLogs(ApiResource):

@resource('/v1/organization/<orgname>/exportlogs')
+@show_if(features.LOG_EXPORT)
@path_param('orgname', 'The name of the organization')
@related_user_resource(ExportUserLogs)
class ExportOrgLogs(ApiResource):

@@ -329,7 +335,7 @@ class ExportOrgLogs(ApiResource):
  @require_scope(scopes.ORG_ADMIN)
  @validate_json_request('ExportLogs')
  def post(self, orgname, parsed_args):
-    """ Gets the aggregated logs for the specified organization. """
+    """ Exports the logs for the specified organization. """
    permission = AdministerOrganizationPermission(orgname)
    if permission.can():
      start_time = parsed_args['starttime']

View file

@@ -1,5 +1,6 @@
import itertools

+from app import app
from data import model, database
from endpoints.api.logs_models_interface import LogEntryDataInterface, LogEntryPage, LogEntry, AggregatedLogEntry

@@ -49,6 +50,7 @@ class PreOCIModel(LogEntryDataInterface):
    if performer_name:
      performer = model.user.get_user(performer_name)

+    # TODO(LogMigrate): Remove the branch once we're back on a single table.
    def get_logs(m):
      logs_query = model.log.get_logs_query(start_time, end_time, performer=performer,
                                            repository=repo, namespace=namespace_name,

@@ -56,10 +58,25 @@ class PreOCIModel(LogEntryDataInterface):
      logs, next_page_token = model.modelutil.paginate(logs_query, m,
                                                       descending=True, page_token=page_token,
-                                                      limit=20)
+                                                      limit=20,
+                                                      max_page=app.config['ACTION_LOG_MAX_PAGE'])

      return LogEntryPage([create_log(log) for log in logs], next_page_token)

-    return get_logs(database.LogEntry)
+    # First check the LogEntry3 table for the most recent logs, unless we've been expressly told
+    # to look inside the other tables.
+    TOKEN_TABLE_ID = 'tti'
+    tables = [database.LogEntry3, database.LogEntry2, database.LogEntry]
+
+    table_index = 0
+    table_specified = page_token is not None and page_token.get(TOKEN_TABLE_ID) is not None
+    if table_specified:
+      table_index = page_token.get(TOKEN_TABLE_ID)
+
+    page_result = get_logs(tables[table_index])
+    if page_result.next_page_token is None and table_index < len(tables) - 1:
+      page_result = page_result._replace(next_page_token={TOKEN_TABLE_ID: table_index + 1})
+
+    return page_result

  def get_log_entry_kinds(self):
    return model.log.get_log_entry_kinds()

@@ -80,12 +97,19 @@ class PreOCIModel(LogEntryDataInterface):
    if performer_name:
      performer = model.user.get_user(performer_name)

+    # TODO(LogMigrate): Remove the branch once we're back on a single table.
    aggregated_logs = model.log.get_aggregated_logs(start_time, end_time, performer=performer,
                                                    repository=repo, namespace=namespace_name,
                                                    ignore=ignore, model=database.LogEntry)
+    aggregated_logs_2 = model.log.get_aggregated_logs(start_time, end_time, performer=performer,
+                                                      repository=repo, namespace=namespace_name,
+                                                      ignore=ignore, model=database.LogEntry2)
+    aggregated_logs_3 = model.log.get_aggregated_logs(start_time, end_time, performer=performer,
+                                                      repository=repo, namespace=namespace_name,
+                                                      ignore=ignore, model=database.LogEntry3)

    entries = {}
-    for log in aggregated_logs:
+    for log in itertools.chain(aggregated_logs, aggregated_logs_2, aggregated_logs_3):
      key = '%s-%s' % (log.kind_id, log.day)
      if key in entries:
        entries[key] = AggregatedLogEntry(log.count + entries[key].count, log.kind_id, log.day)
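Note: the multi-table walk above can be hard to follow in diff form. The page token gains a 'tti' key naming which table the client is currently paging through, and when one table runs dry the returned token simply points at the next table. A self-contained sketch of that control flow, with plain lists standing in for the LogEntry3/LogEntry2/LogEntry tables; the helper bodies are illustrative, not the real model code:

TOKEN_TABLE_ID = 'tti'

# Newest table first; paging falls through to older tables when one is exhausted.
TABLES = [
    ['new-3', 'new-2', 'new-1'],   # stand-in for LogEntry3
    ['mid-1'],                     # stand-in for LogEntry2
    ['old-2', 'old-1'],            # stand-in for LogEntry
]

def get_logs_page(table, page_token, limit=2):
    """Page through a single 'table'; return (rows, next_token or None)."""
    start = page_token.get('start', 0) if page_token else 0
    rows = table[start:start + limit]
    next_token = {'start': start + limit} if start + limit < len(table) else None
    return rows, next_token

def get_logs(page_token=None):
    table_index = page_token.get(TOKEN_TABLE_ID, 0) if page_token else 0
    rows, next_token = get_logs_page(TABLES[table_index], page_token)
    if next_token is None and table_index < len(TABLES) - 1:
        # This table is exhausted: hand back a token that targets the next table.
        next_token = {TOKEN_TABLE_ID: table_index + 1}
    elif next_token is not None:
        next_token[TOKEN_TABLE_ID] = table_index
    return rows, next_token

token, seen = None, []
while True:
    rows, token = get_logs(token)
    seen.extend(rows)
    if token is None:
        break
print(seen)  # ['new-3', 'new-2', 'new-1', 'mid-1', 'old-2', 'old-1']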

View file

@@ -1,3 +1,5 @@
+import features
+
from app import avatar
from data import model
from data.database import User, FederatedLogin, Team as TeamTable, Repository, RobotAccountMetadata

@@ -25,7 +27,8 @@ class RobotPreOCIModel(RobotInterface):
      'name': robot_name,
      'token': robot_tuple.get(FederatedLogin.service_ident) if include_token else None,
      'created': robot_tuple.get(User.creation_date),
-      'last_accessed': robot_tuple.get(User.last_accessed),
+      'last_accessed': (robot_tuple.get(User.last_accessed)
+                        if features.USER_LAST_ACCESSED else None),
      'description': robot_tuple.get(RobotAccountMetadata.description),
      'unstructured_metadata': robot_tuple.get(RobotAccountMetadata.unstructured_json),
    }

@@ -58,7 +61,8 @@ class RobotPreOCIModel(RobotInterface):
          robot_dict['repositories'].append(repository_name)
        robots[robot_name] = RobotWithPermissions(robot_dict['name'], robot_dict['token'],
                                                  robot_dict['created'],
-                                                 robot_dict['last_accessed'],
+                                                 (robot_dict['last_accessed']
+                                                  if features.USER_LAST_ACCESSED else None),
                                                  robot_dict['teams'],
                                                  robot_dict['repositories'],
                                                  robot_dict['description'])

View file

@@ -134,7 +134,6 @@ class SuperUserAggregateLogs(ApiResource):
    if SuperUserPermission().can():
      (start_time, end_time) = _validate_logs_arguments(parsed_args['starttime'],
                                                        parsed_args['endtime'])
-
      aggregated_logs = log_model.get_aggregated_logs(start_time, end_time)
      kinds = log_model.get_log_entry_kinds()
      return {

View file

@@ -1,3 +1,4 @@
+import os
import time

from mock import patch

@@ -9,6 +10,8 @@ from endpoints.test.shared import client_with_identity
from test.fixtures import *


+@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
+                    reason="Queue code is very sensitive to times on MySQL, making this flaky")
def test_export_logs(client):
  with client_with_identity('devtable', client) as cl:
    assert export_action_logs_queue.get() is None

View file

@@ -110,7 +110,9 @@ def test_does_repo_exist_returns_true(monkeypatch):
def test_get_aggregated_logs(monkeypatch):
  get_aggregated_logs_mock = Mock()
-  get_aggregated_logs_mock.side_effect = [[AttrDict({'day': '1', 'kind_id': 4, 'count': 6})]]
+  get_aggregated_logs_mock.side_effect = [[AttrDict({'day': '1', 'kind_id': 4, 'count': 6})],
+                                          [AttrDict({'day': '1', 'kind_id': 4, 'count': 12})],
+                                          [AttrDict({'day': '1', 'kind_id': 4, 'count': 3})]]
  monkeypatch.setattr(model.log, 'get_aggregated_logs', get_aggregated_logs_mock)

  repo_mock = Mock()

@@ -126,4 +128,4 @@ def test_get_aggregated_logs(monkeypatch):
  actual = pre_oci_model.get_aggregated_logs('start_time', 'end_time', 'performer_name', 'repository_name',
                                             'namespace_name', set())

-  assert actual == [AggregatedLogEntry(6, 4, '1')]
+  assert actual == [AggregatedLogEntry(21, 4, '1')]

View file

@@ -50,7 +50,7 @@ def test_blob_caching(method, endpoint, client, app):
  with patch('endpoints.v2.blob.model_cache', InMemoryDataModelCache()):
    # First request should make a DB query to retrieve the blob.
-    with assert_query_count(3):
+    with assert_query_count(4):
      conduct_call(client, 'v2.' + endpoint, url_for, method, params, expected_code=200,
                   headers=headers)

View file

@@ -920,7 +920,8 @@ def populate_database(minimal=False, with_storage=False):
    model.repositoryactioncount.update_repository_score(to_count)


-WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'ManifestChild', 'NamespaceGeoRestriction']
+WHITELISTED_EMPTY_MODELS = ['DeletedNamespace', 'LogEntry', 'LogEntry2', 'ManifestChild',
+                            'NamespaceGeoRestriction']

def find_models_missing_data():
  # As a sanity check we are going to make sure that all db tables have some data, unless explicitly

View file

@@ -22,14 +22,16 @@
      </span>
      <span class="hidden-xs right">
        <i class="fa fa-bar-chart-o toggle-icon" ng-class="chartVisible ? 'active' : ''"
-          ng-click="toggleChart()" data-title="Toggle Chart" bs-tooltip="tooltip.title"></i>
+          ng-click="toggleChart()" data-title="Toggle Chart" bs-tooltip="tooltip.title"
+          quay-show="Features.AGGREGATED_LOG_COUNT_RETRIEVAL"></i>
        <button class="btn btn-default download-btn" ng-click="showExportLogs()"
-                ng-if="user || organization || repository"><i class="fa fa-download"></i>Export Logs</button>
+                ng-if="(user || organization || repository) && Features.LOG_EXPORT"><i class="fa fa-download"></i>Export Logs</button>
      </span>
    </div>

    <div>
-      <div id="bar-chart" style="width: 800px; height: 500px;" ng-show="chartVisible">
+      <div id="bar-chart" style="width: 800px; height: 500px;"
+           quay-show="chartVisible && Features.AGGREGATED_LOG_COUNT_RETRIEVAL">
        <svg style="width: 800px; height: 500px;"></svg>
        <div class="cor-loader" ng-if="chartLoading"></div>
      </div>

View file

@@ -44,7 +44,8 @@
          <td ng-class="TableService.tablePredicateClass('created_datetime', options.predicate, options.reverse)">
            <a ng-click="TableService.orderBy('created_datetime', options)">Created</a>
          </td>
-          <td ng-class="TableService.tablePredicateClass('last_accessed_datetime', options.predicate, options.reverse)">
+          <td ng-class="TableService.tablePredicateClass('last_accessed_datetime', options.predicate, options.reverse)"
+              quay-show="Features.USER_LAST_ACCESSED">
            <a ng-click="TableService.orderBy('last_accessed_datetime', options)">Last Accessed</a>
          </td>
          <td class="options-col"></td>

@@ -92,7 +93,7 @@
          <td>
            <time-ago datetime="robotInfo.created"></time-ago>
          </td>
-          <td>
+          <td quay-show="Features.USER_LAST_ACCESSED">
            <time-ago datetime="robotInfo.last_accessed"></time-ago>
          </td>
          <td class="options-col">

View file

@@ -19,7 +19,9 @@ angular.module('quay').directive('logsView', function () {
      'allLogs': '@allLogs'
    },
    controller: function($scope, $element, $sce, Restangular, ApiService, TriggerService,
-                         StringBuilderService, ExternalNotificationData, UtilService) {
+                         StringBuilderService, ExternalNotificationData, UtilService,
+                         Features) {
+      $scope.Features = Features;
      $scope.loading = true;
      $scope.loadCounter = -1;
      $scope.logs = null;

@@ -405,6 +407,7 @@ angular.module('quay').directive('logsView', function () {
          return;
        }

+        if (Features.AGGREGATED_LOG_COUNT_RETRIEVAL) {
          $scope.chartLoading = true;

          var aggregateUrl = getUrl('aggregatelogs').toString();

@@ -419,6 +422,7 @@ angular.module('quay').directive('logsView', function () {
                                $scope.options.logEndDate);
            $scope.chartLoading = false;
          });
+        }

        $scope.nextPageToken = null;
        $scope.hasAdditional = true;

View file

@@ -3,7 +3,7 @@ import time
import socket

from contextlib import contextmanager

-from data.database import LogEntryKind, LogEntry
+from data.database import LogEntryKind, LogEntry3


class assert_action_logged(object):
  """ Specialized assertion for ensuring that a log entry of a particular kind was added under the

@@ -14,7 +14,7 @@ class assert_action_logged(object):
    self.existing_count = 0

  def _get_log_count(self):
-    return LogEntry.select().where(LogEntry.kind == LogEntryKind.get(name=self.log_kind)).count()
+    return LogEntry3.select().where(LogEntry3.kind == LogEntryKind.get(name=self.log_kind)).count()

  def __enter__(self):
    self.existing_count = self._get_log_count()

View file

@@ -9,6 +9,8 @@ INTERNAL_ONLY_PROPERTIES = {
  'TESTING',
  'SEND_FILE_MAX_AGE_DEFAULT',

+  'ACTION_LOG_MAX_PAGE',

  'REPLICATION_QUEUE_NAME',
  'DOCKERFILE_BUILD_QUEUE_NAME',
  'CHUNK_CLEANUP_QUEUE_NAME',

@@ -784,6 +786,27 @@ CONFIG_SCHEMA = {
      'pattern': '^[0-9]+(w|m|d|h|s)$',
    },

+    # Feature Flag: Aggregated log retrieval.
+    'FEATURE_AGGREGATED_LOG_COUNT_RETRIEVAL': {
+      'type': 'boolean',
+      'description': 'Whether to allow retrieval of aggregated log counts. Defaults to True',
+      'x-example': True,
+    },

+    # Feature Flag: Log export.
+    'FEATURE_LOG_EXPORT': {
+      'type': 'boolean',
+      'description': 'Whether to allow exporting of action logs. Defaults to True',
+      'x-example': True,
+    },

+    # Feature Flag: User last accessed.
+    'FEATURE_USER_LAST_ACCESSED': {
+      'type': 'boolean',
+      'description': 'Whether to record the last time a user was accessed. Defaults to True',
+      'x-example': True,
+    },

    # Feature Flag: Permanent Sessions.
    'FEATURE_PERMANENT_SESSIONS': {
      'type': 'boolean',

View file

@@ -1,6 +1,7 @@
import logging
import os.path
import json
+import time
import uuid

from datetime import datetime, timedelta

@@ -8,6 +9,8 @@ from io import BytesIO
from enum import Enum, unique

+import features
+
from app import app, export_action_logs_queue, storage, get_app_url
from data import model
from endpoints.api import format_date

@@ -277,6 +280,11 @@ def _run_and_time(fn):
if __name__ == "__main__":
  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

+  if not features.LOG_EXPORT:
+    logger.debug('Log export not enabled; skipping')
+    while True:
+      time.sleep(100000)

  logger.debug('Starting export action logs worker')
  worker = ExportActionLogsWorker(export_action_logs_queue,
                                  poll_period_seconds=POLL_PERIOD_SECONDS)

View file

@@ -8,7 +8,7 @@ from tempfile import SpooledTemporaryFile
import features

from app import app, storage
-from data.database import UseThenDisconnect, LogEntry
+from data.database import UseThenDisconnect, LogEntry, LogEntry2, LogEntry3
from data.model.log import (get_stale_logs, get_stale_logs_start_id,
                            get_stale_logs_cutoff_id, delete_stale_logs)
from data.userfiles import DelegateUserfiles

@@ -36,7 +36,10 @@ class LogRotateWorker(Worker):
    self.add_operation(self._archive_logs, WORKER_FREQUENCY)

  def _archive_logs(self):
-    self._archive_logs_for_model(LogEntry)
+    # TODO(LogMigrate): Remove the branch once we're back on a single table.
+    models = [LogEntry, LogEntry2, LogEntry3]
+    for model in models:
+      self._archive_logs_for_model(model)

  def _archive_logs_for_model(self, model):
    logger.debug('Attempting to rotate log entries')

@@ -126,8 +129,8 @@ def log_dict(log):
def main():
  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

-  if not features.ACTION_LOG_ROTATION or None in [SAVE_PATH, SAVE_LOCATION]:
-    logger.debug('Action log rotation worker not enabled; skipping')
+  if not features.LOG_EXPORT:
+    logger.debug('Log export not enabled; skipping')

    while True:
      time.sleep(100000)
View file

@@ -59,7 +59,7 @@ def test_process_queue_item(namespace, repo_name, expects_logs, app):
  created = storage.get_content(storage.preferred_locations, 'exportedactionlogs/' + storage_id)
  created_json = json.loads(created)

-  expected_count = database.LogEntry.select().where(database.LogEntry.repository == repo).count()
+  expected_count = database.LogEntry3.select().where(database.LogEntry3.repository == repo).count()
  assert (expected_count > 1) == expects_logs

  assert created_json['export_id'] == 'someid'