Implement a worker for batch exporting of usage logs

This allows customers to request their usage logs for a repository or an entire namespace. The logs are exported in a manner that doesn't absolutely destroy the database, and every step along the way is timed.
This commit is contained in:
Joseph Schorr 2018-11-27 18:28:32 +02:00
parent b8d2e1be9c
commit 8a212728a3
18 changed files with 768 additions and 15 deletions

View file

@ -1,10 +1,17 @@
""" Access usage logs for organizations or repositories. """
import json
import uuid
from datetime import datetime, timedelta
from flask import request
from app import export_action_logs_queue
from endpoints.api import (resource, nickname, ApiResource, query_param, parse_args,
RepositoryParamResource, require_repo_admin, related_user_resource,
format_date, require_user_admin, path_param, require_scope, page_support)
format_date, require_user_admin, path_param, require_scope, page_support,
validate_json_request, InvalidRequest)
from data import model as data_model
from endpoints.api.logs_models_pre_oci import pre_oci_model as model
from endpoints.exception import Unauthorized, NotFound
from auth.permissions import AdministerOrganizationPermission
@ -207,3 +214,127 @@ class OrgAggregateLogs(ApiResource):
performer_name=performer_name, ignore=SERVICE_LEVEL_LOG_KINDS)
raise Unauthorized()
def queue_logs_export(start_time, end_time, options, namespace_name, repository_name=None):
    """ Queues a background export of usage logs and returns its identifier.

    Looks up the namespace (and the repository, when one is named), validates the optional
    callback URL and places a JSON-encoded export request on `export_action_logs_queue`.

    Args:
      start_time: Start of the requested log window; stored in the queued payload as given.
      end_time: End of the requested log window; stored in the queued payload as given.
      options: Mapping that may contain `callback_url` and `callback_email`. `None` is
        treated as an empty mapping.
      namespace_name: Name of the namespace whose logs are to be exported.
      repository_name: Optional name of a repository under the namespace.

    Returns:
      A dict of the form `{'export_id': <str>}` holding the generated export identifier.

    Raises:
      InvalidRequest: If the namespace or repository cannot be found, or if the callback URL
        does not start with `http://` or `https://`.
    """
    # A missing JSON body reaches this function as None; treat it as "no options" instead of
    # failing with an AttributeError on `.get`.
    options = options or {}

    namespace = data_model.user.get_namespace_user(namespace_name)
    if namespace is None:
        raise InvalidRequest('Unknown namespace')

    repository = None
    if repository_name is not None:
        repository = data_model.repository.get_repository(namespace_name, repository_name)
        if repository is None:
            raise InvalidRequest('Unknown repository')

    # The callback URL is optional, but when present it must be an HTTP(S) URL.
    callback_url = options.get('callback_url')
    if callback_url and not callback_url.startswith(('https://', 'http://')):
        raise InvalidRequest('Invalid callback URL')

    # Only generate the identifier once the request is known to be valid.
    export_id = str(uuid.uuid4())
    export_action_logs_queue.put([namespace_name], json.dumps({
        'export_id': export_id,
        'repository_id': repository.id if repository else None,
        'namespace_id': namespace.id,
        'namespace_name': namespace.username,
        'repository_name': repository.name if repository else None,
        'start_time': start_time,
        'end_time': end_time,
        'callback_url': callback_url,
        'callback_email': options.get('callback_email'),
    }), retries_remaining=3)

    return {
        'export_id': export_id,
    }
# JSON schema for the body of an export-logs request: an object with two string properties
# describing where a link to the exported logs should be delivered. Neither property is
# listed as required.
EXPORT_LOGS_SCHEMA = {
    'type': 'object',
    'description': 'Configuration for an export logs operation',
    'properties': {
        'callback_url': {
            'type': 'string',
            'description': 'The callback URL to invoke with a link to the exported logs',
        },
        'callback_email': {
            'type': 'string',
            'description': 'The e-mail address at which to e-mail a link to the exported logs',
        },
    },
}
@resource('/v1/repository/<apirepopath:repository>/exportlogs')
@path_param('repository', 'The full path of the repository. e.g. namespace/name')
class ExportRepositoryLogs(RepositoryParamResource):
    """ Resource for exporting the logs for the specific repository. """
    schemas = {'ExportLogs': EXPORT_LOGS_SCHEMA}

    @require_repo_admin
    @nickname('exportRepoLogs')
    @parse_args()
    @query_param('starttime', 'Earliest time from which to get logs (%m/%d/%Y %Z)', type=str)
    @query_param('endtime', 'Latest time to which to get logs (%m/%d/%Y %Z)', type=str)
    @validate_json_request('ExportLogs')
    def post(self, namespace, repository, parsed_args):
        """ Queues an export of the logs for the specified repository. """
        # Only an explicit False from the model is treated as "not found".
        repo_found = model.repo_exists(namespace, repository)
        if repo_found is False:
            raise NotFound()

        # The time bounds are forwarded exactly as parsed from the query string; the JSON body
        # supplies the callback options.
        return queue_logs_export(parsed_args['starttime'], parsed_args['endtime'],
                                 request.get_json(), namespace, repository_name=repository)
@resource('/v1/user/exportlogs')
class ExportUserLogs(ApiResource):
    """ Resource for exporting the logs for the current user. """
    schemas = {
        'ExportLogs': EXPORT_LOGS_SCHEMA
    }

    @require_user_admin
    @nickname('exportUserLogs')
    @parse_args()
    @query_param('starttime', 'Earliest time from which to get logs. (%m/%d/%Y %Z)', type=str)
    @query_param('endtime', 'Latest time to which to get logs. (%m/%d/%Y %Z)', type=str)
    @validate_json_request('ExportLogs')
    def post(self, parsed_args):
        """ Queues an export of the logs for the current user. """
        start_time = parsed_args['starttime']
        end_time = parsed_args['endtime']

        # The authenticated user's username is used as the namespace; no repository name is
        # passed along with the request.
        user = get_authenticated_user()
        return queue_logs_export(start_time, end_time, request.get_json(), user.username)
@resource('/v1/organization/<orgname>/exportlogs')
@path_param('orgname', 'The name of the organization')
@related_user_resource(ExportUserLogs)
class ExportOrgLogs(ApiResource):
    """ Resource for exporting the logs for an entire organization. """
    schemas = {
        'ExportLogs': EXPORT_LOGS_SCHEMA
    }

    @nickname('exportOrgLogs')
    @parse_args()
    @query_param('starttime', 'Earliest time from which to get logs. (%m/%d/%Y %Z)', type=str)
    @query_param('endtime', 'Latest time to which to get logs. (%m/%d/%Y %Z)', type=str)
    @require_scope(scopes.ORG_ADMIN)
    @validate_json_request('ExportLogs')
    def post(self, orgname, parsed_args):
        """ Queues an export of the logs for the specified organization. """
        # Callers without administer permission on the organization are rejected up front.
        permission = AdministerOrganizationPermission(orgname)
        if not permission.can():
            raise Unauthorized()

        start_time = parsed_args['starttime']
        end_time = parsed_args['endtime']
        return queue_logs_export(start_time, end_time, request.get_json(), orgname)