initial import for Open Source 🎉

This commit is contained in:
Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

0
workers/__init__.py Normal file
View file

View file

@ -0,0 +1,52 @@
import logging
import logging.config
from datetime import timedelta
from app import app, storage
from data.database import UseThenDisconnect
from workers.blobuploadcleanupworker.models_pre_oci import pre_oci_model as model
from workers.worker import Worker
from util.log import logfile_path
logger = logging.getLogger(__name__)
DELETION_DATE_THRESHOLD = timedelta(days=2)
BLOBUPLOAD_CLEANUP_FREQUENCY = app.config.get('BLOBUPLOAD_CLEANUP_FREQUENCY', 60 * 60)
class BlobUploadCleanupWorker(Worker):
    """ Worker which periodically garbage collects stale rows from the blobupload table,
        canceling the associated chunked upload in storage before deleting each row.
    """
    def __init__(self):
        super(BlobUploadCleanupWorker, self).__init__()
        self.add_operation(self._cleanup_uploads, BLOBUPLOAD_CLEANUP_FREQUENCY)

    def _cleanup_uploads(self):
        """ Performs garbage collection on the blobupload table. """
        while True:
            # Find all blob uploads older than the threshold (typically a week) and delete them.
            with UseThenDisconnect(app.config):
                stale_upload = model.get_stale_blob_upload(DELETION_DATE_THRESHOLD)
                if stale_upload is None:
                    logger.debug('No additional stale blob uploads found')
                    return

            # Remove the stale upload from storage.
            logger.debug('Removing stale blob upload %s', stale_upload.uuid)
            try:
                storage.cancel_chunked_upload([stale_upload.location_name], stale_upload.uuid,
                                              stale_upload.storage_metadata)
            except Exception as ex:
                # Best-effort: a failure to cancel in storage should not block removal of
                # the stale database row.
                # Fixed: `ex.message` is deprecated (and absent on many exception types,
                # removed entirely in Python 3); log the exception object itself instead.
                logger.debug('Got error when trying to cancel chunked upload %s: %s',
                             stale_upload.uuid, ex)

            # Delete the stale upload's row.
            with UseThenDisconnect(app.config):
                model.delete_blob_upload(stale_upload)

            logger.debug('Removed stale blob upload %s', stale_upload.uuid)
if __name__ == "__main__":
    # Configure logging from the standard logfile config, then run the worker loop forever.
    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)
    worker = BlobUploadCleanupWorker()
    worker.start()

View file

@ -0,0 +1,37 @@
from abc import ABCMeta, abstractmethod
from collections import namedtuple
from six import add_metaclass
class BlobUpload(namedtuple('BlobUpload', ['uuid', 'storage_metadata', 'location_name'])):
    """
    BlobUpload represents a single upload of a blob in progress or previously started.

    Fields:
      uuid: unique identifier of the upload.
      storage_metadata: opaque metadata for the upload, passed back to the storage engine
        when canceling the chunked upload.
      location_name: name of the storage location holding the upload.
    """
@add_metaclass(ABCMeta)
class BlobUploadCleanupWorkerDataInterface(object):
    """
    Interface that represents all data store interactions required by the blob upload cleanup worker.
    """

    @abstractmethod
    def get_stale_blob_upload(self, stale_threshold):
        """ Finds and returns a BlobUpload created on or before the current date/time minus
            `stale_threshold`, or None when no such upload exists. """

    @abstractmethod
    def delete_blob_upload(self, blob_upload):
        """ Removes the given blob upload's row from the database. """

    @abstractmethod
    def create_stale_upload_for_testing(self):
        """ Test helper: inserts and returns a blob upload old enough to be considered stale. """

    @abstractmethod
    def blob_upload_exists(self, upload_uuid):
        """ Checks whether a blob upload row with the given UUID is present. """

View file

@ -0,0 +1,38 @@
from datetime import datetime, timedelta
from data import model
from data.database import BlobUpload as BlobUploadTable
from workers.blobuploadcleanupworker.models_interface import (
BlobUpload, BlobUploadCleanupWorkerDataInterface)
class PreOCIModel(BlobUploadCleanupWorkerDataInterface):
    """ Pre-OCI data model implementation of the blob upload cleanup worker interface. """

    def get_stale_blob_upload(self, stale_threshold):
        """ Looks up a stale blob upload row and converts it into the worker-facing tuple. """
        found = model.blob.get_stale_blob_upload(stale_threshold)
        if found is None:
            return None

        return BlobUpload(found.uuid, found.storage_metadata, found.location.name)

    def delete_blob_upload(self, blob_upload):
        """ Deletes the database row backing the given blob upload, if still present. """
        db_row = model.blob.get_blob_upload_by_uuid(blob_upload.uuid)
        if db_row is None:
            return

        try:
            db_row.delete_instance()
        except BlobUploadTable.DoesNotExist:
            # The row vanished between lookup and delete; nothing left to do.
            pass

    def create_stale_upload_for_testing(self):
        """ Inserts a blob upload back-dated far enough to be considered stale. """
        upload = model.blob.initiate_upload('devtable', 'simple', 'foobarbaz', 'local_us', {})
        upload.created = datetime.now() - timedelta(days=60)
        upload.save()
        return BlobUpload(upload.uuid, upload.storage_metadata, upload.location.name)

    def blob_upload_exists(self, upload_uuid):
        """ Returns whether a blob upload row with the given UUID exists. """
        return model.blob.get_blob_upload_by_uuid(upload_uuid) is not None


pre_oci_model = PreOCIModel()

View file

@ -0,0 +1,27 @@
from contextlib import contextmanager
from mock import patch, Mock
from test.fixtures import *
from workers.blobuploadcleanupworker.blobuploadcleanupworker import BlobUploadCleanupWorker
from workers.blobuploadcleanupworker.models_pre_oci import pre_oci_model as model
def test_blobuploadcleanupworker(initialized_db):
    """ Verifies the cleanup worker cancels the storage upload and deletes the stale row. """
    # Create a blob upload older than the threshold.
    blob_upload = model.create_stale_upload_for_testing()

    # Note: We need to override UseThenDisconnect to ensure it remains connected to the test DB.
    @contextmanager
    def noop(_):
        yield

    storage_mock = Mock()
    with patch('workers.blobuploadcleanupworker.blobuploadcleanupworker.UseThenDisconnect', noop):
        with patch('workers.blobuploadcleanupworker.blobuploadcleanupworker.storage', storage_mock):
            # Call cleanup and ensure it is canceled.
            worker = BlobUploadCleanupWorker()
            worker._cleanup_uploads()

            storage_mock.cancel_chunked_upload.assert_called_once()

    # Ensure the blob no longer exists.
    # Fixed: the original called blob_upload_exists without asserting on its result, so
    # the check was a no-op.
    assert not model.blob_upload_exists(blob_upload.uuid)

View file

View file

@ -0,0 +1,63 @@
import logging
from gzip import GzipFile
from tempfile import SpooledTemporaryFile
from app import build_logs, log_archive, app
from data.archivedlogs import JSON_MIMETYPE
from data.database import CloseForLongOperation
from util.streamingjsonencoder import StreamingJSONEncoder
from workers.buildlogsarchiver.models_pre_oci import pre_oci_model as model
from workers.worker import Worker
POLL_PERIOD_SECONDS = 30
MEMORY_TEMPFILE_SIZE = 64 * 1024 # Large enough to handle approximately 99% of builds in memory
logger = logging.getLogger(__name__)
class ArchiveBuildLogsWorker(Worker):
    """ Worker which periodically archives build logs out of the build log system and
        into the log archive storage.
    """
    def __init__(self):
        super(ArchiveBuildLogsWorker, self).__init__()
        self.add_operation(self._archive_redis_buildlogs, POLL_PERIOD_SECONDS)

    def _archive_redis_buildlogs(self):
        """ Archive a single build, choosing a candidate at random. This process must be idempotent to
            avoid needing two-phase commit. """
        # Get a random build to archive
        to_archive = model.get_archivable_build()
        if to_archive is None:
            logger.debug('No more builds to archive')
            return

        logger.debug('Archiving: %s', to_archive.uuid)

        length, entries = build_logs.get_log_entries(to_archive.uuid, 0)
        to_encode = {
            'start': 0,
            'total': length,
            'logs': entries,
        }

        if length > 0:
            # Stream the entries as gzipped JSON into a (mostly in-memory) temp file,
            # then hand that file to the log archive storage.
            with CloseForLongOperation(app.config):
                with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
                    with GzipFile('testarchive', fileobj=tempfile) as zipstream:
                        for chunk in StreamingJSONEncoder().iterencode(to_encode):
                            zipstream.write(chunk)

                    tempfile.seek(0)
                    log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip',
                                           file_id=to_archive.uuid)

        # Only the worker that wins the archived-flag update clears the live log entries;
        # a lost race leaves the other worker's data intact (keeps the process idempotent).
        we_updated = model.mark_build_archived(to_archive.uuid)
        if we_updated:
            build_logs.expire_status(to_archive.uuid)
            build_logs.delete_log_entries(to_archive.uuid)
        else:
            logger.debug('Another worker pre-empted us when archiving: %s', to_archive.uuid)
if __name__ == "__main__":
    # NOTE(review): unlike sibling workers, this entry point does not configure logging
    # via logfile_path before starting — confirm whether that is intentional.
    worker = ArchiveBuildLogsWorker()
    worker.start()

View file

@ -0,0 +1,38 @@
from abc import ABCMeta, abstractmethod
from collections import namedtuple
from six import add_metaclass
class Build(namedtuple('Build', ['uuid', 'logs_archived'])):
    """
    Build represents a single build in the build system.

    Fields:
      uuid: unique identifier of the build.
      logs_archived: whether the build's logs have already been archived.
    """
@add_metaclass(ABCMeta)
class BuildLogsArchiverWorkerDataInterface(object):
    """
    Interface that represents all data store interactions required by the build logs archiver worker.
    """

    @abstractmethod
    def get_archivable_build(self):
        """ Finds and returns a build whose logs are available for archiving, or None
            when no such build exists. """

    @abstractmethod
    def get_build(self, build_uuid):
        """ Looks up the build with the matching UUID, returning None when not found. """

    @abstractmethod
    def mark_build_archived(self, build_uuid):
        """ Flags the build with the given UUID as having its logs archived. Returns False
            when the build was already marked as archived. """

    @abstractmethod
    def create_build_for_testing(self):
        """ Test helper: creates an unarchived build for testing of archiving. """

View file

@ -0,0 +1,32 @@
from data import model
from workers.buildlogsarchiver.models_interface import Build, BuildLogsArchiverWorkerDataInterface
class PreOCIModel(BuildLogsArchiverWorkerDataInterface):
    """ Pre-OCI data model implementation of the build logs archiver interface. """

    def get_archivable_build(self):
        """ Returns a Build ready for log archiving, or None when none remain. """
        found = model.build.get_archivable_build()
        return Build(found.uuid, found.logs_archived) if found is not None else None

    def mark_build_archived(self, build_uuid):
        """ Marks the build as archived; False when another worker already did so. """
        return model.build.mark_build_archived(build_uuid)

    def create_build_for_testing(self):
        """ Creates an errored, unarchived repository build for archiver tests. """
        repo = model.repository.get_repository('devtable', 'simple')
        access_token = model.token.create_access_token(repo, 'admin')
        created = model.build.create_repository_build(repo, access_token, {}, None, 'foo')
        created.phase = 'error'
        created.save()
        return Build(created.uuid, created.logs_archived)

    def get_build(self, build_uuid):
        """ Looks up a build by UUID, returning None when missing. """
        found = model.build.get_repository_build(build_uuid)
        return Build(found.uuid, found.logs_archived) if found is not None else None


pre_oci_model = PreOCIModel()

View file

@ -0,0 +1,30 @@
from mock import patch, Mock
from app import storage
from workers.buildlogsarchiver.buildlogsarchiver import ArchiveBuildLogsWorker
from test.fixtures import *
from workers.buildlogsarchiver.models_pre_oci import pre_oci_model as model
def test_logarchiving(app):
    """ Runs one archiving pass and verifies the logs are stored and marked archived. """
    worker = ArchiveBuildLogsWorker()
    build_logs_mock = Mock()
    build_logs_mock.get_log_entries = Mock(return_value=(1, [{'some': 'entry'}]))

    # Add a build that is ready for archiving.
    build = model.create_build_for_testing()

    with patch('workers.buildlogsarchiver.buildlogsarchiver.build_logs', build_logs_mock):
        worker._archive_redis_buildlogs()

    # Ensure the get method was called.
    build_logs_mock.get_log_entries.assert_called_once()
    build_logs_mock.expire_status.assert_called_once()
    build_logs_mock.delete_log_entries.assert_called_once()

    # Ensure the build was marked as archived.
    assert model.get_build(build.uuid).logs_archived

    # Ensure a file was written to storage.
    assert storage.exists(['local_us'], 'logarchive/%s' % build.uuid)

View file

@ -0,0 +1,44 @@
import logging
import time
from app import app, storage, chunk_cleanup_queue
from workers.queueworker import QueueWorker, JobException
from util.log import logfile_path
logger = logging.getLogger(__name__)
POLL_PERIOD_SECONDS = 10
class ChunkCleanupWorker(QueueWorker):
    """ Worker which cleans up chunks enqueued by the storage engine(s). This is typically used to
        cleanup empty chunks which are no longer needed.
    """
    def process_queue_item(self, job_details):
        """ Deletes the chunk described by the queue item. An already-missing chunk is
            treated as success; a storage I/O failure triggers a job retry.
        """
        logger.debug('Got chunk cleanup queue item: %s', job_details)
        location = job_details['location']
        chunk_path = job_details['path']

        if not storage.exists([location], chunk_path):
            logger.debug('Chunk already deleted')
            return

        try:
            storage.remove([location], chunk_path)
        except IOError:
            # Tell the queue framework to retry this job later.
            raise JobException()
if __name__ == "__main__":
    # Fixed: this file only does `import logging`, which does not reliably bind the
    # `logging.config` submodule; import it explicitly before using fileConfig.
    import logging.config

    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    # Only run when at least one configured storage engine is Swift; otherwise idle forever
    # so the container stays up without doing work.
    engines = {config[0] for config in app.config.get('DISTRIBUTED_STORAGE_CONFIG', {}).values()}
    if 'SwiftStorage' not in engines:
        logger.debug('Swift storage not detected; sleeping')
        while True:
            time.sleep(10000)

    logger.debug('Starting chunk cleanup worker')
    worker = ChunkCleanupWorker(chunk_cleanup_queue, poll_period_seconds=POLL_PERIOD_SECONDS)
    worker.start()

View file

@ -0,0 +1,49 @@
import logging
import time
import features
from app import app # This is required to initialize the database.
from data import model
from workers.worker import Worker
from util.log import logfile_path
from util.timedeltastring import convert_to_timedelta
POLL_PERIOD_SECONDS = 60 * 60 # 1 hour
logger = logging.getLogger(__name__)
class ExpiredAppSpecificTokenWorker(Worker):
    """ Worker which periodically garbage collects app-specific tokens that expired
        outside the configured retention window.
    """
    def __init__(self):
        super(ExpiredAppSpecificTokenWorker, self).__init__()

        window_config = app.config.get('EXPIRED_APP_SPECIFIC_TOKEN_GC', '1d')
        self.expiration_window = convert_to_timedelta(window_config)

        logger.debug('Found expiration window: %s', window_config)
        self.add_operation(self._gc_expired_tokens, POLL_PERIOD_SECONDS)

    def _gc_expired_tokens(self):
        """ Garbage collects any expired app specific tokens outside of the configured
            window. """
        logger.debug('Garbage collecting expired app specific tokens with window: %s',
                     self.expiration_window)
        model.appspecifictoken.gc_expired_tokens(self.expiration_window)
        return True
if __name__ == "__main__":
    # Fixed: this file only does `import logging`, which does not reliably bind the
    # `logging.config` submodule; import it explicitly before using fileConfig.
    import logging.config

    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    if not features.APP_SPECIFIC_TOKENS:
        logger.debug('App specific tokens disabled; skipping')
        while True:
            time.sleep(100000)

    if app.config.get('EXPIRED_APP_SPECIFIC_TOKEN_GC') is None:
        logger.debug('GC of App specific tokens is disabled; skipping')
        while True:
            time.sleep(100000)

    logger.debug('Starting expired app specific token GC worker')
    worker = ExpiredAppSpecificTokenWorker()
    worker.start()

View file

@ -0,0 +1,260 @@
import logging
import os.path
import json
import time
import uuid
from datetime import datetime, timedelta
from io import BytesIO
from enum import Enum, unique
import features
from app import app, export_action_logs_queue, storage as app_storage, get_app_url, avatar
from endpoints.api import format_date
from data.logs_model import logs_model
from data.logs_model.interface import LogsIterationTimeout
from workers.queueworker import QueueWorker, JobException
from util.log import logfile_path
from util.useremails import send_logs_exported_email
logger = logging.getLogger(__name__)
POLL_PERIOD_SECONDS = app.config.get('EXPORT_ACTION_LOGS_WORKER_POLL_PERIOD', 60)
EXPORT_LOGS_STORAGE_PATH = app.config.get('EXPORT_ACTION_LOGS_STORAGE_PATH', 'exportedactionlogs')
MAXIMUM_WORK_PERIOD_SECONDS = app.config.get('EXPORT_ACTION_LOGS_MAXIMUM_SECONDS', 60 * 60) # 1 hour
MAXIMUM_QUERY_TIME_SECONDS = app.config.get('EXPORT_ACTION_LOGS_MAXIMUM_QUERY_TIME_SECONDS', 30)
EXPORTED_LOGS_EXPIRATION_SECONDS = app.config.get('EXPORT_ACTION_LOGS_SECONDS', 60 * 60) # 1 hour
@unique
class ExportResult(Enum):
    """ Terminal states of a logs-export job, reported back via callback URL/email. """
    # NOTE: Make sure to handle these in `logsexported.html` in `emails`
    INVALID_REQUEST = 'invalidrequest'  # start/end time missing or unparseable
    OPERATION_TIMEDOUT = 'timedout'  # log retrieval exceeded the maximum work period
    FAILED_EXPORT = 'failed'  # upload or other error during export
    SUCCESSFUL_EXPORT = 'success'
class ExportActionLogsWorker(QueueWorker):
    """ Worker which exports action logs for a namespace or a repository based on
        a queued request from the API.
    """
    def process_queue_item(self, job_details):
        return self._process_queue_item(job_details, app_storage)

    def _process_queue_item(self, job_details, storage):
        """ Streams the requested logs into a single chunked upload in storage as one
            JSON document, then reports the outcome (with a download URL on success)
            via the job's callback URL and/or callback email.
        """
        logger.info('Got export actions logs queue item: %s', job_details)

        # job_details block (as defined in the logs.py API endpoint):
        # {
        #  'export_id': export_id,
        #  'repository_id': repository.id or None,
        #  'namespace_id': namespace.id,
        #  'namespace_name': namespace.username,
        #  'repository_name': repository.name or None,
        #  'start_time': start_time,
        #  'end_time': end_time,
        #  'callback_url': callback_url or None,
        #  'callback_email': callback_email or None,
        # }
        export_id = job_details['export_id']

        start_time = _parse_time(job_details['start_time'])
        end_time = _parse_time(job_details['end_time'])

        # Make sure the end time has the whole day.
        if start_time is None or end_time is None:
            self._report_results(job_details, ExportResult.INVALID_REQUEST)
            return

        end_time = end_time + timedelta(days=1) - timedelta(milliseconds=1)

        # Select the minimum and maximum IDs for the logs for the repository/namespace
        # over the time range.
        namespace_id = job_details['namespace_id']
        repository_id = job_details['repository_id']
        max_query_time = timedelta(seconds=MAXIMUM_QUERY_TIME_SECONDS)

        # Generate a file key so that if we return an API URL, it cannot simply be constructed from
        # just the export ID.
        file_key = str(uuid.uuid4())
        exported_filename = '%s-%s' % (export_id, file_key)

        # Start a chunked upload for the logs and stream them.
        upload_id, upload_metadata = storage.initiate_chunked_upload(storage.preferred_locations)
        export_storage_path = os.path.join(EXPORT_LOGS_STORAGE_PATH, exported_filename)
        logger.debug('Starting chunked upload to path `%s`', export_storage_path)

        # Start with a 'json' header that contains the opening bracket, as well as basic
        # information and the start of the `logs` array.
        details = {
            'start_time': format_date(start_time),
            'end_time': format_date(end_time),
            'namespace': job_details['namespace_name'],
            'repository': job_details['repository_name'],
        }

        prefix_data = """{
"export_id": "%s",
"details": %s,
"logs": [
""" % (export_id, json.dumps(details))

        _, new_metadata, upload_error = storage.stream_upload_chunk(storage.preferred_locations,
                                                                    upload_id, 0,
                                                                    -1,
                                                                    BytesIO(str(prefix_data)),
                                                                    upload_metadata)
        uploaded_byte_count = len(prefix_data)
        if upload_error is not None:
            logger.error('Got an error when writing chunk for `%s`: %s', export_id, upload_error)
            storage.cancel_chunked_upload(storage.preferred_locations, upload_id, upload_metadata)
            self._report_results(job_details, ExportResult.FAILED_EXPORT)
            raise IOError(upload_error)

        upload_metadata = new_metadata

        logs_iterator = logs_model.yield_logs_for_export(start_time, end_time, repository_id,
                                                         namespace_id, max_query_time)

        try:
            # Stream the logs to storage as chunks.
            new_metadata, uploaded_byte_count = self._stream_logs(upload_id, upload_metadata,
                                                                  uploaded_byte_count, logs_iterator,
                                                                  job_details, storage)
            if uploaded_byte_count is None:
                logger.error('Failed to upload streamed logs for `%s`', export_id)
                storage.cancel_chunked_upload(storage.preferred_locations, upload_id, upload_metadata)
                self._report_results(job_details, ExportResult.FAILED_EXPORT)
                raise IOError('Export failed to upload')

            upload_metadata = new_metadata

            # Close the JSON block.
            suffix_data = """
{"terminator": true}]
}"""
            _, new_metadata, upload_error = storage.stream_upload_chunk(storage.preferred_locations,
                                                                        upload_id,
                                                                        0,
                                                                        -1,
                                                                        BytesIO(str(suffix_data)),
                                                                        upload_metadata)
            if upload_error is not None:
                logger.error('Got an error when writing chunk for `%s`: %s', export_id, upload_error)
                storage.cancel_chunked_upload(storage.preferred_locations, upload_id, upload_metadata)
                self._report_results(job_details, ExportResult.FAILED_EXPORT)
                raise IOError(upload_error)

            # Complete the upload.
            upload_metadata = new_metadata
            storage.complete_chunked_upload(storage.preferred_locations, upload_id, export_storage_path,
                                            upload_metadata)
        except:
            # NOTE(review): deliberately a bare except so that *any* failure (including
            # SystemExit/KeyboardInterrupt) cancels the chunked upload before re-raising.
            logger.exception('Exception when exporting logs for `%s`', export_id)
            storage.cancel_chunked_upload(storage.preferred_locations, upload_id, upload_metadata)
            self._report_results(job_details, ExportResult.FAILED_EXPORT)
            raise

        # Invoke the callbacks.
        export_url = storage.get_direct_download_url(storage.preferred_locations, export_storage_path,
                                                     expires_in=EXPORTED_LOGS_EXPIRATION_SECONDS)
        if export_url is None:
            # Fall back to serving the file through the application itself.
            export_url = '%s/exportedlogs/%s' % (get_app_url(), exported_filename)

        self._report_results(job_details, ExportResult.SUCCESSFUL_EXPORT, export_url)

    def _stream_logs(self, upload_id, upload_metadata, uploaded_byte_count, logs_iterator,
                     job_details, storage):
        """ Uploads batches of logs from the iterator as JSON chunks. Returns a tuple of
            (upload_metadata, uploaded_byte_count); uploaded_byte_count is None on timeout
            or upload error.
        """
        export_id = job_details['export_id']
        max_work_period = timedelta(seconds=MAXIMUM_WORK_PERIOD_SECONDS)
        batch_start_time = datetime.utcnow()

        try:
            for logs in logs_iterator:
                # Bail out if this export has been running longer than the allowed work period.
                work_elapsed = datetime.utcnow() - batch_start_time
                if work_elapsed > max_work_period:
                    logger.error('Retrieval of logs `%s` timed out with time of `%s`',
                                 export_id, work_elapsed)
                    self._report_results(job_details, ExportResult.OPERATION_TIMEDOUT)
                    return None, None

                logs_data = ''
                if logs:
                    # Trailing comma keeps the stream a valid prefix of the final JSON array.
                    logs_data = ','.join([json.dumps(log.to_dict(avatar, False)) for log in logs]) + ','

                logs_data = logs_data.encode('utf-8')
                if logs_data:
                    _, new_metadata, upload_error = storage.stream_upload_chunk(storage.preferred_locations,
                                                                                upload_id,
                                                                                0,
                                                                                -1,
                                                                                BytesIO(logs_data),
                                                                                upload_metadata)
                    if upload_error is not None:
                        logger.error('Got an error when writing chunk: %s', upload_error)
                        return upload_metadata, None

                    upload_metadata = new_metadata
                    uploaded_byte_count += len(logs_data)
        except LogsIterationTimeout:
            logger.error('Retrieval of logs for export logs timed out at `%s`', work_elapsed)
            self._report_results(job_details, ExportResult.OPERATION_TIMEDOUT)
            return upload_metadata, None

        return upload_metadata, uploaded_byte_count

    def _report_results(self, job_details, result_status, exported_data_url=None):
        """ Reports the export outcome to the job's callback URL and/or callback email. """
        logger.info('Reporting result of `%s` for %s; %s', result_status, job_details,
                    exported_data_url)

        if job_details.get('callback_url'):
            # Post the results to the callback URL.
            client = app.config['HTTPCLIENT']
            result = client.post(job_details['callback_url'], json={
                'export_id': job_details['export_id'],
                'start_time': job_details['start_time'],
                'end_time': job_details['end_time'],
                'namespace': job_details['namespace_name'],
                'repository': job_details['repository_name'],
                'exported_data_url': exported_data_url,
                'status': result_status.value,
            })

            if result.status_code != 200:
                logger.error('Got `%s` status code for callback URL `%s` for export `%s`',
                             result.status_code, job_details['callback_url'],
                             job_details['export_id'])
                raise Exception('Got non-200 for batch logs reporting; retrying later')

        if job_details.get('callback_email'):
            with app.app_context():
                send_logs_exported_email(job_details['callback_email'], job_details['export_id'],
                                         result_status.value, exported_data_url,
                                         EXPORTED_LOGS_EXPIRATION_SECONDS)
def _parse_time(specified_time):
try:
return datetime.strptime(specified_time + ' UTC', '%m/%d/%Y %Z')
except ValueError:
return None
if __name__ == "__main__":
    # Fixed: this file only does `import logging`, which does not reliably bind the
    # `logging.config` submodule; import it explicitly before using fileConfig.
    import logging.config

    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    if not features.LOG_EXPORT:
        logger.debug('Log export not enabled; skipping')
        while True:
            time.sleep(100000)

    logger.debug('Starting export action logs worker')
    worker = ExportActionLogsWorker(export_action_logs_queue,
                                    poll_period_seconds=POLL_PERIOD_SECONDS)
    worker.start()

0
workers/gc/__init__.py Normal file
View file

49
workers/gc/gcworker.py Normal file
View file

@ -0,0 +1,49 @@
import logging
import time
import features
from app import app
from data.database import UseThenDisconnect
from data.model.repository import find_repository_with_garbage, get_random_gc_policy
from data.model.gc import garbage_collect_repo
from workers.worker import Worker
logger = logging.getLogger(__name__)
class GarbageCollectionWorker(Worker):
    """ Worker which periodically garbage collects a repository flagged as having
        garbage, using a randomly selected GC policy.
    """
    def __init__(self):
        super(GarbageCollectionWorker, self).__init__()
        self.add_operation(self._garbage_collection_repos,
                           app.config.get('GARBAGE_COLLECTION_FREQUENCY', 30))

    def _garbage_collection_repos(self):
        """ Performs garbage collection on repositories. """
        with UseThenDisconnect(app.config):
            candidate = find_repository_with_garbage(get_random_gc_policy())
            if candidate is None:
                logger.debug('No repository with garbage found')
                return

            assert features.GARBAGE_COLLECTION

            logger.debug('Starting GC of repository #%s (%s)', candidate.id, candidate.name)
            garbage_collect_repo(candidate)
            logger.debug('Finished GC of repository #%s (%s)', candidate.id, candidate.name)
if __name__ == "__main__":
    if not features.GARBAGE_COLLECTION:
        logger.debug('Garbage collection is disabled; skipping')
        while True:
            time.sleep(100000)

    # Idiom fix: replaced the chained equality `or`s with a single membership test.
    if app.config.get('V3_UPGRADE_MODE') in ('production-transition',
                                             'post-oci-rollout',
                                             'post-oci-roll-back-compat'):
        logger.debug('GC worker disabled for production transition; skipping')
        while True:
            time.sleep(100000)

    worker = GarbageCollectionWorker()
    worker.start()

View file

@ -0,0 +1,8 @@
from workers.gc.gcworker import GarbageCollectionWorker
from test.fixtures import *
def test_gc(initialized_db):
    """ Smoke test: a single GC pass over the test database completes without error. """
    gc_worker = GarbageCollectionWorker()
    gc_worker._garbage_collection_repos()

View file

View file

@ -0,0 +1,58 @@
import logging
import time
from app import app, metric_queue
from data.database import UseThenDisconnect
from workers.globalpromstats.models_pre_oci import pre_oci_model as model
from util.locking import GlobalLock, LockNotAcquiredException
from util.log import logfile_path
from workers.worker import Worker
logger = logging.getLogger(__name__)
WORKER_FREQUENCY = app.config.get('GLOBAL_PROMETHEUS_STATS_FREQUENCY', 60 * 60)
class GlobalPrometheusStatsWorker(Worker):
    """ Worker which reports global stats (# of users, orgs, repos, etc) to Prometheus periodically.
    """
    def __init__(self):
        super(GlobalPrometheusStatsWorker, self).__init__()
        self.add_operation(self._try_report_stats, WORKER_FREQUENCY)

    def _try_report_stats(self):
        """ Reports the stats while holding the global lock; skips this round when another
            instance already holds it.
        """
        logger.debug('Attempting to report stats')

        try:
            with GlobalLock('GLOBAL_PROM_STATS'):
                self._report_stats()
        except LockNotAcquiredException:
            logger.debug('Could not acquire global lock for global prometheus stats')

    def _report_stats(self):
        """ Pushes the current global counts into the Prometheus metric queue. """
        logger.debug('Reporting global stats')
        with UseThenDisconnect(app.config):
            # Repository count.
            metric_queue.repository_count.Set(model.get_repository_count())

            # User counts.
            metric_queue.user_count.Set(model.get_active_user_count())
            metric_queue.org_count.Set(model.get_active_org_count())
            metric_queue.robot_count.Set(model.get_robot_count())
def main():
    """ Entry point: configures logging and runs the global stats worker, or idles forever
        when no Prometheus aggregator is configured.
    """
    # Fixed: this file only does `import logging`, which does not reliably bind the
    # `logging.config` submodule; import it explicitly before using fileConfig.
    import logging.config

    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    if not app.config.get('PROMETHEUS_AGGREGATOR_URL'):
        logger.debug('Prometheus not enabled; skipping global stats reporting')
        while True:
            time.sleep(100000)

    worker = GlobalPrometheusStatsWorker()
    worker.start()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,27 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
@add_metaclass(ABCMeta)
class GlobalPromStatsWorkerDataInterface(object):
    """
    Interface that represents all data store interactions required by the global prom stats worker.
    """

    @abstractmethod
    def get_repository_count(self):
        """ Counts the repositories in the database. """

    @abstractmethod
    def get_active_user_count(self):
        """ Counts the active users in the database. """

    @abstractmethod
    def get_active_org_count(self):
        """ Counts the active organizations in the database. """

    @abstractmethod
    def get_robot_count(self):
        """ Counts the robots in the database. """

View file

@ -0,0 +1,18 @@
from data import model
from workers.globalpromstats.models_interface import GlobalPromStatsWorkerDataInterface
class PreOCIModel(GlobalPromStatsWorkerDataInterface):
    """ Pre-OCI data model implementation of the global prom stats interface. Each method
        simply delegates to the corresponding `data.model` helper.
    """

    def get_repository_count(self):
        """ Counts all repositories. """
        return model.repository.get_repository_count()

    def get_active_user_count(self):
        """ Counts active users. """
        return model.user.get_active_user_count()

    def get_active_org_count(self):
        """ Counts active organizations. """
        return model.organization.get_active_org_count()

    def get_robot_count(self):
        """ Counts robot accounts. """
        return model.user.get_robot_count()


pre_oci_model = PreOCIModel()

View file

@ -0,0 +1,15 @@
from mock import patch, Mock
from workers.globalpromstats.globalpromstats import GlobalPrometheusStatsWorker
from test.fixtures import *
def test_reportstats(initialized_db):
    """ Ensures the global stats worker reports every expected metric to the metric queue. """
    mock = Mock()
    with patch('workers.globalpromstats.globalpromstats.metric_queue', mock):
        worker = GlobalPrometheusStatsWorker()
        worker._report_stats()

    mock.repository_count.Set.assert_called_once()
    # Added: the worker also reports the active user count; assert it like the others.
    mock.user_count.Set.assert_called_once()
    mock.org_count.Set.assert_called_once()
    mock.robot_count.Set.assert_called_once()

View file

@ -0,0 +1,120 @@
import logging
import logging.config
import time
from peewee import JOIN, fn, IntegrityError
from app import app
from data.database import (UseThenDisconnect, TagManifestLabel, TagManifestLabelMap,
TagManifestToManifest, ManifestLabel, db_transaction)
from workers.worker import Worker
from util.log import logfile_path
from util.migrate.allocator import yield_random_entries
logger = logging.getLogger(__name__)
WORKER_TIMEOUT = 600
class LabelBackfillWorker(Worker):
    """ Worker which backfills TagManifestLabel rows into the newer manifest label tables
        (ManifestLabel + TagManifestLabelMap).
    """
    def __init__(self):
        super(LabelBackfillWorker, self).__init__()
        self.add_operation(self._backfill_labels, WORKER_TIMEOUT)

    def _candidates_to_backfill(self):
        """ Returns an iterator over randomly-chosen TagManifestLabel rows which have no
            TagManifestLabelMap entry yet (i.e. rows not yet backfilled).
        """
        def missing_tmt_query():
            # LEFT OUTER join + NULL check selects labels lacking a map row.
            return (TagManifestLabel
                    .select()
                    .join(TagManifestLabelMap, JOIN.LEFT_OUTER)
                    .where(TagManifestLabelMap.id >> None))

        min_id = (TagManifestLabel
                  .select(fn.Min(TagManifestLabel.id))
                  .join(TagManifestLabelMap, JOIN.LEFT_OUTER)
                  .where(TagManifestLabelMap.id >> None)
                  .scalar())

        max_id = TagManifestLabel.select(fn.Max(TagManifestLabel.id)).scalar()

        # NOTE(review): batch size 100; presumably yield_random_entries yields
        # (candidate, abort_signal, _) tuples over [min_id, max_id] — confirm against
        # util.migrate.allocator.
        iterator = yield_random_entries(
            missing_tmt_query,
            TagManifestLabel.id,
            100,
            max_id,
            min_id,
        )

        return iterator

    def _backfill_labels(self):
        """ Runs one backfill pass over the candidate iterator, aborting a candidate's
            slot when another worker got to it first.
        """
        with UseThenDisconnect(app.config):
            iterator = self._candidates_to_backfill()
            if iterator is None:
                logger.debug('Found no additional labels to backfill')
                time.sleep(10000)
                return None

            for candidate, abt, _ in iterator:
                if not backfill_label(candidate):
                    logger.info('Another worker pre-empted us for label: %s', candidate.id)
                    abt.set()
def lookup_map_row(tag_manifest_label):
    """ Returns whether a TagManifestLabelMap row already exists for the given label. """
    try:
        TagManifestLabelMap.get(tag_manifest_label=tag_manifest_label)
    except TagManifestLabelMap.DoesNotExist:
        return False

    return True
def backfill_label(tag_manifest_label):
    """ Backfills a single TagManifestLabel into the ManifestLabel/TagManifestLabelMap
        tables. Returns False when another worker already handled it (preemption),
        True otherwise.
    """
    logger.info('Backfilling label %s', tag_manifest_label.id)

    # Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
    if lookup_map_row(tag_manifest_label):
        return False

    # Ensure the tag manifest has been backfilled into the manifest table.
    try:
        tmt = TagManifestToManifest.get(tag_manifest=tag_manifest_label.annotated)
    except TagManifestToManifest.DoesNotExist:
        # We'll come back to this later.
        logger.debug('Tag Manifest %s for label %s has not yet been backfilled',
                     tag_manifest_label.annotated.id, tag_manifest_label.id)
        return True

    repository = tag_manifest_label.repository

    # Create the new mapping entry and label.
    with db_transaction():
        # Re-check under the transaction to close the race with other workers.
        if lookup_map_row(tag_manifest_label):
            return False

        label = tag_manifest_label.label
        if tmt.manifest:
            try:
                manifest_label = ManifestLabel.create(manifest=tmt.manifest, label=label,
                                                      repository=repository)
                TagManifestLabelMap.create(manifest_label=manifest_label,
                                           tag_manifest_label=tag_manifest_label,
                                           label=label,
                                           manifest=tmt.manifest,
                                           tag_manifest=tag_manifest_label.annotated)
            except IntegrityError:
                # Another worker inserted the rows concurrently.
                return False

    # NOTE(review): when `tmt.manifest` is falsy nothing is created above, yet we still
    # log success and return True — confirm this is intentional.
    logger.info('Backfilled label %s', tag_manifest_label.id)
    return True
if __name__ == "__main__":
    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    # The backfill only runs when explicitly enabled in config; otherwise idle forever.
    if not app.config.get('BACKFILL_TAG_MANIFEST_LABELS', False):
        logger.debug('Manifest label backfill disabled; skipping')
        while True:
            time.sleep(100000)

    worker = LabelBackfillWorker()
    worker.start()

121
workers/logrotateworker.py Normal file
View file

@ -0,0 +1,121 @@
import logging
import json
import time
from datetime import datetime
from gzip import GzipFile
from tempfile import SpooledTemporaryFile
import features
from app import app, storage
from data.logs_model import logs_model
from data.userfiles import DelegateUserfiles
from util.locking import GlobalLock, LockNotAcquiredException
from util.log import logfile_path
from util.streamingjsonencoder import StreamingJSONEncoder
from util.timedeltastring import convert_to_timedelta
from workers.worker import Worker
logger = logging.getLogger(__name__)
JSON_MIMETYPE = 'application/json'
MIN_LOGS_PER_ROTATION = 5000
MEMORY_TEMPFILE_SIZE = 12 * 1024 * 1024
WORKER_FREQUENCY = app.config.get('ACTION_LOG_ROTATION_FREQUENCY', 60 * 60 * 12)
STALE_AFTER = convert_to_timedelta(app.config.get('ACTION_LOG_ROTATION_THRESHOLD', '30d'))
MINIMUM_LOGS_AGE_FOR_ARCHIVE = convert_to_timedelta(app.config.get('MINIMUM_LOGS_AGE_FOR_ARCHIVE', '7d'))
SAVE_PATH = app.config.get('ACTION_LOG_ARCHIVE_PATH')
SAVE_LOCATION = app.config.get('ACTION_LOG_ARCHIVE_LOCATION')
class LogRotateWorker(Worker):
    """ Worker used to rotate old logs out the database and into storage. """
    def __init__(self):
        super(LogRotateWorker, self).__init__()
        self.add_operation(self._archive_logs, WORKER_FREQUENCY)

    def _archive_logs(self):
        # Archive everything older than the rotation threshold.
        cutoff_date = datetime.now() - STALE_AFTER
        try:
            with GlobalLock('ACTION_LOG_ROTATION'):
                self._perform_archiving(cutoff_date)
        except LockNotAcquiredException:
            # Another instance is already rotating; skip this round.
            return

    def _perform_archiving(self, cutoff_date):
        """ Rotates batches of logs older than `cutoff_date` into gzipped JSON files in
            the log archive. Returns the list of archived filenames.
        """
        assert datetime.now() - cutoff_date >= MINIMUM_LOGS_AGE_FOR_ARCHIVE

        archived_files = []
        save_location = SAVE_LOCATION
        if not save_location:
            # Pick the *same* save location for all instances. This is a fallback if
            # a location was not configured.
            save_location = storage.locations[0]

        log_archive = DelegateUserfiles(app, storage, save_location, SAVE_PATH)

        for log_rotation_context in logs_model.yield_log_rotation_context(cutoff_date,
                                                                          MIN_LOGS_PER_ROTATION):
            with log_rotation_context as context:
                for logs, filename in context.yield_logs_batch():
                    formatted_logs = [log_dict(log) for log in logs]
                    logger.debug('Archiving logs rotation %s', filename)
                    _write_logs(filename, formatted_logs, log_archive)
                    logger.debug('Finished archiving logs to %s', filename)
                    archived_files.append(filename)

        return archived_files
def log_dict(log):
  """ Returns a JSON-encodable dict form of the given log entry.

  `log` may be a Peewee LogEntry row or an Elasticsearch document. Its
  metadata JSON is parsed; when it cannot be parsed (invalid JSON or a
  non-string value), the raw value is preserved under the `__raw` key so
  no data is lost in the archive.
  """
  try:
    metadata_json = json.loads(str(log.metadata_json))
  except (ValueError, TypeError):
    # Both failure modes were previously handled by two identical except
    # blocks; they are merged here.
    # The results returned by querying Elasticsearch does not have
    # a top-level attribute `id` like when querying with Peewee.
    # `random_id` is a copy of the document's `_id`.
    logger.exception('Could not parse metadata JSON for log entry %s',
                     log.id if hasattr(log, 'id') else log.random_id)
    metadata_json = {'__raw': log.metadata_json}

  return {
      'kind_id': log.kind_id,
      'account_id': log.account_id,
      'performer_id': log.performer_id,
      'repository_id': log.repository_id,
      'datetime': str(log.datetime),
      'ip': str(log.ip),
      'metadata_json': metadata_json,
  }
def _write_logs(filename, logs, log_archive):
  """ Gzips the given formatted logs as streamed JSON and stores the archive
      under `filename` in the given log archive.
  """
  with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as buf:
    gz = GzipFile('temp_action_log_rotate', fileobj=buf, compresslevel=1)
    try:
      # Stream-encode to avoid materializing the whole JSON document.
      for piece in StreamingJSONEncoder().iterencode(logs):
        gz.write(piece)
    finally:
      gz.close()

    # Rewind so the archive upload reads from the beginning.
    buf.seek(0)
    log_archive.store_file(buf, JSON_MIMETYPE, content_encoding='gzip', file_id=filename)
def main():
  # Configure logging before doing anything else.
  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

  if not features.ACTION_LOG_ROTATION or None in [SAVE_PATH, SAVE_LOCATION]:
    logger.debug('Action log rotation worker not enabled; skipping')
    # Sleep forever so the process stays alive even when the feature is off
    # or the archive path/location are unconfigured.
    while True:
      time.sleep(100000)

  worker = LogRotateWorker()
  worker.start()


if __name__ == "__main__":
  main()

View file

@ -0,0 +1,29 @@
import logging
from app import namespace_gc_queue, all_queues
from data import model
from workers.queueworker import QueueWorker
from util.log import logfile_path
logger = logging.getLogger(__name__)

# How often (seconds) to poll the namespace GC queue.
POLL_PERIOD_SECONDS = 60
# How long (seconds) a queue item stays reserved while the namespace is deleted.
NAMESPACE_GC_TIMEOUT = 60 * 15  # 15 minutes
class NamespaceGCWorker(QueueWorker):
  """ Worker which cleans up namespaces enqueued to be GCed.
  """
  def process_queue_item(self, job_details):
    """ Deletes the namespace identified by the queued deletion marker. """
    logger.debug('Got namespace GC queue item: %s', job_details)
    model.user.delete_namespace_via_marker(job_details['marker_id'], all_queues)
if __name__ == "__main__":
  # Configure logging, then run the GC worker against the shared queue.
  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

  logger.debug('Starting namespace GC worker')
  worker = NamespaceGCWorker(namespace_gc_queue,
                             poll_period_seconds=POLL_PERIOD_SECONDS,
                             reservation_seconds=NAMESPACE_GC_TIMEOUT)
  worker.start()

View file

View file

@ -0,0 +1,50 @@
from abc import ABCMeta, abstractmethod
from collections import namedtuple
from six import add_metaclass
class Repository(namedtuple('Repository', ['namespace_name', 'name'])):
  """
  Repository represents a repository, identified by its namespace name and
  repository name.
  """
class Notification(
    namedtuple('Notification', [
        'uuid', 'event_name', 'method_name', 'event_config_dict', 'method_config_dict',
        'repository'])):
  """
  Notification represents a registered notification of some kind: the event
  that triggers it, the delivery method, their config dicts, and the
  Repository on which it is defined.
  """
@add_metaclass(ABCMeta)
class NotificationWorkerDataInterface(object):
  """
  Interface that represents all data store interactions required by the notification worker.
  """

  @abstractmethod
  def get_enabled_notification(self, notification_uuid):
    """ Returns an *enabled* notification with the given UUID, or None if none. """
    pass

  @abstractmethod
  def reset_number_of_failures_to_zero(self, notification):
    """ Resets the number of failures for the given notification back to zero. """
    pass

  @abstractmethod
  def increment_notification_failure_count(self, notification):
    """ Increments the number of failures on the given notification. """
    pass

  @abstractmethod
  def create_notification_for_testing(self, target_username, method_name=None, method_config=None):
    """ Creates a notification for testing. Returns the new notification's UUID. """
    pass

  @abstractmethod
  def user_has_local_notifications(self, target_username):
    """ Returns whether there are any Quay-local notifications for the given user. """
    pass

View file

@ -0,0 +1,50 @@
import json
from data import model
from workers.notificationworker.models_interface import (
NotificationWorkerDataInterface, Notification, Repository)
def notification(notification_row):
  """ Converts the given database notification row into a Notification tuple. """
  repository = Repository(notification_row.repository.namespace_user.username,
                          notification_row.repository.name)
  event_config = json.loads(notification_row.event_config_json or '{}')
  method_config = json.loads(notification_row.config_json or '{}')
  return Notification(uuid=notification_row.uuid,
                      event_name=notification_row.event.name,
                      method_name=notification_row.method.name,
                      event_config_dict=event_config,
                      method_config_dict=method_config,
                      repository=repository)
class PreOCIModel(NotificationWorkerDataInterface):
  """ Pre-OCI data-model implementation of the notification worker interface. """

  def get_enabled_notification(self, notification_uuid):
    # Returns None (rather than raising) when the notification is missing
    # or disabled.
    try:
      notification_row = model.notification.get_enabled_notification(notification_uuid)
    except model.InvalidNotificationException:
      return None

    return notification(notification_row)

  def reset_number_of_failures_to_zero(self, notification):
    model.notification.reset_notification_number_of_failures(
        notification.repository.namespace_name, notification.repository.name, notification.uuid)

  def increment_notification_failure_count(self, notification):
    model.notification.increment_notification_failure_count(notification.uuid)

  def create_notification_for_testing(self, target_username, method_name='quay_notification',
                                      method_config=None):
    # Test helper: attaches a repo_push notification to the fixture repository
    # devtable/simple, targeting the given user by default.
    repo = model.repository.get_repository('devtable', 'simple')
    method_data = method_config or {
        'target': {
            'kind': 'user',
            'name': target_username,
        }
    }
    notification = model.notification.create_repo_notification(repo, 'repo_push',
                                                               method_name, method_data, {})
    return notification.uuid

  def user_has_local_notifications(self, target_username):
    user = model.user.get_namespace_user(target_username)
    return bool(list(model.notification.list_notifications(user)))


# Singleton instance used by the notification worker.
pre_oci_model = PreOCIModel()

View file

@ -0,0 +1,43 @@
import logging
from app import notification_queue
from notifications.notificationmethod import NotificationMethod, InvalidNotificationMethodException
from notifications.notificationevent import NotificationEvent, InvalidNotificationEventException
from workers.notificationworker.models_pre_oci import pre_oci_model as model
from workers.queueworker import QueueWorker, JobException
logger = logging.getLogger(__name__)


class NotificationWorker(QueueWorker):
  """ Queue worker which delivers registered notifications for queued events. """

  def process_queue_item(self, job_details):
    notification = model.get_enabled_notification(job_details['notification_uuid'])
    if notification is None:
      # Notification was deleted or disabled since enqueueing; drop the job.
      return

    event_name = notification.event_name
    method_name = notification.method_name

    try:
      event_handler = NotificationEvent.get_event(event_name)
      method_handler = NotificationMethod.get_method(method_name)
    except InvalidNotificationMethodException as ex:
      # Raising JobException marks the job permanently failed (no retry).
      logger.exception('Cannot find notification method: %s', ex.message)
      raise JobException('Cannot find notification method: %s' % ex.message)
    except InvalidNotificationEventException as ex:
      logger.exception('Cannot find notification event: %s', ex.message)
      raise JobException('Cannot find notification event: %s' % ex.message)

    if event_handler.should_perform(job_details['event_data'], notification):
      try:
        method_handler.perform(notification, event_handler, job_details)
        # Successful delivery resets the failure counter.
        model.reset_number_of_failures_to_zero(notification)
      except (JobException, KeyError) as exc:
        # Any delivery failure (including malformed job data) counts against
        # the notification before the job is re-raised to the queue machinery.
        model.increment_notification_failure_count(notification)
        raise exc
if __name__ == "__main__":
  # Poll every 10s; each item is reserved for 30s and retried after 30s.
  worker = NotificationWorker(notification_queue, poll_period_seconds=10, reservation_seconds=30,
                              retry_after_seconds=30)
  worker.start()

View file

@ -0,0 +1,72 @@
import pytest
from mock import patch, Mock
from httmock import urlmatch, HTTMock
from notifications.notificationmethod import (QuayNotificationMethod, EmailMethod, WebhookMethod,
FlowdockMethod, HipchatMethod, SlackMethod,
CannotValidateNotificationMethodException)
from notifications.notificationevent import RepoPushEvent
from notifications.models_interface import Repository
from workers.notificationworker.notificationworker import NotificationWorker
from test.fixtures import *
from workers.notificationworker.models_pre_oci import pre_oci_model as model
def test_basic_notification_endtoend(initialized_db):
  """ End-to-end: a queued notification job results in a Quay-local notification. """
  # Ensure the public user doesn't have any notifications.
  assert not model.user_has_local_notifications('public')

  # Add a basic build notification.
  notification_uuid = model.create_notification_for_testing('public')
  event_data = {}

  # Fire off the queue processing. The queue argument is None since we invoke
  # process_queue_item directly instead of polling.
  worker = NotificationWorker(None)
  worker.process_queue_item({
      'notification_uuid': notification_uuid,
      'event_data': event_data,
  })

  # Ensure the notification was handled.
  assert model.user_has_local_notifications('public')
@pytest.mark.parametrize('method,method_config,netloc', [
    (QuayNotificationMethod, {'target': {'name': 'devtable', 'kind': 'user'}}, None),
    (EmailMethod, {'email': 'jschorr@devtable.com'}, None),
    (WebhookMethod, {'url': 'http://example.com'}, 'example.com'),
    (FlowdockMethod, {'flow_api_token': 'sometoken'}, 'api.flowdock.com'),
    (HipchatMethod, {'notification_token': 'token', 'room_id': 'foo'}, 'api.hipchat.com'),
    (SlackMethod, {'url': 'http://example.com'}, 'example.com'),
])
def test_notifications(method, method_config, netloc, initialized_db):
  """ Each delivery method performs without error; HTTP methods hit their endpoint. """
  # Mutable cell so the closure below can record the hit.
  url_hit = [False]

  @urlmatch(netloc=netloc)
  def url_handler(_, __):
    url_hit[0] = True
    return ''

  # Stub out the outgoing email Message class so EmailMethod sends nothing.
  mock = Mock()
  def get_mock(*args, **kwargs):
    return mock

  with patch('notifications.notificationmethod.Message', get_mock):
    with HTTMock(url_handler):
      # Add a basic build notification.
      notification_uuid = model.create_notification_for_testing('public',
                                                                method_name=method.method_name(),
                                                                method_config=method_config)
      event_data = RepoPushEvent().get_sample_data('devtable', 'simple', {})

      # Fire off the queue processing.
      worker = NotificationWorker(None)
      worker.process_queue_item({
          'notification_uuid': notification_uuid,
          'event_data': event_data,
          'performer_data': {},
      })

  # HTTP-backed methods must have actually called out.
  if netloc is not None:
    assert url_hit[0]

View file

@ -0,0 +1,38 @@
import logging
from datetime import timedelta, datetime
from app import app
from data.database import UseThenDisconnect
from data.queue import delete_expired
from workers.worker import Worker
logger = logging.getLogger(__name__)

# Queue items older than this are considered expired and eligible for deletion.
DELETION_DATE_THRESHOLD = timedelta(days=1)
# Minimum number of expired rows required before a delete pass bothers running.
DELETION_COUNT_THRESHOLD = 50
# Maximum rows removed per delete batch.
BATCH_SIZE = 500
# How often (seconds) the cleanup operation runs; default once a day.
QUEUE_CLEANUP_FREQUENCY = app.config.get('QUEUE_CLEANUP_FREQUENCY', 60*60*24)
class QueueCleanupWorker(Worker):
  """ Worker that periodically garbage collects expired rows from the queueitem table. """
  def __init__(self):
    super(QueueCleanupWorker, self).__init__()
    self.add_operation(self._cleanup_queue, QUEUE_CLEANUP_FREQUENCY)

  def _cleanup_queue(self):
    """ Performs garbage collection on the queueitem table. """
    with UseThenDisconnect(app.config):
      while True:
        # Recompute the cutoff each pass so long-running cleanups stay accurate.
        cutoff = datetime.now() - DELETION_DATE_THRESHOLD
        removed = delete_expired(cutoff, DELETION_COUNT_THRESHOLD, BATCH_SIZE)
        if removed == 0:
          return
if __name__ == "__main__":
  # Run the cleanup worker standalone.
  worker = QueueCleanupWorker()
  worker.start()

136
workers/queueworker.py Normal file
View file

@ -0,0 +1,136 @@
import logging
import json
from threading import Event, Lock
from app import app
from data.database import CloseForLongOperation
from workers.worker import Worker
logger = logging.getLogger(__name__)


class JobException(Exception):
  """ A job exception is an exception that is caused by something being malformed in the job. When
      a worker raises this exception the job will be terminated and the retry will not be returned
      to the queue. """
  pass
class WorkerUnhealthyException(Exception):
  """ When this exception is raised, the worker is no longer healthy and will not accept any more
      work. When this is raised while processing a queue item, the item should be returned to the
      queue along with another retry. """
  pass
class QueueWorker(Worker):
  """ Base class for workers that consume items from a work queue.

  Subclasses override `process_queue_item`. The worker polls the queue every
  `poll_period_seconds`, holds a reservation of `reservation_seconds` on each
  item while processing it, runs a watchdog every `watchdog_period_seconds`,
  and returns unhealthy-failure items to the queue for retry after
  `retry_after_seconds`.
  """
  def __init__(self, queue, poll_period_seconds=30, reservation_seconds=300,
               watchdog_period_seconds=60, retry_after_seconds=300):
    super(QueueWorker, self).__init__()

    self._poll_period_seconds = poll_period_seconds
    self._reservation_seconds = reservation_seconds
    self._watchdog_period_seconds = watchdog_period_seconds
    self._retry_after_seconds = retry_after_seconds
    self._stop = Event()
    self._terminated = Event()
    self._queue = queue

    # Guards `current_queue_item`, which is shared between the polling loop,
    # the watchdog, and the termination path.
    self._current_item_lock = Lock()
    self.current_queue_item = None

    # Add the various operations.
    self.add_operation(self.poll_queue, self._poll_period_seconds)
    self.add_operation(self.update_queue_metrics,
                       app.config['QUEUE_WORKER_METRICS_REFRESH_SECONDS'])
    self.add_operation(self.run_watchdog, self._watchdog_period_seconds)

  def process_queue_item(self, job_details):
    """ Processes the work for the given job. If the job fails and should be retried,
        this method should raise a WorkerUnhealthyException. If the job should be marked
        as permanently failed, it should raise a JobException. Otherwise, a successful return
        of this method will remove the job from the queue as completed.
    """
    # Message fixed to name the method subclasses must actually override
    # (it previously said 'run').
    raise NotImplementedError('Workers must implement process_queue_item.')

  def watchdog(self):
    """ Function that gets run once every watchdog_period_seconds. """
    pass

  def extend_processing(self, seconds_from_now, updated_data=None):
    """ Extends the reservation on the currently-held queue item, optionally
        replacing its data payload.
    """
    with self._current_item_lock:
      if self.current_queue_item is not None:
        self._queue.extend_processing(self.current_queue_item, seconds_from_now,
                                      updated_data=updated_data)

  def run_watchdog(self):
    logger.debug('Running watchdog.')
    try:
      self.watchdog()
    except WorkerUnhealthyException as exc:
      # Return the current item with its retry restored, then stop taking work.
      logger.error('The worker has encountered an error via watchdog and will not take new jobs')
      logger.error(exc.message)
      self.mark_current_incomplete(restore_retry=True)
      self._stop.set()

  def poll_queue(self):
    """ Drains the queue, processing items one at a time until it is empty or
        the worker is stopped.
    """
    logger.debug('Getting work item from queue.')

    with self._current_item_lock:
      self.current_queue_item = self._queue.get(processing_time=self._reservation_seconds)

    while True:
      # Retrieve the current item in the queue over which to operate. We do so under
      # a lock to make sure we are always retrieving an item when in a healthy state.
      current_queue_item = None
      with self._current_item_lock:
        current_queue_item = self.current_queue_item
        if current_queue_item is None:
          break

      logger.debug('Queue gave us some work: %s', current_queue_item.body)
      job_details = json.loads(current_queue_item.body)

      try:
        # Close the DB connection during (potentially long) job processing.
        with CloseForLongOperation(app.config):
          self.process_queue_item(job_details)
        self.mark_current_complete()
      except JobException as jex:
        # Malformed job: drop it permanently (no retry restored).
        logger.warning('An error occurred processing request: %s', current_queue_item.body)
        logger.warning('Job exception: %s', jex)
        self.mark_current_incomplete(restore_retry=False)
      except WorkerUnhealthyException as exc:
        # Worker is unhealthy: return the item (retry restored) and stop.
        logger.error('The worker has encountered an error via the job and will not take new jobs')
        logger.error(exc.message)
        self.mark_current_incomplete(restore_retry=True)
        self._stop.set()

      if not self._stop.is_set():
        with self._current_item_lock:
          self.current_queue_item = self._queue.get(processing_time=self._reservation_seconds)

    if not self._stop.is_set():
      logger.debug('No more work.')

  def update_queue_metrics(self):
    self._queue.update_metrics()

  def mark_current_incomplete(self, restore_retry=False):
    """ Returns the current item to the queue (optionally restoring its retry). """
    with self._current_item_lock:
      if self.current_queue_item is not None:
        self._queue.incomplete(self.current_queue_item, restore_retry=restore_retry,
                               retry_after=self._retry_after_seconds)
        self.current_queue_item = None

  def mark_current_complete(self):
    """ Marks the current item as successfully completed and releases it. """
    with self._current_item_lock:
      if self.current_queue_item is not None:
        self._queue.complete(self.current_queue_item)
        self.current_queue_item = None

  def ungracefully_terminated(self):
    # Give back the retry that we took for this queue item so that if it were down to zero
    # retries it will still be picked up by another worker
    # NOTE(review): this calls mark_current_incomplete with the default
    # restore_retry=False, which contradicts the comment above -- verify
    # whether restore_retry=True was intended.
    self.mark_current_incomplete()

View file

@ -0,0 +1,280 @@
import os
import re
import traceback
import fnmatch
import logging.config
import features
from app import app, prometheus
from data import database
from data.model.repo_mirror import claim_mirror, release_mirror
from data.logs_model import logs_model
from data.registry_model import registry_model
from data.database import RepoMirrorStatus
from data.model.oci.tag import delete_tag, retarget_tag, lookup_alive_tags_shallow
from notifications import spawn_notification
from util.audit import wrap_repository
from workers.repomirrorworker.repo_mirror_model import repo_mirror_model as model
logger = logging.getLogger(__name__)

# Gauge tracking how many repositories still await mirroring in this pass.
unmirrored_repositories_gauge = prometheus.create_gauge('unmirrored_repositories',
                                                        'Number of repositories that need to be scanned.')


class PreemptedException(Exception):
  """ Exception raised if another worker analyzed the image before this worker was able to do so.
  """
class RepoMirrorSkopeoException(Exception):
  """ Exception raised when a skopeo invocation fails.

  Attributes:
    message: human-readable description of the failure.
    stdout: captured standard output of the skopeo process.
    stderr: captured standard error of the skopeo process.
  """
  def __init__(self, message, stdout, stderr):
    # Pass the message to the base Exception so str(e) and e.args are
    # populated (previously they were empty).
    super(RepoMirrorSkopeoException, self).__init__(message)
    self.message = message
    self.stdout = stdout
    self.stderr = stderr
def process_mirrors(skopeo, token=None):
  """ Performs mirroring of repositories whose last sync time is greater than sync interval.
      If a token is provided, scanning will begin where the token indicates it previously completed.
      Returns the next token, or None when mirroring is disabled or an internal error occurred.
  """
  if not features.REPO_MIRROR:
    logger.debug('Repository mirror disabled; skipping RepoMirrorWorker process_mirrors')
    return None

  iterator, next_token = model.repositories_to_mirror(start_token=token)
  if iterator is None:
    logger.debug('Found no additional repositories to mirror')
    return next_token

  with database.UseThenDisconnect(app.config):
    for mirror, abt, num_remaining in iterator:
      try:
        perform_mirror(skopeo, mirror)
      except PreemptedException:
        # Another worker claimed this mirror first; signal the iterator to abort
        # this candidate and continue.
        logger.info('Another repository mirror worker pre-empted us for repository: %s', mirror.id)
        abt.set()
      except Exception as e:  # TODO: define exceptions
        # Safety net: abandon the whole pass on unexpected errors.
        logger.exception('Repository Mirror service unavailable')
        return None

      unmirrored_repositories_gauge.Set(num_remaining)

  return next_token
def perform_mirror(skopeo, mirror):
  """Run mirror on all matching tags of remote repository.

  Claims the mirror, lists and filters upstream tags, copies each matching
  tag with skopeo, and releases the mirror with an overall status. On any
  failure the sync is rolled back to its starting state.
  Raises PreemptedException if another worker already claimed the mirror.
  """
  if os.getenv('DEBUGLOG', 'false').lower() == 'true':
    verbose_logs = True
  else:
    verbose_logs = False

  # Claim the mirror so no other worker processes it concurrently.
  mirror = claim_mirror(mirror)
  if (mirror == None):
    raise PreemptedException

  emit_log(mirror, "repo_mirror_sync_started", "start", "'%s' with tag pattern '%s'" % (mirror.external_reference,
           ",".join(mirror.root_rule.rule_value)))

  # Fetch the tags to mirror, being careful to handle exceptions. The 'Exception' is safety net only, allowing
  # easy communication by user through bug report.
  tags = []
  try:
    tags = tags_to_mirror(skopeo, mirror)
  except RepoMirrorSkopeoException as e:
    emit_log(mirror, "repo_mirror_sync_failed", "end",
             "'%s' with tag pattern '%s': %s" % (mirror.external_reference, ",".join(mirror.root_rule.rule_value), e.message),
             tags=", ".join(tags), stdout=e.stdout, stderr=e.stderr)
    release_mirror(mirror, RepoMirrorStatus.FAIL)
    return
  except Exception as e:
    # NOTE(review): traceback.format_exc(e) passes the exception object as the
    # `limit` argument (Python 2 signature) -- verify this is intended.
    emit_log(mirror, "repo_mirror_sync_failed", "end",
             "'%s' with tag pattern '%s': INTERNAL ERROR" % (mirror.external_reference, ",".join(mirror.root_rule.rule_value)),
             tags=", ".join(tags), stdout="Not applicable", stderr=traceback.format_exc(e))
    release_mirror(mirror, RepoMirrorStatus.FAIL)
    return

  if tags == []:
    # Nothing matched the configured tag patterns; treat as a successful sync.
    emit_log(mirror, "repo_mirror_sync_success", "end",
             "'%s' with tag pattern '%s'" % (mirror.external_reference, ",".join(mirror.root_rule.rule_value)),
             tags="No tags matched")
    release_mirror(mirror, RepoMirrorStatus.SUCCESS)
    return

  # Sync tags
  now_ms = database.get_epoch_timestamp_ms()
  overall_status = RepoMirrorStatus.SUCCESS
  try:
    # Remove local tags that no longer exist upstream before copying.
    delete_obsolete_tags(mirror, tags)

    username = (mirror.external_registry_username.decrypt()
                if mirror.external_registry_username else None)
    password = (mirror.external_registry_password.decrypt()
                if mirror.external_registry_password else None)

    dest_server = app.config.get('REPO_MIRROR_SERVER_HOSTNAME', None) or app.config['SERVER_HOSTNAME']

    for tag in tags:
      src_image = "docker://%s:%s" % (mirror.external_reference, tag)
      dest_image = "docker://%s/%s/%s:%s" % (dest_server,
                                             mirror.repository.namespace_user.username,
                                             mirror.repository.name, tag)
      with database.CloseForLongOperation(app.config):
        # NOTE(review): dest_password is taken from the robot's `email` field
        # -- presumably where the robot token is stored; confirm.
        result = skopeo.copy(src_image, dest_image,
                             src_tls_verify=mirror.external_registry_config.get('tls_verify', True),
                             dest_tls_verify=app.config.get('REPO_MIRROR_TLS_VERIFY', True),  # TODO: is this a config choice or something else?
                             src_username=username,
                             src_password=password,
                             dest_username=mirror.internal_robot.username,
                             dest_password=mirror.internal_robot.email,
                             proxy=mirror.external_registry_config.get('proxy', {}),
                             verbose_logs=verbose_logs)

      if not result.success:
        overall_status = RepoMirrorStatus.FAIL
        emit_log(mirror, "repo_mirror_sync_tag_failed", "finish", "Source '%s' failed to sync" % src_image,
                 tag=tag, stdout=result.stdout, stderr=result.stderr)
        logger.info("Source '%s' failed to sync." % src_image)
      else:
        emit_log(mirror, "repo_mirror_sync_tag_success", "finish", "Source '%s' successful sync" % src_image,
                 tag=tag, stdout=result.stdout, stderr=result.stderr)
        logger.info("Source '%s' successful sync." % src_image)

      # Re-claim to keep our reservation on the mirror alive between tags.
      mirror = claim_mirror(mirror)
      if mirror is None:
        # NOTE(review): emit_log is invoked with mirror=None here and
        # dereferences mirror.* -- this path looks like it would raise;
        # verify intended behavior.
        emit_log(mirror, "repo_mirror_sync_failed", "lost",
                 "'%s' with tag pattern '%s'" % (mirror.external_reference, ",".join(mirror.root_rule.rule_value)))
  except Exception as e:
    overall_status = RepoMirrorStatus.FAIL
    emit_log(mirror, "repo_mirror_sync_failed", "end",
             "'%s' with tag pattern '%s': INTERNAL ERROR" % (mirror.external_reference, ",".join(mirror.root_rule.rule_value)),
             tags=", ".join(tags), stdout="Not applicable", stderr=traceback.format_exc(e))
    release_mirror(mirror, overall_status)
    return
  finally:
    # The finally clause runs even when the except branch above returned.
    if overall_status == RepoMirrorStatus.FAIL:
      emit_log(mirror, "repo_mirror_sync_failed", "lost",
               "'%s' with tag pattern '%s'" % (mirror.external_reference, ",".join(mirror.root_rule.rule_value)))
      # Undo all tag changes made since the sync started.
      rollback(mirror, now_ms)
    else:
      emit_log(mirror, "repo_mirror_sync_success", "end",
               "'%s' with tag pattern '%s'" % (mirror.external_reference, ",".join(mirror.root_rule.rule_value)),
               tags=", ".join(tags))
    release_mirror(mirror, overall_status)

  return overall_status
def tags_to_mirror(skopeo, mirror):
  """ Returns the sorted list of unique upstream tags matching any of the
      mirror's rule patterns (fnmatch globs). Returns [] when the upstream
      repository reports no tags.

  Raises whatever get_all_tags raises (RepoMirrorSkopeoException on skopeo
  failure).
  """
  all_tags = get_all_tags(skopeo, mirror)
  if all_tags == []:
    return []

  # Replaces the previous Python-2-only `list + filter(...)` concatenation
  # with a comprehension; behavior is identical and it also works on Python 3.
  matching_tags = {tag
                   for pattern in mirror.root_rule.rule_value
                   for tag in all_tags
                   if fnmatch.fnmatch(tag, pattern)}
  return sorted(matching_tags)
def get_all_tags(skopeo, mirror):
  """ Lists all tags of the mirror's external repository via skopeo.

  Raises RepoMirrorSkopeoException when the skopeo invocation fails.
  """
  verbose_logs = os.getenv('DEBUGLOG', 'false').lower() == 'true'

  username = None
  if mirror.external_registry_username:
    username = mirror.external_registry_username.decrypt()
  password = None
  if mirror.external_registry_password:
    password = mirror.external_registry_password.decrypt()

  registry_config = mirror.external_registry_config
  with database.CloseForLongOperation(app.config):
    result = skopeo.tags("docker://%s" % (mirror.external_reference),
                         mirror.root_rule.rule_value,
                         username=username,
                         password=password,
                         verbose_logs=verbose_logs,
                         tls_verify=registry_config.get('tls_verify', True),
                         proxy=registry_config.get('proxy', {}))

  if result.success:
    return result.tags

  raise RepoMirrorSkopeoException("skopeo inspect failed: %s" % _skopeo_inspect_failure(result),
                                  result.stdout, result.stderr)
def _skopeo_inspect_failure(result):
"""
Custom processing of skopeo error messages for user friendly description
:param result: SkopeoResults object
:return: Message to display
"""
lines = result.stderr.split("\n")
for line in lines:
if re.match('.*Error reading manifest.*', line):
return "No matching tags, including 'latest', to inspect for tags list"
return "See output"
def rollback(mirror, since_ms):
  """
  Undoes all tag changes made to the mirror's repository since `since_ms`.
  :param mirror: Mirror to perform rollback on
  :param since_ms: Time (epoch ms) the mirror sync started; all changes after will be undone
  :return:
  """
  repository_ref = registry_model.lookup_repository(mirror.repository.namespace_user.username,
                                                    mirror.repository.name)
  # NOTE(review): only the first page (100 entries) of tag history is
  # examined; syncs touching more tags may not fully roll back -- verify.
  tags, has_more = registry_model.list_repository_tag_history(repository_ref, 1, 100,
                                                              since_time_ms=since_ms)
  for tag in tags:
    logger.debug("Repo mirroring rollback tag '%s'" % tag)

    # If the tag has an end time, it was either deleted or moved.
    if tag.lifetime_end_ms:
      # If a future entry exists with a start time equal to the end time for this tag,
      # then the action was a move, rather than a delete and a create.
      # NOTE(review): filter(...)[0] raises IndexError when no such entry
      # exists and is not subscriptable on Python 3 -- verify this path.
      newer_tag = filter(lambda t: tag != t and tag.name == t.name and tag.lifetime_end_ms and
                         t.lifetime_start_ms == tag.lifetime_end_ms, tags)[0]
      if (newer_tag):
        logger.debug("Repo mirroring rollback revert tag '%s'" % tag)
        retarget_tag(tag.name, tag.manifest._db_id, is_reversion=True)
      else:
        # NOTE(review): both branches currently perform the identical
        # retarget call; only the log message differs -- confirm intended.
        logger.debug("Repo mirroring recreate tag '%s'" % tag)
        retarget_tag(tag.name, tag.manifest._db_id, is_reversion=True)

    # If the tag has a start time, it was created.
    elif tag.lifetime_start_ms:
      logger.debug("Repo mirroring rollback delete tag '%s'" % tag)
      delete_tag(mirror.repository, tag.name)
def delete_obsolete_tags(mirror, tags):
  """ Deletes live tags of the mirrored repository that are not in `tags`.
      Returns the list of deleted tag rows.
  """
  current_tags = lookup_alive_tags_shallow(mirror.repository.id)
  obsolete_tags = [candidate for candidate in current_tags if candidate.name not in tags]

  for obsolete in obsolete_tags:
    delete_tag(mirror.repository, obsolete.name)

  return obsolete_tags
# TODO: better to call 'track_and_log()' https://jira.coreos.com/browse/QUAY-1821
def emit_log(mirror, log_kind, verb, message, tag=None, tags=None, stdout=None, stderr=None):
  """ Records an action log for the mirrored repository and, for sync
      lifecycle events, spawns the corresponding repository notification.
  """
  namespace = mirror.repository.namespace_user.username
  repo_name = mirror.repository.name
  metadata = {
      "verb": verb,
      "namespace": namespace,
      "repo": repo_name,
      "message": message,
      "tag": tag,
      "tags": tags,
      "stdout": stdout,
      "stderr": stderr,
  }
  logs_model.log_action(log_kind, namespace_name=namespace, repository_name=repo_name,
                        metadata=metadata)

  if log_kind in ("repo_mirror_sync_started", "repo_mirror_sync_failed", "repo_mirror_sync_success"):
    spawn_notification(wrap_repository(mirror.repository), log_kind, {'message': message})

View file

@ -0,0 +1,25 @@
from abc import ABCMeta, abstractmethod
from collections import namedtuple
from six import add_metaclass
class RepoMirrorToken(namedtuple('NextRepoMirrorToken', ['min_id'])):
  """
  RepoMirrorToken represents an opaque token that can be passed between runs of the repository
  mirror worker to continue mirroring wherever the previous run left off. Note that the data of the
  token is *opaque* to the repository mirror worker, and the worker should *not* pull any data out
  or modify the token in any way.
  """
@add_metaclass(ABCMeta)
class RepoMirrorWorkerDataInterface(object):
  """ Interface for all data store interactions required by the repo mirror worker. """

  @abstractmethod
  def repositories_to_mirror(self, target_time, start_token=None):
    """
    Returns a tuple consisting of an iterator of all the candidates to scan and a NextScanToken.
    The iterator returns a tuple for each iteration consisting of the candidate Repository, the abort
    signal, and the number of remaining candidates. If the iterator returned is None, there are
    no candidates to process.

    NOTE(review): the concrete implementation (RepoMirrorModel) takes only
    (start_token=None) without `target_time` -- verify and reconcile the
    signatures.
    """

View file

@ -0,0 +1,42 @@
from math import log10
from data.model.repo_mirror import (get_eligible_mirrors, get_max_id_for_repo_mirror_config,
get_min_id_for_repo_mirror_config)
from data.database import RepoMirrorConfig
from util.migrate.allocator import yield_random_entries
from workers.repomirrorworker.models_interface import (RepoMirrorToken, RepoMirrorWorkerDataInterface)
class RepoMirrorModel(RepoMirrorWorkerDataInterface):
  def repositories_to_mirror(self, start_token=None):
    """ Returns (iterator, next_token) over mirror configs eligible for syncing,
        scanning IDs in random batches between the token's min_id and the
        current max id. Returns (None, None) when there is nothing to scan.
    """
    def batch_query():
      # Re-evaluated per batch so newly-eligible mirrors are picked up.
      return get_eligible_mirrors()

    # Find the minimum ID.
    if start_token is not None:
      min_id = start_token.min_id
    else:
      min_id = get_min_id_for_repo_mirror_config()

    # Get the ID of the last repository mirror config. Will be None if there are none in the database.
    max_id = get_max_id_for_repo_mirror_config()
    if max_id is None:
      return (None, None)

    if min_id is None or min_id > max_id:
      return (None, None)

    # 4^log10(total) gives us a scalable batch size into the billions.
    batch_size = int(4**log10(max(10, max_id - min_id)))

    iterator = yield_random_entries(
        batch_query,
        RepoMirrorConfig.id,
        batch_size,
        max_id,
        min_id)

    # The next run resumes past everything this run can see.
    return (iterator, RepoMirrorToken(max_id + 1))


# Singleton instance used by the repo mirror worker.
repo_mirror_model = RepoMirrorModel()

View file

@ -0,0 +1,60 @@
import os
import logging.config
import time
import argparse
import features
from app import app, repo_mirror_api
from workers.worker import Worker
from workers.repomirrorworker import process_mirrors
from util.repomirror.validator import RepoMirrorConfigValidator
from util.repomirror.skopeomirror import SkopeoMirror
from util.log import logfile_path
logger = logging.getLogger(__name__)

# Default seconds between mirror passes when REPO_MIRROR_INTERVAL is unset.
DEFAULT_MIRROR_INTERVAL = 30
class RepoMirrorWorker(Worker):
  """ Worker which periodically mirrors external repositories into Quay. """
  def __init__(self):
    super(RepoMirrorWorker, self).__init__()
    # Validates mirror configuration at startup; raises if invalid.
    RepoMirrorConfigValidator(app.config.get('FEATURE_REPO_MIRROR', False)).valid()

    self._mirrorer = SkopeoMirror()
    # Opaque continuation token carried between passes.
    self._next_token = None

    interval = app.config.get('REPO_MIRROR_INTERVAL', DEFAULT_MIRROR_INTERVAL)
    self.add_operation(self._process_mirrors, interval)

  def _process_mirrors(self):
    # Keep processing until the model reports no further work (token is None).
    while True:
      assert app.config.get('FEATURE_REPO_MIRROR', False)

      self._next_token = process_mirrors(self._mirrorer, self._next_token)
      if self._next_token is None:
        break
if __name__ == '__main__':
  # Optional remote-debugging hook, enabled via PYDEV_DEBUG=host:port.
  if os.getenv('PYDEV_DEBUG', None):
    import pydevd
    host, port = os.getenv('PYDEV_DEBUG').split(':')
    pydevd.settrace(host, port=int(port), stdoutToServer=True, stderrToServer=True, suspend=False)

  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

  parser = argparse.ArgumentParser()
  parser.add_argument('mode', metavar='MODE', type=str, nargs='?', default='',
                      choices=['mirror', ''])
  args = parser.parse_args()

  if not features.REPO_MIRROR:
    logger.debug('Repository mirror disabled; skipping RepoMirrorWorker')
    # Sleep forever so the process stays alive even when the feature is off.
    while True:
      time.sleep(100000)

  worker = RepoMirrorWorker()
  worker.start()

View file

@ -0,0 +1,516 @@
import pytest
import mock
import json
from functools import wraps
from app import storage
from data.registry_model.blobuploader import upload_blob, BlobUploadSettings
from image.docker.schema2.manifest import DockerSchema2ManifestBuilder
from data.registry_model import registry_model
from data.model.test.test_repo_mirroring import create_mirror_repo_robot
from data.database import Manifest, RepoMirrorConfig, RepoMirrorStatus
from workers.repomirrorworker import delete_obsolete_tags
from workers.repomirrorworker.repomirrorworker import RepoMirrorWorker
from io import BytesIO
from data.model.image import find_create_or_link_image
from data.model.tag import create_or_update_tag_for_repo
from util.repomirror.skopeomirror import SkopeoResults, SkopeoMirror
from test.fixtures import *
def disable_existing_mirrors(func):
  """ Test decorator that disables all existing mirror configs for the duration
      of the wrapped call and re-enables them afterwards.

  Re-enabling now happens in a `finally` block so fixtures are restored even
  when the wrapped test raises, and the wrapped function's return value is
  propagated.
  """
  @wraps(func)
  def wrapper(*args, **kwargs):
    for mirror in RepoMirrorConfig.select():
      mirror.is_enabled = False
      mirror.save()

    try:
      return func(*args, **kwargs)
    finally:
      for mirror in RepoMirrorConfig.select():
        mirror.is_enabled = True
        mirror.save()

  return wrapper
def _create_tag(repo, name):
  """ Test helper: builds a minimal schema2 manifest and tags it as `name`
      in the fixture repository mirror/repo.
  """
  repo_ref = registry_model.lookup_repository('mirror', 'repo')
  with upload_blob(repo_ref, storage, BlobUploadSettings(500, 500, 500)) as upload:
    app_config = {'TESTING': True}
    config_json = json.dumps({
        "config": {
            "author": u"Repo Mirror",
        },
        "rootfs": {"type": "layers", "diff_ids": []},
        "history": [
            {
                "created": "2019-07-30T18:37:09.284840891Z",
                "created_by": "base",
                "author": u"Repo Mirror",
            },
        ],
    })
    # Upload the image config as a blob (Python 2: str doubles as bytes here).
    upload.upload_chunk(app_config, BytesIO(config_json))
    blob = upload.commit_to_blob(app_config)

  builder = DockerSchema2ManifestBuilder()
  builder.set_config_digest(blob.digest, blob.compressed_size)
  builder.add_layer('sha256:abcd', 1234, urls=['http://hello/world'])
  manifest = builder.build()

  manifest, tag = registry_model.create_manifest_and_retarget_tag(repo_ref, manifest,
                                                                  name, storage)
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_successful_mirror(run_skopeo_mock, initialized_db, app):
  """
  Basic test of a successful mirror: one `skopeo inspect` of the remote repository
  followed by one `skopeo copy` of the `latest` tag into the local mirror repo.
  """
  mirror, repo = create_mirror_repo_robot(["latest", "7.1"])

  # Expected skopeo invocations, consumed in order by the side effect below.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(True, [], '{"RepoTags": ["latest"]}', "")
    }, {
      "args": [
        "/usr/bin/skopeo", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        u"docker://registry.example.com/namespace/repository:latest",
        u"docker://localhost:5000/mirror/repo:latest"
      ],
      "results": SkopeoResults(True, [], "stdout", "stderr")
    }
  ]

  def skopeo_test(args, proxy):
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}
      return skopeo_call['results']
    except Exception as e:
      # Push the expectation back so the final assertion reports what remained.
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test

  worker = RepoMirrorWorker()
  worker._process_mirrors()

  # All expected skopeo invocations must have been consumed.
  assert [] == skopeo_calls
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_successful_disabled_sync_now(run_skopeo_mock, initialized_db, app):
  """
  Disabled mirrors still allow "sync now": setting sync_status to SYNC_NOW forces a
  full inspect + copy cycle even though is_enabled is False.
  """
  mirror, repo = create_mirror_repo_robot(["latest", "7.1"])
  mirror.is_enabled = False
  mirror.sync_status = RepoMirrorStatus.SYNC_NOW
  mirror.save()

  # Expected skopeo invocations, consumed in order by the side effect below.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(True, [], '{"RepoTags": ["latest"]}', "")
    }, {
      "args": [
        "/usr/bin/skopeo", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        u"docker://registry.example.com/namespace/repository:latest",
        u"docker://localhost:5000/mirror/repo:latest"
      ],
      "results": SkopeoResults(True, [], "stdout", "stderr")
    }
  ]

  def skopeo_test(args, proxy):
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}
      return skopeo_call['results']
    except Exception as e:
      # Push the expectation back so the final assertion reports what remained.
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test

  worker = RepoMirrorWorker()
  worker._process_mirrors()

  assert [] == skopeo_calls
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_successful_mirror_verbose_logs(run_skopeo_mock, initialized_db, app, monkeypatch):
  """
  Basic test of a successful mirror with verbose logs turned on: with the DEBUGLOG
  environment variable set, every skopeo invocation gains the `--debug` flag.
  """
  mirror, repo = create_mirror_repo_robot(["latest", "7.1"])

  # Note the extra "--debug" argument in each expected invocation.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "--debug", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(True, [], '{"RepoTags": ["latest"]}', '')
    }, {
      "args": [
        "/usr/bin/skopeo", "--debug", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        u"docker://registry.example.com/namespace/repository:latest",
        u"docker://localhost:5000/mirror/repo:latest"
      ],
      "results": SkopeoResults(True, [], 'Success', '')
    }
  ]

  def skopeo_test(args, proxy):
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}
      return skopeo_call['results']
    except Exception as e:
      # Push the expectation back so the final assertion reports what remained.
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test

  # Enable verbose skopeo logging for this test only.
  monkeypatch.setenv('DEBUGLOG', 'true')

  worker = RepoMirrorWorker()
  worker._process_mirrors()

  assert [] == skopeo_calls
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_rollback(run_skopeo_mock, initialized_db, app):
  """
  Tags in the repo:
    "updated" - this tag will be updated during the mirror
    "removed" - this tag will be removed during the mirror
    "created" - this tag will be created during the mirror

  The copy of the "zzerror" tag fails, which should trigger the mirror's rollback path.
  NOTE(review): the docstring mentions "removed" but the code creates a "deleted" tag
  and a "zzerror" tag — presumably renamed at some point; confirm intent.
  """
  mirror, repo = create_mirror_repo_robot(["updated", "created", "zzerror"])
  _create_tag(repo, "updated")
  _create_tag(repo, "deleted")

  # Expected invocations: inspect, then copies in tag order; "zzerror" fails last.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:updated"],
      "results": SkopeoResults(True, [], '{"RepoTags": ["latest", "updated", "created", "zzerror"]}', '')
    }, {
      "args": [
        "/usr/bin/skopeo", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        u"docker://registry.example.com/namespace/repository:created",
        u"docker://localhost:5000/mirror/repo:created"
      ],
      "results": SkopeoResults(True, [], 'Success', '')
    }, {
      "args": [
        "/usr/bin/skopeo", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        u"docker://registry.example.com/namespace/repository:updated",
        u"docker://localhost:5000/mirror/repo:updated"
      ],
      "results": SkopeoResults(True, [], 'Success', '')
    }, {
      "args": [
        "/usr/bin/skopeo", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        u"docker://registry.example.com/namespace/repository:zzerror",
        u"docker://localhost:5000/mirror/repo:zzerror"
      ],
      "results": SkopeoResults(False, [], '', 'ERROR')
    }
  ]

  def skopeo_test(args, proxy):
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}

      # Simulate the tag mutations that a real copy would have produced, so the
      # rollback path has local state to restore. args[6] is the source reference.
      if args[1] == "copy" and args[6].endswith(":updated"):
        _create_tag(repo, "updated")
      elif args[1] == "copy" and args[6].endswith(":created"):
        _create_tag(repo, "created")

      return skopeo_call['results']
    except Exception as e:
      # Push the expectation back so the final assertion reports what remained.
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test

  worker = RepoMirrorWorker()
  worker._process_mirrors()

  assert [] == skopeo_calls

  # TODO: how to assert tag.retarget_tag() and tag.delete_tag() called?
def test_remove_obsolete_tags(initialized_db):
  """
  As part of the mirror, the set of tags on the remote repository is compared to the local
  existing tags. Those not present on the remote are removed locally.
  """
  mirror, repository = create_mirror_repo_robot(["updated", "created"], repo_name="removed")

  # Create one local tag ("oldtag") that is absent from the incoming remote tag list.
  manifest = Manifest.get()
  image = find_create_or_link_image('removed', repository, None, {}, 'local_us')
  tag = create_or_update_tag_for_repo(repository, 'oldtag', image.docker_image_id,
                                      oci_manifest=manifest, reversion=True)

  incoming_tags = ["one", "two"]
  deleted_tags = delete_obsolete_tags(mirror, incoming_tags)

  # BUGFIX: the original wrote `[tag.name for tag in deleted_tags] == [tag.name]`; on
  # Python 2 the comprehension variable leaks into the enclosing scope, so the right-hand
  # side compared against the *last deleted* tag instead of the tag created above, making
  # the assertion vacuous. Use a distinct loop variable.
  assert [deleted.name for deleted in deleted_tags] == [tag.name]
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_mirror_config_server_hostname(run_skopeo_mock, initialized_db, app, monkeypatch):
  """
  Set REPO_MIRROR_SERVER_HOSTNAME to override SERVER_HOSTNAME config: the destination
  reference of the copy should use the configured hostname instead of localhost:5000.
  """
  mirror, repo = create_mirror_repo_robot(["latest", "7.1"])

  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "--debug", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(True, [], '{"RepoTags": ["latest"]}', '')
    }, {
      "args": [
        "/usr/bin/skopeo", "--debug", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        u"docker://registry.example.com/namespace/repository:latest",
        u"docker://config_server_hostname/mirror/repo:latest"
      ],
      "results": SkopeoResults(True, [], 'Success', '')
    }
  ]

  def skopeo_test(args, proxy):
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}
      return skopeo_call['results']
    except Exception as e:
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test
  monkeypatch.setenv('DEBUGLOG', 'true')

  # BUGFIX: this module only does `import mock`; the bare name `patch` was never imported,
  # so `patch.dict(...)` raised a NameError at runtime. Use the qualified form.
  with mock.patch.dict('data.model.config.app_config',
                       {'REPO_MIRROR_SERVER_HOSTNAME': 'config_server_hostname'}):
    worker = RepoMirrorWorker()
    worker._process_mirrors()

  assert [] == skopeo_calls
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_quote_params(run_skopeo_mock, initialized_db, app):
  """
  Ensures that shell metacharacters in the external reference and username are quoted
  when building the skopeo command line, preventing command injection.
  """
  mirror, repo = create_mirror_repo_robot(["latest", "7.1"])
  mirror.external_reference = "& rm -rf /;/namespace/repository"
  mirror.external_registry_username = "`rm -rf /`"
  mirror.save()

  # The hostile reference must appear single-quoted in the expected arguments.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", "--creds", u"`rm -rf /`", u"'docker://& rm -rf /;/namespace/repository:latest'"],
      "results": SkopeoResults(True, [], '{"RepoTags": ["latest"]}', "")
    }, {
      "args": [
        "/usr/bin/skopeo", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", "%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        "--src-creds", u"`rm -rf /`",
        u"'docker://& rm -rf /;/namespace/repository:latest'",
        u"docker://localhost:5000/mirror/repo:latest"
      ],
      "results": SkopeoResults(True, [], "stdout", "stderr")
    }
  ]

  def skopeo_test(args, proxy):
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}
      return skopeo_call['results']
    except Exception as e:
      # Push the expectation back so the final assertion reports what remained.
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test

  worker = RepoMirrorWorker()
  worker._process_mirrors()

  assert [] == skopeo_calls
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_quote_params_password(run_skopeo_mock, initialized_db, app):
  """
  Same as test_quote_params but additionally sets a hostile external registry password,
  ensuring the full `user:password` credential string is passed through verbatim.
  """
  mirror, repo = create_mirror_repo_robot(["latest", "7.1"])
  mirror.external_reference = "& rm -rf /;/namespace/repository"
  mirror.external_registry_username = "`rm -rf /`"
  mirror.external_registry_password = "\"\"$PATH\\\""
  mirror.save()

  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", "--creds", u"`rm -rf /`:\"\"$PATH\\\"", u"'docker://& rm -rf /;/namespace/repository:latest'"],
      "results": SkopeoResults(True, [], '{"RepoTags": ["latest"]}', "")
    }, {
      "args": [
        "/usr/bin/skopeo", "copy",
        "--src-tls-verify=True", "--dest-tls-verify=True",
        "--dest-creds", u"%s:%s" % (mirror.internal_robot.username, mirror.internal_robot.email),
        "--src-creds", u"`rm -rf /`:\"\"$PATH\\\"",
        u"'docker://& rm -rf /;/namespace/repository:latest'",
        u"docker://localhost:5000/mirror/repo:latest"
      ],
      "results": SkopeoResults(True, [], "stdout", "stderr")
    }
  ]

  def skopeo_test(args, proxy):
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}
      return skopeo_call['results']
    except Exception as e:
      # Push the expectation back so the final assertion reports what remained.
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test

  worker = RepoMirrorWorker()
  worker._process_mirrors()

  assert [] == skopeo_calls
@disable_existing_mirrors
@mock.patch('util.repomirror.skopeomirror.SkopeoMirror.run_skopeo')
def test_inspect_error_mirror(run_skopeo_mock, initialized_db, app):
  """
  Test for no tag for skopeo inspect. The mirror is processed four times, asserting that
  the remaining syncs decrement until next sync is bumped to the future, confirming the
  fourth is never processed (its expected skopeo calls are left unconsumed).
  """
  def skopeo_test(args, proxy):
    # `skopeo_calls` is resolved from the enclosing scope at call time, so rebinding it
    # between worker runs below swaps in each run's expected invocations.
    try:
      skopeo_call = skopeo_calls.pop(0)
      assert args == skopeo_call['args']
      assert proxy == {}
      return skopeo_call['results']
    except Exception as e:
      skopeo_calls.append(skopeo_call)
      raise e

  run_skopeo_mock.side_effect = skopeo_test
  worker = RepoMirrorWorker()

  mirror, repo = create_mirror_repo_robot(["7.1"])

  # Call number 1: both inspects fail; retries drop from 3 to 2.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:7.1"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest 7.1 in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    },
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest latest in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    }
  ]
  worker._process_mirrors()
  mirror = RepoMirrorConfig.get_by_id(mirror.id)
  assert [] == skopeo_calls
  assert 2 == mirror.sync_retries_remaining

  # Call number 2: retries drop from 2 to 1.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:7.1"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest 7.1 in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    },
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest latest in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    }
  ]
  worker._process_mirrors()
  mirror = RepoMirrorConfig.get_by_id(mirror.id)
  assert [] == skopeo_calls
  assert 1 == mirror.sync_retries_remaining

  # Call number 3: the final retry is consumed; retries reset to 3 and the next sync
  # date is bumped into the future.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:7.1"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest 7.1 in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    },
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest latest in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    }
  ]
  worker._process_mirrors()
  mirror = RepoMirrorConfig.get_by_id(mirror.id)
  assert [] == skopeo_calls
  assert 3 == mirror.sync_retries_remaining

  # Call number 4: next sync is in the future, so nothing is processed and both
  # expected calls remain unconsumed.
  skopeo_calls = [
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:7.1"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest 7.1 in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    },
    {
      "args": ["/usr/bin/skopeo", "inspect", "--tls-verify=True", u"docker://registry.example.com/namespace/repository:latest"],
      "results": SkopeoResults(False, [], "", 'time="2019-09-18T13:29:40Z" level=fatal msg="Error reading manifest latest in registry.example.com/namespace/repository: manifest unknown: manifest unknown"')
    }
  ]
  worker._process_mirrors()
  mirror = RepoMirrorConfig.get_by_id(mirror.id)
  assert 2 == len(skopeo_calls)
  assert 3 == mirror.sync_retries_remaining

View file

@ -0,0 +1,53 @@
import logging
from datetime import date, timedelta
from app import app # This is required to initialize the database.
from data import model
from data.logs_model import logs_model
from workers.worker import Worker, with_exponential_backoff
POLL_PERIOD_SECONDS = 10
logger = logging.getLogger(__name__)
class RepositoryActionCountWorker(Worker):
  """ Worker which periodically aggregates the previous day's action count for one
      uncounted repository per iteration and refreshes its search score.
  """

  def __init__(self):
    super(RepositoryActionCountWorker, self).__init__()
    self.add_operation(self._count_repository_actions, POLL_PERIOD_SECONDS)

  @with_exponential_backoff(backoff_multiplier=10, max_backoff=3600, max_retries=10)
  def _count_repository_actions(self):
    """ Counts actions and aggregates search scores for a random repository for the
        previous day. Returns True when a repository was fully processed, False if
        there was nothing to do or another worker got there first. """
    repo_to_count = model.repositoryactioncount.find_uncounted_repository()
    if repo_to_count is None:
      logger.debug('No further repositories to count')
      return False

    logger.debug('Found repository #%s to count', repo_to_count.id)

    # Aggregate the actions that occurred yesterday.
    target_date = date.today() - timedelta(days=1)
    daily_count = logs_model.count_repository_actions(repo_to_count, target_date)
    if daily_count is None:
      logger.debug('Could not load count for repository #%s', repo_to_count.id)
      return False

    stored = model.repositoryactioncount.store_repository_action_count(repo_to_count,
                                                                       target_date,
                                                                       daily_count)
    if not stored:
      logger.debug('Repository #%s was counted by another worker', repo_to_count.id)
      return False

    logger.debug('Updating search score for repository #%s', repo_to_count.id)
    if not model.repositoryactioncount.update_repository_score(repo_to_count):
      logger.debug('Repository #%s had its search score updated by another worker',
                   repo_to_count.id)
      return False

    logger.debug('Repository #%s search score updated', repo_to_count.id)
    return True
if __name__ == "__main__":
  # Entry point: run the repository action count worker as a standalone process.
  worker = RepositoryActionCountWorker()
  worker.start()

View file

@ -0,0 +1,89 @@
import logging
import time
import json
import features
from app import secscan_notification_queue, secscan_api
from workers.queueworker import QueueWorker, JobException
from util.secscan.notifier import SecurityNotificationHandler, ProcessNotificationPageResult
logger = logging.getLogger(__name__)
_PROCESSING_SECONDS = 60 * 60 # 1 hour
_LAYER_LIMIT = 1000 # The number of layers to request on each page.
class SecurityNotificationWorker(QueueWorker):
  """ Queue worker which handles security scanner vulnerability notifications by paging
      through the notification's data and dispatching the resulting Quay notifications.
  """

  def process_queue_item(self, data):
    self.perform_notification_work(data)

  def perform_notification_work(self, data, layer_limit=_LAYER_LIMIT):
    """ Performs the work for handling a security notification as referenced by the given data
        object. Returns True on successful handling, False on non-retryable failure and raises
        a JobException on retryable failure.
    """
    notification_name = data['Name']
    current_page = data.get('page', None)
    handler = SecurityNotificationHandler(layer_limit)

    while True:
      # Retrieve the current page of notification data from the security scanner.
      (response_data, should_retry) = secscan_api.get_notification(notification_name,
                                                                   layer_limit=layer_limit,
                                                                   page=current_page)

      # If no response, something went wrong.
      if response_data is None:
        if should_retry:
          raise JobException()
        else:
          # Remove the job from the API.
          logger.error('Failed to handle security notification %s', notification_name)
          secscan_api.mark_notification_read(notification_name)

          # Return to mark the job as "complete", as we'll never be able to finish it.
          return False

      # Extend processing on the queue item so it doesn't expire while we're working.
      self.extend_processing(_PROCESSING_SECONDS, json.dumps(data))

      # Process the notification data.
      notification_data = response_data['Notification']
      result = handler.process_notification_page_data(notification_data)

      # Possible states after processing: failed to process, finished processing entirely
      # or finished processing the page.
      if result == ProcessNotificationPageResult.FAILED:
        # Something went wrong.
        raise JobException

      if result == ProcessNotificationPageResult.FINISHED_PROCESSING:
        # Mark the notification as read.
        if not secscan_api.mark_notification_read(notification_name):
          # Return to mark the job as "complete", as we'll never be able to finish it.
          logger.error('Failed to mark notification %s as read', notification_name)
          return False

        # Send the generated Quay notifications.
        handler.send_notifications()
        return True

      if result == ProcessNotificationPageResult.FINISHED_PAGE:
        # Continue onto the next page.
        current_page = notification_data['NextPage']
        continue
if __name__ == '__main__':
  if not features.SECURITY_SCANNER or not features.SECURITY_NOTIFICATIONS:
    logger.debug('Security scanner disabled; skipping SecurityNotificationWorker')
    # Sleep forever instead of exiting so the process supervisor does not restart-loop.
    while True:
      time.sleep(100000)

  worker = SecurityNotificationWorker(secscan_notification_queue, poll_period_seconds=30,
                                      reservation_seconds=30, retry_after_seconds=30)
  worker.start()

View file

@ -0,0 +1,36 @@
import logging.config
from app import app, prometheus
from data.database import UseThenDisconnect
from workers.securityworker.models_pre_oci import pre_oci_model as model
from util.secscan.api import APIRequestFailure
from util.secscan.analyzer import PreemptedException
logger = logging.getLogger(__name__)
unscanned_images_gauge = prometheus.create_gauge('unscanned_images',
'Number of images that clair needs to scan.')
def index_images(target_version, analyzer, token=None):
  """ Performs security indexing of all images in the database not scanned at the target version.
      If a token is provided, scanning will begin where the token indicates it previously
      completed. Returns the next token to resume from, or None when there is nothing
      further to scan or the scanner became unavailable mid-run.
  """
  iterator, next_token = model.candidates_to_scan(target_version, start_token=token)
  if iterator is None:
    logger.debug('Found no additional images to scan')
    return None

  with UseThenDisconnect(app.config):
    for candidate, abt, num_remaining in iterator:
      try:
        analyzer.analyze_recursively(candidate)
      except PreemptedException:
        # Another worker is already analyzing this layer; signal the iterator to abort.
        logger.info('Another worker pre-empted us for layer: %s', candidate.id)
        abt.set()
      except APIRequestFailure:
        # NOTE(review): returns None implicitly, dropping the resume token — the next run
        # restarts from the configured/derived minimum ID. Presumably intentional; confirm.
        logger.exception('Security scanner service unavailable')
        return

      unscanned_images_gauge.Set(num_remaining)

  return next_token

View file

@ -0,0 +1,30 @@
from abc import ABCMeta, abstractmethod
from collections import namedtuple
from six import add_metaclass
class ScanToken(namedtuple('NextScanToken', ['min_id'])):
  """
  ScanToken represents an opaque token that can be passed between runs of the security worker
  to continue scanning wherever the previous run left off. Note that the data of the token is
  *opaque* to the security worker, and the security worker should *not* pull any data out or
  modify the token in any way.
  """
@add_metaclass(ABCMeta)
class SecurityWorkerDataInterface(object):
  """
  Interface that represents all data store interactions required by the security worker.
  """

  @abstractmethod
  def candidates_to_scan(self, target_version, start_token=None):
    """
    Returns a tuple consisting of an iterator of all the candidates to scan and a NextScanToken.
    The iterator returns a tuple for each iteration consisting of the candidate Image, the abort
    signal, and the number of remaining candidates. If the iterator returned is None, there are
    no candidates to process.
    """
    pass

View file

@ -0,0 +1,49 @@
from math import log10
from app import app
from data.model.image import (get_images_eligible_for_scan, get_image_pk_field,
get_max_id_for_sec_scan, get_min_id_for_sec_scan)
from util.migrate.allocator import yield_random_entries
from workers.securityworker.models_interface import (ScanToken, SecurityWorkerDataInterface)
class PreOCIModel(SecurityWorkerDataInterface):
  """ Pre-OCI implementation of the security worker data interface. """

  def candidates_to_scan(self, target_version, start_token=None):
    """ Returns (iterator, next_token) over images eligible for scanning at the given
        target version, or (None, None) when there is nothing to scan.
    """
    def batch_query():
      return get_images_eligible_for_scan(target_version)

    # Determine the minimum image ID from which to begin: the resume token wins,
    # then the configured override, then the database minimum.
    if start_token is not None:
      lower_bound = start_token.min_id
    else:
      lower_bound = app.config.get('SECURITY_SCANNER_INDEXING_MIN_ID')
      if lower_bound is None:
        lower_bound = get_min_id_for_sec_scan(target_version)

    # Get the ID of the last image we can analyze. Will be None if there are no images in
    # the database. Also bail when the window is empty or inverted.
    upper_bound = get_max_id_for_sec_scan()
    if upper_bound is None or lower_bound is None or lower_bound > upper_bound:
      return (None, None)

    # 4^log10(total) gives us a scalable batch size into the billions.
    batch_size = int(4 ** log10(max(10, upper_bound - lower_bound)))

    # TODO: Once we have a clean shared NamedTuple for Images, send that to the secscan
    # analyzer rather than the database Image itself.
    candidate_iterator = yield_random_entries(batch_query, get_image_pk_field(), batch_size,
                                              upper_bound, lower_bound)
    return (candidate_iterator, ScanToken(upper_bound + 1))


pre_oci_model = PreOCIModel()

View file

@ -0,0 +1,48 @@
import logging.config
import time
import features
from app import app, secscan_api
from workers.worker import Worker
from workers.securityworker import index_images
from util.secscan.api import SecurityConfigValidator
from util.secscan.analyzer import LayerAnalyzer
from util.log import logfile_path
from endpoints.v2 import v2_bp
logger = logging.getLogger(__name__)
DEFAULT_INDEXING_INTERVAL = 30
class SecurityWorker(Worker):
  """ Worker which periodically indexes unscanned images via the security scanner. """

  def __init__(self):
    super(SecurityWorker, self).__init__()

    validator = SecurityConfigValidator(app.config.get('FEATURE_SECURITY_SCANNER', False),
                                        app.config.get('SECURITY_SCANNER_ENDPOINT'))
    if not validator.valid():
      # Without a valid configuration no operation is registered, so the worker idles.
      logger.warning('Failed to validate security scan configuration')
      return

    self._target_version = app.config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 3)
    self._analyzer = LayerAnalyzer(app.config, secscan_api)
    self._next_token = None

    self.add_operation(self._index_images,
                       app.config.get('SECURITY_SCANNER_INDEXING_INTERVAL',
                                      DEFAULT_INDEXING_INTERVAL))

  def _index_images(self):
    # Resume from wherever the previous run left off (None restarts from the beginning).
    self._next_token = index_images(self._target_version, self._analyzer, self._next_token)
if __name__ == '__main__':
  # NOTE(review): registering the v2 blueprint appears necessary for URL generation
  # during analysis — confirm.
  app.register_blueprint(v2_bp, url_prefix='/v2')

  if not features.SECURITY_SCANNER:
    logger.debug('Security scanner disabled; skipping SecurityWorker')
    # Sleep forever instead of exiting so the process supervisor does not restart-loop.
    while True:
      time.sleep(100000)

  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)
  worker = SecurityWorker()
  worker.start()

View file

@ -0,0 +1,10 @@
from mock import patch, Mock
from test.fixtures import *
from workers.securityworker import index_images
def test_securityworker_realdb(initialized_db):
  """ Sanity check of index_images against the test database: with eligible images
      present, a resume token is returned and the analyzer is actually invoked.
  """
  mock_analyzer = Mock()
  assert index_images(1, mock_analyzer) is not None
  mock_analyzer.analyze_recursively.assert_called()

View file

View file

@ -0,0 +1,28 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
@add_metaclass(ABCMeta)
class ServiceKeyWorkerDataInterface(object):
  """
  Interface that represents all data store interactions required by the service key worker.
  """

  @abstractmethod
  def set_key_expiration(self, key_id, expiration_date):
    """ Sets the expiration date of the service key with the given key ID to that given. """
    pass

  @abstractmethod
  def create_service_key_for_testing(self, expiration):
    """ Creates a service key for testing with the given expiration. Returns the KID for
        the key.
    """
    pass

  @abstractmethod
  def get_service_key_expiration(self, key_id):
    """ Returns the expiration date for the key with the given ID. If the key doesn't exist or
        does not have an expiration, returns None.
    """
    pass

View file

@ -0,0 +1,21 @@
from data import model
from workers.servicekeyworker.models_interface import ServiceKeyWorkerDataInterface
class PreOCIModel(ServiceKeyWorkerDataInterface):
  """ Pre-OCI data model implementation of the service key worker interface, delegating
      to the `data.model.service_keys` module.
  """

  def set_key_expiration(self, kid, expiration_date):
    model.service_keys.set_key_expiration(kid, expiration_date)

  def create_service_key_for_testing(self, expiration):
    key = model.service_keys.create_service_key('test', 'somekid', 'quay', '', {}, expiration)
    return key.kid

  def get_service_key_expiration(self, kid):
    # Unapproved keys are included so tests can inspect freshly created keys.
    try:
      key = model.service_keys.get_service_key(kid, approved_only=False)
      return key.expiration_date
    except model.ServiceKeyDoesNotExist:
      return None


pre_oci_model = PreOCIModel()

View file

@ -0,0 +1,41 @@
import logging
from datetime import datetime, timedelta
from app import app, instance_keys, metric_queue
from workers.servicekeyworker.models_pre_oci import pre_oci_model as model
from workers.worker import Worker
logger = logging.getLogger(__name__)
class ServiceKeyWorker(Worker):
  """ Worker which periodically refreshes this instance's service key so it is not
      garbage collected while the instance is still alive.
  """

  def __init__(self):
    super(ServiceKeyWorker, self).__init__()
    self.add_operation(self._refresh_service_key,
                       app.config.get('INSTANCE_SERVICE_KEY_REFRESH', 60) * 60)

  def _refresh_service_key(self):
    """
    Refreshes the instance's active service key so it doesn't get garbage collected.
    Raises whatever exception the underlying expiration update raised, after recording
    the failure metric.
    """
    expiration_time = timedelta(minutes=instance_keys.service_key_expiration)
    new_expiration = datetime.utcnow() + expiration_time

    logger.debug('Starting automatic refresh of service key %s to new expiration %s',
                 instance_keys.local_key_id, new_expiration)
    try:
      model.set_key_expiration(instance_keys.local_key_id, new_expiration)
    except Exception:
      logger.exception('Failure for automatic refresh of service key %s with new expiration %s',
                       instance_keys.local_key_id, new_expiration)
      metric_queue.instance_key_renewal_failure.Inc(labelvalues=[instance_keys.local_key_id])
      # BUGFIX: use a bare `raise` so the original traceback is preserved; `raise ex`
      # re-raised from here and lost the original stack on Python 2.
      raise

    logger.debug('Finished automatic refresh of service key %s with new expiration %s',
                 instance_keys.local_key_id, new_expiration)
    metric_queue.instance_key_renewal_success.Inc(labelvalues=[instance_keys.local_key_id])
if __name__ == "__main__":
  # Entry point: run the service key refresh worker as a standalone process.
  worker = ServiceKeyWorker()
  worker.start()

View file

@ -0,0 +1,23 @@
from datetime import datetime, timedelta
from mock import patch
from data import model
from workers.servicekeyworker.servicekeyworker import ServiceKeyWorker
from util.morecollections import AttrDict
from test.fixtures import *
from workers.servicekeyworker.models_pre_oci import pre_oci_model as model
def test_refresh_service_key(initialized_db):
  """ Ensures ServiceKeyWorker._refresh_service_key pushes the key's expiration further
      into the future.
  """
  # Create a service key for testing.
  original_expiration = datetime.utcnow() + timedelta(minutes=10)
  test_key_kid = model.create_service_key_for_testing(original_expiration)
  assert model.get_service_key_expiration(test_key_kid)

  # Swap in fake instance keys pointing at the test key with a 30-minute expiration.
  instance_keys = AttrDict(dict(local_key_id=test_key_kid, service_key_expiration=30))
  with patch('workers.servicekeyworker.servicekeyworker.instance_keys', instance_keys):
    worker = ServiceKeyWorker()
    worker._refresh_service_key()

  # Ensure the key's expiration was changed.
  assert model.get_service_key_expiration(test_key_kid) > original_expiration

View file

@ -0,0 +1,147 @@
import logging
import time
import features
from app import app, storage as app_storage, image_replication_queue
from data.database import CloseForLongOperation
from data import model
from workers.queueworker import QueueWorker, WorkerUnhealthyException, JobException
from util.log import logfile_path
logger = logging.getLogger(__name__)
POLL_PERIOD_SECONDS = 10
RESERVATION_SECONDS = app.config.get('STORAGE_REPLICATION_PROCESSING_SECONDS', 60*20)
class StorageReplicationWorker(QueueWorker):
  def process_queue_item(self, job_details):
    """ Handles one replication job: replicates the referenced image storage to all
        storage regions configured for the owning namespace.
    """
    storage_uuid = job_details['storage_id']
    namespace_id = job_details['namespace_user_id']

    logger.debug('Starting replication of image storage %s under namespace %s', storage_uuid,
                 namespace_id)
    try:
      namespace = model.user.get_namespace_user_by_user_id(namespace_id)
    except model.user.InvalidUsernameException:
      # Unknown namespace: log and drop the job; there is nothing to replicate.
      logger.exception('Exception when looking up namespace %s for replication of image storage %s',
                       namespace_id, storage_uuid)
      return

    self.replicate_storage(namespace, storage_uuid, app_storage)
def _backoff_check_exists(self, location, path, storage, backoff_check=True):
for retry in range(0, 4):
if storage.exists([location], path):
return True
if not backoff_check:
return False
seconds = pow(2, retry) * 2
logger.debug('Cannot find path `%s` in location %s (try #%s). Sleeping for %s seconds',
path, location, retry, seconds)
time.sleep(seconds)
return False
def replicate_storage(self, namespace, storage_uuid, storage, backoff_check=True):
# Lookup the namespace and its associated regions.
if not namespace:
logger.debug('Unknown namespace when trying to replicate storage %s', storage_uuid)
return
locations = model.user.get_region_locations(namespace)
# Lookup the image storage.
try:
partial_storage = model.storage.get_storage_by_uuid(storage_uuid)
except model.InvalidImageException:
logger.debug('Unknown storage: %s', storage_uuid)
return
# Check to see if the image is at all the required locations.
locations_required = locations | set(storage.default_locations)
locations_missing = locations_required - set(partial_storage.locations)
logger.debug('For replication of storage %s under namespace %s: %s required; %s missing',
storage_uuid, namespace.username, locations_required, locations_missing)
if not locations_missing:
logger.debug('No missing locations for storage %s under namespace %s. Required: %s',
storage_uuid, namespace.username, locations_required)
return
# For any missing storage locations, initiate a copy.
existing_location = list(partial_storage.locations)[0]
path_to_copy = model.storage.get_layer_path(partial_storage)
# Lookup and ensure the existing location exists.
if not self._backoff_check_exists(existing_location, path_to_copy, storage, backoff_check):
logger.warning('Cannot find image storage %s in existing location %s; stopping replication',
storage_uuid, existing_location)
raise JobException()
# For each missing location, copy over the storage.
for location in locations_missing:
logger.debug('Starting copy of storage %s to location %s from %s', partial_storage.uuid,
location, existing_location)
# Copy the binary data.
copied = False
try:
with CloseForLongOperation(app.config):
storage.copy_between(path_to_copy, existing_location, location)
copied = True
except IOError:
logger.exception('Failed to copy path `%s` of image storage %s to location %s',
path_to_copy, partial_storage.uuid, location)
raise JobException()
except:
logger.exception('Unknown exception when copying path %s of image storage %s to loc %s',
path_to_copy, partial_storage.uuid, location)
raise WorkerUnhealthyException()
if copied:
# Verify the data was copied to the target storage, to ensure that there are no cases
# where we write the placement without knowing the data is present.
if not self._backoff_check_exists(location, path_to_copy, storage, backoff_check):
logger.warning('Failed to find path `%s` in location `%s` after copy', path_to_copy,
location)
raise JobException()
# Create the storage location record for the storage now that the copy has
# completed.
model.storage.add_storage_placement(partial_storage, location)
logger.debug('Finished copy of image storage %s to location %s from %s',
partial_storage.uuid, location, existing_location)
logger.debug('Completed replication of image storage %s to locations %s from %s',
partial_storage.uuid, locations_missing, existing_location)
if __name__ == "__main__":
  # BUGFIX: this module only does `import logging` at the top; `logging.config`
  # was never imported, so fileConfig could raise AttributeError unless another
  # module happened to pull logging.config in transitively. Import it here.
  import logging.config

  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

  # Replication cannot work when any configured engine is local-only storage.
  has_local_storage = False

  if features.STORAGE_REPLICATION:
    for storage_type, _ in app.config.get('DISTRIBUTED_STORAGE_CONFIG', {}).values():
      if storage_type == 'LocalStorage':
        has_local_storage = True
        break

  if not features.STORAGE_REPLICATION or has_local_storage:
    if has_local_storage:
      logger.error("Storage replication can't be used with local storage")
    else:
      logger.debug('Full storage replication disabled; skipping')

    # Sleep forever so the container stays up without doing any work; the worker
    # below is intentionally never started in this case.
    while True:
      time.sleep(10000)

  logger.debug('Starting replication worker')
  worker = StorageReplicationWorker(image_replication_queue,
                                    poll_period_seconds=POLL_PERIOD_SECONDS,
                                    reservation_seconds=RESERVATION_SECONDS)
  worker.start()

View file

@ -0,0 +1,408 @@
import logging
import logging.config
import time
from peewee import JOIN, fn, IntegrityError
from app import app
from data.database import (UseThenDisconnect, TagToRepositoryTag, RepositoryTag,
TagManifestToManifest, Tag, TagManifest, TagManifestToManifest, Image,
Manifest, TagManifestLabel, ManifestLabel, TagManifestLabelMap,
Repository, db_transaction)
from data.model import DataModelException
from data.model.image import get_parent_images
from data.model.tag import populate_manifest
from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist
from data.model.user import get_namespace_user
from data.registry_model import pre_oci_model
from data.registry_model.datatypes import Tag as TagDataType
from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE,
MalformedSchema1Manifest)
from workers.worker import Worker
from util.bytes import Bytes
from util.log import logfile_path
from util.migrate.allocator import yield_random_entries
logger = logging.getLogger(__name__)
WORKER_TIMEOUT = app.config.get('BACKFILL_TAGS_TIMEOUT', 6000)
class BrokenManifest(ManifestInterface):
  """ ManifestInterface stand-in for manifests whose payload could not be parsed.

      Emitting one of these lets the backfill create the new Manifest row for a
      broken tag manifest while producing none of the auxiliary rows (blobs,
      labels, legacy images) a valid manifest would generate.
  """
  def __init__(self, digest, payload):
    self._broken_digest = digest
    self._raw_bytes = Bytes.for_string_or_unicode(payload)

  # --- Identity and payload -------------------------------------------------

  @property
  def digest(self):
    return self._broken_digest

  @property
  def media_type(self):
    return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE

  @property
  def manifest_dict(self):
    return {}

  @property
  def bytes(self):
    return self._raw_bytes

  @property
  def schema_version(self):
    return 1

  # --- Empty/neutral structure ----------------------------------------------

  @property
  def blob_digests(self):
    return []

  @property
  def local_blob_digests(self):
    return []

  @property
  def layers_compressed_size(self):
    return None

  @property
  def is_manifest_list(self):
    return False

  @property
  def has_legacy_image(self):
    return False

  def get_layers(self, content_retriever):
    return None

  def get_legacy_image_ids(self, cr):
    return []

  def get_leaf_layer_v1_image_id(self, cr):
    return None

  def get_blob_digests_for_translation(self):
    return []

  def validate(self, content_retriever):
    pass

  def child_manifests(self, lookup_manifest_fn):
    return None

  def get_manifest_labels(self, lookup_config_fn):
    return {}

  def unsigned(self):
    return self

  def generate_legacy_layers(self, images_map, lookup_config_fn):
    return None

  def get_schema1_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
    return self

  def get_requires_empty_layer_blob(self, content_retriever):
    return False

  def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name,
                       content_retriever):
    return None
class TagBackfillWorker(Worker):
  """ Worker which backfills old-style RepositoryTag rows into the new Tag table. """
  def __init__(self, namespace_filter=None):
    super(TagBackfillWorker, self).__init__()
    self._namespace_filter = namespace_filter
    self.add_operation(self._backfill_tags, WORKER_TIMEOUT)

  def _filter(self, query):
    # Restrict the query to the configured namespace, if any was given.
    if not self._namespace_filter:
      return query

    logger.info('Filtering by namespace `%s`', self._namespace_filter)
    namespace_user = get_namespace_user(self._namespace_filter)
    return query.join(Repository).where(Repository.namespace_user == namespace_user)

  def _candidates_to_backfill(self):
    # RepositoryTag rows without a TagToRepositoryTag mapping (and not hidden)
    # still need backfilling.
    def missing_tmt_query():
      return (self._filter(RepositoryTag.select())
              .join(TagToRepositoryTag, JOIN.LEFT_OUTER)
              .where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False))

    min_id = self._filter(RepositoryTag.select(fn.Min(RepositoryTag.id))).scalar()
    max_id = self._filter(RepositoryTag.select(fn.Max(RepositoryTag.id))).scalar()

    logger.info('Found candidate range %s-%s', min_id, max_id)
    return yield_random_entries(missing_tmt_query, RepositoryTag.id, 1000, max_id, min_id)

  def _backfill_tags(self):
    with UseThenDisconnect(app.config):
      candidates = self._candidates_to_backfill()
      if candidates is None:
        logger.debug('Found no additional tags to backfill')
        time.sleep(10000)
        return None

      for candidate, abort_signal, _ in candidates:
        if not backfill_tag(candidate):
          logger.info('Another worker pre-empted us for tag: %s', candidate.id)
          abort_signal.set()
def lookup_map_row(repositorytag):
  """ Returns True if a TagToRepositoryTag mapping row exists for the given
      RepositoryTag, and False otherwise.
  """
  try:
    TagToRepositoryTag.get(repository_tag=repositorytag)
  except TagToRepositoryTag.DoesNotExist:
    return False

  return True
def backfill_tag(repositorytag):
  """ Copies a single RepositoryTag row into the new-style Tag table.

      Returns False when another worker already created the mapping row (i.e. we
      were preempted), True otherwise.
  """
  logger.info('Backfilling tag %s', repositorytag.id)

  # Nothing to do if the mapping row already exists.
  if lookup_map_row(repositorytag):
    return False

  # Grab the manifest for the RepositoryTag, backfilling as necessary.
  manifest_id = _get_manifest_id(repositorytag)
  if manifest_id is None:
    return True

  # Tag stores lifetimes in milliseconds; RepositoryTag stored seconds.
  def _to_ms(ts):
    return ts * 1000 if ts is not None else None

  start_ms = _to_ms(repositorytag.lifetime_start_ts)
  end_ms = _to_ms(repositorytag.lifetime_end_ts)

  # Create the new Tag.
  with db_transaction():
    # Re-check inside the transaction in case another worker beat us here.
    if lookup_map_row(repositorytag):
      return False

    try:
      new_tag = Tag.create(name=repositorytag.name,
                           repository=repositorytag.repository,
                           lifetime_start_ms=start_ms,
                           lifetime_end_ms=end_ms,
                           reversion=repositorytag.reversion,
                           manifest=manifest_id,
                           tag_kind=Tag.tag_kind.get_id('tag'))
      TagToRepositoryTag.create(tag=new_tag, repository_tag=repositorytag,
                                repository=repositorytag.repository)
    except IntegrityError:
      logger.exception('Could not create tag for repo tag `%s`', repositorytag.id)
      return False

  logger.info('Backfilled tag %s', repositorytag.id)
  return True
def lookup_manifest_map_row(tag_manifest):
  """ Returns True if a TagManifestToManifest mapping row exists for the given
      TagManifest, and False otherwise.
  """
  try:
    TagManifestToManifest.get(tag_manifest=tag_manifest)
  except TagManifestToManifest.DoesNotExist:
    return False

  return True
def _get_manifest_id(repositorytag):
  """ Returns the id of the new-style Manifest row for the given RepositoryTag,
      backfilling TagManifest/Manifest rows as necessary.

      Returns None when the manifest cannot be found or created.
  """
  repository_tag_datatype = TagDataType.for_repository_tag(repositorytag)

  # Retrieve the TagManifest for the RepositoryTag, backfilling if necessary.
  with db_transaction():
    manifest_datatype = None
    try:
      manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype,
                                                             backfill_if_necessary=True)
    except MalformedSchema1Manifest:
      logger.exception('Error backfilling manifest for tag `%s`', repositorytag.id)

    if manifest_datatype is None:
      logger.error('Could not load or backfill manifest for tag `%s`', repositorytag.id)

      # Create a broken manifest for the tag. The sentinel digest keeps it
      # distinguishable while still giving the tag a manifest row to point at.
      tag_manifest = TagManifest.create(tag=repositorytag,
                                        digest='BROKEN-%s' % repositorytag.id,
                                        json_data='{}')
    else:
      # Retrieve the new-style Manifest for the TagManifest, if any.
      try:
        tag_manifest = TagManifest.get(id=manifest_datatype._db_id)
      except TagManifest.DoesNotExist:
        logger.exception('Could not find tag manifest')
        return None

  try:
    found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest

    # Verify that the new-style manifest has the same contents as the old-style manifest.
    # If not, update and then return. This is an extra check put in place to ensure unicode
    # manifests have been correctly copied.
    if found.manifest_bytes != tag_manifest.json_data:
      logger.warning('Fixing manifest `%s`', found.id)
      found.manifest_bytes = tag_manifest.json_data
      found.save()

    return found.id
  except TagManifestToManifest.DoesNotExist:
    # Could not find the new style manifest, so backfill.
    _backfill_manifest(tag_manifest)

  # Try to retrieve the manifest again, since we've performed a backfill.
  try:
    return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id
  except TagManifestToManifest.DoesNotExist:
    # Backfill did not produce a mapping row (e.g. preempted and then deleted).
    return None
def _backfill_manifest(tag_manifest):
  """ Creates the new-style Manifest row (and TagManifestToManifest mapping) for
      an old-style TagManifest.

      Returns True when the backfill completed (or the tag manifest vanished),
      False when another worker preempted us.
  """
  logger.info('Backfilling manifest for tag manifest %s', tag_manifest.id)

  # Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
  if lookup_manifest_map_row(tag_manifest):
    return False

  # Parse the manifest. If we cannot parse, then we treat the manifest as broken and just emit it
  # without additional rows or data, as it will eventually not be useful.
  is_broken = False
  try:
    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(tag_manifest.json_data),
                                     validate=False)
  except ManifestException:
    logger.exception('Exception when trying to parse manifest %s', tag_manifest.id)
    manifest = BrokenManifest(tag_manifest.digest, tag_manifest.json_data)
    is_broken = True

  # Lookup the storages for the digests.
  root_image = tag_manifest.tag.image
  repository = tag_manifest.tag.repository

  # Maps blob content checksum -> ImageStorage id, seeded with the root image.
  image_storage_id_map = {root_image.storage.content_checksum: root_image.storage.id}

  try:
    parent_images = get_parent_images(repository.namespace_user.username, repository.name,
                                      root_image)
  except DataModelException:
    logger.exception('Exception when trying to load parent images for manifest `%s`',
                     tag_manifest.id)
    parent_images = {}
    is_broken = True

  for parent_image in parent_images:
    image_storage_id_map[parent_image.storage.content_checksum] = parent_image.storage.id

  # Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo
  # and add its storage ID. If the blob is not found, we mark the manifest as broken.
  storage_ids = set()
  try:
    for blob_digest in manifest.get_blob_digests_for_translation():
      if blob_digest in image_storage_id_map:
        storage_ids.add(image_storage_id_map[blob_digest])
      else:
        logger.debug('Blob `%s` not found in images for manifest `%s`; checking repo',
                     blob_digest, tag_manifest.id)
        try:
          blob_storage = get_repo_blob_by_digest(repository.namespace_user.username,
                                                 repository.name, blob_digest)
          storage_ids.add(blob_storage.id)
        except BlobDoesNotExist:
          logger.debug('Blob `%s` not found in repo for manifest `%s`',
                       blob_digest, tag_manifest.id)
          is_broken = True
  except MalformedSchema1Manifest:
    logger.warning('Found malformed schema 1 manifest during blob backfill')
    is_broken = True

  with db_transaction():
    # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.
    try:
      tag_manifest = TagManifest.get(id=tag_manifest.id)
    except TagManifest.DoesNotExist:
      # The row disappeared out from under us; treat as done.
      return True

    # Ensure it wasn't already created.
    if lookup_manifest_map_row(tag_manifest):
      return False

    # Check for a pre-existing manifest matching the digest in the repository. This can happen
    # if we've already created the manifest row (typically for tag reverision).
    try:
      manifest_row = Manifest.get(digest=manifest.digest, repository=tag_manifest.tag.repository)
    except Manifest.DoesNotExist:
      # Create the new-style rows for the manifest.
      try:
        manifest_row = populate_manifest(tag_manifest.tag.repository, manifest,
                                         tag_manifest.tag.image, storage_ids)
      except IntegrityError:
        # Pre-empted.
        return False

    # Create the mapping row. If we find another was created for this tag manifest in the
    # meantime, then we've been preempted.
    try:
      TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest_row,
                                   broken=is_broken)
    except IntegrityError:
      return False

  # Backfill any labels on the manifest.
  _backfill_labels(tag_manifest, manifest_row, repository)
  return True
def _backfill_labels(tag_manifest, manifest, repository):
  """ Copies the labels attached to an old-style TagManifest onto the new-style
      Manifest, creating ManifestLabel and TagManifestLabelMap rows.
  """
  label_rows = list(TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest))
  if not label_rows:
    return

  for legacy_label in label_rows:
    label = legacy_label.label

    # Skip labels that already have a mapping row.
    try:
      TagManifestLabelMap.get(tag_manifest_label=legacy_label)
    except TagManifestLabelMap.DoesNotExist:
      # Create the new-style label and its mapping; another worker racing us
      # shows up as an IntegrityError, which we simply skip past.
      try:
        new_label = ManifestLabel.create(manifest=manifest, label=label,
                                         repository=repository)
        TagManifestLabelMap.create(manifest_label=new_label,
                                   tag_manifest_label=legacy_label,
                                   label=label,
                                   manifest=manifest,
                                   tag_manifest=legacy_label.annotated)
      except IntegrityError:
        pass
if __name__ == "__main__":
  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

  # Backfilling runs only when explicitly enabled or during a background V3
  # upgrade.
  if (not app.config.get('BACKFILL_TAGS', False) and
      app.config.get('V3_UPGRADE_MODE') != 'background'):
    logger.debug('Tag backfill disabled; skipping')
    # Sleep forever so the container stays alive without doing any work; the
    # worker below is intentionally never started in this case.
    while True:
      time.sleep(100000)

  worker = TagBackfillWorker(app.config.get('BACKFILL_TAGS_NAMESPACE'))
  worker.start()

View file

View file

@ -0,0 +1,41 @@
import logging
import time
import features
from app import app, authentication
from data.users.teamsync import sync_teams_to_groups
from workers.worker import Worker
from util.timedeltastring import convert_to_timedelta
from util.log import logfile_path
logger = logging.getLogger(__name__)
WORKER_FREQUENCY = app.config.get('TEAM_SYNC_WORKER_FREQUENCY', 60)
STALE_CUTOFF = convert_to_timedelta(app.config.get('TEAM_RESYNC_STALE_TIME', '30m'))
class TeamSynchronizationWorker(Worker):
  """ Worker which synchronizes teams with their backing groups in LDAP/Keystone/etc.
  """
  def __init__(self):
    super(TeamSynchronizationWorker, self).__init__()
    # Run the sync operation every WORKER_FREQUENCY seconds.
    self.add_operation(self._sync_teams_to_groups, WORKER_FREQUENCY)

  def _sync_teams_to_groups(self):
    # Delegates to the data layer, resyncing teams whose last sync is older
    # than STALE_CUTOFF.
    sync_teams_to_groups(authentication, STALE_CUTOFF)
def main():
  """ Entry point: configures logging and runs the team synchronization worker.

      When team syncing is disabled or no federated auth backend is configured,
      sleeps forever so the process stays alive without doing any work.
  """
  # BUGFIX: this module only does `import logging` at the top; `logging.config`
  # was never imported, so fileConfig could raise AttributeError unless another
  # module happened to pull logging.config in transitively. Import it here.
  import logging.config

  logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

  if not features.TEAM_SYNCING or not authentication.federated_service:
    logger.debug('Team syncing is disabled; sleeping')
    while True:
      time.sleep(100000)

  worker = TeamSynchronizationWorker()
  worker.start()


if __name__ == "__main__":
  main()

View file

@ -0,0 +1,150 @@
import json
import os
from datetime import datetime, timedelta
import boto
from httmock import urlmatch, HTTMock
from moto import mock_s3_deprecated as mock_s3
from app import storage as test_storage
from data import model, database
from data.logs_model import logs_model
from storage import S3Storage, StorageContext, DistributedStorage
from workers.exportactionlogsworker import ExportActionLogsWorker, POLL_PERIOD_SECONDS
from test.fixtures import *
_TEST_CONTENT = os.urandom(1024)
_TEST_BUCKET = 'some_bucket'
_TEST_USER = 'someuser'
_TEST_PASSWORD = 'somepassword'
_TEST_PATH = 'some/cool/path'
_TEST_CONTEXT = StorageContext('nyc', None, None, None, None)
@pytest.fixture(params=['test', 'mock_s3'])
def storage_engine(request):
  """ Parameterized storage fixture: either the app's test storage or a moto-backed
      S3 DistributedStorage with a pre-created bucket.
  """
  if request.param == 'test':
    yield test_storage
    return

  with mock_s3():
    # Create a test bucket and put some test content.
    connection = boto.connect_s3()
    connection.create_bucket(_TEST_BUCKET)
    backends = {
      'foo': S3Storage(_TEST_CONTEXT, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD),
    }
    yield DistributedStorage(backends, ['foo'])
def test_export_logs_failure(initialized_db):
  """ When the storage upload breaks, the export raises IOError and the callback
      is notified with a `failed` status.
  """
  # Make all uploads fail.
  test_storage.put_content('local_us', 'except_upload', 'true')

  repo = model.repository.get_repository('devtable', 'simple')

  worker = ExportActionLogsWorker(None)
  called = [{}]

  @urlmatch(netloc=r'testcallback')
  def handle_request(url, request):
    called[0] = json.loads(request.body)
    return {'status_code': 200, 'content': '{}'}

  def format_date(dt):
    # BUGFIX(naming): the parameter was named `datetime`, shadowing the
    # datetime class imported at module scope.
    return dt.strftime("%m/%d/%Y")

  now = datetime.now()
  with HTTMock(handle_request):
    with pytest.raises(IOError):
      worker._process_queue_item({
        'export_id': 'someid',
        'repository_id': repo.id,
        'namespace_id': repo.namespace_user.id,
        'namespace_name': 'devtable',
        'repository_name': 'simple',
        'start_time': format_date(now + timedelta(days=-10)),
        'end_time': format_date(now + timedelta(days=10)),
        'callback_url': 'http://testcallback/',
        'callback_email': None,
      }, test_storage)

  # Restore normal upload behavior for other tests.
  test_storage.remove('local_us', 'except_upload')

  assert called[0]
  assert called[0][u'export_id'] == 'someid'
  assert called[0][u'status'] == 'failed'
@pytest.mark.parametrize('has_logs', [
  True,
  False,
])
def test_export_logs(initialized_db, storage_engine, has_logs):
  """ Exports logs over a 20-day window and verifies the uploaded JSON document,
      both when logs exist and when the export is empty.
  """
  # Delete all existing logs.
  database.LogEntry3.delete().execute()

  repo = model.repository.get_repository('devtable', 'simple')
  user = model.user.get_user('devtable')

  now = datetime.now()
  if has_logs:
    # Add new logs over a multi-day period.
    for index in range(-10, 10):
      logs_model.log_action('push_repo', 'devtable', user, '0.0.0.0', {'index': index},
                            repo, timestamp=now + timedelta(days=index))

  worker = ExportActionLogsWorker(None)
  called = [{}]

  @urlmatch(netloc=r'testcallback')
  def handle_request(url, request):
    called[0] = json.loads(request.body)
    return {'status_code': 200, 'content': '{}'}

  def format_date(dt):
    # BUGFIX(naming): the parameter was named `datetime`, shadowing the
    # datetime class imported at module scope.
    return dt.strftime("%m/%d/%Y")

  with HTTMock(handle_request):
    worker._process_queue_item({
      'export_id': 'someid',
      'repository_id': repo.id,
      'namespace_id': repo.namespace_user.id,
      'namespace_name': 'devtable',
      'repository_name': 'simple',
      'start_time': format_date(now + timedelta(days=-10)),
      'end_time': format_date(now + timedelta(days=10)),
      'callback_url': 'http://testcallback/',
      'callback_email': None,
    }, storage_engine)

  assert called[0]
  assert called[0][u'export_id'] == 'someid'
  assert called[0][u'status'] == 'success'

  # Work out where the export landed (local test storage vs. mocked S3).
  url = called[0][u'exported_data_url']

  if url.find('http://localhost:5000/exportedlogs/') == 0:
    storage_id = url[len('http://localhost:5000/exportedlogs/'):]
  else:
    assert url.find('https://some_bucket.s3.amazonaws.com/some/path/exportedactionlogs/') == 0
    storage_id, _ = url[len('https://some_bucket.s3.amazonaws.com/some/path/exportedactionlogs/'):].split('?')

  created = storage_engine.get_content(storage_engine.preferred_locations,
                                       'exportedactionlogs/' + storage_id)
  created_json = json.loads(created)

  if has_logs:
    # Every index written above must appear in the exported document.
    found = set()
    for log in created_json['logs']:
      if log.get('terminator'):
        continue

      found.add(log['metadata']['index'])

    for index in range(-10, 10):
      assert index in found
  else:
    assert created_json['logs'] == [{'terminator': True}]

View file

@ -0,0 +1,142 @@
import os.path
from datetime import datetime, timedelta
from app import storage
from data import model
from data.database import LogEntry, LogEntry2, LogEntry3
from data.logs_model.elastic_logs import INDEX_NAME_PREFIX, INDEX_DATE_FORMAT
from data.logs_model.datatypes import AggregatedLogCount, LogEntriesPage, Log
from data.logs_model.document_logs_model import DocumentLogsModel
from data.logs_model.test.fake_elasticsearch import FAKE_ES_HOST, fake_elasticsearch
from data.logs_model.table_logs_model import TableLogsModel
from data.logs_model.combined_model import CombinedLogsModel
from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model import LogsModelProxy
from util.timedeltastring import convert_to_timedelta
from workers.logrotateworker import LogRotateWorker, SAVE_PATH, SAVE_LOCATION
from test.fixtures import *
@pytest.fixture()
def clear_db_logs(initialized_db):
  """ Empties every generation of the log tables before the test runs. """
  for log_table in (LogEntry, LogEntry2, LogEntry3):
    log_table.delete().execute()
def combined_model():
  """ Returns a combined model layering the table model over an in-memory model. """
  primary = TableLogsModel()
  secondary = InMemoryModel()
  return CombinedLogsModel(primary, secondary)
def es_model():
  """ Returns a DocumentLogsModel wired to the fake Elasticsearch host. """
  es_config = {
    'host': FAKE_ES_HOST,
    'port': 12345,
  }
  return DocumentLogsModel(producer='elasticsearch', elasticsearch_config=es_config)
@pytest.fixture()
def fake_es():
  """ Patches in the in-memory fake Elasticsearch for the duration of a test. """
  with fake_elasticsearch():
    yield
@pytest.fixture(params=[TableLogsModel, es_model, InMemoryModel, combined_model])
def logs_model(request, clear_db_logs, fake_es):
  """ Parameterized over every logs-model implementation; patches the instance
      into both the data layer and the log rotation worker.
  """
  instance = request.param()
  with patch('data.logs_model.logs_model', instance), \
       patch('workers.logrotateworker.logs_model', instance):
    yield instance
def _lookup_logs(logs_model, start_time, end_time, **kwargs):
logs_found = []
page_token = None
while True:
found = logs_model.lookup_logs(start_time, end_time, page_token=page_token, **kwargs)
logs_found.extend(found.logs)
page_token = found.next_page_token
if not found.logs or not page_token:
break
assert len(logs_found) == len(set(logs_found))
return logs_found
def test_logrotateworker(logs_model):
  """ Archiving with a cutoff past the newest entry empties the model. """
  worker = LogRotateWorker()
  days = 90
  start_timestamp = datetime(2019, 1, 1)
  window_start = start_timestamp - timedelta(days=1000)
  window_end = start_timestamp + timedelta(days=1000)

  # The model must start out empty.
  assert not _lookup_logs(logs_model, window_start, window_end)

  # Write one log entry per day going backwards from the start timestamp.
  for day in range(0, days):
    logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4', timestamp=start_timestamp - timedelta(days=day))

  # All of the entries must be visible before archiving.
  assert len(_lookup_logs(logs_model, window_start, window_end)) == days

  # Archive everything older than one day past the newest log.
  assert worker._perform_archiving(start_timestamp + timedelta(days=1))

  # Nothing should remain in the live model.
  assert not _lookup_logs(logs_model, window_start, window_end)
def test_logrotateworker_with_cutoff(logs_model):
  """ Archiving with a mid-range cutoff removes only the logs at or older than
      the cutoff and writes the archive files to storage.
  """
  days = 60
  start_timestamp = datetime(2019, 1, 1)

  # Make sure there are no existing logs.
  found = _lookup_logs(logs_model, start_timestamp - timedelta(days=365), start_timestamp + timedelta(days=365))
  assert not found

  # Create a new set of logs/indices.
  for day in range(0, days):
    logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4', timestamp=start_timestamp + timedelta(days=day))

  # Get all logs.
  logs = _lookup_logs(logs_model,
                      start_timestamp - timedelta(days=days - 1),
                      start_timestamp + timedelta(days=days + 1))
  assert len(logs) == days

  # Set the cutoff datetime to be the midpoint of the logs.
  # BUGFIX: use floor division. Under Python 3 `len(logs) / 2` yields a float,
  # which raises TypeError when used as a slice index; `//` is identical on
  # Python 2 integers.
  midpoint = logs[0:len(logs) // 2]
  assert midpoint
  assert len(midpoint) < len(logs)

  worker = LogRotateWorker()
  cutoff_date = midpoint[-1].datetime

  # Archive the indices at or older than the cutoff date.
  archived_files = worker._perform_archiving(cutoff_date)

  # Ensure the earlier logs were archived.
  found = _lookup_logs(logs_model, start_timestamp, cutoff_date - timedelta(seconds=1))
  assert not found

  # Check that the files were written to storage.
  for archived_file in archived_files:
    assert storage.exists([SAVE_LOCATION], os.path.join(SAVE_PATH, archived_file))

  # If the current model uses ES, check that the indices were also deleted.
  if isinstance(logs_model, DocumentLogsModel):
    assert len(logs_model.list_indices()) == days - (len(logs) // 2)
    for index in logs_model.list_indices():
      dt = datetime.strptime(index[len(INDEX_NAME_PREFIX):], INDEX_DATE_FORMAT)
      assert dt >= cutoff_date

View file

@ -0,0 +1,12 @@
from data import model, database
from workers.repositoryactioncounter import RepositoryActionCountWorker
from test.fixtures import *
def test_repositoryactioncount(app):
  """ The counter worker drains all pending repositories without raising. """
  # Reset any counts computed by previous tests.
  database.RepositoryActionCount.delete().execute()
  database.RepositorySearchScore.delete().execute()

  counter = RepositoryActionCountWorker()
  while counter._count_repository_actions():
    pass

View file

@ -0,0 +1,171 @@
import hashlib
import pytest
from data import model, database
from storage.basestorage import StoragePaths
from storage.fakestorage import FakeStorage
from storage.distributedstorage import DistributedStorage
from workers.storagereplication import (StorageReplicationWorker, JobException,
WorkerUnhealthyException)
from test.fixtures import *
@pytest.fixture()
def storage_user(app):
  """ Returns the devtable user with both test regions registered for
      replication.
  """
  user = model.user.get_user('devtable')
  for region_name in ('local_us', 'local_eu'):
    database.UserRegion.create(user=user,
                               location=database.ImageStorageLocation.get(name=region_name))
  return user
@pytest.fixture()
def storage_paths():
  """ Returns a StoragePaths helper for computing blob/layer paths. """
  return StoragePaths()
@pytest.fixture()
def replication_worker():
  """ Returns a StorageReplicationWorker with no queue attached; tests call
      replicate_storage directly.
  """
  return StorageReplicationWorker(None)
@pytest.fixture()
def storage():
  """ Returns a two-region fake DistributedStorage with local_us preferred. """
  engines = {
    'local_us': FakeStorage('local'),
    'local_eu': FakeStorage('local'),
  }
  return DistributedStorage(engines, ['local_us'])
def test_storage_replication_v1(storage_user, storage_paths, replication_worker, storage, app):
  """ Replicating a V1-path storage copies content and placement to the second
      region.
  """
  # Seed a V1-path storage record with content in the primary region.
  v1_storage = model.storage.create_v1_storage('local_us')
  content_path = storage_paths.v1_image_layer_path(v1_storage.uuid)
  storage.put_content(['local_us'], content_path, 'some content')

  # Call replicate on it and verify it replicates.
  replication_worker.replicate_storage(storage_user, v1_storage.uuid, storage)

  # Ensure that the data was replicated to the other "region".
  assert storage.get_content(['local_eu'], content_path) == 'some content'
  assert len(model.storage.get_storage_locations(v1_storage.uuid)) == 2
def test_storage_replication_cas(storage_user, storage_paths, replication_worker, storage, app):
  """ Replicating a CAS-path storage copies content and placement to the second
      region.
  """
  # Seed a CAS storage record with content in the primary region.
  checksum = 'sha256:' + hashlib.sha256('some content').hexdigest()
  cas_storage = database.ImageStorage.create(cas_path=True, content_checksum=checksum)
  database.ImageStoragePlacement.create(
      storage=cas_storage, location=database.ImageStorageLocation.get(name='local_us'))

  content_path = storage_paths.blob_path(cas_storage.content_checksum)
  storage.put_content(['local_us'], content_path, 'some content')

  # Call replicate on it and verify it replicates.
  replication_worker.replicate_storage(storage_user, cas_storage.uuid, storage)

  # Ensure that the data was replicated to the other "region".
  assert storage.get_content(['local_eu'], content_path) == 'some content'
  assert len(model.storage.get_storage_locations(cas_storage.uuid)) == 2
def test_storage_replication_missing_base(storage_user, storage_paths, replication_worker, storage,
                                          app):
  """ Replication raises JobException when the source blob is absent. """
  checksum = 'sha256:' + hashlib.sha256('some content').hexdigest()
  cas_storage = database.ImageStorage.create(cas_path=True, content_checksum=checksum)
  database.ImageStoragePlacement.create(
      storage=cas_storage, location=database.ImageStorageLocation.get(name='local_us'))

  # No content was ever written, so the source lookup must fail and abort the
  # job.
  with pytest.raises(JobException):
    replication_worker.replicate_storage(storage_user, cas_storage.uuid, storage,
                                         backoff_check=False)

  # Ensure the storage location count remains 1. This is technically inaccurate, but that's okay
  # as we still require at least one location per storage.
  assert len(model.storage.get_storage_locations(cas_storage.uuid)) == 1
def test_storage_replication_copy_error(storage_user, storage_paths, replication_worker, storage,
                                        app):
  """ Replication raises JobException when the storage engine fails the copy. """
  checksum = 'sha256:' + hashlib.sha256('some content').hexdigest()
  cas_storage = database.ImageStorage.create(cas_path=True, content_checksum=checksum)
  database.ImageStoragePlacement.create(
      storage=cas_storage, location=database.ImageStorageLocation.get(name='local_us'))

  content_path = storage_paths.blob_path(cas_storage.content_checksum)
  storage.put_content(['local_us'], content_path, 'some content')

  # Tell storage to break copying.
  storage.put_content(['local_us'], 'break_copying', 'true')

  # The write failure should surface as a retryable job error.
  with pytest.raises(JobException):
    replication_worker.replicate_storage(storage_user, cas_storage.uuid, storage,
                                         backoff_check=False)

  # Ensure the storage location count remains 1.
  assert len(model.storage.get_storage_locations(cas_storage.uuid)) == 1
def test_storage_replication_copy_didnot_copy(storage_user, storage_paths, replication_worker,
                                              storage, app):
  """ Replication raises JobException when the copy "succeeds" but the data is
      not actually present afterwards.
  """
  checksum = 'sha256:' + hashlib.sha256('some content').hexdigest()
  cas_storage = database.ImageStorage.create(cas_path=True, content_checksum=checksum)
  database.ImageStoragePlacement.create(
      storage=cas_storage, location=database.ImageStorageLocation.get(name='local_us'))

  content_path = storage_paths.blob_path(cas_storage.content_checksum)
  storage.put_content(['local_us'], content_path, 'some content')

  # Tell storage to fake copying (i.e. not actually copy the data).
  storage.put_content(['local_us'], 'fake_copying', 'true')

  # The post-copy existence check should fail and abort the job.
  with pytest.raises(JobException):
    replication_worker.replicate_storage(storage_user, cas_storage.uuid, storage,
                                         backoff_check=False)

  # Ensure the storage location count remains 1.
  assert len(model.storage.get_storage_locations(cas_storage.uuid)) == 1
def test_storage_replication_copy_unhandled_exception(storage_user, storage_paths,
                                                      replication_worker, storage, app):
  """ An unexpected exception during the copy marks the worker unhealthy. """
  checksum = 'sha256:' + hashlib.sha256('some content').hexdigest()
  cas_storage = database.ImageStorage.create(cas_path=True, content_checksum=checksum)
  database.ImageStoragePlacement.create(
      storage=cas_storage, location=database.ImageStorageLocation.get(name='local_us'))

  content_path = storage_paths.blob_path(cas_storage.content_checksum)
  storage.put_content(['local_us'], content_path, 'some content')

  # Tell storage to raise an exception when copying.
  storage.put_content(['local_us'], 'except_copying', 'true')

  # A non-IOError during copy should be treated as a worker health problem.
  with pytest.raises(WorkerUnhealthyException):
    replication_worker.replicate_storage(storage_user, cas_storage.uuid, storage,
                                         backoff_check=False)

  # Ensure the storage location count remains 1.
  assert len(model.storage.get_storage_locations(cas_storage.uuid)) == 1

View file

@ -0,0 +1,281 @@
from app import docker_v2_signing_key
from data import model
from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob,
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
TagManifestLabel, Tag, TagToRepositoryTag, Repository,
ImageStorage)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from workers.tagbackfillworker import backfill_tag, _backfill_manifest
from test.fixtures import *
@pytest.fixture()
def clear_rows(initialized_db):
  """ Fixture which wipes every new-style (OCI) row so tests can exercise the backfill. """
  # Deletion order matters: rows referencing other tables are removed first.
  new_style_tables = [TagToRepositoryTag, Tag, TagManifestLabelMap, ManifestLabel,
                      ManifestBlob, ManifestLegacyImage, TagManifestToManifest, Manifest]
  for table in new_style_tables:
    table.delete().execute()
@pytest.mark.parametrize('clear_all_rows', [
  True,
  False,
])
def test_tagbackfillworker(clear_all_rows, initialized_db):
  """ End-to-end check of the tag backfill: every old-style RepositoryTag is
      backfilled into a new-style Tag (and, when present, its TagManifest into a
      Manifest), and the resulting rows mirror the old rows exactly.

      When `clear_all_rows` is True, the manifest-side tables are wiped as well,
      forcing the backfill to recreate the manifest rows too.
  """
  # Remove the new-style rows so we can backfill.
  TagToRepositoryTag.delete().execute()
  Tag.delete().execute()

  if clear_all_rows:
    TagManifestLabelMap.delete().execute()
    ManifestLabel.delete().execute()
    ManifestBlob.delete().execute()
    ManifestLegacyImage.delete().execute()
    TagManifestToManifest.delete().execute()
    Manifest.delete().execute()

  # Tracks that at least one expired ("dead") tag was seen, so the
  # lifetime-end conversion path is actually exercised.
  found_dead_tag = False

  for repository_tag in list(RepositoryTag.select()):
    # Backfill the tag.
    assert backfill_tag(repository_tag)

    # Ensure if we try again, the backfill is skipped.
    assert not backfill_tag(repository_tag)

    # Ensure that we now have the expected tag rows.
    tag_to_repo_tag = TagToRepositoryTag.get(repository_tag=repository_tag)
    tag = tag_to_repo_tag.tag
    assert tag.name == repository_tag.name
    assert tag.repository == repository_tag.repository
    assert not tag.hidden
    assert tag.reversion == repository_tag.reversion

    # Old-style lifetimes are in seconds; new-style are in milliseconds.
    if repository_tag.lifetime_start_ts is None:
      assert tag.lifetime_start_ms is None
    else:
      assert tag.lifetime_start_ms == (repository_tag.lifetime_start_ts * 1000)

    if repository_tag.lifetime_end_ts is None:
      assert tag.lifetime_end_ms is None
    else:
      assert tag.lifetime_end_ms == (repository_tag.lifetime_end_ts * 1000)
      found_dead_tag = True

    assert tag.manifest

    # Ensure that we now have the expected manifest rows.
    try:
      tag_manifest = TagManifest.get(tag=repository_tag)
    except TagManifest.DoesNotExist:
      # Tags without an old-style manifest have nothing further to verify.
      continue

    map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
    assert not map_row.broken

    manifest_row = map_row.manifest
    assert manifest_row.manifest_bytes == tag_manifest.json_data
    assert manifest_row.digest == tag_manifest.digest
    assert manifest_row.repository == tag_manifest.tag.repository
    assert tag.manifest == map_row.manifest

    legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
    assert tag_manifest.tag.image == legacy_image

    # The manifest's blobs must cover the tag image's storage and the storages
    # of all its ancestor images.
    expected_storages = {tag_manifest.tag.image.storage.id}
    for parent_image_id in tag_manifest.tag.image.ancestor_id_list():
      expected_storages.add(Image.get(id=parent_image_id).storage_id)

    found_storages = {manifest_blob.blob_id for manifest_blob
                      in ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)}
    assert expected_storages == found_storages

    # Ensure the labels were copied over.
    tmls = list(TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest))
    expected_labels = {tml.label_id for tml in tmls}
    found_labels = {m.label_id for m
                    in ManifestLabel.select().where(ManifestLabel.manifest == manifest_row)}
    assert found_labels == expected_labels

  # Verify at the repository level.
  for repository in list(Repository.select()):
    tags = RepositoryTag.select().where(RepositoryTag.repository == repository,
                                        RepositoryTag.hidden == False)
    oci_tags = Tag.select().where(Tag.repository == repository)
    assert len(tags) == len(oci_tags)
    assert {t.name for t in tags} == {t.name for t in oci_tags}

    for tag in tags:
      tag_manifest = TagManifest.get(tag=tag)
      ttr = TagToRepositoryTag.get(repository_tag=tag)
      manifest = ttr.tag.manifest

      assert tag_manifest.json_data == manifest.manifest_bytes
      assert tag_manifest.digest == manifest.digest
      assert tag.image == ManifestLegacyImage.get(manifest=manifest).image
      assert tag.lifetime_start_ts == (ttr.tag.lifetime_start_ms / 1000)

      if tag.lifetime_end_ts:
        assert tag.lifetime_end_ts == (ttr.tag.lifetime_end_ms / 1000)
      else:
        assert ttr.tag.lifetime_end_ms is None

  assert found_dead_tag
def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db):
  """ Verifies that backfilling a tag manifest holding unparseable JSON still
      succeeds, but records the resulting mapping as broken. """
  # Clear out the existing tag manifests so the tag can be reused.
  TagManifestLabel.delete().execute()
  TagManifest.delete().execute()

  # Create a manifest whose body is not valid manifest JSON.
  unparseable = TagManifest.create(json_data='wat?', digest='sha256:foobar',
                                   tag=RepositoryTag.get())

  # The backfill itself should still report success.
  assert _backfill_manifest(unparseable)

  # The link row must be flagged as broken.
  mapping = TagManifestToManifest.get(tag_manifest=unparseable)
  assert mapping.broken

  # The new-style manifest mirrors the old row's contents verbatim.
  backfilled = mapping.manifest
  assert backfilled.manifest_bytes == unparseable.json_data
  assert backfilled.digest == unparseable.digest
  assert backfilled.repository == unparseable.tag.repository

  # The legacy image linkage is preserved.
  assert ManifestLegacyImage.get(manifest=backfilled).image == unparseable.tag.image
def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db):
  """ Tests that a manifest whose image is mislinked will have its storages relinked properly. """
  # Delete existing tag manifest so we can reuse the tag.
  TagManifestLabel.delete().execute()
  TagManifest.delete().execute()

  # NOTE(review): `repo` is unused below (and points at 'complex' while the tags
  # come from 'gargantuan') — presumably a copy/paste leftover; confirm.
  repo = model.repository.get_repository('devtable', 'complex')
  tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
  tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')

  # Add a mislinked manifest, by having its layer point to a blob in v3.0 but its image
  # be the v5.0 image.
  builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
  builder.add_layer(tag_v30.image.storage.content_checksum, '{"id": "foo"}')
  manifest = builder.build(docker_v2_signing_key)

  mislinked_manifest = TagManifest.create(json_data=manifest.bytes.as_encoded_str(),
                                          digest=manifest.digest,
                                          tag=tag_v50)

  # Backfill the manifest and ensure its proper content checksum was linked.
  assert _backfill_manifest(mislinked_manifest)

  map_row = TagManifestToManifest.get(tag_manifest=mislinked_manifest)
  assert not map_row.broken

  # The legacy image linkage follows the tag's image (v5.0)...
  manifest_row = map_row.manifest
  legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
  assert legacy_image == tag_v50.image

  # ...while the single blob is relinked by checksum to the v3.0 storage.
  manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
  assert len(manifest_blobs) == 1
  assert manifest_blobs[0].blob.content_checksum == tag_v30.image.storage.content_checksum
def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initialized_db):
  """ Tests that a manifest whose image is mislinked will attempt to have its storages relinked
      properly. """
  # Delete existing tag manifest so we can reuse the tag.
  TagManifestLabel.delete().execute()
  TagManifest.delete().execute()

  # NOTE(review): `repo` is unused below — presumably a copy/paste leftover; confirm.
  repo = model.repository.get_repository('devtable', 'complex')
  tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')

  # Add a mislinked manifest, by having its layer point to an invalid blob but its image
  # be the v5.0 image.
  builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
  builder.add_layer('sha256:deadbeef', '{"id": "foo"}')
  manifest = builder.build(docker_v2_signing_key)

  broken_manifest = TagManifest.create(json_data=manifest.bytes.as_encoded_str(),
                                       digest=manifest.digest,
                                       tag=tag_v50)

  # Backfill the manifest and ensure it is marked as broken.
  assert _backfill_manifest(broken_manifest)

  map_row = TagManifestToManifest.get(tag_manifest=broken_manifest)
  assert map_row.broken

  # The legacy image linkage still resolves to the tag's image...
  manifest_row = map_row.manifest
  legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
  assert legacy_image == tag_v50.image

  # ...but no blobs could be linked, since the referenced blob does not exist.
  manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
  assert len(manifest_blobs) == 0
def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db):
  """ Verifies that two old-style tag manifests sharing one digest end up
      linked to a single new-style manifest row after backfill. """
  # Clear out the existing tag manifests so the tags can be reused.
  TagManifestLabel.delete().execute()
  TagManifest.delete().execute()

  repo = model.repository.get_repository('devtable', 'gargantuan')
  tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
  tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')

  # Construct a single manifest and attach it to both tags, which the old data
  # model permitted.
  builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
  builder.add_layer('sha256:deadbeef', '{"id": "foo"}')
  built = builder.build(docker_v2_signing_key)

  encoded = built.bytes.as_encoded_str()
  first = TagManifest.create(json_data=encoded, digest=built.digest, tag=tag_v30)
  second = TagManifest.create(json_data=encoded, digest=built.digest, tag=tag_v50)

  # Backfill each copy; both mappings must resolve to the same manifest row.
  assert _backfill_manifest(first)
  assert _backfill_manifest(second)

  assert (TagManifestToManifest.get(tag_manifest=first).manifest ==
          TagManifestToManifest.get(tag_manifest=second).manifest)
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
  """ Verifies that a tag whose image references missing parent images can
      still be backfilled. """
  # Clear out the existing tag manifests so the tag can be reused.
  TagManifestLabel.delete().execute()
  TagManifest.delete().execute()

  # Build a tag pointing at an image whose ancestry references a nonexistent
  # parent image ID.
  repo = model.repository.get_repository('devtable', 'gargantuan')
  orphaned_image = Image.create(docker_image_id='foo', repository=repo,
                                ancestors='/348723847234/', storage=ImageStorage.get())
  orphaned_tag = RepositoryTag.create(repository=repo, image=orphaned_image, name='broken')

  # The backfill should still succeed.
  assert backfill_tag(orphaned_tag)

  # A manifest was synthesized for the tag despite the broken ancestry.
  tag_manifest = TagManifest.get(tag=orphaned_tag)
  synthesized = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest
  assert synthesized.manifest_bytes == tag_manifest.json_data

  # The new-style tag points at the synthesized manifest.
  new_tag = TagToRepositoryTag.get(repository_tag=orphaned_tag).tag
  assert new_tag.name == 'broken'
  assert new_tag.manifest == synthesized

139
workers/worker.py Normal file
View file

@ -0,0 +1,139 @@
import logging
import logging.config
import signal
import socket
import sys
import time

from datetime import datetime, timedelta
from functools import wraps
from random import randint
from threading import Event

from apscheduler.schedulers.background import BackgroundScheduler
from raven import Client

from app import app
from data.database import UseThenDisconnect
from util.log import logfile_path
logger = logging.getLogger(__name__)
def with_exponential_backoff(backoff_multiplier=10, max_backoff=3600, max_retries=10):
  """ Decorator factory which retries the wrapped operation with exponential backoff.

      Waits 2^attempts * `backoff_multiplier` seconds between attempts (capped at
      `max_backoff`), for up to `max_retries` attempts, then re-raises the operation's
      exception. Pass max_retries=None to retry forever.
  """
  def inner(func):
    # `wraps` preserves the operation's name/docstring so scheduler job names
    # and log lines identify the real operation rather than `wrapper`.
    @wraps(func)
    def wrapper(*args, **kwargs):
      attempts = 0
      backoff = 0
      while True:
        next_backoff = 2**attempts * backoff_multiplier
        backoff = min(next_backoff, max_backoff)
        attempts += 1

        try:
          return func(*args, **kwargs)
        except Exception:
          # Out of retries: re-raise with a bare `raise` to preserve the
          # original traceback.
          if max_retries is not None and attempts == max_retries:
            raise

          logger.exception('Operation raised exception, retrying in %d seconds', backoff)
          time.sleep(backoff)
    return wrapper
  return inner
class Worker(object):
  """ Base class for workers which perform some work periodically.

      Subclasses register periodic operations via `add_operation`; `start`
      schedules them on a background scheduler and blocks until the worker is
      asked to terminate (SIGTERM/SIGINT or an explicit `terminate`/`join`).
  """
  def __init__(self):
    self._sched = BackgroundScheduler()
    self._operations = []  # list of (wrapped operation callable, interval seconds)
    self._stop = Event()  # set once shutdown has been requested
    self._terminated = Event()  # set once the scheduler has fully shut down
    self._raven_client = None

    # Report operation exceptions to Sentry when it is the configured sink.
    if app.config.get('EXCEPTION_LOG_TYPE', 'FakeSentry') == 'Sentry':
      worker_name = '%s:worker-%s' % (socket.gethostname(), self.__class__.__name__)
      self._raven_client = Client(app.config.get('SENTRY_DSN', ''), name=worker_name)

  def is_healthy(self):
    """ Returns True as long as shutdown has not been requested. """
    return not self._stop.is_set()

  def is_terminated(self):
    """ Returns True once the worker has fully terminated. """
    return self._terminated.is_set()

  def ungracefully_terminated(self):
    """ Method called when the worker has been terminated in an ungraceful fashion. """
    pass

  def add_operation(self, operation_func, operation_sec):
    """ Registers `operation_func` to run every `operation_sec` seconds.

        Each run uses a fresh database connection (disconnected afterwards).
        Exceptions are logged (and sent to Sentry, if configured) rather than
        propagated, so one failed run does not kill the worker.
    """
    @wraps(operation_func)
    def _operation_func():
      try:
        with UseThenDisconnect(app.config):
          return operation_func()
      except Exception:
        logger.exception('Operation raised exception')
        if self._raven_client:
          logger.debug('Logging exception to Sentry')
          self._raven_client.captureException()

    self._operations.append((_operation_func, operation_sec))

  def _setup_and_wait_for_shutdown(self):
    """ Installs signal handlers and blocks until termination is requested. """
    signal.signal(signal.SIGTERM, self.terminate)
    signal.signal(signal.SIGINT, self.terminate)

    # Poll the stop event so signal handlers get a chance to run.
    while not self._stop.wait(1):
      pass

  def start(self):
    """ Configures logging, schedules all registered operations, and blocks
        until shutdown. Skips scheduling (but still waits for shutdown) when
        setup is incomplete or the registry is in read-only mode.
    """
    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    if not app.config.get('SETUP_COMPLETE', False):
      logger.info('Product setup is not yet complete; skipping worker startup')
      self._setup_and_wait_for_shutdown()
      return

    if app.config.get('REGISTRY_STATE', 'normal') == 'readonly':
      logger.info('Product is in read-only mode; skipping worker startup')
      self._setup_and_wait_for_shutdown()
      return

    logger.debug('Scheduling worker.')

    self._sched.start()
    for operation_func, operation_sec in self._operations:
      start_date = datetime.now() + timedelta(seconds=0.001)
      # Optionally stagger first runs randomly within the interval so many
      # workers starting together don't all fire at once.
      if app.config.get('STAGGER_WORKERS'):
        start_date += timedelta(seconds=randint(1, operation_sec))
      logger.debug('First run scheduled for %s', start_date)
      self._sched.add_job(operation_func, 'interval', seconds=operation_sec,
                          start_date=start_date, max_instances=1)

    self._setup_and_wait_for_shutdown()

    # Let in-flight jobs finish before declaring the worker terminated.
    logger.debug('Waiting for running tasks to complete.')
    self._sched.shutdown()
    logger.debug('Finished.')
    self._terminated.set()

  def terminate(self, signal_num=None, stack_frame=None, graceful=False):
    """ Requests worker shutdown. A second call after full termination exits
        the process. Also serves as the SIGTERM/SIGINT handler (hence the
        signal_num/stack_frame parameters).
    """
    if self._terminated.is_set():
      sys.exit(1)
    else:
      logger.debug('Shutting down worker.')
      self._stop.set()

      if not graceful:
        self.ungracefully_terminated()

  def join(self):
    """ Gracefully shuts the worker down. """
    self.terminate(graceful=True)