Add support for Quay's vulnerability tool

Quentin Machu 2015-10-05 13:35:01 -04:00 committed by Jimmy Zelinskie
parent 3d0bcbaaeb
commit f59e35cc81
11 changed files with 409 additions and 0 deletions


@@ -0,0 +1,2 @@
#!/bin/sh
exec logger -i -t securityworker


@@ -0,0 +1,8 @@
#! /bin/bash
echo 'Starting security scanner worker'
cd /
venv/bin/python -m workers.securityworker 2>&1
echo 'Security scanner worker exited'


@@ -573,12 +573,18 @@ class Image(BaseModel):
v1_json_metadata = TextField(null=True)
v1_checksum = CharField(null=True)
security_indexed = BooleanField(default=False)
security_indexed_engine = IntegerField(default=-1)
parent = ForeignKeyField('self', index=True, null=True, related_name='children')
class Meta:
database = db
read_slaves = (read_slave,)
indexes = (
# we don't really want duplicates
(('repository', 'docker_image_id'), True),
(('security_indexed_engine', 'security_indexed'), False),
)
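
The combined (security_indexed_engine, security_indexed) index added above backs the lookups the security worker uses to find images that still need scanning. A rough sketch of such a query in peewee; everything except the model fields is an illustrative name, not code from this commit:

# Illustrative only: fetch a small batch of images not yet indexed at `target_version`.
def images_needing_scan(target_version, batch_size=20):
  return (Image
          .select(Image.id, Image.docker_image_id)
          .where(Image.security_indexed_engine < target_version)
          .limit(batch_size))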


@@ -0,0 +1,21 @@
"""backfill parent ids and checksums
Revision ID: 2fb9492c20cc
Revises: 57dad559ff2d
Create Date: 2015-07-14 17:38:47.397963
"""
# revision identifiers, used by Alembic.
revision = '2fb9492c20cc'
down_revision = '57dad559ff2d'
from alembic import op
import sqlalchemy as sa
from util.migrate.backfill_parent_id import backfill_parent_id
from util.migrate.backfill_checksums import backfill_checksums
def upgrade(tables):
backfill_parent_id()
backfill_checksums()
def downgrade(tables):
pass


@@ -0,0 +1,32 @@
"""add support for quay's security indexer
Revision ID: 57dad559ff2d
Revises: 3ff4fbc94644
Create Date: 2015-07-13 16:51:41.669249
"""
# revision identifiers, used by Alembic.
revision = '57dad559ff2d'
down_revision = '3ff4fbc94644'
from alembic import op
import sqlalchemy as sa
def upgrade(tables):
### commands auto generated by Alembic - please adjust! ###
op.add_column('image', sa.Column('parent_id', sa.Integer(), nullable=True))
op.add_column('image', sa.Column('security_indexed', sa.Boolean(), nullable=False))
op.add_column('image', sa.Column('security_indexed_engine', sa.Integer(), nullable=False))
op.create_index('image_parent_id', 'image', ['parent_id'], unique=False)
op.create_foreign_key(op.f('fk_image_parent_id_image'), 'image', 'image', ['parent_id'], ['id'])
### end Alembic commands ###
op.create_index('image_security_indexed_engine_security_indexed', 'image', ['security_indexed_engine', 'security_indexed'])
def downgrade(tables):
# Drop the composite index first, while the columns it covers still exist.
op.drop_index('image_security_indexed_engine_security_indexed', table_name='image')
### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(op.f('fk_image_parent_id_image'), 'image', type_='foreignkey')
op.drop_index('image_parent_id', table_name='image')
op.drop_column('image', 'security_indexed')
op.drop_column('image', 'security_indexed_engine')
op.drop_column('image', 'parent_id')
### end Alembic commands ###


@@ -303,6 +303,7 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
if parent:
fetched.ancestors = '%s%s/' % (parent.ancestors, parent.id)
fetched.parent = parent
fetched.save()
fetched.storage.save()


@@ -5,6 +5,7 @@ import random
import calendar
import os
from sys import maxsize
from datetime import datetime, timedelta
from peewee import (SqliteDatabase, create_model_tables, drop_model_tables, savepoint_sqlite,
savepoint)
@@ -94,6 +95,10 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map):
for path_builder in paths:
path = path_builder(new_image.storage.uuid)
store.put_content('local_us', path, checksum)
new_image.security_indexed = False
new_image.security_indexed_engine = maxsize
new_image.save()
creation_time = REFERENCE_DATE + timedelta(weeks=image_num) + timedelta(days=model_num)
command_list = SAMPLE_CMDS[image_num % len(SAMPLE_CMDS)]

Binary file not shown.


@@ -0,0 +1,67 @@
import logging
from app import storage as store
from data.database import ImageStorage, ImageStoragePlacement, ImageStorageLocation, JOIN_LEFT_OUTER
from digest import checksums
logger = logging.getLogger(__name__)
def _get_imagestorages_with_locations(query_modifier):
query = (ImageStoragePlacement
.select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER))
query = query_modifier(query)
location_list = list(query)
storages = {}
for location in location_list:
storage = location.storage
if storage.id not in storages:
storages[storage.id] = storage
storage.locations = set()
else:
storage = storages[storage.id]
storage.locations.add(location.location.name)
return storages.values()
def backfill_checksum(imagestorage_with_locations):
try:
json_data = store.get_content(imagestorage_with_locations.locations, store.image_json_path(imagestorage_with_locations.uuid))
with store.stream_read_file(imagestorage_with_locations.locations, store.image_layer_path(imagestorage_with_locations.uuid)) as fp:
imagestorage_with_locations.checksum = 'sha256:{0}'.format(checksums.sha256_file(fp, json_data + '\n'))
imagestorage_with_locations.save()
except IOError as e:
if str(e).startswith("No such key"):
imagestorage_with_locations.checksum = 'unknown:{0}'.format(imagestorage_with_locations.uuid)
imagestorage_with_locations.save()
except Exception:
logger.exception('exception when backfilling checksum of %s', imagestorage_with_locations.uuid)
def backfill_checksums():
logger.setLevel(logging.DEBUG)
logger.debug('backfill_checksums: Starting')
logger.debug('backfill_checksums: This can be a LONG RUNNING OPERATION. Please wait!')
def limit_to_empty_checksum(query):
return query.where(ImageStorage.checksum >> None, ImageStorage.uploading == False).limit(100)
while True:
storages = _get_imagestorages_with_locations(limit_to_empty_checksum)
if len(storages) == 0:
logger.debug('backfill_checksums: Completed')
return
for storage in storages:
backfill_checksum(storage)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('peewee').setLevel(logging.CRITICAL)
logging.getLogger('boto').setLevel(logging.CRITICAL)
backfill_checksums()


@@ -0,0 +1,49 @@
import logging
from data.database import Image, ImageStorage, db
from app import app
logger = logging.getLogger(__name__)
def backfill_parent_id():
logger.setLevel(logging.DEBUG)
logger.debug('backfill_parent_id: Starting')
logger.debug('backfill_parent_id: This can be a LONG RUNNING OPERATION. Please wait!')
# Check whether any images are missing a parent
has_images = bool(list(Image
.select(Image.id)
.join(ImageStorage)
.where(Image.parent >> None, Image.ancestors != '/', ImageStorage.uploading == False)
.limit(1)))
if not has_images:
logger.debug('backfill_parent_id: No migration needed')
return
while True:
# Load the next batch of image ids from the DB.
batch_images_ids = list(Image
.select(Image.id)
.join(ImageStorage)
.where(Image.parent >> None, Image.ancestors != '/', ImageStorage.uploading == False)
.limit(100))
if len(batch_images_ids) == 0:
logger.debug('backfill_parent_id: Completed')
return
for image_id in batch_images_ids:
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
image = Image.select(Image.id, Image.ancestors).where(Image.id == image_id).get()
image.parent = image.ancestors.split('/')[-2]
image.save()
except Image.DoesNotExist:
pass
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('peewee').setLevel(logging.CRITICAL)
backfill_parent_id()
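
For context on the backfill above: set_image_metadata builds the ancestors column as '%s%s/' % (parent.ancestors, parent.id), so it is a '/'-delimited chain of ancestor database ids and the next-to-last segment is the direct parent. A tiny illustration with made-up ids:

# Illustrative only: an image whose ancestor chain (by database id) is 1 -> 2 -> 3.
ancestors = '/1/2/3/'
parts = ancestors.split('/')   # ['', '1', '2', '3', '']
parent_id = parts[-2]          # '3', the value backfill_parent_id() assigns to image.parent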

workers/securityworker.py

@@ -0,0 +1,218 @@
import logging
import requests
import features
import time
import os
import random
from sys import exc_info
from peewee import JOIN_LEFT_OUTER
from app import app, storage, OVERRIDE_CONFIG_DIRECTORY
from workers.worker import Worker
from data.database import Image, ImageStorage, ImageStorageLocation, ImageStoragePlacement, db_random_func, UseThenDisconnect
logger = logging.getLogger(__name__)
BATCH_SIZE = 20
INDEXING_INTERVAL = 10
API_METHOD_INSERT = '/layers'
API_METHOD_VERSION = '/versions/engine'
def _get_image_to_export(version):
Parent = Image.alias()
ParentImageStorage = ImageStorage.alias()
rimages = []
# Without parent
candidates = (Image
.select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum)
.join(ImageStorage)
.where(Image.security_indexed_engine < version, Image.parent >> None, ImageStorage.uploading == False, ImageStorage.checksum != '')
.limit(BATCH_SIZE*10)
.alias('candidates'))
images = (Image
.select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum)
.distinct()
.from_(candidates)
.order_by(db_random_func())
.tuples()
.limit(BATCH_SIZE))
for image in images:
rimages.append({'docker_image_id': image[0], 'storage_uuid': image[1], 'storage_checksum': image[2], 'parent_docker_image_id': None, 'parent_storage_uuid': None})
# With analyzed parent
candidates = (Image
.select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum, Parent.docker_image_id.alias('parent_docker_image_id'), ParentImageStorage.uuid.alias('parent_storage_uuid'))
.join(Parent, on=(Image.parent == Parent.id))
.join(ParentImageStorage, on=(ParentImageStorage.id == Parent.storage))
.switch(Image)
.join(ImageStorage)
.where(Image.security_indexed_engine < version, Parent.security_indexed == True, Parent.security_indexed_engine >= version, ImageStorage.uploading == False, ImageStorage.checksum != '')
.limit(BATCH_SIZE*10)
.alias('candidates'))
images = (Image
.select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum, candidates.c.parent_docker_image_id, candidates.c.parent_storage_uuid)
.distinct()
.from_(candidates)
.order_by(db_random_func())
.tuples()
.limit(BATCH_SIZE))
for image in images:
rimages.append({'docker_image_id': image[0], 'storage_uuid': image[1], 'storage_checksum': image[2], 'parent_docker_image_id': image[3], 'parent_storage_uuid': image[4]})
# Re-shuffle, otherwise the images without parents would always end up at the top
random.shuffle(rimages)
return rimages
def _get_storage_locations(uuid):
query = (ImageStoragePlacement
.select()
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER)
.where(ImageStorage.uuid == uuid))
locations = list()
for location in query:
locations.append(location.location.name)
return locations
def _update_image(image, indexed, version):
query = (Image
.select()
.join(ImageStorage)
.where(Image.docker_image_id == image['docker_image_id'], ImageStorage.uuid == image['storage_uuid']))
updated_images = list()
for matched_image in query:
updated_images.append(matched_image.id)
query = (Image
.update(security_indexed=indexed, security_indexed_engine=version)
.where(Image.id << updated_images))
query.execute()
class SecurityWorker(Worker):
def __init__(self):
super(SecurityWorker, self).__init__()
if self._load_configuration():
self.add_operation(self._index_images, INDEXING_INTERVAL)
def _load_configuration(self):
# Load configuration
config = app.config.get('SECURITY_SCANNER')
if not config or 'ENDPOINT' not in config or 'ENGINE_VERSION_TARGET' not in config or 'DISTRIBUTED_STORAGE_PREFERENCE' not in app.config:
logger.error('Missing or incomplete configuration for the security worker')
return False
self._api = config['ENDPOINT']
self._target_version = config['ENGINE_VERSION_TARGET']
self._default_storage_locations = app.config['DISTRIBUTED_STORAGE_PREFERENCE']
self._ca_verification = False
self._cert = None
if 'CA_CERTIFICATE_FILENAME' in config:
self._ca_verification = os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['CA_CERTIFICATE_FILENAME'])
if not os.path.isfile(self._ca_verification):
logger.error('Could not find configured CA file')
return False
if 'PRIVATE_KEY_FILENAME' in config and 'PUBLIC_KEY_FILENAME' in config:
self._cert = (
os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PUBLIC_KEY_FILENAME']),
os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PRIVATE_KEY_FILENAME']),
)
if not os.path.isfile(self._cert[0]) or not os.path.isfile(self._cert[1]):
logger.error('Could not find configured key pair files')
return False
return True
def _index_images(self):
with UseThenDisconnect(app.config):
while True:
# Get images to analyze
try:
images = _get_image_to_export(self._target_version)
except Image.DoesNotExist:
logger.debug('No more images to analyze')
return
# An empty batch means everything is already indexed; return instead of busy-looping.
if not images:
return
for img in images:
# Get layer storage URL
path = storage.image_layer_path(img['storage_uuid'])
locations = self._default_storage_locations
if not storage.exists(locations, path):
locations = _get_storage_locations(img['storage_uuid'])
if not storage.exists(locations, path):
logger.warning('Could not find a valid location to download layer %s', img['docker_image_id']+'.'+img['storage_uuid'])
# Mark the image as analyzed anyway: this error most often occurs when running against a
# database copy whose images have already been removed from the live database (and thus from S3)
_update_image(img, False, self._target_version)
continue
uri = storage.get_direct_download_url(locations, path)
if uri is None:
# Local storage hack
uri = path
# Build the request payload
request = {
'ID': img['docker_image_id']+'.'+img['storage_uuid'],
'TarSum': img['storage_checksum'],
'Path': uri
}
if img['parent_docker_image_id'] is not None:
request['ParentID'] = img['parent_docker_image_id']+'.'+img['parent_storage_uuid']
# Post request
try:
logger.info('Analyzing %s', request['ID'])
# Using invalid certificates doesn't return proper errors because of
# https://github.com/shazow/urllib3/issues/556
httpResponse = requests.post(self._api + API_METHOD_INSERT, json=request, cert=self._cert, verify=self._ca_verification)
except:
logger.exception('An exception occurred when analyzing layer ID %s : %s', request['ID'], exc_info()[0])
return
try:
jsonResponse = httpResponse.json()
except:
logger.exception('An exception occurred when analyzing layer ID %s : the response is not valid JSON (%s)', request['ID'], httpResponse.text)
return
if httpResponse.status_code == 201:
# The layer has been successfully indexed
api_version = jsonResponse['Version']
if api_version < self._target_version:
logger.warning('An engine runs on version %d but the target version is %d', api_version, self._target_version)
_update_image(img, True, api_version)
logger.info('Layer ID %s : analyzed successfully', request['ID'])
else:
if 'Message' in jsonResponse:
if 'OS and/or package manager are not supported' in jsonResponse['Message']:
# The current engine could not index this layer
logger.warning('A warning event occurred when analyzing layer ID %s : %s', request['ID'], jsonResponse['Message'])
# This assumes no engine older than the target version is still running
_update_image(img, False, self._target_version)
else:
logger.error('An error occurred when analyzing layer ID %s : %d %s', request['ID'], httpResponse.status_code, jsonResponse['Message'])
return
else:
logger.error('An error occurred when analyzing layer ID %s : %d', request['ID'], httpResponse.status_code)
return
if __name__ == '__main__':
logging.getLogger('requests').setLevel(logging.WARNING)
logging.getLogger('apscheduler').setLevel(logging.CRITICAL)
if not features.SECURITY_SCANNER:
logger.debug('Security scanner disabled; skipping')
# Sleep forever so the process stays up without doing any work.
while True:
time.sleep(100000)
worker = SecurityWorker()
worker.start()
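
For reference, _load_configuration above expects a SECURITY_SCANNER block in the application config plus a top-level DISTRIBUTED_STORAGE_PREFERENCE list. A minimal sketch of such a config; only the key names come from the code above, every value here is a placeholder:

# Hypothetical example values; key names taken from _load_configuration.
SECURITY_SCANNER = {
  'ENDPOINT': 'https://security-scanner.example.com:6060',
  'ENGINE_VERSION_TARGET': 1,
  # Optional TLS material, resolved relative to OVERRIDE_CONFIG_DIRECTORY.
  'CA_CERTIFICATE_FILENAME': 'security_scanner_ca.pem',
  'PUBLIC_KEY_FILENAME': 'security_scanner.crt',
  'PRIVATE_KEY_FILENAME': 'security_scanner.key',
}

# Must also exist in the top-level app config; 'local_us' matches the location used in initdb above.
DISTRIBUTED_STORAGE_PREFERENCE = ['local_us']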