Add support for Quay's vulnerability tool

Authored by Quentin Machu on 2015-10-05 13:35:01 -04:00; committed by Jimmy Zelinskie
parent c97bc0d8d0
commit 37118423a5
8 changed files with 135 additions and 32 deletions

View file

@@ -577,6 +577,10 @@ class Image(BaseModel):
+  security_indexed = BooleanField(default=False)
+  security_indexed_engine = IntegerField(default=-1)
+  parent = ForeignKeyField('self', index=True, null=True, related_name='children')

   class Meta:
     database = db
     read_slaves = (read_slave,)
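Taken together, these fields let a worker select images that still need scanning: security_indexed_engine starts at -1 (never indexed) and is bumped once a scan succeeds. A minimal sketch of that query, assuming the peewee model above and a hypothetical CURRENT_ENGINE_VERSION constant; this is exactly the access pattern the composite index added in the migration below is built for:

# Hypothetical worker-side query; CURRENT_ENGINE_VERSION is an assumed constant,
# not part of this commit. Images whose last successful index predates the
# current engine version need (re)scanning.
CURRENT_ENGINE_VERSION = 1

unscanned = (Image
             .select()
             .where(Image.security_indexed_engine < CURRENT_ENGINE_VERSION))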

View file

@@ -0,0 +1,21 @@
"""backfill parent ids and checksums

Revision ID: 2fb9492c20cc
Revises: 57dad559ff2d
Create Date: 2015-07-14 17:38:47.397963

"""

# revision identifiers, used by Alembic.
revision = '2fb9492c20cc'
down_revision = '57dad559ff2d'

from alembic import op
import sqlalchemy as sa

from util.migrate.backfill_parent_id import backfill_parent_id
from util.migrate.backfill_checksums import backfill_checksums


def upgrade(tables):
  backfill_parent_id()
  backfill_checksums()


def downgrade(tables):
  pass
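downgrade() is deliberately a no-op: a data backfill cannot be meaningfully reversed, so rolling back simply leaves the filled-in values in place. Since both helpers also expose __main__ entry points (see their files below), the same work could be run outside Alembic; a minimal one-off runner sketch, mirroring upgrade() above:

# One-off runner equivalent to upgrade(); safe to re-run, because both
# backfills only touch rows whose parent/checksum is still NULL.
import logging

from util.migrate.backfill_parent_id import backfill_parent_id
from util.migrate.backfill_checksums import backfill_checksums

if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  backfill_parent_id()
  backfill_checksums()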

View file

@@ -1,14 +1,12 @@
 """add support for quay's security indexer

 Revision ID: 57dad559ff2d
 Revises: 154f2befdfbe
 Create Date: 2015-07-13 16:51:41.669249

 """

 # revision identifiers, used by Alembic.
 revision = '57dad559ff2d'
-down_revision = '73669db7e12'
+down_revision = '3ff4fbc94644'

 from alembic import op
 import sqlalchemy as sa
@@ -16,19 +14,19 @@ import sqlalchemy as sa

 def upgrade(tables):
   ### commands auto generated by Alembic - please adjust! ###
   op.add_column('image', sa.Column('parent_id', sa.Integer(), nullable=True))
-  op.add_column('image', sa.Column('security_indexed', sa.Boolean(), nullable=False, default=False, server_default=sa.sql.expression.false()))
-  op.add_column('image', sa.Column('security_indexed_engine', sa.Integer(), nullable=False, default=-1, server_default="-1"))
+  op.add_column('image', sa.Column('security_indexed', sa.Boolean(), nullable=False))
+  op.add_column('image', sa.Column('security_indexed_engine', sa.Integer(), nullable=False))
   op.create_index('image_parent_id', 'image', ['parent_id'], unique=False)
   op.create_foreign_key(op.f('fk_image_parent_id_image'), 'image', 'image', ['parent_id'], ['id'])
   ### end Alembic commands ###
+  op.create_index('image_security_indexed_engine_security_indexed', 'image', ['security_indexed_engine', 'security_indexed'])


 def downgrade(tables):
-  ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('image_security_indexed_engine_security_indexed', 'image')
+  ### commands auto generated by Alembic - please adjust! ###
   op.drop_constraint(op.f('fk_image_parent_id_image'), 'image', type_='foreignkey')
   op.drop_index('image_parent_id', table_name='image')
   op.drop_column('image', 'security_indexed')
   op.drop_column('image', 'security_indexed_engine')
   op.drop_column('image', 'parent_id')
   ### end Alembic commands ###
-  op.drop_index('image_security_indexed', 'image')
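Note the tradeoff in the changed add_column calls: the removed lines carried explicit defaults, while the new ones rely on the database's implicit defaults to fill existing rows when the NOT NULL columns are added. On backends that reject adding a NOT NULL column without a default, the explicit form removed above is the safer pattern; it is reproduced here for reference:

# Explicit-default form (as in the lines removed above): pre-existing image rows
# get security_indexed = FALSE and security_indexed_engine = -1 at ALTER time.
op.add_column('image', sa.Column('security_indexed', sa.Boolean(), nullable=False,
                                 default=False, server_default=sa.sql.expression.false()))
op.add_column('image', sa.Column('security_indexed_engine', sa.Integer(), nullable=False,
                                 default=-1, server_default="-1"))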

View file

@@ -95,6 +95,10 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map):
   for path_builder in paths:
     path = path_builder(new_image.storage.uuid)
     store.put_content('local_us', path, checksum)

+  new_image.security_indexed = False
+  new_image.security_indexed_engine = maxsize
+  new_image.save()
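Seeding test images with security_indexed_engine = maxsize (from sys) reads as the inverse of the -1 model default: it parks them above any real indexer version so a scanning worker treats them as already processed. A hedged sketch of the predicate this implies, with an assumed engine-version constant:

from sys import maxsize

INDEXER_ENGINE_VERSION = 1  # assumed value, for illustration only

def needs_indexing(image):
  # -1 (the model default) always qualifies; maxsize never does.
  return image.security_indexed_engine < INDEXER_ENGINE_VERSION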

Binary file not shown.

View file

@@ -0,0 +1,67 @@
import logging

from app import storage as store
from data.database import ImageStorage, ImageStoragePlacement, ImageStorageLocation, JOIN_LEFT_OUTER
from digest import checksums

logger = logging.getLogger(__name__)


def _get_imagestorages_with_locations(query_modifier):
  query = (ImageStoragePlacement
           .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
           .join(ImageStorageLocation)
           .switch(ImageStoragePlacement)
           .join(ImageStorage, JOIN_LEFT_OUTER))
  query = query_modifier(query)

  location_list = list(query)

  storages = {}
  for location in location_list:
    storage = location.storage

    if not storage.id in storages:
      storages[storage.id] = storage
      storage.locations = set()
    else:
      storage = storages[storage.id]

    storage.locations.add(location.location.name)

  return storages.values()


def backfill_checksum(imagestorage_with_locations):
  try:
    json_data = store.get_content(imagestorage_with_locations.locations, store.image_json_path(imagestorage_with_locations.uuid))
    with store.stream_read_file(imagestorage_with_locations.locations, store.image_layer_path(imagestorage_with_locations.uuid)) as fp:
      imagestorage_with_locations.checksum = 'sha256:{0}'.format(checksums.sha256_file(fp, json_data + '\n'))
      imagestorage_with_locations.save()
  except IOError as e:
    if str(e).startswith("No such key"):
      imagestorage_with_locations.checksum = 'unknown:{0}'.format(imagestorage_with_locations.uuid)
      imagestorage_with_locations.save()
  except:
    logger.exception('exception when backfilling checksum of %s', imagestorage_with_locations.uuid)


def backfill_checksums():
  logger.setLevel(logging.DEBUG)
  logger.debug('backfill_checksums: Starting')
  logger.debug('backfill_checksums: This can be a LONG RUNNING OPERATION. Please wait!')

  def limit_to_empty_checksum(query):
    return query.where(ImageStorage.checksum >> None, ImageStorage.uploading == False).limit(100)

  while True:
    storages = _get_imagestorages_with_locations(limit_to_empty_checksum)
    if len(storages) == 0:
      logger.debug('backfill_checksums: Completed')
      return

    for storage in storages:
      backfill_checksum(storage)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  backfill_checksums()
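The checksum being backfilled is a SHA-256 over the image's JSON metadata (plus a newline) followed by the raw layer stream, rendered as 'sha256:<hex>'. A rough pure-hashlib equivalent of what checksums.sha256_file is assumed to compute here:

import hashlib

def sha256_file_sketch(fp, header=None):
  # Assumed behaviour of digest.checksums.sha256_file: seed the hash with the
  # JSON header, then stream the layer file through it in chunks.
  h = hashlib.sha256()
  if header:
    h.update(header)
  for chunk in iter(lambda: fp.read(8192), b''):
    h.update(chunk)
  return h.hexdigest()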

View file

@@ -1,38 +1,46 @@
 import logging

-from data.database import Image, ImageStorage, db, db_for_update
+from data.database import Image, ImageStorage, db
 from app import app
-from util.migrate import yield_random_entries

 logger = logging.getLogger(__name__)


 def backfill_parent_id():
   logger.setLevel(logging.DEBUG)
   logger.debug('backfill_parent_id: Starting')
   logger.debug('backfill_parent_id: This can be a LONG RUNNING OPERATION. Please wait!')

-  def fetch_batch():
-    return (Image
-            .select(Image.id, Image.ancestors)
-            .join(ImageStorage)
-            .where(Image.parent >> None, Image.ancestors != '/',
-                   ImageStorage.uploading == False))

+  # Check for any images without parent
+  has_images = bool(list(Image
+                         .select(Image.id)
+                         .join(ImageStorage)
+                         .where(Image.parent >> None, Image.ancestors != '/', ImageStorage.uploading == False)
+                         .limit(1)))

-  for to_backfill in yield_random_entries(fetch_batch, 10000, 0.3):
-    with app.config['DB_TRANSACTION_FACTORY'](db):
-      try:
-        image = db_for_update(Image
-                              .select()
-                              .where(Image.id == to_backfill.id)).get()
-        image.parent = to_backfill.ancestors.split('/')[-2]
-        image.save()
-      except Image.DoesNotExist:
-        pass

+  if not has_images:
+    logger.debug('backfill_parent_id: No migration needed')
+    return

-  logger.debug('backfill_parent_id: Completed')

+  while True:
+    # Load the record from the DB.
+    batch_images_ids = list(Image
+                            .select(Image.id)
+                            .join(ImageStorage)
+                            .where(Image.parent >> None, Image.ancestors != '/', ImageStorage.uploading == False)
+                            .limit(100))

+    if len(batch_images_ids) == 0:
+      logger.debug('backfill_parent_id: Completed')
+      return

+    for image_id in batch_images_ids:
+      with app.config['DB_TRANSACTION_FACTORY'](db):
+        try:
+          image = Image.select(Image.id, Image.ancestors).where(Image.id == image_id).get()
+          image.parent = image.ancestors.split('/')[-2]
+          image.save()
+        except Image.DoesNotExist:
+          pass

 if __name__ == "__main__":
   logging.basicConfig(level=logging.DEBUG)
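The parent is recovered from the ancestors column, which stores an image's ancestry as a '/'-delimited path of image ids; the second-to-last split component is therefore the immediate parent. A tiny worked example:

# ancestors is a '/'-delimited id path, e.g. '/1/42/137/'
# (the ancestors != '/' filter above skips root images, which have no parent).
ancestors = '/1/42/137/'
assert ancestors.split('/') == ['', '1', '42', '137', '']
assert ancestors.split('/')[-2] == '137'  # immediate parent id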

View file

@@ -33,6 +33,7 @@ def _get_image_to_export(version):
   images = (Image
             .select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum)
             .distinct()
             .from_(candidates)
             .order_by(db_random_func())
             .tuples()
@@ -54,13 +55,14 @@ def _get_image_to_export(version):
   images = (Image
             .select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum, candidates.c.parent_docker_image_id, candidates.c.parent_storage_uuid)
             .distinct()
             .from_(candidates)
             .order_by(db_random_func())
             .tuples()
             .limit(BATCH_SIZE))

   for image in images:
-    rimages.append({'docker_image_id': image[0], 'storage_uuid': image[1], 'storage_checksum': image[2], 'parent_docker_image_id': image[3], 'parent_storage_uuid': image[4]})
+    rimages.append({'docker_image_id': image[0], 'storage_uuid': image[1], 'storage_checksum': image[2], 'parent_docker_image_id': None, 'parent_storage_uuid': None})

   # Re-shuffle, otherwise the images without parents will always be on the top
   random.shuffle(rimages)
@@ -164,8 +166,7 @@ class SecurityWorker(Worker):
         'TarSum': img['storage_checksum'],
         'Path': uri
       }

-      if img['parent_docker_image_id'] is not None and img['parent_storage_uuid'] is not None:
+      if img['parent_docker_image_id'] is not None:
         request['ParentID'] = img['parent_docker_image_id']+'.'+img['parent_storage_uuid']

       # Post request
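For reference, the analysis request identifies a layer's parent to the indexer as '<parent_docker_image_id>.<parent_storage_uuid>'. A small worked example of the payload, with assumed sample values (the field names come from the hunk above):

# Assumed sample input, for illustration only.
img = {
  'docker_image_id': 'abc123',
  'storage_uuid': 'uuid-1',
  'storage_checksum': 'sha256:deadbeef',
  'parent_docker_image_id': 'def456',
  'parent_storage_uuid': 'uuid-0',
}
uri = 'https://storage.example/layers/uuid-1'  # hypothetical layer URI

request = {
  'TarSum': img['storage_checksum'],
  'Path': uri,
}
if img['parent_docker_image_id'] is not None and img['parent_storage_uuid'] is not None:
  request['ParentID'] = img['parent_docker_image_id'] + '.' + img['parent_storage_uuid']
# -> request['ParentID'] == 'def456.uuid-0'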