c6f6204630
This change coerces the batch size to an integer only after all of the floating-point math has been done, which gives a more accurate end result. In addition, when looking up the min ID, we now handle the scenario in which there are no longer any images in the database left to scan.
111 lines
3.7 KiB
Python
111 lines
3.7 KiB
Python
import logging.config
|
|
import time
|
|
|
|
from math import log10
|
|
|
|
import features
|
|
|
|
from app import app, secscan_api, prometheus
|
|
from workers.worker import Worker
|
|
from data.database import UseThenDisconnect
|
|
from data.model.image import (get_images_eligible_for_scan, get_image_pk_field,
|
|
get_max_id_for_sec_scan, get_min_id_for_sec_scan)
|
|
from util.secscan.api import SecurityConfigValidator
|
|
from util.secscan.analyzer import LayerAnalyzer, PreemptedException
|
|
from util.migrate.allocator import yield_random_entries
|
|
from endpoints.v2 import v2_bp
|
|
|
|
|
|
# Used when the SECURITY_SCANNER_INDEXING_INTERVAL config key is absent.
# NOTE(review): the unit is presumably seconds -- confirm against Worker.add_operation.
DEFAULT_INDEXING_INTERVAL = 30

logger = logging.getLogger(__name__)

# Set to the number of remaining candidates after each image is processed.
unscanned_images_gauge = prometheus.create_gauge(
    'unscanned_images',
    'Number of images that clair needs to scan.',
)

# Set to the highest image ID considered during an indexing run.
max_unscanned_images_gauge = prometheus.create_gauge(
    'max_unscanned_image_id',
    'Max ID of the unscanned images.',
)
|
|
|
|
class SecurityWorker(Worker):
    """ Worker that hands images eligible for security scanning to a LayerAnalyzer.

        On construction, `_index_images` is registered as a worker operation using the
        configured indexing interval, provided the security scanner configuration validates.
    """

    def __init__(self):
        super(SecurityWorker, self).__init__()

        # With an invalid scanner configuration nothing is registered, so the worker
        # is left without an indexing operation.
        if not SecurityConfigValidator(app.config).valid():
            logger.warning('Failed to validate security scan configuration')
            return

        self._target_version = app.config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 3)
        self._analyzer = LayerAnalyzer(app.config, secscan_api)
        self._min_id = None

        indexing_interval = app.config.get('SECURITY_SCANNER_INDEXING_INTERVAL',
                                           DEFAULT_INDEXING_INTERVAL)
        self.add_operation(self._index_images, indexing_interval)

    def _index_images(self):
        """ Analyzes the images eligible for scanning whose IDs lie between `min_id` and the
            current max ID.

            Returns early when there is no max ID, when no min ID can be determined, or when
            the min ID is already past the max ID. Once the loop over candidates finishes,
            `min_id` is moved to one past the max ID.
        """
        # None here means there are no images in the database.
        upper_id = get_max_id_for_sec_scan()
        if upper_id is None:
            return

        lower_id = self.min_id
        if lower_id is None or lower_id > upper_id:
            logger.info('Could not find any available images for scanning.')
            return

        max_unscanned_images_gauge.Set(upper_id)

        # The batch size scales as 4^log10(span of IDs), with the span floored at 10, which
        # stays manageable into the billions. The int() truncation is applied last, after
        # all of the floating point math.
        id_span = max(10, upper_id - lower_id)
        batch_size = int(4 ** log10(id_span))

        def eligible_images():
            return get_images_eligible_for_scan(self._target_version)

        with UseThenDisconnect(app.config):
            candidates = yield_random_entries(
                eligible_images,
                get_image_pk_field(),
                batch_size,
                upper_id,
                lower_id,
            )
            for image, abort_signal, remaining in candidates:
                try:
                    self._analyzer.analyze_recursively(image)
                except PreemptedException:
                    logger.info('Another worker pre-empted us for layer: %s', image.id)
                    # NOTE(review): presumably tells the allocator to give up on the current
                    # batch -- confirm in util.migrate.allocator.
                    abort_signal.set()

                unscanned_images_gauge.Set(remaining)

            # Getting here means every image up to upper_id has been handled, so the next
            # run should begin with the image that follows it.
            self.min_id = upper_id + 1

    @property
    def min_id(self):
        """ Returns the ID of the first image to be analyzed, looking it up if it is not yet
            known.

            The config is consulted first and the database second. None is returned when
            neither of them yields an ID.
        """
        if self._min_id is not None:
            return self._min_id

        found_id = app.config.get('SECURITY_SCANNER_INDEXING_MIN_ID')
        if found_id is None:
            found_id = get_min_id_for_sec_scan(self._target_version)

        self._min_id = found_id
        return found_id

    @min_id.setter
    def min_id(self, new_min_id):
        self._min_id = new_min_id
|
|
|
|
|
|
if __name__ == '__main__':
    app.register_blueprint(v2_bp, url_prefix='/v2')

    if not features.SECURITY_SCANNER:
        logger.debug('Security scanner disabled; skipping SecurityWorker')
        # Keep the process alive without doing any work; this loop never exits, so the
        # worker below is never created when the feature is off.
        while True:
            time.sleep(100000)

    logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
    SecurityWorker().start()
|