Merge pull request #2424 from jzelinskie/qss-image

workers.securityworker: revert to image querying
This commit is contained in:
Jimmy Zelinskie 2017-03-10 17:38:02 -05:00 committed by GitHub
commit 123d003d4e
2 changed files with 15 additions and 10 deletions

View file

@ -490,7 +490,7 @@ def total_image_count():
return Image.select().count() return Image.select().count()
def get_image_id(): def get_image_pk_field():
""" Returns the primary key for Image DB model """ """ Returns the primary key for Image DB model """
return Image.id return Image.id

View file

@ -1,12 +1,14 @@
import logging.config import logging.config
import time import time
from math import floor, log10
import features import features
from app import app, secscan_api, prometheus from app import app, secscan_api, prometheus
from workers.worker import Worker from workers.worker import Worker
from data.database import UseThenDisconnect from data.database import UseThenDisconnect
from data.model.tag import (get_tags_images_eligible_for_scan, get_tag_pk_field, from data.model.image import (get_images_eligible_for_scan, get_image_pk_field,
get_max_id_for_sec_scan, get_min_id_for_sec_scan) get_max_id_for_sec_scan, get_min_id_for_sec_scan)
from util.secscan.api import SecurityConfigValidator from util.secscan.api import SecurityConfigValidator
from util.secscan.analyzer import LayerAnalyzer, PreemptedException from util.secscan.analyzer import LayerAnalyzer, PreemptedException
@ -14,7 +16,6 @@ from util.migrate.allocator import yield_random_entries
from endpoints.v2 import v2_bp from endpoints.v2 import v2_bp
BATCH_SIZE = 50
DEFAULT_INDEXING_INTERVAL = 30 DEFAULT_INDEXING_INTERVAL = 30
@ -33,8 +34,9 @@ class SecurityWorker(Worker):
self._analyzer = LayerAnalyzer(app.config, secscan_api) self._analyzer = LayerAnalyzer(app.config, secscan_api)
# Get the ID of the first image we want to analyze. # Get the ID of the first image we want to analyze.
self._min_id = app.config.get('SECURITY_SCANNER_INDEXING_MIN_ID', self._min_id = app.config.get('SECURITY_SCANNER_INDEXING_MIN_ID')
get_min_id_for_sec_scan(self._target_version)) if self._min_id is None:
self._min_id = get_min_id_for_sec_scan(self._target_version)
interval = app.config.get('SECURITY_SCANNER_INDEXING_INTERVAL', DEFAULT_INDEXING_INTERVAL) interval = app.config.get('SECURITY_SCANNER_INDEXING_INTERVAL', DEFAULT_INDEXING_INTERVAL)
self.add_operation(self._index_images, interval) self.add_operation(self._index_images, interval)
@ -43,7 +45,7 @@ class SecurityWorker(Worker):
def _index_images(self): def _index_images(self):
def batch_query(): def batch_query():
return get_tags_images_eligible_for_scan(self._target_version) return get_images_eligible_for_scan(self._target_version)
# Get the ID of the last image we can analyze. Will be None if there are no images in the # Get the ID of the last image we can analyze. Will be None if there are no images in the
# database. # database.
@ -53,17 +55,20 @@ class SecurityWorker(Worker):
max_unscanned_images_gauge.Set(max_id) max_unscanned_images_gauge.Set(max_id)
# 4^log10(total) gives us a scalable batch size into the billions.
batch_size = 4 ** int(floor(log10(max(10, max_id - self._min_id))))
with UseThenDisconnect(app.config): with UseThenDisconnect(app.config):
to_scan_generator = yield_random_entries( to_scan_generator = yield_random_entries(
batch_query, batch_query,
get_tag_pk_field(), get_image_pk_field(),
BATCH_SIZE, batch_size,
max_id, max_id,
self._min_id, self._min_id,
) )
for candidate, abt, num_remaining in to_scan_generator: for candidate, abt, num_remaining in to_scan_generator:
try: try:
self._analyzer.analyze_recursively(candidate.image) self._analyzer.analyze_recursively(candidate)
except PreemptedException: except PreemptedException:
logger.info('Another worker pre-empted us for layer: %s', candidate.id) logger.info('Another worker pre-empted us for layer: %s', candidate.id)
abt.set() abt.set()