From b26a131085230badad7ab4fc91495fedbb2f986d Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 3 May 2018 11:57:20 +0300 Subject: [PATCH] Fix worker count to use CPU affinity correctly and be properly bounded We were using the `cpu_count`, which doesn't respect container affinity. Now, we use `cpu_affinity` and also bound to make sure we don't start a million workers Fixes https://jira.coreos.com/browse/QUAY-928 --- conf/gunicorn_local.py | 4 +-- conf/gunicorn_registry.py | 4 +-- conf/gunicorn_secscan.py | 4 +-- conf/gunicorn_verbs.py | 4 +-- conf/gunicorn_web.py | 4 +-- util/test/test_workers.py | 74 +++++++++++++++++++++++++++++++++++++++ util/workers.py | 31 ++++++++++++++++ 7 files changed, 115 insertions(+), 10 deletions(-) create mode 100644 util/test/test_workers.py create mode 100644 util/workers.py diff --git a/conf/gunicorn_local.py b/conf/gunicorn_local.py index 3c581818b..b33558ef2 100644 --- a/conf/gunicorn_local.py +++ b/conf/gunicorn_local.py @@ -2,16 +2,16 @@ import sys import os sys.path.append(os.path.join(os.path.dirname(__file__), "../")) -import multiprocessing import logging from Crypto import Random from util.log import logfile_path +from util.workers import get_worker_count logconfig = logfile_path(debug=True) bind = '0.0.0.0:5000' -workers = max(int(os.getenv("WORKER_COUNT_LOCAL", multiprocessing.cpu_count())), 2) +workers = get_worker_count('local', 2, minimum=2, maximum=8) worker_class = 'gevent' daemon = False pythonpath = '.' diff --git a/conf/gunicorn_registry.py b/conf/gunicorn_registry.py index 508d96aa3..23590ba45 100644 --- a/conf/gunicorn_registry.py +++ b/conf/gunicorn_registry.py @@ -2,16 +2,16 @@ import sys import os sys.path.append(os.path.join(os.path.dirname(__file__), "../")) -import multiprocessing import logging from Crypto import Random from util.log import logfile_path +from util.workers import get_worker_count logconfig = logfile_path(debug=False) bind = 'unix:/tmp/gunicorn_registry.sock' -workers = max(int(os.getenv("WORKER_COUNT_REGISTRY", multiprocessing.cpu_count() * 4)), 8) +workers = get_worker_count('registry', 4, minimum=8, maximum=64) worker_class = 'gevent' pythonpath = '.' preload_app = True diff --git a/conf/gunicorn_secscan.py b/conf/gunicorn_secscan.py index fe7d8f14f..daea39c38 100644 --- a/conf/gunicorn_secscan.py +++ b/conf/gunicorn_secscan.py @@ -2,16 +2,16 @@ import sys import os sys.path.append(os.path.join(os.path.dirname(__file__), "../")) -import multiprocessing import logging from Crypto import Random from util.log import logfile_path +from util.workers import get_worker_count logconfig = logfile_path(debug=False) bind = 'unix:/tmp/gunicorn_secscan.sock' -workers = max(int(os.getenv("WORKER_COUNT_SECSCAN", multiprocessing.cpu_count())), 2) +workers = get_worker_count('secscan', 2, minimum=2, maximum=4) worker_class = 'gevent' pythonpath = '.' preload_app = True diff --git a/conf/gunicorn_verbs.py b/conf/gunicorn_verbs.py index 5135abdcf..9502f7563 100644 --- a/conf/gunicorn_verbs.py +++ b/conf/gunicorn_verbs.py @@ -2,16 +2,16 @@ import sys import os sys.path.append(os.path.join(os.path.dirname(__file__), "../")) -import multiprocessing import logging from Crypto import Random from util.log import logfile_path +from util.workers import get_worker_count logconfig = logfile_path(debug=False) bind = 'unix:/tmp/gunicorn_verbs.sock' -workers = max(int(os.getenv("WORKER_COUNT_VERBS", multiprocessing.cpu_count())), 2) +workers = get_worker_count('verbs', 2, minimum=2, maximum=32) pythonpath = '.' preload_app = True timeout = 2000 # Because sync workers diff --git a/conf/gunicorn_web.py b/conf/gunicorn_web.py index 2c546f29f..8bd1abaa0 100644 --- a/conf/gunicorn_web.py +++ b/conf/gunicorn_web.py @@ -2,17 +2,17 @@ import sys import os sys.path.append(os.path.join(os.path.dirname(__file__), "../")) -import multiprocessing import logging from Crypto import Random from util.log import logfile_path +from util.workers import get_worker_count logconfig = logfile_path(debug=False) bind = 'unix:/tmp/gunicorn_web.sock' -workers = max(int(os.getenv("WORKER_COUNT_WEB", multiprocessing.cpu_count())), 2) +workers = get_worker_count('web', 2, minimum=2, maximum=32) worker_class = 'gevent' pythonpath = '.' preload_app = True diff --git a/util/test/test_workers.py b/util/test/test_workers.py new file mode 100644 index 000000000..7df327cf7 --- /dev/null +++ b/util/test/test_workers.py @@ -0,0 +1,74 @@ +from mock import patch + +import pytest + +from util.workers import get_worker_count + +@pytest.mark.parametrize('kind_name,env_vars,cpu_affinity,multiplier,minimum,maximum,expected', [ + # No override and CPU affinity * multiplier is between min and max => cpu affinity * multiplier. + ('registry', {}, [0, 1], 10, 8, 64, 20), + + # No override and CPU affinity * multiplier is below min => min. + ('registry', {}, [0], 1, 8, 64, 8), + + # No override and CPU affinity * multiplier is above max => max. + ('registry', {}, [0, 1, 2, 3], 20, 8, 64, 64), + + # Override based on specific env var. + ('registry', { + 'WORKER_COUNT_REGISTRY': 12, + }, [0, 1], 10, 8, 64, 12), + + # Override based on specific env var (ignores maximum). + ('registry', { + 'WORKER_COUNT_REGISTRY': 120, + }, [0, 1], 10, 8, 64, 120), + + # Override based on specific env var (respects minimum). + ('registry', { + 'WORKER_COUNT_REGISTRY': 1, + }, [0, 1], 10, 8, 64, 8), + + # Override based on generic env var. + ('registry', { + 'WORKER_COUNT': 12, + }, [0, 1], 10, 8, 64, 12), + + # Override based on generic env var (ignores maximum). + ('registry', { + 'WORKER_COUNT': 120, + }, [0, 1], 10, 8, 64, 120), + + # Override based on generic env var (respects minimum). + ('registry', { + 'WORKER_COUNT': 1, + }, [0, 1], 10, 8, 64, 8), + + # Override always uses specific first. + ('registry', { + 'WORKER_COUNT_REGISTRY': 120, + 'WORKER_COUNT': 12, + }, [0, 1], 10, 8, 64, 120), + + # Non-matching override. + ('verbs', { + 'WORKER_COUNT_REGISTRY': 120, + }, [0, 1], 10, 8, 64, 20), + + # Zero worker count (use defaults). + ('verbs', { + 'WORKER_COUNT': 0, + }, [0, 1], 10, 8, 64, 8), +]) +def test_get_worker_count(kind_name, env_vars, cpu_affinity, multiplier, minimum, maximum, + expected): + class FakeProcess(object): + def __init__(self, pid): + pass + + def cpu_affinity(self): + return cpu_affinity + + with patch('os.environ.get', env_vars.get): + with patch('psutil.Process', FakeProcess): + assert get_worker_count(kind_name, multiplier, minimum, maximum) == expected diff --git a/util/workers.py b/util/workers.py new file mode 100644 index 000000000..ca29ff0fd --- /dev/null +++ b/util/workers.py @@ -0,0 +1,31 @@ +import os +import psutil + +def get_worker_count(worker_kind_name, multiplier, minimum=None, maximum=None): + """ Returns the number of gunicorn workers to run for the given worker kind, + based on a combination of environment variable, multiplier, minimum (if any), + and number of accessible CPU cores. + """ + minimum = minimum or multiplier + maximum = maximum or (multiplier * multiplier) + + # Check for an override via an environment variable. + override_value = os.environ.get('WORKER_COUNT_' + worker_kind_name.upper()) + if override_value is not None: + return max(override_value, minimum) + + override_value = os.environ.get('WORKER_COUNT') + if override_value is not None: + return max(override_value, minimum) + + # Load the number of CPU cores via affinity, and use that to calculate the + # number of workers to run. + p = psutil.Process(os.getpid()) + + try: + cpu_count = len(p.cpu_affinity()) + except AttributeError: + # cpu_affinity isn't supported on this platform. Assume 2. + cpu_count = 2 + + return min(max(cpu_count * multiplier, minimum), maximum)