Fix worker count to use CPU affinity correctly and be properly bounded
We were using the `cpu_count`, which doesn't respect container affinity. Now, we use `cpu_affinity` and also bound to make sure we don't start a million workers Fixes https://jira.coreos.com/browse/QUAY-928
This commit is contained in:
parent
e80c56e441
commit
b26a131085
7 changed files with 115 additions and 10 deletions
|
@ -2,16 +2,16 @@ import sys
|
||||||
import os
|
import os
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
||||||
|
|
||||||
import multiprocessing
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from Crypto import Random
|
from Crypto import Random
|
||||||
from util.log import logfile_path
|
from util.log import logfile_path
|
||||||
|
from util.workers import get_worker_count
|
||||||
|
|
||||||
|
|
||||||
logconfig = logfile_path(debug=True)
|
logconfig = logfile_path(debug=True)
|
||||||
bind = '0.0.0.0:5000'
|
bind = '0.0.0.0:5000'
|
||||||
workers = max(int(os.getenv("WORKER_COUNT_LOCAL", multiprocessing.cpu_count())), 2)
|
workers = get_worker_count('local', 2, minimum=2, maximum=8)
|
||||||
worker_class = 'gevent'
|
worker_class = 'gevent'
|
||||||
daemon = False
|
daemon = False
|
||||||
pythonpath = '.'
|
pythonpath = '.'
|
||||||
|
|
|
@ -2,16 +2,16 @@ import sys
|
||||||
import os
|
import os
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
||||||
|
|
||||||
import multiprocessing
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from Crypto import Random
|
from Crypto import Random
|
||||||
from util.log import logfile_path
|
from util.log import logfile_path
|
||||||
|
from util.workers import get_worker_count
|
||||||
|
|
||||||
|
|
||||||
logconfig = logfile_path(debug=False)
|
logconfig = logfile_path(debug=False)
|
||||||
bind = 'unix:/tmp/gunicorn_registry.sock'
|
bind = 'unix:/tmp/gunicorn_registry.sock'
|
||||||
workers = max(int(os.getenv("WORKER_COUNT_REGISTRY", multiprocessing.cpu_count() * 4)), 8)
|
workers = get_worker_count('registry', 4, minimum=8, maximum=64)
|
||||||
worker_class = 'gevent'
|
worker_class = 'gevent'
|
||||||
pythonpath = '.'
|
pythonpath = '.'
|
||||||
preload_app = True
|
preload_app = True
|
||||||
|
|
|
@ -2,16 +2,16 @@ import sys
|
||||||
import os
|
import os
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
||||||
|
|
||||||
import multiprocessing
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from Crypto import Random
|
from Crypto import Random
|
||||||
from util.log import logfile_path
|
from util.log import logfile_path
|
||||||
|
from util.workers import get_worker_count
|
||||||
|
|
||||||
|
|
||||||
logconfig = logfile_path(debug=False)
|
logconfig = logfile_path(debug=False)
|
||||||
bind = 'unix:/tmp/gunicorn_secscan.sock'
|
bind = 'unix:/tmp/gunicorn_secscan.sock'
|
||||||
workers = max(int(os.getenv("WORKER_COUNT_SECSCAN", multiprocessing.cpu_count())), 2)
|
workers = get_worker_count('secscan', 2, minimum=2, maximum=4)
|
||||||
worker_class = 'gevent'
|
worker_class = 'gevent'
|
||||||
pythonpath = '.'
|
pythonpath = '.'
|
||||||
preload_app = True
|
preload_app = True
|
||||||
|
|
|
@ -2,16 +2,16 @@ import sys
|
||||||
import os
|
import os
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
||||||
|
|
||||||
import multiprocessing
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from Crypto import Random
|
from Crypto import Random
|
||||||
from util.log import logfile_path
|
from util.log import logfile_path
|
||||||
|
from util.workers import get_worker_count
|
||||||
|
|
||||||
logconfig = logfile_path(debug=False)
|
logconfig = logfile_path(debug=False)
|
||||||
|
|
||||||
bind = 'unix:/tmp/gunicorn_verbs.sock'
|
bind = 'unix:/tmp/gunicorn_verbs.sock'
|
||||||
workers = max(int(os.getenv("WORKER_COUNT_VERBS", multiprocessing.cpu_count())), 2)
|
workers = get_worker_count('verbs', 2, minimum=2, maximum=32)
|
||||||
pythonpath = '.'
|
pythonpath = '.'
|
||||||
preload_app = True
|
preload_app = True
|
||||||
timeout = 2000 # Because sync workers
|
timeout = 2000 # Because sync workers
|
||||||
|
|
|
@ -2,17 +2,17 @@ import sys
|
||||||
import os
|
import os
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
|
||||||
|
|
||||||
import multiprocessing
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from Crypto import Random
|
from Crypto import Random
|
||||||
from util.log import logfile_path
|
from util.log import logfile_path
|
||||||
|
from util.workers import get_worker_count
|
||||||
|
|
||||||
|
|
||||||
logconfig = logfile_path(debug=False)
|
logconfig = logfile_path(debug=False)
|
||||||
|
|
||||||
bind = 'unix:/tmp/gunicorn_web.sock'
|
bind = 'unix:/tmp/gunicorn_web.sock'
|
||||||
workers = max(int(os.getenv("WORKER_COUNT_WEB", multiprocessing.cpu_count())), 2)
|
workers = get_worker_count('web', 2, minimum=2, maximum=32)
|
||||||
worker_class = 'gevent'
|
worker_class = 'gevent'
|
||||||
pythonpath = '.'
|
pythonpath = '.'
|
||||||
preload_app = True
|
preload_app = True
|
||||||
|
|
74
util/test/test_workers.py
Normal file
74
util/test/test_workers.py
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
from mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from util.workers import get_worker_count
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('kind_name,env_vars,cpu_affinity,multiplier,minimum,maximum,expected', [
|
||||||
|
# No override and CPU affinity * multiplier is between min and max => cpu affinity * multiplier.
|
||||||
|
('registry', {}, [0, 1], 10, 8, 64, 20),
|
||||||
|
|
||||||
|
# No override and CPU affinity * multiplier is below min => min.
|
||||||
|
('registry', {}, [0], 1, 8, 64, 8),
|
||||||
|
|
||||||
|
# No override and CPU affinity * multiplier is above max => max.
|
||||||
|
('registry', {}, [0, 1, 2, 3], 20, 8, 64, 64),
|
||||||
|
|
||||||
|
# Override based on specific env var.
|
||||||
|
('registry', {
|
||||||
|
'WORKER_COUNT_REGISTRY': 12,
|
||||||
|
}, [0, 1], 10, 8, 64, 12),
|
||||||
|
|
||||||
|
# Override based on specific env var (ignores maximum).
|
||||||
|
('registry', {
|
||||||
|
'WORKER_COUNT_REGISTRY': 120,
|
||||||
|
}, [0, 1], 10, 8, 64, 120),
|
||||||
|
|
||||||
|
# Override based on specific env var (respects minimum).
|
||||||
|
('registry', {
|
||||||
|
'WORKER_COUNT_REGISTRY': 1,
|
||||||
|
}, [0, 1], 10, 8, 64, 8),
|
||||||
|
|
||||||
|
# Override based on generic env var.
|
||||||
|
('registry', {
|
||||||
|
'WORKER_COUNT': 12,
|
||||||
|
}, [0, 1], 10, 8, 64, 12),
|
||||||
|
|
||||||
|
# Override based on generic env var (ignores maximum).
|
||||||
|
('registry', {
|
||||||
|
'WORKER_COUNT': 120,
|
||||||
|
}, [0, 1], 10, 8, 64, 120),
|
||||||
|
|
||||||
|
# Override based on generic env var (respects minimum).
|
||||||
|
('registry', {
|
||||||
|
'WORKER_COUNT': 1,
|
||||||
|
}, [0, 1], 10, 8, 64, 8),
|
||||||
|
|
||||||
|
# Override always uses specific first.
|
||||||
|
('registry', {
|
||||||
|
'WORKER_COUNT_REGISTRY': 120,
|
||||||
|
'WORKER_COUNT': 12,
|
||||||
|
}, [0, 1], 10, 8, 64, 120),
|
||||||
|
|
||||||
|
# Non-matching override.
|
||||||
|
('verbs', {
|
||||||
|
'WORKER_COUNT_REGISTRY': 120,
|
||||||
|
}, [0, 1], 10, 8, 64, 20),
|
||||||
|
|
||||||
|
# Zero worker count (use defaults).
|
||||||
|
('verbs', {
|
||||||
|
'WORKER_COUNT': 0,
|
||||||
|
}, [0, 1], 10, 8, 64, 8),
|
||||||
|
])
|
||||||
|
def test_get_worker_count(kind_name, env_vars, cpu_affinity, multiplier, minimum, maximum,
|
||||||
|
expected):
|
||||||
|
class FakeProcess(object):
|
||||||
|
def __init__(self, pid):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def cpu_affinity(self):
|
||||||
|
return cpu_affinity
|
||||||
|
|
||||||
|
with patch('os.environ.get', env_vars.get):
|
||||||
|
with patch('psutil.Process', FakeProcess):
|
||||||
|
assert get_worker_count(kind_name, multiplier, minimum, maximum) == expected
|
31
util/workers.py
Normal file
31
util/workers.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
import os
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
def get_worker_count(worker_kind_name, multiplier, minimum=None, maximum=None):
|
||||||
|
""" Returns the number of gunicorn workers to run for the given worker kind,
|
||||||
|
based on a combination of environment variable, multiplier, minimum (if any),
|
||||||
|
and number of accessible CPU cores.
|
||||||
|
"""
|
||||||
|
minimum = minimum or multiplier
|
||||||
|
maximum = maximum or (multiplier * multiplier)
|
||||||
|
|
||||||
|
# Check for an override via an environment variable.
|
||||||
|
override_value = os.environ.get('WORKER_COUNT_' + worker_kind_name.upper())
|
||||||
|
if override_value is not None:
|
||||||
|
return max(override_value, minimum)
|
||||||
|
|
||||||
|
override_value = os.environ.get('WORKER_COUNT')
|
||||||
|
if override_value is not None:
|
||||||
|
return max(override_value, minimum)
|
||||||
|
|
||||||
|
# Load the number of CPU cores via affinity, and use that to calculate the
|
||||||
|
# number of workers to run.
|
||||||
|
p = psutil.Process(os.getpid())
|
||||||
|
|
||||||
|
try:
|
||||||
|
cpu_count = len(p.cpu_affinity())
|
||||||
|
except AttributeError:
|
||||||
|
# cpu_affinity isn't supported on this platform. Assume 2.
|
||||||
|
cpu_count = 2
|
||||||
|
|
||||||
|
return min(max(cpu_count * multiplier, minimum), maximum)
|
Reference in a new issue