Add support for adjusting etcd ttl on job_heartbeat. Switch the heartbeat method to a coroutine.

This commit is contained in:
Jake Moshenko 2014-12-22 17:24:44 -05:00
parent 2b6c2a2a50
commit 34bf92673b
6 changed files with 62 additions and 13 deletions

View file

@ -83,7 +83,8 @@ class EphemeralBuilderManager(BaseManager):
etcd_port = self._manager_config.get('ETCD_PORT', 2379)
logger.debug('Connecting to etcd on %s:%s', etcd_host, etcd_port)
self._async_thread_executor = ThreadPoolExecutor(self._manager_config.get('ETCD_WORKERS', 5))
worker_threads = self._manager_config.get('ETCD_WORKER_THREADS', 5)
self._async_thread_executor = ThreadPoolExecutor(worker_threads)
self._etcd_client = AsyncWrapper(self._etcd_client_klass(host=etcd_host, port=etcd_port),
executor=self._async_thread_executor)
@ -131,14 +132,15 @@ class EphemeralBuilderManager(BaseManager):
# First try to take a lock for this job, meaning we will be responsible for its lifeline
realm = str(uuid.uuid4())
token = str(uuid.uuid4())
expiration = datetime.utcnow() + timedelta(seconds=self.setup_time())
ttl = self.setup_time()
expiration = datetime.utcnow() + timedelta(seconds=ttl)
payload = {
'expiration': calendar.timegm(expiration.timetuple()),
}
try:
yield From(self._etcd_client.write(job_key, payload, prevExist=False))
yield From(self._etcd_client.write(job_key, payload, prevExist=False, ttl=ttl))
component = self.register_component(realm, BuildComponent, token=token)
self._component_to_job[component] = build_job
except KeyError:
@ -168,11 +170,14 @@ class EphemeralBuilderManager(BaseManager):
def build_component_disposed(self, build_component, timed_out):
    """ Unregister a build component that has been disposed.

    The timed_out argument is accepted but not consulted here; the component
    is unregistered in every case.
    """
    logger.debug('Calling build_component_disposed.')

    # TODO: skip the unregister step when the component was disposed because it timed out.
    self.unregister_component(build_component)
@coroutine
def job_completed(self, build_job, job_status, build_component):
logger.debug('Calling job_completed with status: %s', job_status)
# Kill he ephmeral builder
# Kill the ephemeral builder
self._executor.stop_builder(self._component_to_builder.pop(build_component))
# Release the lock in etcd
@ -181,6 +186,24 @@ class EphemeralBuilderManager(BaseManager):
self.job_complete_callback(build_job, job_status)
@coroutine
def job_heartbeat(self, build_job):
    """ Push back the etcd deadline for a build job, then signal the heartbeat callback.

    The job's current etcd entry is read, and a replacement entry carrying a
    new expiration timestamp and the existing builder_id is written back under
    the same key. The TTL used for the write is twice heartbeat_period_sec.
    """
    etcd_key = self._etcd_job_key(build_job)
    current_entry = yield From(self._etcd_client.read(etcd_key))

    # The deadline is computed only after the read has completed.
    lifetime_sec = self.heartbeat_period_sec * 2
    deadline = datetime.utcnow() + timedelta(seconds=lifetime_sec)
    expires_at = calendar.timegm(deadline.timetuple())

    # Carry the builder id over from the entry that is being replaced.
    builder_id = current_entry.value['builder_id']
    refreshed_entry = {'expiration': expires_at, 'builder_id': builder_id}

    yield From(self._etcd_client.write(etcd_key, refreshed_entry, ttl=lifetime_sec))

    self.job_heartbeat_callback(build_job)
@coroutine
def _clean_up_old_builder(self, job_key, job_payload):
""" Terminate an old builder once the expiration date has passed.
@ -197,3 +220,8 @@ class EphemeralBuilderManager(BaseManager):
""" Create a key which is used to track a job in etcd.
"""
return os.path.join(ETCD_BUILDER_PREFIX, build_job.job_details['build_uuid'])
def num_workers(self):
    """ Count the workers this manager is handling locally.

    The count is the number of entries in the component-to-builder mapping.
    """
    locally_managed = self._component_to_builder
    return len(locally_managed)