From c29f9ccc7fd14af77ffec5d6e7e7773a508f4c88 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 1 Aug 2016 13:18:21 -0400 Subject: [PATCH] Fix TTL on heartbeat in etcd Until now, once the heartbeat has expired, we would issue a TTL that is negative, which causes etcd to either raise an exception or simply ignore the expiration (depending on the version of etcd). This change ensures that once the key is expired, it is removed immediately via a set of a TTL of 0. Also adds tests for this case and the normal expiration case. --- buildman/manager/ephemeral.py | 7 ++++++- test/test_buildman.py | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/buildman/manager/ephemeral.py b/buildman/manager/ephemeral.py index 8694d8941..1e1e06af7 100644 --- a/buildman/manager/ephemeral.py +++ b/buildman/manager/ephemeral.py @@ -382,6 +382,7 @@ class EphemeralBuilderManager(BaseManager): max_expiration = datetime.utcnow() + timedelta(seconds=machine_max_expiration) payload = { + # TODO: remove expiration (but not max_expiration) after migration; not used. 'expiration': calendar.timegm(expiration.timetuple()), 'max_expiration': calendar.timegm(max_expiration.timetuple()), 'nonce': nonce, @@ -566,13 +567,17 @@ class EphemeralBuilderManager(BaseManager): new_expiration = datetime.utcnow() + timedelta(seconds=ttl) payload = { + # TODO: remove expiration (but not max_expiration) after migration; not used. 'expiration': calendar.timegm(new_expiration.timetuple()), 'job_queue_item': build_job.job_item, 'max_expiration': build_job_metadata['max_expiration'], 'had_heartbeat': True, } - yield From(self._etcd_client.write(job_key, json.dumps(payload), ttl=ttl)) + # Note: A TTL of < 0 in etcd results in the key *never being expired*. We use a max here + # to ensure that if the TTL is < 0, the key will expire immediately. + etcd_ttl = max(ttl, 0) + yield From(self._etcd_client.write(job_key, json.dumps(payload), ttl=etcd_ttl)) self.job_heartbeat_callback(build_job) def _etcd_job_key(self, build_job): diff --git a/test/test_buildman.py b/test/test_buildman.py index eebfd6eeb..e5849424c 100644 --- a/test/test_buildman.py +++ b/test/test_buildman.py @@ -374,7 +374,19 @@ class TestEphemeralLifecycle(EphemeralBuilderTestCase): @async_test def test_heartbeat_response(self): - expiration_timestamp = time.time() + 60 + yield From(self.assertHeartbeatWithExpiration(100, self.manager.heartbeat_period_sec * 2)) + + @async_test + def test_heartbeat_future_expiration(self): + yield From(self.assertHeartbeatWithExpiration(10, 10, ranged=True)) + + @async_test + def test_heartbeat_expired(self): + yield From(self.assertHeartbeatWithExpiration(-60, 0)) + + @coroutine + def assertHeartbeatWithExpiration(self, expires_in_sec, expected_ttl, ranged=False): + expiration_timestamp = time.time() + expires_in_sec builder_result = Mock(spec=etcd.EtcdResult) builder_result.value = json.dumps({ 'expiration': expiration_timestamp, @@ -392,6 +404,11 @@ class TestEphemeralLifecycle(EphemeralBuilderTestCase): self.assertTrue(job_key_data['had_heartbeat']) self.assertEquals(self.mock_job.job_item, job_key_data['job_queue_item']) + if not ranged: + self.assertEquals(expected_ttl, self.etcd_client_mock.write.call_args_list[0][1]['ttl']) + else: + self.assertTrue(self.etcd_client_mock.write.call_args_list[0][1]['ttl'] <= expected_ttl) + class TestEphemeral(EphemeralBuilderTestCase): """ Simple unit tests for the ephemeral builder around config management, starting and stopping