Merge pull request #168 from coreos-inc/etcdindex
Fix ephemeral build manager to ask for watches in index order with no gaps
This commit is contained in:
		
						commit
						2ade08468d
					
				
					 2 changed files with 31 additions and 18 deletions
				
			
		|  | @ -68,28 +68,41 @@ class EphemeralBuilderManager(BaseManager): | |||
| 
 | ||||
|     super(EphemeralBuilderManager, self).__init__(*args, **kwargs) | ||||
| 
 | ||||
|   def _watch_etcd(self, etcd_key, change_callback, recursive=True): | ||||
|   def _watch_etcd(self, etcd_key, change_callback, start_index=None, recursive=True): | ||||
|     watch_task_key = (etcd_key, recursive) | ||||
|     def callback_wrapper(changed_key_future): | ||||
|       new_index = start_index | ||||
|       etcd_result = None | ||||
| 
 | ||||
|       if not changed_key_future.cancelled(): | ||||
|         try: | ||||
|           etcd_result = changed_key_future.result() | ||||
|           existing_index = getattr(etcd_result, 'etcd_index', None) | ||||
|           new_index = etcd_result.modifiedIndex + 1 | ||||
| 
 | ||||
|           logger.debug('Got watch of key: %s%s at #%s with result: %s', etcd_key, | ||||
|                        '*' if recursive else '', existing_index, etcd_result) | ||||
| 
 | ||||
|         except ReadTimeoutError: | ||||
|           logger.debug('Read-timeout on etcd watch: %s', etcd_key) | ||||
| 
 | ||||
|         except (ProtocolError, etcd.EtcdException): | ||||
|           logger.exception('Exception on etcd watch: %s', etcd_key) | ||||
| 
 | ||||
|       if watch_task_key not in self._watch_tasks or self._watch_tasks[watch_task_key].done(): | ||||
|         self._watch_etcd(etcd_key, change_callback) | ||||
|         self._watch_etcd(etcd_key, change_callback, start_index=new_index) | ||||
| 
 | ||||
|       if changed_key_future.cancelled(): | ||||
|         # Due to lack of interest, tomorrow has been cancelled | ||||
|         return | ||||
| 
 | ||||
|       try: | ||||
|         etcd_result = changed_key_future.result() | ||||
|       except (ReadTimeoutError, ProtocolError, etcd.EtcdException): | ||||
|         return | ||||
| 
 | ||||
|       change_callback(etcd_result) | ||||
|       if etcd_result: | ||||
|         change_callback(etcd_result) | ||||
| 
 | ||||
|     if not self._shutting_down: | ||||
|       watch_future = self._etcd_client.watch(etcd_key, recursive=recursive, | ||||
|       logger.debug('Scheduling watch of key: %s%s at start index %s', etcd_key, | ||||
|                    '*' if recursive else '', start_index) | ||||
| 
 | ||||
|       watch_future = self._etcd_client.watch(etcd_key, recursive=recursive, index=start_index, | ||||
|                                              timeout=ETCD_DISABLE_TIMEOUT) | ||||
|       watch_future.add_done_callback(callback_wrapper) | ||||
|       logger.debug('Scheduling watch of key: %s%s', etcd_key, '/*' if recursive else '') | ||||
| 
 | ||||
|       self._watch_tasks[watch_task_key] = async(watch_future) | ||||
| 
 | ||||
|   @coroutine | ||||
|  | @ -329,7 +342,7 @@ class EphemeralBuilderManager(BaseManager): | |||
|                    job.job_details['build_uuid'], build_component.builder_realm) | ||||
|       yield From(build_component.start_build(job)) | ||||
|     except (KeyError, etcd.EtcdKeyError): | ||||
|       logger.exception('Builder is asking for more work, but work already completed') | ||||
|       logger.warning('Builder is asking for more work, but work already completed') | ||||
| 
 | ||||
|   def build_component_disposed(self, build_component, timed_out): | ||||
|     logger.debug('Calling build_component_disposed.') | ||||
|  |  | |||
		Reference in a new issue