Add more detection conditions for unhealthy workers and improve the messaging shown when a worker becomes unhealthy.
This commit is contained in:
		
							parent
							
								
									67905c277e
								
							
						
					
					
						commit
						510bbe7889
					
				
					 2 changed files with 7 additions and 5 deletions
				
			
		|  | @ -41,12 +41,13 @@ def matches_system_error(status_str): | ||||||
|   """ Returns true if the given status string matches a known system error in the |   """ Returns true if the given status string matches a known system error in the | ||||||
|       Docker builder. |       Docker builder. | ||||||
|   """ |   """ | ||||||
|   KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied'] |   KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied', | ||||||
|  |                    'lxc-start: The container failed'] | ||||||
| 
 | 
 | ||||||
|   for match in KNOWN_MATCHES: |   for match in KNOWN_MATCHES: | ||||||
|     # 4 because we might have a Unix control code at the start. |     # 10 because we might have a Unix control code at the start. | ||||||
|     found = status_str.find(match[0:len(match) + 4]) |     found = status_str.find(match[0:len(match) + 10]) | ||||||
|     if found >= 0 and found <= 4:  |     if found >= 0 and found <= 10:  | ||||||
|       return True |       return True | ||||||
| 
 | 
 | ||||||
|   return False |   return False | ||||||
|  | @ -613,6 +614,7 @@ class DockerfileBuildWorker(Worker): | ||||||
| 
 | 
 | ||||||
|     except WorkerUnhealthyException as exc: |     except WorkerUnhealthyException as exc: | ||||||
|       # Spawn a notification that the build has failed. |       # Spawn a notification that the build has failed. | ||||||
|  |       log_appender('Worker has become unhealthy. Will retry shortly.', build_logs.ERROR) | ||||||
|       spawn_failure(exc.message, event_data) |       spawn_failure(exc.message, event_data) | ||||||
|        |        | ||||||
|       # Raise the exception to the queue. |       # Raise the exception to the queue. | ||||||
|  |  | ||||||
|  | @ -135,8 +135,8 @@ class Worker(object): | ||||||
| 
 | 
 | ||||||
|       except WorkerUnhealthyException: |       except WorkerUnhealthyException: | ||||||
|         logger.error('The worker has encountered an error and will not take new jobs. Job is being requeued.') |         logger.error('The worker has encountered an error and will not take new jobs. Job is being requeued.') | ||||||
|         self._stop.set() |  | ||||||
|         self.mark_current_incomplete(restore_retry=True) |         self.mark_current_incomplete(restore_retry=True) | ||||||
|  |         self._stop.set() | ||||||
| 
 | 
 | ||||||
|       finally: |       finally: | ||||||
|         # Close the db handle periodically |         # Close the db handle periodically | ||||||
|  |  | ||||||
		Reference in a new issue