From ce3f8b438c32a91e6d5b26c5cb025c912365e61e Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 29 Jan 2015 18:01:42 -0500 Subject: [PATCH 1/5] Fix pull credentials bug, fix job details parse bug and add some better logging --- buildman/component/buildcomponent.py | 13 +++++-------- buildman/jobutil/buildjob.py | 5 +++++ buildman/server.py | 1 + 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/buildman/component/buildcomponent.py b/buildman/component/buildcomponent.py index c31d7aafe..0a364e45a 100644 --- a/buildman/component/buildcomponent.py +++ b/buildman/component/buildcomponent.py @@ -116,9 +116,9 @@ class BuildComponent(BaseComponent): status_dict['total_commands'] = len(parsed_dockerfile.commands) # Add the pull robot information, if any. - if build_config.get('pull_credentials') is not None: - base_image_information['username'] = build_config['pull_credentials'].get('username', '') - base_image_information['password'] = build_config['pull_credentials'].get('password', '') + if build_job.pull_credentials: + base_image_information['username'] = build_job.pull_credentials.get('username', '') + base_image_information['password'] = build_job.pull_credentials.get('password', '') # Retrieve the repository's fully qualified name. repo = build_job.repo_build.repository @@ -244,7 +244,7 @@ class BuildComponent(BaseComponent): def _build_failure(self, error_message, exception=None): """ Handles and logs a failed build. 
""" self._build_status.set_error(error_message, { - 'internal_error': exception.message if exception else None + 'internal_error': str(exception) if exception else None }) build_id = self._current_job.repo_build.uuid @@ -360,14 +360,11 @@ class BuildComponent(BaseComponent): def _timeout(self): yield trollius.From(self._set_status(ComponentStatus.TIMED_OUT)) logger.warning('Build component with realm %s has timed out', self.builder_realm) - self._dispose(timed_out=True) - def _dispose(self, timed_out=False): # If we still have a running job, then it has not completed and we need to tell the parent # manager. if self._current_job is not None: - if timed_out: - self._build_status.set_error('Build worker timed out', internal_error=True) + self._build_status.set_error('Build worker timed out', internal_error=True) self.parent_manager.job_completed(self._current_job, BuildJobResult.INCOMPLETE, self) self._build_status = None diff --git a/buildman/jobutil/buildjob.py b/buildman/jobutil/buildjob.py index c2d2769db..d120417f7 100644 --- a/buildman/jobutil/buildjob.py +++ b/buildman/jobutil/buildjob.py @@ -33,6 +33,11 @@ class BuildJob(object): def repo_build(self): return self._load_repo_build() + @property + def pull_credentials(self): + """ Returns the pull credentials for this job, or None if none. """ + return self.job_details.get('pull_credentials') + @property def build_config(self): try: diff --git a/buildman/server.py b/buildman/server.py index 324e81bcd..4734f8d4f 100644 --- a/buildman/server.py +++ b/buildman/server.py @@ -154,6 +154,7 @@ class BuilderServer(object): except BuildJobLoadException as irbe: logger.exception(irbe) self._queue.incomplete(job_item, restore_retry=False) + continue logger.debug('Build job found. 
Checking for an avaliable worker.') scheduled = yield From(self._lifecycle_manager.schedule(build_job)) From 60eae43ae4de01724b33c441edfef913ed7f2fcd Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 29 Jan 2015 18:05:05 -0500 Subject: [PATCH 2/5] Add the date time to the log entries --- buildman/jobutil/buildstatus.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/buildman/jobutil/buildstatus.py b/buildman/jobutil/buildstatus.py index 1b4670c87..217e3aa6c 100644 --- a/buildman/jobutil/buildstatus.py +++ b/buildman/jobutil/buildstatus.py @@ -1,5 +1,6 @@ from data.database import BUILD_PHASE from data import model +import datetime class StatusHandler(object): """ Context wrapper for writing status to build logs. """ @@ -20,6 +21,8 @@ class StatusHandler(object): self.__exit__(None, None, None) def _append_log_message(self, log_message, log_type=None, log_data=None): + log_data = log_data or {} + log_data['datetime'] = str(datetime.datetime.now()) self._build_logs.append_log_message(self._uuid, log_message, log_type, log_data) def append_log(self, log_message, extra_data=None): From 0e5f6dc17d17483d73e7ce0603d19c6e7a334472 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 29 Jan 2015 18:13:31 -0500 Subject: [PATCH 3/5] Fix typo in timed out --- buildman/component/buildcomponent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildman/component/buildcomponent.py b/buildman/component/buildcomponent.py index 0a364e45a..d2df26ec9 100644 --- a/buildman/component/buildcomponent.py +++ b/buildman/component/buildcomponent.py @@ -371,4 +371,4 @@ class BuildComponent(BaseComponent): self._current_job = None # Unregister the current component so that it cannot be invoked again. 
- self.parent_manager.build_component_disposed(self, timed_out) + self.parent_manager.build_component_disposed(self, True) From a6fa08c19c15b3a52c6842ab4c4c286be4684617 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 29 Jan 2015 18:21:32 -0500 Subject: [PATCH 4/5] Change returns to trollius returns --- buildman/component/buildcomponent.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/buildman/component/buildcomponent.py b/buildman/component/buildcomponent.py index d2df26ec9..cd5c4b4e8 100644 --- a/buildman/component/buildcomponent.py +++ b/buildman/component/buildcomponent.py @@ -294,7 +294,7 @@ class BuildComponent(BaseComponent): version) raise trollius.Return(False) - if self._component_status != 'waiting': + if self._component_status != 'waiting': logger.warning('Build component (token "%s") is already connected', self.expected_token) raise trollius.Return(False) @@ -334,7 +334,7 @@ class BuildComponent(BaseComponent): # If the component is no longer running or actively building, nothing more to do. if (self._component_status != ComponentStatus.RUNNING and self._component_status != ComponentStatus.BUILDING): - return + raise trollius.Return() # If there is an active build, write the heartbeat to its status. 
build_status = self._build_status @@ -352,12 +352,15 @@ class BuildComponent(BaseComponent): if (self._last_heartbeat and self._last_heartbeat < datetime.datetime.utcnow() - HEARTBEAT_DELTA): yield trollius.From(self._timeout()) - return + raise trollius.Return() yield trollius.From(trollius.sleep(HEARTBEAT_TIMEOUT)) @trollius.coroutine def _timeout(self): + if self._component_status == ComponentStatus.TIMED_OUT: + raise trollius.Return() + yield trollius.From(self._set_status(ComponentStatus.TIMED_OUT)) logger.warning('Build component with realm %s has timed out', self.builder_realm) From 838bfe23b134d4087b8b755d86a78455a4d7fedb Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 29 Jan 2015 18:33:17 -0500 Subject: [PATCH 5/5] Remove retries update in the extend processing call and make sure it is under a transaction --- buildman/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildman/server.py b/buildman/server.py index 4734f8d4f..ce185fff6 100644 --- a/buildman/server.py +++ b/buildman/server.py @@ -121,7 +121,7 @@ class BuilderServer(object): def _job_heartbeat(self, build_job): WorkQueue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS, - retry_count=1, minimum_extension=MINIMUM_JOB_EXTENSION) + minimum_extension=MINIMUM_JOB_EXTENSION) def _job_complete(self, build_job, job_status): if job_status == BuildJobResult.INCOMPLETE: