import re

# Matches a line continuation: optional whitespace, a backslash, optional
# trailing spaces/tabs/carriage-return, then the newline. Raw strings are
# required here: in a normal string literal the doubled backslash collapses
# to a single one and the following "\s" is then read by the regex engine as
# an escaped backslash plus a literal "s", which breaks the pattern.
LINE_CONTINUATION_REGEX = re.compile(r'\s*\\[ \t\r]*\n')

# Matches "<INSTRUCTION> <parameters>" anchored at the start of a line.
COMMAND_REGEX = re.compile(r'([A-Za-z]+)\s(.*)')

COMMENT_CHARACTER = '#'


class ParsedDockerfile(object):
    """Holds the commands extracted from a Dockerfile.

    ``commands`` is a list of dicts, each with a ``'command'`` key (the
    upper-cased instruction name) and a ``'parameters'`` key (the rest of
    the line).
    """

    def __init__(self, commands):
        self.commands = commands

    def get_commands_of_kind(self, kind):
        """Return every command whose instruction name equals ``kind``."""
        return [entry for entry in self.commands if entry['command'] == kind]

    def get_base_image(self):
        """Return the base image of the last FROM command without its tag.

        Returns None when there is no FROM command or its parameters are
        empty.
        """
        image_and_tag = self.get_base_image_and_tag()
        if not image_and_tag:
            return None
        return self.base_image_from_repo_identifier(image_and_tag)

    @staticmethod
    def base_image_from_repo_identifier(image_and_tag):
        """Strip a trailing ``:tag`` from an image reference, if present.

        Dockerfile image references can take multiple forms:
            somepath
            server/some/path
            server:port/some/path
            server/some/path:tag
            server:port/some/path:tag
        """
        parts = image_and_tag.strip().split(':')

        # No colon at all: plain "somepath".
        if len(parts) == 1:
            return parts[0]

        # A slash after the last colon means that colon introduced a port
        # (part of the hostname), not a tag, so there is nothing to strip.
        # The reference is returned exactly as given.
        if '/' in parts[-1]:
            return image_and_tag

        # Remaining cases end in ":tag"; drop the final component.
        return ':'.join(parts[:-1])

    def get_base_image_and_tag(self):
        """Return the parameters of the last FROM command, or None."""
        from_commands = self.get_commands_of_kind('FROM')
        if not from_commands:
            return None
        return from_commands[-1]['parameters']


def strip_comments(contents):
    """Remove every line that begins with the comment character."""
    kept = [line for line in contents.split('\n')
            if not line.startswith(COMMENT_CHARACTER)]
    return '\n'.join(kept)


def join_continued_lines(contents):
    """Join lines ending in a continuation backslash with the next line.

    The backslash, the whitespace around it and the newline are all removed.
    """
    return LINE_CONTINUATION_REGEX.sub('', contents)


def parse_dockerfile(contents):
    """Parse Dockerfile text into a ParsedDockerfile.

    Comment lines are dropped first, then continued lines are joined. Each
    remaining non-empty line that matches ``COMMAND_REGEX`` becomes one
    command; lines that do not match are ignored.
    """
    contents = join_continued_lines(strip_comments(contents))

    matches = (COMMAND_REGEX.match(line)
               for line in contents.split('\n') if line)
    commands = [
        {
            'command': match.group(1).upper(),
            'parameters': match.group(2),
        }
        for match in matches if match
    ]
    return ParsedDockerfile(commands)


def serialize_dockerfile(parsed_dockerfile):
    """Render the parsed commands back to text, one command per line."""
    return '\n'.join(' '.join([entry['command'], entry['parameters']])
                     for entry in parsed_dockerfile.commands)