From fb86b4bf2c1c2e900145863ca272835d82b28df6 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 31 Aug 2015 13:58:28 -0400 Subject: [PATCH] Fix Dockerfile parsing for unicode and add testing Fixes #423 --- test/test_dockerfileparse.py | 66 ++++++++++++++++++++++++++++++++++++ tools/reparsedockerfile.py | 26 -------------- util/dockerfileparse.py | 30 ++++++++++------ 3 files changed, 86 insertions(+), 36 deletions(-) create mode 100644 test/test_dockerfileparse.py delete mode 100644 tools/reparsedockerfile.py diff --git a/test/test_dockerfileparse.py b/test/test_dockerfileparse.py new file mode 100644 index 000000000..719c4f246 --- /dev/null +++ b/test/test_dockerfileparse.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +import unittest + +from util.dockerfileparse import parse_dockerfile + +class TestParsedDockerfile(unittest.TestCase): + def test_basic_parse(self): + parsed = parse_dockerfile(""" + FROM someimage:latest + RUN dosomething + """) + + self.assertEquals(("someimage", "latest"), parsed.get_image_and_tag()) + self.assertEquals("someimage", parsed.get_base_image()) + + def test_basic_parse_notag(self): + parsed = parse_dockerfile(""" + FROM someimage + RUN dosomething + """) + + self.assertEquals(("someimage", "latest"), parsed.get_image_and_tag()) + self.assertEquals("someimage", parsed.get_base_image()) + + def test_two_from_lines(self): + parsed = parse_dockerfile(""" + FROM someimage:latest + FROM secondimage:second + """) + + self.assertEquals(("secondimage", "second"), parsed.get_image_and_tag()) + self.assertEquals("secondimage", parsed.get_base_image()) + + def test_parse_comments(self): + parsed = parse_dockerfile(""" + # FROM someimage:latest + FROM anotherimage:foobar # This is a comment + RUN dosomething + """) + + self.assertEquals(("anotherimage", "foobar"), parsed.get_image_and_tag()) + self.assertEquals("anotherimage", parsed.get_base_image()) + + def test_unicode_parse_as_ascii(self): + parsed = parse_dockerfile(""" + FROM someimage:latest + MAINTAINER José Schorr + """) + + self.assertEquals(("someimage", "latest"), parsed.get_image_and_tag()) + self.assertEquals("someimage", parsed.get_base_image()) + + + def test_unicode_parse_as_unicode(self): + parsed = parse_dockerfile(""" + FROM someimage:latest + MAINTAINER José Schorr + """.decode('utf-8')) + + self.assertEquals(("someimage", "latest"), parsed.get_image_and_tag()) + self.assertEquals("someimage", parsed.get_base_image()) + +if __name__ == '__main__': + unittest.main() + diff --git a/tools/reparsedockerfile.py b/tools/reparsedockerfile.py deleted file mode 100644 index 69c6251a0..000000000 --- a/tools/reparsedockerfile.py +++ /dev/null @@ -1,26 +0,0 @@ -from util.dockerfileparse import parse_dockerfile, ParsedDockerfile, serialize_dockerfile - -with open('Dockerfile.test', 'r') as dockerfileobj: - parsed_dockerfile = parse_dockerfile(dockerfileobj.read()) - -quay_reponame = 'something' -env_command = { - 'command': 'ENV', - 'parameters': 'QUAY_REPOSITORY %s' % quay_reponame -} - -for index, command in reversed(list(enumerate(parsed_dockerfile.commands))): - if command['command'] == 'FROM': - new_command_index = index + 1 - parsed_dockerfile.commands.insert(new_command_index, env_command) - break - -image_and_tag_tuple = parsed_dockerfile.get_image_and_tag() -print image_and_tag_tuple -if image_and_tag_tuple is None or image_and_tag_tuple[0] is None: - raise Exception('Missing FROM command in Dockerfile') - -print serialize_dockerfile(parsed_dockerfile) - -with open('Dockerfile.test.out', 'w') as dockerfileobj: - dockerfileobj.write(serialize_dockerfile(parsed_dockerfile)) diff --git a/util/dockerfileparse.py b/util/dockerfileparse.py index 4dde4b0e6..3904ae96e 100644 --- a/util/dockerfileparse.py +++ b/util/dockerfileparse.py @@ -63,7 +63,16 @@ class ParsedDockerfile(object): def strip_comments(contents): - lines = [line for line in contents.split('\n') if not line.startswith(COMMENT_CHARACTER)] + lines = [] + for line in contents.split('\n'): + index = line.find(COMMENT_CHARACTER) + if index < 0: + lines.append(line) + continue + + line = line[:index] + lines.append(line) + return '\n'.join(lines) @@ -72,15 +81,22 @@ def join_continued_lines(contents): def parse_dockerfile(contents): + # If we receive ASCII, translate into unicode. + try: + contents = contents.decode('utf-8') + except ValueError: + # Already unicode or unable to convert. + pass + contents = join_continued_lines(strip_comments(contents)) - lines = [line for line in contents.split('\n') if len(line) > 0] + lines = [line.strip() for line in contents.split('\n') if len(line) > 0] commands = [] for line in lines: match_command = COMMAND_REGEX.match(line) if match_command: - command = match_command.group(1).decode('utf-8').upper() - parameters = match_command.group(2).decode('utf-8') + command = match_command.group(1).upper() + parameters = match_command.group(2) commands.append({ 'command': command, @@ -88,9 +104,3 @@ def parse_dockerfile(contents): }) return ParsedDockerfile(commands) - - -def serialize_dockerfile(parsed_dockerfile): - return '\n'.join([' '.join([command['command'].encode('utf-8'), - command['parameters'].encode('utf-8')]) - for command in parsed_dockerfile.commands])