Fix Dockerfile parsing for unicode and add testing

Fixes #423
This commit is contained in:
Joseph Schorr 2015-08-31 13:58:28 -04:00
parent 844e773ec4
commit fb86b4bf2c
3 changed files with 86 additions and 36 deletions

View file

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
import unittest
from util.dockerfileparse import parse_dockerfile
class TestParsedDockerfile(unittest.TestCase):
    """Checks parse_dockerfile's handling of FROM lines, comments and non-ASCII text.

    Every test parses a small inline Dockerfile and verifies the values
    reported by get_image_and_tag() and get_base_image().
    """

    def test_basic_parse(self):
        """A single tagged FROM line yields that image and tag."""
        parsed = parse_dockerfile("""
FROM someimage:latest
RUN dosomething
""")
        self.assertEqual(("someimage", "latest"), parsed.get_image_and_tag())
        self.assertEqual("someimage", parsed.get_base_image())

    def test_basic_parse_notag(self):
        """A FROM line without a tag is reported with the tag "latest"."""
        parsed = parse_dockerfile("""
FROM someimage
RUN dosomething
""")
        self.assertEqual(("someimage", "latest"), parsed.get_image_and_tag())
        self.assertEqual("someimage", parsed.get_base_image())

    def test_two_from_lines(self):
        """With two FROM lines, the second one is the one reported."""
        parsed = parse_dockerfile("""
FROM someimage:latest
FROM secondimage:second
""")
        self.assertEqual(("secondimage", "second"), parsed.get_image_and_tag())
        self.assertEqual("secondimage", parsed.get_base_image())

    def test_parse_comments(self):
        """Commented-out lines and trailing comments do not affect the result."""
        parsed = parse_dockerfile("""
# FROM someimage:latest
FROM anotherimage:foobar # This is a comment
RUN dosomething
""")
        self.assertEqual(("anotherimage", "foobar"), parsed.get_image_and_tag())
        self.assertEqual("anotherimage", parsed.get_base_image())

    def test_unicode_parse_as_ascii(self):
        """Non-ASCII content passed as a plain string literal still parses."""
        parsed = parse_dockerfile("""
FROM someimage:latest
MAINTAINER José Schorr <jschorr@whatever.com>
""")
        self.assertEqual(("someimage", "latest"), parsed.get_image_and_tag())
        self.assertEqual("someimage", parsed.get_base_image())

    def test_unicode_parse_as_unicode(self):
        """Non-ASCII content decoded from UTF-8 before the call still parses."""
        parsed = parse_dockerfile("""
FROM someimage:latest
MAINTAINER José Schorr <jschorr@whatever.com>
""".decode('utf-8'))
        self.assertEqual(("someimage", "latest"), parsed.get_image_and_tag())
        self.assertEqual("someimage", parsed.get_base_image())
# Run the test cases above when this module is executed directly.
if __name__ == '__main__':
    unittest.main()

View file

@ -1,26 +0,0 @@
from util.dockerfileparse import parse_dockerfile, ParsedDockerfile, serialize_dockerfile
# Manual round-trip check: parse Dockerfile.test, add an ENV command after the
# last FROM command, then print the result and write it to Dockerfile.test.out.
with open('Dockerfile.test', 'r') as dockerfileobj:
    parsed_dockerfile = parse_dockerfile(dockerfileobj.read())

quay_reponame = 'something'
env_command = {
    'command': 'ENV',
    'parameters': 'QUAY_REPOSITORY %s' % quay_reponame
}

# Collect the position of every FROM command; the ENV command goes directly
# after the last one. Nothing is inserted when there is no FROM at all.
from_indexes = [index
                for index, command in enumerate(parsed_dockerfile.commands)
                if command['command'] == 'FROM']
if from_indexes:
    parsed_dockerfile.commands.insert(from_indexes[-1] + 1, env_command)

image_and_tag_tuple = parsed_dockerfile.get_image_and_tag()
print(image_and_tag_tuple)

has_base_image = (image_and_tag_tuple is not None and
                  image_and_tag_tuple[0] is not None)
if not has_base_image:
    raise Exception('Missing FROM command in Dockerfile')

print(serialize_dockerfile(parsed_dockerfile))

with open('Dockerfile.test.out', 'w') as dockerfileobj:
    dockerfileobj.write(serialize_dockerfile(parsed_dockerfile))

View file

@ -63,7 +63,16 @@ class ParsedDockerfile(object):
def strip_comments(contents):
    """Remove comment text from every line of the given Dockerfile contents.

    Each line is cut at the first occurrence of COMMENT_CHARACTER, so both
    whole-line comments and trailing comments are dropped. Lines that do not
    contain the character are kept unchanged. A line that consisted only of a
    comment is kept as an empty string, so the number of lines is preserved.

    NOTE(review): the cut is purely textual, so a COMMENT_CHARACTER inside a
    quoted argument is also treated as the start of a comment -- confirm this
    is acceptable for the Dockerfiles being parsed.

    Returns the remaining lines joined back together with newlines.
    """
    stripped = []
    for line in contents.split('\n'):
        comment_start = line.find(COMMENT_CHARACTER)
        # find() returns -1 when the line holds no comment at all.
        stripped.append(line if comment_start < 0 else line[:comment_start])

    return '\n'.join(stripped)
@ -72,15 +81,22 @@ def join_continued_lines(contents):
def parse_dockerfile(contents):
# If we receive a byte string, decode it as UTF-8 into unicode.
try:
contents = contents.decode('utf-8')
except ValueError:
# Already unicode or unable to convert.
pass
contents = join_continued_lines(strip_comments(contents))
lines = [line for line in contents.split('\n') if len(line) > 0]
lines = [line.strip() for line in contents.split('\n') if len(line) > 0]
commands = []
for line in lines:
match_command = COMMAND_REGEX.match(line)
if match_command:
command = match_command.group(1).decode('utf-8').upper()
parameters = match_command.group(2).decode('utf-8')
command = match_command.group(1).upper()
parameters = match_command.group(2)
commands.append({
'command': command,
@ -88,9 +104,3 @@ def parse_dockerfile(contents):
})
return ParsedDockerfile(commands)
def serialize_dockerfile(parsed_dockerfile):
    """Render the parsed commands back into Dockerfile text.

    Each entry of parsed_dockerfile.commands becomes one line holding its
    'command' and 'parameters' values, both encoded as UTF-8 and separated
    by a single space. The lines are joined with newlines.
    """
    rendered_lines = []
    for entry in parsed_dockerfile.commands:
        keyword = entry['command'].encode('utf-8')
        arguments = entry['parameters'].encode('utf-8')
        rendered_lines.append(' '.join((keyword, arguments)))

    return '\n'.join(rendered_lines)