Fix Dockerfile parsing for unicode and add testing

Fixes #423
This commit is contained in:
Joseph Schorr 2015-08-31 13:58:28 -04:00
parent 844e773ec4
commit fb86b4bf2c
3 changed files with 86 additions and 36 deletions

View file

@@ -63,7 +63,16 @@ class ParsedDockerfile(object):
def strip_comments(contents):
    """Remove comments from Dockerfile text.

    Each line is cut at the first occurrence of COMMENT_CHARACTER; lines
    without it are kept unchanged. Lines are never dropped, so a line that
    consists only of a comment becomes an empty line and the line count of
    the result matches the input.

    Args:
        contents: The Dockerfile text, with lines separated by '\n'.

    Returns:
        The text with the comment portion of every line removed, re-joined
        with '\n'.
    """
    # NOTE(review): this truncates at COMMENT_CHARACTER anywhere in a line,
    # including inside instruction arguments. The Dockerfile reference only
    # treats a marker at the start of a line as a comment -- confirm that
    # inline stripping is intended.
    lines = []
    for line in contents.split('\n'):
        index = line.find(COMMENT_CHARACTER)
        if index >= 0:
            # Keep only the text that precedes the comment marker.
            line = line[:index]
        lines.append(line)
    return '\n'.join(lines)
@@ -72,15 +81,22 @@ def join_continued_lines(contents):
def parse_dockerfile(contents):
# If we receive ASCII, translate into unicode.
try:
contents = contents.decode('utf-8')
except ValueError:
# Already unicode or unable to convert.
pass
contents = join_continued_lines(strip_comments(contents))
lines = [line for line in contents.split('\n') if len(line) > 0]
lines = [line.strip() for line in contents.split('\n') if len(line) > 0]
commands = []
for line in lines:
match_command = COMMAND_REGEX.match(line)
if match_command:
command = match_command.group(1).decode('utf-8').upper()
parameters = match_command.group(2).decode('utf-8')
command = match_command.group(1).upper()
parameters = match_command.group(2)
commands.append({
'command': command,
@@ -88,9 +104,3 @@ def parse_dockerfile(contents):
})
return ParsedDockerfile(commands)
def serialize_dockerfile(parsed_dockerfile):
    """Render parsed Dockerfile commands back into text.

    Each entry of parsed_dockerfile.commands contributes one output line
    made of its UTF-8 encoded 'command' and 'parameters' values separated
    by a single space; the lines are joined with '\n'.
    """
    rendered = []
    for entry in parsed_dockerfile.commands:
        keyword = entry['command'].encode('utf-8')
        arguments = entry['parameters'].encode('utf-8')
        rendered.append(' '.join([keyword, arguments]))
    return '\n'.join(rendered)