cosmopolitan/third_party/python/Lib/test/test_string_literals.py
Justine Tunney 39bf41f4eb Make numerous improvements
- Python static hello world now 1.8mb
- Python static fully loaded now 10mb
- Python HTTPS client now uses MbedTLS
- Python REPL now completes import stmts
- Increase stack size for Python for now
- Begin synthesizing posixpath and ntpath
- Restore Python \N{UNICODE NAME} support
- Restore Python NFKD symbol normalization
- Add optimized code path for Intel SHA-NI
- Get more Python unit tests passing faster
- Get Python help() pagination working on NT
- Python hashlib now supports MbedTLS PBKDF2
- Make memcpy/memmove/memcmp/bcmp/etc. faster
- Add Mersenne Twister and Vigna to LIBC_RAND
- Provide privileged __printf() for error code
- Fix zipos opendir() so that it reports ENOTDIR
- Add basic chmod() implementation for Windows NT
- Add Cosmo's best functions to Python cosmo module
- Pin function trace indent depth to that of caller
- Show memory diagram on invalid access in MODE=dbg
- Differentiate stack overflow on crash in MODE=dbg
- Add stb_truetype and tools for analyzing font files
- Upgrade to UNICODE 13 and reduce its binary footprint
- COMPILE.COM now logs resource usage of build commands
- Start implementing basic poll() support on bare metal
- Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
- Add descriptions to strerror() in non-TINY build modes
- Add COUNTBRANCH() macro to help with micro-optimizations
- Make error / backtrace / asan / memory code more unbreakable
- Add fast perfect C implementation of μ-Law and a-Law audio codecs
- Make strtol() functions consistent with other libc implementations
- Improve Linenoise implementation (see also github.com/jart/bestline)
- COMPILE.COM now suppresses stdout/stderr of successful build commands
2021-09-28 01:52:34 -07:00

250 lines
9.8 KiB
Python

r"""Test correct treatment of various string literals by the parser.
There are four types of string literals:
'abc' -- normal str
r'abc' -- raw str
b'xyz' -- normal bytes
br'xyz' | rb'xyz' -- raw bytes
The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).
The tricky thing is what should happen when non-ASCII bytes are used
inside literals. For bytes literals, this is considered illegal. But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).
We have to test this with various file encodings. We also test it with
exec()/eval(), which uses a different code path.
This file is really about correct treatment of encodings and
backslashes. It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
import os
import sys
import shutil
import tempfile
import warnings
import unittest
# Source text of the modules generated by TestLiterals.check_encoding().
# The single %s is replaced with the encoding name, so every generated file
# starts with a matching "# coding:" declaration.  The body is spelled with
# escapes only (test_template verifies it is printable ASCII plus newlines)
# and its asserts run when the generated module is imported: normal literals
# must decode \x, \u and \U escapes, raw literals must keep them verbatim.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
def byte(i):
    """Return a bytes object of length one whose only byte has value *i*."""
    return bytes((i,))
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are parsed, via eval() and via import.

    The ``test_eval_*`` methods feed literal source text to eval(); the
    ``test_file_*`` methods write a module in a given source encoding into a
    scratch directory and import it.
    """

    def setUp(self):
        """Create a scratch directory and put it first on sys.path."""
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        """Restore sys.path and remove the scratch directory."""
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # Use a real unittest assertion rather than a bare ``assert`` so
            # the check still runs when the interpreter is started with -O.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is checked twice: with a raw outer literal, so that
        # eval() itself has to decode the escape, and with a normal outer
        # literal, so that eval() sees the already-decoded character.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected at every length
        # short of the required 2, 4 and 8 hex digits respectively.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # Every ASCII character that does not start a str escape sequence
        # must warn and leave the backslash in the resulting string.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # The warning is attributed to '<string>' and to the line that
        # holds the bad escape (line 2 of the triple-quoted literal).
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        # With the warning turned into an error, a SyntaxError carrying the
        # same location is raised and nothing is recorded as a warning.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_str_raw(self):
        # Raw outer literal: eval() sees the backslash and must keep it.
        # Normal outer literal: eval() sees the decoded character inside r''.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # A non-ASCII character placed directly inside a bytes literal
        # (normal outer literal below) must be a SyntaxError.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes are rejected in bytes literals too.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # Same as the str variant, except that N, U and u are not in the
        # skip list here, so \N, \U and \u are expected to warn as well.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br and rb) are exercised for every case.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated or over-long prefix combinations are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own but cannot be combined
        # with r or b in either order.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE as a module encoded in *encoding* and import it.

        The module is named ``xx_<encoding>`` (dashes replaced by
        underscores) and created inside ``self.tmpdir``, which setUp() put
        on sys.path.  *extra* is appended verbatim after the template.

        Any exception raised while importing the generated module (for
        example a failed assert or a SyntaxError) propagates to the caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        # The context manager closes the file before the import below,
        # including when one of the writes fails.
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        # Forget the module so a later test reusing the name re-imports it.
        del sys.modules[modname]

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must make the
        # generated module fail to import with SyntaxError.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    # This check used to be disabled by a bare ``return`` as the first
    # statement, which left the real check unreachable and reported the test
    # as passed.  An explicit skip keeps it disabled but visible in reports.
    # NOTE(review): presumably the latin9 codec is not available in this
    # build -- confirm, and remove the decorator once the check can run.
    @unittest.skip("latin9 source-encoding check is disabled")
    def test_file_latin9(self):
        self.check_encoding("latin9")
if __name__ == "__main__":
    # Executed as a script: hand control to the unittest command-line runner.
    unittest.main()