Improve Libc by making Python work even better

Actually Portable Python is now outperforming the Python binaries
that come bundled with Linux distros, at things like HTTP serving.
You can now have a fully featured Python install in just one .com
file that runs on six operating systems and is about 10 MB in size.
With tuning, the tiniest is ~1 MB. We've got most of the libraries
working, including pysqlite, and the repl now feels very pleasant.
The things you can't do quite yet are threads and shared objects,
but those can happen in the future if the community falls in love
with this project and wants to see it developed further. Changes:

- Add siginterrupt()
- Add sqlite3 to Python
- Add issymlink() helper
- Make GetZipCdir() faster
- Add tgamma() and finite()
- Add legacy function lutimes()
- Add readlink() and realpath()
- Use heap allocations when appropriate
- Reorganize Python into two-stage build
- Save Lua / Python shell history to dotfile
- Integrate Python Lib embedding into linkage
- Make isregularfile() and isdirectory() go faster
- Make Python shell auto-completion work perfectly
- Make crash reports work better if the working directory has changed
- Fix Python+NT open() / access() flag overflow error
- Disable Python tests relating to \N{LONG NAME} syntax
- Have Python REPL copyright() show all notice embeddings

The biggest technical challenge at the moment is working around
cases where Python tries to be too clever about filenames.
This commit is contained in:
Justine Tunney 2021-08-18 14:21:30 -07:00
parent 98ccbf44b1
commit 8af197560e
179 changed files with 6728 additions and 10430 deletions

View file

@ -66,23 +66,29 @@ class _Printer(object):
def __call__(self):
self.__setup()
prompt = 'Hit Return for more, or q (and Return) to quit: '
import os
if os.isatty(1):
prompt = 'Hit Return for more, or q (and Return) to quit: '
n = os.get_terminal_size().lines
else:
n = self.MAXLINES
lineno = 0
while 1:
try:
for i in range(lineno, lineno + self.MAXLINES):
for i in range(lineno, lineno + n):
print(self.__lines[i])
except IndexError:
break
else:
lineno += self.MAXLINES
lineno += n
key = None
while key is None:
key = input(prompt)
if key not in ('', 'q'):
key = None
if key == 'q':
break
if os.isatty(1):
while key is None:
key = input(prompt)
if key not in ('', 'q'):
key = None
if key == 'q':
break
class _Helper(object):

View file

@ -120,7 +120,7 @@ build_time_vars = {'ABIFLAGS': 'm',
'DYNLOADFILE': 'dynload_shlib.o',
'ENABLE_IPV6': 0,
'ENSUREPIP': 'no',
'EXE': '.com.dbg',
'EXE': '.com',
'EXEMODE': 755,
'EXTRAMACHDEPPATH': '',
'EXTRATESTOPTS': '',
@ -134,19 +134,16 @@ build_time_vars = {'ABIFLAGS': 'm',
'GITTAG': 'git --git-dir ./.git describe --all --always --dirty',
'GITVERSION': 'git --git-dir ./.git rev-parse --short HEAD',
'GNULD': 'yes',
'HAVE_ACCEPT4': 1,
'HAVE_ACOSH': 1,
'HAVE_ADDRINFO': 1,
'HAVE_ALARM': 1,
'HAVE_ALIGNED_REQUIRED': 0,
'HAVE_ALLOCA_H': 1,
'HAVE_ALTZONE': 0,
'HAVE_ASINH': 1,
'HAVE_ASM_TYPES_H': 0,
'HAVE_ATANH': 1,
'HAVE_BIND_TEXTDOMAIN_CODESET': 0,
'HAVE_BLUETOOTH_BLUETOOTH_H': 0,
'HAVE_BLUETOOTH_H': 0,
'HAVE_BROKEN_MBSTOWCS': 0,
'HAVE_BROKEN_NICE': 0,
'HAVE_BROKEN_PIPE_BUF': 0,
@ -165,13 +162,11 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_CLOCK_SETTIME': 1,
'HAVE_COMPUTED_GOTOS': 1,
'HAVE_CONFSTR': 0,
'HAVE_CONIO_H': 0,
'HAVE_COPYSIGN': 1,
'HAVE_CRYPT_H': 0,
'HAVE_COSMO_CAN_RAW_FD_FRAMES': 0,
'HAVE_CTERMID': 0,
'HAVE_CTERMID_R': 0,
'HAVE_CURSES_FILTER': 1,
'HAVE_CURSES_H': 1,
'HAVE_CURSES_HAS_KEY': 1,
'HAVE_CURSES_IMMEDOK': 1,
'HAVE_CURSES_IS_PAD': 1,
@ -196,21 +191,16 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_DEVICE_MACROS': 0,
'HAVE_DEV_PTC': 0,
'HAVE_DEV_PTMX': 1,
'HAVE_DIRECT_H': 0,
'HAVE_DIRENT_D_TYPE': 1,
'HAVE_DIRENT_H': 1,
'HAVE_DIRFD': 1,
'HAVE_DLFCN_H': 1,
'HAVE_DLOPEN': 1,
'HAVE_DUP2': 1,
'HAVE_DUP3': 1,
'HAVE_DYNAMIC_LOADING': 1,
'HAVE_ENDIAN_H': 0,
'HAVE_EPOLL': 0,
'HAVE_EPOLL_CREATE1': 0,
'HAVE_ERF': 1,
'HAVE_ERFC': 1,
'HAVE_ERRNO_H': 1,
'HAVE_EXECV': 1,
'HAVE_EXPM1': 1,
'HAVE_FACCESSAT': 1,
@ -219,11 +209,10 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_FCHMODAT': 1,
'HAVE_FCHOWN': 1,
'HAVE_FCHOWNAT': 1,
'HAVE_FCNTL_H': 1,
'HAVE_FDATASYNC': 1,
'HAVE_FDOPENDIR': 1,
'HAVE_FEXECVE': 0,
'HAVE_FINITE': 0,
'HAVE_FINITE': 1,
'HAVE_FLOCK': 1,
'HAVE_FORK': 1,
'HAVE_FORKPTY': 1,
@ -262,37 +251,32 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_GETNAMEINFO': 1,
'HAVE_GETPAGESIZE': 1,
'HAVE_GETPEERNAME': 1,
'HAVE_GETPGID': 0,
'HAVE_GETPGID': 1,
'HAVE_GETPGRP': 1,
'HAVE_GETPID': 1,
'HAVE_GETPRIORITY': 1,
'HAVE_GETPWENT': 1,
'HAVE_GETRANDOM': 1,
'HAVE_GETRANDOM_SYSCALL': 0,
'HAVE_GETRESGID': 0,
'HAVE_GETRESUID': 0,
'HAVE_GETRESGID': 1,
'HAVE_GETRESUID': 1,
'HAVE_GETSID': 1,
'HAVE_GETSPENT': 0,
'HAVE_GETSPNAM': 0,
'HAVE_GETTIMEOFDAY': 1,
'HAVE_GETWD': 0,
'HAVE_GLIBC_MEMMOVE_BUG': 1,
'HAVE_GRP_H': 1,
'HAVE_HSTRERROR': 0,
'HAVE_HTOLE64': 1,
'HAVE_HYPOT': 1,
'HAVE_IEEEFP_H': 0,
'HAVE_IF_NAMEINDEX': 0,
'HAVE_INET_ATON': 1,
'HAVE_INET_PTON': 1,
'HAVE_INITGROUPS': 1,
'HAVE_INTTYPES_H': 1,
'HAVE_IO_H': 0,
'HAVE_IPA_PURE_CONST_BUG': 1,
'HAVE_KILL': 1,
'HAVE_KILLPG': 1,
'HAVE_KQUEUE': 0,
'HAVE_LANGINFO_H': 0,
'HAVE_LARGEFILE_SUPPORT': 0,
'HAVE_LCHFLAGS': 0,
'HAVE_LCHMOD': 0,
@ -301,43 +285,29 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_LIBDL': 0,
'HAVE_LIBDLD': 0,
'HAVE_LIBIEEE': 0,
'HAVE_LIBINTL_H': 0,
'HAVE_LIBREADLINE': 0,
'HAVE_LIBRESOLV': 0,
'HAVE_LIBSENDFILE': 0,
'HAVE_LIBUTIL_H': 0,
'HAVE_LINK': 1,
'HAVE_LINKAT': 1,
'HAVE_COSMO_CAN_BCM_H': 0,
'HAVE_COSMO_CAN_H': 0,
'HAVE_COSMO_CAN_RAW_FD_FRAMES': 0,
'HAVE_COSMO_CAN_RAW_H': 0,
'HAVE_COSMO_NETLINK_H': 0,
'HAVE_COSMO_RANDOM_H': 0,
'HAVE_COSMO_TIPC_H': 0,
'HAVE_LOCKF': 0,
'HAVE_LOG1P': 1,
'HAVE_LOG2': 1,
'HAVE_LONG_DOUBLE': 1,
'HAVE_LSTAT': 1,
'HAVE_LUTIMES': 0,
'HAVE_MAKEDEV': 0,
'HAVE_MAKEDEV': 1,
'HAVE_MBRTOWC': 1,
'HAVE_MEMMOVE': 1,
'HAVE_MEMORY_H': 1,
'HAVE_MEMRCHR': 1,
'HAVE_MKDIRAT': 1,
'HAVE_MKFIFO': 1,
'HAVE_MKFIFOAT': 0,
'HAVE_MKFIFOAT': 1,
'HAVE_MKNOD': 1,
'HAVE_MKNODAT': 1,
'HAVE_MKTIME': 1,
'HAVE_MMAP': 1,
'HAVE_MREMAP': 1,
'HAVE_NCURSES_H': 1,
'HAVE_NDIR_H': 0,
'HAVE_NETPACKET_PACKET_H': 0,
'HAVE_NET_IF_H': 0,
'HAVE_NICE': 1,
'HAVE_OPENAT': 1,
'HAVE_OPENPTY': 1,
@ -350,15 +320,12 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_POSIX_FALLOCATE': 0,
'HAVE_PREAD': 1,
'HAVE_PRLIMIT': 0,
'HAVE_PROCESS_H': 0,
'HAVE_PROTOTYPES': 1,
'HAVE_PTHREAD_ATFORK': 0,
'HAVE_PTHREAD_DESTRUCTOR': 0,
'HAVE_PTHREAD_H': 1,
'HAVE_PTHREAD_INIT': 0,
'HAVE_PTHREAD_KILL': 0,
'HAVE_PTHREAD_SIGMASK': 0,
'HAVE_PTY_H': 1,
'HAVE_PUTENV': 1,
'HAVE_PWRITE': 1,
'HAVE_READLINK': 1,
@ -377,7 +344,6 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_RL_RESIZE_TERMINAL': 0,
'HAVE_ROUND': 1,
'HAVE_SCHED_GET_PRIORITY_MAX': 0,
'HAVE_SCHED_H': 0,
'HAVE_SCHED_RR_GET_INTERVAL': 0,
'HAVE_SCHED_SETAFFINITY': 1,
'HAVE_SCHED_SETPARAM': 1,
@ -405,11 +371,9 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_SETSID': 1,
'HAVE_SETUID': 1,
'HAVE_SETVBUF': 1,
'HAVE_SHADOW_H': 0,
'HAVE_SIGACTION': 1,
'HAVE_SIGALTSTACK': 0,
'HAVE_SIGINTERRUPT': 0,
'HAVE_SIGNAL_H': 1,
'HAVE_SIGINTERRUPT': 1,
'HAVE_SIGPENDING': 0,
'HAVE_SIGRELSE': 0,
'HAVE_SIGTIMEDWAIT': 0,
@ -420,21 +384,15 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_SOCKADDR_SA_LEN': 0,
'HAVE_SOCKADDR_STORAGE': 1,
'HAVE_SOCKETPAIR': 1,
'HAVE_SPAWN_H': 1,
'HAVE_SSIZE_T': 1,
'HAVE_STATVFS': 0,
'HAVE_STAT_TV_NSEC': 1,
'HAVE_STAT_TV_NSEC2': 0,
'HAVE_STDARG_PROTOTYPES': 1,
'HAVE_STDINT_H': 1,
'HAVE_STDLIB_H': 1,
'HAVE_STD_ATOMIC': 0,
'HAVE_STRDUP': 1,
'HAVE_STRFTIME': 1,
'HAVE_STRINGS_H': 1,
'HAVE_STRING_H': 1,
'HAVE_STRLCPY': 1,
'HAVE_STROPTS_H': 0,
'HAVE_STRUCT_PASSWD_PW_GECOS': 1,
'HAVE_STRUCT_PASSWD_PW_PASSWD': 1,
'HAVE_STRUCT_STAT_ST_BIRTHTIME': 0,
@ -448,11 +406,86 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_SYMLINKAT': 1,
'HAVE_SYNC': 1,
'HAVE_SYSCONF': 1,
'HAVE_TCGETPGRP': 1,
'HAVE_TCSETPGRP': 1,
'HAVE_TEMPNAM': 1,
'HAVE_TGAMMA': 1,
'HAVE_TIMEGM': 1,
'HAVE_TIMES': 1,
'HAVE_TMPFILE': 1,
'HAVE_TMPNAM': 0,
'HAVE_TMPNAM_R': 0,
'HAVE_TM_ZONE': 1,
'HAVE_TRUNCATE': 1,
'HAVE_TZNAME': 0,
'HAVE_UCS4_TCL': 0,
'HAVE_UNAME': 1,
'HAVE_UNLINKAT': 1,
'HAVE_UNSETENV': 1,
'HAVE_USABLE_WCHAR_T': 1,
'HAVE_UTIMENSAT': 1,
'HAVE_UTIMES': 1,
'HAVE_WAIT3': 1,
'HAVE_WAIT4': 1,
'HAVE_WAITID': 0,
'HAVE_WAITPID': 1,
'HAVE_WCSCOLL': 0,
'HAVE_WCSFTIME': 0,
'HAVE_WCSXFRM': 0,
'HAVE_WMEMCMP': 1,
'HAVE_WORKING_TZSET': 1,
'HAVE_WRITEV': 1,
'HAVE_ZLIB_COPY': 1,
'HAVE__GETPTY': 0,
'HAVE_ALLOCA_H': 1,
'HAVE_ASM_TYPES_H': 0,
'HAVE_BLUETOOTH_BLUETOOTH_H': 0,
'HAVE_BLUETOOTH_H': 0,
'HAVE_CONIO_H': 0,
'HAVE_COSMO_CAN_BCM_H': 0,
'HAVE_COSMO_CAN_H': 0,
'HAVE_COSMO_CAN_RAW_H': 0,
'HAVE_COSMO_NETLINK_H': 0,
'HAVE_COSMO_RANDOM_H': 0,
'HAVE_COSMO_TIPC_H': 0,
'HAVE_CRYPT_H': 0,
'HAVE_CURSES_H': 1,
'HAVE_DIRECT_H': 0,
'HAVE_DIRENT_H': 1,
'HAVE_DLFCN_H': 1,
'HAVE_ENDIAN_H': 1,
'HAVE_ERRNO_H': 1,
'HAVE_FCNTL_H': 1,
'HAVE_GRP_H': 1,
'HAVE_IEEEFP_H': 0,
'HAVE_INTTYPES_H': 1,
'HAVE_IO_H': 0,
'HAVE_LANGINFO_H': 0,
'HAVE_LIBINTL_H': 0,
'HAVE_LIBUTIL_H': 0,
'HAVE_MEMORY_H': 1,
'HAVE_NCURSES_H': 1,
'HAVE_NDIR_H': 0,
'HAVE_NETPACKET_PACKET_H': 0,
'HAVE_NET_IF_H': 0,
'HAVE_PROCESS_H': 0,
'HAVE_PTHREAD_H': 1,
'HAVE_PTY_H': 1,
'HAVE_SCHED_H': 1,
'HAVE_SHADOW_H': 0,
'HAVE_SIGNAL_H': 1,
'HAVE_SPAWN_H': 1,
'HAVE_STDINT_H': 1,
'HAVE_STDLIB_H': 1,
'HAVE_STRINGS_H': 1,
'HAVE_STRING_H': 1,
'HAVE_STROPTS_H': 0,
'HAVE_SYSEXITS_H': 1,
'HAVE_SYS_AUDIOIO_H': 0,
'HAVE_SYS_BSDTTY_H': 0,
'HAVE_SYS_DEVPOLL_H': 0,
'HAVE_SYS_DIR_H': 0,
'HAVE_SYS_DIR_H': 1,
'HAVE_SYS_ENDIAN_H': 0,
'HAVE_SYS_EPOLL_H': 1,
'HAVE_SYS_EVENT_H': 0,
@ -469,12 +502,12 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_SYS_RANDOM_H': 1,
'HAVE_SYS_RESOURCE_H': 1,
'HAVE_SYS_SELECT_H': 1,
'HAVE_SYS_SENDFILE_H': 0,
'HAVE_SYS_SENDFILE_H': 1,
'HAVE_SYS_SOCKET_H': 1,
'HAVE_SYS_STATVFS_H': 1,
'HAVE_SYS_STAT_H': 1,
'HAVE_SYS_SYSCALL_H': 1,
'HAVE_SYS_SYSMACROS_H': 0,
'HAVE_SYS_SYSMACROS_H': 1,
'HAVE_SYS_SYS_DOMAIN_H': 0,
'HAVE_SYS_TERMIO_H': 0,
'HAVE_SYS_TIMES_H': 1,
@ -484,44 +517,14 @@ build_time_vars = {'ABIFLAGS': 'm',
'HAVE_SYS_UN_H': 1,
'HAVE_SYS_UTSNAME_H': 1,
'HAVE_SYS_WAIT_H': 1,
'HAVE_SYS_XATTR_H': 0,
'HAVE_TCGETPGRP': 1,
'HAVE_TCSETPGRP': 1,
'HAVE_TEMPNAM': 1,
'HAVE_SYS_XATTR_H': 1,
'HAVE_TERMIOS_H': 1,
'HAVE_TERM_H': 1,
'HAVE_TGAMMA': 0,
'HAVE_TIMEGM': 1,
'HAVE_TIMES': 1,
'HAVE_TMPFILE': 1,
'HAVE_TMPNAM': 0,
'HAVE_TMPNAM_R': 0,
'HAVE_TM_ZONE': 1,
'HAVE_TRUNCATE': 1,
'HAVE_TZNAME': 0,
'HAVE_UCS4_TCL': 0,
'HAVE_UNAME': 1,
'HAVE_UNISTD_H': 1,
'HAVE_UNLINKAT': 1,
'HAVE_UNSETENV': 1,
'HAVE_USABLE_WCHAR_T': 0,
'HAVE_UTIL_H': 0,
'HAVE_UTIMENSAT': 1,
'HAVE_UTIMES': 1,
'HAVE_UTIME_H': 1,
'HAVE_WAIT3': 1,
'HAVE_WAIT4': 1,
'HAVE_WAITID': 0,
'HAVE_WAITPID': 1,
'HAVE_WCHAR_H': 1,
'HAVE_WCSCOLL': 0,
'HAVE_WCSFTIME': 0,
'HAVE_WCSXFRM': 0,
'HAVE_WMEMCMP': 1,
'HAVE_WORKING_TZSET': 1,
'HAVE_WRITEV': 1,
'HAVE_ZLIB_COPY': 1,
'HAVE__GETPTY': 0,
'HOST_GNU_TYPE': 'x86_64-pc-cosmo-gnu',
'INCLDIRSTOMAKE': '/include /include /include/python3.6m /include/python3.6m',
'INCLUDEDIR': '/include',
@ -532,7 +535,6 @@ build_time_vars = {'ABIFLAGS': 'm',
'INSTALL_SCRIPT': '/usr/bin/install -c',
'INSTALL_SHARED': '/usr/bin/install -c -m 555',
'INSTSONAME': 'libpython3.6m.a',
'IO_H': 'Modules/_io/_iomodule.h',
'IO_OBJS': '\\',
'LDCXXSHARED': 'g++ -shared',
'LDFLAGS': '-static -nostdlib -nostdinc -fno-pie -mno-red-zone '

View file

@ -44,6 +44,14 @@ def _get_sep(path):
else:
return '/'
def _get_starters(path):
if isinstance(path, bytes):
return (b'zip!', b'/', b'\\', b'zip:')
else:
return ('zip!', '/', '\\', 'zip:')
# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed
@ -64,8 +72,10 @@ def normcase(s):
def isabs(s):
"""Test whether a path is absolute"""
s = os.fspath(s)
sep = _get_sep(s)
return s.startswith(sep)
if isinstance(s, bytes):
return s.startswith((b'zip!', b'/', b'\\', b'zip:'))
else:
return s.startswith(('zip!', '/', '\\', 'zip:'))
# Join pathnames.
@ -79,12 +89,13 @@ def join(a, *p):
ends with a separator."""
a = os.fspath(a)
sep = _get_sep(a)
starters = _get_starters(a)
path = a
try:
if not p:
path[:0] + sep #23780: Ensure compatible data type even if p is null.
for b in map(os.fspath, p):
if b.startswith(sep):
if b.startswith(starters):
path = b
elif not path or path.endswith(sep):
path += b
@ -339,11 +350,15 @@ def normpath(path):
"""Normalize path, eliminating double slashes, etc."""
path = os.fspath(path)
if isinstance(path, bytes):
if path.startswith((b'zip!', b'zip:')):
return path
sep = b'/'
empty = b''
dot = b'.'
dotdot = b'..'
else:
if path.startswith(('zip!', 'zip:')):
return path
sep = '/'
empty = ''
dot = '.'

View file

@ -351,12 +351,7 @@ def setquit():
def setcopyright():
"""Set 'copyright' and 'credits' in builtins"""
builtins.copyright = _sitebuiltins._Printer("copyright", sys.copyright)
if sys.platform[:4] == 'java':
builtins.credits = _sitebuiltins._Printer(
"credits",
"Jython is maintained by the Jython developers (www.jython.org).")
else:
builtins.credits = _sitebuiltins._Printer("credits", """\
builtins.credits = _sitebuiltins._Printer("credits", """\
Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
for supporting Python development. See www.python.org for more information.""")
files, dirs = [], []

View file

@ -2662,11 +2662,12 @@ class AbstractIdentityPersistentPicklerTests(unittest.TestCase):
for obj in [b"abc\n", "abc\n", -1, -1.1 * 0.1, str]:
self._check_return_correct_type(obj, proto)
def test_protocol0_is_ascii_only(self):
non_ascii_str = "\N{EMPTY SET}"
self.assertRaises(pickle.PicklingError, self.dumps, non_ascii_str, 0)
pickled = pickle.PERSID + non_ascii_str.encode('utf-8') + b'\n.'
self.assertRaises(pickle.UnpicklingError, self.loads, pickled)
# # TODO(jart): pycomp.com needs \N thing
# def test_protocol0_is_ascii_only(self):
# non_ascii_str = "\N{EMPTY SET}"
# self.assertRaises(pickle.PicklingError, self.dumps, non_ascii_str, 0)
# pickled = pickle.PERSID + non_ascii_str.encode('utf-8') + b'\n.'
# self.assertRaises(pickle.UnpicklingError, self.loads, pickled)
class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):

View file

@ -661,10 +661,11 @@ xyzabc
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]
u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
tests.extend([
# bug 410271: \b broken under locales
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
(r'(?u)\b.\b', u, SUCCEED, 'found', u),
(r'(?u)\w', u, SUCCEED, 'found', u),
])
# # TODO(jart): pycomp.com needs \N thing
# u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
# tests.extend([
# # bug 410271: \b broken under locales
# (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
# (r'(?u)\b.\b', u, SUCCEED, 'found', u),
# (r'(?u)\w', u, SUCCEED, 'found', u),
# ])

View file

@ -1953,23 +1953,22 @@ class TestAddSubparsers(TestCase):
++foo foo help
'''))
def test_help_non_breaking_spaces(self):
parser = ErrorRaisingArgumentParser(
prog='PROG', description='main description')
parser.add_argument(
"--non-breaking", action='store_false',
help='help message containing non-breaking spaces shall not '
'wrap\N{NO-BREAK SPACE}at non-breaking spaces')
self.assertEqual(parser.format_help(), textwrap.dedent('''\
usage: PROG [-h] [--non-breaking]
main description
optional arguments:
-h, --help show this help message and exit
--non-breaking help message containing non-breaking spaces shall not
wrap\N{NO-BREAK SPACE}at non-breaking spaces
'''))
# # TODO(jart): pycomp.com needs \N thing
# def test_help_non_breaking_spaces(self):
# parser = ErrorRaisingArgumentParser(
# prog='PROG', description='main description')
# parser.add_argument(
# "--non-breaking", action='store_false',
# help='help message containing non-breaking spaces shall not '
# 'wrap\N{NO-BREAK SPACE}at non-breaking spaces')
# self.assertEqual(parser.format_help(), textwrap.dedent('''\
# usage: PROG [-h] [--non-breaking]
# main description
# optional arguments:
# -h, --help show this help message and exit
# --non-breaking help message containing non-breaking spaces shall not
# wrap\N{NO-BREAK SPACE}at non-breaking spaces
# '''))
def test_help_alternate_prefix_chars(self):
parser = self._get_parser(prefix_chars='+:/')

View file

@ -102,17 +102,18 @@ class TestInteractiveConsole(unittest.TestCase):
self.console.interact(banner='', exitmsg='')
self.assertEqual(len(self.stderr.method_calls), 1)
# custom exit message
self.stderr.reset_mock()
message = (
'bye! \N{GREEK SMALL LETTER ZETA}\N{CYRILLIC SMALL LETTER ZHE}'
)
self.infunc.side_effect = EOFError('Finished')
self.console.interact(banner='', exitmsg=message)
self.assertEqual(len(self.stderr.method_calls), 2)
err_msg = self.stderr.method_calls[1]
expected = message + '\n'
self.assertEqual(err_msg, ['write', (expected,), {}])
# TODO(jart): pycomp.com needs \N thing
# # custom exit message
# self.stderr.reset_mock()
# message = (
# 'bye! \N{GREEK SMALL LETTER ZETA}\N{CYRILLIC SMALL LETTER ZHE}'
# )
# self.infunc.side_effect = EOFError('Finished')
# self.console.interact(banner='', exitmsg=message)
# self.assertEqual(len(self.stderr.method_calls), 2)
# err_msg = self.stderr.method_calls[1]
# expected = message + '\n'
# self.assertEqual(err_msg, ['write', (expected,), {}])
def test_cause_tb(self):

View file

@ -149,21 +149,20 @@ class CodecCallbackTest(unittest.TestCase):
sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
def test_nameescape(self):
# Does the same as backslashescape, but prefers ``\N{...}`` escape
# sequences.
sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
self.assertEqual(sin.encode("ascii", "namereplace"), sout)
sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
# # TODO(jart): pycomp.com needs \N thing
# def test_nameescape(self):
# # Does the same as backslashescape, but prefers ``\N{...}`` escape
# # sequences.
# sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
# sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
# b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
# self.assertEqual(sin.encode("ascii", "namereplace"), sout)
# sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
# b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
# self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
# sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
# b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
# self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
def test_decoding_callbacks(self):
# This is a test for a decoding callback handler
@ -615,51 +614,52 @@ class CodecCallbackTest(unittest.TestCase):
(r, 2)
)
def test_badandgoodnamereplaceexceptions(self):
# "namereplace" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.namereplace_errors,
42
)
# "namereplace" complains about the wrong exception types
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeError("ouch")
)
# "namereplace" can only be used for encoding
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeTranslateError("\u3042", 0, 1, "ouch")
)
# Use the correct exception
tests = [
("\u3042", "\\N{HIRAGANA LETTER A}"),
("\x00", "\\x00"),
("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH "
"HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"),
("\U000e007f", "\\N{CANCEL TAG}"),
("\U0010ffff", "\\U0010ffff"),
# Lone surrogates
("\ud800", "\\ud800"),
("\udfff", "\\udfff"),
("\ud800\udfff", "\\ud800\\udfff"),
]
for s, r in tests:
with self.subTest(str=s):
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "a" + s + "b",
1, 1 + len(s), "ouch")),
(r, 1 + len(s))
)
# # TODO(jart): pycomp.com needs \N thing
# def test_badandgoodnamereplaceexceptions(self):
# # "namereplace" complains about a non-exception passed in
# self.assertRaises(
# TypeError,
# codecs.namereplace_errors,
# 42
# )
# # "namereplace" complains about the wrong exception types
# self.assertRaises(
# TypeError,
# codecs.namereplace_errors,
# UnicodeError("ouch")
# )
# # "namereplace" can only be used for encoding
# self.assertRaises(
# TypeError,
# codecs.namereplace_errors,
# UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
# )
# self.assertRaises(
# TypeError,
# codecs.namereplace_errors,
# UnicodeTranslateError("\u3042", 0, 1, "ouch")
# )
# # Use the correct exception
# tests = [
# ("\u3042", "\\N{HIRAGANA LETTER A}"),
# ("\x00", "\\x00"),
# ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH "
# "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"),
# ("\U000e007f", "\\N{CANCEL TAG}"),
# ("\U0010ffff", "\\U0010ffff"),
# # Lone surrogates
# ("\ud800", "\\ud800"),
# ("\udfff", "\\udfff"),
# ("\ud800\udfff", "\\ud800\\udfff"),
# ]
# for s, r in tests:
# with self.subTest(str=s):
# self.assertEqual(
# codecs.namereplace_errors(
# UnicodeEncodeError("ascii", "a" + s + "b",
# 1, 1 + len(s), "ouch")),
# (r, 1 + len(s))
# )
def test_badandgoodsurrogateescapeexceptions(self):
surrogateescape_errors = codecs.lookup_error('surrogateescape')

View file

@ -3073,38 +3073,39 @@ class CodePageTest(unittest.TestCase):
self.assertRaises(UnicodeEncodeError,
codecs.code_page_encode, cp, text, errors)
def test_cp932(self):
self.check_encode(932, (
('abc', 'strict', b'abc'),
('\uff44\u9a3e', 'strict', b'\x82\x84\xe9\x80'),
# test error handlers
('\xff', 'strict', None),
('[\xff]', 'ignore', b'[]'),
('[\xff]', 'replace', b'[y]'),
('[\u20ac]', 'replace', b'[?]'),
('[\xff]', 'backslashreplace', b'[\\xff]'),
('[\xff]', 'namereplace',
b'[\\N{LATIN SMALL LETTER Y WITH DIAERESIS}]'),
('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),
('\udcff', 'strict', None),
('[\udcff]', 'surrogateescape', b'[\xff]'),
('[\udcff]', 'surrogatepass', None),
))
self.check_decode(932, (
(b'abc', 'strict', 'abc'),
(b'\x82\x84\xe9\x80', 'strict', '\uff44\u9a3e'),
# invalid bytes
(b'[\xff]', 'strict', None),
(b'[\xff]', 'ignore', '[]'),
(b'[\xff]', 'replace', '[\ufffd]'),
(b'[\xff]', 'backslashreplace', '[\\xff]'),
(b'[\xff]', 'surrogateescape', '[\udcff]'),
(b'[\xff]', 'surrogatepass', None),
(b'\x81\x00abc', 'strict', None),
(b'\x81\x00abc', 'ignore', '\x00abc'),
(b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
(b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),
))
# TODO(jart): pycomp.com needs \N thing
# def test_cp932(self):
# self.check_encode(932, (
# ('abc', 'strict', b'abc'),
# ('\uff44\u9a3e', 'strict', b'\x82\x84\xe9\x80'),
# # test error handlers
# ('\xff', 'strict', None),
# ('[\xff]', 'ignore', b'[]'),
# ('[\xff]', 'replace', b'[y]'),
# ('[\u20ac]', 'replace', b'[?]'),
# ('[\xff]', 'backslashreplace', b'[\\xff]'),
# ('[\xff]', 'namereplace',
# b'[\\N{LATIN SMALL LETTER Y WITH DIAERESIS}]'),
# ('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),
# ('\udcff', 'strict', None),
# ('[\udcff]', 'surrogateescape', b'[\xff]'),
# ('[\udcff]', 'surrogatepass', None),
# ))
# self.check_decode(932, (
# (b'abc', 'strict', 'abc'),
# (b'\x82\x84\xe9\x80', 'strict', '\uff44\u9a3e'),
# # invalid bytes
# (b'[\xff]', 'strict', None),
# (b'[\xff]', 'ignore', '[]'),
# (b'[\xff]', 'replace', '[\ufffd]'),
# (b'[\xff]', 'backslashreplace', '[\\xff]'),
# (b'[\xff]', 'surrogateescape', '[\udcff]'),
# (b'[\xff]', 'surrogatepass', None),
# (b'\x81\x00abc', 'strict', None),
# (b'\x81\x00abc', 'ignore', '\x00abc'),
# (b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
# (b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),
# ))
def test_cp1252(self):
self.check_encode(1252, (

View file

@ -341,13 +341,14 @@ class ComplexTest(unittest.TestCase):
self.assertRaises(ValueError, complex, "1.11.1j")
self.assertRaises(ValueError, complex, "1e1.1j")
# check that complex accepts long unicode strings
self.assertEqual(type(complex("1"*500)), complex)
# check whitespace processing
self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j)
# Invalid unicode string
# See bpo-34087
self.assertRaises(ValueError, complex, '\u3053\u3093\u306b\u3061\u306f')
# # TODO(jart): pycomp.com needs \N thing
# # check that complex accepts long unicode strings
# self.assertEqual(type(complex("1"*500)), complex)
# # check whitespace processing
# self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j)
# # Invalid unicode string
# # See bpo-34087
# self.assertRaises(ValueError, complex, '\u3053\u3093\u306b\u3061\u306f')
class EvilExc(Exception):
pass

View file

@ -56,7 +56,8 @@ class GeneralFloatCases(unittest.TestCase):
self.assertRaises(ValueError, float, "-1.7d29")
self.assertRaises(ValueError, float, "3D-14")
self.assertEqual(float(" \u0663.\u0661\u0664 "), 3.14)
self.assertEqual(float("\N{EM SPACE}3.14\N{EN SPACE}"), 3.14)
# TODO(jart): Need \N in pycomp.com
# self.assertEqual(float("\N{EM SPACE}3.14\N{EN SPACE}"), 3.14)
# extra long strings should not be a problem
float(b'.' + b'1'*1000)
float('.' + '1'*1000)

View file

@ -599,13 +599,14 @@ non-important content
self.assertEqual(f'{2}\U00000394{3}', '2\u03943')
self.assertEqual(f'\U00000394{3}', '\u03943')
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394')
self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}', '2\u03943')
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}{3}', '\u03943')
self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}3', '2\u03943')
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}3', '\u03943')
# # TODO(jart): pycomp.com needs \N thing
# self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394')
# self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
# self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}', '2\u03943')
# self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}{3}', '\u03943')
# self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
# self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}3', '2\u03943')
# self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}3', '\u03943')
self.assertEqual(f'\x20', ' ')
self.assertEqual(r'\x20', '\\x20')
@ -624,50 +625,53 @@ non-important content
self.assertEqual(f'\\{6*7}', '\\42')
self.assertEqual(fr'\{6*7}', '\\42')
AMPERSAND = 'spam'
# Get the right unicode character (&), or pick up local variable
# depending on the number of backslashes.
self.assertEqual(f'\N{AMPERSAND}', '&')
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
# # TODO(jart): pycomp.com needs \N thing
# AMPERSAND = 'spam'
# # Get the right unicode character (&), or pick up local variable
# # depending on the number of backslashes.
# self.assertEqual(f'\N{AMPERSAND}', '&')
# self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
# self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
# self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
def test_misformed_unicode_character_name(self):
# These test are needed because unicode names are parsed
# differently inside f-strings.
self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
[r"f'\N'",
r"f'\N{'",
r"f'\N{GREEK CAPITAL LETTER DELTA'",
# # TODO(jart): pycomp.com needs \N thing
# def test_misformed_unicode_character_name(self):
# # These test are needed because unicode names are parsed
# # differently inside f-strings.
# self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
# [r"f'\N'",
# r"f'\N{'",
# r"f'\N{GREEK CAPITAL LETTER DELTA'",
# # Here are the non-f-string versions,
# # which should give the same errors.
# r"'\N'",
# r"'\N{'",
# r"'\N{GREEK CAPITAL LETTER DELTA'",
# ])
# Here are the non-f-string versions,
# which should give the same errors.
r"'\N'",
r"'\N{'",
r"'\N{GREEK CAPITAL LETTER DELTA'",
])
# # TODO(jart): pycomp.com needs \N thing
# def test_no_backslashes_in_expression_part(self):
# self.assertAllRaise(SyntaxError, 'f-string expression part cannot include a backslash',
# [r"f'{\'a\'}'",
# r"f'{\t3}'",
# r"f'{\}'",
# r"rf'{\'a\'}'",
# r"rf'{\t3}'",
# r"rf'{\}'",
# r"""rf'{"\N{LEFT CURLY BRACKET}"}'""",
# r"f'{\n}'",
# ])
def test_no_backslashes_in_expression_part(self):
self.assertAllRaise(SyntaxError, 'f-string expression part cannot include a backslash',
[r"f'{\'a\'}'",
r"f'{\t3}'",
r"f'{\}'",
r"rf'{\'a\'}'",
r"rf'{\t3}'",
r"rf'{\}'",
r"""rf'{"\N{LEFT CURLY BRACKET}"}'""",
r"f'{\n}'",
])
def test_no_escapes_for_braces(self):
"""
Only literal curly braces begin an expression.
"""
# \x7b is '{'.
self.assertEqual(f'\x7b1+1}}', '{1+1}')
self.assertEqual(f'\x7b1+1', '{1+1')
self.assertEqual(f'\u007b1+1', '{1+1')
self.assertEqual(f'\N{LEFT CURLY BRACKET}1+1\N{RIGHT CURLY BRACKET}', '{1+1}')
# # TODO(jart): pycomp.com needs \N thing
# def test_no_escapes_for_braces(self):
# """
# Only literal curly braces begin an expression.
# """
# # \x7b is '{'.
# self.assertEqual(f'\x7b1+1}}', '{1+1}')
# self.assertEqual(f'\x7b1+1', '{1+1')
# self.assertEqual(f'\u007b1+1', '{1+1')
# self.assertEqual(f'\N{LEFT CURLY BRACKET}1+1\N{RIGHT CURLY BRACKET}', '{1+1}')
def test_newlines_in_expressions(self):
self.assertEqual(f'{0}', '0')

View file

@ -216,15 +216,16 @@ class CookieTests(unittest.TestCase):
with self.assertRaises(cookies.CookieError):
C.load(rawdata)
def test_comment_quoting(self):
c = cookies.SimpleCookie()
c['foo'] = '\N{COPYRIGHT SIGN}'
self.assertEqual(str(c['foo']), 'Set-Cookie: foo="\\251"')
c['foo']['comment'] = 'comment \N{COPYRIGHT SIGN}'
self.assertEqual(
str(c['foo']),
'Set-Cookie: foo="\\251"; Comment="comment \\251"'
)
# # TODO(jart): pycomp.com needs \N thing
# def test_comment_quoting(self):
# c = cookies.SimpleCookie()
# c['foo'] = '\N{COPYRIGHT SIGN}'
# self.assertEqual(str(c['foo']), 'Set-Cookie: foo="\\251"')
# c['foo']['comment'] = 'comment \N{COPYRIGHT SIGN}'
# self.assertEqual(
# str(c['foo']),
# 'Set-Cookie: foo="\\251"; Comment="comment \\251"'
# )
class MorselTests(unittest.TestCase):

View file

@ -42,7 +42,8 @@ class IntTestCases(unittest.TestCase):
self.assertEqual(int(-3.5), -3)
self.assertEqual(int("-3"), -3)
self.assertEqual(int(" -3 "), -3)
self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3)
# # TODO(jart): pycomp.com needs \N thing
# self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3)
# Different base:
self.assertEqual(int("10",16), 16)
# Test conversion from strings and various anomalies

View file

@ -7,25 +7,23 @@ class TestUnicode:
# test_encoding1 and test_encoding2 from 2.x are irrelevant (only str
# is supported as input, not bytes).
def test_encoding3(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = self.dumps(u)
self.assertEqual(j, '"\\u03b1\\u03a9"')
def test_encoding4(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = self.dumps([u])
self.assertEqual(j, '["\\u03b1\\u03a9"]')
def test_encoding5(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = self.dumps(u, ensure_ascii=False)
self.assertEqual(j, '"{0}"'.format(u))
def test_encoding6(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = self.dumps([u], ensure_ascii=False)
self.assertEqual(j, '["{0}"]'.format(u))
# # TODO(jart): pycomp.com needs \N thing
# def test_encoding3(self):
# u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
# j = self.dumps(u)
# self.assertEqual(j, '"\\u03b1\\u03a9"')
# def test_encoding4(self):
# u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
# j = self.dumps([u])
# self.assertEqual(j, '["\\u03b1\\u03a9"]')
# def test_encoding5(self):
# u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
# j = self.dumps(u, ensure_ascii=False)
# self.assertEqual(j, '"{0}"'.format(u))
# def test_encoding6(self):
# u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
# j = self.dumps([u], ensure_ascii=False)
# self.assertEqual(j, '["{0}"]'.format(u))
def test_big_unicode_encode(self):
u = '\U0001d120'

View file

@ -217,11 +217,12 @@ class Test_ISO2022(unittest.TestCase):
uni = ':hu4:unit\xe9 de famille'
self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
def test_iso2022_jp_g0(self):
self.assertNotIn(b'\x0e', '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
e = '\u3406'.encode(encoding)
self.assertFalse(any(x > 0x80 for x in e))
# TODO(jart): put _codecsmodule / unicodedata in pycomp.com
# def test_iso2022_jp_g0(self):
# self.assertNotIn(b'\x0e', '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
# for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
# e = '\u3406'.encode(encoding)
# self.assertFalse(any(x > 0x80 for x in e))
def test_bug1572832(self):
for x in range(0x10000, 0x110000):

View file

@ -132,15 +132,16 @@ class MiscSourceEncodingTest(unittest.TestCase):
unload(TESTFN)
rmtree('__pycache__')
def test_error_from_string(self):
# See http://bugs.python.org/issue6289
input = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8')
with self.assertRaises(SyntaxError) as c:
compile(input, "<string>", "exec")
expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \
"ordinal not in range(128)"
self.assertTrue(c.exception.args[0].startswith(expected),
msg=c.exception.args[0])
# # TODO(jart): pycomp.com needs support for \N{NAME} escapes
# def test_error_from_string(self):
# # See http://bugs.python.org/issue6289
# input = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8')
# with self.assertRaises(SyntaxError) as c:
# compile(input, "<string>", "exec")
# expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \
# "ordinal not in range(128)"
# self.assertTrue(c.exception.args[0].startswith(expected),
# msg=c.exception.args[0])
class AbstractSourceEncodingTest:

View file

@ -203,14 +203,15 @@ class TestLiterals(unittest.TestCase):
self.assertRaises(SyntaxError, eval, """ rrb'' """)
self.assertRaises(SyntaxError, eval, """ rbb'' """)
def test_eval_str_u(self):
self.assertEqual(eval(""" u'x' """), 'x')
self.assertEqual(eval(""" U'\u00e4' """), 'ä')
self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
self.assertRaises(SyntaxError, eval, """ ur'' """)
self.assertRaises(SyntaxError, eval, """ ru'' """)
self.assertRaises(SyntaxError, eval, """ bu'' """)
self.assertRaises(SyntaxError, eval, """ ub'' """)
# # TODO(jart): pycomp.com needs support for \N{NAME} escapes
# def test_eval_str_u(self):
# self.assertEqual(eval(""" u'x' """), 'x')
# self.assertEqual(eval(""" U'\u00e4' """), 'ä')
# self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
# self.assertRaises(SyntaxError, eval, """ ur'' """)
# self.assertRaises(SyntaxError, eval, """ ru'' """)
# self.assertRaises(SyntaxError, eval, """ bu'' """)
# self.assertRaises(SyntaxError, eval, """ ub'' """)
def check_encoding(self, encoding, extra=""):
modname = "xx_" + encoding.replace("-", "_")

View file

@ -444,36 +444,32 @@ What a mess!
text = "aa \xe4\xe4-\xe4\xe4"
self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
def test_non_breaking_space(self):
    """A NO-BREAK SPACE must not be used as a line-break opportunity."""
    # U+00A0 NO-BREAK SPACE, written as a \u escape instead of \N{NAME} so
    # that compiling this file does not need the unicodedata name lookup.
    # check_wrap comes from the enclosing test class (not visible here);
    # presumably it wraps `text` at the given width and compares the
    # resulting lines with the expected list.
    text = 'This is a sentence with non-breaking\u00a0space.'

    self.check_wrap(text, 20,
                    ['This is a sentence',
                     'with non-',
                     'breaking\u00a0space.'],
                    break_on_hyphens=True)

    self.check_wrap(text, 20,
                    ['This is a sentence',
                     'with',
                     'non-breaking\u00a0space.'],
                    break_on_hyphens=False)
def test_narrow_non_breaking_space(self):
    """A NARROW NO-BREAK SPACE must not be used as a line-break opportunity."""
    # U+202F NARROW NO-BREAK SPACE, written as a \u escape instead of
    # \N{NAME} so that compiling this file does not need the unicodedata
    # name lookup.
    # check_wrap comes from the enclosing test class (not visible here);
    # presumably it wraps `text` at the given width and compares the
    # resulting lines with the expected list.
    text = ('This is a sentence with non-breaking'
            '\u202fspace.')

    self.check_wrap(text, 20,
                    ['This is a sentence',
                     'with non-',
                     'breaking\u202fspace.'],
                    break_on_hyphens=True)

    self.check_wrap(text, 20,
                    ['This is a sentence',
                     'with',
                     'non-breaking\u202fspace.'],
                    break_on_hyphens=False)
# TODO(jart): pycomp.com needs support for \N{NAME} escapes
# def test_non_breaking_space(self):
# text = 'This is a sentence with non-breaking\N{NO-BREAK SPACE}space.'
# self.check_wrap(text, 20,
# ['This is a sentence',
# 'with non-',
# 'breaking\N{NO-BREAK SPACE}space.'],
# break_on_hyphens=True)
# self.check_wrap(text, 20,
# ['This is a sentence',
# 'with',
# 'non-breaking\N{NO-BREAK SPACE}space.'],
# break_on_hyphens=False)
# def test_narrow_non_breaking_space(self):
# text = ('This is a sentence with non-breaking'
# '\N{NARROW NO-BREAK SPACE}space.')
# self.check_wrap(text, 20,
# ['This is a sentence',
# 'with non-',
# 'breaking\N{NARROW NO-BREAK SPACE}space.'],
# break_on_hyphens=True)
# self.check_wrap(text, 20,
# ['This is a sentence',
# 'with',
# 'non-breaking\N{NARROW NO-BREAK SPACE}space.'],
# break_on_hyphens=False)
class MaxLinesTestCase(BaseTestCase):

View file

@ -2058,8 +2058,9 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')
self.assertEqual(str(b'\202 x', 'ascii', 'replace'), '\uFFFD x')
# Error handling (unknown character names)
self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
# # TODO(jart): pycomp.com needs support for \N{NAME} escapes
# # Error handling (unknown character names)
# self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
# Error handling (truncated escape sequence)
self.assertRaises(UnicodeError, b"\\".decode, "unicode-escape")
@ -2794,33 +2795,35 @@ class CAPITest(unittest.TestCase):
self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1)
self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)
@support.cpython_only
def test_encode_decimal(self):
    """Exercise _testcapi.unicode_encodedecimal on digits, spaces and errors."""
    from _testcapi import unicode_encodedecimal
    self.assertEqual(unicode_encodedecimal('123'),
                     b'123')
    self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
                     b'3.14')
    # U+2003 EM SPACE and U+2002 EN SPACE, written as \u escapes instead
    # of \N{NAME} so that compiling this file does not need the
    # unicodedata name lookup. The literal is split so that the digits
    # following the escape are visibly not part of it.
    self.assertEqual(unicode_encodedecimal("\u2003" "3.14" "\u2002"),
                     b' 3.14 ')
    # U+20AC cannot be encoded: "strict" raises UnicodeEncodeError, while
    # "replace" is rejected with a ValueError.
    self.assertRaises(UnicodeEncodeError,
                      unicode_encodedecimal, "123\u20ac", "strict")
    self.assertRaisesRegex(
        ValueError,
        "^'decimal' codec can't encode character",
        unicode_encodedecimal, "123\u20ac", "replace")
# # TODO(jart): pycomp.com needs support for \N{NAME} escapes
# @support.cpython_only
# def test_encode_decimal(self):
# from _testcapi import unicode_encodedecimal
# self.assertEqual(unicode_encodedecimal('123'),
# b'123')
# self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
# b'3.14')
# self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
# b' 3.14 ')
# self.assertRaises(UnicodeEncodeError,
# unicode_encodedecimal, "123\u20ac", "strict")
# self.assertRaisesRegex(
# ValueError,
# "^'decimal' codec can't encode character",
# unicode_encodedecimal, "123\u20ac", "replace")
@support.cpython_only
def test_transform_decimal(self):
    """Exercise _testcapi.unicode_transformdecimaltoascii on sample inputs."""
    from _testcapi import unicode_transformdecimaltoascii as transform_decimal
    self.assertEqual(transform_decimal('123'),
                     '123')
    self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
                     '3.14')
    # U+2003 EM SPACE and U+2002 EN SPACE, written as \u escapes instead
    # of \N{NAME} so that compiling this file does not need the
    # unicodedata name lookup. The literal is split so that the digits
    # following the escape are visibly not part of it. The spaces are
    # expected to come back unchanged.
    spaced = "\u2003" "3.14" "\u2002"
    self.assertEqual(transform_decimal(spaced),
                     spaced)
    # A non-digit such as U+20AC is expected to pass through unchanged.
    self.assertEqual(transform_decimal('123\u20ac'),
                     '123\u20ac')
# # TODO(jart): pycomp.com needs support for \N{NAME} escapes
# @support.cpython_only
# def test_transform_decimal(self):
# from _testcapi import unicode_transformdecimaltoascii as transform_decimal
# self.assertEqual(transform_decimal('123'),
# '123')
# self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
# '3.14')
# self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
# "\N{EM SPACE}3.14\N{EN SPACE}")
# self.assertEqual(transform_decimal('123\u20ac'),
# '123\u20ac')
@support.cpython_only
def test_pep393_utf8_caching_bug(self):

View file

@ -241,21 +241,21 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
class UnicodeMiscTest(UnicodeDatabaseTest):
def test_failed_import_during_compiling(self):
    """A \\\\N escape with unicodedata unimportable fails with SyntaxError."""
    # Issue 4367
    # Decoding \N escapes requires the unicodedata module. If it can't be
    # imported, we shouldn't segfault.

    # The child program blocks the import and then evaluates a literal
    # containing a \N escape; the eval should raise a SyntaxError.
    program = (
        "import sys;"
        "sys.modules['unicodedata'] = None;"
        """eval("'\\\\N{SOFT HYPHEN}'")"""
    )
    expected = ("SyntaxError: (unicode error) \\N escapes not supported "
                "(can't load unicodedata module)")
    # Run in a separate process: unicodedata may already have been loaded
    # in this one.
    failure = script_helper.assert_python_failure("-c", program)
    stderr_text = failure.err.decode("ascii")
    self.assertIn(expected, stderr_text)
# # TODO(jart): pycomp.com needs support for \N{NAME} escapes
# def test_failed_import_during_compiling(self):
# # Issue 4367
# # Decoding \N escapes requires the unicodedata module. If it can't be
# # imported, we shouldn't segfault.
# # This program should raise a SyntaxError in the eval.
# code = "import sys;" \
# "sys.modules['unicodedata'] = None;" \
# """eval("'\\\\N{SOFT HYPHEN}'")"""
# # We use a separate process because the unicodedata module may already
# # have been loaded in this process.
# result = script_helper.assert_python_failure("-c", code)
# error = "SyntaxError: (unicode error) \\N escapes not supported " \
# "(can't load unicodedata module)"
# self.assertIn(error, result.err.decode("ascii"))
def test_decimal_numeric_consistent(self):
# Test that decimal and numeric are consistent,

View file

@ -787,18 +787,19 @@ class SimpleServerTestCase(BaseServerTestCase):
# protocol error; provide additional information in test output
self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
def test_nonascii(self):
    """Non-ASCII strings passed to the server's add() come back intact."""
    # U+0177 LATIN SMALL LETTER Y WITH CIRCUMFLEX and
    # U+01A1 LATIN SMALL LETTER O WITH HORN, written as \u escapes instead
    # of \N{NAME} so that compiling this file does not need the
    # unicodedata name lookup.
    start_string = 'P\u0177t'
    end_string = 'h\u01a1n'
    try:
        p = xmlrpclib.ServerProxy(URL)
        self.assertEqual(p.add(start_string, end_string),
                         start_string + end_string)
    except (xmlrpclib.ProtocolError, OSError) as e:
        # ignore failures due to non-blocking socket 'unavailable' errors
        if not is_unavailable_exception(e):
            # protocol error; provide additional information in test output
            self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
# TODO(jart): pycomp.com needs support for \N{NAME} escapes
# def test_nonascii(self):
# start_string = 'P\N{LATIN SMALL LETTER Y WITH CIRCUMFLEX}t'
# end_string = 'h\N{LATIN SMALL LETTER O WITH HORN}n'
# try:
# p = xmlrpclib.ServerProxy(URL)
# self.assertEqual(p.add(start_string, end_string),
# start_string + end_string)
# except (xmlrpclib.ProtocolError, OSError) as e:
# # ignore failures due to non-blocking socket 'unavailable' errors
# if not is_unavailable_exception(e):
# # protocol error; provide additional information in test output
# self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
def test_client_encoding(self):
start_string = '\u20ac'