Productionize new APE loader and more

The APE_NO_MODIFY_SELF loader payload has been moved out of the examples folder and improved so that it works on BSD systems, and permits general ELF program headers. This brings its quality up enough that it should be acceptable to use by default for many programs, e.g. Python, Lua, and SQLite. It's the responsibility of the user to define an appropriate TMPDIR if /tmp is considered an adversarial environment. Mac OS shall be supported by APE_NO_MODIFY_SELF soon. Fixes and improvements have been made to program_executable_name as it's now the one true way to get the absolute path of the executing image. This change fixes a memory leak in linenoise history loading, introduced by performance optimizations in 51904e2687. This change fixes a longstanding regression with Mach system calls, introduced by 23ae9dfceb back in February, which impacted our sched_yield() implementation, which is why no one noticed until now. The Blinkenlights PC emulator has been improved. We now fix rendering on XNU and BSD by not making the assumption that the kernel terminal driver understands UTF8, since that seems to break its internal modeling of \r\n, which is now being addressed by using \e[𝑦H instead. The paneling is now more compact in real mode so you won't need to make your font as tiny if you're only emulating an 8086 program. The CLMUL ISA is now emulated too. This change also makes improvements to time. CLOCK_MONOTONIC now does the right thing on Windows NT. The nanosecond time module functions added in Python 3.7 have been backported. This change doubles the performance of Argon2 password stretching simply by not using its copy_block and xor_block helper functions, as they were trivial to inline, thus resulting in us needing to iterate over each 1024 byte block four fewer times. This change makes code size improvements. _PyUnicode_ToNumeric() was 64k in size and now it's 10k.
The CJK codec lookup tables now use lazy delta zigzag deflate (δzd) encoding, which reduces their size from 600k to 200k. The code bloat caused by macro abuse in _decimal.c is now addressed too, so our fully-loaded statically-linked hermetically-sealed Python virtual interpreter container is now 9.4 megs in the default build mode and 5.5m in MODE=tiny, which leaves plenty of room for chibicc. The pydoc web server now accommodates the use case of people who work by SSH'ing into a different machine w/ python.com -m pydoc -p8080 -h0.0.0.0. Finally, Python Capsulae delenda est and won't be supported in the future.
2025-10-25 10:40:57 +00:00 · 2021-10-02 08:17:04 -07:00 · 2021-10-02 08:17:04 -07:00 · 47a53e143b
commit 47a53e143b
parent 9cb54218ab
270 changed files with 214544 additions and 23331 deletions
--- a/third_party/python/Lib/pydoc.py
+++ b/third_party/python/Lib/pydoc.py
@ -2142,7 +2142,7 @@ def apropos(key):

 # --------------------------------------- enhanced Web browser interface

-def _start_server(urlhandler, port):
+def _start_server(urlhandler, port, host='localhost'):
    """Start an HTTP server thread on a specific port.

    Start an HTML/text server thread, so HTML or text documents can be
@ -2230,8 +2230,8 @@ def _start_server(urlhandler, port):

    class DocServer(http.server.HTTPServer):

-        def __init__(self, port, callback):
-            self.host = 'localhost'
+        def __init__(self, host, port, callback):
+            self.host = host
            self.address = (self.host, port)
            self.callback = callback
            self.base.__init__(self, self.address, self.handler)
@ -2251,8 +2251,9 @@ def _start_server(urlhandler, port):

    class ServerThread(threading.Thread):

-        def __init__(self, urlhandler, port):
+        def __init__(self, urlhandler, host, port):
            self.urlhandler = urlhandler
+            self.host = host
            self.port = int(port)
            threading.Thread.__init__(self)
            self.serving = False
@ -2265,7 +2266,7 @@ def _start_server(urlhandler, port):
                DocServer.handler = DocHandler
                DocHandler.MessageClass = email.message.Message
                DocHandler.urlhandler = staticmethod(self.urlhandler)
-                docsvr = DocServer(self.port, self.ready)
+                docsvr = DocServer(self.host, self.port, self.ready)
                self.docserver = docsvr
                docsvr.serve_until_quit()
            except Exception as e:
@ -2287,7 +2288,7 @@ def _start_server(urlhandler, port):
            self.serving = False
            self.url = None

-    thread = ServerThread(urlhandler, port)
+    thread = ServerThread(urlhandler, host, port)
    thread.start()
    # Wait until thread.serving is True to make sure we are
    # really up before returning.
@ -2533,20 +2534,27 @@ def _url_handler(url, content_type="text/html"):
    raise TypeError('unknown content type %r for url %s' % (content_type, url))


-def browse(port=0, *, open_browser=True):
+def browse(port=0, *, open_browser=True, host='localhost', have_threads=True):
    """Start the enhanced pydoc Web server and open a Web browser.

    Use port '0' to start the server on an arbitrary port.
    Set open_browser to False to suppress opening a browser.
    """
-    import webbrowser
-    serverthread = _start_server(_url_handler, port)
+    try:
+        import webbrowser
+    except ImportError:
+        webbrowser = None
+    if not have_threads:
+        print('starting server on http://%s:%s/' % (host, port))
+    serverthread = _start_server(_url_handler, port, host)
    if serverthread.error:
        print(serverthread.error)
        return
+    if not have_threads:
+        return
    if serverthread.serving:
        server_help_msg = 'Server commands: [b]rowser, [q]uit'
-        if open_browser:
+        if open_browser and webbrowser is not None:
            webbrowser.open(serverthread.url)
        try:
            print('Server ready at', serverthread.url)
@ -2557,7 +2565,8 @@ def browse(port=0, *, open_browser=True):
                if cmd == 'q':
                    break
                elif cmd == 'b':
-                    webbrowser.open(serverthread.url)
+                    if webbrowser is not None:
+                        webbrowser.open(serverthread.url)
                else:
                    print(server_help_msg)
        except (KeyboardInterrupt, EOFError):
@ -2576,7 +2585,14 @@ def ispath(x):
 def cli():
    """Command-line interface (looks at sys.argv to decide what to do)."""
    import getopt
-    class BadUsage(Exception): pass
+    try:
+        import _thread
+        have_threads = True
+    except ImportError:
+        have_threads = False
+
+    class BadUsage(Exception):
+        pass

    # Scripts don't get the current directory in their path by default
    # unless they are run with the '-m' switch
@ -2587,10 +2603,11 @@ def cli():
        sys.path.insert(0, '.')

    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'bk:p:w')
+        opts, args = getopt.getopt(sys.argv[1:], 'bk:p:h:w')
        writing = False
        start_server = False
        open_browser = False
+        host = None
        port = None
        for opt, val in opts:
            if opt == '-b':
@ -2602,13 +2619,18 @@ def cli():
            if opt == '-p':
                start_server = True
                port = val
+            if opt == '-h':
+                host = val
            if opt == '-w':
                writing = True

        if start_server:
            if port is None:
                port = 0
-            browse(port, open_browser=open_browser)
+            if host is None:
+                host = 'localhost'
+            browse(port, open_browser=open_browser, host=host,
+                   have_threads=have_threads)
            return

        if not args: raise BadUsage
--- a/third_party/python/Lib/pydoc_data/_pydoc.css
+++ b/third_party/python/Lib/pydoc_data/_pydoc.css
@ -1,6 +1,4 @@
 /*
    CSS file for pydoc.
-
    Contents of this file are subject to change without notice.
-
 */
--- a/third_party/python/Lib/sqlite3/init.py
+++ b/third_party/python/Lib/sqlite3/init.py
@ -20,6 +20,9 @@
 #    misrepresented as being the original software.
 # 3. This notice may not be removed or altered from any source distribution.

+"""SQLite Python Bindings Module
+"""
+
 from sqlite3.dbapi2 import *

 if __name__ == 'PYOBJ.COM':
--- a/third_party/python/Lib/sqlite3/dbapi2.py
+++ b/third_party/python/Lib/sqlite3/dbapi2.py
@ -20,6 +20,9 @@
 #    misrepresented as being the original software.
 # 3. This notice may not be removed or altered from any source distribution.

+"""SQLite Python Database API Module
+"""
+
 import datetime
 import time
 import collections.abc
--- a/third_party/python/Lib/sqlite3/dump.py
+++ b/third_party/python/Lib/sqlite3/dump.py
@ -1,11 +1,14 @@
 # Mimic the sqlite3 console shell's .dump command
 # Author: Paul Kippes <kippesp@gmail.com>

-# Every identifier in sql is quoted based on a comment in sqlite
-# documentation "SQLite adds new keywords from time to time when it
-# takes on new features. So to prevent your code from being broken by
-# future enhancements, you should normally quote any identifier that
-# is an English language word, even if you do not have to."
+"""SQLite Python Dump Module
+
+Every identifier in sql is quoted based on a comment in sqlite
+documentation "SQLite adds new keywords from time to time when it
+takes on new features. So to prevent your code from being broken by
+future enhancements, you should normally quote any identifier that
+is an English language word, even if you do not have to."
+"""

 def _iterdump(connection):
    """
--- a/third_party/python/Lib/test/BIG5.TXT
+++ b/third_party/python/Lib/test/BIG5.TXT
--- a/third_party/python/Lib/test/BIG5HKSCS-2004.TXT
+++ b/third_party/python/Lib/test/BIG5HKSCS-2004.TXT
--- a/third_party/python/Lib/test/CP932.TXT
+++ b/third_party/python/Lib/test/CP932.TXT
--- a/third_party/python/Lib/test/CP936.TXT
+++ b/third_party/python/Lib/test/CP936.TXT
--- a/third_party/python/Lib/test/CP949.TXT
+++ b/third_party/python/Lib/test/CP949.TXT
--- a/third_party/python/Lib/test/CP950.TXT
+++ b/third_party/python/Lib/test/CP950.TXT
--- a/third_party/python/Lib/test/EUC-CN.TXT
+++ b/third_party/python/Lib/test/EUC-CN.TXT
--- a/third_party/python/Lib/test/EUC-JISX0213.TXT
+++ b/third_party/python/Lib/test/EUC-JISX0213.TXT
--- a/third_party/python/Lib/test/EUC-JP.TXT
+++ b/third_party/python/Lib/test/EUC-JP.TXT
--- a/third_party/python/Lib/test/EUC-KR.TXT
+++ b/third_party/python/Lib/test/EUC-KR.TXT
--- a/third_party/python/Lib/test/JOHAB.TXT
+++ b/third_party/python/Lib/test/JOHAB.TXT
--- a/third_party/python/Lib/test/SHIFTJIS.TXT
+++ b/third_party/python/Lib/test/SHIFTJIS.TXT
--- a/third_party/python/Lib/test/SHIFT_JISX0213.TXT
+++ b/third_party/python/Lib/test/SHIFT_JISX0213.TXT
--- a/third_party/python/Lib/test/gb-18030-2000.ucm
+++ b/third_party/python/Lib/test/gb-18030-2000.ucm
--- a/third_party/python/Lib/test/multibytecodec_support.py
+++ b/third_party/python/Lib/test/multibytecodec_support.py
@ -8,7 +8,6 @@ import os
 import re
 import sys
 import unittest
-from http.client import HTTPException
 from test import support
 from io import BytesIO

@ -285,16 +284,13 @@ class TestBase_Mapping(unittest.TestCase):
    codectests = []

    def setUp(self):
-        try:
-            self.open_mapping_file().close() # test it to report the error early
-        except (OSError, HTTPException):
-            self.skipTest("Could not retrieve "+self.mapfileurl)
+        pass

    def open_mapping_file(self):
-        return support.open_urlresource(self.mapfileurl)
+        return open(self.mapfileurl)

    def test_mapping_file(self):
-        if self.mapfileurl.endswith('.xml'):
+        if self.mapfileurl.endswith('.ucm'):
            self._test_mapping_file_ucm()
        else:
            self._test_mapping_file_plain()
@ -307,30 +303,25 @@ class TestBase_Mapping(unittest.TestCase):

        with self.open_mapping_file() as f:
            for line in f:
+                line = line.split('#')[0].strip()
                if not line:
                    break
-                data = line.split('#')[0].split()
+                data = line.split()
                if len(data) != 2:
                    continue
-
-                if data[0][:2] != '0x':
-                    self.fail(f"Invalid line: {line!r}")
-                csetch = bytes.fromhex(data[0][2:])
+                csetch = bytes.fromhex(data[0])
                if len(csetch) == 1 and 0x80 <= csetch[0]:
                    continue
-
                unich = unichrs(data[1])
                if ord(unich) == 0xfffd or unich in urt_wa:
                    continue
                urt_wa[unich] = csetch
-
                self._testpoint(csetch, unich)

    def _test_mapping_file_ucm(self):
        with self.open_mapping_file() as f:
            ucmdata = f.read()
-        uc = re.findall('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>', ucmdata)
-        for uni, coded in uc:
+        for uni, coded in re.findall('^([A-F0-9]+)\t([0-9A-F ]+)$', ucmdata):
            unich = chr(int(uni, 16))
            codech = bytes(int(c, 16) for c in coded.split())
            self._testpoint(codech, unich)
--- a/third_party/python/Lib/test/test_codecencodings_cn.py
+++ b/third_party/python/Lib/test/test_codecencodings_cn.py
@ -11,8 +11,8 @@ class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    tstring = multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # invalid bytes
-        (b"abc\x81\x81\xc1\xc4", "strict",  None),
-        (b"abc\xc8", "strict",  None),
+        (b"abc\x81\x81\xc1\xc4", "strict", None),
+        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
@ -24,8 +24,8 @@ class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    tstring = multibytecodec_support.load_teststring('gbk')
    codectests = (
        # invalid bytes
-        (b"abc\x80\x80\xc1\xc4", "strict",  None),
-        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "strict", None),
+        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
@ -38,8 +38,8 @@ class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    tstring = multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # invalid bytes
-        (b"abc\x80\x80\xc1\xc4", "strict",  None),
-        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "strict", None),
+        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
--- a/third_party/python/Lib/test/test_codecmaps_cn.py
+++ b/third_party/python/Lib/test/test_codecmaps_cn.py
@ -9,18 +9,17 @@ import unittest
 class TestGB2312Map(multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'gb2312'
-    mapfileurl = 'http://www.pythontest.net/unicode/EUC-CN.TXT'
+    mapfileurl = '/zip/.python/test/EUC-CN.TXT'

 class TestGBKMap(multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'gbk'
-    mapfileurl = 'http://www.pythontest.net/unicode/CP936.TXT'
+    mapfileurl = '/zip/.python/test/CP936.TXT'

 class TestGB18030Map(multibytecodec_support.TestBase_Mapping,
                     unittest.TestCase):
    encoding = 'gb18030'
-    mapfileurl = 'http://www.pythontest.net/unicode/gb-18030-2000.xml'
-
+    mapfileurl = '/zip/.python/test/gb-18030-2000.ucm'

 if __name__ == "__main__":
    unittest.main()
--- a/third_party/python/Lib/test/test_codecmaps_hk.py
+++ b/third_party/python/Lib/test/test_codecmaps_hk.py
@ -9,7 +9,7 @@ import unittest
 class TestBig5HKSCSMap(multibytecodec_support.TestBase_Mapping,
                       unittest.TestCase):
    encoding = 'big5hkscs'
-    mapfileurl = 'http://www.pythontest.net/unicode/BIG5HKSCS-2004.TXT'
+    mapfileurl = '/zip/.python/test/BIG5HKSCS-2004.TXT'

 if __name__ == "__main__":
    unittest.main()
--- a/third_party/python/Lib/test/test_codecmaps_jp.py
+++ b/third_party/python/Lib/test/test_codecmaps_jp.py
@ -9,7 +9,7 @@ import unittest
 class TestCP932Map(multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'cp932'
-    mapfileurl = 'http://www.pythontest.net/unicode/CP932.TXT'
+    mapfileurl = '/zip/.python/test/CP932.TXT'
    supmaps = [
        (b'\x80', '\u0080'),
        (b'\xa0', '\uf8f0'),
@ -25,14 +25,14 @@ class TestEUCJPCOMPATMap(multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    encoding = 'euc_jp'
    mapfilename = 'EUC-JP.TXT'
-    mapfileurl = 'http://www.pythontest.net/unicode/EUC-JP.TXT'
+    mapfileurl = '/zip/.python/test/EUC-JP.TXT'


 class TestSJISCOMPATMap(multibytecodec_support.TestBase_Mapping,
                        unittest.TestCase):
    encoding = 'shift_jis'
    mapfilename = 'SHIFTJIS.TXT'
-    mapfileurl = 'http://www.pythontest.net/unicode/SHIFTJIS.TXT'
+    mapfileurl = '/zip/.python/test/SHIFTJIS.TXT'
    pass_enctest = [
        (b'\x81_', '\\'),
    ]
@ -46,14 +46,14 @@ class TestEUCJISX0213Map(multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    encoding = 'euc_jisx0213'
    mapfilename = 'EUC-JISX0213.TXT'
-    mapfileurl = 'http://www.pythontest.net/unicode/EUC-JISX0213.TXT'
+    mapfileurl = '/zip/.python/test/EUC-JISX0213.TXT'


 class TestSJISX0213Map(multibytecodec_support.TestBase_Mapping,
                       unittest.TestCase):
    encoding = 'shift_jisx0213'
    mapfilename = 'SHIFT_JISX0213.TXT'
-    mapfileurl = 'http://www.pythontest.net/unicode/SHIFT_JISX0213.TXT'
+    mapfileurl = '/zip/.python/test/SHIFT_JISX0213.TXT'


 if __name__ == "__main__":
--- a/third_party/python/Lib/test/test_codecmaps_kr.py
+++ b/third_party/python/Lib/test/test_codecmaps_kr.py
@ -9,23 +9,21 @@ import unittest
 class TestCP949Map(multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'cp949'
-    mapfileurl = 'http://www.pythontest.net/unicode/CP949.TXT'
-
+    mapfileurl = '/zip/.python/test/CP949.TXT'

 class TestEUCKRMap(multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'euc_kr'
-    mapfileurl = 'http://www.pythontest.net/unicode/EUC-KR.TXT'
+    mapfileurl = '/zip/.python/test/EUC-KR.TXT'

    # A4D4 HANGUL FILLER indicates the begin of 8-bytes make-up sequence.
    pass_enctest = [(b'\xa4\xd4', '\u3164')]
    pass_dectest = [(b'\xa4\xd4', '\u3164')]

-
 class TestJOHABMap(multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'johab'
-    mapfileurl = 'http://www.pythontest.net/unicode/JOHAB.TXT'
+    mapfileurl = '/zip/.python/test/JOHAB.TXT'
    # KS X 1001 standard assigned 0x5c as WON SIGN.
    # but, in early 90s that is the only era used johab widely,
    # the most softwares implements it as REVERSE SOLIDUS.
--- a/third_party/python/Lib/test/test_codecmaps_tw.py
+++ b/third_party/python/Lib/test/test_codecmaps_tw.py
@ -9,12 +9,12 @@ import unittest
 class TestBIG5Map(multibytecodec_support.TestBase_Mapping,
                  unittest.TestCase):
    encoding = 'big5'
-    mapfileurl = 'http://www.pythontest.net/unicode/BIG5.TXT'
+    mapfileurl = '/zip/.python/test/BIG5.TXT'

 class TestCP950Map(multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    encoding = 'cp950'
-    mapfileurl = 'http://www.pythontest.net/unicode/CP950.TXT'
+    mapfileurl = '/zip/.python/test/CP950.TXT'
    pass_enctest = [
        (b'\xa2\xcc', '\u5341'),
        (b'\xa2\xce', '\u5345'),
--- a/third_party/python/Lib/test/test_decimal.py
+++ b/third_party/python/Lib/test/test_decimal.py
@ -453,10 +453,6 @@ class IBMTestCases(unittest.TestCase):
        myexceptions.sort(key=repr)
        theirexceptions.sort(key=repr)

-        if result == ans and str(result) != str(ans):
-            print('WUT %s %s' % (result, ans))
-        if result != ans or str(result) != str(ans):
-            print('wut %r %r' % (result, ans))
        self.assertEqual(result, ans,
                         'Incorrect answer for ' + s + ' -- got ' + result)

--- a/third_party/python/Lib/test/test_scratch.py
+++ b/third_party/python/Lib/test/test_scratch.py
@ -7,6 +7,9 @@ import unittest
 class BooTest(unittest.TestCase):
    def test_boo(self):
        pass
+        # cosmo.ftrace()
+        # print('hi')
+        # os._exit(0)

 if __name__ == '__main__':
    unittest.main()