Bring Lua to 5.4.6. (#1214)

This essentially re-does the work of #875 on top of master.

This is what I did to check that Cosmo's Lua extensions still worked:

```
$ build/bootstrap/make MODE=aarch64 o/aarch64/third_party/lua/lua
$ ape o/aarch64/third_party/lua/lua
>: 10
10
>: 010
8
>: 0b10
2
>: string.byte("\e")
27
>: "Hello, %s" % {"world"}
Hello, world
>: "*" * 3
***
```

`luaL_traceback2` was used to show the stack trace with parameter
values; it's used in `LuaCallWithTrace`, which is used in Redbean to run
Lua code. You should be able to see the extended stack trace by running
something like this: `redbean -e "function a(b)c()end a(2)"` (with
"params" indicating the extended stack trace):

```
stack traceback:
 [string "function a(b)c()end a(2)"]:1: in function 'a', params: b = 2;
 [string "function a(b)c()end a(2)"]:1: in main chunk
```
@pkulchenko confirmed that I get the expected result with the updated
code.

This is what I did to check that Lua itself still worked:

```
$ cd third_party/lua/test/
$ ape ../../../o/aarch64/third_party/lua/lua all.lua
```

There's one test failure, in `files.lua`:

```
***** FILE 'files.lua'*****
testing i/o
../../../o/aarch64/third_party/lua/lua: files.lua:84: assertion failed!
stack traceback:
[C]: in function 'assert'
files.lua:84: in main chunk
(...tail calls...)
all.lua:195: in main chunk
[C]: in ?
.>>> closing state <<<
```

That isn't a result of these changes; the same test is failing in
master.

The failure is here:

```lua
if not _port then   -- invalid seek
  local status, msg, code = io.stdin:seek("set", 1000)
  assert(not status and type(msg) == "string" and type(code) == "number")
end
```

The test expects a seek to offset 1,000 on stdin to fail — but it
doesn't. `status` ends up being the new offset rather than `nil`.

If I comment out that one test, the remaining tests succeed.
This commit is contained in:
Michael Lenaghan 2024-06-15 20:13:08 -04:00 committed by GitHub
parent 3a599bfbe1
commit 0dbf01bf1d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
90 changed files with 2741 additions and 1376 deletions

View file

@ -1,9 +1,30 @@
/*
** $Id: lutf8lib.c $
** Standard library for UTF-8 manipulation
** See Copyright Notice in lua.h
*/
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Lua
Copyright © 2004-2023 Lua.org, PUC-Rio.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#define lutf8lib_c
#define LUA_LIB
@ -11,12 +32,16 @@
#include "third_party/lua/lprefix.h"
#include "third_party/lua/lua.h"
#include "third_party/lua/lualib.h"
__static_yoink("lua_notice");
#define MAXUNICODE 0x10FFFFu
#define MAXUTF 0x7FFFFFFFu
#define MSGInvalid "invalid UTF-8 code"
/*
** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits.
*/
@ -27,7 +52,8 @@ typedef unsigned long utfint;
#endif
#define iscont(p) ((*(p) & 0xC0) == 0x80)
#define iscont(c) (((c) & 0xC0) == 0x80)
#define iscontp(p) iscont(*(p))
/* from strlib */
@ -57,7 +83,7 @@ static const char *utf8_decode (const char *s, utfint *val, int strict) {
int count = 0; /* to count number of continuation bytes */
for (; c & 0x40; c <<= 1) { /* while it needs continuation bytes... */
unsigned int cc = (unsigned char)s[++count]; /* read next byte */
if ((cc & 0xC0) != 0x80) /* not a continuation byte? */
if (!iscont(cc)) /* not a continuation byte? */
return NULL; /* invalid byte sequence */
res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
}
@ -132,7 +158,7 @@ static int codepoint (lua_State *L) {
utfint code;
s = utf8_decode(s, &code, !lax);
if (s == NULL)
return luaL_error(L, "invalid UTF-8 code");
return luaL_error(L, MSGInvalid);
lua_pushinteger(L, code);
n++;
}
@ -182,16 +208,16 @@ static int byteoffset (lua_State *L) {
"position out of bounds");
if (n == 0) {
/* find beginning of current byte sequence */
while (posi > 0 && iscont(s + posi)) posi--;
while (posi > 0 && iscontp(s + posi)) posi--;
}
else {
if (iscont(s + posi))
if (iscontp(s + posi))
return luaL_error(L, "initial position is a continuation byte");
if (n < 0) {
while (n < 0 && posi > 0) { /* move back */
do { /* find beginning of previous character */
posi--;
} while (posi > 0 && iscont(s + posi));
} while (posi > 0 && iscontp(s + posi));
n++;
}
}
@ -200,7 +226,7 @@ static int byteoffset (lua_State *L) {
while (n > 0 && posi < (lua_Integer)len) {
do { /* find beginning of next character */
posi++;
} while (iscont(s + posi)); /* (cannot pass final '\0') */
} while (iscontp(s + posi)); /* (cannot pass final '\0') */
n--;
}
}
@ -216,20 +242,17 @@ static int byteoffset (lua_State *L) {
static int iter_aux (lua_State *L, int strict) {
size_t len;
const char *s = luaL_checklstring(L, 1, &len);
lua_Integer n = lua_tointeger(L, 2) - 1;
if (n < 0) /* first iteration? */
n = 0; /* start from here */
else if (n < (lua_Integer)len) {
n++; /* skip current byte */
while (iscont(s + n)) n++; /* and its continuations */
lua_Unsigned n = (lua_Unsigned)lua_tointeger(L, 2);
if (n < len) {
while (iscontp(s + n)) n++; /* go to next character */
}
if (n >= (lua_Integer)len)
if (n >= len) /* (also handles original 'n' being negative) */
return 0; /* no more codepoints */
else {
utfint code;
const char *next = utf8_decode(s + n, &code, strict);
if (next == NULL)
return luaL_error(L, "invalid UTF-8 code");
if (next == NULL || iscontp(next))
return luaL_error(L, MSGInvalid);
lua_pushinteger(L, n + 1);
lua_pushinteger(L, code);
return 2;
@ -248,7 +271,8 @@ static int iter_auxlax (lua_State *L) {
static int iter_codes (lua_State *L) {
int lax = lua_toboolean(L, 2);
luaL_checkstring(L, 1);
const char *s = luaL_checkstring(L, 1);
luaL_argcheck(L, !iscontp(s), 1, MSGInvalid);
lua_pushcfunction(L, lax ? iter_auxlax : iter_auxstrict);
lua_pushvalue(L, 1);
lua_pushinteger(L, 0);