From d3ff48c63f89060844dcfa80f0526b2534dfd56f Mon Sep 17 00:00:00 2001 From: BONNAURE Olivier Date: Wed, 14 Feb 2024 10:55:50 +0100 Subject: [PATCH] [Redbean] Feature / OnError(status, message) hook (#1103) --- tool/net/definitions.lua | 155 +++++++++++++++++++++------------------ tool/net/help.txt | 6 ++ tool/net/redbean.c | 30 ++++++-- 3 files changed, 114 insertions(+), 77 deletions(-) diff --git a/tool/net/definitions.lua b/tool/net/definitions.lua index bbe4476af..aa9e830de 100644 --- a/tool/net/definitions.lua +++ b/tool/net/definitions.lua @@ -392,22 +392,22 @@ LUA ENHANCEMENTS - redbean supports a printf modulus operator, like Python. For example, you can say `"hello %s" % {"world"}` instead of `string.format("hello %s", "world")`. - + -- - redbean supports a string multiply operator, like Python. For example, you can say `"hi" * 2` instead of `string.rep("hi", 2)`. - + - redbean supports octal (base 8) integer literals. For example `0644 == 420` is the case in redbean, whereas in upstream Lua `0644 == 644` would be the case. - + - redbean supports binary (base 2) integer literals. For example `0b1010 == 10` is the case in redbean, whereas in upstream Lua `0b1010` would result in an error. - + - redbean supports the GNU syntax for the ASCII ESC character in string literals. For example, `"\e"` is the same as `"\x1b"`. - + ]] ---@class string @@ -550,6 +550,17 @@ SPECIAL PATHS --- function OnHttpRequest() end +--- Hooks error handling. +--- +--- If this function is defined in the global scope by your `/.init.lua` +--- then any errors occurring in the OnHttpRequest() hook will be caught. +--- You'll then be able to do whatever you need with the error status and +--- error message. +--- +---@param status uint16 +---@param message string +function OnError(status, message) end + --- Hooks client connection creation. 
--- --- If this function is defined it'll be called from the main process @@ -1436,9 +1447,9 @@ function Log(level, message) end function ParseHttpDateTime(rfc1123) end --- Parses URL. ---- +--- ---@return Url url An object containing the following fields is returned: ---- +--- --- - `scheme` is a string, e.g. `"http"` --- - `user` is the username string, or nil if absent --- - `pass` is the password string, or nil if absent @@ -1448,28 +1459,28 @@ function ParseHttpDateTime(rfc1123) end --- - `params` is the URL paramaters, e.g. `/?a=b&c` would be --- represented as the data structure `{{"a", "b"}, {"c"}, ...}` --- - `fragment` is the stuff after the `#` character ---- +--- ---@param url string ---@param flags integer? may have: ---- +--- --- - `kUrlPlus` to turn `+` into space --- - `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8 ---- +--- --- This parser is charset agnostic. Percent encoded bytes are --- decoded for all fields. Returned values might contain things --- like NUL characters, spaces, control codes, and non-canonical --- encodings. Absent can be discerned from empty by checking if --- the pointer is set. ---- +--- --- There's no failure condition for this routine. This is a --- permissive parser. This doesn't normalize path segments like ---- `.` or `..` so use IsAcceptablePath() to check for those. No +--- `.` or `..` so use IsAcceptablePath() to check for those. No --- restrictions are imposed beyond that which is strictly --- necessary for parsing. All the data that is provided will be --- consumed to the one of the fields. Strict conformance is --- enforced on some fields more than others, like scheme, since --- it's the most non-deterministically defined field of them all. ---- +--- --- Please note this is a URL parser, not a URI parser. 
Which --- means we support everything the URI spec says we should do --- except for the things we won't do, like tokenizing path @@ -2244,7 +2255,7 @@ function ProgramTrustedIp(ip, cidr) end --- is granted per second to all buckets. The minimum value is 1/3600 --- which means once per hour. The maximum value for this setting is --- 1e6, which means once every microsecond. ---- +--- --- `cidr` is the specificity of judgement. Since creating 2^32 buckets --- would need 4GB of RAM, redbean defaults this value to 24 which means --- filtering applies to class c network blocks (i.e. x.x.x.*), and your @@ -2253,38 +2264,38 @@ function ProgramTrustedIp(ip, cidr) end --- number means you use less ram/cpu, but splash damage applies more to --- your clients; whereas higher numbers means more ram/cpu usage, while --- ensuring rate limiting only applies to specific compromised actors. ---- +--- --- `reject` is the token count or treshold at which redbean should send --- 429 Too Many Request warnings to the client. Permitted values can be --- anywhere between -1 and 126 inclusively. The default value is 30 and --- -1 means disable to disable (assuming AcquireToken() will be used). ---- +--- --- `ignore` is the token count or treshold, at which redbean should try --- simply ignoring clients and close the connection without logging any --- kind of warning, and without sending any response. The default value --- for this setting is `MIN(reject / 2, 15)`. This must be less than or --- equal to the `reject` setting. Allowed values are [-1,126] where you --- can use -1 as a means of disabling `ignore`. ---- +--- --- `ban` is the token count at which redbean should report IP addresses --- to the blackhole daemon via a unix-domain socket datagram so they'll --- get banned in the kernel routing tables. redbean's default value for --- this setting is `MIN(ignore / 10, 1)`. Permitted values are [-1,126] --- where -1 may be used as a means of disabling the `ban` feature. 
---- +--- --- This function throws an exception if the constraints described above --- are not the case. Warnings are logged should redbean fail to connect --- to the blackhole daemon, assuming it hasn't been disabled. It's safe --- to use load balancing tools when banning is enabled, since you can't --- accidentally ban your own network interface addresses, loopback ips, --- or ProgramTrustedIp() addresses where these rate limits don't apply. ---- +--- --- It's assumed will be called from the .init.lua global scope although --- it could be used in interpreter mode, or from a forked child process --- in which case the only processes that'll have ability to use it will --- be that same process, and any descendent processes. This function is --- only able to be called once. ---- +--- --- This feature is not available in unsecure mode. ---@param replenish number? ---@param cidr integer? @@ -2309,10 +2320,10 @@ function ProgramTokenBucket(replenish, cidr, reject, ignore, ban) end function AcquireToken(ip) end --- Counts number of tokens in bucket. ---- +--- --- This function is the same as AcquireToken() except no subtraction is --- performed, i.e. no token is taken. ---- +--- --- `ip` should be an IPv4 address and this defaults to GetClientAddr(), --- although other interpretations of its meaning are possible. ---@param ip uint32? @@ -2326,12 +2337,12 @@ function CountTokens(ip) end --- based on the banned threshold. However if your Lua code calls --- `AcquireToken()` manually, then you'll need this function to take --- action on the returned values. ---- +--- --- This function returns true if a datagram could be sent sucessfully. --- Otherwise false is returned, which can happen if blackholed isn't --- running, or if a lot of processes are sending messages to it and the --- operation would have blocked. ---- +--- --- It's assumed that the blackholed service is running locally in the --- background. 
---@param ip uint32 @@ -2416,7 +2427,7 @@ lsqlite3 = { --- An `lsqlite3.BUSY` error can occur at any point in a transaction: when --- the transaction is first started, during any write or update operations, --- or when the transaction commits. To avoid encountering `lsqlite3.BUSY` - --- errors in the middle of a transaction, the application can use + --- errors in the middle of a transaction, the application can use --- `BEGIN IMMEDIATE` instead of just `BEGIN` to start a transaction. The --- `BEGIN IMMEDIATE` command might itself return `lsqlite3.BUSY`, but if it --- succeeds, then SQLite guarantees that no subsequent operations on the same database through the next COMMIT will return `lsqlite3.BUSY`. @@ -2472,7 +2483,7 @@ lsqlite3 = { CORRUPT = 11, --- The `lsqlite3.NOTFOUND` result code is exposed in three ways: --- - --- `lsqlite3.NOTFOUND` can be returned by the `sqlite3_file_control()` + --- `lsqlite3.NOTFOUND` can be returned by the `sqlite3_file_control()` --- interface to indicate that the file control opcode passed as the third --- argument was not recognized by the underlying VFS. --- @@ -2491,7 +2502,7 @@ lsqlite3 = { --- complete because the disk is full. Note that this error can occur when --- trying to write information into the main database file, or it can also --- occur when writing into temporary disk files. - --- + --- --- Sometimes applications encounter this error even though there is an --- abundance of primary disk space because the error occurs when writing --- into temporary disk files on a system where temporary files are stored @@ -2521,19 +2532,19 @@ lsqlite3 = { --- the database schema was changed by some other process in between the --- time that the statement was prepared and the time the statement was run, --- this error can result. - --- + --- --- The statement is automatically re-prepared if the schema changes, up to --- `SQLITE_MAX_SCHEMA_RETRY` times (default: 50). 
The `step()` interface --- will only return `lsqlite3.SCHEMA` back to the application if the --- failure persists after these many retries. SCHEMA = 17, --- The `lsqlite3.TOOBIG` error code indicates that a string or BLOB was too - --- large. The default maximum length of a string or BLOB in SQLite is + --- large. The default maximum length of a string or BLOB in SQLite is --- 1,000,000,000 bytes. This maximum length can be changed at compile-time --- using the `SQLITE_MAX_LENGTH` compile-time option. The `lsqlite3.TOOBIG` --- error results when SQLite encounters a string or BLOB that exceeds the --- compile-time limit. - --- + --- --- The `lsqlite3.TOOBIG` error code can also result when an oversized SQL --- statement is passed into one of the `db:prepare()` interface. The --- maximum length of an SQL statement defaults to a much smaller value of @@ -2544,7 +2555,7 @@ lsqlite3 = { --- information about the failed constraint can be found by consulting the --- accompanying error message (returned via `errmsg()`) or by looking at --- the extended error code. - --- + --- --- The `lsqlite3.CONSTRAINT` code can also be used as the return value from --- the `xBestIndex()` method of a virtual table implementation. When --- `xBestIndex()` returns `lsqlite3.CONSTRAINT`, that indicates that the @@ -2558,7 +2569,7 @@ lsqlite3 = { --- BLOB in a column with a declared type of BOOLEAN. But in a few cases, --- SQLite is strict about types. The `lsqlite3.MISMATCH` error is returned --- in those few cases when the types do not match. - --- + --- --- The rowid of a table must be an integer. Attempt to set the rowid to --- anything other than an integer (or a NULL which will be automatically --- converted into the next available integer rowid) results in an @@ -2843,7 +2854,7 @@ function Database:close_vm(temponly) end ---@param func fun(udata: Udata) a Lua function that is invoked by SQLite3 whenever a transaction is committed. 
This callback receives one argument: ---@param udata Udata argument used when the callback was installed. --- ---- If `func` returns `false` or `nil` the COMMIT is allowed to proceed, +--- If `func` returns `false` or `nil` the COMMIT is allowed to proceed, --- otherwise the COMMIT is converted to a ROLLBACK. --- --- See: `db:rollback_hook` and `db:update_hook` @@ -2898,7 +2909,7 @@ function Database:create_aggregate(name, nargs, step, final, userdata) end --- This creates a collation callback. A collation callback is used to establish --- a collation order, mostly for string comparisons and sorting purposes. ---@param name string the name of the collation to be created ----@param func fun(s1: string, s2: string): -1|0|1 a function that accepts two string arguments, compares them and returns `0` if both strings are identical, `-1` if the first argument is lower in the collation order than the second and `1` if the first argument is higher in the collation order than the second. +---@param func fun(s1: string, s2: string): -1|0|1 a function that accepts two string arguments, compares them and returns `0` if both strings are identical, `-1` if the first argument is lower in the collation order than the second and `1` if the first argument is higher in the collation order than the second. --- A simple example: --- --- local function collate(s1,s2) @@ -7263,15 +7274,15 @@ function unix.isatty(fd) end function unix.tiocgwinsz(fd) end --- Returns file descriptor of open anonymous file. ---- +--- --- This creates a secure temporary file inside `$TMPDIR`. If it isn't --- defined, then `/tmp` is used on UNIX and GetTempPath() is used on --- the New Technology. This resolution of `$TMPDIR` happens once. ---- +--- --- Once close() is called, the returned file is guaranteed to be --- deleted automatically. On UNIX the file is unlink()'d before this --- function returns. On the New Technology it happens upon close(). 
---- +--- --- On the New Technology, temporary files created by this function --- should have better performance, because `kNtFileAttributeTemporary` --- asks the kernel to more aggressively cache and reduce i/o ops. @@ -7284,7 +7295,7 @@ function unix.tmpfd() end function unix.sched_yield() end --- Creates interprocess shared memory mapping. ---- +--- --- This function allocates special memory that'll be inherited across --- fork in a shared way. By default all memory in Redbean is "private" --- memory that's only viewable and editable to the process that owns @@ -7294,15 +7305,15 @@ function unix.sched_yield() end --- don't want that to happen, and you want the memory to be shared --- similar to how it would be shared if you were using threads, then --- you can use this function to achieve just that. ---- +--- --- The memory object this function returns may be accessed using its --- methods, which support atomics and futexes. It's very low-level. --- For example, you can use it to implement scalable mutexes: ---- +--- --- mem = unix.mapshared(8000 * 8) ---- +--- --- LOCK = 0 -- pick an arbitrary word index for lock ---- +--- --- -- From Futexes Are Tricky Version 1.1 § Mutex, Take 3; --- -- Ulrich Drepper, Red Hat Incorporated, June 27, 2004. --- function Lock() @@ -7324,7 +7335,7 @@ function unix.sched_yield() end --- mem:wake(LOCK, 1) --- end --- end ---- +--- --- It's possible to accomplish the same thing as unix.mapshared() --- using files and unix.fcntl() advisory locks. However this goes --- significantly faster. For example, that's what SQLite does and @@ -7333,7 +7344,7 @@ function unix.sched_yield() end --- might need something lower level than file locks, to implement --- things like throttling. Shared memory is a good way to do that --- since there's nothing that's faster. ---- +--- ---@param size integer --- The `size` parameter needs to be a multiple of 8. The returned --- memory is zero initialized. 
When allocating shared memory, you @@ -7341,7 +7352,7 @@ function unix.sched_yield() end --- overhead of allocating a single shared mapping is 500 words of --- resident memory and 8000 words of virtual memory. It's because --- the Cosmopolitan Libc mmap() granularity is 2**16. ---- +--- --- This system call does not fail. An exception is instead thrown --- if sufficient memory isn't available. --- @@ -7351,16 +7362,16 @@ function unix.mapshared(size) end ---@class unix.Memory: userdata --- unix.Memory encapsulates memory that's shared across fork() and --- this module provides the fundamental synchronization primitives ---- +--- --- Redbean memory maps may be used in two ways: ---- +--- --- 1. as an array of bytes a.k.a. a string --- 2. as an array of words a.k.a. integers ---- +--- --- They're aliased, union, or overlapped views of the same memory. --- For example if you write a string to your memory region, you'll --- be able to read it back as an integer. ---- +--- --- Reads, writes, and word operations will throw an exception if a --- memory boundary error or overflow occurs. unix.Memory = {} @@ -7372,7 +7383,7 @@ unix.Memory = {} --- If `bytes` is none or nil, then the nul-terminated string at --- `offset` is returned. You may specify `bytes` to safely read --- binary data. ---- +--- --- This operation happens atomically. Each shared mapping has a --- single lock which is used to synchronize reads and writes to --- that specific map. To make it scale, create additional maps. @@ -7381,7 +7392,7 @@ unix.Memory = {} function unix.Memory:read(offset, bytes) end --- Writes bytes to memory region. ---- +--- ---@param data string ---@param offset integer? --- `offset` is the starting byte index to which memory is copied, @@ -7391,7 +7402,7 @@ function unix.Memory:read(offset, bytes) end --- If `bytes` is none or nil, then an implicit nil-terminator --- will be included after your `data` so things like json can --- be easily serialized to shared memory. 
---- +--- --- This operation happens atomically. Each shared mapping has a --- single lock which is used to synchronize reads and writes to --- that specific map. To make it scale, create additional maps. @@ -7399,7 +7410,7 @@ function unix.Memory:write(data, offset, bytes) end --- Loads word from memory region. ---- +--- --- This operation is atomic and has relaxed barrier semantics. ---@param word_index integer ---@return integer @@ -7407,7 +7418,7 @@ function unix.Memory:write(data, offset, bytes) end function unix.Memory:load(word_index) end --- Stores word from memory region. ---- +--- --- This operation is atomic and has relaxed barrier semantics. ---@param word_index integer ---@param value integer @@ -7415,10 +7426,10 @@ function unix.Memory:store(word_index, value) end --- Exchanges value. ---- +--- --- This sets word at `word_index` to `value` and returns the value --- previously held in by the word. ---- +--- --- This operation is atomic and provides the same memory barrier --- semantics as the aligned x86 LOCK XCHG instruction. ---@param word_index integer @@ -7427,12 +7438,12 @@ function unix.Memory:store(word_index, value) end function unix.Memory:xchg(word_index, value) end --- Compares and exchanges value. ---- +--- --- This inspects the word at `word_index` and if its value is the same --- as `old` then it'll be replaced by the value `new`, in which case --- `true, old` shall be returned. If a different value was held at --- word, then `false` shall be returned along with the word. ---- +--- --- This operation happens atomically and provides the same memory --- barrier semantics as the aligned x86 LOCK CMPXCHG instruction. ---@param word_index integer @@ -7442,11 +7453,11 @@ function unix.Memory:xchg(word_index, value) end function unix.Memory:cmpxchg(word_index, old, new) end --- Fetches then adds value. ---- +--- --- This method modifies the word at `word_index` to contain the sum of --- value and the `value` paremeter. 
This method then returns the value --- as it existed before the addition was performed. ---- +--- --- This operation is atomic and provides the same memory barrier --- semantics as the aligned x86 LOCK XADD instruction. ---@param word_index integer @@ -7455,7 +7466,7 @@ function unix.Memory:cmpxchg(word_index, old, new) end function unix.Memory:fetch_add(word_index, value) end --- Fetches and bitwise ands value. ---- +--- --- This operation happens atomically and provides the same memory --- barrier ordering semantics as its x86 implementation. ---@param word_index integer @@ -7464,7 +7475,7 @@ function unix.Memory:fetch_add(word_index, value) end function unix.Memory:fetch_and(word_index, value) end --- Fetches and bitwise ors value. ---- +--- --- This operation happens atomically and provides the same memory --- barrier ordering semantics as its x86 implementation. ---@param word_index integer @@ -7473,7 +7484,7 @@ function unix.Memory:fetch_and(word_index, value) end function unix.Memory:fetch_or(word_index, value) end --- Fetches and bitwise xors value. ---- +--- --- This operation happens atomically and provides the same memory --- barrier ordering semantics as its x86 implementation. ---@param word_index integer @@ -7482,24 +7493,24 @@ function unix.Memory:fetch_or(word_index, value) end function unix.Memory:fetch_xor(word_index, value) end --- Waits for word to have a different value. ---- +--- --- This method asks the kernel to suspend the process until either the --- absolute deadline expires or we're woken up by another process that --- calls `unix.Memory:wake()`. ---- +--- --- The `expect` parameter is used only upon entry to synchronize the --- transition to kernelspace. The kernel doesn't actually poll the --- memory location. It uses `expect` to make sure the process doesn't --- get added to the wait list unless it's sure that it needs to wait, --- since the kernel can only control the ordering of wait / wake calls --- across processes. 
---- +--- --- The default behavior is to wait until the heat death of the universe --- if necessary. You may alternatively specify an absolute deadline. If --- it's less than or equal to the value returned by clock_gettime, then --- this routine is non-blocking. Otherwise we'll block at most until --- the current time reaches the absolute deadline. ---- +--- --- Futexes are currently supported on Linux, FreeBSD, OpenBSD. On other --- platforms this method calls sched_yield() and will either (1) return --- unix.EINTR if a deadline is specified, otherwise (2) 0 is returned. @@ -7512,7 +7523,7 @@ function unix.Memory:fetch_xor(word_index, value) end --- well-supported on all supported platforms but requires using files. --- Please test your use case though, because it's kind of an edge case --- to have the scenario above, and chances are this op will work fine. ---- +--- ---@return 0 ---@overload fun(self, word_index: integer, expect: integer, abs_deadline?: integer, nanos?: integer): nil, error: unix.Errno --- @@ -7520,10 +7531,10 @@ function unix.Memory:fetch_xor(word_index, value) end --- should use futexes inside a loop that is able to cope with spurious --- wakeups. We don't actually guarantee the value at word has in fact --- changed when this returns. ---- +--- --- `EAGAIN` is raised if, upon entry, the word at `word_index` had a --- different value than what's specified at `expect`. ---- +--- --- `ETIMEDOUT` is raised when the absolute deadline expires. --- ---@param word_index integer @@ -7533,11 +7544,11 @@ function unix.Memory:fetch_xor(word_index, value) end function unix.Memory:wait(word_index, expect, abs_deadline, nanos) end --- Wakes other processes waiting on word. ---- +--- --- This method may be used to signal or broadcast to waiters. The --- `count` specifies the number of processes that should be woken, --- which defaults to `INT_MAX`. 
---- +--- --- The return value is the number of processes that were actually woken --- as a result of the system call. No failure conditions are defined. ---@param index integer diff --git a/tool/net/help.txt b/tool/net/help.txt index e238634f9..9c3ae4a7d 100644 --- a/tool/net/help.txt +++ b/tool/net/help.txt @@ -576,6 +576,12 @@ HOOKS *). See functions like Route which asks redbean to do its default thing from the handler. + OnError(status:int, message:string) + If this function is defined and if any error occurs in + OnHttpRequest() then this function will be called instead of displaying + the default error page. Useful if you need to display the error page + using your own code or send the error to a third-party service. + OnClientConnection(ip:int, port:int, serverip:int, serverport:int) → bool If this function is defined it'll be called from the main process each time redbean accepts a new client connection. If it returns diff --git a/tool/net/redbean.c b/tool/net/redbean.c index 4f76409c1..047ec72d0 100644 --- a/tool/net/redbean.c +++ b/tool/net/redbean.c @@ -456,6 +456,7 @@ static bool isexitingworker; static bool hasonworkerstart; static bool leakcrashreports; static bool hasonhttprequest; +static bool hasonerror; static bool ishandlingrequest; static bool listeningonport443; static bool hasonprocesscreate; @@ -2532,7 +2533,7 @@ img { vertical-align: middle; }\r\n\ return p; } -static char *ServeErrorImpl(unsigned code, const char *reason, +static char *ServeErrorImplDefault(unsigned code, const char *reason, const char *details) { size_t n; char *p, *s; @@ -2570,6 +2571,28 @@ static char *ServeErrorImpl(unsigned code, const char *reason, } } +static char *GetLuaResponse(void) { + return cpm.luaheaderp ? 
cpm.luaheaderp : SetStatus(200, "OK"); +} + +static char *ServeErrorImpl(unsigned code, const char *reason, + const char *details) { + lua_State *L = GL; + if (hasonerror) { + lua_getglobal(L, "OnError"); + lua_pushinteger(L, code); + lua_pushstring(L, reason); + if (LuaCallWithTrace(L, 2, 0, NULL) == LUA_OK) { + return CommitOutput(GetLuaResponse()); + } else { + return ServeErrorImplDefault(code, reason, details); + } + } else { + return ServeErrorImplDefault(code, reason, details); + } + +} + static char *ServeErrorWithPath(unsigned code, const char *reason, const char *path, size_t pathlen) { ERRORF("(srvr) server error: %d %s %`'.*s", code, reason, pathlen, path); @@ -3227,10 +3250,6 @@ static char *ServeIndex(const char *path, size_t pathlen) { return p; } -static char *GetLuaResponse(void) { - return cpm.luaheaderp ? cpm.luaheaderp : SetStatus(200, "OK"); -} - static bool ShouldServeCrashReportDetails(void) { uint32_t ip; uint16_t port; @@ -5569,6 +5588,7 @@ static void LuaInit(void) { } if (LuaRunAsset("/.init.lua", true)) { hasonhttprequest = IsHookDefined("OnHttpRequest"); + hasonerror = IsHookDefined("OnError"); hasonclientconnection = IsHookDefined("OnClientConnection"); hasonprocesscreate = IsHookDefined("OnProcessCreate"); hasonprocessdestroy = IsHookDefined("OnProcessDestroy");