mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
957c61cbbf
This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker appears to have changed things so that only a single de-duplicated str table is present in the binary, and it gets placed wherever the linker wants, regardless of what the linker script says. To cope with that we need to stop using .ident to embed licenses. As such, this change does significant work to revamp how third party licenses are defined in the codebase, using `.section .notice,"aR",@progbits`. This new GCC 12.3 toolchain has support for GNU indirect functions. It lets us support __target_clones__ for the first time. This is used for optimizing the performance of libc string functions such as strlen and friends so far on x86, by ensuring AVX systems favor a second codepath that uses VEX encoding. It shaves some latency off certain operations. It's a useful feature to have for scientific computing for the reasons explained by the test/libcxx/openmp_test.cc example which compiles for fifteen different microarchitectures. Thanks to the upgrades, it's now also possible to use newer instruction sets, such as AVX512FP16, VNNI. Cosmo now uses the %gs register on x86 by default for TLS. Doing it is helpful for any program that links `cosmo_dlopen()`. Such programs had to recompile their binaries at startup to change the TLS instructions. That's not great, since it means every page in the executable needs to be faulted. The work of rewriting TLS-related x86 opcodes, is moved to fixupobj.com instead. This is great news for MacOS x86 users, since we previously needed to morph the binary every time for that platform but now that's no longer necessary. The only platforms where we need fixup of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the kernels do not allow us to specify a value for the %gs register. 
OpenBSD users are now required to use APE Loader to run Cosmo binaries and assimilation is no longer possible. OpenBSD kernel needs to change to allow programs to specify a value for the %gs register, or it needs to stop marking executable pages loaded by the kernel as mimmutable(). This release fixes __constructor__, .ctor, .init_array, and lastly the .preinit_array so they behave the exact same way as glibc. We no longer use hex constants to define math.h symbols like M_PI.
451 lines
16 KiB
C
451 lines
16 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "third_party/stb/stb_rect_pack.h"
|
|
#include "libc/assert.h"
|
|
#include "libc/dce.h"
|
|
#include "libc/mem/alg.h"
|
|
|
|
__notice(stb_rect_pack_notice, "\
|
|
stb_rect_pack (MIT License)\n\
|
|
Copyright 2017 Sean Barrett");
|
|
|
|
// stb_rect_pack.h - v1.01 - public domain - rectangle packing
|
|
// Sean Barrett 2014
|
|
//
|
|
// Useful for e.g. packing rectangular textures into an atlas.
|
|
// Does not do rotation.
|
|
//
|
|
// Not necessarily the awesomest packing method, but better than
|
|
// the totally naive one in stb_truetype (which is primarily what
|
|
// this is meant to replace).
|
|
//
|
|
// Has only had a few tests run, may have issues.
|
|
//
|
|
// More docs to come.
|
|
//
|
|
// No memory allocations; uses qsort() and assert() from stdlib.
|
|
// Can override those by defining STBRP_SORT and STBRP_ASSERT.
|
|
//
|
|
// This library currently uses the Skyline Bottom-Left algorithm.
|
|
//
|
|
// Please note: better rectangle packers are welcome! Please
|
|
// implement them to the same API, but with a different init
|
|
// function.
|
|
//
|
|
// Credits
|
|
//
|
|
// Library
|
|
// Sean Barrett
|
|
// Minor features
|
|
// Martins Mozeiko
|
|
// github:IntellectualKitty
|
|
//
|
|
// Bugfixes / warning fixes
|
|
// Jeremy Jaussaud
|
|
// Fabian Giesen
|
|
//
|
|
// Version history:
|
|
//
|
|
// 1.01 (2021-07-11) always use large rect mode, expose STBRP__MAXVAL in public section
|
|
// 1.00 (2019-02-25) avoid small space waste; gracefully fail too-wide rectangles
|
|
// 0.99 (2019-02-07) warning fixes
|
|
// 0.11 (2017-03-03) return packing success/fail result
|
|
// 0.10 (2016-10-25) remove cast-away-const to avoid warnings
|
|
// 0.09 (2016-08-27) fix compiler warnings
|
|
// 0.08 (2015-09-13) really fix bug with empty rects (w=0 or h=0)
|
|
// 0.07 (2015-09-13) fix bug with empty rects (w=0 or h=0)
|
|
// 0.06 (2015-04-15) added STBRP_SORT to allow replacing qsort
|
|
// 0.05: added STBRP_ASSERT to allow replacing assert
|
|
// 0.04: fixed minor bug in STBRP_LARGE_RECTS support
|
|
// 0.01: initial release
|
|
|
|
// Value stored in stbrp_context.init_mode by stbrp_init_target() and checked
// by stbrp_setup_heuristic() to identify a context set up for skyline packing.
#define STBRP__INIT_skyline 1
|
|
|
|
typedef struct
|
|
{
|
|
int x,y;
|
|
stbrp_node **prev_link;
|
|
} stbrp__findresult;
|
|
|
|
// Optionally select which packing heuristic the library should use. Different
|
|
// heuristics will produce better/worse results for different data sets.
|
|
// If you call init again, this will be reset to the default.
|
|
void stbrp_setup_heuristic(stbrp_context *context, int heuristic)
{
  // the skyline packer is the only init mode this file ever stores, so any
  // other value is declared unreachable to the compiler
  if (context->init_mode != STBRP__INIT_skyline) {
    __builtin_unreachable();
  }

  // only the two skyline heuristics are accepted
  assert(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight ||
         heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight);
  context->heuristic = heuristic;
}
|
|
|
|
// Optionally call this function after init but before doing any packing to
|
|
// change the handling of the out-of-temp-memory scenario (see the notes on stbrp_init_target).
|
|
// If you call init again, this will be reset to the default (false).
|
|
void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem)
{
  if (allow_out_of_mem) {
    // Running out of nodes is acceptable, so widths are left unquantized.
    // This packs tighter but may fail due to OOM even though the rectangles
    // would easily fit. @TODO a smarter approach would be to only quantize
    // once we've hit OOM, then this parameter could be removed.
    context->align = 1;
    return;
  }

  // Running out of nodes is not acceptable, so widths get quantized such
  // that num_nodes nodes always suffice:
  //
  //   num_nodes * align >= width
  //               align >= width / num_nodes
  //               align  = ceil(width / num_nodes)
  context->align =
      (context->width + context->num_nodes - 1) / context->num_nodes;
}
|
|
|
|
// Initialize a rectangle packer to:
|
|
// pack a rectangle that is 'width' by 'height' in dimensions
|
|
// using temporary storage provided by the array 'nodes', which is 'num_nodes' long
|
|
//
|
|
// You must call this function every time you start packing into a new target.
|
|
//
|
|
// There is no "shutdown" function. The 'nodes' memory must stay valid for
|
|
// the following stbrp_pack_rects() call (or calls), but can be freed after
|
|
// the call (or calls) finish.
|
|
//
|
|
// Note: to guarantee best results, either:
|
|
// 1. make sure 'num_nodes' >= 'width'
|
|
// or 2. call stbrp_setup_allow_out_of_mem() defined above with 'allow_out_of_mem = 1'
|
|
//
|
|
// If you don't do either of the above things, widths will be quantized to multiples
|
|
// of small integers to guarantee the algorithm doesn't run out of temporary storage.
|
|
//
|
|
// If you do #2, then the non-quantized algorithm will be used, but the algorithm
|
|
// may run out of temporary storage and be unable to pack some rectangles.
|
|
void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes)
{
  int last = 0;

  // chain the caller-supplied nodes into a NULL-terminated free list
  while (last < num_nodes - 1) {
    nodes[last].next = &nodes[last + 1];
    ++last;
  }
  nodes[last].next = NULL;

  // record the target geometry and the size of the node pool
  context->width = width;
  context->height = height;
  context->num_nodes = num_nodes;

  // reset the packer to its default mode and heuristic
  context->init_mode = STBRP__INIT_skyline;
  context->heuristic = STBRP_HEURISTIC_Skyline_default;

  // the free list is the caller's array; the active list starts out as the
  // two nodes embedded in the context
  context->free_head = &nodes[0];
  context->active_head = &context->extra[0];

  // default to the quantized mode; this reads the width and num_nodes
  // stored above, so it must come after them
  stbrp_setup_allow_out_of_mem(context, 0);

  // extra[0] spans the full width at y=0; extra[1] is the sentinel sitting
  // at x=width (which lets us not store the width of each node explicitly)
  context->extra[0].x = 0;
  context->extra[0].y = 0;
  context->extra[0].next = &context->extra[1];
  context->extra[1].x = (stbrp_coord) width;
  context->extra[1].y = (1<<30);
  context->extra[1].next = NULL;
}
|
|
|
|
// find minimum y position if it starts at x1
|
|
static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste)
|
|
{
|
|
stbrp_node *node = first;
|
|
int x1 = x0 + width;
|
|
int min_y, visited_width, waste_area;
|
|
assert(first->x <= x0);
|
|
#if 0
|
|
// skip in case we're past the node
|
|
while (node->next->x <= x0)
|
|
++node;
|
|
#else
|
|
assert(node->next->x > x0); // we ended up handling this in the caller for efficiency
|
|
#endif
|
|
assert(node->x <= x0);
|
|
min_y = 0;
|
|
waste_area = 0;
|
|
visited_width = 0;
|
|
while (node->x < x1) {
|
|
if (node->y > min_y) {
|
|
// raise min_y higher.
|
|
// we've accounted for all waste up to min_y,
|
|
// but we'll now add more waste for everything we've visted
|
|
waste_area += visited_width * (node->y - min_y);
|
|
min_y = node->y;
|
|
// the first time through, visited_width might be reduced
|
|
if (node->x < x0)
|
|
visited_width += node->next->x - x0;
|
|
else
|
|
visited_width += node->next->x - node->x;
|
|
} else {
|
|
// add waste area
|
|
int under_width = node->next->x - node->x;
|
|
if (under_width + visited_width > width)
|
|
under_width = width - visited_width;
|
|
waste_area += under_width * (min_y - node->y);
|
|
visited_width += under_width;
|
|
}
|
|
node = node->next;
|
|
}
|
|
*pwaste = waste_area;
|
|
return min_y;
|
|
}
|
|
|
|
// Searches the active skyline for the best position for a width x height
// rectangle according to the context's heuristic.
//
// The width is first rounded up to a multiple of c->align. The result's
// prev_link is NULL when no candidate was accepted; otherwise x/y hold the
// chosen position and prev_link addresses the link to the node in which the
// placement begins.
static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height)
{
  stbrp__findresult found;
  stbrp_node **link;
  stbrp_node **best_link = NULL;
  stbrp_node *cursor;
  int lowest_y = (1 << 30);
  int least_waste = (1 << 30);
  int chosen_x;

  // round the width up to a multiple of c->align
  width = width + c->align - 1;
  width -= width % c->align;
  assert(width % c->align == 0);

  // if it can't possibly fit, bail immediately
  if (width > c->width || height > c->height) {
    found.prev_link = NULL;
    found.x = found.y = 0;
    return found;
  }

  // pass 1: try left-aligning the rectangle with each skyline node
  for (cursor = c->active_head, link = &c->active_head;
       cursor->x + width <= c->width;
       link = &cursor->next, cursor = cursor->next) {
    int waste;
    int y = stbrp__skyline_find_min_y(c, cursor, cursor->x, width, &waste);
    if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL
      // bottom-left: the lowest y wins; whether it fits vertically is not
      // checked here
      if (y < lowest_y) {
        lowest_y = y;
        best_link = link;
      }
    } else if (y + height <= c->height &&
               (y < lowest_y || (y == lowest_y && waste < least_waste))) {
      // best-fit: only usable if it fits vertically; prefer the lowest y
      // and break ties on least waste
      lowest_y = y;
      least_waste = waste;
      best_link = link;
    }
  }

  chosen_x = 0;
  if (best_link != NULL)
    chosen_x = (*best_link)->x;

  // pass 2: when doing best-fit (BF) we also have to try aligning the
  // rectangle's right edge to each node position. For example, a wide
  // rectangle dropped onto a step-shaped skyline can waste less space when
  // its right edge meets the step, whereas bottom-left (BL) always chooses
  // the left-aligned position.
  //
  // This makes BF take about 2x the time.
  if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) {
    stbrp_node *edge = c->active_head;
    cursor = c->active_head;
    link = &c->active_head;

    // find the first node that's admissible as a right edge
    while (edge->x < width)
      edge = edge->next;

    for (; edge != NULL; edge = edge->next) {
      int left = edge->x - width;
      int waste;
      int y;
      assert(left >= 0);

      // advance to the node that spans this left position
      while (cursor->next->x <= left) {
        link = &cursor->next;
        cursor = cursor->next;
      }
      assert(cursor->next->x > left && cursor->x <= left);

      y = stbrp__skyline_find_min_y(c, cursor, left, width, &waste);
      if (y + height > c->height)
        continue; // doesn't fit vertically
      if (y > lowest_y)
        continue; // strictly worse than the current best

      if (y < lowest_y || waste < least_waste ||
          (waste == least_waste && left < chosen_x)) {
        chosen_x = left;
        assert(y <= lowest_y);
        lowest_y = y;
        least_waste = waste;
        best_link = link;
      }
    }
  }

  found.prev_link = best_link;
  found.x = chosen_x;
  found.y = lowest_y;
  return found;
}
|
|
|
|
// Places one width x height rectangle into the skyline and updates the
// active and free node lists accordingly.
//
// Returns the search result; its prev_link is NULL when the rectangle could
// not be placed, in which case the lists are left untouched.
static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height)
{
  // find best position according to heuristic
  stbrp__findresult spot = stbrp__skyline_find_best_pos(context, width, height);
  stbrp_node *fresh;
  stbrp_node *walk;
  int right_edge;

  // bail if:
  //    1. the search failed
  //    2. the best position doesn't fit vertically (the search doesn't
  //       always check this)
  //    3. we're out of free nodes
  if (!spot.prev_link || spot.y + height > context->height || !context->free_head) {
    spot.prev_link = NULL;
    return spot;
  }

  right_edge = spot.x + width;

  // on success, take a node off the free list to represent the new top edge
  fresh = context->free_head;
  context->free_head = fresh->next;
  fresh->x = (stbrp_coord) spot.x;
  fresh->y = (stbrp_coord) (spot.y + height);

  // insert the new node at the right starting point, and let 'walk' point
  // to the remaining nodes that need to be stitched back in
  walk = *spot.prev_link;
  if (walk->x < spot.x) {
    // the existing node starts left of us: keep it, and start testing with
    // the one after it
    stbrp_node *after = walk->next;
    walk->next = fresh;
    walk = after;
  } else {
    *spot.prev_link = fresh;
  }

  // return every node fully covered by the new rectangle to the free list,
  // stopping at the first one that shouldn't be freed
  while (walk->next && walk->next->x <= right_edge) {
    stbrp_node *after = walk->next;
    walk->next = context->free_head;
    context->free_head = walk;
    walk = after;
  }

  // stitch the list back in, trimming the surviving node if it is partly
  // covered
  fresh->next = walk;
  if (walk->x < right_edge)
    walk->x = (stbrp_coord) right_edge;

#ifndef NDEBUG
  {
    int count = 0;

    // x coordinates must be strictly increasing up to the sentinel
    for (walk = context->active_head; walk->x < context->width; walk = walk->next) {
      assert(walk->x < walk->next->x);
    }
    assert(walk->next == NULL);

    // the active and free lists together must hold num_nodes + 2 nodes
    for (walk = context->active_head; walk; walk = walk->next)
      ++count;
    for (walk = context->free_head; walk; walk = walk->next)
      ++count;
    assert(count == context->num_nodes+2);
  }
#endif

  return spot;
}
|
|
|
|
static int rect_height_compare(const void *a, const void *b)
|
|
{
|
|
const stbrp_rect *p = (const stbrp_rect *) a;
|
|
const stbrp_rect *q = (const stbrp_rect *) b;
|
|
if (p->h > q->h)
|
|
return -1;
|
|
if (p->h < q->h)
|
|
return 1;
|
|
if (p->w > q->w)
|
|
return -1;
|
|
if (p->w < q->w)
|
|
return 1;
|
|
return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed);
|
|
}
|
|
|
|
static int rect_original_order(const void *a, const void *b)
|
|
{
|
|
const stbrp_rect *p = (const stbrp_rect *) a;
|
|
const stbrp_rect *q = (const stbrp_rect *) b;
|
|
return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed);
|
|
}
|
|
|
|
// Assign packed locations to rectangles. The rectangles are of type
|
|
// 'stbrp_rect' defined below, stored in the array 'rects', and there
|
|
// are 'num_rects' many of them.
|
|
//
|
|
// Rectangles which are successfully packed have the 'was_packed' flag
|
|
// set to a non-zero value and 'x' and 'y' store the minimum location
|
|
// on each axis (i.e. bottom-left in cartesian coordinates, top-left
|
|
// if you imagine y increasing downwards). Rectangles which do not fit
|
|
// have the 'was_packed' flag set to 0.
|
|
//
|
|
// You should not try to access the 'rects' array from another thread
|
|
// while this function is running, as the function temporarily reorders
|
|
// the array while it executes.
|
|
//
|
|
// To pack into another rectangle, you need to call stbrp_init_target
|
|
// again. To continue packing into the same rectangle, you can call
|
|
// this function again. Calling this multiple times with multiple rect
|
|
// arrays will probably produce worse packing results than calling it
|
|
// a single time with the full rectangle array, but the option is
|
|
// available.
|
|
//
|
|
// The function returns 1 if all of the rectangles were successfully
|
|
// packed and 0 otherwise.
|
|
int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
{
  int all_rects_packed = 1;
  int i;

  // 'was_packed' temporarily holds each rect's original index so that the
  // array can be sorted and later restored to its original order
  for (i = 0; i < num_rects; ++i)
    rects[i].was_packed = i;

  // sort according to heuristic
  qsort(rects, num_rects, sizeof *rects, rect_height_compare);

  for (i = 0; i < num_rects; ++i) {
    stbrp_rect *r = &rects[i];
    stbrp__findresult fr;

    if (r->w == 0 || r->h == 0) {
      r->x = r->y = 0;  // empty rect needs no space
      continue;
    }

    fr = stbrp__skyline_pack_rectangle(context, r->w, r->h);
    if (fr.prev_link) {
      r->x = (stbrp_coord) fr.x;
      r->y = (stbrp_coord) fr.y;
    } else {
      // mark the failure in the coordinates; it's turned into the
      // was_packed flag once the original order has been restored
      r->x = r->y = STBRP__MAXVAL;
    }
  }

  // unsort
  qsort(rects, num_rects, sizeof *rects, rect_original_order);

  // set was_packed flags and all_rects_packed status
  for (i = 0; i < num_rects; ++i) {
    stbrp_rect *r = &rects[i];
    int failed = (r->x == STBRP__MAXVAL && r->y == STBRP__MAXVAL);
    r->was_packed = !failed;
    if (failed)
      all_rects_packed = 0;
  }

  // return the all_rects_packed status
  return all_rects_packed;
}
|