wip ctl::string small-string optimization

A small-string optimization is a way of reusing inline storage space for
sufficiently small strings, rather than allocating them on the heap. The
current approach takes after an old Facebook string class: it reuses the
highest-order byte for flags and small-string size, in such a way that a
maximally-sized small string will have its last byte zeroed, making it a
null terminator for the C string.

The only flag we have is the highest-order bit, which says whether the
string is big (set) or small (cleared). Most of the logic switches based
on the value of this bit; e.g. data() returns big()->p if it's set, else
small()->buf if it's cleared.

Morally speaking, our class's storage is a union over two POD C structs.
It may be that this winds up being the best way to actually write it but
for now I gravitated towards a slightly more obtuse approach: the string
class itself contains a blob of the right size, and we alias that blob's
pointer for the two structs, taking some care not to run afoul of object
lifetime rules in C++. Only in writing this now do I realize that we may
be able to relatively easily sidestep those rules.

TODO:

- [ ] tests are currently segfaulting
- [ ] think about operator string_view
- [ ] maybe migrate to POD anonymous union
- [ ] benchmark and see if this is even worth it
- [ ] __ namespace needs to be documented, at least here
- [ ] we are probably incorrectly setting size in a few places
- [ ] explain why assign-by-value and "swapperator", at least here
This commit is contained in:
Jōshin 2024-06-05 23:57:48 -07:00
parent 2c5e7ec547
commit a535cdc417
No known key found for this signature in database
3 changed files with 313 additions and 132 deletions

View file

@ -34,10 +34,15 @@ strcat(const string_view lhs, const string_view rhs) noexcept
__builtin_trap();
res.reserve(need);
if (lhs.n)
memcpy(res.p, lhs.p, lhs.n);
memcpy(res.data(), lhs.p, lhs.n);
if (rhs.n)
memcpy(res.p + lhs.n, rhs.p, rhs.n);
res.p[res.n = lhs.n + rhs.n] = 0;
memcpy(res.data() + lhs.n, rhs.p, rhs.n);
if (res.isbig()) {
res.big()->n = lhs.n + rhs.n;
} else {
res.small()->rem = __::sso_max - lhs.n - rhs.n;
}
res.data()[res.size()] = 0;
return res;
}

View file

@ -23,7 +23,9 @@
namespace ctl {
string::~string() noexcept
namespace __ {
big_string::~big_string() /* noexcept */
{
if (n) {
if (n >= c)
@ -36,6 +38,15 @@ string::~string() noexcept
free(p);
}
} // namespace __
// Destroys the string. In big mode this runs the big_string destructor on
// the aliased storage; in small mode there is nothing to do.
string::~string() /* noexcept */
{
    if (!isbig())
        return;
    big()->~big_string();
}
string::string(const char* s) noexcept
{
append(s, strlen(s));
@ -43,7 +54,7 @@ string::string(const char* s) noexcept
// Copy constructor: builds an empty small string, then appends s's bytes.
//
// The storage blob has no default member initializer, so the empty-small
// state must be established here before append() consults isbig()/size();
// otherwise those reads see indeterminate bytes.
string::string(const string& s) noexcept
{
    set_small_size(0);
    small()->buf[0] = 0;
    append(s.data(), s.size());
}
string::string(const string_view s) noexcept
@ -64,29 +75,37 @@ string::string(const char* s, size_t size) noexcept
// Returns the buffer as a C string.
//
// Traps if size() >= capacity(), or if the byte at data()[size()] is not
// NUL, i.e. every mutator is expected to keep the terminator in place.
// (Lines that name p, n or c are the pre-change originals; each is followed
// by its accessor-based replacement. Note the pre-change empty-string
// shortcut has no replacement.)
const char*
string::c_str() const noexcept
{
if (!n)
return "";
if (n >= c)
if (size() >= capacity())
__builtin_trap();
if (p[n])
if (data()[size()])
__builtin_trap();
return p;
return data();
}
// Ensures heap storage of at least c2 bytes.
//
// The request is raised to at least size() and rounded up to a multiple of
// 16. If the rounded value is <= __::sso_max the call is a no-op. Otherwise
// a small string is copied (with its terminator) into a fresh allocation
// and becomes big, while a big string is realloc'd in place.
//
// Traps on arithmetic overflow or allocation failure.
void
string::reserve(size_t c2) noexcept
{
    char* p2;
    const size_t n = size();
    if (c2 < n)
        c2 = n;
    if (ckd_add(&c2, c2, 15))
        __builtin_trap();
    c2 &= -16;
    if (c2 <= __::sso_max)
        return;
    if (!isbig()) {
        if (!(p2 = (char *)malloc(c2)))
            __builtin_trap();
        // n + 1 so the terminator travels with the characters.
        __builtin_memcpy(p2, data(), n + 1);
    } else {
        // c2 == n happens when n is already a multiple of 16 and the caller
        // asked for no more than n. Reallocating to exactly n bytes would
        // leave the terminator at p[n] outside the allocation, so keep one
        // more 16-byte unit.
        if (c2 == n && ckd_add(&c2, c2, 16))
            __builtin_trap();
        if (!(p2 = (char *)realloc(big()->p, c2)))
            __builtin_trap();
    }
    std::atomic_signal_fence(std::memory_order_seq_cst);
    // Writing the capacity word sets the big flag, so it must come before
    // the big() accessors below, which trap in small mode.
    set_big_capacity(c2);
    big()->n = n;
    big()->p = p2;
}
void
@ -96,41 +115,54 @@ string::resize(size_t n2, char ch) noexcept
if (ckd_add(&c2, n2, 1))
__builtin_trap();
reserve(c2);
if (n2 > n)
memset(p + n, ch, n2 - n);
p[n = n2] = 0;
if (n2 > size())
memset(data() + size(), ch, n2 - size());
if (isbig()) {
big()->p[big()->n = n2] = 0;
} else {
set_small_size(n2);
data()[size()] = 0;
}
}
// Appends one character and re-terminates the string.
//
// Needs size() + 2 bytes (the new character plus the NUL); if that exceeds
// capacity(), reserves roughly 1.5x (capacity() + 2). Traps on overflow.
// (Lines that name p, n or c are the pre-change originals.)
void
string::append(char ch) noexcept
{
if (n + 2 > c) {
size_t c2 = c + 2;
c2 += c2 >> 1;
size_t n2;
if (ckd_add(&n2, size(), 2))
__builtin_trap();
if (n2 > capacity()) {
size_t c2 = capacity() + 2;
if (ckd_add(&c2, c2, c2 >> 1))
__builtin_trap();
reserve(c2);
}
p[n++] = ch;
p[n] = 0;
// XXX do we care to fence this?
// Bytes are written first; the size is bumped afterwards.
data()[size()] = ch;
data()[size() + 1] = 0;
if (isbig()) {
++big()->n;
} else {
// small size is sso_max - rem, so growing by one decrements rem
--small()->rem;
}
}
// Makes room for `size` more characters plus the terminator.
//
// Returns immediately when this->size() + size + 1 already fits in
// capacity(); otherwise multiplies the capacity by ~1.5 until it fits and
// calls reserve(). Traps on overflow.
// (Lines that name n or c, and the `if (!c2) { ... } else { ... }` form,
// are the pre-change originals.)
void
string::grow(size_t size) noexcept
{
size_t need;
if (ckd_add(&need, n, size))
if (ckd_add(&need, this->size(), size))
__builtin_trap();
if (ckd_add(&need, need, 1))
__builtin_trap();
if (need <= c)
if (need <= capacity())
return;
size_t c2 = c;
if (!c2) {
c2 = need;
} else {
while (c2 < need)
if (ckd_add(&c2, c2, c2 >> 1))
__builtin_trap();
}
size_t c2 = capacity();
// capacity() is __::sso_max for a small string, so zero is not expected
if (!c2)
__builtin_trap();
while (c2 < need)
if (ckd_add(&c2, c2, c2 >> 1))
__builtin_trap();
reserve(c2);
}
@ -139,8 +171,14 @@ string::append(char ch, size_t size) noexcept
{
grow(size);
if (size)
memset(p + n, ch, size);
p[n += size] = 0;
memset(data() + this->size(), ch, size);
// XXX fence?
if (isbig()) {
big()->n += size;
} else {
small()->rem -= size;
}
data()[this->size()] = 0;
}
void
@ -148,55 +186,53 @@ string::append(const void* data, size_t size) noexcept
{
grow(size);
if (size)
memcpy(p + n, data, size);
p[n += size] = 0;
memcpy(this->data() + this->size(), data, size);
if (isbig()) {
big()->n += size;
} else {
small()->rem -= size;
}
this->data()[this->size()] = 0;
}
// Removes the last character; traps on an empty string.
// The size is decreased first, then the new end is NUL-terminated.
// (Lines that name p or n are the pre-change originals.)
void
string::pop_back() noexcept
{
if (!n)
if (!size())
__builtin_trap();
p[--n] = 0;
if (isbig()) {
--big()->n;
} else {
// small size is sso_max - rem, so shrinking by one increments rem
++small()->rem;
}
data()[size()] = 0;
}
// Assignment taking its argument by value and swapping storage with it, so
// the previous contents of *this end up in `s` and are released when `s`
// is destroyed.
// (The `string&&` signature and the p/n/c body are the pre-change
// originals; the new body is just swap + return.)
string&
string::operator=(string&& s) noexcept
string::operator=(string s) noexcept
{
if (p != s.p) {
if (p) {
clear();
append(s.p, s.n);
} else {
p = s.p;
n = s.n;
c = s.c;
s.p = nullptr;
s.n = 0;
s.c = 0;
}
}
swap(s);
return *this;
}
// Byte-wise equality against a string_view: lengths must match, and two
// empty strings compare equal without calling memcmp.
// (Lines that name p or n are the pre-change originals.)
bool
string::operator==(const string_view s) const noexcept
{
if (n != s.n)
if (size() != s.n)
return false;
if (!n)
if (!s.n)
return true;
return !memcmp(p, s.p, n);
return !memcmp(data(), s.p, s.n);
}
// Byte-wise inequality against a string_view: true when the lengths differ
// or memcmp reports a difference; two empty strings are not unequal.
// (Lines that name p or n are the pre-change originals.)
bool
string::operator!=(const string_view s) const noexcept
{
if (n != s.n)
if (size() != s.n)
return true;
if (!n)
if (!s.n)
return false;
return !!memcmp(p, s.p, n);
return !!memcmp(data(), s.p, s.n);
}
bool
@ -204,35 +240,35 @@ string::contains(const string_view s) const noexcept
{
if (!s.n)
return true;
return !!memmem(p, n, s.p, s.n);
return !!memmem(data(), size(), s.p, s.n);
}
// True when the last s.n bytes of this string equal s. An empty s always
// matches; an s longer than this string never does.
// (Lines that name p or n are the pre-change originals.)
bool
string::ends_with(const string_view s) const noexcept
{
if (n < s.n)
if (size() < s.n)
return false;
if (!s.n)
return true;
return !memcmp(p + n - s.n, s.p, s.n);
return !memcmp(data() + size() - s.n, s.p, s.n);
}
// True when the first s.n bytes of this string equal s. An empty s always
// matches; an s longer than this string never does.
// (Lines that name p or n are the pre-change originals.)
bool
string::starts_with(const string_view s) const noexcept
{
if (n < s.n)
if (size() < s.n)
return false;
if (!s.n)
return true;
return !memcmp(p, s.p, s.n);
return !memcmp(data(), s.p, s.n);
}
// Finds the first occurrence of `ch` at or after index `pos`.
//
// Returns the index of the match measured from the start of the string, or
// npos when there is no match or `pos` is at/past the end. Previously `pos`
// was accepted but ignored, so every search started at index 0.
size_t
string::find(char ch, size_t pos) const noexcept
{
    const size_t len = size();
    if (pos >= len)
        return npos;
    const char* const base = data();
    const void* const hit = memchr(base + pos, ch, len - pos);
    if (!hit)
        return npos;
    return static_cast<size_t>(static_cast<const char*>(hit) - base);
}
@ -240,10 +276,10 @@ size_t
string::find(const string_view s, size_t pos) const noexcept
{
char* q;
if (pos > n)
if (pos > size())
__builtin_trap();
if ((q = (char*)memmem(p + pos, n - pos, s.p, s.n)))
return q - p;
if ((q = (char*)memmem(data() + pos, size() - pos, s.p, s.n)))
return q - data();
return npos;
}
@ -251,15 +287,15 @@ string
string::substr(size_t pos, size_t count) const noexcept
{
size_t last;
if (pos > n)
if (pos > size())
__builtin_trap();
if (count > n - pos)
count = n - pos;
if (count > size() - pos)
count = size() - pos;
if (ckd_add(&last, pos, count))
last = n;
if (last > n)
last = size();
if (last > size())
__builtin_trap();
return string(p + pos, count);
return string(data() + pos, count);
}
string&
@ -268,12 +304,12 @@ string::replace(size_t pos, size_t count, const string_view& s) noexcept
size_t last;
if (ckd_add(&last, pos, count))
__builtin_trap();
if (last > n)
if (last > size())
__builtin_trap();
size_t need;
if (ckd_add(&need, pos, s.n))
__builtin_trap();
size_t extra = n - last;
size_t extra = size() - last;
if (ckd_add(&need, need, extra))
__builtin_trap();
size_t c2;
@ -281,42 +317,57 @@ string::replace(size_t pos, size_t count, const string_view& s) noexcept
__builtin_trap();
reserve(c2);
if (extra)
memmove(p + pos + s.n, p + last, extra);
memcpy(p + pos, s.p, s.n);
p[n = need] = 0;
memmove(data() + pos + s.n, data() + last, extra);
memcpy(data() + pos, s.p, s.n);
if (isbig()) {
big()->p[big()->n = need] = 0;
} else {
set_small_size(need);
data()[size()] = 0;
}
return *this;
}
// Inserts s before index i and returns *this.
//
// Traps if i > size() or if the new length overflows. Reserves
// size() + s.n + 1 bytes, shifts the tail right with memmove, copies s
// into the gap, then updates the size and terminator.
// NOTE(review): if s views this string's own buffer, reserve() may move
// the storage before s.p is read — confirm callers never self-insert.
// (Lines that name p or n are the pre-change originals.)
string&
string::insert(size_t i, const string_view s) noexcept
{
if (i > n)
if (i > size())
__builtin_trap();
size_t extra = n - i;
size_t extra = size() - i;
size_t need;
if (ckd_add(&need, n, s.n))
if (ckd_add(&need, size(), s.n))
__builtin_trap();
if (ckd_add(&need, need, 1))
__builtin_trap();
reserve(need);
if (extra)
memmove(p + i + s.n, p + i, extra);
memcpy(p + i, s.p, s.n);
p[n += s.n] = 0;
memmove(data() + i + s.n, data() + i, extra);
memcpy(data() + i, s.p, s.n);
if (isbig()) {
big()->n += s.n;
} else {
// small size is sso_max - rem, so growing by s.n subtracts from rem
small()->rem -= s.n;
}
data()[size()] = 0;
return *this;
}
// Removes up to `count` characters starting at `pos` and returns *this.
//
// Traps if pos > size(). `count` is clamped to the characters available
// after pos; the tail is shifted left and the string re-terminated.
// (Lines that name p or n are the pre-change originals.)
string&
string::erase(size_t pos, size_t count) noexcept
{
if (pos > n)
if (pos > size())
__builtin_trap();
if (count > n - pos)
count = n - pos;
size_t extra = n - (pos + count);
if (count > size() - pos)
count = size() - pos;
// number of characters that survive after the erased range
size_t extra = size() - (pos + count);
if (extra)
memmove(p + pos, p + pos + count, extra);
p[n = pos + extra] = 0;
memmove(data() + pos, data() + pos + count, extra);
if (isbig()) {
big()->n = pos + extra;
} else {
set_small_size(pos + extra);
}
data()[size()] = 0;
return *this;
}

View file

@ -6,29 +6,59 @@
namespace ctl {
struct string;
class string;
string
strcat(const string_view, const string_view) noexcept __wur;
struct string
{
char* p = nullptr;
size_t n = 0;
size_t c = 0;
// Implementation details of ctl::string's small-string optimization: the
// two layouts that share the string's storage, and their size constants.
namespace __ {
// Size in bytes of the string object: three machine words.
constexpr size_t string_size = 3 * sizeof(size_t);
// Bytes in the inline buffer; the one remaining byte holds rem (and flag).
constexpr size_t sso_max = string_size - 1;
// Every bit of a size_t except the highest, which is the "big" flag.
constexpr size_t big_mask = ~(1ull << (8ull * sizeof(size_t) - 1ull));
// Layout used while the string is stored inline.
struct small_string
{
char buf[sso_max];
// interpretation is: size == sso_max - rem
unsigned char rem;
#if 0
size_t rem : 7;
size_t big : 1 /* = 0 */;
#endif
};
// Layout used while the string is stored in a heap buffer.
// NOTE(review): the flag is tested as bit 7 of the object's last byte; that
// coincides with the top bit of c only on little-endian targets — confirm.
struct big_string
{
char* p;
size_t n;
// interpretation is: capacity == c & big_mask
size_t c;
#if 0
size_t c : sizeof(size_t) * 8 - 1;
size_t big : 1 /* = 1 */;
#endif
~big_string() /* noexcept */;
};
} // namespace __
class string
{
public:
using iterator = char*;
using const_iterator = const char*;
static constexpr size_t npos = -1;
~string() noexcept;
string() = default;
~string() /* noexcept */;
string(const string_view) noexcept;
string(const char*) noexcept;
string(const string&) noexcept;
string(const char*, size_t) noexcept;
explicit string(size_t, char = 0) noexcept;
string& operator=(string&&) noexcept;
string& operator=(string) noexcept;
const char* c_str() const noexcept;
void pop_back() noexcept;
@ -51,103 +81,137 @@ struct string
size_t find(char, size_t = 0) const noexcept;
size_t find(const string_view, size_t = 0) const noexcept;
// Default constructor: starts as an empty small string whose first buffer
// byte is the NUL terminator.
// (The `string(string&& s) ... : p(s.p), n(s.n), c(s.c)` header and the
// s.p/s.n/s.c assignments are the pre-change move constructor.)
string(string&& s) noexcept : p(s.p), n(s.n), c(s.c)
string() noexcept
{
s.p = nullptr;
s.n = 0;
s.c = 0;
set_small_size(0);
small()->buf[0] = 0;
}
// Exchanges the raw storage of *this and s, byte for byte, through a
// stack temporary. This works for any mix of small and big strings because
// the whole representation lives in the blob.
void swap(string& s) noexcept
{
    char* const mine = __builtin_launder(blob);
    char* const theirs = __builtin_launder(s.blob);
    char scratch[__::string_size];
    __builtin_memcpy(scratch, mine, sizeof(scratch));
    __builtin_memcpy(mine, theirs, sizeof(scratch));
    __builtin_memcpy(theirs, scratch, sizeof(scratch));
}
// Move constructor: takes over s's storage by copying its raw bytes, then
// resets s to an empty small string.
string(string&& s) noexcept
{
    const char* const from = __builtin_launder(s.blob);
    __builtin_memcpy(blob, from, sizeof(blob));
    // Clearing the flag first is required: small() traps while s is big.
    s.set_small_size(0);
    // The moved-from string must stay valid, and c_str() insists on a NUL
    // at data()[size()].
    s.small()->buf[0] = 0;
}
void clear() noexcept
{
n = 0;
if (isbig()) {
big()->n = 0;
} else {
set_small_size(0);
}
}
// True when the string holds no characters: n == 0 in big mode, or
// rem >= sso_max in small mode (small size is sso_max - rem).
// (`return !n;` is the pre-change original.)
bool empty() const noexcept
{
return !n;
return isbig() ? !big()->n : small()->rem >= __::sso_max;
}
// data(): pointer to the first character — the heap pointer in big mode,
// the inline buffer in small mode. Provided in mutable and const forms.
// size(): character count — n in big mode, sso_max - rem in small mode.
// (`char* data() const`, `return p;`, the first `size_t size() const` header
// and `return n;` are pre-change originals interleaved with the new code.)
char* data() const noexcept
inline char* data() noexcept
{
return p;
return isbig() ? big()->p : small()->buf;
}
size_t size() const noexcept
inline const char* data() const noexcept
{
return n;
return isbig() ? big()->p : small()->buf;
}
inline size_t size() const noexcept
{
#if 0
if (!isbig() && small()->rem > __::sso_max)
__builtin_trap();
#endif
return isbig() ? big()->n : __::sso_max - small()->rem;
}
// Alias for size(). (`return n;` is the pre-change original.)
size_t length() const noexcept
{
return n;
return size();
}
// Returns the storage capacity in bytes: the heap capacity in big mode,
// __::sso_max in small mode.
//
// In big mode the stored word c also carries the "big" flag in its highest
// bit (set_big_capacity() ORs in ~big_mask), so it must be masked before
// use. Returned raw, it is a huge number that makes every growth check
// believe there is already room.
size_t capacity() const noexcept
{
#if 0
    if (isbig() && (big()->c & __::big_mask) <= __::sso_max)
        __builtin_trap();
#endif
    return isbig() ? (big()->c & __::big_mask) : __::sso_max;
}
// Iterator accessors. Iterators are plain char pointers: begin/cbegin
// return data(), end/cend return data() + size().
// (`return p;` and `return p + n;` are the pre-change originals.)
iterator begin() noexcept
{
return p;
return data();
}
iterator end() noexcept
{
return p + n;
return data() + size();
}
const_iterator cbegin() const noexcept
{
return p;
return data();
}
const_iterator cend() const noexcept
{
return p + n;
return data() + size();
}
// Checked element access. front() and back() trap on an empty string;
// operator[] traps when i >= size(). Each comes in mutable and const forms.
// (Lines that name p or n are the pre-change originals.)
char& front()
{
if (!n)
if (!size())
__builtin_trap();
return p[0];
return data()[0];
}
const char& front() const
{
if (!n)
if (!size())
__builtin_trap();
return p[0];
return data()[0];
}
char& back()
{
if (!n)
if (!size())
__builtin_trap();
return p[n - 1];
return data()[size() - 1];
}
const char& back() const
{
if (!n)
if (!size())
__builtin_trap();
return p[n - 1];
return data()[size() - 1];
}
char& operator[](size_t i) noexcept
{
if (i >= n)
if (i >= size())
__builtin_trap();
return p[i];
return data()[i];
}
const char& operator[](size_t i) const noexcept
{
if (i >= n)
if (i >= size())
__builtin_trap();
return p[i];
return data()[i];
}
void push_back(char ch) noexcept
@ -160,9 +224,10 @@ struct string
append(s.p, s.n);
}
// Implicit conversion to a string_view over data() and size(). The view
// borrows this string's storage.
// (The `constexpr` header and `string_view(p, n)` are the pre-change
// originals; the new form drops constexpr.)
inline constexpr operator string_view() const noexcept
// TODO(mrdomino): explicit?
inline operator string_view() const noexcept
{
return string_view(p, n);
return string_view(data(), size());
}
string& operator=(const char* s) noexcept
@ -220,8 +285,68 @@ struct string
{
return compare(s) >= 0;
}
private:
// True when the string is in big (heap) mode: bit 7 of the storage's last
// byte is the mode flag.
inline bool isbig() const noexcept
{
    const char* const flag_byte = __builtin_launder(blob) + __::sso_max;
    return *flag_byte & 0x80;
}
// Puts the string in small mode with the given size by storing
// sso_max - size in the last storage byte. Traps if size > sso_max.
inline void set_small_size(size_t size) noexcept
{
    if (size > __::sso_max)
        __builtin_trap();
    char* const last = __builtin_launder(blob) + __::sso_max;
    *last = (__::sso_max - size);
}
// Stores capacity c2 in the third storage word with the highest bit set,
// which marks the string as big. Traps if c2 does not fit in big_mask.
inline void set_big_capacity(size_t c2) noexcept
{
    if (c2 > __::big_mask)
        __builtin_trap();
    size_t* const words = (size_t *)__builtin_launder(blob);
    words[2] = ~__::big_mask | c2;
}
// Views the storage as a small_string; traps if the string is big.
inline __::small_string* small() noexcept
{
    if (!isbig())
        return __builtin_launder(reinterpret_cast<__::small_string*>(blob));
    __builtin_trap();
}
// Read-only view of the storage as a small_string; traps if the string is
// big.
inline const __::small_string* small() const noexcept
{
    if (!isbig())
        return __builtin_launder(
            reinterpret_cast<const __::small_string*>(blob));
    __builtin_trap();
}
// Views the storage as a big_string; traps if the string is small.
inline __::big_string* big() noexcept
{
    if (isbig())
        return __builtin_launder(reinterpret_cast<__::big_string*>(blob));
    __builtin_trap();
}
// Read-only view of the storage as a big_string; traps if the string is
// small.
inline const __::big_string* big() const noexcept
{
    if (isbig())
        return __builtin_launder(
            reinterpret_cast<const __::big_string*>(blob));
    __builtin_trap();
}
// strcat writes the private representation directly, so it is a friend.
// The exception specification must match the noexcept namespace-scope
// declaration; redeclaring the function without it is ill-formed.
friend string strcat(const string_view, const string_view) noexcept;
alignas(union {
__::big_string a;
__::small_string b;
}) char blob[__::string_size];
};
static_assert(sizeof(string) == __::string_size);
static_assert(sizeof(__::small_string) == __::string_size);
static_assert(sizeof(__::big_string) == __::string_size);
} // namespace ctl
#pragma GCC diagnostic push