cosmopolitan/ctl/string.h
Jōshin 2ba6b0158f
Fix some memory issues with ctl::string (#1201)
There were a few errors in how capacity and memory were being handled for
small strings. The capacity errors meant that small strings would become
big strings too soon, and the memory error introduced undefined behavior
that was caught by CheckMemoryLeaks in our test file but only sometimes.

The crucial change is in reserve: we only copy n bytes into p2, and then
we manually set the null terminator instead of expecting it to have been
there already. (E.g. it might not be there for an empty small string.)

We also fix one other doozy in append when we were exactly at the small-
to-big string boundary: we set the last byte (i.e., the remainder field)
to 0, then decremented it, giving us size_t max. Whoops. We boneheadedly
fix this by setting the 0 byte after we've fixed up the remainder, so it
is at worst a no-op.

Otherwise, capacity now works the same for small strings as it does with
big strings: it's the amount of space available including the null byte.

We test all of this with a new test that only gets included if our class
under test is not std::string (presumably meaning it's ctl::string.) The
test manually verifies that the small string optimization behaves how we
expect.

Since this test checks against std::string, we go ahead and include that
other header from the STL.

Also modifies the new test we introduced to also run on std::string, but
it just does the append without expecting anything about how its data is
stored. We also check that the string has the right value afterwards.
2024-06-07 01:15:37 -04:00

364 lines
8 KiB
C++

// -*-mode:c++;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8-*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
#ifndef COSMOPOLITAN_CTL_STRING_H_
#define COSMOPOLITAN_CTL_STRING_H_
#include "string_view.h"
namespace ctl {
// Forward declaration: lets strcat() below name `string` as its return type
// before the class itself is defined.
class string;
// Free function taking two string_view arguments and returning a new string;
// string::operator+ forwards to it. `__wur` is a macro defined outside this
// header (presumably a warn-unused-result attribute -- TODO confirm).
string
strcat(const string_view, const string_view) noexcept __wur;
namespace __ {

// Footprint of a ctl::string object: exactly three size_t's worth of bytes.
constexpr size_t string_size = sizeof(size_t) * 3;

// Bytes available in the inline buffer; the one remaining byte holds `rem`.
constexpr size_t sso_max = string_size - 1;

// All bits of a size_t set except the most significant one. Used to strip
// the "big" flag off of big_string::c.
constexpr size_t big_mask = static_cast<size_t>(-1) >> 1;

// Inline ("small") representation: the characters live in the object itself.
struct small_string
{
    char buf[sso_max];
    // interpretation is: size == sso_max - rem
    unsigned char rem;
    // Conceptual layout of that final byte (illustration only, not compiled):
    //     size_t rem : 7;
    //     size_t big : 1 /* = 0 */;
};

// Out-of-line ("big") representation: pointer, length, and tagged capacity.
struct big_string
{
    char* p;
    size_t n;
    // interpretation is: capacity == c & big_mask
    size_t c;
    // Conceptual layout of `c` (illustration only, not compiled):
    //     size_t c : sizeof(size_t) * 8 - 1;
    //     size_t big : 1 /* = 1 */;

    // Declared here, defined outside this header.
    ~big_string() /* noexcept */;
};

} // namespace __
/**
 * Byte string with a small-string optimization.
 *
 * The object is a raw `blob` of `__::string_size` bytes that is viewed
 * either as a `__::small_string` (characters stored inline) or as a
 * `__::big_string` (pointer, size, tagged capacity). The high bit of the
 * final byte of `blob` selects the view: set means big, clear means small.
 * In the small view that same byte stores `sso_max - size`, so a completely
 * full small string has a zero there.
 *
 * Precondition violations (out-of-range index, front()/back() on an empty
 * string, using the wrong view) call `__builtin_trap()` instead of throwing.
 */
class string
{
public:
    using iterator = char*;
    using const_iterator = const char*;
    static constexpr size_t npos = -1;

    // The members in this first group are only declared here; their
    // definitions live outside this header.
    ~string() /* noexcept */;
    string(const string_view) noexcept;
    string(const char*) noexcept;
    string(const string&) noexcept;
    string(const char*, size_t) noexcept;
    explicit string(size_t, char = 0) noexcept;
    string& operator=(string) noexcept;
    const char* c_str() const noexcept;
    void pop_back() noexcept;
    void grow(size_t) noexcept;
    void reserve(size_t) noexcept;
    void resize(size_t, char = 0) noexcept;
    void append(char) noexcept;
    void append(char, size_t) noexcept;
    void append(unsigned long) noexcept;
    void append(const void*, size_t) noexcept;
    string& insert(size_t, const string_view) noexcept;
    string& erase(size_t = 0, size_t = npos) noexcept;
    string substr(size_t = 0, size_t = npos) const noexcept;
    string& replace(size_t, size_t, const string_view&) noexcept;
    bool operator==(const string_view) const noexcept;
    bool operator!=(const string_view) const noexcept;
    bool contains(const string_view) const noexcept;
    bool ends_with(const string_view) const noexcept;
    bool starts_with(const string_view) const noexcept;
    size_t find(char, size_t = 0) const noexcept;
    size_t find(const string_view, size_t = 0) const noexcept;

    /** Constructs an empty string in the small representation. */
    string() noexcept
    {
        set_small_size(0);
#if 0
        small()->buf[0] = 0;
#endif
    }

    /** Exchanges the raw representations of `*this` and `s` byte for byte. */
    void swap(string& s) noexcept
    {
        char tmp[__::string_size];
        __builtin_memcpy(tmp, __builtin_launder(blob), sizeof(tmp));
        __builtin_memcpy(
          __builtin_launder(blob), __builtin_launder(s.blob), sizeof(tmp));
        __builtin_memcpy(__builtin_launder(s.blob), tmp, sizeof(tmp));
    }

    /**
     * Move constructor: takes over the representation of `s` and resets `s`
     * to an empty small string so the two never share a heap pointer.
     */
    string(string&& s) noexcept
    {
        __builtin_memcpy(blob, __builtin_launder(s.blob), sizeof(blob));
        s.set_small_size(0);
#if 0
        s.small()->buf[0] = 0;
#endif
    }

    /**
     * Sets the size to zero. The current representation is kept: a big
     * string stays big, only its length field is zeroed.
     */
    void clear() noexcept
    {
        if (isbig()) {
            big()->n = 0;
        } else {
            set_small_size(0);
        }
    }

    /** Returns true when size() == 0. */
    bool empty() const noexcept
    {
        return isbig() ? !big()->n : small()->rem >= __::sso_max;
    }

    /** Returns a pointer to the first character of the active storage. */
    inline char* data() noexcept
    {
        return isbig() ? big()->p : small()->buf;
    }

    /** Const overload of data(). */
    inline const char* data() const noexcept
    {
        return isbig() ? big()->p : small()->buf;
    }

    /** Returns the number of characters currently stored. */
    inline size_t size() const noexcept
    {
#if 0
        if (!isbig() && small()->rem > __::sso_max)
            __builtin_trap();
#endif
        return isbig() ? big()->n : __::sso_max - small()->rem;
    }

    /** Synonym for size(). */
    size_t length() const noexcept
    {
        return size();
    }

    /**
     * Returns the capacity: the masked `c` field for a big string, or
     * `__::string_size` (the whole object) for a small one.
     */
    size_t capacity() const noexcept
    {
#if 0
        if (isbig() && big()->c <= __::sso_max)
            __builtin_trap();
#endif
        return isbig() ? __::big_mask & big()->c : __::string_size;
    }

    iterator begin() noexcept
    {
        return data();
    }

    iterator end() noexcept
    {
        return data() + size();
    }

    // Const overloads so that `const string` works with range-based for and
    // with generic code that calls begin()/end() rather than cbegin()/cend().
    const_iterator begin() const noexcept
    {
        return data();
    }

    const_iterator end() const noexcept
    {
        return data() + size();
    }

    const_iterator cbegin() const noexcept
    {
        return data();
    }

    const_iterator cend() const noexcept
    {
        return data() + size();
    }

    /** Returns the first character; traps if the string is empty. */
    char& front() noexcept
    {
        if (!size())
            __builtin_trap();
        return data()[0];
    }

    /** Const overload of front(); traps if the string is empty. */
    const char& front() const noexcept
    {
        if (!size())
            __builtin_trap();
        return data()[0];
    }

    /** Returns the last character; traps if the string is empty. */
    char& back() noexcept
    {
        if (!size())
            __builtin_trap();
        return data()[size() - 1];
    }

    /** Const overload of back(); traps if the string is empty. */
    const char& back() const noexcept
    {
        if (!size())
            __builtin_trap();
        return data()[size() - 1];
    }

    /** Bounds-checked element access; traps when `i >= size()`. */
    char& operator[](size_t i) noexcept
    {
        if (i >= size())
            __builtin_trap();
        return data()[i];
    }

    /** Const overload of operator[]; traps when `i >= size()`. */
    const char& operator[](size_t i) const noexcept
    {
        if (i >= size())
            __builtin_trap();
        return data()[i];
    }

    /** Appends one character; forwards to append(char). */
    void push_back(char ch) noexcept
    {
        append(ch);
    }

    /** Appends the bytes of `s`; forwards to append(const void*, size_t). */
    void append(const string_view s) noexcept
    {
        append(s.p, s.n);
    }

    /** Implicit conversion to a view over data() of length size(). */
    inline operator string_view() const noexcept
    {
        return string_view(data(), size());
    }

    /** Replaces the contents with `s` by clearing and then appending. */
    string& operator=(const char* s) noexcept
    {
        clear();
        append(s);
        return *this;
    }

    /** Replaces the contents with `s` by clearing and then appending. */
    string& operator=(const string_view s) noexcept
    {
        clear();
        append(s);
        return *this;
    }

    string& operator+=(char x) noexcept
    {
        append(x);
        return *this;
    }

    string& operator+=(const string_view s) noexcept
    {
        append(s);
        return *this;
    }

    /** Returns strcat(*this, s) as a new string. */
    string operator+(const string_view s) const noexcept
    {
        return strcat(*this, s);
    }

    /**
     * Three-way comparison; forwards to a `strcmp` overload accepting
     * string_view arguments, which is declared outside this header.
     */
    int compare(const string_view s) const noexcept
    {
        return strcmp(*this, s);
    }

    bool operator<(const string_view s) const noexcept
    {
        return compare(s) < 0;
    }

    bool operator<=(const string_view s) const noexcept
    {
        return compare(s) <= 0;
    }

    bool operator>(const string_view s) const noexcept
    {
        return compare(s) > 0;
    }

    bool operator>=(const string_view s) const noexcept
    {
        return compare(s) >= 0;
    }

private:
    /** True when the high bit of the final blob byte is set. */
    inline bool isbig() const noexcept
    {
        return *(__builtin_launder(blob) + __::sso_max) & 0x80;
    }

    /**
     * Records `size` for the small representation by storing
     * `sso_max - size` in the final byte; traps if `size > sso_max`.
     */
    inline void set_small_size(size_t size) noexcept
    {
        if (size > __::sso_max)
            __builtin_trap();
        *(__builtin_launder(blob) + __::sso_max) = (__::sso_max - size);
    }

    /**
     * Marks the object as big and stores capacity `c2`; traps if `c2` does
     * not fit in the bits covered by `big_mask`.
     */
    inline void set_big_capacity(size_t c2) noexcept
    {
        if (c2 > __::big_mask)
            __builtin_trap();
        // Set the flag byte first so that big() below does not trap.
        // NOTE(review): this relies on the final blob byte being the most
        // significant byte of big_string::c -- confirm for the target ABI.
        *(__builtin_launder(blob) + __::sso_max) = 0x80;
        big()->c &= ~__::big_mask;
        big()->c |= c2;
    }

    /** Views blob as a small_string; traps if the object is big. */
    inline __::small_string* small() noexcept
    {
        if (isbig())
            __builtin_trap();
        return __builtin_launder(reinterpret_cast<__::small_string*>(blob));
    }

    /** Const overload of small(). */
    inline const __::small_string* small() const noexcept
    {
        if (isbig())
            __builtin_trap();
        return __builtin_launder(
          reinterpret_cast<const __::small_string*>(blob));
    }

    /** Views blob as a big_string; traps if the object is small. */
    inline __::big_string* big() noexcept
    {
        if (!isbig())
            __builtin_trap();
        return __builtin_launder(reinterpret_cast<__::big_string*>(blob));
    }

    /** Const overload of big(). */
    inline const __::big_string* big() const noexcept
    {
        if (!isbig())
            __builtin_trap();
        return __builtin_launder(reinterpret_cast<const __::big_string*>(blob));
    }

    // The exception specification must match the namespace-scope
    // declaration of strcat, which is noexcept.
    friend string strcat(const string_view, const string_view) noexcept;

    // Raw storage, aligned for whichever of the two views is stricter.
    alignas(union {
        __::big_string a;
        __::small_string b;
    }) char blob[__::string_size];
};
// Layout guarantees: the string object and both of its internal views must
// all occupy exactly string_size bytes, since string reinterprets its blob
// as either view in place.
static_assert(sizeof(string) == __::string_size);
static_assert(sizeof(__::small_string) == __::string_size);
static_assert(sizeof(__::big_string) == __::string_size);
} // namespace ctl
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wliteral-suffix"
// User-defined literal: "text"s builds a ctl::string from the literal's
// characters and its length.
inline ctl::string
operator""s(const char* str, size_t len)
{
    return ctl::string(str, len);
}
#pragma GCC diagnostic pop
#endif // COSMOPOLITAN_CTL_STRING_H_