diff --git a/ctl/strcat.cc b/ctl/strcat.cc index 05ba5a05e..0e7c49070 100644 --- a/ctl/strcat.cc +++ b/ctl/strcat.cc @@ -34,10 +34,15 @@ strcat(const string_view lhs, const string_view rhs) noexcept __builtin_trap(); res.reserve(need); if (lhs.n) - memcpy(res.p, lhs.p, lhs.n); + memcpy(res.data(), lhs.p, lhs.n); if (rhs.n) - memcpy(res.p + lhs.n, rhs.p, rhs.n); - res.p[res.n = lhs.n + rhs.n] = 0; + memcpy(res.data() + lhs.n, rhs.p, rhs.n); + if (res.isbig()) { + res.big()->n = lhs.n + rhs.n; + } else { + res.small()->rem = __::sso_max - lhs.n - rhs.n; + } + res.data()[res.size()] = 0; return res; } diff --git a/ctl/string.cc b/ctl/string.cc index 9002113c1..ae5692b0f 100644 --- a/ctl/string.cc +++ b/ctl/string.cc @@ -23,7 +23,9 @@ namespace ctl { -string::~string() noexcept +namespace __ { + +big_string::~big_string() /* noexcept */ { if (n) { if (n >= c) @@ -36,6 +38,15 @@ string::~string() noexcept free(p); } +} // namespace __ + +string::~string() /* noexcept */ +{ + if (isbig()) { + big()->~big_string(); + } +} + string::string(const char* s) noexcept { append(s, strlen(s)); @@ -43,7 +54,7 @@ string::string(const char* s) noexcept string::string(const string& s) noexcept { - append(s.p, s.n); + append(s.data(), s.size()); } string::string(const string_view s) noexcept @@ -64,29 +75,37 @@ string::string(const char* s, size_t size) noexcept const char* string::c_str() const noexcept { - if (!n) - return ""; - if (n >= c) + if (size() >= capacity()) __builtin_trap(); - if (p[n]) + if (data()[size()]) __builtin_trap(); - return p; + return data(); } void string::reserve(size_t c2) noexcept { char* p2; - if (c2 < n) - c2 = n; + if (c2 < size()) + c2 = size(); if (ckd_add(&c2, c2, 15)) __builtin_trap(); c2 &= -16; - if (!(p2 = (char*)realloc(p, c2))) - __builtin_trap(); + if (c2 <= __::sso_max) + return; + if (!isbig()) { + if (!(p2 = (char *)malloc(c2))) + __builtin_trap(); + __builtin_memcpy(p2, data(), size() + 1); + } else { + if (!(p2 = (char *)realloc(big()->p, c2))) + __builtin_trap(); + } + size_t n = size(); std::atomic_signal_fence(std::memory_order_seq_cst); - c = c2; - p = p2; + set_big_capacity(c2); + big()->n = n; + big()->p = p2; } void @@ -96,41 +115,54 @@ string::resize(size_t n2, char ch) noexcept if (ckd_add(&c2, n2, 1)) __builtin_trap(); reserve(c2); - if (n2 > n) - memset(p + n, ch, n2 - n); - p[n = n2] = 0; + if (n2 > size()) + memset(data() + size(), ch, n2 - size()); + if (isbig()) { + big()->p[big()->n = n2] = 0; + } else { + set_small_size(n2); + data()[size()] = 0; + } } void string::append(char ch) noexcept { - if (n + 2 > c) { - size_t c2 = c + 2; - c2 += c2 >> 1; + size_t n2; + if (ckd_add(&n2, size(), 2)) + __builtin_trap(); + if (n2 > capacity()) { + size_t c2 = capacity() + 2; + if (ckd_add(&c2, c2, c2 >> 1)) + __builtin_trap(); reserve(c2); } - p[n++] = ch; - p[n] = 0; + // XXX do we care to fence this? + data()[size()] = ch; + data()[size() + 1] = 0; + if (isbig()) { + ++big()->n; + } else { + --small()->rem; + } } void string::grow(size_t size) noexcept { size_t need; - if (ckd_add(&need, n, size)) + if (ckd_add(&need, this->size(), size)) __builtin_trap(); if (ckd_add(&need, need, 1)) __builtin_trap(); - if (need <= c) + if (need <= capacity()) return; - size_t c2 = c; - if (!c2) { - c2 = need; - } else { - while (c2 < need) - if (ckd_add(&c2, c2, c2 >> 1)) - __builtin_trap(); - } + size_t c2 = capacity(); + if (!c2) + __builtin_trap(); + while (c2 < need) + if (ckd_add(&c2, c2, c2 >> 1)) + __builtin_trap(); reserve(c2); } @@ -139,8 +171,14 @@ string::append(char ch, size_t size) noexcept { grow(size); if (size) - memset(p + n, ch, size); - p[n += size] = 0; + memset(data() + this->size(), ch, size); + // XXX fence? + if (isbig()) { + big()->n += size; + } else { + small()->rem -= size; + } + data()[this->size()] = 0; } void @@ -148,55 +186,53 @@ string::append(const void* data, size_t size) noexcept { grow(size); if (size) - memcpy(p + n, data, size); - p[n += size] = 0; + memcpy(this->data() + this->size(), data, size); + if (isbig()) { + big()->n += size; + } else { + small()->rem -= size; + } + this->data()[this->size()] = 0; } void string::pop_back() noexcept { - if (!n) + if (!size()) __builtin_trap(); - p[--n] = 0; + if (isbig()) { + --big()->n; + } else { + ++small()->rem; + } + data()[size()] = 0; } string& -string::operator=(string&& s) noexcept +string::operator=(string s) noexcept { - if (p != s.p) { - if (p) { - clear(); - append(s.p, s.n); - } else { - p = s.p; - n = s.n; - c = s.c; - s.p = nullptr; - s.n = 0; - s.c = 0; - } - } + swap(s); return *this; } bool string::operator==(const string_view s) const noexcept { - if (n != s.n) + if (size() != s.n) return false; - if (!n) + if (!s.n) return true; - return !memcmp(p, s.p, n); + return !memcmp(data(), s.p, s.n); } bool string::operator!=(const string_view s) const noexcept { - if (n != s.n) + if (size() != s.n) return true; - if (!n) + if (!s.n) return false; - return !!memcmp(p, s.p, n); + return !!memcmp(data(), s.p, s.n); } bool @@ -204,35 +240,35 @@ string::contains(const string_view s) const noexcept { if (!s.n) return true; - return !!memmem(p, n, s.p, s.n); + return !!memmem(data(), size(), s.p, s.n); } bool string::ends_with(const string_view s) const noexcept { - if (n < s.n) + if (size() < s.n) return false; if (!s.n) return true; - return !memcmp(p + n - s.n, s.p, s.n); + return !memcmp(data() + size() - s.n, s.p, s.n); } bool string::starts_with(const string_view s) const noexcept { - if (n < s.n) + if (size() < s.n) return false; if (!s.n) return true; - return !memcmp(p, s.p, s.n); + return !memcmp(data(), s.p, s.n); } size_t string::find(char ch, size_t pos) const noexcept { char* q; - if ((q = (char*)memchr(p, ch, n))) - return q - p; + if ((q = (char*)memchr(data(), ch, size()))) + return q - data(); return npos; } @@ -240,10 +276,10 @@ size_t string::find(const string_view s, size_t pos) const noexcept { char* q; - if (pos > n) + if (pos > size()) __builtin_trap(); - if ((q = (char*)memmem(p + pos, n - pos, s.p, s.n))) - return q - p; + if ((q = (char*)memmem(data() + pos, size() - pos, s.p, s.n))) + return q - data(); return npos; } @@ -251,15 +287,15 @@ string string::substr(size_t pos, size_t count) const noexcept { size_t last; - if (pos > n) + if (pos > size()) __builtin_trap(); - if (count > n - pos) - count = n - pos; + if (count > size() - pos) + count = size() - pos; if (ckd_add(&last, pos, count)) - last = n; - if (last > n) + last = size(); + if (last > size()) __builtin_trap(); - return string(p + pos, count); + return string(data() + pos, count); } string& @@ -268,12 +304,12 @@ string::replace(size_t pos, size_t count, const string_view& s) noexcept size_t last; if (ckd_add(&last, pos, count)) __builtin_trap(); - if (last > n) + if (last > size()) __builtin_trap(); size_t need; if (ckd_add(&need, pos, s.n)) __builtin_trap(); - size_t extra = n - last; + size_t extra = size() - last; if (ckd_add(&need, need, extra)) __builtin_trap(); size_t c2; @@ -281,42 +317,57 @@ string::replace(size_t pos, size_t count, const string_view& s) noexcept __builtin_trap(); reserve(c2); if (extra) - memmove(p + pos + s.n, p + last, extra); - memcpy(p + pos, s.p, s.n); - p[n = need] = 0; + memmove(data() + pos + s.n, data() + last, extra); + memcpy(data() + pos, s.p, s.n); + if (isbig()) { + big()->p[big()->n = need] = 0; + } else { + set_small_size(need); + data()[size()] = 0; + } return *this; } string& string::insert(size_t i, const string_view s) noexcept { - if (i > n) + if (i > size()) __builtin_trap(); - size_t extra = n - i; + size_t extra = size() - i; size_t need; - if (ckd_add(&need, n, s.n)) + if (ckd_add(&need, size(), s.n)) __builtin_trap(); if (ckd_add(&need, need, 1)) __builtin_trap(); reserve(need); if (extra) - memmove(p + i + s.n, p + i, extra); - memcpy(p + i, s.p, s.n); - p[n += s.n] = 0; + memmove(data() + i + s.n, data() + i, extra); + memcpy(data() + i, s.p, s.n); + if (isbig()) { + big()->n += s.n; + } else { + small()->rem -= s.n; + } + data()[size()] = 0; return *this; } string& string::erase(size_t pos, size_t count) noexcept { - if (pos > n) + if (pos > size()) __builtin_trap(); - if (count > n - pos) - count = n - pos; - size_t extra = n - (pos + count); + if (count > size() - pos) + count = size() - pos; + size_t extra = size() - (pos + count); if (extra) - memmove(p + pos, p + pos + count, extra); - p[n = pos + extra] = 0; + memmove(data() + pos, data() + pos + count, extra); + if (isbig()) { + big()->n = pos + extra; + } else { + set_small_size(pos + extra); + } + data()[size()] = 0; return *this; } diff --git a/ctl/string.h b/ctl/string.h index 2fc4a158d..12e42453a 100644 --- a/ctl/string.h +++ b/ctl/string.h @@ -6,29 +6,59 @@ namespace ctl { -struct string; +class string; string strcat(const string_view, const string_view) noexcept __wur; -struct string -{ - char* p = nullptr; - size_t n = 0; - size_t c = 0; +namespace __ { +constexpr size_t string_size = 3 * sizeof(size_t); +constexpr size_t sso_max = string_size - 1; +constexpr size_t big_mask = ~(1ull << (8ull * sizeof(size_t) - 1ull)); + +struct small_string +{ + char buf[sso_max]; + // interpretation is: size == sso_max - rem + unsigned char rem; +#if 0 + size_t rem : 7; + size_t big : 1 /* = 0 */; +#endif +}; + +struct big_string +{ + char* p; + size_t n; + // interpretation is: capacity == c & big_mask + size_t c; +#if 0 + size_t c : sizeof(size_t) * 8 - 1; + size_t big : 1 /* = 1 */; +#endif + + ~big_string() /* noexcept */; +}; + +} // namespace __ + +class string +{ + public: using iterator = char*; using const_iterator = const char*; static constexpr size_t npos = -1; - ~string() noexcept; - string() = default; + ~string() /* noexcept */; string(const string_view) noexcept; string(const char*) noexcept; string(const string&) noexcept; string(const char*, size_t) noexcept; explicit string(size_t, char = 0) noexcept; - string& operator=(string&&) noexcept; + + string& operator=(string) noexcept; const char* c_str() const noexcept; void pop_back() noexcept; @@ -51,103 +81,137 @@ struct string size_t find(char, size_t = 0) const noexcept; size_t find(const string_view, size_t = 0) const noexcept; - string(string&& s) noexcept : p(s.p), n(s.n), c(s.c) + string() noexcept { - s.p = nullptr; - s.n = 0; - s.c = 0; + set_small_size(0); + small()->buf[0] = 0; + } + + void swap(string& s) noexcept + { + char tmp[__::string_size]; + __builtin_memcpy(tmp, __builtin_launder(blob), sizeof(tmp)); + __builtin_memcpy( + __builtin_launder(blob), __builtin_launder(s.blob), sizeof(tmp)); + __builtin_memcpy(__builtin_launder(s.blob), tmp, sizeof(tmp)); + } + + string(string&& s) noexcept + { + __builtin_memcpy(blob, __builtin_launder(s.blob), sizeof(blob)); + s.set_small_size(0); + /* shouldn't be necessary, but the spec says s should be left in a valid + state and our c_str() depends on this */ + s.small()->buf[0] = 0; } void clear() noexcept { - n = 0; + if (isbig()) { + big()->n = 0; + } else { + set_small_size(0); + } } bool empty() const noexcept { - return !n; + return isbig() ? !big()->n : small()->rem >= __::sso_max; } - char* data() const noexcept + inline char* data() noexcept { - return p; + return isbig() ? big()->p : small()->buf; } - size_t size() const noexcept + inline const char* data() const noexcept { - return n; + return isbig() ? big()->p : small()->buf; + } + + inline size_t size() const noexcept + { +#if 0 + if (!isbig() && small()->rem > __::sso_max) + __builtin_trap(); +#endif + return isbig() ? big()->n : __::sso_max - small()->rem; } size_t length() const noexcept { - return n; + return size(); } size_t capacity() const noexcept { - return c; +#if 0 + if (isbig() && big()->c <= __::sso_max) + __builtin_trap(); +#endif + return isbig() ? big()->c : __::sso_max; } iterator begin() noexcept { - return p; + return data(); } iterator end() noexcept { - return p + n; + return data() + size(); } const_iterator cbegin() const noexcept { - return p; + return data(); } const_iterator cend() const noexcept { - return p + n; + return data() + size(); } char& front() { - if (!n) + if (!size()) __builtin_trap(); - return p[0]; + return data()[0]; } const char& front() const { - if (!n) + if (!size()) __builtin_trap(); - return p[0]; + return data()[0]; } char& back() { - if (!n) + if (!size()) __builtin_trap(); - return p[n - 1]; + return data()[size() - 1]; } const char& back() const { - if (!n) + if (!size()) __builtin_trap(); - return p[n - 1]; + return data()[size() - 1]; } char& operator[](size_t i) noexcept { - if (i >= n) + if (i >= size()) __builtin_trap(); - return p[i]; + return data()[i]; } const char& operator[](size_t i) const noexcept { - if (i >= n) + if (i >= size()) __builtin_trap(); - return p[i]; + return data()[i]; } void push_back(char ch) noexcept @@ -160,9 +224,10 @@ struct string append(s.p, s.n); } - inline constexpr operator string_view() const noexcept + // TODO(mrdomino): explicit? + inline operator string_view() const noexcept { - return string_view(p, n); + return string_view(data(), size()); } string& operator=(const char* s) noexcept @@ -220,8 +285,68 @@ struct string { return compare(s) >= 0; } + + private: + inline bool isbig() const noexcept + { + return *(__builtin_launder(blob) + __::sso_max) & 0x80; + } + + inline void set_small_size(size_t size) noexcept + { + if (size > __::sso_max) + __builtin_trap(); + *(__builtin_launder(blob) + __::sso_max) = (__::sso_max - size); + } + + inline void set_big_capacity(size_t c2) noexcept + { + if (c2 > __::big_mask) + __builtin_trap(); + *((size_t *)__builtin_launder(blob) + 2) = ~__::big_mask | c2; + } + + inline __::small_string* small() noexcept + { + if (isbig()) + __builtin_trap(); + return __builtin_launder(reinterpret_cast<__::small_string*>(blob)); + } + + inline const __::small_string* small() const noexcept + { + if (isbig()) + __builtin_trap(); + return __builtin_launder( + reinterpret_cast(blob)); + } + + inline __::big_string* big() noexcept + { + if (!isbig()) + __builtin_trap(); + return __builtin_launder(reinterpret_cast<__::big_string*>(blob)); + } + + inline const __::big_string* big() const noexcept + { + if (!isbig()) + __builtin_trap(); + return __builtin_launder(reinterpret_cast(blob)); + } + + friend string strcat(const string_view, const string_view); + + alignas(union { + __::big_string a; + __::small_string b; + }) char blob[__::string_size]; }; +static_assert(sizeof(string) == __::string_size); +static_assert(sizeof(__::small_string) == __::string_size); +static_assert(sizeof(__::big_string) == __::string_size); + } // namespace ctl #pragma GCC diagnostic push