2020-06-15 14:18:57 +00:00
|
|
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
2023-12-08 03:11:56 +00:00
|
|
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
2020-06-15 14:18:57 +00:00
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
|
|
│ │
|
2020-12-28 01:18:44 +00:00
|
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
2020-06-15 14:18:57 +00:00
|
|
|
│ │
|
2020-12-28 01:18:44 +00:00
|
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
2020-06-15 14:18:57 +00:00
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
2023-07-24 15:31:54 +00:00
|
|
|
#include "libc/mem/mem.h"
|
2024-09-01 08:14:40 +00:00
|
|
|
#include "libc/assert.h"
|
|
|
|
#include "libc/calls/calls.h"
|
2022-08-11 19:13:18 +00:00
|
|
|
#include "libc/intrin/likely.h"
|
2024-09-01 08:14:40 +00:00
|
|
|
#include "libc/intrin/safemacros.h"
|
2023-07-24 15:31:54 +00:00
|
|
|
#include "libc/mem/alg.h"
|
2024-09-01 08:14:40 +00:00
|
|
|
#include "libc/runtime/runtime.h"
|
|
|
|
#include "libc/runtime/sysconf.h"
|
2022-08-11 19:26:30 +00:00
|
|
|
#include "libc/stdio/rand.h"
|
2020-06-15 14:18:57 +00:00
|
|
|
#include "libc/str/str.h"
|
2024-09-01 08:14:40 +00:00
|
|
|
#include "libc/sysv/consts/map.h"
|
|
|
|
#include "libc/sysv/consts/prot.h"
|
2021-05-03 19:09:35 +00:00
|
|
|
#include "libc/testlib/ezbench.h"
|
|
|
|
#include "libc/testlib/hyperion.h"
|
2020-06-15 14:18:57 +00:00
|
|
|
#include "libc/testlib/testlib.h"
|
|
|
|
|
2021-02-01 11:33:13 +00:00
|
|
|
// Copies SL (a string literal at every use in this file) into a malloc'd
// buffer of exactly sizeof(SL) - 1 bytes, i.e. without the trailing NUL.
// Evaluates to the new buffer, which the tests release with free().
// The malloc() result is handed to memcpy() without a NULL check.
#define MakeMemory(SL) memcpy(malloc(sizeof(SL) - 1), SL, sizeof(SL) - 1)
|
2020-06-15 14:18:57 +00:00
|
|
|
|
2022-08-07 00:18:40 +00:00
|
|
|
/**
 * Searches memory for a byte sequence, one candidate offset at a time.
 *
 * This is the simple reference implementation that the fuzz, safety,
 * and benchmark cases in this file compare memmem() against.
 *
 * @param haystk is the memory to search
 * @param haystklen is the number of bytes in haystk
 * @param needle is the byte sequence to look for
 * @param needlelen is the number of bytes in needle
 * @return pointer to the first occurrence of needle inside haystk,
 *     haystk itself if needlelen is zero, or NULL if needle is longer
 *     than haystk or doesn't occur in it
 */
void *memmem_naive(const void *haystk, size_t haystklen,  //
                   const void *needle, size_t needlelen) {
  const char *hay = haystk;
  const char *nee = needle;
  size_t i, j, last;
  if (!needlelen)
    return (void *)haystk;
  if (needlelen > haystklen)
    return 0;
  // start offsets beyond this one leave too few bytes for the needle to
  // fit, so there's no point in trying them; this also guarantees that
  // hay[i + j] below never indexes past haystklen
  last = haystklen - needlelen;
  for (i = 0; i <= last; ++i) {
    for (j = 0; j < needlelen; ++j) {
      if (hay[i + j] != nee[j])
        break;
    }
    if (j == needlelen)
      return (/*unconst*/ char *)hay + i;
  }
  return 0;
}
|
|
|
|
|
|
|
|
TEST(memmem, special) {
|
|
|
|
EXPECT_EQ(0, memmem(0, 0, 0, 0));
|
|
|
|
EXPECT_EQ(0, memmem(0, 0, "", 1));
|
|
|
|
EXPECT_EQ("", memmem("", 1, 0, 0));
|
|
|
|
}
|
|
|
|
|
2020-06-15 14:18:57 +00:00
|
|
|
TEST(memmem, test) {
|
|
|
|
char *needle = MakeMemory("abcdefgh");
|
|
|
|
char *haystk = MakeMemory("acccccccbbbbbbbbabcdefghdddddddd");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_BINEQ(u"abcdefghdddddddd", memmem(haystk, 32, needle, 8));
|
2020-06-15 14:18:57 +00:00
|
|
|
memcpy(needle, "aaaaaaaa", 8);
|
|
|
|
memcpy(haystk, "acccccccbbbbbbbbaaaaaaaadddddddd", 32);
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_BINEQ(u"aaaaaaaadddddddd", memmem(haystk, 32, needle, 8));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testNoMatch) {
|
|
|
|
char *needle = MakeMemory("abcdefzh");
|
|
|
|
char *haystk = MakeMemory("acccccccbbbbbbbbabcdefghdddddddd");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(NULL, memmem(haystk, 32, needle, 8));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testStartOfMemory) {
|
|
|
|
char *needle = MakeMemory("acccc");
|
|
|
|
char *haystk = MakeMemory("acccccccbbbbbbbbabcdefghdddddddd");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(&haystk[0], memmem(haystk, 32, needle, 5));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testEndOfMemory) {
|
|
|
|
char *haystk = MakeMemory("abc123");
|
2022-08-07 00:18:40 +00:00
|
|
|
char *needle = MakeMemory("123");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(&haystk[3], memmem(haystk, 6, needle, 3));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
2021-05-03 19:09:35 +00:00
|
|
|
TEST(memmem, testOneNo) {
|
|
|
|
char *needle = MakeMemory("z");
|
|
|
|
char *haystk = MakeMemory("abc123");
|
|
|
|
EXPECT_EQ(0, memmem(haystk, 6, needle, 1));
|
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testOneYes) {
|
|
|
|
char *needle = MakeMemory("3");
|
|
|
|
char *haystk = MakeMemory("abc123");
|
|
|
|
EXPECT_EQ(&haystk[5], memmem(haystk, 6, needle, 1));
|
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
|
|
|
}
|
|
|
|
|
2020-06-15 14:18:57 +00:00
|
|
|
TEST(memmem, testCrossesSseRegister) {
|
|
|
|
char *haystk = MakeMemory("eeeeeeeeeeeeeeeeffffffffffffffffrrrrrrrrrrrrrrrr");
|
2022-08-07 00:18:40 +00:00
|
|
|
char *needle = MakeMemory("eeeeeeeeeeeeefffffffffffff");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(&haystk[3], memmem(haystk, 16 * 3, needle, 26));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testHasNulCharacters) {
|
|
|
|
char *needle = MakeMemory("eeeeeeeeeeeee\0ffffffffffff");
|
|
|
|
char *haystk =
|
|
|
|
MakeMemory("eeeeeeeeeeeeeeee\0fffffffffffffffrrrrrrrrrrrrrrrr");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(&haystk[3], memmem(haystk, 16 * 3, needle, 26));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testWeird) {
|
|
|
|
char *needle = MakeMemory("-*-+-+-+-+-+-+-+");
|
|
|
|
char *haystk = MakeMemory("-+-+-+-+-+-+-+-*-+-+-+-+-+-+-+-+");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(14, (intptr_t)memmem(haystk, 32, needle, 16) - (intptr_t)haystk);
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testEmptyNeedle_matchesStartOfHaystack) {
|
2021-02-01 11:33:13 +00:00
|
|
|
char *needle = malloc(0);
|
2020-06-15 14:18:57 +00:00
|
|
|
char *haystk = MakeMemory("-+-+-+-+-+-+-+-*-+-+-+-+-+-+-+-+");
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(0, (intptr_t)memmem(haystk, 32, needle, 0) - (intptr_t)haystk);
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testEmptyHaystack_alwaysReturnsNull) {
|
|
|
|
char *needle = MakeMemory("-*-+-+-+-+-+-+-+");
|
2021-02-01 11:33:13 +00:00
|
|
|
char *haystk = malloc(0);
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(NULL, memmem(haystk, 0, needle, 16));
|
|
|
|
EXPECT_EQ(NULL, memmem(haystk, 0, needle, 1));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST(memmem, testEmptyHaystackAndNeedle_returnsHaystack) {
|
2021-02-01 11:33:13 +00:00
|
|
|
char *needle = malloc(0);
|
|
|
|
char *haystk = malloc(0);
|
2021-02-07 14:11:44 +00:00
|
|
|
EXPECT_EQ(haystk, memmem(haystk, 0, needle, 0));
|
2021-02-01 11:33:13 +00:00
|
|
|
free(haystk);
|
|
|
|
free(needle);
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
2021-02-07 14:11:44 +00:00
|
|
|
|
|
|
|
TEST(memmem, testWut) {
|
|
|
|
ASSERT_STREQ("x", memmem("x", 1, "x", 1));
|
|
|
|
}
|
2021-05-03 19:09:35 +00:00
|
|
|
|
2022-08-07 00:18:40 +00:00
|
|
|
TEST(memmem, fuzz) {
|
2023-09-02 03:49:13 +00:00
|
|
|
int i, n, m;
|
2022-08-07 00:18:40 +00:00
|
|
|
char a[128], b[128], *p, *q;
|
|
|
|
for (i = 0; i < 10000; ++i) {
|
|
|
|
rngset(a, sizeof(a), lemur64, -1);
|
|
|
|
rngset(b, sizeof(b), lemur64, -1);
|
|
|
|
p = a + lemur64() % sizeof(a) / 2;
|
|
|
|
q = b + lemur64() % sizeof(b) / 2;
|
|
|
|
n = lemur64() % sizeof(a) / 2;
|
|
|
|
m = lemur64() % sizeof(b) / 2;
|
|
|
|
ASSERT_EQ(memmem_naive(p, n, q, m), memmem(p, n, q, m));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-01 08:14:40 +00:00
|
|
|
TEST(memmem, safety) {
|
|
|
|
int pagesz = sysconf(_SC_PAGESIZE);
|
|
|
|
char *map = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
|
|
|
npassert(map != MAP_FAILED);
|
|
|
|
npassert(!mprotect(map + pagesz, pagesz, PROT_NONE));
|
|
|
|
for (int haylen = 1; haylen < 128; ++haylen) {
|
|
|
|
char *hay = map + pagesz - (haylen + 1);
|
|
|
|
for (int i = 0; i < haylen; ++i)
|
|
|
|
hay[i] = max(rand() & 255, 1);
|
|
|
|
hay[haylen] = 0;
|
|
|
|
for (int neelen = 1; neelen < haylen; ++neelen) {
|
|
|
|
char *nee = hay + (haylen + 1) - (neelen + 1);
|
|
|
|
ASSERT_EQ(memmem_naive(hay, haylen, nee, neelen),
|
|
|
|
memmem(hay, haylen, nee, neelen));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
munmap(map, pagesz * 2);
|
|
|
|
}
|
|
|
|
|
2022-08-07 00:18:40 +00:00
|
|
|
/*
|
|
|
|
* memmem naive l: 43,783c 14,142ns m: 31,285c 10,105ns
|
|
|
|
* memmem l: 2,597c 839ns m: 2,612c 844ns
|
|
|
|
* memmem l: 509c 164ns m: 599c 193ns
|
|
|
|
*
|
|
|
|
* strstr naive l: 103,057c 33,287ns m: 47,035c 15,192ns
|
|
|
|
* strstr l: 3,186c 1,029ns m: 3,218c 1,039ns
|
|
|
|
* strstr torture 1 l: 27c 9ns m: 61c 20ns
|
|
|
|
* strstr torture 2 l: 2,322c 750ns m: 2,362c 763ns
|
|
|
|
* strstr torture 4 l: 2,407c 777ns m: 2,448c 791ns
|
|
|
|
* strstr torture 8 l: 2,803c 905ns m: 2,862c 924ns
|
|
|
|
* strstr torture 16 l: 4,559c 1,473ns m: 3,614c 1,167ns
|
|
|
|
* strstr torture 32 l: 5,324c 1,720ns m: 5,577c 1,801ns
|
|
|
|
*
|
|
|
|
* strcasestr naive l: 129,908c 41,959ns m: 155,420c 50,200ns
|
|
|
|
* strcasestr l: 33,464c 10,809ns m: 31,636c 10,218ns
|
|
|
|
* strcasestr tort 1 l: 38c 12ns m: 69c 22ns
|
|
|
|
* strcasestr tort 2 l: 2,544c 822ns m: 2,580c 833ns
|
|
|
|
* strcasestr tort 4 l: 2,745c 887ns m: 2,767c 894ns
|
|
|
|
* strcasestr tort 8 l: 4,198c 1,356ns m: 4,216c 1,362ns
|
|
|
|
* strcasestr tort 16 l: 7,402c 2,391ns m: 7,487c 2,418ns
|
|
|
|
* strcasestr tort 32 l: 13,772c 4,448ns m: 12,945c 4,181ns
|
|
|
|
*/
|
2021-05-03 19:09:35 +00:00
|
|
|
BENCH(memmem, bench) {
|
2022-08-07 00:18:40 +00:00
|
|
|
EZBENCH2("memmem naive", donothing,
|
2023-07-26 20:54:49 +00:00
|
|
|
__expropriate(memmem_naive(kHyperion, kHyperionSize, "THE END", 7)));
|
2021-05-03 19:09:35 +00:00
|
|
|
EZBENCH2("memmem", donothing,
|
2023-07-26 20:54:49 +00:00
|
|
|
__expropriate(memmem(kHyperion, kHyperionSize, "THE END", 7)));
|
2021-05-03 19:09:35 +00:00
|
|
|
EZBENCH2("memmem", donothing,
|
2024-09-01 08:14:40 +00:00
|
|
|
__expropriate(
|
|
|
|
memmem("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
|
|
|
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
|
|
|
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab",
|
|
|
|
152,
|
|
|
|
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
|
|
|
"aaaaaaaaaaaaaaaaaaaaaaaab",
|
|
|
|
81)));
|
2021-05-03 19:09:35 +00:00
|
|
|
}
|