From 221817e53731d5085a926ab45693ad89b188419e Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 14 May 2021 10:18:28 -0700 Subject: [PATCH] Further polish SQLite vendoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Now integrated with `make tags` for Emacs IDE features - Delete some old deprecated broken full-text search engines - Rename .h → .inc files that don't meet our definition of header - Make sure every #include line is normal form so tools understand See #162 --- third_party/sqlite3/alter.c | 2 +- third_party/sqlite3/analyze.c | 2 +- third_party/sqlite3/attach.c | 2 +- third_party/sqlite3/auth.c | 2 +- third_party/sqlite3/backup.c | 4 +- third_party/sqlite3/bitvec.c | 2 +- third_party/sqlite3/btmutex.c | 2 +- third_party/sqlite3/btree.c | 2 +- third_party/sqlite3/{btree.h => btree.inc} | 0 .../sqlite3/{btreeInt.h => btreeInt.inc} | 2 +- third_party/sqlite3/build.c | 2 +- third_party/sqlite3/callback.c | 2 +- third_party/sqlite3/complete.c | 2 +- third_party/sqlite3/ctime.c | 9 - third_party/sqlite3/date.c | 2 +- third_party/sqlite3/dbpage.c | 2 +- third_party/sqlite3/dbstat.c | 2 +- third_party/sqlite3/delete.c | 2 +- third_party/sqlite3/expr.c | 2 +- third_party/sqlite3/fault.c | 2 +- third_party/sqlite3/fkey.c | 2 +- third_party/sqlite3/fts1.c | 3351 -------- third_party/sqlite3/fts1.h | 11 - third_party/sqlite3/fts1_hash.c | 368 - third_party/sqlite3/fts1_hash.h | 114 - third_party/sqlite3/fts1_porter.c | 643 -- third_party/sqlite3/fts1_tokenizer1.c | 221 - third_party/sqlite3/fts2.c | 6861 ----------------- third_party/sqlite3/fts2.h | 26 - third_party/sqlite3/fts2_hash.c | 376 - third_party/sqlite3/fts2_hash.h | 112 - third_party/sqlite3/fts2_icu.c | 261 - third_party/sqlite3/fts2_porter.c | 644 -- third_party/sqlite3/fts2_tokenizer.c | 375 - third_party/sqlite3/fts2_tokenizer.h | 147 - third_party/sqlite3/fts2_tokenizer1.c | 233 - third_party/sqlite3/fts3.c | 4 +- third_party/sqlite3/{fts3.h => fts3.inc} | 0 .../sqlite3/{fts3Int.h => fts3Int.inc} | 4 +- third_party/sqlite3/fts3_aux.c | 2 +- third_party/sqlite3/fts3_expr.c | 2 +- third_party/sqlite3/fts3_hash.c | 4 +- .../sqlite3/{fts3_hash.h => fts3_hash.inc} | 0 third_party/sqlite3/fts3_icu.c | 4 +- third_party/sqlite3/fts3_porter.c | 4 +- third_party/sqlite3/fts3_snippet.c | 2 +- third_party/sqlite3/fts3_tokenize_vtab.c | 2 +- third_party/sqlite3/fts3_tokenizer.c | 8 +- .../{fts3_tokenizer.h => fts3_tokenizer.inc} | 0 third_party/sqlite3/fts3_tokenizer1.c | 4 +- third_party/sqlite3/fts3_unicode.c | 4 +- third_party/sqlite3/fts3_write.c | 2 +- third_party/sqlite3/{fts5.h => fts5.inc} | 0 third_party/sqlite3/func.c | 4 +- .../sqlite3/{geopoly.c => geopoly.inc} | 4 - third_party/sqlite3/global.c | 4 +- third_party/sqlite3/hash.c | 2 +- third_party/sqlite3/{hash.h => hash.inc} | 0 third_party/sqlite3/{hwtime.h => hwtime.inc} | 0 third_party/sqlite3/insert.c | 2 +- third_party/sqlite3/inttypes.inc | 62 + .../{keywordhash.h => keywordhash.inc} | 0 third_party/sqlite3/legacy.c | 2 +- third_party/sqlite3/loadext.c | 2 +- third_party/sqlite3/main.c | 8 +- third_party/sqlite3/malloc.c | 2 +- third_party/sqlite3/mem0.c | 2 +- third_party/sqlite3/mem1.c | 2 +- third_party/sqlite3/mem2.c | 2 +- third_party/sqlite3/mem3.c | 2 +- third_party/sqlite3/mem5.c | 2 +- third_party/sqlite3/memdb.c | 2 +- third_party/sqlite3/memjournal.c | 2 +- third_party/sqlite3/{msvc.h => msvc.inc} | 0 third_party/sqlite3/mutex.c | 2 +- third_party/sqlite3/{mutex.h => mutex.inc} | 0 third_party/sqlite3/mutex_noop.c | 2 +- third_party/sqlite3/mutex_unix.c | 2 +- third_party/sqlite3/mutex_w32.c | 400 - third_party/sqlite3/notify.c | 4 +- .../sqlite3/{opcodes.h => opcodes.inc} | 0 third_party/sqlite3/os.c | 2 +- third_party/sqlite3/{os.h => os.inc} | 2 +- .../sqlite3/{os_common.h => os_common.inc} | 2 +- .../sqlite3/{os_setup.h => os_setup.inc} | 0 third_party/sqlite3/os_unix.c | 12 +- third_party/sqlite3/os_win.c | 9 +- third_party/sqlite3/os_win.h | 90 - third_party/sqlite3/pager.c | 4 +- third_party/sqlite3/{pager.h => pager.inc} | 0 third_party/sqlite3/parse.c | 2 +- third_party/sqlite3/{parse.h => parse.inc} | 0 third_party/sqlite3/pcache.c | 2 +- third_party/sqlite3/{pcache.h => pcache.inc} | 0 third_party/sqlite3/pcache1.c | 2 +- third_party/sqlite3/pragma.c | 4 +- third_party/sqlite3/{pragma.h => pragma.inc} | 0 third_party/sqlite3/prepare.c | 2 +- third_party/sqlite3/printf.c | 2 +- third_party/sqlite3/random.c | 2 +- third_party/sqlite3/resolve.c | 2 +- third_party/sqlite3/rowset.c | 2 +- third_party/sqlite3/rtree.c | 4 +- third_party/sqlite3/{rtree.h => rtree.inc} | 0 third_party/sqlite3/select.c | 2 +- third_party/sqlite3/shell.c | 9 +- third_party/sqlite3/shell.c.in | 4 +- third_party/sqlite3/sqlite3.h | 49 +- third_party/sqlite3/sqlite3.mk | 40 +- third_party/sqlite3/sqlite3ext.h | 19 +- third_party/sqlite3/sqlite3rbu.c | 6 +- .../sqlite3/{sqlite3rbu.h => sqlite3rbu.inc} | 0 .../{sqlite3rtree.h => sqlite3rtree.inc} | 0 third_party/sqlite3/sqlite3session.c | 6 +- .../{sqlite3session.h => sqlite3session.inc} | 0 ...{sqlite3userauth.h => sqlite3userauth.inc} | 0 .../sqlite3/{sqliteInt.h => sqliteInt.inc} | 51 +- .../{sqliteLimit.h => sqliteLimit.inc} | 0 .../sqlite3/{sqliteicu.h => sqliteicu.inc} | 0 third_party/sqlite3/status.c | 4 +- third_party/sqlite3/table.c | 2 +- third_party/sqlite3/threads.c | 5 +- third_party/sqlite3/tokenize.c | 4 +- .../{fts1_tokenizer.h => tokenizer.inc} | 0 third_party/sqlite3/treeview.c | 2 +- third_party/sqlite3/trigger.c | 2 +- third_party/sqlite3/update.c | 2 +- third_party/sqlite3/upsert.c | 2 +- third_party/sqlite3/userauth.c | 2 +- third_party/sqlite3/utf.c | 4 +- third_party/sqlite3/util.c | 2 +- third_party/sqlite3/vacuum.c | 4 +- third_party/sqlite3/vdbe.c | 6 +- third_party/sqlite3/{vdbe.h => vdbe.inc} | 2 +- .../sqlite3/{vdbeInt.h => vdbeInt.inc} | 0 third_party/sqlite3/vdbeapi.c | 4 +- third_party/sqlite3/vdbeaux.c | 4 +- third_party/sqlite3/vdbeblob.c | 4 +- third_party/sqlite3/vdbemem.c | 4 +- third_party/sqlite3/vdbesort.c | 4 +- third_party/sqlite3/vdbetrace.c | 4 +- third_party/sqlite3/vdbevtab.c | 4 +- third_party/sqlite3/vtab.c | 2 +- .../sqlite3/{vxworks.h => vxworks.inc} | 0 third_party/sqlite3/wal.c | 2 +- third_party/sqlite3/{wal.h => wal.inc} | 2 +- third_party/sqlite3/walker.c | 2 +- third_party/sqlite3/where.c | 4 +- .../sqlite3/{whereInt.h => whereInt.inc} | 0 third_party/sqlite3/wherecode.c | 4 +- third_party/sqlite3/whereexpr.c | 4 +- third_party/sqlite3/window.c | 2 +- 152 files changed, 255 insertions(+), 14523 deletions(-) rename third_party/sqlite3/{btree.h => btree.inc} (100%) rename third_party/sqlite3/{btreeInt.h => btreeInt.inc} (99%) delete mode 100644 third_party/sqlite3/fts1.c delete mode 100644 third_party/sqlite3/fts1.h delete mode 100644 third_party/sqlite3/fts1_hash.c delete mode 100644 third_party/sqlite3/fts1_hash.h delete mode 100644 third_party/sqlite3/fts1_porter.c delete mode 100644 third_party/sqlite3/fts1_tokenizer1.c delete mode 100644 third_party/sqlite3/fts2.c delete mode 100644 third_party/sqlite3/fts2.h delete mode 100644 third_party/sqlite3/fts2_hash.c delete mode 100644 third_party/sqlite3/fts2_hash.h delete mode 100644 third_party/sqlite3/fts2_icu.c delete mode 100644 third_party/sqlite3/fts2_porter.c delete mode 100644 third_party/sqlite3/fts2_tokenizer.c delete mode 100644 third_party/sqlite3/fts2_tokenizer.h delete mode 100644 third_party/sqlite3/fts2_tokenizer1.c rename third_party/sqlite3/{fts3.h => fts3.inc} (100%) rename third_party/sqlite3/{fts3Int.h => fts3Int.inc} (99%) rename third_party/sqlite3/{fts3_hash.h => fts3_hash.inc} (100%) rename third_party/sqlite3/{fts3_tokenizer.h => fts3_tokenizer.inc} (100%) rename third_party/sqlite3/{fts5.h => fts5.inc} (100%) rename third_party/sqlite3/{geopoly.c => geopoly.inc} (99%) rename third_party/sqlite3/{hash.h => hash.inc} (100%) rename third_party/sqlite3/{hwtime.h => hwtime.inc} (100%) create mode 100644 third_party/sqlite3/inttypes.inc rename third_party/sqlite3/{keywordhash.h => keywordhash.inc} (100%) rename third_party/sqlite3/{msvc.h => msvc.inc} (100%) rename third_party/sqlite3/{mutex.h => mutex.inc} (100%) delete mode 100644 third_party/sqlite3/mutex_w32.c rename third_party/sqlite3/{opcodes.h => opcodes.inc} (100%) rename third_party/sqlite3/{os.h => os.inc} (99%) rename third_party/sqlite3/{os_common.h => os_common.inc} (98%) rename third_party/sqlite3/{os_setup.h => os_setup.inc} (100%) delete mode 100644 third_party/sqlite3/os_win.h rename third_party/sqlite3/{pager.h => pager.inc} (100%) rename third_party/sqlite3/{parse.h => parse.inc} (100%) rename third_party/sqlite3/{pcache.h => pcache.inc} (100%) rename third_party/sqlite3/{pragma.h => pragma.inc} (100%) rename third_party/sqlite3/{rtree.h => rtree.inc} (100%) rename third_party/sqlite3/{sqlite3rbu.h => sqlite3rbu.inc} (100%) rename third_party/sqlite3/{sqlite3rtree.h => sqlite3rtree.inc} (100%) rename third_party/sqlite3/{sqlite3session.h => sqlite3session.inc} (100%) rename third_party/sqlite3/{sqlite3userauth.h => sqlite3userauth.inc} (100%) rename third_party/sqlite3/{sqliteInt.h => sqliteInt.inc} (99%) rename third_party/sqlite3/{sqliteLimit.h => sqliteLimit.inc} (100%) rename third_party/sqlite3/{sqliteicu.h => sqliteicu.inc} (100%) rename third_party/sqlite3/{fts1_tokenizer.h => tokenizer.inc} (100%) rename third_party/sqlite3/{vdbe.h => vdbe.inc} (99%) rename third_party/sqlite3/{vdbeInt.h => vdbeInt.inc} (100%) rename third_party/sqlite3/{vxworks.h => vxworks.inc} (100%) rename third_party/sqlite3/{wal.h => wal.inc} (99%) rename third_party/sqlite3/{whereInt.h => whereInt.inc} (100%) diff --git a/third_party/sqlite3/alter.c b/third_party/sqlite3/alter.c index be29d9f58..e65a28453 100644 --- a/third_party/sqlite3/alter.c +++ b/third_party/sqlite3/alter.c @@ -12,7 +12,7 @@ ** This file contains C code routines that used to generate VDBE code ** that implements the ALTER TABLE command. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/analyze.c b/third_party/sqlite3/analyze.c index d579e89ca..c370b4da0 100644 --- a/third_party/sqlite3/analyze.c +++ b/third_party/sqlite3/analyze.c @@ -140,7 +140,7 @@ ** integer in the equivalent columns in sqlite_stat4. */ #ifndef SQLITE_OMIT_ANALYZE -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #if defined(SQLITE_ENABLE_STAT4) diff --git a/third_party/sqlite3/attach.c b/third_party/sqlite3/attach.c index 4bae45fed..44f52c2f6 100644 --- a/third_party/sqlite3/attach.c +++ b/third_party/sqlite3/attach.c @@ -11,7 +11,7 @@ ************************************************************************* ** This file contains code used to implement the ATTACH and DETACH commands. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_ATTACH diff --git a/third_party/sqlite3/auth.c b/third_party/sqlite3/auth.c index ce65bb7a5..710f8ce75 100644 --- a/third_party/sqlite3/auth.c +++ b/third_party/sqlite3/auth.c @@ -14,7 +14,7 @@ ** systems that do not need this facility may omit it by recompiling ** the library with -DSQLITE_OMIT_AUTHORIZATION=1 */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/backup.c b/third_party/sqlite3/backup.c index 860841deb..6c46bb5c4 100644 --- a/third_party/sqlite3/backup.c +++ b/third_party/sqlite3/backup.c @@ -12,8 +12,8 @@ ** This file contains the implementation of the sqlite3_backup_XXX() ** API functions and the related features. */ -#include "third_party/sqlite3/btreeInt.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/btreeInt.inc" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/bitvec.c b/third_party/sqlite3/bitvec.c index 14cba75cb..5ce04aee0 100644 --- a/third_party/sqlite3/bitvec.c +++ b/third_party/sqlite3/bitvec.c @@ -34,7 +34,7 @@ ** start of a transaction, and is thus usually less than a few thousand, ** but can be as large as 2 billion for a really big database. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* Size of the Bitvec structure in bytes. */ diff --git a/third_party/sqlite3/btmutex.c b/third_party/sqlite3/btmutex.c index 9a21e6e88..be05f044d 100644 --- a/third_party/sqlite3/btmutex.c +++ b/third_party/sqlite3/btmutex.c @@ -15,7 +15,7 @@ ** big and we want to break it down some. This packaged seemed like ** a good breakout. */ -#include "third_party/sqlite3/btreeInt.h" +#include "third_party/sqlite3/btreeInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_SHARED_CACHE diff --git a/third_party/sqlite3/btree.c b/third_party/sqlite3/btree.c index ec56027d1..cb7faf87a 100644 --- a/third_party/sqlite3/btree.c +++ b/third_party/sqlite3/btree.c @@ -13,7 +13,7 @@ ** See the header comment on "btreeInt.h" for additional information. ** Including a description of file format and an overview of operation. */ -#include "third_party/sqlite3/btreeInt.h" +#include "third_party/sqlite3/btreeInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/btree.h b/third_party/sqlite3/btree.inc similarity index 100% rename from third_party/sqlite3/btree.h rename to third_party/sqlite3/btree.inc diff --git a/third_party/sqlite3/btreeInt.h b/third_party/sqlite3/btreeInt.inc similarity index 99% rename from third_party/sqlite3/btreeInt.h rename to third_party/sqlite3/btreeInt.inc index fc3ebf06e..ad36ead69 100644 --- a/third_party/sqlite3/btreeInt.h +++ b/third_party/sqlite3/btreeInt.inc @@ -213,7 +213,7 @@ ** 4 Number of leaf pointers on this page ** * zero or more pages numbers of leaves */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/build.c b/third_party/sqlite3/build.c index e0e5908f4..288864ab5 100644 --- a/third_party/sqlite3/build.c +++ b/third_party/sqlite3/build.c @@ -22,7 +22,7 @@ ** COMMIT ** ROLLBACK */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/callback.c b/third_party/sqlite3/callback.c index dc659886c..cf429bb22 100644 --- a/third_party/sqlite3/callback.c +++ b/third_party/sqlite3/callback.c @@ -13,7 +13,7 @@ ** This file contains functions used to access the internal hash tables ** of user defined functions and collation sequences. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/complete.c b/third_party/sqlite3/complete.c index 12f0973a9..dd626aa91 100644 --- a/third_party/sqlite3/complete.c +++ b/third_party/sqlite3/complete.c @@ -16,7 +16,7 @@ ** separating it out, the code will be automatically omitted from ** static links that do not use it. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/ctime.c b/third_party/sqlite3/ctime.c index 2ea9bd3cc..cc42ed757 100644 --- a/third_party/sqlite3/ctime.c +++ b/third_party/sqlite3/ctime.c @@ -17,15 +17,6 @@ #ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS /* IMP: R-16824-07538 */ -/* -** Include the configuration header output by 'configure' if we're using the -** autoconf-based build -*/ -#if defined(_HAVE_SQLITE_CONFIG_H) && !defined(SQLITECONFIG_H) -#include "third_party/sqlite3/config.h" -#define SQLITECONFIG_H 1 -#endif - /* These macros are provided to "stringify" the value of the define ** for those options in which the value is meaningful. */ #define CTIMEOPT_VAL_(opt) #opt diff --git a/third_party/sqlite3/date.c b/third_party/sqlite3/date.c index c6e543ade..5daa0ce4e 100644 --- a/third_party/sqlite3/date.c +++ b/third_party/sqlite3/date.c @@ -48,7 +48,7 @@ #include "libc/mem/mem.h" #include "libc/time/struct/tm.h" #include "libc/time/time.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/dbpage.c b/third_party/sqlite3/dbpage.c index 9800a13b5..d6540a00f 100644 --- a/third_party/sqlite3/dbpage.c +++ b/third_party/sqlite3/dbpage.c @@ -30,7 +30,7 @@ ** value must be a BLOB which is the correct page size, otherwise the ** update fails. Rows may not be deleted or inserted. */ -#include "third_party/sqlite3/sqliteInt.h" /* Requires access to internal data structures */ +#include "third_party/sqlite3/sqliteInt.inc" /* Requires access to internal data inc */ /* clang-format off */ diff --git a/third_party/sqlite3/dbstat.c b/third_party/sqlite3/dbstat.c index a29822215..16626f6d7 100644 --- a/third_party/sqlite3/dbstat.c +++ b/third_party/sqlite3/dbstat.c @@ -20,7 +20,7 @@ ** Additional information is available on the "dbstat.html" page of the ** official SQLite documentation. */ -#include "third_party/sqlite3/sqliteInt.h" /* Requires access to internal data structures */ +#include "third_party/sqlite3/sqliteInt.inc" /* Requires access to internal data inc */ /* clang-format off */ diff --git a/third_party/sqlite3/delete.c b/third_party/sqlite3/delete.c index 7b8c7d605..e0a86533b 100644 --- a/third_party/sqlite3/delete.c +++ b/third_party/sqlite3/delete.c @@ -12,7 +12,7 @@ ** This file contains C code routines that are called by the parser ** in order to generate code for DELETE FROM statements. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/expr.c b/third_party/sqlite3/expr.c index 235a427dc..3e3573d37 100644 --- a/third_party/sqlite3/expr.c +++ b/third_party/sqlite3/expr.c @@ -12,7 +12,7 @@ ** This file contains routines used for analyzing expressions and ** for generating VDBE code that evaluates expressions in SQLite. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/fault.c b/third_party/sqlite3/fault.c index 104a09793..599e70025 100644 --- a/third_party/sqlite3/fault.c +++ b/third_party/sqlite3/fault.c @@ -23,7 +23,7 @@ ** hash table will continue to function normally. So a malloc failure ** during a hash table resize is a benign fault. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/fkey.c b/third_party/sqlite3/fkey.c index fdb52a910..bb304bcef 100644 --- a/third_party/sqlite3/fkey.c +++ b/third_party/sqlite3/fkey.c @@ -11,7 +11,7 @@ ** This file contains code used by the compiler to add foreign key ** support to compiled SQL statements. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/fts1.c b/third_party/sqlite3/fts1.c deleted file mode 100644 index 6a24348d3..000000000 --- a/third_party/sqlite3/fts1.c +++ /dev/null @@ -1,3351 +0,0 @@ -/* fts1 has a design flaw which can lead to database corruption (see -** below). It is recommended not to use it any longer, instead use -** fts3 (or higher). If you believe that your use of fts1 is safe, -** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS. -*/ - -/* clang-format off */ - -#if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \ - && !defined(SQLITE_ENABLE_BROKEN_FTS1) -#error fts1 has a design flaw and has been deprecated. -#endif -/* The flaw is that fts1 uses the content table's unaliased rowid as -** the unique docid. fts1 embeds the rowid in the index it builds, -** and expects the rowid to not change. The SQLite VACUUM operation -** will renumber such rowids, thereby breaking fts1. If you are using -** fts1 in a system which has disabled VACUUM, then you can continue -** to use it safely. Note that PRAGMA auto_vacuum does NOT disable -** VACUUM, though systems using auto_vacuum are unlikely to invoke -** VACUUM. -** -** fts1 should be safe even across VACUUM if you only insert documents -** and never delete. -*/ - -/* The author disclaims copyright to this source code. - * - * This is an SQLite module implementing full-text search. - */ - -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - -#if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE) -# define SQLITE_CORE 1 -#endif - -#include -#include -#include -#include -#include - -#include "fts1.h" -#include "fts1_hash.h" -#include "fts1_tokenizer.h" -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT1 - - -#if 0 -# define TRACE(A) printf A; fflush(stdout) -#else -# define TRACE(A) -#endif - -/* utility functions */ - -typedef struct StringBuffer { - int len; /* length, not including null terminator */ - int alloced; /* Space allocated for s[] */ - char *s; /* Content of the string */ -} StringBuffer; - -static void initStringBuffer(StringBuffer *sb){ - sb->len = 0; - sb->alloced = 100; - sb->s = malloc(100); - sb->s[0] = '\0'; -} - -static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ - if( sb->len + nFrom >= sb->alloced ){ - sb->alloced = sb->len + nFrom + 100; - sb->s = realloc(sb->s, sb->alloced+1); - if( sb->s==0 ){ - initStringBuffer(sb); - return; - } - } - memcpy(sb->s + sb->len, zFrom, nFrom); - sb->len += nFrom; - sb->s[sb->len] = 0; -} -static void append(StringBuffer *sb, const char *zFrom){ - nappend(sb, zFrom, strlen(zFrom)); -} - -/* We encode variable-length integers in little-endian order using seven bits - * per byte as follows: -** -** KEY: -** A = 0xxxxxxx 7 bits of data and one flag bit -** B = 1xxxxxxx 7 bits of data and one flag bit -** -** 7 bits - A -** 14 bits - BA -** 21 bits - BBA -** and so on. -*/ - -/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ -#define VARINT_MAX 10 - -/* Write a 64-bit variable-length integer to memory starting at p[0]. - * The length of data written will be between 1 and VARINT_MAX bytes. - * The number of bytes written is returned. */ -static int putVarint(char *p, sqlite_int64 v){ - unsigned char *q = (unsigned char *) p; - sqlite_uint64 vu = v; - do{ - *q++ = (unsigned char) ((vu & 0x7f) | 0x80); - vu >>= 7; - }while( vu!=0 ); - q[-1] &= 0x7f; /* turn off high bit in final byte */ - assert( q - (unsigned char *)p <= VARINT_MAX ); - return (int) (q - (unsigned char *)p); -} - -/* Read a 64-bit variable-length integer from memory starting at p[0]. - * Return the number of bytes read, or 0 on error. - * The value is stored in *v. */ -static int getVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q & 0x80) == 0x80 ){ - x += y * (*q++ & 0x7f); - y <<= 7; - if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ - assert( 0 ); - return 0; - } - } - x += y * (*q++); - *v = (sqlite_int64) x; - return (int) (q - (unsigned char *)p); -} - -static int getVarint32(const char *p, int *pi){ - sqlite_int64 i; - int ret = getVarint(p, &i); - *pi = (int) i; - assert( *pi==i ); - return ret; -} - -/*** Document lists *** - * - * A document list holds a sorted list of varint-encoded document IDs. - * - * A doclist with type DL_POSITIONS_OFFSETS is stored like this: - * - * array { - * varint docid; - * array { - * varint position; (delta from previous position plus POS_BASE) - * varint startOffset; (delta from previous startOffset) - * varint endOffset; (delta from startOffset) - * } - * } - * - * Here, array { X } means zero or more occurrences of X, adjacent in memory. - * - * A position list may hold positions for text in multiple columns. A position - * POS_COLUMN is followed by a varint containing the index of the column for - * following positions in the list. Any positions appearing before any - * occurrences of POS_COLUMN are for column 0. - * - * A doclist with type DL_POSITIONS is like the above, but holds only docids - * and positions without offset information. - * - * A doclist with type DL_DOCIDS is like the above, but holds only docids - * without positions or offset information. - * - * On disk, every document list has positions and offsets, so we don't bother - * to serialize a doclist's type. - * - * We don't yet delta-encode document IDs; doing so will probably be a - * modest win. - * - * NOTE(shess) I've thought of a slightly (1%) better offset encoding. - * After the first offset, estimate the next offset by using the - * current token position and the previous token position and offset, - * offset to handle some variance. So the estimate would be - * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded - * as normal. Offsets more than 64 chars from the estimate are - * encoded as the delta to the previous start offset + 128. An - * additional tiny increment can be gained by using the end offset of - * the previous token to make the estimate a tiny bit more precise. -*/ - -/* It is not safe to call isspace(), tolower(), or isalnum() on -** hi-bit-set characters. This is the same solution used in the -** tokenizer. -*/ -/* TODO(shess) The snippet-generation code should be using the -** tokenizer-generated tokens rather than doing its own local -** tokenization. -*/ -/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ -static int safe_isspace(char c){ - return (c&0x80)==0 ? isspace((unsigned char)c) : 0; -} -static int safe_tolower(char c){ - return (c&0x80)==0 ? tolower((unsigned char)c) : c; -} -static int safe_isalnum(char c){ - return (c&0x80)==0 ? isalnum((unsigned char)c) : 0; -} - -typedef enum DocListType { - DL_DOCIDS, /* docids only */ - DL_POSITIONS, /* docids + positions */ - DL_POSITIONS_OFFSETS /* docids + positions + offsets */ -} DocListType; - -/* -** By default, only positions and not offsets are stored in the doclists. -** To change this so that offsets are stored too, compile with -** -** -DDL_DEFAULT=DL_POSITIONS_OFFSETS -** -*/ -#ifndef DL_DEFAULT -# define DL_DEFAULT DL_POSITIONS -#endif - -typedef struct DocList { - char *pData; - int nData; - DocListType iType; - int iLastColumn; /* the last column written */ - int iLastPos; /* the last position written */ - int iLastOffset; /* the last start offset written */ -} DocList; - -enum { - POS_END = 0, /* end of this position list */ - POS_COLUMN, /* followed by new column number */ - POS_BASE -}; - -/* Initialize a new DocList to hold the given data. */ -static void docListInit(DocList *d, DocListType iType, - const char *pData, int nData){ - d->nData = nData; - if( nData>0 ){ - d->pData = malloc(nData); - memcpy(d->pData, pData, nData); - } else { - d->pData = NULL; - } - d->iType = iType; - d->iLastColumn = 0; - d->iLastPos = d->iLastOffset = 0; -} - -/* Create a new dynamically-allocated DocList. */ -static DocList *docListNew(DocListType iType){ - DocList *d = (DocList *) malloc(sizeof(DocList)); - docListInit(d, iType, 0, 0); - return d; -} - -static void docListDestroy(DocList *d){ - free(d->pData); -#ifndef NDEBUG - memset(d, 0x55, sizeof(*d)); -#endif -} - -static void docListDelete(DocList *d){ - docListDestroy(d); - free(d); -} - -static char *docListEnd(DocList *d){ - return d->pData + d->nData; -} - -/* Append a varint to a DocList's data. */ -static void appendVarint(DocList *d, sqlite_int64 i){ - char c[VARINT_MAX]; - int n = putVarint(c, i); - d->pData = realloc(d->pData, d->nData + n); - memcpy(d->pData + d->nData, c, n); - d->nData += n; -} - -static void docListAddDocid(DocList *d, sqlite_int64 iDocid){ - appendVarint(d, iDocid); - if( d->iType>=DL_POSITIONS ){ - appendVarint(d, POS_END); /* initially empty position list */ - d->iLastColumn = 0; - d->iLastPos = d->iLastOffset = 0; - } -} - -/* helper function for docListAddPos and docListAddPosOffset */ -static void addPos(DocList *d, int iColumn, int iPos){ - assert( d->nData>0 ); - --d->nData; /* remove previous terminator */ - if( iColumn!=d->iLastColumn ){ - assert( iColumn>d->iLastColumn ); - appendVarint(d, POS_COLUMN); - appendVarint(d, iColumn); - d->iLastColumn = iColumn; - d->iLastPos = d->iLastOffset = 0; - } - assert( iPos>=d->iLastPos ); - appendVarint(d, iPos-d->iLastPos+POS_BASE); - d->iLastPos = iPos; -} - -/* Add a position to the last position list in a doclist. */ -static void docListAddPos(DocList *d, int iColumn, int iPos){ - assert( d->iType==DL_POSITIONS ); - addPos(d, iColumn, iPos); - appendVarint(d, POS_END); /* add new terminator */ -} - -/* -** Add a position and starting and ending offsets to a doclist. -** -** If the doclist is setup to handle only positions, then insert -** the position only and ignore the offsets. -*/ -static void docListAddPosOffset( - DocList *d, /* Doclist under construction */ - int iColumn, /* Column the inserted term is part of */ - int iPos, /* Position of the inserted term */ - int iStartOffset, /* Starting offset of inserted term */ - int iEndOffset /* Ending offset of inserted term */ -){ - assert( d->iType>=DL_POSITIONS ); - addPos(d, iColumn, iPos); - if( d->iType==DL_POSITIONS_OFFSETS ){ - assert( iStartOffset>=d->iLastOffset ); - appendVarint(d, iStartOffset-d->iLastOffset); - d->iLastOffset = iStartOffset; - assert( iEndOffset>=iStartOffset ); - appendVarint(d, iEndOffset-iStartOffset); - } - appendVarint(d, POS_END); /* add new terminator */ -} - -/* -** A DocListReader object is a cursor into a doclist. Initialize -** the cursor to the beginning of the doclist by calling readerInit(). -** Then use routines -** -** peekDocid() -** readDocid() -** readPosition() -** skipPositionList() -** and so forth... -** -** to read information out of the doclist. When we reach the end -** of the doclist, atEnd() returns TRUE. -*/ -typedef struct DocListReader { - DocList *pDoclist; /* The document list we are stepping through */ - char *p; /* Pointer to next unread byte in the doclist */ - int iLastColumn; - int iLastPos; /* the last position read, or -1 when not in a position list */ -} DocListReader; - -/* -** Initialize the DocListReader r to point to the beginning of pDoclist. -*/ -static void readerInit(DocListReader *r, DocList *pDoclist){ - r->pDoclist = pDoclist; - if( pDoclist!=NULL ){ - r->p = pDoclist->pData; - } - r->iLastColumn = -1; - r->iLastPos = -1; -} - -/* -** Return TRUE if we have reached then end of pReader and there is -** nothing else left to read. -*/ -static int atEnd(DocListReader *pReader){ - return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist)); -} - -/* Peek at the next docid without advancing the read pointer. -*/ -static sqlite_int64 peekDocid(DocListReader *pReader){ - sqlite_int64 ret; - assert( !atEnd(pReader) ); - assert( pReader->iLastPos==-1 ); - getVarint(pReader->p, &ret); - return ret; -} - -/* Read the next docid. See also nextDocid(). -*/ -static sqlite_int64 readDocid(DocListReader *pReader){ - sqlite_int64 ret; - assert( !atEnd(pReader) ); - assert( pReader->iLastPos==-1 ); - pReader->p += getVarint(pReader->p, &ret); - if( pReader->pDoclist->iType>=DL_POSITIONS ){ - pReader->iLastColumn = 0; - pReader->iLastPos = 0; - } - return ret; -} - -/* Read the next position and column index from a position list. - * Returns the position, or -1 at the end of the list. */ -static int readPosition(DocListReader *pReader, int *iColumn){ - int i; - int iType = pReader->pDoclist->iType; - - if( pReader->iLastPos==-1 ){ - return -1; - } - assert( !atEnd(pReader) ); - - if( iTypep += getVarint32(pReader->p, &i); - if( i==POS_END ){ - pReader->iLastColumn = pReader->iLastPos = -1; - *iColumn = -1; - return -1; - } - if( i==POS_COLUMN ){ - pReader->p += getVarint32(pReader->p, &pReader->iLastColumn); - pReader->iLastPos = 0; - pReader->p += getVarint32(pReader->p, &i); - assert( i>=POS_BASE ); - } - pReader->iLastPos += ((int) i)-POS_BASE; - if( iType>=DL_POSITIONS_OFFSETS ){ - /* Skip over offsets, ignoring them for now. */ - int iStart, iEnd; - pReader->p += getVarint32(pReader->p, &iStart); - pReader->p += getVarint32(pReader->p, &iEnd); - } - *iColumn = pReader->iLastColumn; - return pReader->iLastPos; -} - -/* Skip past the end of a position list. */ -static void skipPositionList(DocListReader *pReader){ - DocList *p = pReader->pDoclist; - if( p && p->iType>=DL_POSITIONS ){ - int iColumn; - while( readPosition(pReader, &iColumn)!=-1 ){} - } -} - -/* Skip over a docid, including its position list if the doclist has - * positions. */ -static void skipDocument(DocListReader *pReader){ - readDocid(pReader); - skipPositionList(pReader); -} - -/* Skip past all docids which are less than [iDocid]. Returns 1 if a docid - * matching [iDocid] was found. */ -static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){ - sqlite_int64 d = 0; - while( !atEnd(pReader) && (d=peekDocid(pReader))iType>=DL_POSITIONS ){ - int iPos, iCol; - const char *zDiv = ""; - printf("("); - while( (iPos = readPosition(&r, &iCol))>=0 ){ - printf("%s%d:%d", zDiv, iCol, iPos); - zDiv = ":"; - } - printf(")"); - } - } - printf("\n"); - fflush(stdout); -} -#endif /* SQLITE_DEBUG */ - -/* Trim the given doclist to contain only positions in column - * [iRestrictColumn]. */ -static void docListRestrictColumn(DocList *in, int iRestrictColumn){ - DocListReader r; - DocList out; - - assert( in->iType>=DL_POSITIONS ); - readerInit(&r, in); - docListInit(&out, DL_POSITIONS, NULL, 0); - - while( !atEnd(&r) ){ - sqlite_int64 iDocid = readDocid(&r); - int iPos, iColumn; - - docListAddDocid(&out, iDocid); - while( (iPos = readPosition(&r, &iColumn)) != -1 ){ - if( iColumn==iRestrictColumn ){ - docListAddPos(&out, iColumn, iPos); - } - } - } - - docListDestroy(in); - *in = out; -} - -/* Trim the given doclist by discarding any docids without any remaining - * positions. */ -static void docListDiscardEmpty(DocList *in) { - DocListReader r; - DocList out; - - /* TODO: It would be nice to implement this operation in place; that - * could save a significant amount of memory in queries with long doclists. */ - assert( in->iType>=DL_POSITIONS ); - readerInit(&r, in); - docListInit(&out, DL_POSITIONS, NULL, 0); - - while( !atEnd(&r) ){ - sqlite_int64 iDocid = readDocid(&r); - int match = 0; - int iPos, iColumn; - while( (iPos = readPosition(&r, &iColumn)) != -1 ){ - if( !match ){ - docListAddDocid(&out, iDocid); - match = 1; - } - docListAddPos(&out, iColumn, iPos); - } - } - - docListDestroy(in); - *in = out; -} - -/* Helper function for docListUpdate() and docListAccumulate(). -** Splices a doclist element into the doclist represented by r, -** leaving r pointing after the newly spliced element. -*/ -static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid, - const char *pSource, int nSource){ - DocList *d = r->pDoclist; - char *pTarget; - int nTarget, found; - - found = skipToDocid(r, iDocid); - - /* Describe slice in d to place pSource/nSource. */ - pTarget = r->p; - if( found ){ - skipDocument(r); - nTarget = r->p-pTarget; - }else{ - nTarget = 0; - } - - /* The sense of the following is that there are three possibilities. - ** If nTarget==nSource, we should not move any memory nor realloc. - ** If nTarget>nSource, trim target and realloc. - ** If nTargetnSource ){ - memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget)); - } - if( nTarget!=nSource ){ - int iDoclist = pTarget-d->pData; - d->pData = realloc(d->pData, d->nData+nSource-nTarget); - pTarget = d->pData+iDoclist; - } - if( nTargetnData += nSource-nTarget; - r->p = pTarget+nSource; -} - -/* Insert/update pUpdate into the doclist. */ -static void docListUpdate(DocList *d, DocList *pUpdate){ - DocListReader reader; - - assert( d!=NULL && pUpdate!=NULL ); - assert( d->iType==pUpdate->iType); - - readerInit(&reader, d); - docListSpliceElement(&reader, firstDocid(pUpdate), - pUpdate->pData, pUpdate->nData); -} - -/* Propagate elements from pUpdate to pAcc, overwriting elements with -** matching docids. -*/ -static void docListAccumulate(DocList *pAcc, DocList *pUpdate){ - DocListReader accReader, updateReader; - - /* Handle edge cases where one doclist is empty. */ - assert( pAcc!=NULL ); - if( pUpdate==NULL || pUpdate->nData==0 ) return; - if( pAcc->nData==0 ){ - pAcc->pData = malloc(pUpdate->nData); - memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData); - pAcc->nData = pUpdate->nData; - return; - } - - readerInit(&accReader, pAcc); - readerInit(&updateReader, pUpdate); - - while( !atEnd(&updateReader) ){ - char *pSource = updateReader.p; - sqlite_int64 iDocid = readDocid(&updateReader); - skipPositionList(&updateReader); - docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource); - } -} - -/* -** Read the next docid off of pIn. Return 0 if we reach the end. -* -* TODO: This assumes that docids are never 0, but they may actually be 0 since -* users can choose docids when inserting into a full-text table. Fix this. -*/ -static sqlite_int64 nextDocid(DocListReader *pIn){ - skipPositionList(pIn); - return atEnd(pIn) ? 0 : readDocid(pIn); -} - -/* -** pLeft and pRight are two DocListReaders that are pointing to -** positions lists of the same document: iDocid. -** -** If there are no instances in pLeft or pRight where the position -** of pLeft is one less than the position of pRight, then this -** routine adds nothing to pOut. -** -** If there are one or more instances where positions from pLeft -** are exactly one less than positions from pRight, then add a new -** document record to pOut. If pOut wants to hold positions, then -** include the positions from pRight that are one more than a -** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. -** -** pLeft and pRight are left pointing at the next document record. -*/ -static void mergePosList( - DocListReader *pLeft, /* Left position list */ - DocListReader *pRight, /* Right position list */ - sqlite_int64 iDocid, /* The docid from pLeft and pRight */ - DocList *pOut /* Write the merged document record here */ -){ - int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol); - int iRightCol, iRightPos = readPosition(pRight, &iRightCol); - int match = 0; - - /* Loop until we've reached the end of both position lists. */ - while( iLeftPos!=-1 && iRightPos!=-1 ){ - if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){ - if( !match ){ - docListAddDocid(pOut, iDocid); - match = 1; - } - if( pOut->iType>=DL_POSITIONS ){ - docListAddPos(pOut, iRightCol, iRightPos); - } - iLeftPos = readPosition(pLeft, &iLeftCol); - iRightPos = readPosition(pRight, &iRightCol); - }else if( iRightCol=0 ) skipPositionList(pLeft); - if( iRightPos>=0 ) skipPositionList(pRight); -} - -/* We have two doclists: pLeft and pRight. -** Write the phrase intersection of these two doclists into pOut. -** -** A phrase intersection means that two documents only match -** if pLeft.iPos+1==pRight.iPos. -** -** The output pOut may or may not contain positions. If pOut -** does contain positions, they are the positions of pRight. -*/ -static void docListPhraseMerge( - DocList *pLeft, /* Doclist resulting from the words on the left */ - DocList *pRight, /* Doclist for the next word to the right */ - DocList *pOut /* Write the combined doclist here */ -){ - DocListReader left, right; - sqlite_int64 docidLeft, docidRight; - - readerInit(&left, pLeft); - readerInit(&right, pRight); - docidLeft = nextDocid(&left); - docidRight = nextDocid(&right); - - while( docidLeft>0 && docidRight>0 ){ - if( docidLeftiType0 && docidRight>0 ){ - if( docidLeft0 && docidRight>0 ){ - if( docidLeft<=docidRight ){ - docListAddDocid(pOut, docidLeft); - }else{ - docListAddDocid(pOut, docidRight); - } - priorLeft = docidLeft; - if( docidLeft<=docidRight ){ - docidLeft = nextDocid(&left); - } - if( docidRight>0 && docidRight<=priorLeft ){ - docidRight = nextDocid(&right); - } - } - while( docidLeft>0 ){ - docListAddDocid(pOut, docidLeft); - docidLeft = nextDocid(&left); - } - while( docidRight>0 ){ - docListAddDocid(pOut, docidRight); - docidRight = nextDocid(&right); - } -} - -/* We have two doclists: pLeft and pRight. -** Write into pOut all documents that occur in pLeft but not -** in pRight. -** -** Only docids are matched. Position information is ignored. -** -** The output pOut never holds positions. -*/ -static void docListExceptMerge( - DocList *pLeft, /* Doclist resulting from the words on the left */ - DocList *pRight, /* Doclist for the next word to the right */ - DocList *pOut /* Write the combined doclist here */ -){ - DocListReader left, right; - sqlite_int64 docidLeft, docidRight, priorLeft; - - readerInit(&left, pLeft); - readerInit(&right, pRight); - docidLeft = nextDocid(&left); - docidRight = nextDocid(&right); - - while( docidLeft>0 && docidRight>0 ){ - priorLeft = docidLeft; - if( docidLeft0 && docidRight<=priorLeft ){ - docidRight = nextDocid(&right); - } - } - while( docidLeft>0 ){ - docListAddDocid(pOut, docidLeft); - docidLeft = nextDocid(&left); - } -} - -static char *string_dup_n(const char *s, int n){ - char *str = malloc(n + 1); - memcpy(str, s, n); - str[n] = '\0'; - return str; -} - -/* Duplicate a string; the caller must free() the returned string. - * (We don't use strdup() since it is not part of the standard C library and - * may not be available everywhere.) */ -static char *string_dup(const char *s){ - return string_dup_n(s, strlen(s)); -} - -/* Format a string, replacing each occurrence of the % character with - * zDb.zName. This may be more convenient than sqlite_mprintf() - * when one string is used repeatedly in a format string. - * The caller must free() the returned string. */ -static char *string_format(const char *zFormat, - const char *zDb, const char *zName){ - const char *p; - size_t len = 0; - size_t nDb = strlen(zDb); - size_t nName = strlen(zName); - size_t nFullTableName = nDb+1+nName; - char *result; - char *r; - - /* first compute length needed */ - for(p = zFormat ; *p ; ++p){ - len += (*p=='%' ? nFullTableName : 1); - } - len += 1; /* for null terminator */ - - r = result = malloc(len); - for(p = zFormat; *p; ++p){ - if( *p=='%' ){ - memcpy(r, zDb, nDb); - r += nDb; - *r++ = '.'; - memcpy(r, zName, nName); - r += nName; - } else { - *r++ = *p; - } - } - *r++ = '\0'; - assert( r == result + len ); - return result; -} - -static int sql_exec(sqlite3 *db, const char *zDb, const char *zName, - const char *zFormat){ - char *zCommand = string_format(zFormat, zDb, zName); - int rc; - TRACE(("FTS1 sql: %s\n", zCommand)); - rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); - free(zCommand); - return rc; -} - -static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName, - sqlite3_stmt **ppStmt, const char *zFormat){ - char *zCommand = string_format(zFormat, zDb, zName); - int rc; - TRACE(("FTS1 prepare: %s\n", zCommand)); - rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); - free(zCommand); - return rc; -} - -/* end utility functions */ - -/* Forward reference */ -typedef struct fulltext_vtab fulltext_vtab; - -/* A single term in a query is represented by an instances of -** the following structure. -*/ -typedef struct QueryTerm { - short int nPhrase; /* How many following terms are part of the same phrase */ - short int iPhrase; /* This is the i-th term of a phrase. */ - short int iColumn; /* Column of the index that must match this term */ - signed char isOr; /* this term is preceded by "OR" */ - signed char isNot; /* this term is preceded by "-" */ - char *pTerm; /* text of the term. '\000' terminated. malloced */ - int nTerm; /* Number of bytes in pTerm[] */ -} QueryTerm; - - -/* A query string is parsed into a Query structure. - * - * We could, in theory, allow query strings to be complicated - * nested expressions with precedence determined by parentheses. - * But none of the major search engines do this. (Perhaps the - * feeling is that an parenthesized expression is two complex of - * an idea for the average user to grasp.) Taking our lead from - * the major search engines, we will allow queries to be a list - * of terms (with an implied AND operator) or phrases in double-quotes, - * with a single optional "-" before each non-phrase term to designate - * negation and an optional OR connector. - * - * OR binds more tightly than the implied AND, which is what the - * major search engines seem to do. So, for example: - * - * [one two OR three] ==> one AND (two OR three) - * [one OR two three] ==> (one OR two) AND three - * - * A "-" before a term matches all entries that lack that term. - * The "-" must occur immediately before the term with in intervening - * space. This is how the search engines do it. - * - * A NOT term cannot be the right-hand operand of an OR. If this - * occurs in the query string, the NOT is ignored: - * - * [one OR -two] ==> one OR two - * - */ -typedef struct Query { - fulltext_vtab *pFts; /* The full text index */ - int nTerms; /* Number of terms in the query */ - QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ - int nextIsOr; /* Set the isOr flag on the next inserted term */ - int nextColumn; /* Next word parsed must be in this column */ - int dfltColumn; /* The default column */ -} Query; - - -/* -** An instance of the following structure keeps track of generated -** matching-word offset information and snippets. -*/ -typedef struct Snippet { - int nMatch; /* Total number of matches */ - int nAlloc; /* Space allocated for aMatch[] */ - struct snippetMatch { /* One entry for each matching term */ - char snStatus; /* Status flag for use while constructing snippets */ - short int iCol; /* The column that contains the match */ - short int iTerm; /* The index in Query.pTerms[] of the matching term */ - short int nByte; /* Number of bytes in the term */ - int iStart; /* The offset to the first character of the term */ - } *aMatch; /* Points to space obtained from malloc */ - char *zOffset; /* Text rendering of aMatch[] */ - int nOffset; /* strlen(zOffset) */ - char *zSnippet; /* Snippet text */ - int nSnippet; /* strlen(zSnippet) */ -} Snippet; - - -typedef enum QueryType { - QUERY_GENERIC, /* table scan */ - QUERY_ROWID, /* lookup by rowid */ - QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/ -} QueryType; - -/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0 -** before we start aggregating into larger segments. Lower CHUNK_MAX -** means that for a given input we have more individual segments per -** term, which means more rows in the table and a bigger index (due to -** both more rows and bigger rowids). But it also reduces the average -** cost of adding new elements to the segment 0 doclist, and it seems -** to reduce the number of pages read and written during inserts. 256 -** was chosen by measuring insertion times for a certain input (first -** 10k documents of Enron corpus), though including query performance -** in the decision may argue for a larger value. -*/ -#define CHUNK_MAX 256 - -typedef enum fulltext_statement { - CONTENT_INSERT_STMT, - CONTENT_SELECT_STMT, - CONTENT_UPDATE_STMT, - CONTENT_DELETE_STMT, - - TERM_SELECT_STMT, - TERM_SELECT_ALL_STMT, - TERM_INSERT_STMT, - TERM_UPDATE_STMT, - TERM_DELETE_STMT, - - MAX_STMT /* Always at end! */ -} fulltext_statement; - -/* These must exactly match the enum above. */ -/* TODO(adam): Is there some risk that a statement (in particular, -** pTermSelectStmt) will be used in two cursors at once, e.g. if a -** query joins a virtual table to itself? If so perhaps we should -** move some of these to the cursor object. -*/ -static const char *const fulltext_zStatement[MAX_STMT] = { - /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ - /* CONTENT_SELECT */ "select * from %_content where rowid = ?", - /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ - /* CONTENT_DELETE */ "delete from %_content where rowid = ?", - - /* TERM_SELECT */ - "select rowid, doclist from %_term where term = ? and segment = ?", - /* TERM_SELECT_ALL */ - "select doclist from %_term where term = ? order by segment", - /* TERM_INSERT */ - "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)", - /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?", - /* TERM_DELETE */ "delete from %_term where rowid = ?", -}; - -/* -** A connection to a fulltext index is an instance of the following -** structure. The xCreate and xConnect methods create an instance -** of this structure and xDestroy and xDisconnect free that instance. -** All other methods receive a pointer to the structure as one of their -** arguments. -*/ -struct fulltext_vtab { - sqlite3_vtab base; /* Base class used by SQLite core */ - sqlite3 *db; /* The database connection */ - const char *zDb; /* logical database name */ - const char *zName; /* virtual table name */ - int nColumn; /* number of columns in virtual table */ - char **azColumn; /* column names. malloced */ - char **azContentColumn; /* column names in content table; malloced */ - sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ - - /* Precompiled statements which we keep as long as the table is - ** open. - */ - sqlite3_stmt *pFulltextStatements[MAX_STMT]; -}; - -/* -** When the core wants to do a query, it create a cursor using a -** call to xOpen. This structure is an instance of a cursor. It -** is destroyed by xClose. -*/ -typedef struct fulltext_cursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */ - sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ - int eof; /* True if at End Of Results */ - Query q; /* Parsed query string */ - Snippet snippet; /* Cached snippet for the current row */ - int iColumn; /* Column being searched */ - DocListReader result; /* used when iCursorType == QUERY_FULLTEXT */ -} fulltext_cursor; - -static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ - return (fulltext_vtab *) c->base.pVtab; -} - -static const sqlite3_module fulltextModule; /* forward declaration */ - -/* Append a list of strings separated by commas to a StringBuffer. */ -static void appendList(StringBuffer *sb, int nString, char **azString){ - int i; - for(i=0; i0 ) append(sb, ", "); - append(sb, azString[i]); - } -} - -/* Return a dynamically generated statement of the form - * insert into %_content (rowid, ...) values (?, ...) - */ -static const char *contentInsertStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "insert into %_content (rowid, "); - appendList(&sb, v->nColumn, v->azContentColumn); - append(&sb, ") values (?"); - for(i=0; inColumn; ++i) - append(&sb, ", ?"); - append(&sb, ")"); - return sb.s; -} - -/* Return a dynamically generated statement of the form - * update %_content set [col_0] = ?, [col_1] = ?, ... - * where rowid = ? - */ -static const char *contentUpdateStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "update %_content set "); - for(i=0; inColumn; ++i) { - if( i>0 ){ - append(&sb, ", "); - } - append(&sb, v->azContentColumn[i]); - append(&sb, " = ?"); - } - append(&sb, " where rowid = ?"); - return sb.s; -} - -/* Puts a freshly-prepared statement determined by iStmt in *ppStmt. -** If the indicated statement has never been prepared, it is prepared -** and cached, otherwise the cached version is reset. -*/ -static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - assert( iStmtpFulltextStatements[iStmt]==NULL ){ - const char *zStmt; - int rc; - switch( iStmt ){ - case CONTENT_INSERT_STMT: - zStmt = contentInsertStatement(v); break; - case CONTENT_UPDATE_STMT: - zStmt = contentUpdateStatement(v); break; - default: - zStmt = fulltext_zStatement[iStmt]; - } - rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt], - zStmt); - if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt); - if( rc!=SQLITE_OK ) return rc; - } else { - int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); - if( rc!=SQLITE_OK ) return rc; - } - - *ppStmt = v->pFulltextStatements[iStmt]; - return SQLITE_OK; -} - -/* Step the indicated statement, handling errors SQLITE_BUSY (by -** retrying) and SQLITE_SCHEMA (by re-preparing and transferring -** bindings to the new statement). -** TODO(adam): We should extend this function so that it can work with -** statements declared locally, not only globally cached statements. -*/ -static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - int rc; - sqlite3_stmt *s = *ppStmt; - assert( iStmtpFulltextStatements[iStmt] ); - - while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){ - if( rc==SQLITE_BUSY ) continue; - if( rc!=SQLITE_ERROR ) return rc; - - /* If an SQLITE_SCHEMA error has occurred, then finalizing this - * statement is going to delete the fulltext_vtab structure. If - * the statement just executed is in the pFulltextStatements[] - * array, it will be finalized twice. So remove it before - * calling sqlite3_finalize(). - */ - v->pFulltextStatements[iStmt] = NULL; - rc = sqlite3_finalize(s); - break; - } - return rc; - - err: - sqlite3_finalize(s); - return rc; -} - -/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK. -** Useful for statements like UPDATE, where we expect no results. -*/ -static int sql_single_step_statement(fulltext_vtab *v, - fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - int rc = sql_step_statement(v, iStmt, ppStmt); - return (rc==SQLITE_DONE) ? SQLITE_OK : rc; -} - -/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */ -static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, - sqlite3_value **pValues){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_value(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 2+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s); -} - -/* update %_content set col0 = pValues[0], col1 = pValues[1], ... - * where rowid = [iRowid] */ -static int content_update(fulltext_vtab *v, sqlite3_value **pValues, - sqlite_int64 iRowid){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 1+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s); -} - -static void freeStringArray(int nString, const char **pString){ - int i; - - for (i=0 ; i < nString ; ++i) { - if( pString[i]!=NULL ) free((void *) pString[i]); - } - free((void *) pString); -} - -/* select * from %_content where rowid = [iRow] - * The caller must delete the returned array and all strings in it. - * null fields will be NULL in the returned array. - * - * TODO: Perhaps we should return pointer/length strings here for consistency - * with other code which uses pointer/length. */ -static int content_select(fulltext_vtab *v, sqlite_int64 iRow, - const char ***pValues){ - sqlite3_stmt *s; - const char **values; - int i; - int rc; - - *pValues = NULL; - - rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_ROW ) return rc; - - values = (const char **) malloc(v->nColumn * sizeof(const char *)); - for(i=0; inColumn; ++i){ - if( sqlite3_column_type(s, i)==SQLITE_NULL ){ - values[i] = NULL; - }else{ - values[i] = string_dup((char*)sqlite3_column_text(s, i)); - } - } - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ){ - *pValues = values; - return SQLITE_OK; - } - - freeStringArray(v->nColumn, values); - return rc; -} - -/* delete from %_content where rowid = [iRow ] */ -static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s); -} - -/* select rowid, doclist from %_term - * where term = [pTerm] and segment = [iSegment] - * If found, returns SQLITE_ROW; the caller must free the - * returned doclist. If no rows found, returns SQLITE_DONE. */ -static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm, - int iSegment, - sqlite_int64 *rowid, DocList *out){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 2, iSegment); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_step_statement(v, TERM_SELECT_STMT, &s); - if( rc!=SQLITE_ROW ) return rc; - - *rowid = sqlite3_column_int64(s, 0); - docListInit(out, DL_DEFAULT, - sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - return rc==SQLITE_DONE ? SQLITE_ROW : rc; -} - -/* Load the segment doclists for term pTerm and merge them in -** appropriate order into out. Returns SQLITE_OK if successful. If -** there are no segments for pTerm, successfully returns an empty -** doclist in out. -** -** Each document consists of 1 or more "columns". The number of -** columns is v->nColumn. If iColumn==v->nColumn, then return -** position information about all columns. If iColumnnColumn, -** then only return position information about the iColumn-th column -** (where the first column is 0). -*/ -static int term_select_all( - fulltext_vtab *v, /* The fulltext index we are querying against */ - int iColumn, /* If nColumn ){ /* querying a single column */ - docListRestrictColumn(&old, iColumn); - } - - /* doclist contains the newer data, so write it over old. Then - ** steal accumulated result for doclist. - */ - docListAccumulate(&old, &doclist); - docListDestroy(&doclist); - doclist = old; - } - if( rc!=SQLITE_DONE ){ - docListDestroy(&doclist); - return rc; - } - - docListDiscardEmpty(&doclist); - *out = doclist; - return SQLITE_OK; -} - -/* insert into %_term (rowid, term, segment, doclist) - values ([piRowid], [pTerm], [iSegment], [doclist]) -** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid. -** -** NOTE(shess) piRowid is IN, with values of "space of int64" plus -** null, it is not used to pass data back to the caller. -*/ -static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid, - const char *pTerm, int nTerm, - int iSegment, DocList *doclist){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - if( piRowid==NULL ){ - rc = sqlite3_bind_null(s, 1); - }else{ - rc = sqlite3_bind_int64(s, 1, *piRowid); - } - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 3, iSegment); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_INSERT_STMT, &s); -} - -/* update %_term set doclist = [doclist] where rowid = [rowid] */ -static int term_update(fulltext_vtab *v, sqlite_int64 rowid, - DocList *doclist){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, rowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_UPDATE_STMT, &s); -} - -static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_DELETE_STMT, &s); -} - -/* -** Free the memory used to contain a fulltext_vtab structure. -*/ -static void fulltext_vtab_destroy(fulltext_vtab *v){ - int iStmt, i; - - TRACE(("FTS1 Destroy %p\n", v)); - for( iStmt=0; iStmtpFulltextStatements[iStmt]!=NULL ){ - sqlite3_finalize(v->pFulltextStatements[iStmt]); - v->pFulltextStatements[iStmt] = NULL; - } - } - - if( v->pTokenizer!=NULL ){ - v->pTokenizer->pModule->xDestroy(v->pTokenizer); - v->pTokenizer = NULL; - } - - free(v->azColumn); - for(i = 0; i < v->nColumn; ++i) { - sqlite3_free(v->azContentColumn[i]); - } - free(v->azContentColumn); - free(v); -} - -/* -** Token types for parsing the arguments to xConnect or xCreate. -*/ -#define TOKEN_EOF 0 /* End of file */ -#define TOKEN_SPACE 1 /* Any kind of whitespace */ -#define TOKEN_ID 2 /* An identifier */ -#define TOKEN_STRING 3 /* A string literal */ -#define TOKEN_PUNCT 4 /* A single punctuation character */ - -/* -** If X is a character that can be used in an identifier then -** IdChar(X) will be true. Otherwise it is false. -** -** For ASCII, any character with the high-order bit set is -** allowed in an identifier. For 7-bit characters, -** sqlite3IsIdChar[X] must be 1. -** -** Ticket #1066. the SQL standard does not allow '$' in the -** middle of identfiers. But many SQL implementations do. -** SQLite will allow '$' in identifiers for compatibility. -** But the feature is undocumented. -*/ -static const char isIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20])) - - -/* -** Return the length of the token that begins at z[0]. -** Store the token type in *tokenType before returning. -*/ -static int getToken(const char *z, int *tokenType){ - int i, c; - switch( *z ){ - case 0: { - *tokenType = TOKEN_EOF; - return 0; - } - case ' ': case '\t': case '\n': case '\f': case '\r': { - for(i=1; safe_isspace(z[i]); i++){} - *tokenType = TOKEN_SPACE; - return i; - } - case '`': - case '\'': - case '"': { - int delim = z[0]; - for(i=1; (c=z[i])!=0; i++){ - if( c==delim ){ - if( z[i+1]==delim ){ - i++; - }else{ - break; - } - } - } - *tokenType = TOKEN_STRING; - return i + (c!=0); - } - case '[': { - for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} - *tokenType = TOKEN_ID; - return i; - } - default: { - if( !IdChar(*z) ){ - break; - } - for(i=1; IdChar(z[i]); i++){} - *tokenType = TOKEN_ID; - return i; - } - } - *tokenType = TOKEN_PUNCT; - return 1; -} - -/* -** A token extracted from a string is an instance of the following -** structure. -*/ -typedef struct Token { - const char *z; /* Pointer to token text. Not '\000' terminated */ - short int n; /* Length of the token text in bytes. */ -} Token; - -/* -** Given a input string (which is really one of the argv[] parameters -** passed into xConnect or xCreate) split the string up into tokens. -** Return an array of pointers to '\000' terminated strings, one string -** for each non-whitespace token. -** -** The returned array is terminated by a single NULL pointer. -** -** Space to hold the returned array is obtained from a single -** malloc and should be freed by passing the return value to free(). -** The individual strings within the token list are all a part of -** the single memory allocation and will all be freed at once. -*/ -static char **tokenizeString(const char *z, int *pnToken){ - int nToken = 0; - Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) ); - int n = 1; - int e, i; - int totalSize = 0; - char **azToken; - char *zCopy; - while( n>0 ){ - n = getToken(z, &e); - if( e!=TOKEN_SPACE ){ - aToken[nToken].z = z; - aToken[nToken].n = n; - nToken++; - totalSize += n+1; - } - z += n; - } - azToken = (char**)malloc( nToken*sizeof(char*) + totalSize ); - zCopy = (char*)&azToken[nToken]; - nToken--; - for(i=0; i=0 ){ - azIn[j] = azIn[i]; - } - j++; - } - } - azIn[j] = 0; - } -} - - -/* -** Find the first alphanumeric token in the string zIn. Null-terminate -** this token. Remove any quotation marks. And return a pointer to -** the result. -*/ -static char *firstToken(char *zIn, char **pzTail){ - int n, ttype; - while(1){ - n = getToken(zIn, &ttype); - if( ttype==TOKEN_SPACE ){ - zIn += n; - }else if( ttype==TOKEN_EOF ){ - *pzTail = zIn; - return 0; - }else{ - zIn[n] = 0; - *pzTail = &zIn[1]; - dequoteString(zIn); - return zIn; - } - } - /*NOTREACHED*/ -} - -/* Return true if... -** -** * s begins with the string t, ignoring case -** * s is longer than t -** * The first character of s beyond t is not a alphanumeric -** -** Ignore leading space in *s. -** -** To put it another way, return true if the first token of -** s[] is t[]. -*/ -static int startsWith(const char *s, const char *t){ - while( safe_isspace(*s) ){ s++; } - while( *t ){ - if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0; - } - return *s!='_' && !safe_isalnum(*s); -} - -/* -** An instance of this structure defines the "spec" of a -** full text index. This structure is populated by parseSpec -** and use by fulltextConnect and fulltextCreate. -*/ -typedef struct TableSpec { - const char *zDb; /* Logical database name */ - const char *zName; /* Name of the full-text index */ - int nColumn; /* Number of columns to be indexed */ - char **azColumn; /* Original names of columns to be indexed */ - char **azContentColumn; /* Column names for %_content */ - char **azTokenizer; /* Name of tokenizer and its arguments */ -} TableSpec; - -/* -** Reclaim all of the memory used by a TableSpec -*/ -static void clearTableSpec(TableSpec *p) { - free(p->azColumn); - free(p->azContentColumn); - free(p->azTokenizer); -} - -/* Parse a CREATE VIRTUAL TABLE statement, which looks like this: - * - * CREATE VIRTUAL TABLE email - * USING fts1(subject, body, tokenize mytokenizer(myarg)) - * - * We return parsed information in a TableSpec structure. - * - */ -static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, - char**pzErr){ - int i, n; - char *z, *zDummy; - char **azArg; - const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */ - - assert( argc>=3 ); - /* Current interface: - ** argv[0] - module name - ** argv[1] - database name - ** argv[2] - table name - ** argv[3..] - columns, optionally followed by tokenizer specification - ** and snippet delimiters specification. - */ - - /* Make a copy of the complete argv[][] array in a single allocation. - ** The argv[][] array is read-only and transient. We can write to the - ** copy in order to modify things and the copy is persistent. - */ - memset(pSpec, 0, sizeof(*pSpec)); - for(i=n=0; izDb = azArg[1]; - pSpec->zName = azArg[2]; - pSpec->nColumn = 0; - pSpec->azColumn = azArg; - zTokenizer = "tokenize simple"; - for(i=3; inColumn] = firstToken(azArg[i], &zDummy); - pSpec->nColumn++; - } - } - if( pSpec->nColumn==0 ){ - azArg[0] = "content"; - pSpec->nColumn = 1; - } - - /* - ** Construct the list of content column names. - ** - ** Each content column name will be of the form cNNAAAA - ** where NN is the column number and AAAA is the sanitized - ** column name. "sanitized" means that special characters are - ** converted to "_". The cNN prefix guarantees that all column - ** names are unique. - ** - ** The AAAA suffix is not strictly necessary. It is included - ** for the convenience of people who might examine the generated - ** %_content table and wonder what the columns are used for. - */ - pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) ); - if( pSpec->azContentColumn==0 ){ - clearTableSpec(pSpec); - return SQLITE_NOMEM; - } - for(i=0; inColumn; i++){ - char *p; - pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); - for (p = pSpec->azContentColumn[i]; *p ; ++p) { - if( !safe_isalnum(*p) ) *p = '_'; - } - } - - /* - ** Parse the tokenizer specification string. - */ - pSpec->azTokenizer = tokenizeString(zTokenizer, &n); - tokenListToIdList(pSpec->azTokenizer); - - return SQLITE_OK; -} - -/* -** Generate a CREATE TABLE statement that describes the schema of -** the virtual table. Return a pointer to this schema string. -** -** Space is obtained from sqlite3_mprintf() and should be freed -** using sqlite3_free(). -*/ -static char *fulltextSchema( - int nColumn, /* Number of columns */ - const char *const* azColumn, /* List of columns */ - const char *zTableName /* Name of the table */ -){ - int i; - char *zSchema, *zNext; - const char *zSep = "("; - zSchema = sqlite3_mprintf("CREATE TABLE x"); - for(i=0; ibase */ - v->db = db; - v->zDb = spec->zDb; /* Freed when azColumn is freed */ - v->zName = spec->zName; /* Freed when azColumn is freed */ - v->nColumn = spec->nColumn; - v->azContentColumn = spec->azContentColumn; - spec->azContentColumn = 0; - v->azColumn = spec->azColumn; - spec->azColumn = 0; - - if( spec->azTokenizer==0 ){ - return SQLITE_NOMEM; - } - /* TODO(shess) For now, add new tokenizers as else if clauses. */ - if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){ - sqlite3Fts1SimpleTokenizerModule(&m); - }else if( startsWith(spec->azTokenizer[0], "porter") ){ - sqlite3Fts1PorterTokenizerModule(&m); - }else{ - *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); - rc = SQLITE_ERROR; - goto err; - } - for(n=0; spec->azTokenizer[n]; n++){} - if( n ){ - rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], - &v->pTokenizer); - }else{ - rc = m->xCreate(0, 0, &v->pTokenizer); - } - if( rc!=SQLITE_OK ) goto err; - v->pTokenizer->pModule = m; - - /* TODO: verify the existence of backing tables foo_content, foo_term */ - - schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, - spec->zName); - rc = sqlite3_declare_vtab(db, schema); - sqlite3_free(schema); - if( rc!=SQLITE_OK ) goto err; - - memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); - - *ppVTab = &v->base; - TRACE(("FTS1 Connect %p\n", v)); - - return rc; - -err: - fulltext_vtab_destroy(v); - return rc; -} - -static int fulltextConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVTab, - char **pzErr -){ - TableSpec spec; - int rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - rc = constructVtab(db, &spec, ppVTab, pzErr); - clearTableSpec(&spec); - return rc; -} - - /* The %_content table holds the text of each document, with - ** the rowid used as the docid. - ** - ** The %_term table maps each term to a document list blob - ** containing elements sorted by ascending docid, each element - ** encoded as: - ** - ** docid varint-encoded - ** token elements: - ** position+1 varint-encoded as delta from previous position - ** start offset varint-encoded as delta from previous start offset - ** end offset varint-encoded as delta from start offset - ** - ** The sentinel position of 0 indicates the end of the token list. - ** - ** Additionally, doclist blobs are chunked into multiple segments, - ** using segment to order the segments. New elements are added to - ** the segment at segment 0, until it exceeds CHUNK_MAX. Then - ** segment 0 is deleted, and the doclist is inserted at segment 1. - ** If there is already a doclist at segment 1, the segment 0 doclist - ** is merged with it, the segment 1 doclist is deleted, and the - ** merged doclist is inserted at segment 2, repeating those - ** operations until an insert succeeds. - ** - ** Since this structure doesn't allow us to update elements in place - ** in case of deletion or update, these are simply written to - ** segment 0 (with an empty token list in case of deletion), with - ** docListAccumulate() taking care to retain lower-segment - ** information in preference to higher-segment information. - */ - /* TODO(shess) Provide a VACUUM type operation which both removes - ** deleted elements which are no longer necessary, and duplicated - ** elements. I suspect this will probably not be necessary in - ** practice, though. - */ -static int fulltextCreate(sqlite3 *db, void *pAux, - int argc, const char * const *argv, - sqlite3_vtab **ppVTab, char **pzErr){ - int rc; - TableSpec spec; - StringBuffer schema; - TRACE(("FTS1 Create\n")); - - rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - initStringBuffer(&schema); - append(&schema, "CREATE TABLE %_content("); - appendList(&schema, spec.nColumn, spec.azContentColumn); - append(&schema, ")"); - rc = sql_exec(db, spec.zDb, spec.zName, schema.s); - free(schema.s); - if( rc!=SQLITE_OK ) goto out; - - rc = sql_exec(db, spec.zDb, spec.zName, - "create table %_term(term text, segment integer, doclist blob, " - "primary key(term, segment));"); - if( rc!=SQLITE_OK ) goto out; - - rc = constructVtab(db, &spec, ppVTab, pzErr); - -out: - clearTableSpec(&spec); - return rc; -} - -/* Decide how to handle an SQL query. */ -static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ - int i; - TRACE(("FTS1 BestIndex\n")); - - for(i=0; inConstraint; ++i){ - const struct sqlite3_index_constraint *pConstraint; - pConstraint = &pInfo->aConstraint[i]; - if( pConstraint->usable ) { - if( pConstraint->iColumn==-1 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ - pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */ - TRACE(("FTS1 QUERY_ROWID\n")); - } else if( pConstraint->iColumn>=0 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ - /* full-text search */ - pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; - TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn)); - } else continue; - - pInfo->aConstraintUsage[i].argvIndex = 1; - pInfo->aConstraintUsage[i].omit = 1; - - /* An arbitrary value for now. - * TODO: Perhaps rowid matches should be considered cheaper than - * full-text searches. */ - pInfo->estimatedCost = 1.0; - - return SQLITE_OK; - } - } - pInfo->idxNum = QUERY_GENERIC; - return SQLITE_OK; -} - -static int fulltextDisconnect(sqlite3_vtab *pVTab){ - TRACE(("FTS1 Disconnect %p\n", pVTab)); - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextDestroy(sqlite3_vtab *pVTab){ - fulltext_vtab *v = (fulltext_vtab *)pVTab; - int rc; - - TRACE(("FTS1 Destroy %p\n", pVTab)); - rc = sql_exec(v->db, v->zDb, v->zName, - "drop table if exists %_content;" - "drop table if exists %_term;" - ); - if( rc!=SQLITE_OK ) return rc; - - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - fulltext_cursor *c; - - c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); - /* sqlite will initialize c->base */ - *ppCursor = &c->base; - TRACE(("FTS1 Open %p: %p\n", pVTab, c)); - - return SQLITE_OK; -} - - -/* Free all of the dynamically allocated memory held by *q -*/ -static void queryClear(Query *q){ - int i; - for(i = 0; i < q->nTerms; ++i){ - free(q->pTerms[i].pTerm); - } - free(q->pTerms); - memset(q, 0, sizeof(*q)); -} - -/* Free all of the dynamically allocated memory held by the -** Snippet -*/ -static void snippetClear(Snippet *p){ - free(p->aMatch); - free(p->zOffset); - free(p->zSnippet); - memset(p, 0, sizeof(*p)); -} -/* -** Append a single entry to the p->aMatch[] log. -*/ -static void snippetAppendMatch( - Snippet *p, /* Append the entry to this snippet */ - int iCol, int iTerm, /* The column and query term */ - int iStart, int nByte /* Offset and size of the match */ -){ - int i; - struct snippetMatch *pMatch; - if( p->nMatch+1>=p->nAlloc ){ - p->nAlloc = p->nAlloc*2 + 10; - p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); - if( p->aMatch==0 ){ - p->nMatch = 0; - p->nAlloc = 0; - return; - } - } - i = p->nMatch++; - pMatch = &p->aMatch[i]; - pMatch->iCol = iCol; - pMatch->iTerm = iTerm; - pMatch->iStart = iStart; - pMatch->nByte = nByte; -} - -/* -** Sizing information for the circular buffer used in snippetOffsetsOfColumn() -*/ -#define FTS1_ROTOR_SZ (32) -#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1) - -/* -** Add entries to pSnippet->aMatch[] for every match that occurs against -** document zDoc[0..nDoc-1] which is stored in column iColumn. -*/ -static void snippetOffsetsOfColumn( - Query *pQuery, - Snippet *pSnippet, - int iColumn, - const char *zDoc, - int nDoc -){ - const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */ - sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */ - sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */ - fulltext_vtab *pVtab; /* The full text index */ - int nColumn; /* Number of columns in the index */ - const QueryTerm *aTerm; /* Query string terms */ - int nTerm; /* Number of query string terms */ - int i, j; /* Loop counters */ - int rc; /* Return code */ - unsigned int match, prevMatch; /* Phrase search bitmasks */ - const char *zToken; /* Next token from the tokenizer */ - int nToken; /* Size of zToken */ - int iBegin, iEnd, iPos; /* Offsets of beginning and end */ - - /* The following variables keep a circular buffer of the last - ** few tokens */ - unsigned int iRotor = 0; /* Index of current token */ - int iRotorBegin[FTS1_ROTOR_SZ]; /* Beginning offset of token */ - int iRotorLen[FTS1_ROTOR_SZ]; /* Length of token */ - - pVtab = pQuery->pFts; - nColumn = pVtab->nColumn; - pTokenizer = pVtab->pTokenizer; - pTModule = pTokenizer->pModule; - rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); - if( rc ) return; - pTCursor->pTokenizer = pTokenizer; - aTerm = pQuery->pTerms; - nTerm = pQuery->nTerms; - if( nTerm>=FTS1_ROTOR_SZ ){ - nTerm = FTS1_ROTOR_SZ - 1; - } - prevMatch = 0; - while(1){ - rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); - if( rc ) break; - iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin; - iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin; - match = 0; - for(i=0; i=0 && iCol1 && (prevMatch & (1<=0; j--){ - int k = (iRotor-j) & FTS1_ROTOR_MASK; - snippetAppendMatch(pSnippet, iColumn, i-j, - iRotorBegin[k], iRotorLen[k]); - } - } - } - prevMatch = match<<1; - iRotor++; - } - pTModule->xClose(pTCursor); -} - - -/* -** Compute all offsets for the current row of the query. -** If the offsets have already been computed, this routine is a no-op. -*/ -static void snippetAllOffsets(fulltext_cursor *p){ - int nColumn; - int iColumn, i; - int iFirst, iLast; - fulltext_vtab *pFts; - - if( p->snippet.nMatch ) return; - if( p->q.nTerms==0 ) return; - pFts = p->q.pFts; - nColumn = pFts->nColumn; - iColumn = p->iCursorType - QUERY_FULLTEXT; - if( iColumn<0 || iColumn>=nColumn ){ - iFirst = 0; - iLast = nColumn-1; - }else{ - iFirst = iColumn; - iLast = iColumn; - } - for(i=iFirst; i<=iLast; i++){ - const char *zDoc; - int nDoc; - zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1); - nDoc = sqlite3_column_bytes(p->pStmt, i+1); - snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc); - } -} - -/* -** Convert the information in the aMatch[] array of the snippet -** into the string zOffset[0..nOffset-1]. -*/ -static void snippetOffsetText(Snippet *p){ - int i; - int cnt = 0; - StringBuffer sb; - char zBuf[200]; - if( p->zOffset ) return; - initStringBuffer(&sb); - for(i=0; inMatch; i++){ - struct snippetMatch *pMatch = &p->aMatch[i]; - zBuf[0] = ' '; - sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", - pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); - append(&sb, zBuf); - cnt++; - } - p->zOffset = sb.s; - p->nOffset = sb.len; -} - -/* -** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set -** of matching words some of which might be in zDoc. zDoc is column -** number iCol. -** -** iBreak is suggested spot in zDoc where we could begin or end an -** excerpt. Return a value similar to iBreak but possibly adjusted -** to be a little left or right so that the break point is better. -*/ -static int wordBoundary( - int iBreak, /* The suggested break point */ - const char *zDoc, /* Document text */ - int nDoc, /* Number of bytes in zDoc[] */ - struct snippetMatch *aMatch, /* Matching words */ - int nMatch, /* Number of entries in aMatch[] */ - int iCol /* The column number for zDoc[] */ -){ - int i; - if( iBreak<=10 ){ - return 0; - } - if( iBreak>=nDoc-10 ){ - return nDoc; - } - for(i=0; i0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ - return aMatch[i-1].iStart; - } - } - for(i=1; i<=10; i++){ - if( safe_isspace(zDoc[iBreak-i]) ){ - return iBreak - i + 1; - } - if( safe_isspace(zDoc[iBreak+i]) ){ - return iBreak + i + 1; - } - } - return iBreak; -} - -/* -** If the StringBuffer does not end in white space, add a single -** space character to the end. -*/ -static void appendWhiteSpace(StringBuffer *p){ - if( p->len==0 ) return; - if( safe_isspace(p->s[p->len-1]) ) return; - append(p, " "); -} - -/* -** Remove white space from teh end of the StringBuffer -*/ -static void trimWhiteSpace(StringBuffer *p){ - while( p->len>0 && safe_isspace(p->s[p->len-1]) ){ - p->len--; - } -} - - - -/* -** Allowed values for Snippet.aMatch[].snStatus -*/ -#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */ -#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */ - -/* -** Generate the text of a snippet. -*/ -static void snippetText( - fulltext_cursor *pCursor, /* The cursor we need the snippet for */ - const char *zStartMark, /* Markup to appear before each match */ - const char *zEndMark, /* Markup to appear after each match */ - const char *zEllipsis /* Ellipsis mark */ -){ - int i, j; - struct snippetMatch *aMatch; - int nMatch; - int nDesired; - StringBuffer sb; - int tailCol; - int tailOffset; - int iCol; - int nDoc; - const char *zDoc; - int iStart, iEnd; - int tailEllipsis = 0; - int iMatch; - - - free(pCursor->snippet.zSnippet); - pCursor->snippet.zSnippet = 0; - aMatch = pCursor->snippet.aMatch; - nMatch = pCursor->snippet.nMatch; - initStringBuffer(&sb); - - for(i=0; iq.nTerms; i++){ - for(j=0; j0; i++){ - if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; - nDesired--; - iCol = aMatch[i].iCol; - zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); - iStart = aMatch[i].iStart - 40; - iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); - if( iStart<=10 ){ - iStart = 0; - } - if( iCol==tailCol && iStart<=tailOffset+20 ){ - iStart = tailOffset; - } - if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ - trimWhiteSpace(&sb); - appendWhiteSpace(&sb); - append(&sb, zEllipsis); - appendWhiteSpace(&sb); - } - iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; - iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); - if( iEnd>=nDoc-10 ){ - iEnd = nDoc; - tailEllipsis = 0; - }else{ - tailEllipsis = 1; - } - while( iMatchsnippet.zSnippet = sb.s; - pCursor->snippet.nSnippet = sb.len; -} - - -/* -** Close the cursor. For additional information see the documentation -** on the xClose method of the virtual table interface. -*/ -static int fulltextClose(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - TRACE(("FTS1 Close %p\n", c)); - sqlite3_finalize(c->pStmt); - queryClear(&c->q); - snippetClear(&c->snippet); - if( c->result.pDoclist!=NULL ){ - docListDelete(c->result.pDoclist); - } - free(c); - return SQLITE_OK; -} - -static int fulltextNext(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - sqlite_int64 iDocid; - int rc; - - TRACE(("FTS1 Next %p\n", pCursor)); - snippetClear(&c->snippet); - if( c->iCursorType < QUERY_FULLTEXT ){ - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - switch( rc ){ - case SQLITE_ROW: - c->eof = 0; - return SQLITE_OK; - case SQLITE_DONE: - c->eof = 1; - return SQLITE_OK; - default: - c->eof = 1; - return rc; - } - } else { /* full-text query */ - rc = sqlite3_reset(c->pStmt); - if( rc!=SQLITE_OK ) return rc; - - iDocid = nextDocid(&c->result); - if( iDocid==0 ){ - c->eof = 1; - return SQLITE_OK; - } - rc = sqlite3_bind_int64(c->pStmt, 1, iDocid); - if( rc!=SQLITE_OK ) return rc; - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - if( rc==SQLITE_ROW ){ /* the case we expect */ - c->eof = 0; - return SQLITE_OK; - } - /* an error occurred; abort */ - return rc==SQLITE_DONE ? SQLITE_ERROR : rc; - } -} - - -/* Return a DocList corresponding to the query term *pTerm. If *pTerm -** is the first term of a phrase query, go ahead and evaluate the phrase -** query and return the doclist for the entire phrase query. -** -** The result is stored in pTerm->doclist. -*/ -static int docListOfTerm( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* column to restrict to. No restrition if >=nColumn */ - QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */ - DocList **ppResult /* Write the result here */ -){ - DocList *pLeft, *pRight, *pNew; - int i, rc; - - pLeft = docListNew(DL_POSITIONS); - rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft); - if( rc ){ - docListDelete(pLeft); - return rc; - } - for(i=1; i<=pQTerm->nPhrase; i++){ - pRight = docListNew(DL_POSITIONS); - rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight); - if( rc ){ - docListDelete(pLeft); - return rc; - } - pNew = docListNew(inPhrase ? DL_POSITIONS : DL_DOCIDS); - docListPhraseMerge(pLeft, pRight, pNew); - docListDelete(pLeft); - docListDelete(pRight); - pLeft = pNew; - } - *ppResult = pLeft; - return SQLITE_OK; -} - -/* Add a new term pTerm[0..nTerm-1] to the query *q. -*/ -static void queryAdd(Query *q, const char *pTerm, int nTerm){ - QueryTerm *t; - ++q->nTerms; - q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); - if( q->pTerms==0 ){ - q->nTerms = 0; - return; - } - t = &q->pTerms[q->nTerms - 1]; - memset(t, 0, sizeof(*t)); - t->pTerm = malloc(nTerm+1); - memcpy(t->pTerm, pTerm, nTerm); - t->pTerm[nTerm] = 0; - t->nTerm = nTerm; - t->isOr = q->nextIsOr; - q->nextIsOr = 0; - t->iColumn = q->nextColumn; - q->nextColumn = q->dfltColumn; -} - -/* -** Check to see if the string zToken[0...nToken-1] matches any -** column name in the virtual table. If it does, -** return the zero-indexed column number. If not, return -1. -*/ -static int checkColumnSpecifier( - fulltext_vtab *pVtab, /* The virtual table */ - const char *zToken, /* Text of the token */ - int nToken /* Number of characters in the token */ -){ - int i; - for(i=0; inColumn; i++){ - if( memcmp(pVtab->azColumn[i], zToken, nToken)==0 - && pVtab->azColumn[i][nToken]==0 ){ - return i; - } - } - return -1; -} - -/* -** Parse the text at pSegment[0..nSegment-1]. Add additional terms -** to the query being assemblied in pQuery. -** -** inPhrase is true if pSegment[0..nSegement-1] is contained within -** double-quotes. If inPhrase is true, then the first term -** is marked with the number of terms in the phrase less one and -** OR and "-" syntax is ignored. If inPhrase is false, then every -** term found is marked with nPhrase=0 and OR and "-" syntax is significant. -*/ -static int tokenizeSegment( - sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ - const char *pSegment, int nSegment, /* Query expression being parsed */ - int inPhrase, /* True if within "..." */ - Query *pQuery /* Append results here */ -){ - const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCursor; - int firstIndex = pQuery->nTerms; - int iCol; - int nTerm = 1; - - int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); - if( rc!=SQLITE_OK ) return rc; - pCursor->pTokenizer = pTokenizer; - - while( 1 ){ - const char *pToken; - int nToken, iBegin, iEnd, iPos; - - rc = pModule->xNext(pCursor, - &pToken, &nToken, - &iBegin, &iEnd, &iPos); - if( rc!=SQLITE_OK ) break; - if( !inPhrase && - pSegment[iEnd]==':' && - (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ - pQuery->nextColumn = iCol; - continue; - } - if( !inPhrase && pQuery->nTerms>0 && nToken==2 - && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ - pQuery->nextIsOr = 1; - continue; - } - queryAdd(pQuery, pToken, nToken); - if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ - pQuery->pTerms[pQuery->nTerms-1].isNot = 1; - } - pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; - if( inPhrase ){ - nTerm++; - } - } - - if( inPhrase && pQuery->nTerms>firstIndex ){ - pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1; - } - - return pModule->xClose(pCursor); -} - -/* Parse a query string, yielding a Query object pQuery. -** -** The calling function will need to queryClear() to clean up -** the dynamically allocated memory held by pQuery. -*/ -static int parseQuery( - fulltext_vtab *v, /* The fulltext index */ - const char *zInput, /* Input text of the query string */ - int nInput, /* Size of the input text */ - int dfltColumn, /* Default column of the index to match against */ - Query *pQuery /* Write the parse results here. */ -){ - int iInput, inPhrase = 0; - - if( zInput==0 ) nInput = 0; - if( nInput<0 ) nInput = strlen(zInput); - pQuery->nTerms = 0; - pQuery->pTerms = NULL; - pQuery->nextIsOr = 0; - pQuery->nextColumn = dfltColumn; - pQuery->dfltColumn = dfltColumn; - pQuery->pFts = v; - - for(iInput=0; iInputiInput ){ - tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase, - pQuery); - } - iInput = i; - if( i=nColumn -** they are allowed to match against any column. -*/ -static int fulltextQuery( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* Match against this column by default */ - const char *zInput, /* The query string */ - int nInput, /* Number of bytes in zInput[] */ - DocList **pResult, /* Write the result doclist here */ - Query *pQuery /* Put parsed query string here */ -){ - int i, iNext, rc; - DocList *pLeft = NULL; - DocList *pRight, *pNew, *pOr; - int nNot = 0; - QueryTerm *aTerm; - - rc = parseQuery(v, zInput, nInput, iColumn, pQuery); - if( rc!=SQLITE_OK ) return rc; - - /* Merge AND terms. */ - aTerm = pQuery->pTerms; - for(i = 0; inTerms; i=iNext){ - if( aTerm[i].isNot ){ - /* Handle all NOT terms in a separate pass */ - nNot++; - iNext = i + aTerm[i].nPhrase+1; - continue; - } - iNext = i + aTerm[i].nPhrase + 1; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); - if( rc ){ - queryClear(pQuery); - return rc; - } - while( iNextnTerms && aTerm[iNext].isOr ){ - rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr); - iNext += aTerm[iNext].nPhrase + 1; - if( rc ){ - queryClear(pQuery); - return rc; - } - pNew = docListNew(DL_DOCIDS); - docListOrMerge(pRight, pOr, pNew); - docListDelete(pRight); - docListDelete(pOr); - pRight = pNew; - } - if( pLeft==0 ){ - pLeft = pRight; - }else{ - pNew = docListNew(DL_DOCIDS); - docListAndMerge(pLeft, pRight, pNew); - docListDelete(pRight); - docListDelete(pLeft); - pLeft = pNew; - } - } - - if( nNot && pLeft==0 ){ - /* We do not yet know how to handle a query of only NOT terms */ - return SQLITE_ERROR; - } - - /* Do the EXCEPT terms */ - for(i=0; inTerms; i += aTerm[i].nPhrase + 1){ - if( !aTerm[i].isNot ) continue; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); - if( rc ){ - queryClear(pQuery); - docListDelete(pLeft); - return rc; - } - pNew = docListNew(DL_DOCIDS); - docListExceptMerge(pLeft, pRight, pNew); - docListDelete(pRight); - docListDelete(pLeft); - pLeft = pNew; - } - - *pResult = pLeft; - return rc; -} - -/* -** This is the xFilter interface for the virtual table. See -** the virtual table xFilter method documentation for additional -** information. -** -** If idxNum==QUERY_GENERIC then do a full table scan against -** the %_content table. -** -** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry -** in the %_content table. -** -** If idxNum>=QUERY_FULLTEXT then use the full text index. The -** column on the left-hand side of the MATCH operator is column -** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand -** side of the MATCH operator. -*/ -/* TODO(shess) Upgrade the cursor initialization and destruction to -** account for fulltextFilter() being called multiple times on the -** same cursor. The current solution is very fragile. Apply fix to -** fts2 as appropriate. -*/ -static int fulltextFilter( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, const char *idxStr, /* Which indexing scheme to use */ - int argc, sqlite3_value **argv /* Arguments for the indexing scheme */ -){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - int rc; - char *zSql; - - TRACE(("FTS1 Filter %p\n",pCursor)); - - zSql = sqlite3_mprintf("select rowid, * from %%_content %s", - idxNum==QUERY_GENERIC ? "" : "where rowid=?"); - sqlite3_finalize(c->pStmt); - rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql); - sqlite3_free(zSql); - if( rc!=SQLITE_OK ) return rc; - - c->iCursorType = idxNum; - switch( idxNum ){ - case QUERY_GENERIC: - break; - - case QUERY_ROWID: - rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0])); - if( rc!=SQLITE_OK ) return rc; - break; - - default: /* full-text search */ - { - const char *zQuery = (const char *)sqlite3_value_text(argv[0]); - DocList *pResult; - assert( idxNum<=QUERY_FULLTEXT+v->nColumn); - assert( argc==1 ); - queryClear(&c->q); - rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q); - if( rc!=SQLITE_OK ) return rc; - if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist); - readerInit(&c->result, pResult); - break; - } - } - - return fulltextNext(pCursor); -} - -/* This is the xEof method of the virtual table. The SQLite core -** calls this routine to find out if it has reached the end of -** a query's results set. -*/ -static int fulltextEof(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - return c->eof; -} - -/* This is the xColumn method of the virtual table. The SQLite -** core calls this method during a query when it needs the value -** of a column from the virtual table. This method needs to use -** one of the sqlite3_result_*() routines to store the requested -** value back in the pContext. -*/ -static int fulltextColumn(sqlite3_vtab_cursor *pCursor, - sqlite3_context *pContext, int idxCol){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - - if( idxColnColumn ){ - sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1); - sqlite3_result_value(pContext, pVal); - }else if( idxCol==v->nColumn ){ - /* The extra column whose name is the same as the table. - ** Return a blob which is a pointer to the cursor - */ - sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT); - } - return SQLITE_OK; -} - -/* This is the xRowid method. The SQLite core calls this routine to -** retrive the rowid for the current row of the result set. The -** rowid should be written to *pRowid. -*/ -static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - - *pRowid = sqlite3_column_int64(c->pStmt, 0); - return SQLITE_OK; -} - -/* Add all terms in [zText] to the given hash table. If [iColumn] > 0, - * we also store positions and offsets in the hash table using the given - * column number. */ -static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid, - const char *zText, int iColumn){ - sqlite3_tokenizer *pTokenizer = v->pTokenizer; - sqlite3_tokenizer_cursor *pCursor; - const char *pToken; - int nTokenBytes; - int iStartOffset, iEndOffset, iPosition; - int rc; - - rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); - if( rc!=SQLITE_OK ) return rc; - - pCursor->pTokenizer = pTokenizer; - while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, - &pToken, &nTokenBytes, - &iStartOffset, &iEndOffset, - &iPosition) ){ - DocList *p; - - /* Positions can't be negative; we use -1 as a terminator internally. */ - if( iPosition<0 ){ - pTokenizer->pModule->xClose(pCursor); - return SQLITE_ERROR; - } - - p = fts1HashFind(terms, pToken, nTokenBytes); - if( p==NULL ){ - p = docListNew(DL_DEFAULT); - docListAddDocid(p, iDocid); - fts1HashInsert(terms, pToken, nTokenBytes, p); - } - if( iColumn>=0 ){ - docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset); - } - } - - /* TODO(shess) Check return? Should this be able to cause errors at - ** this point? Actually, same question about sqlite3_finalize(), - ** though one could argue that failure there means that the data is - ** not durable. *ponder* - */ - pTokenizer->pModule->xClose(pCursor); - return rc; -} - -/* Update the %_terms table to map the term [pTerm] to the given rowid. */ -static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm, - DocList *d){ - sqlite_int64 iIndexRow; - DocList doclist; - int iSegment = 0, rc; - - rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist); - if( rc==SQLITE_DONE ){ - docListInit(&doclist, DL_DEFAULT, 0, 0); - docListUpdate(&doclist, d); - /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */ - rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist); - goto err; - } - if( rc!=SQLITE_ROW ) return SQLITE_ERROR; - - docListUpdate(&doclist, d); - if( doclist.nData<=CHUNK_MAX ){ - rc = term_update(v, iIndexRow, &doclist); - goto err; - } - - /* Doclist doesn't fit, delete what's there, and accumulate - ** forward. - */ - rc = term_delete(v, iIndexRow); - if( rc!=SQLITE_OK ) goto err; - - /* Try to insert the doclist into a higher segment bucket. On - ** failure, accumulate existing doclist with the doclist from that - ** bucket, and put results in the next bucket. - */ - iSegment++; - while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment, - &doclist))!=SQLITE_OK ){ - sqlite_int64 iSegmentRow; - DocList old; - int rc2; - - /* Retain old error in case the term_insert() error was really an - ** error rather than a bounced insert. - */ - rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old); - if( rc2!=SQLITE_ROW ) goto err; - - rc = term_delete(v, iSegmentRow); - if( rc!=SQLITE_OK ) goto err; - - /* Reusing lowest-number deleted row keeps the index smaller. */ - if( iSegmentRownColumn ; ++i){ - char *zText = (char*)sqlite3_value_text(pValues[i]); - int rc = buildTerms(v, terms, iRowid, zText, i); - if( rc!=SQLITE_OK ) return rc; - } - return SQLITE_OK; -} - -/* Add empty doclists for all terms in the given row's content to the hash - * table [pTerms]. */ -static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){ - const char **pValues; - int i; - - int rc = content_select(v, iRowid, &pValues); - if( rc!=SQLITE_OK ) return rc; - - for(i = 0 ; i < v->nColumn; ++i) { - rc = buildTerms(v, pTerms, iRowid, pValues[i], -1); - if( rc!=SQLITE_OK ) break; - } - - freeStringArray(v->nColumn, pValues); - return SQLITE_OK; -} - -/* Insert a row into the %_content table; set *piRowid to be the ID of the - * new row. Fill [pTerms] with new doclists for the %_term table. */ -static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, - sqlite3_value **pValues, - sqlite_int64 *piRowid, fts1Hash *pTerms){ - int rc; - - rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */ - if( rc!=SQLITE_OK ) return rc; - *piRowid = sqlite3_last_insert_rowid(v->db); - return insertTerms(v, pTerms, *piRowid, pValues); -} - -/* Delete a row from the %_content table; fill [pTerms] with empty doclists - * to be written to the %_term table. */ -static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){ - int rc = deleteTerms(v, pTerms, iRow); - if( rc!=SQLITE_OK ) return rc; - return content_delete(v, iRow); /* execute an SQL DELETE */ -} - -/* Update a row in the %_content table; fill [pTerms] with new doclists for the - * %_term table. */ -static int index_update(fulltext_vtab *v, sqlite_int64 iRow, - sqlite3_value **pValues, fts1Hash *pTerms){ - /* Generate an empty doclist for each term that previously appeared in this - * row. */ - int rc = deleteTerms(v, pTerms, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */ - if( rc!=SQLITE_OK ) return rc; - - /* Now add positions for terms which appear in the updated row. */ - return insertTerms(v, pTerms, iRow, pValues); -} - -/* This function implements the xUpdate callback; it is the top-level entry - * point for inserting, deleting or updating a row in a full-text table. */ -static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, - sqlite_int64 *pRowid){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - fts1Hash terms; /* maps term string -> PosList */ - int rc; - fts1HashElem *e; - - TRACE(("FTS1 Update %p\n", pVtab)); - - fts1HashInit(&terms, FTS1_HASH_STRING, 1); - - if( nArg<2 ){ - rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms); - } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ - /* An update: - * ppArg[0] = old rowid - * ppArg[1] = new rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); - if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || - sqlite3_value_int64(ppArg[1]) != rowid ){ - rc = SQLITE_ERROR; /* we don't allow changing the rowid */ - } else { - assert( nArg==2+v->nColumn+1); - rc = index_update(v, rowid, &ppArg[2], &terms); - } - } else { - /* An insert: - * ppArg[1] = requested rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - assert( nArg==2+v->nColumn+1); - rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms); - } - - if( rc==SQLITE_OK ){ - /* Write updated doclists to disk. */ - for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){ - DocList *p = fts1HashData(e); - rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p); - if( rc!=SQLITE_OK ) break; - } - } - - /* clean up */ - for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){ - DocList *p = fts1HashData(e); - docListDelete(p); - } - fts1HashClear(&terms); - - return rc; -} - -/* -** Implementation of the snippet() function for FTS1 -*/ -static void snippetFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1); - }else{ - const char *zStart = ""; - const char *zEnd = ""; - const char *zEllipsis = "..."; - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - if( argc>=2 ){ - zStart = (const char*)sqlite3_value_text(argv[1]); - if( argc>=3 ){ - zEnd = (const char*)sqlite3_value_text(argv[2]); - if( argc>=4 ){ - zEllipsis = (const char*)sqlite3_value_text(argv[3]); - } - } - } - snippetAllOffsets(pCursor); - snippetText(pCursor, zStart, zEnd, zEllipsis); - sqlite3_result_text(pContext, pCursor->snippet.zSnippet, - pCursor->snippet.nSnippet, SQLITE_STATIC); - } -} - -/* -** Implementation of the offsets() function for FTS1 -*/ -static void snippetOffsetsFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to offsets",-1); - }else{ - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - snippetAllOffsets(pCursor); - snippetOffsetText(&pCursor->snippet); - sqlite3_result_text(pContext, - pCursor->snippet.zOffset, pCursor->snippet.nOffset, - SQLITE_STATIC); - } -} - -/* -** This routine implements the xFindFunction method for the FTS1 -** virtual table. -*/ -static int fulltextFindFunction( - sqlite3_vtab *pVtab, - int nArg, - const char *zName, - void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), - void **ppArg -){ - if( strcmp(zName,"snippet")==0 ){ - *pxFunc = snippetFunc; - return 1; - }else if( strcmp(zName,"offsets")==0 ){ - *pxFunc = snippetOffsetsFunc; - return 1; - } - return 0; -} - -/* -** Rename an fts1 table. -*/ -static int fulltextRename( - sqlite3_vtab *pVtab, - const char *zName -){ - fulltext_vtab *p = (fulltext_vtab *)pVtab; - int rc = SQLITE_NOMEM; - char *zSql = sqlite3_mprintf( - "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';" - "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';" - , p->zDb, p->zName, zName - , p->zDb, p->zName, zName - ); - if( zSql ){ - rc = sqlite3_exec(p->db, zSql, 0, 0, 0); - sqlite3_free(zSql); - } - return rc; -} - -static const sqlite3_module fulltextModule = { - /* iVersion */ 0, - /* xCreate */ fulltextCreate, - /* xConnect */ fulltextConnect, - /* xBestIndex */ fulltextBestIndex, - /* xDisconnect */ fulltextDisconnect, - /* xDestroy */ fulltextDestroy, - /* xOpen */ fulltextOpen, - /* xClose */ fulltextClose, - /* xFilter */ fulltextFilter, - /* xNext */ fulltextNext, - /* xEof */ fulltextEof, - /* xColumn */ fulltextColumn, - /* xRowid */ fulltextRowid, - /* xUpdate */ fulltextUpdate, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindFunction */ fulltextFindFunction, - /* xRename */ fulltextRename, -}; - -int sqlite3Fts1Init(sqlite3 *db){ - sqlite3_overload_function(db, "snippet", -1); - sqlite3_overload_function(db, "offsets", -1); - return sqlite3_create_module(db, "fts1", &fulltextModule, 0); -} - -#if !SQLITE_CORE -#ifdef _WIN32 -__declspec(dllexport) -#endif -int sqlite3_fts1_init(sqlite3 *db, char **pzErrMsg, - const sqlite3_api_routines *pApi){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3Fts1Init(db); -} -#endif - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/third_party/sqlite3/fts1.h b/third_party/sqlite3/fts1.h deleted file mode 100644 index d55e68973..000000000 --- a/third_party/sqlite3/fts1.h +++ /dev/null @@ -1,11 +0,0 @@ -#include "sqlite3.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -int sqlite3Fts1Init(sqlite3 *db); - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ diff --git a/third_party/sqlite3/fts1_hash.c b/third_party/sqlite3/fts1_hash.c deleted file mode 100644 index 00e762602..000000000 --- a/third_party/sqlite3/fts1_hash.c +++ /dev/null @@ -1,368 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the implementation of generic hash-tables used in SQLite. -** We've modified it slightly to serve as a standalone hash table -** implementation for the full-text indexing module. -*/ - -/* clang-format off */ - -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - - -#include "third_party/sqlite3/fts1_hash.h" - -static void *malloc_and_zero(int n){ - void *p = malloc(n); - if( p ){ - memset(p, 0, n); - } - return p; -} - -/* Turn bulk memory into a hash table object by initializing the -** fields of the Hash structure. -** -** "pNew" is a pointer to the hash table that is to be initialized. -** keyClass is one of the constants -** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass -** determines what kind of key the hash table will use. "copyKey" is -** true if the hash table should make its own private copy of keys and -** false if it should just use the supplied pointer. -*/ -void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){ - assert( pNew!=0 ); - assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY ); - pNew->keyClass = keyClass; - pNew->copyKey = copyKey; - pNew->first = 0; - pNew->count = 0; - pNew->htsize = 0; - pNew->ht = 0; - pNew->xMalloc = malloc_and_zero; - pNew->xFree = free; -} - -/* Remove all entries from a hash table. Reclaim all memory. -** Call this routine to delete a hash table or to reset a hash table -** to the empty state. -*/ -void sqlite3Fts1HashClear(fts1Hash *pH){ - fts1HashElem *elem; /* For looping over all elements of the table */ - - assert( pH!=0 ); - elem = pH->first; - pH->first = 0; - if( pH->ht ) pH->xFree(pH->ht); - pH->ht = 0; - pH->htsize = 0; - while( elem ){ - fts1HashElem *next_elem = elem->next; - if( pH->copyKey && elem->pKey ){ - pH->xFree(elem->pKey); - } - pH->xFree(elem); - elem = next_elem; - } - pH->count = 0; -} - -/* -** Hash and comparison functions when the mode is FTS1_HASH_STRING -*/ -static int strHash(const void *pKey, int nKey){ - const char *z = (const char *)pKey; - int h = 0; - if( nKey<=0 ) nKey = (int) strlen(z); - while( nKey > 0 ){ - h = (h<<3) ^ h ^ *z++; - nKey--; - } - return h & 0x7fffffff; -} -static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return strncmp((const char*)pKey1,(const char*)pKey2,n1); -} - -/* -** Hash and comparison functions when the mode is FTS1_HASH_BINARY -*/ -static int binHash(const void *pKey, int nKey){ - int h = 0; - const char *z = (const char *)pKey; - while( nKey-- > 0 ){ - h = (h<<3) ^ h ^ *(z++); - } - return h & 0x7fffffff; -} -static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return memcmp(pKey1,pKey2,n1); -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** The C syntax in this function definition may be unfamilar to some -** programmers, so we provide the following additional explanation: -** -** The name of the function is "hashFunction". The function takes a -** single parameter "keyClass". The return value of hashFunction() -** is a pointer to another function. Specifically, the return value -** of hashFunction() is a pointer to a function that takes two parameters -** with types "const void*" and "int" and returns an "int". -*/ -static int (*hashFunction(int keyClass))(const void*,int){ - if( keyClass==FTS1_HASH_STRING ){ - return &strHash; - }else{ - assert( keyClass==FTS1_HASH_BINARY ); - return &binHash; - } -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** For help in interpreted the obscure C code in the function definition, -** see the header comment on the previous function. -*/ -static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ - if( keyClass==FTS1_HASH_STRING ){ - return &strCompare; - }else{ - assert( keyClass==FTS1_HASH_BINARY ); - return &binCompare; - } -} - -/* Link an element into the hash table -*/ -static void insertElement( - fts1Hash *pH, /* The complete hash table */ - struct _fts1ht *pEntry, /* The entry into which pNew is inserted */ - fts1HashElem *pNew /* The element to be inserted */ -){ - fts1HashElem *pHead; /* First element already in pEntry */ - pHead = pEntry->chain; - if( pHead ){ - pNew->next = pHead; - pNew->prev = pHead->prev; - if( pHead->prev ){ pHead->prev->next = pNew; } - else { pH->first = pNew; } - pHead->prev = pNew; - }else{ - pNew->next = pH->first; - if( pH->first ){ pH->first->prev = pNew; } - pNew->prev = 0; - pH->first = pNew; - } - pEntry->count++; - pEntry->chain = pNew; -} - - -/* Resize the hash table so that it cantains "new_size" buckets. -** "new_size" must be a power of 2. The hash table might fail -** to resize if sqliteMalloc() fails. -*/ -static void rehash(fts1Hash *pH, int new_size){ - struct _fts1ht *new_ht; /* The new hash table */ - fts1HashElem *elem, *next_elem; /* For looping over existing elements */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( (new_size & (new_size-1))==0 ); - new_ht = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) ); - if( new_ht==0 ) return; - if( pH->ht ) pH->xFree(pH->ht); - pH->ht = new_ht; - pH->htsize = new_size; - xHash = hashFunction(pH->keyClass); - for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); - next_elem = elem->next; - insertElement(pH, &new_ht[h], elem); - } -} - -/* This function (for internal use only) locates an element in an -** hash table that matches the given key. The hash for this key has -** already been computed and is passed as the 4th parameter. -*/ -static fts1HashElem *findElementGivenHash( - const fts1Hash *pH, /* The pH to be searched */ - const void *pKey, /* The key we are searching for */ - int nKey, - int h /* The hash for this key. */ -){ - fts1HashElem *elem; /* Used to loop thru the element list */ - int count; /* Number of elements left to test */ - int (*xCompare)(const void*,int,const void*,int); /* comparison function */ - - if( pH->ht ){ - struct _fts1ht *pEntry = &pH->ht[h]; - elem = pEntry->chain; - count = pEntry->count; - xCompare = compareFunction(pH->keyClass); - while( count-- && elem ){ - if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ - return elem; - } - elem = elem->next; - } - } - return 0; -} - -/* Remove a single entry from the hash table given a pointer to that -** element and a hash on the element's key. -*/ -static void removeElementGivenHash( - fts1Hash *pH, /* The pH containing "elem" */ - fts1HashElem* elem, /* The element to be removed from the pH */ - int h /* Hash value for the element */ -){ - struct _fts1ht *pEntry; - if( elem->prev ){ - elem->prev->next = elem->next; - }else{ - pH->first = elem->next; - } - if( elem->next ){ - elem->next->prev = elem->prev; - } - pEntry = &pH->ht[h]; - if( pEntry->chain==elem ){ - pEntry->chain = elem->next; - } - pEntry->count--; - if( pEntry->count<=0 ){ - pEntry->chain = 0; - } - if( pH->copyKey && elem->pKey ){ - pH->xFree(elem->pKey); - } - pH->xFree( elem ); - pH->count--; - if( pH->count<=0 ){ - assert( pH->first==0 ); - assert( pH->count==0 ); - fts1HashClear(pH); - } -} - -/* Attempt to locate an element of the hash table pH with a key -** that matches pKey,nKey. Return the data for this element if it is -** found, or NULL if there is no match. -*/ -void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){ - int h; /* A hash on key */ - fts1HashElem *elem; /* The element that matches key */ - int (*xHash)(const void*,int); /* The hash function */ - - if( pH==0 || pH->ht==0 ) return 0; - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - h = (*xHash)(pKey,nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); - return elem ? elem->data : 0; -} - -/* Insert an element into the hash table pH. The key is pKey,nKey -** and the data is "data". -** -** If no element exists with a matching key, then a new -** element is created. A copy of the key is made if the copyKey -** flag is set. NULL is returned. -** -** If another element already exists with the same key, then the -** new data replaces the old data and the old data is returned. -** The key is not copied in this instance. If a malloc fails, then -** the new data is returned and the hash table is unchanged. -** -** If the "data" parameter to this function is NULL, then the -** element corresponding to "key" is removed from the hash table. -*/ -void *sqlite3Fts1HashInsert( - fts1Hash *pH, /* The hash table to insert into */ - const void *pKey, /* The key */ - int nKey, /* Number of bytes in the key */ - void *data /* The data */ -){ - int hraw; /* Raw hash value of the key */ - int h; /* the hash of the key modulo hash table size */ - fts1HashElem *elem; /* Used to loop thru the element list */ - fts1HashElem *new_elem; /* New element added to the pH */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( pH!=0 ); - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - hraw = (*xHash)(pKey, nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - elem = findElementGivenHash(pH,pKey,nKey,h); - if( elem ){ - void *old_data = elem->data; - if( data==0 ){ - removeElementGivenHash(pH,elem,h); - }else{ - elem->data = data; - } - return old_data; - } - if( data==0 ) return 0; - new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) ); - if( new_elem==0 ) return data; - if( pH->copyKey && pKey!=0 ){ - new_elem->pKey = pH->xMalloc( nKey ); - if( new_elem->pKey==0 ){ - pH->xFree(new_elem); - return data; - } - memcpy((void*)new_elem->pKey, pKey, nKey); - }else{ - new_elem->pKey = (void*)pKey; - } - new_elem->nKey = nKey; - pH->count++; - if( pH->htsize==0 ){ - rehash(pH,8); - if( pH->htsize==0 ){ - pH->count = 0; - pH->xFree(new_elem); - return data; - } - } - if( pH->count > pH->htsize ){ - rehash(pH,pH->htsize*2); - } - assert( pH->htsize>0 ); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - insertElement(pH, &pH->ht[h], new_elem); - new_elem->data = data; - return 0; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/third_party/sqlite3/fts1_hash.h b/third_party/sqlite3/fts1_hash.h deleted file mode 100644 index e06d7721b..000000000 --- a/third_party/sqlite3/fts1_hash.h +++ /dev/null @@ -1,114 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the header file for the generic hash-table implementation -** used in SQLite. We've modified it slightly to serve as a standalone -** hash table implementation for the full-text indexing module. -** -*/ -#ifndef _FTS1_HASH_H_ -#define _FTS1_HASH_H_ - -/* clang-format off */ - -/* Forward declarations of structures. */ -typedef struct fts1Hash fts1Hash; -typedef struct fts1HashElem fts1HashElem; - -/* A complete hash table is an instance of the following structure. -** The internals of this structure are intended to be opaque -- client -** code should not attempt to access or modify the fields of this structure -** directly. Change this structure only by using the routines below. -** However, many of the "procedures" and "functions" for modifying and -** accessing this structure are really macros, so we can't really make -** this structure opaque. -*/ -struct fts1Hash { - char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ - char copyKey; /* True if copy of key made on insert */ - int count; /* Number of entries in this table */ - fts1HashElem *first; /* The first element of the array */ - void *(*xMalloc)(int); /* malloc() function to use */ - void (*xFree)(void *); /* free() function to use */ - int htsize; /* Number of buckets in the hash table */ - struct _fts1ht { /* the hash table */ - int count; /* Number of entries with this hash */ - fts1HashElem *chain; /* Pointer to first entry with this hash */ - } *ht; -}; - -/* Each element in the hash table is an instance of the following -** structure. All elements are stored on a single doubly-linked list. -** -** Again, this structure is intended to be opaque, but it can't really -** be opaque because it is used by macros. -*/ -struct fts1HashElem { - fts1HashElem *next, *prev; /* Next and previous elements in the table */ - void *data; /* Data associated with this element */ - void *pKey; int nKey; /* Key associated with this element */ -}; - -/* -** There are 2 different modes of operation for a hash table: -** -** FTS1_HASH_STRING pKey points to a string that is nKey bytes long -** (including the null-terminator, if any). Case -** is respected in comparisons. -** -** FTS1_HASH_BINARY pKey points to binary data nKey bytes long. -** memcmp() is used to compare keys. -** -** A copy of the key is made if the copyKey parameter to fts1HashInit is 1. -*/ -#define FTS1_HASH_STRING 1 -#define FTS1_HASH_BINARY 2 - -/* -** Access routines. To delete, insert a NULL pointer. -*/ -void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey); -void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData); -void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey); -void sqlite3Fts1HashClear(fts1Hash*); - -/* -** Shorthand for the functions above -*/ -#define fts1HashInit sqlite3Fts1HashInit -#define fts1HashInsert sqlite3Fts1HashInsert -#define fts1HashFind sqlite3Fts1HashFind -#define fts1HashClear sqlite3Fts1HashClear - -/* -** Macros for looping over all elements of a hash table. The idiom is -** like this: -** -** fts1Hash h; -** fts1HashElem *p; -** ... -** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){ -** SomeStructure *pData = fts1HashData(p); -** // do something with pData -** } -*/ -#define fts1HashFirst(H) ((H)->first) -#define fts1HashNext(E) ((E)->next) -#define fts1HashData(E) ((E)->data) -#define fts1HashKey(E) ((E)->pKey) -#define fts1HashKeysize(E) ((E)->nKey) - -/* -** Number of entries in a hash table -*/ -#define fts1HashCount(H) ((H)->count) - -#endif /* _FTS1_HASH_H_ */ diff --git a/third_party/sqlite3/fts1_porter.c b/third_party/sqlite3/fts1_porter.c deleted file mode 100644 index 620f24d04..000000000 --- a/third_party/sqlite3/fts1_porter.c +++ /dev/null @@ -1,643 +0,0 @@ -/* -** 2006 September 30 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** Implementation of the full-text-search tokenizer that implements -** a Porter stemmer. -*/ -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - -/* clang-format off */ - -#include -#include -#include -#include -#include - -#include "fts1_tokenizer.h" - -/* -** Class derived from sqlite3_tokenizer -*/ -typedef struct porter_tokenizer { - sqlite3_tokenizer base; /* Base class */ -} porter_tokenizer; - -/* -** Class derived from sqlit3_tokenizer_cursor -*/ -typedef struct porter_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *zInput; /* input we are tokenizing */ - int nInput; /* size of the input */ - int iOffset; /* current position in zInput */ - int iToken; /* index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAllocated; /* space allocated to zToken buffer */ -} porter_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module porterTokenizerModule; - - -/* -** Create a new tokenizer instance. -*/ -static int porterCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - porter_tokenizer *t; - t = (porter_tokenizer *) calloc(sizeof(*t), 1); - if( t==NULL ) return SQLITE_NOMEM; - - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int porterDestroy(sqlite3_tokenizer *pTokenizer){ - free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is zInput[0..nInput-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int porterOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, int nInput, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - porter_tokenizer_cursor *c; - - c = (porter_tokenizer_cursor *) malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->zInput = zInput; - if( zInput==0 ){ - c->nInput = 0; - }else if( nInput<0 ){ - c->nInput = (int)strlen(zInput); - }else{ - c->nInput = nInput; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->zToken = NULL; /* no space allocated, yet. */ - c->nAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** porterOpen() above. -*/ -static int porterClose(sqlite3_tokenizer_cursor *pCursor){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - free(c->zToken); - free(c); - return SQLITE_OK; -} -/* -** Vowel or consonant -*/ -static const char cType[] = { - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 2, 1 -}; - -/* -** isConsonant() and isVowel() determine if their first character in -** the string they point to is a consonant or a vowel, according -** to Porter ruls. -** -** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. -** 'Y' is a consonant unless it follows another consonant, -** in which case it is a vowel. -** -** In these routine, the letters are in reverse order. So the 'y' rule -** is that 'y' is a consonant unless it is followed by another -** consonent. -*/ -static int isVowel(const char*); -static int isConsonant(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return j; - return z[1]==0 || isVowel(z + 1); -} -static int isVowel(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return 1-j; - return isConsonant(z + 1); -} - -/* -** Let any sequence of one or more vowels be represented by V and let -** C be sequence of one or more consonants. Then every word can be -** represented as: -** -** [C] (VC){m} [V] -** -** In prose: A word is an optional consonant followed by zero or -** vowel-consonant pairs followed by an optional vowel. "m" is the -** number of vowel consonant pairs. This routine computes the value -** of m for the first i bytes of a word. -** -** Return true if the m-value for z is 1 or more. In other words, -** return true if z contains at least one vowel that is followed -** by a consonant. -** -** In this routine z[] is in reverse order. So we are really looking -** for an instance of of a consonant followed by a vowel. -*/ -static int m_gt_0(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* Like mgt0 above except we are looking for a value of m which is -** exactly 1 -*/ -static int m_eq_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 1; - while( isConsonant(z) ){ z++; } - return *z==0; -} - -/* Like mgt0 above except we are looking for a value of m>1 instead -** or m>0 -*/ -static int m_gt_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if there is a vowel anywhere within z[0..n-1] -*/ -static int hasVowel(const char *z){ - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if the word ends in a double consonant. -** -** The text is reversed here. So we are really looking at -** the first two characters of z[]. -*/ -static int doubleConsonant(const char *z){ - return isConsonant(z) && z[0]==z[1] && isConsonant(z+1); -} - -/* -** Return TRUE if the word ends with three letters which -** are consonant-vowel-consonent and where the final consonant -** is not 'w', 'x', or 'y'. -** -** The word is reversed here. So we are really checking the -** first three letters and the first one cannot be in [wxy]. -*/ -static int star_oh(const char *z){ - return - z[0]!=0 && isConsonant(z) && - z[0]!='w' && z[0]!='x' && z[0]!='y' && - z[1]!=0 && isVowel(z+1) && - z[2]!=0 && isConsonant(z+2); -} - -/* -** If the word ends with zFrom and xCond() is true for the stem -** of the word that preceeds the zFrom ending, then change the -** ending to zTo. -** -** The input word *pz and zFrom are both in reverse order. zTo -** is in normal order. -** -** Return TRUE if zFrom matches. Return FALSE if zFrom does not -** match. Not that TRUE is returned even if xCond() fails and -** no substitution occurs. -*/ -static int stem( - char **pz, /* The word being stemmed (Reversed) */ - const char *zFrom, /* If the ending matches this... (Reversed) */ - const char *zTo, /* ... change the ending to this (not reversed) */ - int (*xCond)(const char*) /* Condition that must be true */ -){ - char *z = *pz; - while( *zFrom && *zFrom==*z ){ z++; zFrom++; } - if( *zFrom!=0 ) return 0; - if( xCond && !xCond(z) ) return 1; - while( *zTo ){ - *(--z) = *(zTo++); - } - *pz = z; - return 1; -} - -/* -** This is the fallback stemmer used when the porter stemmer is -** inappropriate. The input word is copied into the output with -** US-ASCII case folding. If the input word is too long (more -** than 20 bytes if it contains no digits or more than 6 bytes if -** it contains digits) then word is truncated to 20 or 6 bytes -** by taking 10 or 3 bytes from the beginning and end. -*/ -static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, mx, j; - int hasDigit = 0; - for(i=0; i='A' && c<='Z' ){ - zOut[i] = c - 'A' + 'a'; - }else{ - if( c>='0' && c<='9' ) hasDigit = 1; - zOut[i] = c; - } - } - mx = hasDigit ? 3 : 10; - if( nIn>mx*2 ){ - for(j=mx, i=nIn-mx; i=sizeof(zReverse)-7 ){ - /* The word is too big or too small for the porter stemmer. - ** Fallback to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ - zReverse[j] = c + 'a' - 'A'; - }else if( c>='a' && c<='z' ){ - zReverse[j] = c; - }else{ - /* The use of a character not in [a-zA-Z] means that we fallback - ** to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - } - memset(&zReverse[sizeof(zReverse)-5], 0, 5); - z = &zReverse[j+1]; - - - /* Step 1a */ - if( z[0]=='s' ){ - if( - !stem(&z, "sess", "ss", 0) && - !stem(&z, "sei", "i", 0) && - !stem(&z, "ss", "ss", 0) - ){ - z++; - } - } - - /* Step 1b */ - z2 = z; - if( stem(&z, "dee", "ee", m_gt_0) ){ - /* Do nothing. The work was all in the test */ - }else if( - (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) - && z!=z2 - ){ - if( stem(&z, "ta", "ate", 0) || - stem(&z, "lb", "ble", 0) || - stem(&z, "zi", "ize", 0) ){ - /* Do nothing. The work was all in the test */ - }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ - z++; - }else if( m_eq_1(z) && star_oh(z) ){ - *(--z) = 'e'; - } - } - - /* Step 1c */ - if( z[0]=='y' && hasVowel(z+1) ){ - z[0] = 'i'; - } - - /* Step 2 */ - switch( z[1] ){ - case 'a': - stem(&z, "lanoita", "ate", m_gt_0) || - stem(&z, "lanoit", "tion", m_gt_0); - break; - case 'c': - stem(&z, "icne", "ence", m_gt_0) || - stem(&z, "icna", "ance", m_gt_0); - break; - case 'e': - stem(&z, "rezi", "ize", m_gt_0); - break; - case 'g': - stem(&z, "igol", "log", m_gt_0); - break; - case 'l': - stem(&z, "ilb", "ble", m_gt_0) || - stem(&z, "illa", "al", m_gt_0) || - stem(&z, "iltne", "ent", m_gt_0) || - stem(&z, "ile", "e", m_gt_0) || - stem(&z, "ilsuo", "ous", m_gt_0); - break; - case 'o': - stem(&z, "noitazi", "ize", m_gt_0) || - stem(&z, "noita", "ate", m_gt_0) || - stem(&z, "rota", "ate", m_gt_0); - break; - case 's': - stem(&z, "msila", "al", m_gt_0) || - stem(&z, "ssenevi", "ive", m_gt_0) || - stem(&z, "ssenluf", "ful", m_gt_0) || - stem(&z, "ssensuo", "ous", m_gt_0); - break; - case 't': - stem(&z, "itila", "al", m_gt_0) || - stem(&z, "itivi", "ive", m_gt_0) || - stem(&z, "itilib", "ble", m_gt_0); - break; - } - - /* Step 3 */ - switch( z[0] ){ - case 'e': - stem(&z, "etaci", "ic", m_gt_0) || - stem(&z, "evita", "", m_gt_0) || - stem(&z, "ezila", "al", m_gt_0); - break; - case 'i': - stem(&z, "itici", "ic", m_gt_0); - break; - case 'l': - stem(&z, "laci", "ic", m_gt_0) || - stem(&z, "luf", "", m_gt_0); - break; - case 's': - stem(&z, "ssen", "", m_gt_0); - break; - } - - /* Step 4 */ - switch( z[1] ){ - case 'a': - if( z[0]=='l' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'c': - if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'e': - if( z[0]=='r' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'i': - if( z[0]=='c' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'l': - if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'n': - if( z[0]=='t' ){ - if( z[2]=='a' ){ - if( m_gt_1(z+3) ){ - z += 3; - } - }else if( z[2]=='e' ){ - stem(&z, "tneme", "", m_gt_1) || - stem(&z, "tnem", "", m_gt_1) || - stem(&z, "tne", "", m_gt_1); - } - } - break; - case 'o': - if( z[0]=='u' ){ - if( m_gt_1(z+2) ){ - z += 2; - } - }else if( z[3]=='s' || z[3]=='t' ){ - stem(&z, "noi", "", m_gt_1); - } - break; - case 's': - if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 't': - stem(&z, "eta", "", m_gt_1) || - stem(&z, "iti", "", m_gt_1); - break; - case 'u': - if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 'v': - case 'z': - if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - } - - /* Step 5a */ - if( z[0]=='e' ){ - if( m_gt_1(z+1) ){ - z++; - }else if( m_eq_1(z+1) && !star_oh(z+1) ){ - z++; - } - } - - /* Step 5b */ - if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ - z++; - } - - /* z[] is now the stemmed word in reverse order. Flip it back - ** around into forward order and return. - */ - *pnOut = i = strlen(z); - zOut[i] = 0; - while( *z ){ - zOut[--i] = *(z++); - } -} - -/* -** Characters that can be part of a token. We assume any character -** whose value is greater than 0x80 (any UTF character) can be -** part of a token. In other words, delimiters all must have -** values of 0x7f or lower. -*/ -static const char isIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30])) -#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30])) - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to porterOpen(). -*/ -static int porterNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ - const char **pzToken, /* OUT: *pzToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - const char *z = c->zInput; - - while( c->iOffsetnInput ){ - int iStartOffset, ch; - - /* Scan past delimiter characters */ - while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int n = c->iOffset-iStartOffset; - if( n>c->nAllocated ){ - c->nAllocated = n+20; - c->zToken = realloc(c->zToken, c->nAllocated); - if( c->zToken==NULL ) return SQLITE_NOMEM; - } - porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); - *pzToken = c->zToken; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the porter-stemmer tokenizer -*/ -static const sqlite3_tokenizer_module porterTokenizerModule = { - 0, - porterCreate, - porterDestroy, - porterOpen, - porterClose, - porterNext, -}; - -/* -** Allocate a new porter tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts1PorterTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &porterTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/third_party/sqlite3/fts1_tokenizer1.c b/third_party/sqlite3/fts1_tokenizer1.c deleted file mode 100644 index 5cac2a3f1..000000000 --- a/third_party/sqlite3/fts1_tokenizer1.c +++ /dev/null @@ -1,221 +0,0 @@ -/* -** The author disclaims copyright to this source code. -** -************************************************************************* -** Implementation of the "simple" full-text-search tokenizer. -*/ -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - -/* clang-format off */ - -#include -#include -#include -#include -#include - -#include "fts1_tokenizer.h" - -typedef struct simple_tokenizer { - sqlite3_tokenizer base; - char delim[128]; /* flag ASCII delimiters */ -} simple_tokenizer; - -typedef struct simple_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *pInput; /* input we are tokenizing */ - int nBytes; /* size of the input */ - int iOffset; /* current position in pInput */ - int iToken; /* index of next token to be returned */ - char *pToken; /* storage for current token */ - int nTokenAllocated; /* space allocated to zToken buffer */ -} simple_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module simpleTokenizerModule; - -static int isDelim(simple_tokenizer *t, unsigned char c){ - return c<0x80 && t->delim[c]; -} - -/* -** Create a new tokenizer instance. -*/ -static int simpleCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - simple_tokenizer *t; - - t = (simple_tokenizer *) calloc(sizeof(*t), 1); - if( t==NULL ) return SQLITE_NOMEM; - - /* TODO(shess) Delimiters need to remain the same from run to run, - ** else we need to reindex. One solution would be a meta-table to - ** track such information in the database, then we'd only want this - ** information on the initial create. - */ - if( argc>1 ){ - int i, n = strlen(argv[1]); - for(i=0; i=0x80 ){ - free(t); - return SQLITE_ERROR; - } - t->delim[ch] = 1; - } - } else { - /* Mark non-alphanumeric ASCII characters as delimiters */ - int i; - for(i=1; i<0x80; i++){ - t->delim[i] = !isalnum(i); - } - } - - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ - free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int simpleOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *pInput, int nBytes, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - simple_tokenizer_cursor *c; - - c = (simple_tokenizer_cursor *) malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->pInput = pInput; - if( pInput==0 ){ - c->nBytes = 0; - }else if( nBytes<0 ){ - c->nBytes = (int)strlen(pInput); - }else{ - c->nBytes = nBytes; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->pToken = NULL; /* no space allocated, yet. */ - c->nTokenAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. -*/ -static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - free(c->pToken); - free(c); - return SQLITE_OK; -} - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to simpleOpen(). -*/ -static int simpleNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; - unsigned char *p = (unsigned char *)c->pInput; - - while( c->iOffsetnBytes ){ - int iStartOffset; - - /* Scan past delimiter characters */ - while( c->iOffsetnBytes && isDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnBytes && !isDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int i, n = c->iOffset-iStartOffset; - if( n>c->nTokenAllocated ){ - c->nTokenAllocated = n+20; - c->pToken = realloc(c->pToken, c->nTokenAllocated); - if( c->pToken==NULL ) return SQLITE_NOMEM; - } - for(i=0; ipToken[i] = ch<0x80 ? tolower(ch) : ch; - } - *ppToken = c->pToken; - *pnBytes = n; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the simple tokenizer -*/ -static const sqlite3_tokenizer_module simpleTokenizerModule = { - 0, - simpleCreate, - simpleDestroy, - simpleOpen, - simpleClose, - simpleNext, -}; - -/* -** Allocate a new simple tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts1SimpleTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &simpleTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/third_party/sqlite3/fts2.c b/third_party/sqlite3/fts2.c deleted file mode 100644 index 40b1d3485..000000000 --- a/third_party/sqlite3/fts2.c +++ /dev/null @@ -1,6861 +0,0 @@ -/* fts2 has a design flaw which can lead to database corruption (see -** below). It is recommended not to use it any longer, instead use -** fts3 (or higher). If you believe that your use of fts2 is safe, -** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. -*/ -/* clang-format off */ -#if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ - && !defined(SQLITE_ENABLE_BROKEN_FTS2) -#error fts2 has a design flaw and has been deprecated. -#endif -/* The flaw is that fts2 uses the content table's unaliased rowid as -** the unique docid. fts2 embeds the rowid in the index it builds, -** and expects the rowid to not change. The SQLite VACUUM operation -** will renumber such rowids, thereby breaking fts2. If you are using -** fts2 in a system which has disabled VACUUM, then you can continue -** to use it safely. Note that PRAGMA auto_vacuum does NOT disable -** VACUUM, though systems using auto_vacuum are unlikely to invoke -** VACUUM. -** -** Unlike fts1, which is safe across VACUUM if you never delete -** documents, fts2 has a second exposure to this flaw, in the segments -** table. So fts2 should be considered unsafe across VACUUM in all -** cases. -*/ - -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This is an SQLite module implementing full-text search. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ - -/* TODO(shess) Consider exporting this comment to an HTML file or the -** wiki. -*/ -/* The full-text index is stored in a series of b+tree (-like) -** structures called segments which map terms to doclists. The -** structures are like b+trees in layout, but are constructed from the -** bottom up in optimal fashion and are not updatable. Since trees -** are built from the bottom up, things will be described from the -** bottom up. -** -** -**** Varints **** -** The basic unit of encoding is a variable-length integer called a -** varint. We encode variable-length integers in little-endian order -** using seven bits * per byte as follows: -** -** KEY: -** A = 0xxxxxxx 7 bits of data and one flag bit -** B = 1xxxxxxx 7 bits of data and one flag bit -** -** 7 bits - A -** 14 bits - BA -** 21 bits - BBA -** and so on. -** -** This is identical to how sqlite encodes varints (see util.c). -** -** -**** Document lists **** -** A doclist (document list) holds a docid-sorted list of hits for a -** given term. Doclists hold docids, and can optionally associate -** token positions and offsets with docids. -** -** A DL_POSITIONS_OFFSETS doclist is stored like this: -** -** array { -** varint docid; -** array { (position list for column 0) -** varint position; (delta from previous position plus POS_BASE) -** varint startOffset; (delta from previous startOffset) -** varint endOffset; (delta from startOffset) -** } -** array { -** varint POS_COLUMN; (marks start of position list for new column) -** varint column; (index of new column) -** array { -** varint position; (delta from previous position plus POS_BASE) -** varint startOffset;(delta from previous startOffset) -** varint endOffset; (delta from startOffset) -** } -** } -** varint POS_END; (marks end of positions for this document. -** } -** -** Here, array { X } means zero or more occurrences of X, adjacent in -** memory. A "position" is an index of a token in the token stream -** generated by the tokenizer, while an "offset" is a byte offset, -** both based at 0. Note that POS_END and POS_COLUMN occur in the -** same logical place as the position element, and act as sentinals -** ending a position list array. -** -** A DL_POSITIONS doclist omits the startOffset and endOffset -** information. A DL_DOCIDS doclist omits both the position and -** offset information, becoming an array of varint-encoded docids. -** -** On-disk data is stored as type DL_DEFAULT, so we don't serialize -** the type. Due to how deletion is implemented in the segmentation -** system, on-disk doclists MUST store at least positions. -** -** -**** Segment leaf nodes **** -** Segment leaf nodes store terms and doclists, ordered by term. Leaf -** nodes are written using LeafWriter, and read using LeafReader (to -** iterate through a single leaf node's data) and LeavesReader (to -** iterate through a segment's entire leaf layer). Leaf nodes have -** the format: -** -** varint iHeight; (height from leaf level, always 0) -** varint nTerm; (length of first term) -** char pTerm[nTerm]; (content of first term) -** varint nDoclist; (length of term's associated doclist) -** char pDoclist[nDoclist]; (content of doclist) -** array { -** (further terms are delta-encoded) -** varint nPrefix; (length of prefix shared with previous term) -** varint nSuffix; (length of unshared suffix) -** char pTermSuffix[nSuffix];(unshared suffix of next term) -** varint nDoclist; (length of term's associated doclist) -** char pDoclist[nDoclist]; (content of doclist) -** } -** -** Here, array { X } means zero or more occurrences of X, adjacent in -** memory. -** -** Leaf nodes are broken into blocks which are stored contiguously in -** the %_segments table in sorted order. This means that when the end -** of a node is reached, the next term is in the node with the next -** greater node id. -** -** New data is spilled to a new leaf node when the current node -** exceeds LEAF_MAX bytes (default 2048). New data which itself is -** larger than STANDALONE_MIN (default 1024) is placed in a standalone -** node (a leaf node with a single term and doclist). The goal of -** these settings is to pack together groups of small doclists while -** making it efficient to directly access large doclists. The -** assumption is that large doclists represent terms which are more -** likely to be query targets. -** -** TODO(shess) It may be useful for blocking decisions to be more -** dynamic. For instance, it may make more sense to have a 2.5k leaf -** node rather than splitting into 2k and .5k nodes. My intuition is -** that this might extend through 2x or 4x the pagesize. -** -** -**** Segment interior nodes **** -** Segment interior nodes store blockids for subtree nodes and terms -** to describe what data is stored by the each subtree. Interior -** nodes are written using InteriorWriter, and read using -** InteriorReader. InteriorWriters are created as needed when -** SegmentWriter creates new leaf nodes, or when an interior node -** itself grows too big and must be split. The format of interior -** nodes: -** -** varint iHeight; (height from leaf level, always >0) -** varint iBlockid; (block id of node's leftmost subtree) -** optional { -** varint nTerm; (length of first term) -** char pTerm[nTerm]; (content of first term) -** array { -** (further terms are delta-encoded) -** varint nPrefix; (length of shared prefix with previous term) -** varint nSuffix; (length of unshared suffix) -** char pTermSuffix[nSuffix]; (unshared suffix of next term) -** } -** } -** -** Here, optional { X } means an optional element, while array { X } -** means zero or more occurrences of X, adjacent in memory. -** -** An interior node encodes n terms separating n+1 subtrees. The -** subtree blocks are contiguous, so only the first subtree's blockid -** is encoded. The subtree at iBlockid will contain all terms less -** than the first term encoded (or all terms if no term is encoded). -** Otherwise, for terms greater than or equal to pTerm[i] but less -** than pTerm[i+1], the subtree for that term will be rooted at -** iBlockid+i. Interior nodes only store enough term data to -** distinguish adjacent children (if the rightmost term of the left -** child is "something", and the leftmost term of the right child is -** "wicked", only "w" is stored). -** -** New data is spilled to a new interior node at the same height when -** the current node exceeds INTERIOR_MAX bytes (default 2048). -** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing -** interior nodes and making the tree too skinny. The interior nodes -** at a given height are naturally tracked by interior nodes at -** height+1, and so on. -** -** -**** Segment directory **** -** The segment directory in table %_segdir stores meta-information for -** merging and deleting segments, and also the root node of the -** segment's tree. -** -** The root node is the top node of the segment's tree after encoding -** the entire segment, restricted to ROOT_MAX bytes (default 1024). -** This could be either a leaf node or an interior node. If the top -** node requires more than ROOT_MAX bytes, it is flushed to %_segments -** and a new root interior node is generated (which should always fit -** within ROOT_MAX because it only needs space for 2 varints, the -** height and the blockid of the previous root). -** -** The meta-information in the segment directory is: -** level - segment level (see below) -** idx - index within level -** - (level,idx uniquely identify a segment) -** start_block - first leaf node -** leaves_end_block - last leaf node -** end_block - last block (including interior nodes) -** root - contents of root node -** -** If the root node is a leaf node, then start_block, -** leaves_end_block, and end_block are all 0. -** -** -**** Segment merging **** -** To amortize update costs, segments are groups into levels and -** merged in matches. Each increase in level represents exponentially -** more documents. -** -** New documents (actually, document updates) are tokenized and -** written individually (using LeafWriter) to a level 0 segment, with -** incrementing idx. When idx reaches MERGE_COUNT (default 16), all -** level 0 segments are merged into a single level 1 segment. Level 1 -** is populated like level 0, and eventually MERGE_COUNT level 1 -** segments are merged to a single level 2 segment (representing -** MERGE_COUNT^2 updates), and so on. -** -** A segment merge traverses all segments at a given level in -** parallel, performing a straightforward sorted merge. Since segment -** leaf nodes are written in to the %_segments table in order, this -** merge traverses the underlying sqlite disk structures efficiently. -** After the merge, all segment blocks from the merged level are -** deleted. -** -** MERGE_COUNT controls how often we merge segments. 16 seems to be -** somewhat of a sweet spot for insertion performance. 32 and 64 show -** very similar performance numbers to 16 on insertion, though they're -** a tiny bit slower (perhaps due to more overhead in merge-time -** sorting). 8 is about 20% slower than 16, 4 about 50% slower than -** 16, 2 about 66% slower than 16. -** -** At query time, high MERGE_COUNT increases the number of segments -** which need to be scanned and merged. For instance, with 100k docs -** inserted: -** -** MERGE_COUNT segments -** 16 25 -** 8 12 -** 4 10 -** 2 6 -** -** This appears to have only a moderate impact on queries for very -** frequent terms (which are somewhat dominated by segment merge -** costs), and infrequent and non-existent terms still seem to be fast -** even with many segments. -** -** TODO(shess) That said, it would be nice to have a better query-side -** argument for MERGE_COUNT of 16. Also, it is possible/likely that -** optimizations to things like doclist merging will swing the sweet -** spot around. -** -** -** -**** Handling of deletions and updates **** -** Since we're using a segmented structure, with no docid-oriented -** index into the term index, we clearly cannot simply update the term -** index when a document is deleted or updated. For deletions, we -** write an empty doclist (varint(docid) varint(POS_END)), for updates -** we simply write the new doclist. Segment merges overwrite older -** data for a particular docid with newer data, so deletes or updates -** will eventually overtake the earlier data and knock it out. The -** query logic likewise merges doclists so that newer data knocks out -** older data. -** -** TODO(shess) Provide a VACUUM type operation to clear out all -** deletions and duplications. This would basically be a forced merge -** into a single segment. -*/ - -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - -#if defined(SQLITE_ENABLE_FTS2) && !defined(SQLITE_CORE) -# define SQLITE_CORE 1 -#endif - -#include -#include -#include -#include -#include "fts2.h" -#include "fts2_hash.h" -#include "fts2_tokenizer.h" -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT1 - - -/* TODO(shess) MAN, this thing needs some refactoring. At minimum, it -** would be nice to order the file better, perhaps something along the -** lines of: -** -** - utility functions -** - table setup functions -** - table update functions -** - table query functions -** -** Put the query functions last because they're likely to reference -** typedefs or functions from the table update section. -*/ - -#if 0 -# define TRACE(A) printf A; fflush(stdout) -#else -# define TRACE(A) -#endif - -/* It is not safe to call isspace(), tolower(), or isalnum() on -** hi-bit-set characters. This is the same solution used in the -** tokenizer. -*/ -/* TODO(shess) The snippet-generation code should be using the -** tokenizer-generated tokens rather than doing its own local -** tokenization. -*/ -/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ -static int safe_isspace(char c){ - return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; -} -static int safe_tolower(char c){ - return (c>='A' && c<='Z') ? (c - 'A' + 'a') : c; -} -static int safe_isalnum(char c){ - return (c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'); -} - -typedef enum DocListType { - DL_DOCIDS, /* docids only */ - DL_POSITIONS, /* docids + positions */ - DL_POSITIONS_OFFSETS /* docids + positions + offsets */ -} DocListType; - -/* -** By default, only positions and not offsets are stored in the doclists. -** To change this so that offsets are stored too, compile with -** -** -DDL_DEFAULT=DL_POSITIONS_OFFSETS -** -** If DL_DEFAULT is set to DL_DOCIDS, your table can only be inserted -** into (no deletes or updates). -*/ -#ifndef DL_DEFAULT -# define DL_DEFAULT DL_POSITIONS -#endif - -enum { - POS_END = 0, /* end of this position list */ - POS_COLUMN, /* followed by new column number */ - POS_BASE -}; - -/* MERGE_COUNT controls how often we merge segments (see comment at -** top of file). -*/ -#define MERGE_COUNT 16 - -/* utility functions */ - -/* CLEAR() and SCRAMBLE() abstract memset() on a pointer to a single -** record to prevent errors of the form: -** -** my_function(SomeType *b){ -** memset(b, '\0', sizeof(b)); // sizeof(b)!=sizeof(*b) -** } -*/ -/* TODO(shess) Obvious candidates for a header file. */ -#define CLEAR(b) memset(b, '\0', sizeof(*(b))) - -#ifndef NDEBUG -# define SCRAMBLE(b) memset(b, 0x55, sizeof(*(b))) -#else -# define SCRAMBLE(b) -#endif - -/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ -#define VARINT_MAX 10 - -/* Write a 64-bit variable-length integer to memory starting at p[0]. - * The length of data written will be between 1 and VARINT_MAX bytes. - * The number of bytes written is returned. */ -static int putVarint(char *p, sqlite_int64 v){ - unsigned char *q = (unsigned char *) p; - sqlite_uint64 vu = v; - do{ - *q++ = (unsigned char) ((vu & 0x7f) | 0x80); - vu >>= 7; - }while( vu!=0 ); - q[-1] &= 0x7f; /* turn off high bit in final byte */ - assert( q - (unsigned char *)p <= VARINT_MAX ); - return (int) (q - (unsigned char *)p); -} - -/* Read a 64-bit variable-length integer from memory starting at p[0]. - * Return the number of bytes read, or 0 on error. - * The value is stored in *v. */ -static int getVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q & 0x80) == 0x80 ){ - x += y * (*q++ & 0x7f); - y <<= 7; - if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ - assert( 0 ); - return 0; - } - } - x += y * (*q++); - *v = (sqlite_int64) x; - return (int) (q - (unsigned char *)p); -} - -static int getVarint32(const char *p, int *pi){ - sqlite_int64 i; - int ret = getVarint(p, &i); - *pi = (int) i; - assert( *pi==i ); - return ret; -} - -/*******************************************************************/ -/* DataBuffer is used to collect data into a buffer in piecemeal -** fashion. It implements the usual distinction between amount of -** data currently stored (nData) and buffer capacity (nCapacity). -** -** dataBufferInit - create a buffer with given initial capacity. -** dataBufferReset - forget buffer's data, retaining capacity. -** dataBufferDestroy - free buffer's data. -** dataBufferSwap - swap contents of two buffers. -** dataBufferExpand - expand capacity without adding data. -** dataBufferAppend - append data. -** dataBufferAppend2 - append two pieces of data at once. -** dataBufferReplace - replace buffer's data. -*/ -typedef struct DataBuffer { - char *pData; /* Pointer to malloc'ed buffer. */ - int nCapacity; /* Size of pData buffer. */ - int nData; /* End of data loaded into pData. */ -} DataBuffer; - -static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){ - assert( nCapacity>=0 ); - pBuffer->nData = 0; - pBuffer->nCapacity = nCapacity; - pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity); -} -static void dataBufferReset(DataBuffer *pBuffer){ - pBuffer->nData = 0; -} -static void dataBufferDestroy(DataBuffer *pBuffer){ - if( pBuffer->pData!=NULL ) sqlite3_free(pBuffer->pData); - SCRAMBLE(pBuffer); -} -static void dataBufferSwap(DataBuffer *pBuffer1, DataBuffer *pBuffer2){ - DataBuffer tmp = *pBuffer1; - *pBuffer1 = *pBuffer2; - *pBuffer2 = tmp; -} -static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){ - assert( nAddCapacity>0 ); - /* TODO(shess) Consider expanding more aggressively. Note that the - ** underlying malloc implementation may take care of such things for - ** us already. - */ - if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){ - pBuffer->nCapacity = pBuffer->nData+nAddCapacity; - pBuffer->pData = sqlite3_realloc(pBuffer->pData, pBuffer->nCapacity); - } -} -static void dataBufferAppend(DataBuffer *pBuffer, - const char *pSource, int nSource){ - assert( nSource>0 && pSource!=NULL ); - dataBufferExpand(pBuffer, nSource); - memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource); - pBuffer->nData += nSource; -} -static void dataBufferAppend2(DataBuffer *pBuffer, - const char *pSource1, int nSource1, - const char *pSource2, int nSource2){ - assert( nSource1>0 && pSource1!=NULL ); - assert( nSource2>0 && pSource2!=NULL ); - dataBufferExpand(pBuffer, nSource1+nSource2); - memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1); - memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2); - pBuffer->nData += nSource1+nSource2; -} -static void dataBufferReplace(DataBuffer *pBuffer, - const char *pSource, int nSource){ - dataBufferReset(pBuffer); - dataBufferAppend(pBuffer, pSource, nSource); -} - -/* StringBuffer is a null-terminated version of DataBuffer. */ -typedef struct StringBuffer { - DataBuffer b; /* Includes null terminator. */ -} StringBuffer; - -static void initStringBuffer(StringBuffer *sb){ - dataBufferInit(&sb->b, 100); - dataBufferReplace(&sb->b, "", 1); -} -static int stringBufferLength(StringBuffer *sb){ - return sb->b.nData-1; -} -static char *stringBufferData(StringBuffer *sb){ - return sb->b.pData; -} -static void stringBufferDestroy(StringBuffer *sb){ - dataBufferDestroy(&sb->b); -} - -static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ - assert( sb->b.nData>0 ); - if( nFrom>0 ){ - sb->b.nData--; - dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1); - } -} -static void append(StringBuffer *sb, const char *zFrom){ - nappend(sb, zFrom, strlen(zFrom)); -} - -/* Append a list of strings separated by commas. */ -static void appendList(StringBuffer *sb, int nString, char **azString){ - int i; - for(i=0; i0 ) append(sb, ", "); - append(sb, azString[i]); - } -} - -static int endsInWhiteSpace(StringBuffer *p){ - return stringBufferLength(p)>0 && - safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]); -} - -/* If the StringBuffer ends in something other than white space, add a -** single space character to the end. -*/ -static void appendWhiteSpace(StringBuffer *p){ - if( stringBufferLength(p)==0 ) return; - if( !endsInWhiteSpace(p) ) append(p, " "); -} - -/* Remove white space from the end of the StringBuffer */ -static void trimWhiteSpace(StringBuffer *p){ - while( endsInWhiteSpace(p) ){ - p->b.pData[--p->b.nData-1] = '\0'; - } -} - -/*******************************************************************/ -/* DLReader is used to read document elements from a doclist. The -** current docid is cached, so dlrDocid() is fast. DLReader does not -** own the doclist buffer. -** -** dlrAtEnd - true if there's no more data to read. -** dlrDocid - docid of current document. -** dlrDocData - doclist data for current document (including docid). -** dlrDocDataBytes - length of same. -** dlrAllDataBytes - length of all remaining data. -** dlrPosData - position data for current document. -** dlrPosDataLen - length of pos data for current document (incl POS_END). -** dlrStep - step to current document. -** dlrInit - initial for doclist of given type against given data. -** dlrDestroy - clean up. -** -** Expected usage is something like: -** -** DLReader reader; -** dlrInit(&reader, pData, nData); -** while( !dlrAtEnd(&reader) ){ -** // calls to dlrDocid() and kin. -** dlrStep(&reader); -** } -** dlrDestroy(&reader); -*/ -typedef struct DLReader { - DocListType iType; - const char *pData; - int nData; - - sqlite_int64 iDocid; - int nElement; -} DLReader; - -static int dlrAtEnd(DLReader *pReader){ - assert( pReader->nData>=0 ); - return pReader->nData==0; -} -static sqlite_int64 dlrDocid(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->iDocid; -} -static const char *dlrDocData(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->pData; -} -static int dlrDocDataBytes(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->nElement; -} -static int dlrAllDataBytes(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->nData; -} -/* TODO(shess) Consider adding a field to track iDocid varint length -** to make these two functions faster. This might matter (a tiny bit) -** for queries. -*/ -static const char *dlrPosData(DLReader *pReader){ - sqlite_int64 iDummy; - int n = getVarint(pReader->pData, &iDummy); - assert( !dlrAtEnd(pReader) ); - return pReader->pData+n; -} -static int dlrPosDataLen(DLReader *pReader){ - sqlite_int64 iDummy; - int n = getVarint(pReader->pData, &iDummy); - assert( !dlrAtEnd(pReader) ); - return pReader->nElement-n; -} -static void dlrStep(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - - /* Skip past current doclist element. */ - assert( pReader->nElement<=pReader->nData ); - pReader->pData += pReader->nElement; - pReader->nData -= pReader->nElement; - - /* If there is more data, read the next doclist element. */ - if( pReader->nData!=0 ){ - sqlite_int64 iDocidDelta; - int iDummy, n = getVarint(pReader->pData, &iDocidDelta); - pReader->iDocid += iDocidDelta; - if( pReader->iType>=DL_POSITIONS ){ - assert( nnData ); - while( 1 ){ - n += getVarint32(pReader->pData+n, &iDummy); - assert( n<=pReader->nData ); - if( iDummy==POS_END ) break; - if( iDummy==POS_COLUMN ){ - n += getVarint32(pReader->pData+n, &iDummy); - assert( nnData ); - }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ - n += getVarint32(pReader->pData+n, &iDummy); - n += getVarint32(pReader->pData+n, &iDummy); - assert( nnData ); - } - } - } - pReader->nElement = n; - assert( pReader->nElement<=pReader->nData ); - } -} -static void dlrInit(DLReader *pReader, DocListType iType, - const char *pData, int nData){ - assert( pData!=NULL && nData!=0 ); - pReader->iType = iType; - pReader->pData = pData; - pReader->nData = nData; - pReader->nElement = 0; - pReader->iDocid = 0; - - /* Load the first element's data. There must be a first element. */ - dlrStep(pReader); -} -static void dlrDestroy(DLReader *pReader){ - SCRAMBLE(pReader); -} - -#ifndef NDEBUG -/* Verify that the doclist can be validly decoded. Also returns the -** last docid found because it is convenient in other assertions for -** DLWriter. -*/ -static void docListValidate(DocListType iType, const char *pData, int nData, - sqlite_int64 *pLastDocid){ - sqlite_int64 iPrevDocid = 0; - assert( nData>0 ); - assert( pData!=0 ); - assert( pData+nData>pData ); - while( nData!=0 ){ - sqlite_int64 iDocidDelta; - int n = getVarint(pData, &iDocidDelta); - iPrevDocid += iDocidDelta; - if( iType>DL_DOCIDS ){ - int iDummy; - while( 1 ){ - n += getVarint32(pData+n, &iDummy); - if( iDummy==POS_END ) break; - if( iDummy==POS_COLUMN ){ - n += getVarint32(pData+n, &iDummy); - }else if( iType>DL_POSITIONS ){ - n += getVarint32(pData+n, &iDummy); - n += getVarint32(pData+n, &iDummy); - } - assert( n<=nData ); - } - } - assert( n<=nData ); - pData += n; - nData -= n; - } - if( pLastDocid ) *pLastDocid = iPrevDocid; -} -#define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o) -#else -#define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 ) -#endif - -/*******************************************************************/ -/* DLWriter is used to write doclist data to a DataBuffer. DLWriter -** always appends to the buffer and does not own it. -** -** dlwInit - initialize to write a given type doclistto a buffer. -** dlwDestroy - clear the writer's memory. Does not free buffer. -** dlwAppend - append raw doclist data to buffer. -** dlwCopy - copy next doclist from reader to writer. -** dlwAdd - construct doclist element and append to buffer. -** Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter). -*/ -typedef struct DLWriter { - DocListType iType; - DataBuffer *b; - sqlite_int64 iPrevDocid; -#ifndef NDEBUG - int has_iPrevDocid; -#endif -} DLWriter; - -static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){ - pWriter->b = b; - pWriter->iType = iType; - pWriter->iPrevDocid = 0; -#ifndef NDEBUG - pWriter->has_iPrevDocid = 0; -#endif -} -static void dlwDestroy(DLWriter *pWriter){ - SCRAMBLE(pWriter); -} -/* iFirstDocid is the first docid in the doclist in pData. It is -** needed because pData may point within a larger doclist, in which -** case the first item would be delta-encoded. -** -** iLastDocid is the final docid in the doclist in pData. It is -** needed to create the new iPrevDocid for future delta-encoding. The -** code could decode the passed doclist to recreate iLastDocid, but -** the only current user (docListMerge) already has decoded this -** information. -*/ -/* TODO(shess) This has become just a helper for docListMerge. -** Consider a refactor to make this cleaner. -*/ -static void dlwAppend(DLWriter *pWriter, - const char *pData, int nData, - sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ - sqlite_int64 iDocid = 0; - char c[VARINT_MAX]; - int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ -#ifndef NDEBUG - sqlite_int64 iLastDocidDelta; -#endif - - /* Recode the initial docid as delta from iPrevDocid. */ - nFirstOld = getVarint(pData, &iDocid); - assert( nFirstOldiType==DL_DOCIDS) ); - nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); - - /* Verify that the incoming doclist is valid AND that it ends with - ** the expected docid. This is essential because we'll trust this - ** docid in future delta-encoding. - */ - ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); - assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); - - /* Append recoded initial docid and everything else. Rest of docids - ** should have been delta-encoded from previous initial docid. - */ - if( nFirstOldb, c, nFirstNew, - pData+nFirstOld, nData-nFirstOld); - }else{ - dataBufferAppend(pWriter->b, c, nFirstNew); - } - pWriter->iPrevDocid = iLastDocid; -} -static void dlwCopy(DLWriter *pWriter, DLReader *pReader){ - dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), - dlrDocid(pReader), dlrDocid(pReader)); -} -static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ - char c[VARINT_MAX]; - int n = putVarint(c, iDocid-pWriter->iPrevDocid); - - /* Docids must ascend. */ - assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); - assert( pWriter->iType==DL_DOCIDS ); - - dataBufferAppend(pWriter->b, c, n); - pWriter->iPrevDocid = iDocid; -#ifndef NDEBUG - pWriter->has_iPrevDocid = 1; -#endif -} - -/*******************************************************************/ -/* PLReader is used to read data from a document's position list. As -** the caller steps through the list, data is cached so that varints -** only need to be decoded once. -** -** plrInit, plrDestroy - create/destroy a reader. -** plrColumn, plrPosition, plrStartOffset, plrEndOffset - accessors -** plrAtEnd - at end of stream, only call plrDestroy once true. -** plrStep - step to the next element. -*/ -typedef struct PLReader { - /* These refer to the next position's data. nData will reach 0 when - ** reading the last position, so plrStep() signals EOF by setting - ** pData to NULL. - */ - const char *pData; - int nData; - - DocListType iType; - int iColumn; /* the last column read */ - int iPosition; /* the last position read */ - int iStartOffset; /* the last start offset read */ - int iEndOffset; /* the last end offset read */ -} PLReader; - -static int plrAtEnd(PLReader *pReader){ - return pReader->pData==NULL; -} -static int plrColumn(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iColumn; -} -static int plrPosition(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iPosition; -} -static int plrStartOffset(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iStartOffset; -} -static int plrEndOffset(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iEndOffset; -} -static void plrStep(PLReader *pReader){ - int i, n; - - assert( !plrAtEnd(pReader) ); - - if( pReader->nData==0 ){ - pReader->pData = NULL; - return; - } - - n = getVarint32(pReader->pData, &i); - if( i==POS_COLUMN ){ - n += getVarint32(pReader->pData+n, &pReader->iColumn); - pReader->iPosition = 0; - pReader->iStartOffset = 0; - n += getVarint32(pReader->pData+n, &i); - } - /* Should never see adjacent column changes. */ - assert( i!=POS_COLUMN ); - - if( i==POS_END ){ - pReader->nData = 0; - pReader->pData = NULL; - return; - } - - pReader->iPosition += i-POS_BASE; - if( pReader->iType==DL_POSITIONS_OFFSETS ){ - n += getVarint32(pReader->pData+n, &i); - pReader->iStartOffset += i; - n += getVarint32(pReader->pData+n, &i); - pReader->iEndOffset = pReader->iStartOffset+i; - } - assert( n<=pReader->nData ); - pReader->pData += n; - pReader->nData -= n; -} - -static void plrInit(PLReader *pReader, DLReader *pDLReader){ - pReader->pData = dlrPosData(pDLReader); - pReader->nData = dlrPosDataLen(pDLReader); - pReader->iType = pDLReader->iType; - pReader->iColumn = 0; - pReader->iPosition = 0; - pReader->iStartOffset = 0; - pReader->iEndOffset = 0; - plrStep(pReader); -} -static void plrDestroy(PLReader *pReader){ - SCRAMBLE(pReader); -} - -/*******************************************************************/ -/* PLWriter is used in constructing a document's position list. As a -** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. -** PLWriter writes to the associated DLWriter's buffer. -** -** plwInit - init for writing a document's poslist. -** plwDestroy - clear a writer. -** plwAdd - append position and offset information. -** plwCopy - copy next position's data from reader to writer. -** plwTerminate - add any necessary doclist terminator. -** -** Calling plwAdd() after plwTerminate() may result in a corrupt -** doclist. -*/ -/* TODO(shess) Until we've written the second item, we can cache the -** first item's information. Then we'd have three states: -** -** - initialized with docid, no positions. -** - docid and one position. -** - docid and multiple positions. -** -** Only the last state needs to actually write to dlw->b, which would -** be an improvement in the DLCollector case. -*/ -typedef struct PLWriter { - DLWriter *dlw; - - int iColumn; /* the last column written */ - int iPos; /* the last position written */ - int iOffset; /* the last start offset written */ -} PLWriter; - -/* TODO(shess) In the case where the parent is reading these values -** from a PLReader, we could optimize to a copy if that PLReader has -** the same type as pWriter. -*/ -static void plwAdd(PLWriter *pWriter, int iColumn, int iPos, - int iStartOffset, int iEndOffset){ - /* Worst-case space for POS_COLUMN, iColumn, iPosDelta, - ** iStartOffsetDelta, and iEndOffsetDelta. - */ - char c[5*VARINT_MAX]; - int n = 0; - - /* Ban plwAdd() after plwTerminate(). */ - assert( pWriter->iPos!=-1 ); - - if( pWriter->dlw->iType==DL_DOCIDS ) return; - - if( iColumn!=pWriter->iColumn ){ - n += putVarint(c+n, POS_COLUMN); - n += putVarint(c+n, iColumn); - pWriter->iColumn = iColumn; - pWriter->iPos = 0; - pWriter->iOffset = 0; - } - assert( iPos>=pWriter->iPos ); - n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos)); - pWriter->iPos = iPos; - if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){ - assert( iStartOffset>=pWriter->iOffset ); - n += putVarint(c+n, iStartOffset-pWriter->iOffset); - pWriter->iOffset = iStartOffset; - assert( iEndOffset>=iStartOffset ); - n += putVarint(c+n, iEndOffset-iStartOffset); - } - dataBufferAppend(pWriter->dlw->b, c, n); -} -static void plwCopy(PLWriter *pWriter, PLReader *pReader){ - plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader), - plrStartOffset(pReader), plrEndOffset(pReader)); -} -static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){ - char c[VARINT_MAX]; - int n; - - pWriter->dlw = dlw; - - /* Docids must ascend. */ - assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid ); - n = putVarint(c, iDocid-pWriter->dlw->iPrevDocid); - dataBufferAppend(pWriter->dlw->b, c, n); - pWriter->dlw->iPrevDocid = iDocid; -#ifndef NDEBUG - pWriter->dlw->has_iPrevDocid = 1; -#endif - - pWriter->iColumn = 0; - pWriter->iPos = 0; - pWriter->iOffset = 0; -} -/* TODO(shess) Should plwDestroy() also terminate the doclist? But -** then plwDestroy() would no longer be just a destructor, it would -** also be doing work, which isn't consistent with the overall idiom. -** Another option would be for plwAdd() to always append any necessary -** terminator, so that the output is always correct. But that would -** add incremental work to the common case with the only benefit being -** API elegance. Punt for now. -*/ -static void plwTerminate(PLWriter *pWriter){ - if( pWriter->dlw->iType>DL_DOCIDS ){ - char c[VARINT_MAX]; - int n = putVarint(c, POS_END); - dataBufferAppend(pWriter->dlw->b, c, n); - } -#ifndef NDEBUG - /* Mark as terminated for assert in plwAdd(). */ - pWriter->iPos = -1; -#endif -} -static void plwDestroy(PLWriter *pWriter){ - SCRAMBLE(pWriter); -} - -/*******************************************************************/ -/* DLCollector wraps PLWriter and DLWriter to provide a -** dynamically-allocated doclist area to use during tokenization. -** -** dlcNew - malloc up and initialize a collector. -** dlcDelete - destroy a collector and all contained items. -** dlcAddPos - append position and offset information. -** dlcAddDoclist - add the collected doclist to the given buffer. -** dlcNext - terminate the current document and open another. -*/ -typedef struct DLCollector { - DataBuffer b; - DLWriter dlw; - PLWriter plw; -} DLCollector; - -/* TODO(shess) This could also be done by calling plwTerminate() and -** dataBufferAppend(). I tried that, expecting nominal performance -** differences, but it seemed to pretty reliably be worth 1% to code -** it this way. I suspect it is the incremental malloc overhead (some -** percentage of the plwTerminate() calls will cause a realloc), so -** this might be worth revisiting if the DataBuffer implementation -** changes. -*/ -static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){ - if( pCollector->dlw.iType>DL_DOCIDS ){ - char c[VARINT_MAX]; - int n = putVarint(c, POS_END); - dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n); - }else{ - dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData); - } -} -static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){ - plwTerminate(&pCollector->plw); - plwDestroy(&pCollector->plw); - plwInit(&pCollector->plw, &pCollector->dlw, iDocid); -} -static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos, - int iStartOffset, int iEndOffset){ - plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset); -} - -static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){ - DLCollector *pCollector = sqlite3_malloc(sizeof(DLCollector)); - dataBufferInit(&pCollector->b, 0); - dlwInit(&pCollector->dlw, iType, &pCollector->b); - plwInit(&pCollector->plw, &pCollector->dlw, iDocid); - return pCollector; -} -static void dlcDelete(DLCollector *pCollector){ - plwDestroy(&pCollector->plw); - dlwDestroy(&pCollector->dlw); - dataBufferDestroy(&pCollector->b); - SCRAMBLE(pCollector); - sqlite3_free(pCollector); -} - - -/* Copy the doclist data of iType in pData/nData into *out, trimming -** unnecessary data as we go. Only columns matching iColumn are -** copied, all columns copied if iColumn is -1. Elements with no -** matching columns are dropped. The output is an iOutType doclist. -*/ -/* NOTE(shess) This code is only valid after all doclists are merged. -** If this is run before merges, then doclist items which represent -** deletion will be trimmed, and will thus not effect a deletion -** during the merge. -*/ -static void docListTrim(DocListType iType, const char *pData, int nData, - int iColumn, DocListType iOutType, DataBuffer *out){ - DLReader dlReader; - DLWriter dlWriter; - - assert( iOutType<=iType ); - - dlrInit(&dlReader, iType, pData, nData); - dlwInit(&dlWriter, iOutType, out); - - while( !dlrAtEnd(&dlReader) ){ - PLReader plReader; - PLWriter plWriter; - int match = 0; - - plrInit(&plReader, &dlReader); - - while( !plrAtEnd(&plReader) ){ - if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ - if( !match ){ - plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); - match = 1; - } - plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), - plrStartOffset(&plReader), plrEndOffset(&plReader)); - } - plrStep(&plReader); - } - if( match ){ - plwTerminate(&plWriter); - plwDestroy(&plWriter); - } - - plrDestroy(&plReader); - dlrStep(&dlReader); - } - dlwDestroy(&dlWriter); - dlrDestroy(&dlReader); -} - -/* Used by docListMerge() to keep doclists in the ascending order by -** docid, then ascending order by age (so the newest comes first). -*/ -typedef struct OrderedDLReader { - DLReader *pReader; - - /* TODO(shess) If we assume that docListMerge pReaders is ordered by - ** age (which we do), then we could use pReader comparisons to break - ** ties. - */ - int idx; -} OrderedDLReader; - -/* Order eof to end, then by docid asc, idx desc. */ -static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){ - if( dlrAtEnd(r1->pReader) ){ - if( dlrAtEnd(r2->pReader) ) return 0; /* Both atEnd(). */ - return 1; /* Only r1 atEnd(). */ - } - if( dlrAtEnd(r2->pReader) ) return -1; /* Only r2 atEnd(). */ - - if( dlrDocid(r1->pReader)pReader) ) return -1; - if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1; - - /* Descending on idx. */ - return r2->idx-r1->idx; -} - -/* Bubble p[0] to appropriate place in p[1..n-1]. Assumes that -** p[1..n-1] is already sorted. -*/ -/* TODO(shess) Is this frequent enough to warrant a binary search? -** Before implementing that, instrument the code to check. In most -** current usage, I expect that p[0] will be less than p[1] a very -** high proportion of the time. -*/ -static void orderedDLReaderReorder(OrderedDLReader *p, int n){ - while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){ - OrderedDLReader tmp = p[0]; - p[0] = p[1]; - p[1] = tmp; - n--; - p++; - } -} - -/* Given an array of doclist readers, merge their doclist elements -** into out in sorted order (by docid), dropping elements from older -** readers when there is a duplicate docid. pReaders is assumed to be -** ordered by age, oldest first. -*/ -/* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably -** be fixed. -*/ -static void docListMerge(DataBuffer *out, - DLReader *pReaders, int nReaders){ - OrderedDLReader readers[MERGE_COUNT]; - DLWriter writer; - int i, n; - const char *pStart = 0; - int nStart = 0; - sqlite_int64 iFirstDocid = 0, iLastDocid = 0; - - assert( nReaders>0 ); - if( nReaders==1 ){ - dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); - return; - } - - assert( nReaders<=MERGE_COUNT ); - n = 0; - for(i=0; i0 ){ - orderedDLReaderReorder(readers+i, nReaders-i); - } - - dlwInit(&writer, pReaders[0].iType, out); - while( !dlrAtEnd(readers[0].pReader) ){ - sqlite_int64 iDocid = dlrDocid(readers[0].pReader); - - /* If this is a continuation of the current buffer to copy, extend - ** that buffer. memcpy() seems to be more efficient if it has a - ** lots of data to copy. - */ - if( dlrDocData(readers[0].pReader)==pStart+nStart ){ - nStart += dlrDocDataBytes(readers[0].pReader); - }else{ - if( pStart!=0 ){ - dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); - } - pStart = dlrDocData(readers[0].pReader); - nStart = dlrDocDataBytes(readers[0].pReader); - iFirstDocid = iDocid; - } - iLastDocid = iDocid; - dlrStep(readers[0].pReader); - - /* Drop all of the older elements with the same docid. */ - for(i=1; i0 ){ - orderedDLReaderReorder(readers+i, nReaders-i); - } - } - - /* Copy over any remaining elements. */ - if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); - dlwDestroy(&writer); -} - -/* Helper function for posListUnion(). Compares the current position -** between left and right, returning as standard C idiom of <0 if -** left0 if left>right, and 0 if left==right. "End" always -** compares greater. -*/ -static int posListCmp(PLReader *pLeft, PLReader *pRight){ - assert( pLeft->iType==pRight->iType ); - if( pLeft->iType==DL_DOCIDS ) return 0; - - if( plrAtEnd(pLeft) ) return plrAtEnd(pRight) ? 0 : 1; - if( plrAtEnd(pRight) ) return -1; - - if( plrColumn(pLeft)plrColumn(pRight) ) return 1; - - if( plrPosition(pLeft)plrPosition(pRight) ) return 1; - if( pLeft->iType==DL_POSITIONS ) return 0; - - if( plrStartOffset(pLeft)plrStartOffset(pRight) ) return 1; - - if( plrEndOffset(pLeft)plrEndOffset(pRight) ) return 1; - - return 0; -} - -/* Write the union of position lists in pLeft and pRight to pOut. -** "Union" in this case meaning "All unique position tuples". Should -** work with any doclist type, though both inputs and the output -** should be the same type. -*/ -static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ - PLReader left, right; - PLWriter writer; - - assert( dlrDocid(pLeft)==dlrDocid(pRight) ); - assert( pLeft->iType==pRight->iType ); - assert( pLeft->iType==pOut->iType ); - - plrInit(&left, pLeft); - plrInit(&right, pRight); - plwInit(&writer, pOut, dlrDocid(pLeft)); - - while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ - int c = posListCmp(&left, &right); - if( c<0 ){ - plwCopy(&writer, &left); - plrStep(&left); - }else if( c>0 ){ - plwCopy(&writer, &right); - plrStep(&right); - }else{ - plwCopy(&writer, &left); - plrStep(&left); - plrStep(&right); - } - } - - plwTerminate(&writer); - plwDestroy(&writer); - plrDestroy(&left); - plrDestroy(&right); -} - -/* Write the union of doclists in pLeft and pRight to pOut. For -** docids in common between the inputs, the union of the position -** lists is written. Inputs and outputs are always type DL_DEFAULT. -*/ -static void docListUnion( - const char *pLeft, int nLeft, - const char *pRight, int nRight, - DataBuffer *pOut /* Write the combined doclist here */ -){ - DLReader left, right; - DLWriter writer; - - if( nLeft==0 ){ - if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); - return; - } - if( nRight==0 ){ - dataBufferAppend(pOut, pLeft, nLeft); - return; - } - - dlrInit(&left, DL_DEFAULT, pLeft, nLeft); - dlrInit(&right, DL_DEFAULT, pRight, nRight); - dlwInit(&writer, DL_DEFAULT, pOut); - - while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ - if( dlrAtEnd(&right) ){ - dlwCopy(&writer, &left); - dlrStep(&left); - }else if( dlrAtEnd(&left) ){ - dlwCopy(&writer, &right); - dlrStep(&right); - }else if( dlrDocid(&left)dlrDocid(&right) ){ - dlwCopy(&writer, &right); - dlrStep(&right); - }else{ - posListUnion(&left, &right, &writer); - dlrStep(&left); - dlrStep(&right); - } - } - - dlrDestroy(&left); - dlrDestroy(&right); - dlwDestroy(&writer); -} - -/* pLeft and pRight are DLReaders positioned to the same docid. -** -** If there are no instances in pLeft or pRight where the position -** of pLeft is one less than the position of pRight, then this -** routine adds nothing to pOut. -** -** If there are one or more instances where positions from pLeft -** are exactly one less than positions from pRight, then add a new -** document record to pOut. If pOut wants to hold positions, then -** include the positions from pRight that are one more than a -** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. -*/ -static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight, - DLWriter *pOut){ - PLReader left, right; - PLWriter writer; - int match = 0; - - assert( dlrDocid(pLeft)==dlrDocid(pRight) ); - assert( pOut->iType!=DL_POSITIONS_OFFSETS ); - - plrInit(&left, pLeft); - plrInit(&right, pRight); - - while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ - if( plrColumn(&left)plrColumn(&right) ){ - plrStep(&right); - }else if( plrPosition(&left)+1plrPosition(&right) ){ - plrStep(&right); - }else{ - if( !match ){ - plwInit(&writer, pOut, dlrDocid(pLeft)); - match = 1; - } - plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); - plrStep(&left); - plrStep(&right); - } - } - - if( match ){ - plwTerminate(&writer); - plwDestroy(&writer); - } - - plrDestroy(&left); - plrDestroy(&right); -} - -/* We have two doclists with positions: pLeft and pRight. -** Write the phrase intersection of these two doclists into pOut. -** -** A phrase intersection means that two documents only match -** if pLeft.iPos+1==pRight.iPos. -** -** iType controls the type of data written to pOut. If iType is -** DL_POSITIONS, the positions are those from pRight. -*/ -static void docListPhraseMerge( - const char *pLeft, int nLeft, - const char *pRight, int nRight, - DocListType iType, - DataBuffer *pOut /* Write the combined doclist here */ -){ - DLReader left, right; - DLWriter writer; - - if( nLeft==0 || nRight==0 ) return; - - assert( iType!=DL_POSITIONS_OFFSETS ); - - dlrInit(&left, DL_POSITIONS, pLeft, nLeft); - dlrInit(&right, DL_POSITIONS, pRight, nRight); - dlwInit(&writer, iType, pOut); - - while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ - if( dlrDocid(&left) one AND (two OR three) - * [one OR two three] ==> (one OR two) AND three - * - * A "-" before a term matches all entries that lack that term. - * The "-" must occur immediately before the term with in intervening - * space. This is how the search engines do it. - * - * A NOT term cannot be the right-hand operand of an OR. If this - * occurs in the query string, the NOT is ignored: - * - * [one OR -two] ==> one OR two - * - */ -typedef struct Query { - fulltext_vtab *pFts; /* The full text index */ - int nTerms; /* Number of terms in the query */ - QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ - int nextIsOr; /* Set the isOr flag on the next inserted term */ - int nextColumn; /* Next word parsed must be in this column */ - int dfltColumn; /* The default column */ -} Query; - - -/* -** An instance of the following structure keeps track of generated -** matching-word offset information and snippets. -*/ -typedef struct Snippet { - int nMatch; /* Total number of matches */ - int nAlloc; /* Space allocated for aMatch[] */ - struct snippetMatch { /* One entry for each matching term */ - char snStatus; /* Status flag for use while constructing snippets */ - short int iCol; /* The column that contains the match */ - short int iTerm; /* The index in Query.pTerms[] of the matching term */ - short int nByte; /* Number of bytes in the term */ - int iStart; /* The offset to the first character of the term */ - } *aMatch; /* Points to space obtained from malloc */ - char *zOffset; /* Text rendering of aMatch[] */ - int nOffset; /* strlen(zOffset) */ - char *zSnippet; /* Snippet text */ - int nSnippet; /* strlen(zSnippet) */ -} Snippet; - - -typedef enum QueryType { - QUERY_GENERIC, /* table scan */ - QUERY_ROWID, /* lookup by rowid */ - QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/ -} QueryType; - -typedef enum fulltext_statement { - CONTENT_INSERT_STMT, - CONTENT_SELECT_STMT, - CONTENT_UPDATE_STMT, - CONTENT_DELETE_STMT, - CONTENT_EXISTS_STMT, - - BLOCK_INSERT_STMT, - BLOCK_SELECT_STMT, - BLOCK_DELETE_STMT, - BLOCK_DELETE_ALL_STMT, - - SEGDIR_MAX_INDEX_STMT, - SEGDIR_SET_STMT, - SEGDIR_SELECT_LEVEL_STMT, - SEGDIR_SPAN_STMT, - SEGDIR_DELETE_STMT, - SEGDIR_SELECT_SEGMENT_STMT, - SEGDIR_SELECT_ALL_STMT, - SEGDIR_DELETE_ALL_STMT, - SEGDIR_COUNT_STMT, - - MAX_STMT /* Always at end! */ -} fulltext_statement; - -/* These must exactly match the enum above. */ -/* TODO(shess): Is there some risk that a statement will be used in two -** cursors at once, e.g. if a query joins a virtual table to itself? -** If so perhaps we should move some of these to the cursor object. -*/ -static const char *const fulltext_zStatement[MAX_STMT] = { - /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ - /* CONTENT_SELECT */ "select * from %_content where rowid = ?", - /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ - /* CONTENT_DELETE */ "delete from %_content where rowid = ?", - /* CONTENT_EXISTS */ "select rowid from %_content limit 1", - - /* BLOCK_INSERT */ "insert into %_segments values (?)", - /* BLOCK_SELECT */ "select block from %_segments where rowid = ?", - /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?", - /* BLOCK_DELETE_ALL */ "delete from %_segments", - - /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", - /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", - /* SEGDIR_SELECT_LEVEL */ - "select start_block, leaves_end_block, root from %_segdir " - " where level = ? order by idx", - /* SEGDIR_SPAN */ - "select min(start_block), max(end_block) from %_segdir " - " where level = ? and start_block <> 0", - /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", - - /* NOTE(shess): The first three results of the following two - ** statements must match. - */ - /* SEGDIR_SELECT_SEGMENT */ - "select start_block, leaves_end_block, root from %_segdir " - " where level = ? and idx = ?", - /* SEGDIR_SELECT_ALL */ - "select start_block, leaves_end_block, root from %_segdir " - " order by level desc, idx asc", - /* SEGDIR_DELETE_ALL */ "delete from %_segdir", - /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir", -}; - -/* -** A connection to a fulltext index is an instance of the following -** structure. The xCreate and xConnect methods create an instance -** of this structure and xDestroy and xDisconnect free that instance. -** All other methods receive a pointer to the structure as one of their -** arguments. -*/ -struct fulltext_vtab { - sqlite3_vtab base; /* Base class used by SQLite core */ - sqlite3 *db; /* The database connection */ - const char *zDb; /* logical database name */ - const char *zName; /* virtual table name */ - int nColumn; /* number of columns in virtual table */ - char **azColumn; /* column names. malloced */ - char **azContentColumn; /* column names in content table; malloced */ - sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ - - /* Precompiled statements which we keep as long as the table is - ** open. - */ - sqlite3_stmt *pFulltextStatements[MAX_STMT]; - - /* Precompiled statements used for segment merges. We run a - ** separate select across the leaf level of each tree being merged. - */ - sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT]; - /* The statement used to prepare pLeafSelectStmts. */ -#define LEAF_SELECT \ - "select block from %_segments where rowid between ? and ? order by rowid" - - /* These buffer pending index updates during transactions. - ** nPendingData estimates the memory size of the pending data. It - ** doesn't include the hash-bucket overhead, nor any malloc - ** overhead. When nPendingData exceeds kPendingThreshold, the - ** buffer is flushed even before the transaction closes. - ** pendingTerms stores the data, and is only valid when nPendingData - ** is >=0 (nPendingData<0 means pendingTerms has not been - ** initialized). iPrevDocid is the last docid written, used to make - ** certain we're inserting in sorted order. - */ - int nPendingData; -#define kPendingThreshold (1*1024*1024) - sqlite_int64 iPrevDocid; - fts2Hash pendingTerms; -}; - -/* -** When the core wants to do a query, it create a cursor using a -** call to xOpen. This structure is an instance of a cursor. It -** is destroyed by xClose. -*/ -typedef struct fulltext_cursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */ - sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ - int eof; /* True if at End Of Results */ - Query q; /* Parsed query string */ - Snippet snippet; /* Cached snippet for the current row */ - int iColumn; /* Column being searched */ - DataBuffer result; /* Doclist results from fulltextQuery */ - DLReader reader; /* Result reader if result not empty */ -} fulltext_cursor; - -static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ - return (fulltext_vtab *) c->base.pVtab; -} - -static const sqlite3_module fts2Module; /* forward declaration */ - -/* Return a dynamically generated statement of the form - * insert into %_content (rowid, ...) values (?, ...) - */ -static const char *contentInsertStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "insert into %_content (rowid, "); - appendList(&sb, v->nColumn, v->azContentColumn); - append(&sb, ") values (?"); - for(i=0; inColumn; ++i) - append(&sb, ", ?"); - append(&sb, ")"); - return stringBufferData(&sb); -} - -/* Return a dynamically generated statement of the form - * update %_content set [col_0] = ?, [col_1] = ?, ... - * where rowid = ? - */ -static const char *contentUpdateStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "update %_content set "); - for(i=0; inColumn; ++i) { - if( i>0 ){ - append(&sb, ", "); - } - append(&sb, v->azContentColumn[i]); - append(&sb, " = ?"); - } - append(&sb, " where rowid = ?"); - return stringBufferData(&sb); -} - -/* Puts a freshly-prepared statement determined by iStmt in *ppStmt. -** If the indicated statement has never been prepared, it is prepared -** and cached, otherwise the cached version is reset. -*/ -static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - assert( iStmtpFulltextStatements[iStmt]==NULL ){ - const char *zStmt; - int rc; - switch( iStmt ){ - case CONTENT_INSERT_STMT: - zStmt = contentInsertStatement(v); break; - case CONTENT_UPDATE_STMT: - zStmt = contentUpdateStatement(v); break; - default: - zStmt = fulltext_zStatement[iStmt]; - } - rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt], - zStmt); - if( zStmt != fulltext_zStatement[iStmt]) sqlite3_free((void *) zStmt); - if( rc!=SQLITE_OK ) return rc; - } else { - int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); - if( rc!=SQLITE_OK ) return rc; - } - - *ppStmt = v->pFulltextStatements[iStmt]; - return SQLITE_OK; -} - -/* Like sqlite3_step(), but convert SQLITE_DONE to SQLITE_OK and -** SQLITE_ROW to SQLITE_ERROR. Useful for statements like UPDATE, -** where we expect no results. -*/ -static int sql_single_step(sqlite3_stmt *s){ - int rc = sqlite3_step(s); - return (rc==SQLITE_DONE) ? SQLITE_OK : rc; -} - -/* Like sql_get_statement(), but for special replicated LEAF_SELECT -** statements. idx -1 is a special case for an uncached version of -** the statement (used in the optimize implementation). -*/ -/* TODO(shess) Write version for generic statements and then share -** that between the cached-statement functions. -*/ -static int sql_get_leaf_statement(fulltext_vtab *v, int idx, - sqlite3_stmt **ppStmt){ - assert( idx>=-1 && idxdb, v->zDb, v->zName, ppStmt, LEAF_SELECT); - }else if( v->pLeafSelectStmts[idx]==NULL ){ - int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx], - LEAF_SELECT); - if( rc!=SQLITE_OK ) return rc; - }else{ - int rc = sqlite3_reset(v->pLeafSelectStmts[idx]); - if( rc!=SQLITE_OK ) return rc; - } - - *ppStmt = v->pLeafSelectStmts[idx]; - return SQLITE_OK; -} - -/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */ -static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, - sqlite3_value **pValues){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_value(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 2+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - return sql_single_step(s); -} - -/* update %_content set col0 = pValues[0], col1 = pValues[1], ... - * where rowid = [iRowid] */ -static int content_update(fulltext_vtab *v, sqlite3_value **pValues, - sqlite_int64 iRowid){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 1+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -static void freeStringArray(int nString, const char **pString){ - int i; - - for (i=0 ; i < nString ; ++i) { - if( pString[i]!=NULL ) sqlite3_free((void *) pString[i]); - } - sqlite3_free((void *) pString); -} - -/* select * from %_content where rowid = [iRow] - * The caller must delete the returned array and all strings in it. - * null fields will be NULL in the returned array. - * - * TODO: Perhaps we should return pointer/length strings here for consistency - * with other code which uses pointer/length. */ -static int content_select(fulltext_vtab *v, sqlite_int64 iRow, - const char ***pValues){ - sqlite3_stmt *s; - const char **values; - int i; - int rc; - - *pValues = NULL; - - rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc!=SQLITE_ROW ) return rc; - - values = (const char **) sqlite3_malloc(v->nColumn * sizeof(const char *)); - for(i=0; inColumn; ++i){ - if( sqlite3_column_type(s, i)==SQLITE_NULL ){ - values[i] = NULL; - }else{ - values[i] = string_dup((char*)sqlite3_column_text(s, i)); - } - } - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ){ - *pValues = values; - return SQLITE_OK; - } - - freeStringArray(v->nColumn, values); - return rc; -} - -/* delete from %_content where rowid = [iRow ] */ -static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Returns SQLITE_ROW if any rows exist in %_content, SQLITE_DONE if -** no rows exist, and any error in case of failure. -*/ -static int content_exists(fulltext_vtab *v){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc!=SQLITE_ROW ) return rc; - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_ROW; - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - return rc; -} - -/* insert into %_segments values ([pData]) -** returns assigned rowid in *piBlockid -*/ -static int block_insert(fulltext_vtab *v, const char *pData, int nData, - sqlite_int64 *piBlockid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, BLOCK_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 1, pData, nData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - - *piBlockid = sqlite3_last_insert_rowid(v->db); - return SQLITE_OK; -} - -/* delete from %_segments -** where rowid between [iStartBlockid] and [iEndBlockid] -** -** Deletes the range of blocks, inclusive, used to delete the blocks -** which form a segment. -*/ -static int block_delete(fulltext_vtab *v, - sqlite_int64 iStartBlockid, sqlite_int64 iEndBlockid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, BLOCK_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iStartBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Returns SQLITE_ROW with *pidx set to the maximum segment idx found -** at iLevel. Returns SQLITE_DONE if there are no segments at -** iLevel. Otherwise returns an error. -*/ -static int segdir_max_index(fulltext_vtab *v, int iLevel, int *pidx){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_MAX_INDEX_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - /* Should always get at least one row due to how max() works. */ - if( rc==SQLITE_DONE ) return SQLITE_DONE; - if( rc!=SQLITE_ROW ) return rc; - - /* NULL means that there were no inputs to max(). */ - if( SQLITE_NULL==sqlite3_column_type(s, 0) ){ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - return rc; - } - - *pidx = sqlite3_column_int(s, 0); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - return SQLITE_ROW; -} - -/* insert into %_segdir values ( -** [iLevel], [idx], -** [iStartBlockid], [iLeavesEndBlockid], [iEndBlockid], -** [pRootData] -** ) -*/ -static int segdir_set(fulltext_vtab *v, int iLevel, int idx, - sqlite_int64 iStartBlockid, - sqlite_int64 iLeavesEndBlockid, - sqlite_int64 iEndBlockid, - const char *pRootData, int nRootData){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_SET_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 2, idx); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 3, iStartBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 4, iLeavesEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 5, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 6, pRootData, nRootData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Queries %_segdir for the block span of the segments in level -** iLevel. Returns SQLITE_DONE if there are no blocks for iLevel, -** SQLITE_ROW if there are blocks, else an error. -*/ -static int segdir_span(fulltext_vtab *v, int iLevel, - sqlite_int64 *piStartBlockid, - sqlite_int64 *piEndBlockid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_SPAN_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_DONE; /* Should never happen */ - if( rc!=SQLITE_ROW ) return rc; - - /* This happens if all segments at this level are entirely inline. */ - if( SQLITE_NULL==sqlite3_column_type(s, 0) ){ - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - int rc2 = sqlite3_step(s); - if( rc2==SQLITE_ROW ) return SQLITE_ERROR; - return rc2; - } - - *piStartBlockid = sqlite3_column_int64(s, 0); - *piEndBlockid = sqlite3_column_int64(s, 1); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - return SQLITE_ROW; -} - -/* Delete the segment blocks and segment directory records for all -** segments at iLevel. -*/ -static int segdir_delete(fulltext_vtab *v, int iLevel){ - sqlite3_stmt *s; - sqlite_int64 iStartBlockid, iEndBlockid; - int rc = segdir_span(v, iLevel, &iStartBlockid, &iEndBlockid); - if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc; - - if( rc==SQLITE_ROW ){ - rc = block_delete(v, iStartBlockid, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - } - - /* Delete the segment directory itself. */ - rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Delete entire fts index, SQLITE_OK on success, relevant error on -** failure. -*/ -static int segdir_delete_all(fulltext_vtab *v){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_single_step(s); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Returns SQLITE_OK with *pnSegments set to the number of entries in -** %_segdir and *piMaxLevel set to the highest level which has a -** segment. Otherwise returns the SQLite error which caused failure. -*/ -static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - /* TODO(shess): This case should not be possible? Should stronger - ** measures be taken if it happens? - */ - if( rc==SQLITE_DONE ){ - *pnSegments = 0; - *piMaxLevel = 0; - return SQLITE_OK; - } - if( rc!=SQLITE_ROW ) return rc; - - *pnSegments = sqlite3_column_int(s, 0); - *piMaxLevel = sqlite3_column_int(s, 1); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_OK; - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - return rc; -} - -/* TODO(shess) clearPendingTerms() is far down the file because -** writeZeroSegment() is far down the file because LeafWriter is far -** down the file. Consider refactoring the code to move the non-vtab -** code above the vtab code so that we don't need this forward -** reference. -*/ -static int clearPendingTerms(fulltext_vtab *v); - -/* -** Free the memory used to contain a fulltext_vtab structure. -*/ -static void fulltext_vtab_destroy(fulltext_vtab *v){ - int iStmt, i; - - TRACE(("FTS2 Destroy %p\n", v)); - for( iStmt=0; iStmtpFulltextStatements[iStmt]!=NULL ){ - sqlite3_finalize(v->pFulltextStatements[iStmt]); - v->pFulltextStatements[iStmt] = NULL; - } - } - - for( i=0; ipLeafSelectStmts[i]!=NULL ){ - sqlite3_finalize(v->pLeafSelectStmts[i]); - v->pLeafSelectStmts[i] = NULL; - } - } - - if( v->pTokenizer!=NULL ){ - v->pTokenizer->pModule->xDestroy(v->pTokenizer); - v->pTokenizer = NULL; - } - - clearPendingTerms(v); - - sqlite3_free(v->azColumn); - for(i = 0; i < v->nColumn; ++i) { - sqlite3_free(v->azContentColumn[i]); - } - sqlite3_free(v->azContentColumn); - sqlite3_free(v); -} - -/* -** Token types for parsing the arguments to xConnect or xCreate. -*/ -#define TOKEN_EOF 0 /* End of file */ -#define TOKEN_SPACE 1 /* Any kind of whitespace */ -#define TOKEN_ID 2 /* An identifier */ -#define TOKEN_STRING 3 /* A string literal */ -#define TOKEN_PUNCT 4 /* A single punctuation character */ - -/* -** If X is a character that can be used in an identifier then -** IdChar(X) will be true. Otherwise it is false. -** -** For ASCII, any character with the high-order bit set is -** allowed in an identifier. For 7-bit characters, -** sqlite3IsIdChar[X] must be 1. -** -** Ticket #1066. the SQL standard does not allow '$' in the -** middle of identfiers. But many SQL implementations do. -** SQLite will allow '$' in identifiers for compatibility. -** But the feature is undocumented. -*/ -static const char isIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20])) - - -/* -** Return the length of the token that begins at z[0]. -** Store the token type in *tokenType before returning. -*/ -static int getToken(const char *z, int *tokenType){ - int i, c; - switch( *z ){ - case 0: { - *tokenType = TOKEN_EOF; - return 0; - } - case ' ': case '\t': case '\n': case '\f': case '\r': { - for(i=1; safe_isspace(z[i]); i++){} - *tokenType = TOKEN_SPACE; - return i; - } - case '`': - case '\'': - case '"': { - int delim = z[0]; - for(i=1; (c=z[i])!=0; i++){ - if( c==delim ){ - if( z[i+1]==delim ){ - i++; - }else{ - break; - } - } - } - *tokenType = TOKEN_STRING; - return i + (c!=0); - } - case '[': { - for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} - *tokenType = TOKEN_ID; - return i; - } - default: { - if( !IdChar(*z) ){ - break; - } - for(i=1; IdChar(z[i]); i++){} - *tokenType = TOKEN_ID; - return i; - } - } - *tokenType = TOKEN_PUNCT; - return 1; -} - -/* -** A token extracted from a string is an instance of the following -** structure. -*/ -typedef struct Token { - const char *z; /* Pointer to token text. Not '\000' terminated */ - short int n; /* Length of the token text in bytes. */ -} Token; - -/* -** Given a input string (which is really one of the argv[] parameters -** passed into xConnect or xCreate) split the string up into tokens. -** Return an array of pointers to '\000' terminated strings, one string -** for each non-whitespace token. -** -** The returned array is terminated by a single NULL pointer. -** -** Space to hold the returned array is obtained from a single -** malloc and should be freed by passing the return value to free(). -** The individual strings within the token list are all a part of -** the single memory allocation and will all be freed at once. -*/ -static char **tokenizeString(const char *z, int *pnToken){ - int nToken = 0; - Token *aToken = sqlite3_malloc( strlen(z) * sizeof(aToken[0]) ); - int n = 1; - int e, i; - int totalSize = 0; - char **azToken; - char *zCopy; - while( n>0 ){ - n = getToken(z, &e); - if( e!=TOKEN_SPACE ){ - aToken[nToken].z = z; - aToken[nToken].n = n; - nToken++; - totalSize += n+1; - } - z += n; - } - azToken = (char**)sqlite3_malloc( nToken*sizeof(char*) + totalSize ); - zCopy = (char*)&azToken[nToken]; - nToken--; - for(i=0; i=0 ){ - azIn[j] = azIn[i]; - } - j++; - } - } - azIn[j] = 0; - } -} - - -/* -** Find the first alphanumeric token in the string zIn. Null-terminate -** this token. Remove any quotation marks. And return a pointer to -** the result. -*/ -static char *firstToken(char *zIn, char **pzTail){ - int n, ttype; - while(1){ - n = getToken(zIn, &ttype); - if( ttype==TOKEN_SPACE ){ - zIn += n; - }else if( ttype==TOKEN_EOF ){ - *pzTail = zIn; - return 0; - }else{ - zIn[n] = 0; - *pzTail = &zIn[1]; - dequoteString(zIn); - return zIn; - } - } - /*NOTREACHED*/ -} - -/* Return true if... -** -** * s begins with the string t, ignoring case -** * s is longer than t -** * The first character of s beyond t is not a alphanumeric -** -** Ignore leading space in *s. -** -** To put it another way, return true if the first token of -** s[] is t[]. -*/ -static int startsWith(const char *s, const char *t){ - while( safe_isspace(*s) ){ s++; } - while( *t ){ - if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0; - } - return *s!='_' && !safe_isalnum(*s); -} - -/* -** An instance of this structure defines the "spec" of a -** full text index. This structure is populated by parseSpec -** and use by fulltextConnect and fulltextCreate. -*/ -typedef struct TableSpec { - const char *zDb; /* Logical database name */ - const char *zName; /* Name of the full-text index */ - int nColumn; /* Number of columns to be indexed */ - char **azColumn; /* Original names of columns to be indexed */ - char **azContentColumn; /* Column names for %_content */ - char **azTokenizer; /* Name of tokenizer and its arguments */ -} TableSpec; - -/* -** Reclaim all of the memory used by a TableSpec -*/ -static void clearTableSpec(TableSpec *p) { - sqlite3_free(p->azColumn); - sqlite3_free(p->azContentColumn); - sqlite3_free(p->azTokenizer); -} - -/* Parse a CREATE VIRTUAL TABLE statement, which looks like this: - * - * CREATE VIRTUAL TABLE email - * USING fts2(subject, body, tokenize mytokenizer(myarg)) - * - * We return parsed information in a TableSpec structure. - * - */ -static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, - char**pzErr){ - int i, n; - char *z, *zDummy; - char **azArg; - const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */ - - assert( argc>=3 ); - /* Current interface: - ** argv[0] - module name - ** argv[1] - database name - ** argv[2] - table name - ** argv[3..] - columns, optionally followed by tokenizer specification - ** and snippet delimiters specification. - */ - - /* Make a copy of the complete argv[][] array in a single allocation. - ** The argv[][] array is read-only and transient. We can write to the - ** copy in order to modify things and the copy is persistent. - */ - CLEAR(pSpec); - for(i=n=0; izDb = azArg[1]; - pSpec->zName = azArg[2]; - pSpec->nColumn = 0; - pSpec->azColumn = azArg; - zTokenizer = "tokenize simple"; - for(i=3; inColumn] = firstToken(azArg[i], &zDummy); - pSpec->nColumn++; - } - } - if( pSpec->nColumn==0 ){ - azArg[0] = "content"; - pSpec->nColumn = 1; - } - - /* - ** Construct the list of content column names. - ** - ** Each content column name will be of the form cNNAAAA - ** where NN is the column number and AAAA is the sanitized - ** column name. "sanitized" means that special characters are - ** converted to "_". The cNN prefix guarantees that all column - ** names are unique. - ** - ** The AAAA suffix is not strictly necessary. It is included - ** for the convenience of people who might examine the generated - ** %_content table and wonder what the columns are used for. - */ - pSpec->azContentColumn = sqlite3_malloc( pSpec->nColumn * sizeof(char *) ); - if( pSpec->azContentColumn==0 ){ - clearTableSpec(pSpec); - return SQLITE_NOMEM; - } - for(i=0; inColumn; i++){ - char *p; - pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); - for (p = pSpec->azContentColumn[i]; *p ; ++p) { - if( !safe_isalnum(*p) ) *p = '_'; - } - } - - /* - ** Parse the tokenizer specification string. - */ - pSpec->azTokenizer = tokenizeString(zTokenizer, &n); - tokenListToIdList(pSpec->azTokenizer); - - return SQLITE_OK; -} - -/* -** Generate a CREATE TABLE statement that describes the schema of -** the virtual table. Return a pointer to this schema string. -** -** Space is obtained from sqlite3_mprintf() and should be freed -** using sqlite3_free(). -*/ -static char *fulltextSchema( - int nColumn, /* Number of columns */ - const char *const* azColumn, /* List of columns */ - const char *zTableName /* Name of the table */ -){ - int i; - char *zSchema, *zNext; - const char *zSep = "("; - zSchema = sqlite3_mprintf("CREATE TABLE x"); - for(i=0; ibase */ - v->db = db; - v->zDb = spec->zDb; /* Freed when azColumn is freed */ - v->zName = spec->zName; /* Freed when azColumn is freed */ - v->nColumn = spec->nColumn; - v->azContentColumn = spec->azContentColumn; - spec->azContentColumn = 0; - v->azColumn = spec->azColumn; - spec->azColumn = 0; - - if( spec->azTokenizer==0 ){ - return SQLITE_NOMEM; - } - - zTok = spec->azTokenizer[0]; - if( !zTok ){ - zTok = "simple"; - } - nTok = strlen(zTok)+1; - - m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok); - if( !m ){ - *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); - rc = SQLITE_ERROR; - goto err; - } - - for(n=0; spec->azTokenizer[n]; n++){} - if( n ){ - rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], - &v->pTokenizer); - }else{ - rc = m->xCreate(0, 0, &v->pTokenizer); - } - if( rc!=SQLITE_OK ) goto err; - v->pTokenizer->pModule = m; - - /* TODO: verify the existence of backing tables foo_content, foo_term */ - - schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, - spec->zName); - rc = sqlite3_declare_vtab(db, schema); - sqlite3_free(schema); - if( rc!=SQLITE_OK ) goto err; - - memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); - - /* Indicate that the buffer is not live. */ - v->nPendingData = -1; - - *ppVTab = &v->base; - TRACE(("FTS2 Connect %p\n", v)); - - return rc; - -err: - fulltext_vtab_destroy(v); - return rc; -} - -static int fulltextConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVTab, - char **pzErr -){ - TableSpec spec; - int rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr); - clearTableSpec(&spec); - return rc; -} - -/* The %_content table holds the text of each document, with -** the rowid used as the docid. -*/ -/* TODO(shess) This comment needs elaboration to match the updated -** code. Work it into the top-of-file comment at that time. -*/ -static int fulltextCreate(sqlite3 *db, void *pAux, - int argc, const char * const *argv, - sqlite3_vtab **ppVTab, char **pzErr){ - int rc; - TableSpec spec; - StringBuffer schema; - TRACE(("FTS2 Create\n")); - - rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - initStringBuffer(&schema); - append(&schema, "CREATE TABLE %_content("); - appendList(&schema, spec.nColumn, spec.azContentColumn); - append(&schema, ")"); - rc = sql_exec(db, spec.zDb, spec.zName, stringBufferData(&schema)); - stringBufferDestroy(&schema); - if( rc!=SQLITE_OK ) goto out; - - rc = sql_exec(db, spec.zDb, spec.zName, - "create table %_segments(block blob);"); - if( rc!=SQLITE_OK ) goto out; - - rc = sql_exec(db, spec.zDb, spec.zName, - "create table %_segdir(" - " level integer," - " idx integer," - " start_block integer," - " leaves_end_block integer," - " end_block integer," - " root blob," - " primary key(level, idx)" - ");"); - if( rc!=SQLITE_OK ) goto out; - - rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr); - -out: - clearTableSpec(&spec); - return rc; -} - -/* Decide how to handle an SQL query. */ -static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ - int i; - TRACE(("FTS2 BestIndex\n")); - - for(i=0; inConstraint; ++i){ - const struct sqlite3_index_constraint *pConstraint; - pConstraint = &pInfo->aConstraint[i]; - if( pConstraint->usable ) { - if( pConstraint->iColumn==-1 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ - pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */ - TRACE(("FTS2 QUERY_ROWID\n")); - } else if( pConstraint->iColumn>=0 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ - /* full-text search */ - pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; - TRACE(("FTS2 QUERY_FULLTEXT %d\n", pConstraint->iColumn)); - } else continue; - - pInfo->aConstraintUsage[i].argvIndex = 1; - pInfo->aConstraintUsage[i].omit = 1; - - /* An arbitrary value for now. - * TODO: Perhaps rowid matches should be considered cheaper than - * full-text searches. */ - pInfo->estimatedCost = 1.0; - - return SQLITE_OK; - } - } - pInfo->idxNum = QUERY_GENERIC; - return SQLITE_OK; -} - -static int fulltextDisconnect(sqlite3_vtab *pVTab){ - TRACE(("FTS2 Disconnect %p\n", pVTab)); - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextDestroy(sqlite3_vtab *pVTab){ - fulltext_vtab *v = (fulltext_vtab *)pVTab; - int rc; - - TRACE(("FTS2 Destroy %p\n", pVTab)); - rc = sql_exec(v->db, v->zDb, v->zName, - "drop table if exists %_content;" - "drop table if exists %_segments;" - "drop table if exists %_segdir;" - ); - if( rc!=SQLITE_OK ) return rc; - - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - fulltext_cursor *c; - - c = (fulltext_cursor *) sqlite3_malloc(sizeof(fulltext_cursor)); - if( c ){ - memset(c, 0, sizeof(fulltext_cursor)); - /* sqlite will initialize c->base */ - *ppCursor = &c->base; - TRACE(("FTS2 Open %p: %p\n", pVTab, c)); - return SQLITE_OK; - }else{ - return SQLITE_NOMEM; - } -} - - -/* Free all of the dynamically allocated memory held by *q -*/ -static void queryClear(Query *q){ - int i; - for(i = 0; i < q->nTerms; ++i){ - sqlite3_free(q->pTerms[i].pTerm); - } - sqlite3_free(q->pTerms); - CLEAR(q); -} - -/* Free all of the dynamically allocated memory held by the -** Snippet -*/ -static void snippetClear(Snippet *p){ - sqlite3_free(p->aMatch); - sqlite3_free(p->zOffset); - sqlite3_free(p->zSnippet); - CLEAR(p); -} -/* -** Append a single entry to the p->aMatch[] log. -*/ -static void snippetAppendMatch( - Snippet *p, /* Append the entry to this snippet */ - int iCol, int iTerm, /* The column and query term */ - int iStart, int nByte /* Offset and size of the match */ -){ - int i; - struct snippetMatch *pMatch; - if( p->nMatch+1>=p->nAlloc ){ - p->nAlloc = p->nAlloc*2 + 10; - p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); - if( p->aMatch==0 ){ - p->nMatch = 0; - p->nAlloc = 0; - return; - } - } - i = p->nMatch++; - pMatch = &p->aMatch[i]; - pMatch->iCol = iCol; - pMatch->iTerm = iTerm; - pMatch->iStart = iStart; - pMatch->nByte = nByte; -} - -/* -** Sizing information for the circular buffer used in snippetOffsetsOfColumn() -*/ -#define FTS2_ROTOR_SZ (32) -#define FTS2_ROTOR_MASK (FTS2_ROTOR_SZ-1) - -/* -** Add entries to pSnippet->aMatch[] for every match that occurs against -** document zDoc[0..nDoc-1] which is stored in column iColumn. -*/ -static void snippetOffsetsOfColumn( - Query *pQuery, - Snippet *pSnippet, - int iColumn, - const char *zDoc, - int nDoc -){ - const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */ - sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */ - sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */ - fulltext_vtab *pVtab; /* The full text index */ - int nColumn; /* Number of columns in the index */ - const QueryTerm *aTerm; /* Query string terms */ - int nTerm; /* Number of query string terms */ - int i, j; /* Loop counters */ - int rc; /* Return code */ - unsigned int match, prevMatch; /* Phrase search bitmasks */ - const char *zToken; /* Next token from the tokenizer */ - int nToken; /* Size of zToken */ - int iBegin, iEnd, iPos; /* Offsets of beginning and end */ - - /* The following variables keep a circular buffer of the last - ** few tokens */ - unsigned int iRotor = 0; /* Index of current token */ - int iRotorBegin[FTS2_ROTOR_SZ]; /* Beginning offset of token */ - int iRotorLen[FTS2_ROTOR_SZ]; /* Length of token */ - - pVtab = pQuery->pFts; - nColumn = pVtab->nColumn; - pTokenizer = pVtab->pTokenizer; - pTModule = pTokenizer->pModule; - rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); - if( rc ) return; - pTCursor->pTokenizer = pTokenizer; - aTerm = pQuery->pTerms; - nTerm = pQuery->nTerms; - if( nTerm>=FTS2_ROTOR_SZ ){ - nTerm = FTS2_ROTOR_SZ - 1; - } - prevMatch = 0; - while(1){ - rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); - if( rc ) break; - iRotorBegin[iRotor&FTS2_ROTOR_MASK] = iBegin; - iRotorLen[iRotor&FTS2_ROTOR_MASK] = iEnd-iBegin; - match = 0; - for(i=0; i=0 && iColnToken ) continue; - if( !aTerm[i].isPrefix && aTerm[i].nTerm1 && (prevMatch & (1<=0; j--){ - int k = (iRotor-j) & FTS2_ROTOR_MASK; - snippetAppendMatch(pSnippet, iColumn, i-j, - iRotorBegin[k], iRotorLen[k]); - } - } - } - prevMatch = match<<1; - iRotor++; - } - pTModule->xClose(pTCursor); -} - - -/* -** Compute all offsets for the current row of the query. -** If the offsets have already been computed, this routine is a no-op. -*/ -static void snippetAllOffsets(fulltext_cursor *p){ - int nColumn; - int iColumn, i; - int iFirst, iLast; - fulltext_vtab *pFts; - - if( p->snippet.nMatch ) return; - if( p->q.nTerms==0 ) return; - pFts = p->q.pFts; - nColumn = pFts->nColumn; - iColumn = (p->iCursorType - QUERY_FULLTEXT); - if( iColumn<0 || iColumn>=nColumn ){ - iFirst = 0; - iLast = nColumn-1; - }else{ - iFirst = iColumn; - iLast = iColumn; - } - for(i=iFirst; i<=iLast; i++){ - const char *zDoc; - int nDoc; - zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1); - nDoc = sqlite3_column_bytes(p->pStmt, i+1); - snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc); - } -} - -/* -** Convert the information in the aMatch[] array of the snippet -** into the string zOffset[0..nOffset-1]. -*/ -static void snippetOffsetText(Snippet *p){ - int i; - int cnt = 0; - StringBuffer sb; - char zBuf[200]; - if( p->zOffset ) return; - initStringBuffer(&sb); - for(i=0; inMatch; i++){ - struct snippetMatch *pMatch = &p->aMatch[i]; - zBuf[0] = ' '; - sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", - pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); - append(&sb, zBuf); - cnt++; - } - p->zOffset = stringBufferData(&sb); - p->nOffset = stringBufferLength(&sb); -} - -/* -** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set -** of matching words some of which might be in zDoc. zDoc is column -** number iCol. -** -** iBreak is suggested spot in zDoc where we could begin or end an -** excerpt. Return a value similar to iBreak but possibly adjusted -** to be a little left or right so that the break point is better. -*/ -static int wordBoundary( - int iBreak, /* The suggested break point */ - const char *zDoc, /* Document text */ - int nDoc, /* Number of bytes in zDoc[] */ - struct snippetMatch *aMatch, /* Matching words */ - int nMatch, /* Number of entries in aMatch[] */ - int iCol /* The column number for zDoc[] */ -){ - int i; - if( iBreak<=10 ){ - return 0; - } - if( iBreak>=nDoc-10 ){ - return nDoc; - } - for(i=0; i0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ - return aMatch[i-1].iStart; - } - } - for(i=1; i<=10; i++){ - if( safe_isspace(zDoc[iBreak-i]) ){ - return iBreak - i + 1; - } - if( safe_isspace(zDoc[iBreak+i]) ){ - return iBreak + i + 1; - } - } - return iBreak; -} - - - -/* -** Allowed values for Snippet.aMatch[].snStatus -*/ -#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */ -#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */ - -/* -** Generate the text of a snippet. -*/ -static void snippetText( - fulltext_cursor *pCursor, /* The cursor we need the snippet for */ - const char *zStartMark, /* Markup to appear before each match */ - const char *zEndMark, /* Markup to appear after each match */ - const char *zEllipsis /* Ellipsis mark */ -){ - int i, j; - struct snippetMatch *aMatch; - int nMatch; - int nDesired; - StringBuffer sb; - int tailCol; - int tailOffset; - int iCol; - int nDoc; - const char *zDoc; - int iStart, iEnd; - int tailEllipsis = 0; - int iMatch; - - - sqlite3_free(pCursor->snippet.zSnippet); - pCursor->snippet.zSnippet = 0; - aMatch = pCursor->snippet.aMatch; - nMatch = pCursor->snippet.nMatch; - initStringBuffer(&sb); - - for(i=0; iq.nTerms; i++){ - for(j=0; j0; i++){ - if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; - nDesired--; - iCol = aMatch[i].iCol; - zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); - iStart = aMatch[i].iStart - 40; - iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); - if( iStart<=10 ){ - iStart = 0; - } - if( iCol==tailCol && iStart<=tailOffset+20 ){ - iStart = tailOffset; - } - if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ - trimWhiteSpace(&sb); - appendWhiteSpace(&sb); - append(&sb, zEllipsis); - appendWhiteSpace(&sb); - } - iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; - iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); - if( iEnd>=nDoc-10 ){ - iEnd = nDoc; - tailEllipsis = 0; - }else{ - tailEllipsis = 1; - } - while( iMatchsnippet.zSnippet = stringBufferData(&sb); - pCursor->snippet.nSnippet = stringBufferLength(&sb); -} - - -/* -** Close the cursor. For additional information see the documentation -** on the xClose method of the virtual table interface. -*/ -static int fulltextClose(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - TRACE(("FTS2 Close %p\n", c)); - sqlite3_finalize(c->pStmt); - queryClear(&c->q); - snippetClear(&c->snippet); - if( c->result.nData!=0 ) dlrDestroy(&c->reader); - dataBufferDestroy(&c->result); - sqlite3_free(c); - return SQLITE_OK; -} - -static int fulltextNext(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - int rc; - - TRACE(("FTS2 Next %p\n", pCursor)); - snippetClear(&c->snippet); - if( c->iCursorType < QUERY_FULLTEXT ){ - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - switch( rc ){ - case SQLITE_ROW: - c->eof = 0; - return SQLITE_OK; - case SQLITE_DONE: - c->eof = 1; - return SQLITE_OK; - default: - c->eof = 1; - return rc; - } - } else { /* full-text query */ - rc = sqlite3_reset(c->pStmt); - if( rc!=SQLITE_OK ) return rc; - - if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ - c->eof = 1; - return SQLITE_OK; - } - rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); - dlrStep(&c->reader); - if( rc!=SQLITE_OK ) return rc; - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - if( rc==SQLITE_ROW ){ /* the case we expect */ - c->eof = 0; - return SQLITE_OK; - } - /* an error occurred; abort */ - return rc==SQLITE_DONE ? SQLITE_ERROR : rc; - } -} - - -/* TODO(shess) If we pushed LeafReader to the top of the file, or to -** another file, term_select() could be pushed above -** docListOfTerm(). -*/ -static int termSelect(fulltext_vtab *v, int iColumn, - const char *pTerm, int nTerm, int isPrefix, - DocListType iType, DataBuffer *out); - -/* Return a DocList corresponding to the query term *pTerm. If *pTerm -** is the first term of a phrase query, go ahead and evaluate the phrase -** query and return the doclist for the entire phrase query. -** -** The resulting DL_DOCIDS doclist is stored in pResult, which is -** overwritten. -*/ -static int docListOfTerm( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* column to restrict to. No restriction if >=nColumn */ - QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */ - DataBuffer *pResult /* Write the result here */ -){ - DataBuffer left, right, new; - int i, rc; - - /* No phrase search if no position info. */ - assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS ); - - /* This code should never be called with buffered updates. */ - assert( v->nPendingData<0 ); - - dataBufferInit(&left, 0); - rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix, - 0nPhrase ? DL_POSITIONS : DL_DOCIDS, &left); - if( rc ) return rc; - for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ - dataBufferInit(&right, 0); - rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, - pQTerm[i].isPrefix, DL_POSITIONS, &right); - if( rc ){ - dataBufferDestroy(&left); - return rc; - } - dataBufferInit(&new, 0); - docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, - inPhrase ? DL_POSITIONS : DL_DOCIDS, &new); - dataBufferDestroy(&left); - dataBufferDestroy(&right); - left = new; - } - *pResult = left; - return SQLITE_OK; -} - -/* Add a new term pTerm[0..nTerm-1] to the query *q. -*/ -static void queryAdd(Query *q, const char *pTerm, int nTerm){ - QueryTerm *t; - ++q->nTerms; - q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); - if( q->pTerms==0 ){ - q->nTerms = 0; - return; - } - t = &q->pTerms[q->nTerms - 1]; - CLEAR(t); - t->pTerm = sqlite3_malloc(nTerm+1); - memcpy(t->pTerm, pTerm, nTerm); - t->pTerm[nTerm] = 0; - t->nTerm = nTerm; - t->isOr = q->nextIsOr; - t->isPrefix = 0; - q->nextIsOr = 0; - t->iColumn = q->nextColumn; - q->nextColumn = q->dfltColumn; -} - -/* -** Check to see if the string zToken[0...nToken-1] matches any -** column name in the virtual table. If it does, -** return the zero-indexed column number. If not, return -1. -*/ -static int checkColumnSpecifier( - fulltext_vtab *pVtab, /* The virtual table */ - const char *zToken, /* Text of the token */ - int nToken /* Number of characters in the token */ -){ - int i; - for(i=0; inColumn; i++){ - if( memcmp(pVtab->azColumn[i], zToken, nToken)==0 - && pVtab->azColumn[i][nToken]==0 ){ - return i; - } - } - return -1; -} - -/* -** Parse the text at pSegment[0..nSegment-1]. Add additional terms -** to the query being assemblied in pQuery. -** -** inPhrase is true if pSegment[0..nSegement-1] is contained within -** double-quotes. If inPhrase is true, then the first term -** is marked with the number of terms in the phrase less one and -** OR and "-" syntax is ignored. If inPhrase is false, then every -** term found is marked with nPhrase=0 and OR and "-" syntax is significant. -*/ -static int tokenizeSegment( - sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ - const char *pSegment, int nSegment, /* Query expression being parsed */ - int inPhrase, /* True if within "..." */ - Query *pQuery /* Append results here */ -){ - const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCursor; - int firstIndex = pQuery->nTerms; - int iCol; - int nTerm = 1; - - int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); - if( rc!=SQLITE_OK ) return rc; - pCursor->pTokenizer = pTokenizer; - - while( 1 ){ - const char *pToken; - int nToken, iBegin, iEnd, iPos; - - rc = pModule->xNext(pCursor, - &pToken, &nToken, - &iBegin, &iEnd, &iPos); - if( rc!=SQLITE_OK ) break; - if( !inPhrase && - pSegment[iEnd]==':' && - (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ - pQuery->nextColumn = iCol; - continue; - } - if( !inPhrase && pQuery->nTerms>0 && nToken==2 - && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ - pQuery->nextIsOr = 1; - continue; - } - queryAdd(pQuery, pToken, nToken); - if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ - pQuery->pTerms[pQuery->nTerms-1].isNot = 1; - } - if( iEndpTerms[pQuery->nTerms-1].isPrefix = 1; - } - pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; - if( inPhrase ){ - nTerm++; - } - } - - if( inPhrase && pQuery->nTerms>firstIndex ){ - pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1; - } - - return pModule->xClose(pCursor); -} - -/* Parse a query string, yielding a Query object pQuery. -** -** The calling function will need to queryClear() to clean up -** the dynamically allocated memory held by pQuery. -*/ -static int parseQuery( - fulltext_vtab *v, /* The fulltext index */ - const char *zInput, /* Input text of the query string */ - int nInput, /* Size of the input text */ - int dfltColumn, /* Default column of the index to match against */ - Query *pQuery /* Write the parse results here. */ -){ - int iInput, inPhrase = 0; - - if( zInput==0 ) nInput = 0; - if( nInput<0 ) nInput = strlen(zInput); - pQuery->nTerms = 0; - pQuery->pTerms = NULL; - pQuery->nextIsOr = 0; - pQuery->nextColumn = dfltColumn; - pQuery->dfltColumn = dfltColumn; - pQuery->pFts = v; - - for(iInput=0; iInputiInput ){ - tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase, - pQuery); - } - iInput = i; - if( i=nColumn -** they are allowed to match against any column. -*/ -static int fulltextQuery( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* Match against this column by default */ - const char *zInput, /* The query string */ - int nInput, /* Number of bytes in zInput[] */ - DataBuffer *pResult, /* Write the result doclist here */ - Query *pQuery /* Put parsed query string here */ -){ - int i, iNext, rc; - DataBuffer left, right, or, new; - int nNot = 0; - QueryTerm *aTerm; - - /* TODO(shess) Instead of flushing pendingTerms, we could query for - ** the relevant term and merge the doclist into what we receive from - ** the database. Wait and see if this is a common issue, first. - ** - ** A good reason not to flush is to not generate update-related - ** error codes from here. - */ - - /* Flush any buffered updates before executing the query. */ - rc = flushPendingTerms(v); - if( rc!=SQLITE_OK ) return rc; - - /* TODO(shess) I think that the queryClear() calls below are not - ** necessary, because fulltextClose() already clears the query. - */ - rc = parseQuery(v, zInput, nInput, iColumn, pQuery); - if( rc!=SQLITE_OK ) return rc; - - /* Empty or NULL queries return no results. */ - if( pQuery->nTerms==0 ){ - dataBufferInit(pResult, 0); - return SQLITE_OK; - } - - /* Merge AND terms. */ - /* TODO(shess) I think we can early-exit if( i>nNot && left.nData==0 ). */ - aTerm = pQuery->pTerms; - for(i = 0; inTerms; i=iNext){ - if( aTerm[i].isNot ){ - /* Handle all NOT terms in a separate pass */ - nNot++; - iNext = i + aTerm[i].nPhrase+1; - continue; - } - iNext = i + aTerm[i].nPhrase + 1; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); - if( rc ){ - if( i!=nNot ) dataBufferDestroy(&left); - queryClear(pQuery); - return rc; - } - while( iNextnTerms && aTerm[iNext].isOr ){ - rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or); - iNext += aTerm[iNext].nPhrase + 1; - if( rc ){ - if( i!=nNot ) dataBufferDestroy(&left); - dataBufferDestroy(&right); - queryClear(pQuery); - return rc; - } - dataBufferInit(&new, 0); - docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); - dataBufferDestroy(&right); - dataBufferDestroy(&or); - right = new; - } - if( i==nNot ){ /* first term processed. */ - left = right; - }else{ - dataBufferInit(&new, 0); - docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new); - dataBufferDestroy(&right); - dataBufferDestroy(&left); - left = new; - } - } - - if( nNot==pQuery->nTerms ){ - /* We do not yet know how to handle a query of only NOT terms */ - return SQLITE_ERROR; - } - - /* Do the EXCEPT terms */ - for(i=0; inTerms; i += aTerm[i].nPhrase + 1){ - if( !aTerm[i].isNot ) continue; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); - if( rc ){ - queryClear(pQuery); - dataBufferDestroy(&left); - return rc; - } - dataBufferInit(&new, 0); - docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new); - dataBufferDestroy(&right); - dataBufferDestroy(&left); - left = new; - } - - *pResult = left; - return rc; -} - -/* -** This is the xFilter interface for the virtual table. See -** the virtual table xFilter method documentation for additional -** information. -** -** If idxNum==QUERY_GENERIC then do a full table scan against -** the %_content table. -** -** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry -** in the %_content table. -** -** If idxNum>=QUERY_FULLTEXT then use the full text index. The -** column on the left-hand side of the MATCH operator is column -** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand -** side of the MATCH operator. -*/ -/* TODO(shess) Upgrade the cursor initialization and destruction to -** account for fulltextFilter() being called multiple times on the -** same cursor. The current solution is very fragile. Apply fix to -** fts2 as appropriate. -*/ -static int fulltextFilter( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, const char *idxStr, /* Which indexing scheme to use */ - int argc, sqlite3_value **argv /* Arguments for the indexing scheme */ -){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - int rc; - - TRACE(("FTS2 Filter %p\n",pCursor)); - - /* If the cursor has a statement that was not prepared according to - ** idxNum, clear it. I believe all calls to fulltextFilter with a - ** given cursor will have the same idxNum , but in this case it's - ** easy to be safe. - */ - if( c->pStmt && c->iCursorType!=idxNum ){ - sqlite3_finalize(c->pStmt); - c->pStmt = NULL; - } - - /* Get a fresh statement appropriate to idxNum. */ - /* TODO(shess): Add a prepared-statement cache in the vt structure. - ** The cache must handle multiple open cursors. Easier to cache the - ** statement variants at the vt to reduce malloc/realloc/free here. - ** Or we could have a StringBuffer variant which allowed stack - ** construction for small values. - */ - if( !c->pStmt ){ - char *zSql = sqlite3_mprintf("select rowid, * from %%_content %s", - idxNum==QUERY_GENERIC ? "" : "where rowid=?"); - rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql); - sqlite3_free(zSql); - if( rc!=SQLITE_OK ) return rc; - c->iCursorType = idxNum; - }else{ - sqlite3_reset(c->pStmt); - assert( c->iCursorType==idxNum ); - } - - switch( idxNum ){ - case QUERY_GENERIC: - break; - - case QUERY_ROWID: - rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0])); - if( rc!=SQLITE_OK ) return rc; - break; - - default: /* full-text search */ - { - const char *zQuery = (const char *)sqlite3_value_text(argv[0]); - assert( idxNum<=QUERY_FULLTEXT+v->nColumn); - assert( argc==1 ); - queryClear(&c->q); - if( c->result.nData!=0 ){ - /* This case happens if the same cursor is used repeatedly. */ - dlrDestroy(&c->reader); - dataBufferReset(&c->result); - }else{ - dataBufferInit(&c->result, 0); - } - rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q); - if( rc!=SQLITE_OK ) return rc; - if( c->result.nData!=0 ){ - dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); - } - break; - } - } - - return fulltextNext(pCursor); -} - -/* This is the xEof method of the virtual table. The SQLite core -** calls this routine to find out if it has reached the end of -** a query's results set. -*/ -static int fulltextEof(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - return c->eof; -} - -/* This is the xColumn method of the virtual table. The SQLite -** core calls this method during a query when it needs the value -** of a column from the virtual table. This method needs to use -** one of the sqlite3_result_*() routines to store the requested -** value back in the pContext. -*/ -static int fulltextColumn(sqlite3_vtab_cursor *pCursor, - sqlite3_context *pContext, int idxCol){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - - if( idxColnColumn ){ - sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1); - sqlite3_result_value(pContext, pVal); - }else if( idxCol==v->nColumn ){ - /* The extra column whose name is the same as the table. - ** Return a blob which is a pointer to the cursor - */ - sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT); - } - return SQLITE_OK; -} - -/* This is the xRowid method. The SQLite core calls this routine to -** retrive the rowid for the current row of the result set. The -** rowid should be written to *pRowid. -*/ -static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - - *pRowid = sqlite3_column_int64(c->pStmt, 0); - return SQLITE_OK; -} - -/* Add all terms in [zText] to pendingTerms table. If [iColumn] > 0, -** we also store positions and offsets in the hash table using that -** column number. -*/ -static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid, - const char *zText, int iColumn){ - sqlite3_tokenizer *pTokenizer = v->pTokenizer; - sqlite3_tokenizer_cursor *pCursor; - const char *pToken; - int nTokenBytes; - int iStartOffset, iEndOffset, iPosition; - int rc; - - rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); - if( rc!=SQLITE_OK ) return rc; - - pCursor->pTokenizer = pTokenizer; - while( SQLITE_OK==(rc=pTokenizer->pModule->xNext(pCursor, - &pToken, &nTokenBytes, - &iStartOffset, &iEndOffset, - &iPosition)) ){ - DLCollector *p; - int nData; /* Size of doclist before our update. */ - - /* Positions can't be negative; we use -1 as a terminator - * internally. Token can't be NULL or empty. */ - if( iPosition<0 || pToken == NULL || nTokenBytes == 0 ){ - rc = SQLITE_ERROR; - break; - } - - p = fts2HashFind(&v->pendingTerms, pToken, nTokenBytes); - if( p==NULL ){ - nData = 0; - p = dlcNew(iDocid, DL_DEFAULT); - fts2HashInsert(&v->pendingTerms, pToken, nTokenBytes, p); - - /* Overhead for our hash table entry, the key, and the value. */ - v->nPendingData += sizeof(struct fts2HashElem)+sizeof(*p)+nTokenBytes; - }else{ - nData = p->b.nData; - if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid); - } - if( iColumn>=0 ){ - dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset); - } - - /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. */ - v->nPendingData += p->b.nData-nData; - } - - /* TODO(shess) Check return? Should this be able to cause errors at - ** this point? Actually, same question about sqlite3_finalize(), - ** though one could argue that failure there means that the data is - ** not durable. *ponder* - */ - pTokenizer->pModule->xClose(pCursor); - if( SQLITE_DONE == rc ) return SQLITE_OK; - return rc; -} - -/* Add doclists for all terms in [pValues] to pendingTerms table. */ -static int insertTerms(fulltext_vtab *v, sqlite_int64 iRowid, - sqlite3_value **pValues){ - int i; - for(i = 0; i < v->nColumn ; ++i){ - char *zText = (char*)sqlite3_value_text(pValues[i]); - int rc = buildTerms(v, iRowid, zText, i); - if( rc!=SQLITE_OK ) return rc; - } - return SQLITE_OK; -} - -/* Add empty doclists for all terms in the given row's content to -** pendingTerms. -*/ -static int deleteTerms(fulltext_vtab *v, sqlite_int64 iRowid){ - const char **pValues; - int i, rc; - - /* TODO(shess) Should we allow such tables at all? */ - if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR; - - rc = content_select(v, iRowid, &pValues); - if( rc!=SQLITE_OK ) return rc; - - for(i = 0 ; i < v->nColumn; ++i) { - rc = buildTerms(v, iRowid, pValues[i], -1); - if( rc!=SQLITE_OK ) break; - } - - freeStringArray(v->nColumn, pValues); - return SQLITE_OK; -} - -/* TODO(shess) Refactor the code to remove this forward decl. */ -static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid); - -/* Insert a row into the %_content table; set *piRowid to be the ID of the -** new row. Add doclists for terms to pendingTerms. -*/ -static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, - sqlite3_value **pValues, sqlite_int64 *piRowid){ - int rc; - - rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */ - if( rc!=SQLITE_OK ) return rc; - - *piRowid = sqlite3_last_insert_rowid(v->db); - rc = initPendingTerms(v, *piRowid); - if( rc!=SQLITE_OK ) return rc; - - return insertTerms(v, *piRowid, pValues); -} - -/* Delete a row from the %_content table; add empty doclists for terms -** to pendingTerms. -*/ -static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){ - int rc = initPendingTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = deleteTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - return content_delete(v, iRow); /* execute an SQL DELETE */ -} - -/* Update a row in the %_content table; add delete doclists to -** pendingTerms for old terms not in the new data, add insert doclists -** to pendingTerms for terms in the new data. -*/ -static int index_update(fulltext_vtab *v, sqlite_int64 iRow, - sqlite3_value **pValues){ - int rc = initPendingTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - /* Generate an empty doclist for each term that previously appeared in this - * row. */ - rc = deleteTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */ - if( rc!=SQLITE_OK ) return rc; - - /* Now add positions for terms which appear in the updated row. */ - return insertTerms(v, iRow, pValues); -} - -/*******************************************************************/ -/* InteriorWriter is used to collect terms and block references into -** interior nodes in %_segments. See commentary at top of file for -** format. -*/ - -/* How large interior nodes can grow. */ -#define INTERIOR_MAX 2048 - -/* Minimum number of terms per interior node (except the root). This -** prevents large terms from making the tree too skinny - must be >0 -** so that the tree always makes progress. Note that the min tree -** fanout will be INTERIOR_MIN_TERMS+1. -*/ -#define INTERIOR_MIN_TERMS 7 -#if INTERIOR_MIN_TERMS<1 -# error INTERIOR_MIN_TERMS must be greater than 0. -#endif - -/* ROOT_MAX controls how much data is stored inline in the segment -** directory. -*/ -/* TODO(shess) Push ROOT_MAX down to whoever is writing things. It's -** only here so that interiorWriterRootInfo() and leafWriterRootInfo() -** can both see it, but if the caller passed it in, we wouldn't even -** need a define. -*/ -#define ROOT_MAX 1024 -#if ROOT_MAXterm, 0); - dataBufferReplace(&block->term, pTerm, nTerm); - - n = putVarint(c, iHeight); - n += putVarint(c+n, iChildBlock); - dataBufferInit(&block->data, INTERIOR_MAX); - dataBufferReplace(&block->data, c, n); - } - return block; -} - -#ifndef NDEBUG -/* Verify that the data is readable as an interior node. */ -static void interiorBlockValidate(InteriorBlock *pBlock){ - const char *pData = pBlock->data.pData; - int nData = pBlock->data.nData; - int n, iDummy; - sqlite_int64 iBlockid; - - assert( nData>0 ); - assert( pData!=0 ); - assert( pData+nData>pData ); - - /* Must lead with height of node as a varint(n), n>0 */ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>0 ); - assert( n0 ); - assert( n<=nData ); - pData += n; - nData -= n; - - /* Zero or more terms of positive length */ - if( nData!=0 ){ - /* First term is not delta-encoded. */ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>0 ); - assert( n+iDummy>0); - assert( n+iDummy<=nData ); - pData += n+iDummy; - nData -= n+iDummy; - - /* Following terms delta-encoded. */ - while( nData!=0 ){ - /* Length of shared prefix. */ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>=0 ); - assert( n0 ); - assert( iDummy>0 ); - assert( n+iDummy>0); - assert( n+iDummy<=nData ); - pData += n+iDummy; - nData -= n+iDummy; - } - } -} -#define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x) -#else -#define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 ) -#endif - -typedef struct InteriorWriter { - int iHeight; /* from 0 at leaves. */ - InteriorBlock *first, *last; - struct InteriorWriter *parentWriter; - - DataBuffer term; /* Last term written to block "last". */ - sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ -#ifndef NDEBUG - sqlite_int64 iLastChildBlock; /* for consistency checks. */ -#endif -} InteriorWriter; - -/* Initialize an interior node where pTerm[nTerm] marks the leftmost -** term in the tree. iChildBlock is the leftmost child block at the -** next level down the tree. -*/ -static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm, - sqlite_int64 iChildBlock, - InteriorWriter *pWriter){ - InteriorBlock *block; - assert( iHeight>0 ); - CLEAR(pWriter); - - pWriter->iHeight = iHeight; - pWriter->iOpeningChildBlock = iChildBlock; -#ifndef NDEBUG - pWriter->iLastChildBlock = iChildBlock; -#endif - block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); - pWriter->last = pWriter->first = block; - ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); - dataBufferInit(&pWriter->term, 0); -} - -/* Append the child node rooted at iChildBlock to the interior node, -** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree. -*/ -static void interiorWriterAppend(InteriorWriter *pWriter, - const char *pTerm, int nTerm, - sqlite_int64 iChildBlock){ - char c[VARINT_MAX+VARINT_MAX]; - int n, nPrefix = 0; - - ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); - - /* The first term written into an interior node is actually - ** associated with the second child added (the first child was added - ** in interiorWriterInit, or in the if clause at the bottom of this - ** function). That term gets encoded straight up, with nPrefix left - ** at 0. - */ - if( pWriter->term.nData==0 ){ - n = putVarint(c, nTerm); - }else{ - while( nPrefixterm.nData && - pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ - nPrefix++; - } - - n = putVarint(c, nPrefix); - n += putVarint(c+n, nTerm-nPrefix); - } - -#ifndef NDEBUG - pWriter->iLastChildBlock++; -#endif - assert( pWriter->iLastChildBlock==iChildBlock ); - - /* Overflow to a new block if the new term makes the current block - ** too big, and the current block already has enough terms. - */ - if( pWriter->last->data.nData+n+nTerm-nPrefix>INTERIOR_MAX && - iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ - pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, - pTerm, nTerm); - pWriter->last = pWriter->last->next; - pWriter->iOpeningChildBlock = iChildBlock; - dataBufferReset(&pWriter->term); - }else{ - dataBufferAppend2(&pWriter->last->data, c, n, - pTerm+nPrefix, nTerm-nPrefix); - dataBufferReplace(&pWriter->term, pTerm, nTerm); - } - ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); -} - -/* Free the space used by pWriter, including the linked-list of -** InteriorBlocks, and parentWriter, if present. -*/ -static int interiorWriterDestroy(InteriorWriter *pWriter){ - InteriorBlock *block = pWriter->first; - - while( block!=NULL ){ - InteriorBlock *b = block; - block = block->next; - dataBufferDestroy(&b->term); - dataBufferDestroy(&b->data); - sqlite3_free(b); - } - if( pWriter->parentWriter!=NULL ){ - interiorWriterDestroy(pWriter->parentWriter); - sqlite3_free(pWriter->parentWriter); - } - dataBufferDestroy(&pWriter->term); - SCRAMBLE(pWriter); - return SQLITE_OK; -} - -/* If pWriter can fit entirely in ROOT_MAX, return it as the root info -** directly, leaving *piEndBlockid unchanged. Otherwise, flush -** pWriter to %_segments, building a new layer of interior nodes, and -** recursively ask for their root into. -*/ -static int interiorWriterRootInfo(fulltext_vtab *v, InteriorWriter *pWriter, - char **ppRootInfo, int *pnRootInfo, - sqlite_int64 *piEndBlockid){ - InteriorBlock *block = pWriter->first; - sqlite_int64 iBlockid = 0; - int rc; - - /* If we can fit the segment inline */ - if( block==pWriter->last && block->data.nDatadata.pData; - *pnRootInfo = block->data.nData; - return SQLITE_OK; - } - - /* Flush the first block to %_segments, and create a new level of - ** interior node. - */ - ASSERT_VALID_INTERIOR_BLOCK(block); - rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); - if( rc!=SQLITE_OK ) return rc; - *piEndBlockid = iBlockid; - - pWriter->parentWriter = sqlite3_malloc(sizeof(*pWriter->parentWriter)); - interiorWriterInit(pWriter->iHeight+1, - block->term.pData, block->term.nData, - iBlockid, pWriter->parentWriter); - - /* Flush additional blocks and append to the higher interior - ** node. - */ - for(block=block->next; block!=NULL; block=block->next){ - ASSERT_VALID_INTERIOR_BLOCK(block); - rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); - if( rc!=SQLITE_OK ) return rc; - *piEndBlockid = iBlockid; - - interiorWriterAppend(pWriter->parentWriter, - block->term.pData, block->term.nData, iBlockid); - } - - /* Parent node gets the chance to be the root. */ - return interiorWriterRootInfo(v, pWriter->parentWriter, - ppRootInfo, pnRootInfo, piEndBlockid); -} - -/****************************************************************/ -/* InteriorReader is used to read off the data from an interior node -** (see comment at top of file for the format). -*/ -typedef struct InteriorReader { - const char *pData; - int nData; - - DataBuffer term; /* previous term, for decoding term delta. */ - - sqlite_int64 iBlockid; -} InteriorReader; - -static void interiorReaderDestroy(InteriorReader *pReader){ - dataBufferDestroy(&pReader->term); - SCRAMBLE(pReader); -} - -/* TODO(shess) The assertions are great, but what if we're in NDEBUG -** and the blob is empty or otherwise contains suspect data? -*/ -static void interiorReaderInit(const char *pData, int nData, - InteriorReader *pReader){ - int n, nTerm; - - /* Require at least the leading flag byte */ - assert( nData>0 ); - assert( pData[0]!='\0' ); - - CLEAR(pReader); - - /* Decode the base blockid, and set the cursor to the first term. */ - n = getVarint(pData+1, &pReader->iBlockid); - assert( 1+n<=nData ); - pReader->pData = pData+1+n; - pReader->nData = nData-(1+n); - - /* A single-child interior node (such as when a leaf node was too - ** large for the segment directory) won't have any terms. - ** Otherwise, decode the first term. - */ - if( pReader->nData==0 ){ - dataBufferInit(&pReader->term, 0); - }else{ - n = getVarint32(pReader->pData, &nTerm); - dataBufferInit(&pReader->term, nTerm); - dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); - assert( n+nTerm<=pReader->nData ); - pReader->pData += n+nTerm; - pReader->nData -= n+nTerm; - } -} - -static int interiorReaderAtEnd(InteriorReader *pReader){ - return pReader->term.nData==0; -} - -static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ - return pReader->iBlockid; -} - -static int interiorReaderTermBytes(InteriorReader *pReader){ - assert( !interiorReaderAtEnd(pReader) ); - return pReader->term.nData; -} -static const char *interiorReaderTerm(InteriorReader *pReader){ - assert( !interiorReaderAtEnd(pReader) ); - return pReader->term.pData; -} - -/* Step forward to the next term in the node. */ -static void interiorReaderStep(InteriorReader *pReader){ - assert( !interiorReaderAtEnd(pReader) ); - - /* If the last term has been read, signal eof, else construct the - ** next term. - */ - if( pReader->nData==0 ){ - dataBufferReset(&pReader->term); - }else{ - int n, nPrefix, nSuffix; - - n = getVarint32(pReader->pData, &nPrefix); - n += getVarint32(pReader->pData+n, &nSuffix); - - /* Truncate the current term and append suffix data. */ - pReader->term.nData = nPrefix; - dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); - - assert( n+nSuffix<=pReader->nData ); - pReader->pData += n+nSuffix; - pReader->nData -= n+nSuffix; - } - pReader->iBlockid++; -} - -/* Compare the current term to pTerm[nTerm], returning strcmp-style -** results. If isPrefix, equality means equal through nTerm bytes. -*/ -static int interiorReaderTermCmp(InteriorReader *pReader, - const char *pTerm, int nTerm, int isPrefix){ - const char *pReaderTerm = interiorReaderTerm(pReader); - int nReaderTerm = interiorReaderTermBytes(pReader); - int c, n = nReaderTerm0 ) return -1; - if( nTerm>0 ) return 1; - return 0; - } - - c = memcmp(pReaderTerm, pTerm, n); - if( c!=0 ) return c; - if( isPrefix && n==nTerm ) return 0; - return nReaderTerm - nTerm; -} - -/****************************************************************/ -/* LeafWriter is used to collect terms and associated doclist data -** into leaf blocks in %_segments (see top of file for format info). -** Expected usage is: -** -** LeafWriter writer; -** leafWriterInit(0, 0, &writer); -** while( sorted_terms_left_to_process ){ -** // data is doclist data for that term. -** rc = leafWriterStep(v, &writer, pTerm, nTerm, pData, nData); -** if( rc!=SQLITE_OK ) goto err; -** } -** rc = leafWriterFinalize(v, &writer); -**err: -** leafWriterDestroy(&writer); -** return rc; -** -** leafWriterStep() may write a collected leaf out to %_segments. -** leafWriterFinalize() finishes writing any buffered data and stores -** a root node in %_segdir. leafWriterDestroy() frees all buffers and -** InteriorWriters allocated as part of writing this segment. -** -** TODO(shess) Document leafWriterStepMerge(). -*/ - -/* Put terms with data this big in their own block. */ -#define STANDALONE_MIN 1024 - -/* Keep leaf blocks below this size. */ -#define LEAF_MAX 2048 - -typedef struct LeafWriter { - int iLevel; - int idx; - sqlite_int64 iStartBlockid; /* needed to create the root info */ - sqlite_int64 iEndBlockid; /* when we're done writing. */ - - DataBuffer term; /* previous encoded term */ - DataBuffer data; /* encoding buffer */ - - /* bytes of first term in the current node which distinguishes that - ** term from the last term of the previous node. - */ - int nTermDistinct; - - InteriorWriter parentWriter; /* if we overflow */ - int has_parent; -} LeafWriter; - -static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){ - CLEAR(pWriter); - pWriter->iLevel = iLevel; - pWriter->idx = idx; - - dataBufferInit(&pWriter->term, 32); - - /* Start out with a reasonably sized block, though it can grow. */ - dataBufferInit(&pWriter->data, LEAF_MAX); -} - -#ifndef NDEBUG -/* Verify that the data is readable as a leaf node. */ -static void leafNodeValidate(const char *pData, int nData){ - int n, iDummy; - - if( nData==0 ) return; - assert( nData>0 ); - assert( pData!=0 ); - assert( pData+nData>pData ); - - /* Must lead with a varint(0) */ - n = getVarint32(pData, &iDummy); - assert( iDummy==0 ); - assert( n>0 ); - assert( n0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy<=nData ); - ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); - pData += n+iDummy; - nData -= n+iDummy; - - /* Verify that trailing terms and doclists also are readable. */ - while( nData!=0 ){ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>=0 ); - assert( n0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy<=nData ); - ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); - pData += n+iDummy; - nData -= n+iDummy; - } -} -#define ASSERT_VALID_LEAF_NODE(p, n) leafNodeValidate(p, n) -#else -#define ASSERT_VALID_LEAF_NODE(p, n) assert( 1 ) -#endif - -/* Flush the current leaf node to %_segments, and adding the resulting -** blockid and the starting term to the interior node which will -** contain it. -*/ -static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter, - int iData, int nData){ - sqlite_int64 iBlockid = 0; - const char *pStartingTerm; - int nStartingTerm, rc, n; - - /* Must have the leading varint(0) flag, plus at least some - ** valid-looking data. - */ - assert( nData>2 ); - assert( iData>=0 ); - assert( iData+nData<=pWriter->data.nData ); - ASSERT_VALID_LEAF_NODE(pWriter->data.pData+iData, nData); - - rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid); - if( rc!=SQLITE_OK ) return rc; - assert( iBlockid!=0 ); - - /* Reconstruct the first term in the leaf for purposes of building - ** the interior node. - */ - n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm); - pStartingTerm = pWriter->data.pData+iData+1+n; - assert( pWriter->data.nData>iData+1+n+nStartingTerm ); - assert( pWriter->nTermDistinct>0 ); - assert( pWriter->nTermDistinct<=nStartingTerm ); - nStartingTerm = pWriter->nTermDistinct; - - if( pWriter->has_parent ){ - interiorWriterAppend(&pWriter->parentWriter, - pStartingTerm, nStartingTerm, iBlockid); - }else{ - interiorWriterInit(1, pStartingTerm, nStartingTerm, iBlockid, - &pWriter->parentWriter); - pWriter->has_parent = 1; - } - - /* Track the span of this segment's leaf nodes. */ - if( pWriter->iEndBlockid==0 ){ - pWriter->iEndBlockid = pWriter->iStartBlockid = iBlockid; - }else{ - pWriter->iEndBlockid++; - assert( iBlockid==pWriter->iEndBlockid ); - } - - return SQLITE_OK; -} -static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){ - int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData); - if( rc!=SQLITE_OK ) return rc; - - /* Re-initialize the output buffer. */ - dataBufferReset(&pWriter->data); - - return SQLITE_OK; -} - -/* Fetch the root info for the segment. If the entire leaf fits -** within ROOT_MAX, then it will be returned directly, otherwise it -** will be flushed and the root info will be returned from the -** interior node. *piEndBlockid is set to the blockid of the last -** interior or leaf node written to disk (0 if none are written at -** all). -*/ -static int leafWriterRootInfo(fulltext_vtab *v, LeafWriter *pWriter, - char **ppRootInfo, int *pnRootInfo, - sqlite_int64 *piEndBlockid){ - /* we can fit the segment entirely inline */ - if( !pWriter->has_parent && pWriter->data.nDatadata.pData; - *pnRootInfo = pWriter->data.nData; - *piEndBlockid = 0; - return SQLITE_OK; - } - - /* Flush remaining leaf data. */ - if( pWriter->data.nData>0 ){ - int rc = leafWriterFlush(v, pWriter); - if( rc!=SQLITE_OK ) return rc; - } - - /* We must have flushed a leaf at some point. */ - assert( pWriter->has_parent ); - - /* Tenatively set the end leaf blockid as the end blockid. If the - ** interior node can be returned inline, this will be the final - ** blockid, otherwise it will be overwritten by - ** interiorWriterRootInfo(). - */ - *piEndBlockid = pWriter->iEndBlockid; - - return interiorWriterRootInfo(v, &pWriter->parentWriter, - ppRootInfo, pnRootInfo, piEndBlockid); -} - -/* Collect the rootInfo data and store it into the segment directory. -** This has the effect of flushing the segment's leaf data to -** %_segments, and also flushing any interior nodes to %_segments. -*/ -static int leafWriterFinalize(fulltext_vtab *v, LeafWriter *pWriter){ - sqlite_int64 iEndBlockid; - char *pRootInfo; - int rc, nRootInfo; - - rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - /* Don't bother storing an entirely empty segment. */ - if( iEndBlockid==0 && nRootInfo==0 ) return SQLITE_OK; - - return segdir_set(v, pWriter->iLevel, pWriter->idx, - pWriter->iStartBlockid, pWriter->iEndBlockid, - iEndBlockid, pRootInfo, nRootInfo); -} - -static void leafWriterDestroy(LeafWriter *pWriter){ - if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter); - dataBufferDestroy(&pWriter->term); - dataBufferDestroy(&pWriter->data); -} - -/* Encode a term into the leafWriter, delta-encoding as appropriate. -** Returns the length of the new term which distinguishes it from the -** previous term, which can be used to set nTermDistinct when a node -** boundary is crossed. -*/ -static int leafWriterEncodeTerm(LeafWriter *pWriter, - const char *pTerm, int nTerm){ - char c[VARINT_MAX+VARINT_MAX]; - int n, nPrefix = 0; - - assert( nTerm>0 ); - while( nPrefixterm.nData && - pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ - nPrefix++; - /* Failing this implies that the terms weren't in order. */ - assert( nPrefixdata.nData==0 ){ - /* Encode the node header and leading term as: - ** varint(0) - ** varint(nTerm) - ** char pTerm[nTerm] - */ - n = putVarint(c, '\0'); - n += putVarint(c+n, nTerm); - dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm); - }else{ - /* Delta-encode the term as: - ** varint(nPrefix) - ** varint(nSuffix) - ** char pTermSuffix[nSuffix] - */ - n = putVarint(c, nPrefix); - n += putVarint(c+n, nTerm-nPrefix); - dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix); - } - dataBufferReplace(&pWriter->term, pTerm, nTerm); - - return nPrefix+1; -} - -/* Used to avoid a memmove when a large amount of doclist data is in -** the buffer. This constructs a node and term header before -** iDoclistData and flushes the resulting complete node using -** leafWriterInternalFlush(). -*/ -static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter, - const char *pTerm, int nTerm, - int iDoclistData){ - char c[VARINT_MAX+VARINT_MAX]; - int iData, n = putVarint(c, 0); - n += putVarint(c+n, nTerm); - - /* There should always be room for the header. Even if pTerm shared - ** a substantial prefix with the previous term, the entire prefix - ** could be constructed from earlier data in the doclist, so there - ** should be room. - */ - assert( iDoclistData>=n+nTerm ); - - iData = iDoclistData-(n+nTerm); - memcpy(pWriter->data.pData+iData, c, n); - memcpy(pWriter->data.pData+iData+n, pTerm, nTerm); - - return leafWriterInternalFlush(v, pWriter, iData, pWriter->data.nData-iData); -} - -/* Push pTerm[nTerm] along with the doclist data to the leaf layer of -** %_segments. -*/ -static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter, - const char *pTerm, int nTerm, - DLReader *pReaders, int nReaders){ - char c[VARINT_MAX+VARINT_MAX]; - int iTermData = pWriter->data.nData, iDoclistData; - int i, nData, n, nActualData, nActual, rc, nTermDistinct; - - ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); - nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm); - - /* Remember nTermDistinct if opening a new node. */ - if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct; - - iDoclistData = pWriter->data.nData; - - /* Estimate the length of the merged doclist so we can leave space - ** to encode it. - */ - for(i=0, nData=0; idata, c, n); - - docListMerge(&pWriter->data, pReaders, nReaders); - ASSERT_VALID_DOCLIST(DL_DEFAULT, - pWriter->data.pData+iDoclistData+n, - pWriter->data.nData-iDoclistData-n, NULL); - - /* The actual amount of doclist data at this point could be smaller - ** than the length we encoded. Additionally, the space required to - ** encode this length could be smaller. For small doclists, this is - ** not a big deal, we can just use memmove() to adjust things. - */ - nActualData = pWriter->data.nData-(iDoclistData+n); - nActual = putVarint(c, nActualData); - assert( nActualData<=nData ); - assert( nActual<=n ); - - /* If the new doclist is big enough for force a standalone leaf - ** node, we can immediately flush it inline without doing the - ** memmove(). - */ - /* TODO(shess) This test matches leafWriterStep(), which does this - ** test before it knows the cost to varint-encode the term and - ** doclist lengths. At some point, change to - ** pWriter->data.nData-iTermData>STANDALONE_MIN. - */ - if( nTerm+nActualData>STANDALONE_MIN ){ - /* Push leaf node from before this term. */ - if( iTermData>0 ){ - rc = leafWriterInternalFlush(v, pWriter, 0, iTermData); - if( rc!=SQLITE_OK ) return rc; - - pWriter->nTermDistinct = nTermDistinct; - } - - /* Fix the encoded doclist length. */ - iDoclistData += n - nActual; - memcpy(pWriter->data.pData+iDoclistData, c, nActual); - - /* Push the standalone leaf node. */ - rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData); - if( rc!=SQLITE_OK ) return rc; - - /* Leave the node empty. */ - dataBufferReset(&pWriter->data); - - return rc; - } - - /* At this point, we know that the doclist was small, so do the - ** memmove if indicated. - */ - if( nActualdata.pData+iDoclistData+nActual, - pWriter->data.pData+iDoclistData+n, - pWriter->data.nData-(iDoclistData+n)); - pWriter->data.nData -= n-nActual; - } - - /* Replace written length with actual length. */ - memcpy(pWriter->data.pData+iDoclistData, c, nActual); - - /* If the node is too large, break things up. */ - /* TODO(shess) This test matches leafWriterStep(), which does this - ** test before it knows the cost to varint-encode the term and - ** doclist lengths. At some point, change to - ** pWriter->data.nData>LEAF_MAX. - */ - if( iTermData+nTerm+nActualData>LEAF_MAX ){ - /* Flush out the leading data as a node */ - rc = leafWriterInternalFlush(v, pWriter, 0, iTermData); - if( rc!=SQLITE_OK ) return rc; - - pWriter->nTermDistinct = nTermDistinct; - - /* Rebuild header using the current term */ - n = putVarint(pWriter->data.pData, 0); - n += putVarint(pWriter->data.pData+n, nTerm); - memcpy(pWriter->data.pData+n, pTerm, nTerm); - n += nTerm; - - /* There should always be room, because the previous encoding - ** included all data necessary to construct the term. - */ - assert( ndata.nData-iDoclistDatadata.pData+n, - pWriter->data.pData+iDoclistData, - pWriter->data.nData-iDoclistData); - pWriter->data.nData -= iDoclistData-n; - } - ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); - - return SQLITE_OK; -} - -/* Push pTerm[nTerm] along with the doclist data to the leaf layer of -** %_segments. -*/ -/* TODO(shess) Revise writeZeroSegment() so that doclists are -** constructed directly in pWriter->data. -*/ -static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, - const char *pTerm, int nTerm, - const char *pData, int nData){ - int rc; - DLReader reader; - - dlrInit(&reader, DL_DEFAULT, pData, nData); - rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); - dlrDestroy(&reader); - - return rc; -} - - -/****************************************************************/ -/* LeafReader is used to iterate over an individual leaf node. */ -typedef struct LeafReader { - DataBuffer term; /* copy of current term. */ - - const char *pData; /* data for current term. */ - int nData; -} LeafReader; - -static void leafReaderDestroy(LeafReader *pReader){ - dataBufferDestroy(&pReader->term); - SCRAMBLE(pReader); -} - -static int leafReaderAtEnd(LeafReader *pReader){ - return pReader->nData<=0; -} - -/* Access the current term. */ -static int leafReaderTermBytes(LeafReader *pReader){ - return pReader->term.nData; -} -static const char *leafReaderTerm(LeafReader *pReader){ - assert( pReader->term.nData>0 ); - return pReader->term.pData; -} - -/* Access the doclist data for the current term. */ -static int leafReaderDataBytes(LeafReader *pReader){ - int nData; - assert( pReader->term.nData>0 ); - getVarint32(pReader->pData, &nData); - return nData; -} -static const char *leafReaderData(LeafReader *pReader){ - int n, nData; - assert( pReader->term.nData>0 ); - n = getVarint32(pReader->pData, &nData); - return pReader->pData+n; -} - -static void leafReaderInit(const char *pData, int nData, - LeafReader *pReader){ - int nTerm, n; - - assert( nData>0 ); - assert( pData[0]=='\0' ); - - CLEAR(pReader); - - /* Read the first term, skipping the header byte. */ - n = getVarint32(pData+1, &nTerm); - dataBufferInit(&pReader->term, nTerm); - dataBufferReplace(&pReader->term, pData+1+n, nTerm); - - /* Position after the first term. */ - assert( 1+n+nTermpData = pData+1+n+nTerm; - pReader->nData = nData-1-n-nTerm; -} - -/* Step the reader forward to the next term. */ -static void leafReaderStep(LeafReader *pReader){ - int n, nData, nPrefix, nSuffix; - assert( !leafReaderAtEnd(pReader) ); - - /* Skip previous entry's data block. */ - n = getVarint32(pReader->pData, &nData); - assert( n+nData<=pReader->nData ); - pReader->pData += n+nData; - pReader->nData -= n+nData; - - if( !leafReaderAtEnd(pReader) ){ - /* Construct the new term using a prefix from the old term plus a - ** suffix from the leaf data. - */ - n = getVarint32(pReader->pData, &nPrefix); - n += getVarint32(pReader->pData+n, &nSuffix); - assert( n+nSuffixnData ); - pReader->term.nData = nPrefix; - dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); - - pReader->pData += n+nSuffix; - pReader->nData -= n+nSuffix; - } -} - -/* strcmp-style comparison of pReader's current term against pTerm. -** If isPrefix, equality means equal through nTerm bytes. -*/ -static int leafReaderTermCmp(LeafReader *pReader, - const char *pTerm, int nTerm, int isPrefix){ - int c, n = pReader->term.nDataterm.nData : nTerm; - if( n==0 ){ - if( pReader->term.nData>0 ) return -1; - if(nTerm>0 ) return 1; - return 0; - } - - c = memcmp(pReader->term.pData, pTerm, n); - if( c!=0 ) return c; - if( isPrefix && n==nTerm ) return 0; - return pReader->term.nData - nTerm; -} - - -/****************************************************************/ -/* LeavesReader wraps LeafReader to allow iterating over the entire -** leaf layer of the tree. -*/ -typedef struct LeavesReader { - int idx; /* Index within the segment. */ - - sqlite3_stmt *pStmt; /* Statement we're streaming leaves from. */ - int eof; /* we've seen SQLITE_DONE from pStmt. */ - - LeafReader leafReader; /* reader for the current leaf. */ - DataBuffer rootData; /* root data for inline. */ -} LeavesReader; - -/* Access the current term. */ -static int leavesReaderTermBytes(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderTermBytes(&pReader->leafReader); -} -static const char *leavesReaderTerm(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderTerm(&pReader->leafReader); -} - -/* Access the doclist data for the current term. */ -static int leavesReaderDataBytes(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderDataBytes(&pReader->leafReader); -} -static const char *leavesReaderData(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderData(&pReader->leafReader); -} - -static int leavesReaderAtEnd(LeavesReader *pReader){ - return pReader->eof; -} - -/* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus -** leaving the statement handle open, which locks the table. -*/ -/* TODO(shess) This "solution" is not satisfactory. Really, there -** should be check-in function for all statement handles which -** arranges to call sqlite3_reset(). This most likely will require -** modification to control flow all over the place, though, so for now -** just punt. -** -** Note the current system assumes that segment merges will run to -** completion, which is why this particular probably hasn't arisen in -** this case. Probably a brittle assumption. -*/ -static int leavesReaderReset(LeavesReader *pReader){ - return sqlite3_reset(pReader->pStmt); -} - -static void leavesReaderDestroy(LeavesReader *pReader){ - /* If idx is -1, that means we're using a non-cached statement - ** handle in the optimize() case, so we need to release it. - */ - if( pReader->pStmt!=NULL && pReader->idx==-1 ){ - sqlite3_finalize(pReader->pStmt); - } - leafReaderDestroy(&pReader->leafReader); - dataBufferDestroy(&pReader->rootData); - SCRAMBLE(pReader); -} - -/* Initialize pReader with the given root data (if iStartBlockid==0 -** the leaf data was entirely contained in the root), or from the -** stream of blocks between iStartBlockid and iEndBlockid, inclusive. -*/ -static int leavesReaderInit(fulltext_vtab *v, - int idx, - sqlite_int64 iStartBlockid, - sqlite_int64 iEndBlockid, - const char *pRootData, int nRootData, - LeavesReader *pReader){ - CLEAR(pReader); - pReader->idx = idx; - - dataBufferInit(&pReader->rootData, 0); - if( iStartBlockid==0 ){ - /* Entire leaf level fit in root data. */ - dataBufferReplace(&pReader->rootData, pRootData, nRootData); - leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, - &pReader->leafReader); - }else{ - sqlite3_stmt *s; - int rc = sql_get_leaf_statement(v, idx, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iStartBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ){ - pReader->eof = 1; - return SQLITE_OK; - } - if( rc!=SQLITE_ROW ) return rc; - - pReader->pStmt = s; - leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), - sqlite3_column_bytes(pReader->pStmt, 0), - &pReader->leafReader); - } - return SQLITE_OK; -} - -/* Step the current leaf forward to the next term. If we reach the -** end of the current leaf, step forward to the next leaf block. -*/ -static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ - assert( !leavesReaderAtEnd(pReader) ); - leafReaderStep(&pReader->leafReader); - - if( leafReaderAtEnd(&pReader->leafReader) ){ - int rc; - if( pReader->rootData.pData ){ - pReader->eof = 1; - return SQLITE_OK; - } - rc = sqlite3_step(pReader->pStmt); - if( rc!=SQLITE_ROW ){ - pReader->eof = 1; - return rc==SQLITE_DONE ? SQLITE_OK : rc; - } - leafReaderDestroy(&pReader->leafReader); - leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), - sqlite3_column_bytes(pReader->pStmt, 0), - &pReader->leafReader); - } - return SQLITE_OK; -} - -/* Order LeavesReaders by their term, ignoring idx. Readers at eof -** always sort to the end. -*/ -static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ - if( leavesReaderAtEnd(lr1) ){ - if( leavesReaderAtEnd(lr2) ) return 0; - return 1; - } - if( leavesReaderAtEnd(lr2) ) return -1; - - return leafReaderTermCmp(&lr1->leafReader, - leavesReaderTerm(lr2), leavesReaderTermBytes(lr2), - 0); -} - -/* Similar to leavesReaderTermCmp(), with additional ordering by idx -** so that older segments sort before newer segments. -*/ -static int leavesReaderCmp(LeavesReader *lr1, LeavesReader *lr2){ - int c = leavesReaderTermCmp(lr1, lr2); - if( c!=0 ) return c; - return lr1->idx-lr2->idx; -} - -/* Assume that pLr[1]..pLr[nLr] are sorted. Bubble pLr[0] into its -** sorted position. -*/ -static void leavesReaderReorder(LeavesReader *pLr, int nLr){ - while( nLr>1 && leavesReaderCmp(pLr, pLr+1)>0 ){ - LeavesReader tmp = pLr[0]; - pLr[0] = pLr[1]; - pLr[1] = tmp; - nLr--; - pLr++; - } -} - -/* Initializes pReaders with the segments from level iLevel, returning -** the number of segments in *piReaders. Leaves pReaders in sorted -** order. -*/ -static int leavesReadersInit(fulltext_vtab *v, int iLevel, - LeavesReader *pReaders, int *piReaders){ - sqlite3_stmt *s; - int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - i = 0; - while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - sqlite_int64 iStart = sqlite3_column_int64(s, 0); - sqlite_int64 iEnd = sqlite3_column_int64(s, 1); - const char *pRootData = sqlite3_column_blob(s, 2); - int nRootData = sqlite3_column_bytes(s, 2); - - assert( i0 ){ - leavesReaderDestroy(&pReaders[i]); - } - return rc; - } - - *piReaders = i; - - /* Leave our results sorted by term, then age. */ - while( i-- ){ - leavesReaderReorder(pReaders+i, *piReaders-i); - } - return SQLITE_OK; -} - -/* Merge doclists from pReaders[nReaders] into a single doclist, which -** is written to pWriter. Assumes pReaders is ordered oldest to -** newest. -*/ -/* TODO(shess) Consider putting this inline in segmentMerge(). */ -static int leavesReadersMerge(fulltext_vtab *v, - LeavesReader *pReaders, int nReaders, - LeafWriter *pWriter){ - DLReader dlReaders[MERGE_COUNT]; - const char *pTerm = leavesReaderTerm(pReaders); - int i, nTerm = leavesReaderTermBytes(pReaders); - - assert( nReaders<=MERGE_COUNT ); - - for(i=0; i0 ){ - rc = leavesReaderStep(v, lrs+i); - if( rc!=SQLITE_OK ) goto err; - - /* Reorder by term, then by age. */ - leavesReaderReorder(lrs+i, MERGE_COUNT-i); - } - } - - for(i=0; i0 ); - - for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); - rc=leavesReaderStep(v, pReader)){ - /* TODO(shess) Really want leavesReaderTermCmp(), but that name is - ** already taken to compare the terms of two LeavesReaders. Think - ** on a better name. [Meanwhile, break encapsulation rather than - ** use a confusing name.] - */ - int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); - if( c>0 ) break; /* Past any possible matches. */ - if( c==0 ){ - const char *pData = leavesReaderData(pReader); - int iBuffer, nData = leavesReaderDataBytes(pReader); - - /* Find the first empty buffer. */ - for(iBuffer=0; iBuffer0 ){ - assert(pBuffers!=NULL); - memcpy(p, pBuffers, nBuffers*sizeof(*pBuffers)); - sqlite3_free(pBuffers); - } - pBuffers = p; - } - dataBufferInit(&(pBuffers[nBuffers]), 0); - nBuffers++; - } - - /* At this point, must have an empty at iBuffer. */ - assert(iBufferpData, p->nData); - - /* dataBufferReset() could allow a large doclist to blow up - ** our memory requirements. - */ - if( p->nCapacity<1024 ){ - dataBufferReset(p); - }else{ - dataBufferDestroy(p); - dataBufferInit(p, 0); - } - } - } - } - } - - /* Union all the doclists together into *out. */ - /* TODO(shess) What if *out is big? Sigh. */ - if( rc==SQLITE_OK && nBuffers>0 ){ - int iBuffer; - for(iBuffer=0; iBuffer0 ){ - if( out->nData==0 ){ - dataBufferSwap(out, &(pBuffers[iBuffer])); - }else{ - docListAccumulateUnion(out, pBuffers[iBuffer].pData, - pBuffers[iBuffer].nData); - } - } - } - } - - while( nBuffers-- ){ - dataBufferDestroy(&(pBuffers[nBuffers])); - } - if( pBuffers!=NULL ) sqlite3_free(pBuffers); - - return rc; -} - -/* Call loadSegmentLeavesInt() with pData/nData as input. */ -static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - LeavesReader reader; - int rc; - - assert( nData>1 ); - assert( *pData=='\0' ); - rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader); - if( rc!=SQLITE_OK ) return rc; - - rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out); - leavesReaderReset(&reader); - leavesReaderDestroy(&reader); - return rc; -} - -/* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to -** iEndLeaf (inclusive) as input, and merge the resulting doclist into -** out. -*/ -static int loadSegmentLeaves(fulltext_vtab *v, - sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - int rc; - LeavesReader reader; - - assert( iStartLeaf<=iEndLeaf ); - rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader); - if( rc!=SQLITE_OK ) return rc; - - rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out); - leavesReaderReset(&reader); - leavesReaderDestroy(&reader); - return rc; -} - -/* Taking pData/nData as an interior node, find the sequence of child -** nodes which could include pTerm/nTerm/isPrefix. Note that the -** interior node terms logically come between the blocks, so there is -** one more blockid than there are terms (that block contains terms >= -** the last interior-node term). -*/ -/* TODO(shess) The calling code may already know that the end child is -** not worth calculating, because the end may be in a later sibling -** node. Consider whether breaking symmetry is worthwhile. I suspect -** it is not worthwhile. -*/ -static void getChildrenContaining(const char *pData, int nData, - const char *pTerm, int nTerm, int isPrefix, - sqlite_int64 *piStartChild, - sqlite_int64 *piEndChild){ - InteriorReader reader; - - assert( nData>1 ); - assert( *pData!='\0' ); - interiorReaderInit(pData, nData, &reader); - - /* Scan for the first child which could contain pTerm/nTerm. */ - while( !interiorReaderAtEnd(&reader) ){ - if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; - interiorReaderStep(&reader); - } - *piStartChild = interiorReaderCurrentBlockid(&reader); - - /* Keep scanning to find a term greater than our term, using prefix - ** comparison if indicated. If isPrefix is false, this will be the - ** same blockid as the starting block. - */ - while( !interiorReaderAtEnd(&reader) ){ - if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; - interiorReaderStep(&reader); - } - *piEndChild = interiorReaderCurrentBlockid(&reader); - - interiorReaderDestroy(&reader); - - /* Children must ascend, and if !prefix, both must be the same. */ - assert( *piEndChild>=*piStartChild ); - assert( isPrefix || *piStartChild==*piEndChild ); -} - -/* Read block at iBlockid and pass it with other params to -** getChildrenContaining(). -*/ -static int loadAndGetChildrenContaining( - fulltext_vtab *v, - sqlite_int64 iBlockid, - const char *pTerm, int nTerm, int isPrefix, - sqlite_int64 *piStartChild, sqlite_int64 *piEndChild -){ - sqlite3_stmt *s = NULL; - int rc; - - assert( iBlockid!=0 ); - assert( pTerm!=NULL ); - assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ - assert( piStartChild!=NULL ); - assert( piEndChild!=NULL ); - - rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_ERROR; - if( rc!=SQLITE_ROW ) return rc; - - getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0), - pTerm, nTerm, isPrefix, piStartChild, piEndChild); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain - * locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - - return SQLITE_OK; -} - -/* Traverse the tree represented by pData[nData] looking for -** pTerm[nTerm], placing its doclist into *out. This is internal to -** loadSegment() to make error-handling cleaner. -*/ -static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, - sqlite_int64 iLeavesEnd, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - /* Special case where root is a leaf. */ - if( *pData=='\0' ){ - return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); - }else{ - int rc; - sqlite_int64 iStartChild, iEndChild; - - /* Process pData as an interior node, then loop down the tree - ** until we find the set of leaf nodes to scan for the term. - */ - getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, - &iStartChild, &iEndChild); - while( iStartChild>iLeavesEnd ){ - sqlite_int64 iNextStart, iNextEnd; - rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, - &iNextStart, &iNextEnd); - if( rc!=SQLITE_OK ) return rc; - - /* If we've branched, follow the end branch, too. */ - if( iStartChild!=iEndChild ){ - sqlite_int64 iDummy; - rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, - &iDummy, &iNextEnd); - if( rc!=SQLITE_OK ) return rc; - } - - assert( iNextStart<=iNextEnd ); - iStartChild = iNextStart; - iEndChild = iNextEnd; - } - assert( iStartChild<=iLeavesEnd ); - assert( iEndChild<=iLeavesEnd ); - - /* Scan through the leaf segments for doclists. */ - return loadSegmentLeaves(v, iStartChild, iEndChild, - pTerm, nTerm, isPrefix, out); - } -} - -/* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then -** merge its doclist over *out (any duplicate doclists read from the -** segment rooted at pData will overwrite those in *out). -*/ -/* TODO(shess) Consider changing this to determine the depth of the -** leaves using either the first characters of interior nodes (when -** ==1, we're one level above the leaves), or the first character of -** the root (which will describe the height of the tree directly). -** Either feels somewhat tricky to me. -*/ -/* TODO(shess) The current merge is likely to be slow for large -** doclists (though it should process from newest/smallest to -** oldest/largest, so it may not be that bad). It might be useful to -** modify things to allow for N-way merging. This could either be -** within a segment, with pairwise merges across segments, or across -** all segments at once. -*/ -static int loadSegment(fulltext_vtab *v, const char *pData, int nData, - sqlite_int64 iLeavesEnd, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - DataBuffer result; - int rc; - - assert( nData>1 ); - - /* This code should never be called with buffered updates. */ - assert( v->nPendingData<0 ); - - dataBufferInit(&result, 0); - rc = loadSegmentInt(v, pData, nData, iLeavesEnd, - pTerm, nTerm, isPrefix, &result); - if( rc==SQLITE_OK && result.nData>0 ){ - if( out->nData==0 ){ - DataBuffer tmp = *out; - *out = result; - result = tmp; - }else{ - DataBuffer merged; - DLReader readers[2]; - - dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); - dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); - dataBufferInit(&merged, out->nData+result.nData); - docListMerge(&merged, readers, 2); - dataBufferDestroy(out); - *out = merged; - dlrDestroy(&readers[0]); - dlrDestroy(&readers[1]); - } - } - dataBufferDestroy(&result); - return rc; -} - -/* Scan the database and merge together the posting lists for the term -** into *out. -*/ -static int termSelect(fulltext_vtab *v, int iColumn, - const char *pTerm, int nTerm, int isPrefix, - DocListType iType, DataBuffer *out){ - DataBuffer doclist; - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - /* This code should never be called with buffered updates. */ - assert( v->nPendingData<0 ); - - dataBufferInit(&doclist, 0); - - /* Traverse the segments from oldest to newest so that newer doclist - ** elements for given docids overwrite older elements. - */ - while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - const char *pData = sqlite3_column_blob(s, 2); - const int nData = sqlite3_column_bytes(s, 2); - const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); - rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, - &doclist); - if( rc!=SQLITE_OK ) goto err; - } - if( rc==SQLITE_DONE ){ - if( doclist.nData!=0 ){ - /* TODO(shess) The old term_select_all() code applied the column - ** restrict as we merged segments, leading to smaller buffers. - ** This is probably worthwhile to bring back, once the new storage - ** system is checked in. - */ - if( iColumn==v->nColumn) iColumn = -1; - docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, - iColumn, iType, out); - } - rc = SQLITE_OK; - } - - err: - dataBufferDestroy(&doclist); - return rc; -} - -/****************************************************************/ -/* Used to hold hashtable data for sorting. */ -typedef struct TermData { - const char *pTerm; - int nTerm; - DLCollector *pCollector; -} TermData; - -/* Orders TermData elements in strcmp fashion ( <0 for less-than, 0 -** for equal, >0 for greater-than). -*/ -static int termDataCmp(const void *av, const void *bv){ - const TermData *a = (const TermData *)av; - const TermData *b = (const TermData *)bv; - int n = a->nTermnTerm ? a->nTerm : b->nTerm; - int c = memcmp(a->pTerm, b->pTerm, n); - if( c!=0 ) return c; - return a->nTerm-b->nTerm; -} - -/* Order pTerms data by term, then write a new level 0 segment using -** LeafWriter. -*/ -static int writeZeroSegment(fulltext_vtab *v, fts2Hash *pTerms){ - fts2HashElem *e; - int idx, rc, i, n; - TermData *pData; - LeafWriter writer; - DataBuffer dl; - - /* Determine the next index at level 0, merging as necessary. */ - rc = segdirNextIndex(v, 0, &idx); - if( rc!=SQLITE_OK ) return rc; - - n = fts2HashCount(pTerms); - pData = sqlite3_malloc(n*sizeof(TermData)); - - for(i = 0, e = fts2HashFirst(pTerms); e; i++, e = fts2HashNext(e)){ - assert( i1 ) qsort(pData, n, sizeof(*pData), termDataCmp); - - /* TODO(shess) Refactor so that we can write directly to the segment - ** DataBuffer, as happens for segment merges. - */ - leafWriterInit(0, idx, &writer); - dataBufferInit(&dl, 0); - for(i=0; inPendingData>=0 ){ - fts2HashElem *e; - for(e=fts2HashFirst(&v->pendingTerms); e; e=fts2HashNext(e)){ - dlcDelete(fts2HashData(e)); - } - fts2HashClear(&v->pendingTerms); - v->nPendingData = -1; - } - return SQLITE_OK; -} - -/* If pendingTerms has data, flush it to a level-zero segment, and -** free it. -*/ -static int flushPendingTerms(fulltext_vtab *v){ - if( v->nPendingData>=0 ){ - int rc = writeZeroSegment(v, &v->pendingTerms); - if( rc==SQLITE_OK ) clearPendingTerms(v); - return rc; - } - return SQLITE_OK; -} - -/* If pendingTerms is "too big", or docid is out of order, flush it. -** Regardless, be certain that pendingTerms is initialized for use. -*/ -static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid){ - /* TODO(shess) Explore whether partially flushing the buffer on - ** forced-flush would provide better performance. I suspect that if - ** we ordered the doclists by size and flushed the largest until the - ** buffer was half empty, that would let the less frequent terms - ** generate longer doclists. - */ - if( iDocid<=v->iPrevDocid || v->nPendingData>kPendingThreshold ){ - int rc = flushPendingTerms(v); - if( rc!=SQLITE_OK ) return rc; - } - if( v->nPendingData<0 ){ - fts2HashInit(&v->pendingTerms, FTS2_HASH_STRING, 1); - v->nPendingData = 0; - } - v->iPrevDocid = iDocid; - return SQLITE_OK; -} - -/* This function implements the xUpdate callback; it is the top-level entry - * point for inserting, deleting or updating a row in a full-text table. */ -static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, - sqlite_int64 *pRowid){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - int rc; - - TRACE(("FTS2 Update %p\n", pVtab)); - - if( nArg<2 ){ - rc = index_delete(v, sqlite3_value_int64(ppArg[0])); - if( rc==SQLITE_OK ){ - /* If we just deleted the last row in the table, clear out the - ** index data. - */ - rc = content_exists(v); - if( rc==SQLITE_ROW ){ - rc = SQLITE_OK; - }else if( rc==SQLITE_DONE ){ - /* Clear the pending terms so we don't flush a useless level-0 - ** segment when the transaction closes. - */ - rc = clearPendingTerms(v); - if( rc==SQLITE_OK ){ - rc = segdir_delete_all(v); - } - } - } - } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ - /* An update: - * ppArg[0] = old rowid - * ppArg[1] = new rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); - if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || - sqlite3_value_int64(ppArg[1]) != rowid ){ - rc = SQLITE_ERROR; /* we don't allow changing the rowid */ - } else { - assert( nArg==2+v->nColumn+1); - rc = index_update(v, rowid, &ppArg[2]); - } - } else { - /* An insert: - * ppArg[1] = requested rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - assert( nArg==2+v->nColumn+1); - rc = index_insert(v, ppArg[1], &ppArg[2], pRowid); - } - - return rc; -} - -static int fulltextSync(sqlite3_vtab *pVtab){ - TRACE(("FTS2 xSync()\n")); - return flushPendingTerms((fulltext_vtab *)pVtab); -} - -static int fulltextBegin(sqlite3_vtab *pVtab){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - TRACE(("FTS2 xBegin()\n")); - - /* Any buffered updates should have been cleared by the previous - ** transaction. - */ - assert( v->nPendingData<0 ); - return clearPendingTerms(v); -} - -static int fulltextCommit(sqlite3_vtab *pVtab){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - TRACE(("FTS2 xCommit()\n")); - - /* Buffered updates should have been cleared by fulltextSync(). */ - assert( v->nPendingData<0 ); - return clearPendingTerms(v); -} - -static int fulltextRollback(sqlite3_vtab *pVtab){ - TRACE(("FTS2 xRollback()\n")); - return clearPendingTerms((fulltext_vtab *)pVtab); -} - -/* -** Implementation of the snippet() function for FTS2 -*/ -static void snippetFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1); - }else{ - const char *zStart = ""; - const char *zEnd = ""; - const char *zEllipsis = "..."; - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - if( argc>=2 ){ - zStart = (const char*)sqlite3_value_text(argv[1]); - if( argc>=3 ){ - zEnd = (const char*)sqlite3_value_text(argv[2]); - if( argc>=4 ){ - zEllipsis = (const char*)sqlite3_value_text(argv[3]); - } - } - } - snippetAllOffsets(pCursor); - snippetText(pCursor, zStart, zEnd, zEllipsis); - sqlite3_result_text(pContext, pCursor->snippet.zSnippet, - pCursor->snippet.nSnippet, SQLITE_STATIC); - } -} - -/* -** Implementation of the offsets() function for FTS2 -*/ -static void snippetOffsetsFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to offsets",-1); - }else{ - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - snippetAllOffsets(pCursor); - snippetOffsetText(&pCursor->snippet); - sqlite3_result_text(pContext, - pCursor->snippet.zOffset, pCursor->snippet.nOffset, - SQLITE_STATIC); - } -} - -/* OptLeavesReader is nearly identical to LeavesReader, except that -** where LeavesReader is geared towards the merging of complete -** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader -** is geared towards implementation of the optimize() function, and -** can merge all segments simultaneously. This version may be -** somewhat less efficient than LeavesReader because it merges into an -** accumulator rather than doing an N-way merge, but since segment -** size grows exponentially (so segment count logrithmically) this is -** probably not an immediate problem. -*/ -/* TODO(shess): Prove that assertion, or extend the merge code to -** merge tree fashion (like the prefix-searching code does). -*/ -/* TODO(shess): OptLeavesReader and LeavesReader could probably be -** merged with little or no loss of performance for LeavesReader. The -** merged code would need to handle >MERGE_COUNT segments, and would -** also need to be able to optionally optimize away deletes. -*/ -typedef struct OptLeavesReader { - /* Segment number, to order readers by age. */ - int segment; - LeavesReader reader; -} OptLeavesReader; - -static int optLeavesReaderAtEnd(OptLeavesReader *pReader){ - return leavesReaderAtEnd(&pReader->reader); -} -static int optLeavesReaderTermBytes(OptLeavesReader *pReader){ - return leavesReaderTermBytes(&pReader->reader); -} -static const char *optLeavesReaderData(OptLeavesReader *pReader){ - return leavesReaderData(&pReader->reader); -} -static int optLeavesReaderDataBytes(OptLeavesReader *pReader){ - return leavesReaderDataBytes(&pReader->reader); -} -static const char *optLeavesReaderTerm(OptLeavesReader *pReader){ - return leavesReaderTerm(&pReader->reader); -} -static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){ - return leavesReaderStep(v, &pReader->reader); -} -static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ - return leavesReaderTermCmp(&lr1->reader, &lr2->reader); -} -/* Order by term ascending, segment ascending (oldest to newest), with -** exhausted readers to the end. -*/ -static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ - int c = optLeavesReaderTermCmp(lr1, lr2); - if( c!=0 ) return c; - return lr1->segment-lr2->segment; -} -/* Bubble pLr[0] to appropriate place in pLr[1..nLr-1]. Assumes that -** pLr[1..nLr-1] is already sorted. -*/ -static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){ - while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){ - OptLeavesReader tmp = pLr[0]; - pLr[0] = pLr[1]; - pLr[1] = tmp; - nLr--; - pLr++; - } -} - -/* optimize() helper function. Put the readers in order and iterate -** through them, merging doclists for matching terms into pWriter. -** Returns SQLITE_OK on success, or the SQLite error code which -** prevented success. -*/ -static int optimizeInternal(fulltext_vtab *v, - OptLeavesReader *readers, int nReaders, - LeafWriter *pWriter){ - int i, rc = SQLITE_OK; - DataBuffer doclist, merged, tmp; - - /* Order the readers. */ - i = nReaders; - while( i-- > 0 ){ - optLeavesReaderReorder(&readers[i], nReaders-i); - } - - dataBufferInit(&doclist, LEAF_MAX); - dataBufferInit(&merged, LEAF_MAX); - - /* Exhausted readers bubble to the end, so when the first reader is - ** at eof, all are at eof. - */ - while( !optLeavesReaderAtEnd(&readers[0]) ){ - - /* Figure out how many readers share the next term. */ - for(i=1; i 0 ){ - dlrDestroy(&dlReaders[nReaders]); - } - - /* Accumulated doclist to reader 0 for next pass. */ - dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); - } - - /* Destroy reader that was left in the pipeline. */ - dlrDestroy(&dlReaders[0]); - - /* Trim deletions from the doclist. */ - dataBufferReset(&merged); - docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, - -1, DL_DEFAULT, &merged); - } - - /* Only pass doclists with hits (skip if all hits deleted). */ - if( merged.nData>0 ){ - rc = leafWriterStep(v, pWriter, - optLeavesReaderTerm(&readers[0]), - optLeavesReaderTermBytes(&readers[0]), - merged.pData, merged.nData); - if( rc!=SQLITE_OK ) goto err; - } - - /* Step merged readers to next term and reorder. */ - while( i-- > 0 ){ - rc = optLeavesReaderStep(v, &readers[i]); - if( rc!=SQLITE_OK ) goto err; - - optLeavesReaderReorder(&readers[i], nReaders-i); - } - } - - err: - dataBufferDestroy(&doclist); - dataBufferDestroy(&merged); - return rc; -} - -/* Implement optimize() function for FTS3. optimize(t) merges all -** segments in the fts index into a single segment. 't' is the magic -** table-named column. -*/ -static void optimizeFunc(sqlite3_context *pContext, - int argc, sqlite3_value **argv){ - fulltext_cursor *pCursor; - if( argc>1 ){ - sqlite3_result_error(pContext, "excess arguments to optimize()",-1); - }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to optimize",-1); - }else{ - fulltext_vtab *v; - int i, rc, iMaxLevel; - OptLeavesReader *readers; - int nReaders; - LeafWriter writer; - sqlite3_stmt *s; - - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - v = cursor_vtab(pCursor); - - /* Flush any buffered updates before optimizing. */ - rc = flushPendingTerms(v); - if( rc!=SQLITE_OK ) goto err; - - rc = segdir_count(v, &nReaders, &iMaxLevel); - if( rc!=SQLITE_OK ) goto err; - if( nReaders==0 || nReaders==1 ){ - sqlite3_result_text(pContext, "Index already optimal", -1, - SQLITE_STATIC); - return; - } - - rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); - if( rc!=SQLITE_OK ) goto err; - - readers = sqlite3_malloc(nReaders*sizeof(readers[0])); - if( readers==NULL ) goto err; - - /* Note that there will already be a segment at this position - ** until we call segdir_delete() on iMaxLevel. - */ - leafWriterInit(iMaxLevel, 0, &writer); - - i = 0; - while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - sqlite_int64 iStart = sqlite3_column_int64(s, 0); - sqlite_int64 iEnd = sqlite3_column_int64(s, 1); - const char *pRootData = sqlite3_column_blob(s, 2); - int nRootData = sqlite3_column_bytes(s, 2); - - assert( i 0 ){ - leavesReaderDestroy(&readers[i].reader); - } - sqlite3_free(readers); - - /* If we've successfully gotten to here, delete the old segments - ** and flush the interior structure of the new segment. - */ - if( rc==SQLITE_OK ){ - for( i=0; i<=iMaxLevel; i++ ){ - rc = segdir_delete(v, i); - if( rc!=SQLITE_OK ) break; - } - - if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer); - } - - leafWriterDestroy(&writer); - - if( rc!=SQLITE_OK ) goto err; - - sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); - return; - - /* TODO(shess): Error-handling needs to be improved along the - ** lines of the dump_ functions. - */ - err: - { - char buf[512]; - sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s", - sqlite3_errmsg(sqlite3_context_db_handle(pContext))); - sqlite3_result_error(pContext, buf, -1); - } - } -} - -#ifdef SQLITE_TEST -/* Generate an error of the form ": ". If msg is NULL, -** pull the error from the context's db handle. -*/ -static void generateError(sqlite3_context *pContext, - const char *prefix, const char *msg){ - char buf[512]; - if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext)); - sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg); - sqlite3_result_error(pContext, buf, -1); -} - -/* Helper function to collect the set of terms in the segment into -** pTerms. The segment is defined by the leaf nodes between -** iStartBlockid and iEndBlockid, inclusive, or by the contents of -** pRootData if iStartBlockid is 0 (in which case the entire segment -** fit in a leaf). -*/ -static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, - fts2Hash *pTerms){ - const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); - const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); - const char *pRootData = sqlite3_column_blob(s, 2); - const int nRootData = sqlite3_column_bytes(s, 2); - LeavesReader reader; - int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, - pRootData, nRootData, &reader); - if( rc!=SQLITE_OK ) return rc; - - while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ - const char *pTerm = leavesReaderTerm(&reader); - const int nTerm = leavesReaderTermBytes(&reader); - void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); - void *newValue = (void *)((char *)oldValue+1); - - /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, - ** the data value passed is returned in case of malloc failure. - */ - if( newValue==sqlite3Fts2HashInsert(pTerms, pTerm, nTerm, newValue) ){ - rc = SQLITE_NOMEM; - }else{ - rc = leavesReaderStep(v, &reader); - } - } - - leavesReaderDestroy(&reader); - return rc; -} - -/* Helper function to build the result string for dump_terms(). */ -static int generateTermsResult(sqlite3_context *pContext, fts2Hash *pTerms){ - int iTerm, nTerms, nResultBytes, iByte; - char *result; - TermData *pData; - fts2HashElem *e; - - /* Iterate pTerms to generate an array of terms in pData for - ** sorting. - */ - nTerms = fts2HashCount(pTerms); - assert( nTerms>0 ); - pData = sqlite3_malloc(nTerms*sizeof(TermData)); - if( pData==NULL ) return SQLITE_NOMEM; - - nResultBytes = 0; - for(iTerm = 0, e = fts2HashFirst(pTerms); e; iTerm++, e = fts2HashNext(e)){ - nResultBytes += fts2HashKeysize(e)+1; /* Term plus trailing space */ - assert( iTerm0 ); /* nTerms>0, nResultsBytes must be, too. */ - result = sqlite3_malloc(nResultBytes); - if( result==NULL ){ - sqlite3_free(pData); - return SQLITE_NOMEM; - } - - if( nTerms>1 ) qsort(pData, nTerms, sizeof(*pData), termDataCmp); - - /* Read the terms in order to build the result. */ - iByte = 0; - for(iTerm=0; iTerm0 ){ - rc = generateTermsResult(pContext, &terms); - if( rc==SQLITE_NOMEM ){ - generateError(pContext, "dump_terms", "out of memory"); - }else{ - assert( rc==SQLITE_OK ); - } - }else if( argc==3 ){ - /* The specific segment asked for could not be found. */ - generateError(pContext, "dump_terms", "segment not found"); - }else{ - /* No segments found. */ - /* TODO(shess): It should be impossible to reach this. This - ** case can only happen for an empty table, in which case - ** SQLite has no rows to call this function on. - */ - sqlite3_result_null(pContext); - } - } - sqlite3Fts2HashClear(&terms); - } -} - -/* Expand the DL_DEFAULT doclist in pData into a text result in -** pContext. -*/ -static void createDoclistResult(sqlite3_context *pContext, - const char *pData, int nData){ - DataBuffer dump; - DLReader dlReader; - - assert( pData!=NULL && nData>0 ); - - dataBufferInit(&dump, 0); - dlrInit(&dlReader, DL_DEFAULT, pData, nData); - for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){ - char buf[256]; - PLReader plReader; - - plrInit(&plReader, &dlReader); - if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ - sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); - dataBufferAppend(&dump, buf, strlen(buf)); - }else{ - int iColumn = plrColumn(&plReader); - - sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", - dlrDocid(&dlReader), iColumn); - dataBufferAppend(&dump, buf, strlen(buf)); - - for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){ - if( plrColumn(&plReader)!=iColumn ){ - iColumn = plrColumn(&plReader); - sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); - assert( dump.nData>0 ); - dump.nData--; /* Overwrite trailing space. */ - assert( dump.pData[dump.nData]==' '); - dataBufferAppend(&dump, buf, strlen(buf)); - } - if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ - sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", - plrPosition(&plReader), - plrStartOffset(&plReader), plrEndOffset(&plReader)); - }else if( DL_DEFAULT==DL_POSITIONS ){ - sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); - }else{ - assert( NULL=="Unhandled DL_DEFAULT value"); - } - dataBufferAppend(&dump, buf, strlen(buf)); - } - plrDestroy(&plReader); - - assert( dump.nData>0 ); - dump.nData--; /* Overwrite trailing space. */ - assert( dump.pData[dump.nData]==' '); - dataBufferAppend(&dump, "]] ", 3); - } - } - dlrDestroy(&dlReader); - - assert( dump.nData>0 ); - dump.nData--; /* Overwrite trailing space. */ - assert( dump.pData[dump.nData]==' '); - dump.pData[dump.nData] = '\0'; - assert( dump.nData>0 ); - - /* Passes ownership of dump's buffer to pContext. */ - sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); - dump.pData = NULL; - dump.nData = dump.nCapacity = 0; -} - -/* Implements dump_doclist() for use in inspecting the fts2 index from -** tests. TEXT result containing a string representation of the -** doclist for the indicated term. dump_doclist(t, term, level, idx) -** dumps the doclist for term from the segment specified by level, idx -** (in %_segdir), while dump_doclist(t, term) dumps the logical -** doclist for the term across all segments. The per-segment doclist -** can contain deletions, while the full-index doclist will not -** (deletions are omitted). -** -** Result formats differ with the setting of DL_DEFAULTS. Examples: -** -** DL_DOCIDS: [1] [3] [7] -** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]] -** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]] -** -** In each case the number after the outer '[' is the docid. In the -** latter two cases, the number before the inner '[' is the column -** associated with the values within. For DL_POSITIONS the numbers -** within are the positions, for DL_POSITIONS_OFFSETS they are the -** position, the start offset, and the end offset. -*/ -static void dumpDoclistFunc( - sqlite3_context *pContext, - int argc, sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc!=2 && argc!=4 ){ - generateError(pContext, "dump_doclist", "incorrect arguments"); - }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - generateError(pContext, "dump_doclist", "illegal first argument"); - }else if( sqlite3_value_text(argv[1])==NULL || - sqlite3_value_text(argv[1])[0]=='\0' ){ - generateError(pContext, "dump_doclist", "empty second argument"); - }else{ - const char *pTerm = (const char *)sqlite3_value_text(argv[1]); - const int nTerm = strlen(pTerm); - fulltext_vtab *v; - int rc; - DataBuffer doclist; - - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - v = cursor_vtab(pCursor); - - dataBufferInit(&doclist, 0); - - /* termSelect() yields the same logical doclist that queries are - ** run against. - */ - if( argc==2 ){ - rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist); - }else{ - sqlite3_stmt *s = NULL; - - /* Get our specific segment's information. */ - rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s); - if( rc==SQLITE_OK ){ - rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2])); - if( rc==SQLITE_OK ){ - rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3])); - } - } - - if( rc==SQLITE_OK ){ - rc = sqlite3_step(s); - - if( rc==SQLITE_DONE ){ - dataBufferDestroy(&doclist); - generateError(pContext, "dump_doclist", "segment not found"); - return; - } - - /* Found a segment, load it into doclist. */ - if( rc==SQLITE_ROW ){ - const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); - const char *pData = sqlite3_column_blob(s, 2); - const int nData = sqlite3_column_bytes(s, 2); - - /* loadSegment() is used by termSelect() to load each - ** segment's data. - */ - rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0, - &doclist); - if( rc==SQLITE_OK ){ - rc = sqlite3_step(s); - - /* Should not have more than one matching segment. */ - if( rc!=SQLITE_DONE ){ - sqlite3_reset(s); - dataBufferDestroy(&doclist); - generateError(pContext, "dump_doclist", "invalid segdir"); - return; - } - rc = SQLITE_OK; - } - } - } - - sqlite3_reset(s); - } - - if( rc==SQLITE_OK ){ - if( doclist.nData>0 ){ - createDoclistResult(pContext, doclist.pData, doclist.nData); - }else{ - /* TODO(shess): This can happen if the term is not present, or - ** if all instances of the term have been deleted and this is - ** an all-index dump. It may be interesting to distinguish - ** these cases. - */ - sqlite3_result_text(pContext, "", 0, SQLITE_STATIC); - } - }else if( rc==SQLITE_NOMEM ){ - /* Handle out-of-memory cases specially because if they are - ** generated in fts2 code they may not be reflected in the db - ** handle. - */ - /* TODO(shess): Handle this more comprehensively. - ** sqlite3ErrStr() has what I need, but is internal. - */ - generateError(pContext, "dump_doclist", "out of memory"); - }else{ - generateError(pContext, "dump_doclist", NULL); - } - - dataBufferDestroy(&doclist); - } -} -#endif - -/* -** This routine implements the xFindFunction method for the FTS2 -** virtual table. -*/ -static int fulltextFindFunction( - sqlite3_vtab *pVtab, - int nArg, - const char *zName, - void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), - void **ppArg -){ - if( strcmp(zName,"snippet")==0 ){ - *pxFunc = snippetFunc; - return 1; - }else if( strcmp(zName,"offsets")==0 ){ - *pxFunc = snippetOffsetsFunc; - return 1; - }else if( strcmp(zName,"optimize")==0 ){ - *pxFunc = optimizeFunc; - return 1; -#ifdef SQLITE_TEST - /* NOTE(shess): These functions are present only for testing - ** purposes. No particular effort is made to optimize their - ** execution or how they build their results. - */ - }else if( strcmp(zName,"dump_terms")==0 ){ - /* fprintf(stderr, "Found dump_terms\n"); */ - *pxFunc = dumpTermsFunc; - return 1; - }else if( strcmp(zName,"dump_doclist")==0 ){ - /* fprintf(stderr, "Found dump_doclist\n"); */ - *pxFunc = dumpDoclistFunc; - return 1; -#endif - } - return 0; -} - -/* -** Rename an fts2 table. -*/ -static int fulltextRename( - sqlite3_vtab *pVtab, - const char *zName -){ - fulltext_vtab *p = (fulltext_vtab *)pVtab; - int rc = SQLITE_NOMEM; - char *zSql = sqlite3_mprintf( - "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';" - "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';" - "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';" - , p->zDb, p->zName, zName - , p->zDb, p->zName, zName - , p->zDb, p->zName, zName - ); - if( zSql ){ - rc = sqlite3_exec(p->db, zSql, 0, 0, 0); - sqlite3_free(zSql); - } - return rc; -} - -static const sqlite3_module fts2Module = { - /* iVersion */ 0, - /* xCreate */ fulltextCreate, - /* xConnect */ fulltextConnect, - /* xBestIndex */ fulltextBestIndex, - /* xDisconnect */ fulltextDisconnect, - /* xDestroy */ fulltextDestroy, - /* xOpen */ fulltextOpen, - /* xClose */ fulltextClose, - /* xFilter */ fulltextFilter, - /* xNext */ fulltextNext, - /* xEof */ fulltextEof, - /* xColumn */ fulltextColumn, - /* xRowid */ fulltextRowid, - /* xUpdate */ fulltextUpdate, - /* xBegin */ fulltextBegin, - /* xSync */ fulltextSync, - /* xCommit */ fulltextCommit, - /* xRollback */ fulltextRollback, - /* xFindFunction */ fulltextFindFunction, - /* xRename */ fulltextRename, -}; - -static void hashDestroy(void *p){ - fts2Hash *pHash = (fts2Hash *)p; - sqlite3Fts2HashClear(pHash); - sqlite3_free(pHash); -} - -/* -** The fts2 built-in tokenizers - "simple" and "porter" - are implemented -** in files fts2_tokenizer1.c and fts2_porter.c respectively. The following -** two forward declarations are for functions declared in these files -** used to retrieve the respective implementations. -** -** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed -** to by the argument to point a the "simple" tokenizer implementation. -** Function ...PorterTokenizerModule() sets *pModule to point to the -** porter tokenizer/stemmer implementation. -*/ -void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); -void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); -void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); - -int sqlite3Fts2InitHashTable(sqlite3 *, fts2Hash *, const char *); - -/* -** Initialize the fts2 extension. If this extension is built as part -** of the sqlite library, then this function is called directly by -** SQLite. If fts2 is built as a dynamically loadable extension, this -** function is called by the sqlite3_extension_init() entry point. -*/ -int sqlite3Fts2Init(sqlite3 *db){ - int rc = SQLITE_OK; - fts2Hash *pHash = 0; - const sqlite3_tokenizer_module *pSimple = 0; - const sqlite3_tokenizer_module *pPorter = 0; - const sqlite3_tokenizer_module *pIcu = 0; - - sqlite3Fts2SimpleTokenizerModule(&pSimple); - sqlite3Fts2PorterTokenizerModule(&pPorter); -#ifdef SQLITE_ENABLE_ICU - sqlite3Fts2IcuTokenizerModule(&pIcu); -#endif - - /* Allocate and initialize the hash-table used to store tokenizers. */ - pHash = sqlite3_malloc(sizeof(fts2Hash)); - if( !pHash ){ - rc = SQLITE_NOMEM; - }else{ - sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); - } - - /* Load the built-in tokenizers into the hash table */ - if( rc==SQLITE_OK ){ - if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple) - || sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter) - || (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu)) - ){ - rc = SQLITE_NOMEM; - } - } - - /* Create the virtual table wrapper around the hash-table and overload - ** the two scalar functions. If this is successful, register the - ** module with sqlite. - */ - if( SQLITE_OK==rc - && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) -#ifdef SQLITE_TEST - && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) -#endif - ){ - return sqlite3_create_module_v2( - db, "fts2", &fts2Module, (void *)pHash, hashDestroy - ); - } - - /* An error has occurred. Delete the hash table and return the error code. */ - assert( rc!=SQLITE_OK ); - if( pHash ){ - sqlite3Fts2HashClear(pHash); - sqlite3_free(pHash); - } - return rc; -} - -#if !SQLITE_CORE -#ifdef _WIN32 -__declspec(dllexport) -#endif -int sqlite3_fts2_init( - sqlite3 *db, - char **pzErrMsg, - const sqlite3_api_routines *pApi -){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3Fts2Init(db); -} -#endif - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/third_party/sqlite3/fts2.h b/third_party/sqlite3/fts2.h deleted file mode 100644 index 4da4c3877..000000000 --- a/third_party/sqlite3/fts2.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This header file is used by programs that want to link against the -** FTS2 library. All it does is declare the sqlite3Fts2Init() interface. -*/ -#include "sqlite3.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -int sqlite3Fts2Init(sqlite3 *db); - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ diff --git a/third_party/sqlite3/fts2_hash.c b/third_party/sqlite3/fts2_hash.c deleted file mode 100644 index 5dd41db78..000000000 --- a/third_party/sqlite3/fts2_hash.c +++ /dev/null @@ -1,376 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the implementation of generic hash-tables used in SQLite. -** We've modified it slightly to serve as a standalone hash table -** implementation for the full-text indexing module. -*/ -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -/* clang-format off */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - -#include -#include -#include - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 -#include "fts2_hash.h" - -/* -** Malloc and Free functions -*/ -static void *fts2HashMalloc(int n){ - void *p = sqlite3_malloc(n); - if( p ){ - memset(p, 0, n); - } - return p; -} -static void fts2HashFree(void *p){ - sqlite3_free(p); -} - -/* Turn bulk memory into a hash table object by initializing the -** fields of the Hash structure. -** -** "pNew" is a pointer to the hash table that is to be initialized. -** keyClass is one of the constants -** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass -** determines what kind of key the hash table will use. "copyKey" is -** true if the hash table should make its own private copy of keys and -** false if it should just use the supplied pointer. -*/ -void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){ - assert( pNew!=0 ); - assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY ); - pNew->keyClass = keyClass; - pNew->copyKey = copyKey; - pNew->first = 0; - pNew->count = 0; - pNew->htsize = 0; - pNew->ht = 0; -} - -/* Remove all entries from a hash table. Reclaim all memory. -** Call this routine to delete a hash table or to reset a hash table -** to the empty state. -*/ -void sqlite3Fts2HashClear(fts2Hash *pH){ - fts2HashElem *elem; /* For looping over all elements of the table */ - - assert( pH!=0 ); - elem = pH->first; - pH->first = 0; - fts2HashFree(pH->ht); - pH->ht = 0; - pH->htsize = 0; - while( elem ){ - fts2HashElem *next_elem = elem->next; - if( pH->copyKey && elem->pKey ){ - fts2HashFree(elem->pKey); - } - fts2HashFree(elem); - elem = next_elem; - } - pH->count = 0; -} - -/* -** Hash and comparison functions when the mode is FTS2_HASH_STRING -*/ -static int strHash(const void *pKey, int nKey){ - const char *z = (const char *)pKey; - int h = 0; - if( nKey<=0 ) nKey = (int) strlen(z); - while( nKey > 0 ){ - h = (h<<3) ^ h ^ *z++; - nKey--; - } - return h & 0x7fffffff; -} -static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return strncmp((const char*)pKey1,(const char*)pKey2,n1); -} - -/* -** Hash and comparison functions when the mode is FTS2_HASH_BINARY -*/ -static int binHash(const void *pKey, int nKey){ - int h = 0; - const char *z = (const char *)pKey; - while( nKey-- > 0 ){ - h = (h<<3) ^ h ^ *(z++); - } - return h & 0x7fffffff; -} -static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return memcmp(pKey1,pKey2,n1); -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** The C syntax in this function definition may be unfamilar to some -** programmers, so we provide the following additional explanation: -** -** The name of the function is "hashFunction". The function takes a -** single parameter "keyClass". The return value of hashFunction() -** is a pointer to another function. Specifically, the return value -** of hashFunction() is a pointer to a function that takes two parameters -** with types "const void*" and "int" and returns an "int". -*/ -static int (*hashFunction(int keyClass))(const void*,int){ - if( keyClass==FTS2_HASH_STRING ){ - return &strHash; - }else{ - assert( keyClass==FTS2_HASH_BINARY ); - return &binHash; - } -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** For help in interpreted the obscure C code in the function definition, -** see the header comment on the previous function. -*/ -static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ - if( keyClass==FTS2_HASH_STRING ){ - return &strCompare; - }else{ - assert( keyClass==FTS2_HASH_BINARY ); - return &binCompare; - } -} - -/* Link an element into the hash table -*/ -static void insertElement( - fts2Hash *pH, /* The complete hash table */ - struct _fts2ht *pEntry, /* The entry into which pNew is inserted */ - fts2HashElem *pNew /* The element to be inserted */ -){ - fts2HashElem *pHead; /* First element already in pEntry */ - pHead = pEntry->chain; - if( pHead ){ - pNew->next = pHead; - pNew->prev = pHead->prev; - if( pHead->prev ){ pHead->prev->next = pNew; } - else { pH->first = pNew; } - pHead->prev = pNew; - }else{ - pNew->next = pH->first; - if( pH->first ){ pH->first->prev = pNew; } - pNew->prev = 0; - pH->first = pNew; - } - pEntry->count++; - pEntry->chain = pNew; -} - - -/* Resize the hash table so that it cantains "new_size" buckets. -** "new_size" must be a power of 2. The hash table might fail -** to resize if sqliteMalloc() fails. -*/ -static void rehash(fts2Hash *pH, int new_size){ - struct _fts2ht *new_ht; /* The new hash table */ - fts2HashElem *elem, *next_elem; /* For looping over existing elements */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( (new_size & (new_size-1))==0 ); - new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) ); - if( new_ht==0 ) return; - fts2HashFree(pH->ht); - pH->ht = new_ht; - pH->htsize = new_size; - xHash = hashFunction(pH->keyClass); - for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); - next_elem = elem->next; - insertElement(pH, &new_ht[h], elem); - } -} - -/* This function (for internal use only) locates an element in an -** hash table that matches the given key. The hash for this key has -** already been computed and is passed as the 4th parameter. -*/ -static fts2HashElem *findElementGivenHash( - const fts2Hash *pH, /* The pH to be searched */ - const void *pKey, /* The key we are searching for */ - int nKey, - int h /* The hash for this key. */ -){ - fts2HashElem *elem; /* Used to loop thru the element list */ - int count; /* Number of elements left to test */ - int (*xCompare)(const void*,int,const void*,int); /* comparison function */ - - if( pH->ht ){ - struct _fts2ht *pEntry = &pH->ht[h]; - elem = pEntry->chain; - count = pEntry->count; - xCompare = compareFunction(pH->keyClass); - while( count-- && elem ){ - if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ - return elem; - } - elem = elem->next; - } - } - return 0; -} - -/* Remove a single entry from the hash table given a pointer to that -** element and a hash on the element's key. -*/ -static void removeElementGivenHash( - fts2Hash *pH, /* The pH containing "elem" */ - fts2HashElem* elem, /* The element to be removed from the pH */ - int h /* Hash value for the element */ -){ - struct _fts2ht *pEntry; - if( elem->prev ){ - elem->prev->next = elem->next; - }else{ - pH->first = elem->next; - } - if( elem->next ){ - elem->next->prev = elem->prev; - } - pEntry = &pH->ht[h]; - if( pEntry->chain==elem ){ - pEntry->chain = elem->next; - } - pEntry->count--; - if( pEntry->count<=0 ){ - pEntry->chain = 0; - } - if( pH->copyKey && elem->pKey ){ - fts2HashFree(elem->pKey); - } - fts2HashFree( elem ); - pH->count--; - if( pH->count<=0 ){ - assert( pH->first==0 ); - assert( pH->count==0 ); - fts2HashClear(pH); - } -} - -/* Attempt to locate an element of the hash table pH with a key -** that matches pKey,nKey. Return the data for this element if it is -** found, or NULL if there is no match. -*/ -void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){ - int h; /* A hash on key */ - fts2HashElem *elem; /* The element that matches key */ - int (*xHash)(const void*,int); /* The hash function */ - - if( pH==0 || pH->ht==0 ) return 0; - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - h = (*xHash)(pKey,nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); - return elem ? elem->data : 0; -} - -/* Insert an element into the hash table pH. The key is pKey,nKey -** and the data is "data". -** -** If no element exists with a matching key, then a new -** element is created. A copy of the key is made if the copyKey -** flag is set. NULL is returned. -** -** If another element already exists with the same key, then the -** new data replaces the old data and the old data is returned. -** The key is not copied in this instance. If a malloc fails, then -** the new data is returned and the hash table is unchanged. -** -** If the "data" parameter to this function is NULL, then the -** element corresponding to "key" is removed from the hash table. -*/ -void *sqlite3Fts2HashInsert( - fts2Hash *pH, /* The hash table to insert into */ - const void *pKey, /* The key */ - int nKey, /* Number of bytes in the key */ - void *data /* The data */ -){ - int hraw; /* Raw hash value of the key */ - int h; /* the hash of the key modulo hash table size */ - fts2HashElem *elem; /* Used to loop thru the element list */ - fts2HashElem *new_elem; /* New element added to the pH */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( pH!=0 ); - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - hraw = (*xHash)(pKey, nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - elem = findElementGivenHash(pH,pKey,nKey,h); - if( elem ){ - void *old_data = elem->data; - if( data==0 ){ - removeElementGivenHash(pH,elem,h); - }else{ - elem->data = data; - } - return old_data; - } - if( data==0 ) return 0; - new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) ); - if( new_elem==0 ) return data; - if( pH->copyKey && pKey!=0 ){ - new_elem->pKey = fts2HashMalloc( nKey ); - if( new_elem->pKey==0 ){ - fts2HashFree(new_elem); - return data; - } - memcpy((void*)new_elem->pKey, pKey, nKey); - }else{ - new_elem->pKey = (void*)pKey; - } - new_elem->nKey = nKey; - pH->count++; - if( pH->htsize==0 ){ - rehash(pH,8); - if( pH->htsize==0 ){ - pH->count = 0; - fts2HashFree(new_elem); - return data; - } - } - if( pH->count > pH->htsize ){ - rehash(pH,pH->htsize*2); - } - assert( pH->htsize>0 ); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - insertElement(pH, &pH->ht[h], new_elem); - new_elem->data = data; - return 0; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/third_party/sqlite3/fts2_hash.h b/third_party/sqlite3/fts2_hash.h deleted file mode 100644 index 86c6c0daf..000000000 --- a/third_party/sqlite3/fts2_hash.h +++ /dev/null @@ -1,112 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the header file for the generic hash-table implementation -** used in SQLite. We've modified it slightly to serve as a standalone -** hash table implementation for the full-text indexing module. -** -*/ -#ifndef _FTS2_HASH_H_ -#define _FTS2_HASH_H_ - -/* clang-format off */ - -/* Forward declarations of structures. */ -typedef struct fts2Hash fts2Hash; -typedef struct fts2HashElem fts2HashElem; - -/* A complete hash table is an instance of the following structure. -** The internals of this structure are intended to be opaque -- client -** code should not attempt to access or modify the fields of this structure -** directly. Change this structure only by using the routines below. -** However, many of the "procedures" and "functions" for modifying and -** accessing this structure are really macros, so we can't really make -** this structure opaque. -*/ -struct fts2Hash { - char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ - char copyKey; /* True if copy of key made on insert */ - int count; /* Number of entries in this table */ - fts2HashElem *first; /* The first element of the array */ - int htsize; /* Number of buckets in the hash table */ - struct _fts2ht { /* the hash table */ - int count; /* Number of entries with this hash */ - fts2HashElem *chain; /* Pointer to first entry with this hash */ - } *ht; -}; - -/* Each element in the hash table is an instance of the following -** structure. All elements are stored on a single doubly-linked list. -** -** Again, this structure is intended to be opaque, but it can't really -** be opaque because it is used by macros. -*/ -struct fts2HashElem { - fts2HashElem *next, *prev; /* Next and previous elements in the table */ - void *data; /* Data associated with this element */ - void *pKey; int nKey; /* Key associated with this element */ -}; - -/* -** There are 2 different modes of operation for a hash table: -** -** FTS2_HASH_STRING pKey points to a string that is nKey bytes long -** (including the null-terminator, if any). Case -** is respected in comparisons. -** -** FTS2_HASH_BINARY pKey points to binary data nKey bytes long. -** memcmp() is used to compare keys. -** -** A copy of the key is made if the copyKey parameter to fts2HashInit is 1. -*/ -#define FTS2_HASH_STRING 1 -#define FTS2_HASH_BINARY 2 - -/* -** Access routines. To delete, insert a NULL pointer. -*/ -void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey); -void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData); -void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey); -void sqlite3Fts2HashClear(fts2Hash*); - -/* -** Shorthand for the functions above -*/ -#define fts2HashInit sqlite3Fts2HashInit -#define fts2HashInsert sqlite3Fts2HashInsert -#define fts2HashFind sqlite3Fts2HashFind -#define fts2HashClear sqlite3Fts2HashClear - -/* -** Macros for looping over all elements of a hash table. The idiom is -** like this: -** -** fts2Hash h; -** fts2HashElem *p; -** ... -** for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){ -** SomeStructure *pData = fts2HashData(p); -** // do something with pData -** } -*/ -#define fts2HashFirst(H) ((H)->first) -#define fts2HashNext(E) ((E)->next) -#define fts2HashData(E) ((E)->data) -#define fts2HashKey(E) ((E)->pKey) -#define fts2HashKeysize(E) ((E)->nKey) - -/* -** Number of entries in a hash table -*/ -#define fts2HashCount(H) ((H)->count) - -#endif /* _FTS2_HASH_H_ */ diff --git a/third_party/sqlite3/fts2_icu.c b/third_party/sqlite3/fts2_icu.c deleted file mode 100644 index 170b285f2..000000000 --- a/third_party/sqlite3/fts2_icu.c +++ /dev/null @@ -1,261 +0,0 @@ -/* -** 2007 June 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file implements a tokenizer for fts2 based on the ICU library. -** -** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $ -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) -#ifdef SQLITE_ENABLE_ICU - -/* clang-format off */ - -#include -#include -#include "fts2_tokenizer.h" - -#include -#include -#include -#include - -typedef struct IcuTokenizer IcuTokenizer; -typedef struct IcuCursor IcuCursor; - -struct IcuTokenizer { - sqlite3_tokenizer base; - char *zLocale; -}; - -struct IcuCursor { - sqlite3_tokenizer_cursor base; - - UBreakIterator *pIter; /* ICU break-iterator object */ - int nChar; /* Number of UChar elements in pInput */ - UChar *aChar; /* Copy of input using utf-16 encoding */ - int *aOffset; /* Offsets of each character in utf-8 input */ - - int nBuffer; - char *zBuffer; - - int iToken; -}; - -/* -** Create a new tokenizer instance. -*/ -static int icuCreate( - int argc, /* Number of entries in argv[] */ - const char * const *argv, /* Tokenizer creation arguments */ - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ -){ - IcuTokenizer *p; - int n = 0; - - if( argc>0 ){ - n = strlen(argv[0])+1; - } - p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); - if( !p ){ - return SQLITE_NOMEM; - } - memset(p, 0, sizeof(IcuTokenizer)); - - if( n ){ - p->zLocale = (char *)&p[1]; - memcpy(p->zLocale, argv[0], n); - } - - *ppTokenizer = (sqlite3_tokenizer *)p; - - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int icuDestroy(sqlite3_tokenizer *pTokenizer){ - IcuTokenizer *p = (IcuTokenizer *)pTokenizer; - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int icuOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, /* Input string */ - int nInput, /* Length of zInput in bytes */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - IcuTokenizer *p = (IcuTokenizer *)pTokenizer; - IcuCursor *pCsr; - - const int32_t opt = U_FOLD_CASE_DEFAULT; - UErrorCode status = U_ZERO_ERROR; - int nChar; - - UChar32 c; - int iInput = 0; - int iOut = 0; - - *ppCursor = 0; - - if( nInput<0 ){ - nInput = strlen(zInput); - } - nChar = nInput+1; - pCsr = (IcuCursor *)sqlite3_malloc( - sizeof(IcuCursor) + /* IcuCursor */ - ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ - (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ - ); - if( !pCsr ){ - return SQLITE_NOMEM; - } - memset(pCsr, 0, sizeof(IcuCursor)); - pCsr->aChar = (UChar *)&pCsr[1]; - pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; - - pCsr->aOffset[iOut] = iInput; - U8_NEXT(zInput, iInput, nInput, c); - while( c>0 ){ - int isError = 0; - c = u_foldCase(c, opt); - U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); - if( isError ){ - sqlite3_free(pCsr); - return SQLITE_ERROR; - } - pCsr->aOffset[iOut] = iInput; - - if( iInputpIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); - if( !U_SUCCESS(status) ){ - sqlite3_free(pCsr); - return SQLITE_ERROR; - } - pCsr->nChar = iOut; - - ubrk_first(pCsr->pIter); - *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to icuOpen(). -*/ -static int icuClose(sqlite3_tokenizer_cursor *pCursor){ - IcuCursor *pCsr = (IcuCursor *)pCursor; - ubrk_close(pCsr->pIter); - sqlite3_free(pCsr->zBuffer); - sqlite3_free(pCsr); - return SQLITE_OK; -} - -/* -** Extract the next token from a tokenization cursor. -*/ -static int icuNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - IcuCursor *pCsr = (IcuCursor *)pCursor; - - int iStart = 0; - int iEnd = 0; - int nByte = 0; - - while( iStart==iEnd ){ - UChar32 c; - - iStart = ubrk_current(pCsr->pIter); - iEnd = ubrk_next(pCsr->pIter); - if( iEnd==UBRK_DONE ){ - return SQLITE_DONE; - } - - while( iStartaChar, iWhite, pCsr->nChar, c); - if( u_isspace(c) ){ - iStart = iWhite; - }else{ - break; - } - } - assert(iStart<=iEnd); - } - - do { - UErrorCode status = U_ZERO_ERROR; - if( nByte ){ - char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); - if( !zNew ){ - return SQLITE_NOMEM; - } - pCsr->zBuffer = zNew; - pCsr->nBuffer = nByte; - } - - u_strToUTF8( - pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ - &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ - &status /* Output success/failure */ - ); - } while( nByte>pCsr->nBuffer ); - - *ppToken = pCsr->zBuffer; - *pnBytes = nByte; - *piStartOffset = pCsr->aOffset[iStart]; - *piEndOffset = pCsr->aOffset[iEnd]; - *piPosition = pCsr->iToken++; - - return SQLITE_OK; -} - -/* -** The set of routines that implement the simple tokenizer -*/ -static const sqlite3_tokenizer_module icuTokenizerModule = { - 0, /* iVersion */ - icuCreate, /* xCreate */ - icuDestroy, /* xCreate */ - icuOpen, /* xOpen */ - icuClose, /* xClose */ - icuNext, /* xNext */ -}; - -/* -** Set *ppModule to point at the implementation of the ICU tokenizer. -*/ -void sqlite3Fts2IcuTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &icuTokenizerModule; -} - -#endif /* defined(SQLITE_ENABLE_ICU) */ -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/third_party/sqlite3/fts2_porter.c b/third_party/sqlite3/fts2_porter.c deleted file mode 100644 index 29a9c32fe..000000000 --- a/third_party/sqlite3/fts2_porter.c +++ /dev/null @@ -1,644 +0,0 @@ -/* -** 2006 September 30 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** Implementation of the full-text-search tokenizer that implements -** a Porter stemmer. -*/ -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - -/* clang-format off */ - -#include -#include -#include -#include - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 -#include "fts2_tokenizer.h" - -/* -** Class derived from sqlite3_tokenizer -*/ -typedef struct porter_tokenizer { - sqlite3_tokenizer base; /* Base class */ -} porter_tokenizer; - -/* -** Class derived from sqlit3_tokenizer_cursor -*/ -typedef struct porter_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *zInput; /* input we are tokenizing */ - int nInput; /* size of the input */ - int iOffset; /* current position in zInput */ - int iToken; /* index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAllocated; /* space allocated to zToken buffer */ -} porter_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module porterTokenizerModule; - - -/* -** Create a new tokenizer instance. -*/ -static int porterCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - porter_tokenizer *t; - t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int porterDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is zInput[0..nInput-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int porterOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, int nInput, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - porter_tokenizer_cursor *c; - - c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->zInput = zInput; - if( zInput==0 ){ - c->nInput = 0; - }else if( nInput<0 ){ - c->nInput = (int)strlen(zInput); - }else{ - c->nInput = nInput; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->zToken = NULL; /* no space allocated, yet. */ - c->nAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** porterOpen() above. -*/ -static int porterClose(sqlite3_tokenizer_cursor *pCursor){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - sqlite3_free(c->zToken); - sqlite3_free(c); - return SQLITE_OK; -} -/* -** Vowel or consonant -*/ -static const char cType[] = { - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 2, 1 -}; - -/* -** isConsonant() and isVowel() determine if their first character in -** the string they point to is a consonant or a vowel, according -** to Porter ruls. -** -** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. -** 'Y' is a consonant unless it follows another consonant, -** in which case it is a vowel. -** -** In these routine, the letters are in reverse order. So the 'y' rule -** is that 'y' is a consonant unless it is followed by another -** consonent. -*/ -static int isVowel(const char*); -static int isConsonant(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return j; - return z[1]==0 || isVowel(z + 1); -} -static int isVowel(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return 1-j; - return isConsonant(z + 1); -} - -/* -** Let any sequence of one or more vowels be represented by V and let -** C be sequence of one or more consonants. Then every word can be -** represented as: -** -** [C] (VC){m} [V] -** -** In prose: A word is an optional consonant followed by zero or -** vowel-consonant pairs followed by an optional vowel. "m" is the -** number of vowel consonant pairs. This routine computes the value -** of m for the first i bytes of a word. -** -** Return true if the m-value for z is 1 or more. In other words, -** return true if z contains at least one vowel that is followed -** by a consonant. -** -** In this routine z[] is in reverse order. So we are really looking -** for an instance of of a consonant followed by a vowel. -*/ -static int m_gt_0(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* Like mgt0 above except we are looking for a value of m which is -** exactly 1 -*/ -static int m_eq_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 1; - while( isConsonant(z) ){ z++; } - return *z==0; -} - -/* Like mgt0 above except we are looking for a value of m>1 instead -** or m>0 -*/ -static int m_gt_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if there is a vowel anywhere within z[0..n-1] -*/ -static int hasVowel(const char *z){ - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if the word ends in a double consonant. -** -** The text is reversed here. So we are really looking at -** the first two characters of z[]. -*/ -static int doubleConsonant(const char *z){ - return isConsonant(z) && z[0]==z[1] && isConsonant(z+1); -} - -/* -** Return TRUE if the word ends with three letters which -** are consonant-vowel-consonent and where the final consonant -** is not 'w', 'x', or 'y'. -** -** The word is reversed here. So we are really checking the -** first three letters and the first one cannot be in [wxy]. -*/ -static int star_oh(const char *z){ - return - z[0]!=0 && isConsonant(z) && - z[0]!='w' && z[0]!='x' && z[0]!='y' && - z[1]!=0 && isVowel(z+1) && - z[2]!=0 && isConsonant(z+2); -} - -/* -** If the word ends with zFrom and xCond() is true for the stem -** of the word that preceeds the zFrom ending, then change the -** ending to zTo. -** -** The input word *pz and zFrom are both in reverse order. zTo -** is in normal order. -** -** Return TRUE if zFrom matches. Return FALSE if zFrom does not -** match. Not that TRUE is returned even if xCond() fails and -** no substitution occurs. -*/ -static int stem( - char **pz, /* The word being stemmed (Reversed) */ - const char *zFrom, /* If the ending matches this... (Reversed) */ - const char *zTo, /* ... change the ending to this (not reversed) */ - int (*xCond)(const char*) /* Condition that must be true */ -){ - char *z = *pz; - while( *zFrom && *zFrom==*z ){ z++; zFrom++; } - if( *zFrom!=0 ) return 0; - if( xCond && !xCond(z) ) return 1; - while( *zTo ){ - *(--z) = *(zTo++); - } - *pz = z; - return 1; -} - -/* -** This is the fallback stemmer used when the porter stemmer is -** inappropriate. The input word is copied into the output with -** US-ASCII case folding. If the input word is too long (more -** than 20 bytes if it contains no digits or more than 6 bytes if -** it contains digits) then word is truncated to 20 or 6 bytes -** by taking 10 or 3 bytes from the beginning and end. -*/ -static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, mx, j; - int hasDigit = 0; - for(i=0; i='A' && c<='Z' ){ - zOut[i] = c - 'A' + 'a'; - }else{ - if( c>='0' && c<='9' ) hasDigit = 1; - zOut[i] = c; - } - } - mx = hasDigit ? 3 : 10; - if( nIn>mx*2 ){ - for(j=mx, i=nIn-mx; i=sizeof(zReverse)-7 ){ - /* The word is too big or too small for the porter stemmer. - ** Fallback to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ - zReverse[j] = c + 'a' - 'A'; - }else if( c>='a' && c<='z' ){ - zReverse[j] = c; - }else{ - /* The use of a character not in [a-zA-Z] means that we fallback - ** to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - } - memset(&zReverse[sizeof(zReverse)-5], 0, 5); - z = &zReverse[j+1]; - - - /* Step 1a */ - if( z[0]=='s' ){ - if( - !stem(&z, "sess", "ss", 0) && - !stem(&z, "sei", "i", 0) && - !stem(&z, "ss", "ss", 0) - ){ - z++; - } - } - - /* Step 1b */ - z2 = z; - if( stem(&z, "dee", "ee", m_gt_0) ){ - /* Do nothing. The work was all in the test */ - }else if( - (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) - && z!=z2 - ){ - if( stem(&z, "ta", "ate", 0) || - stem(&z, "lb", "ble", 0) || - stem(&z, "zi", "ize", 0) ){ - /* Do nothing. The work was all in the test */ - }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ - z++; - }else if( m_eq_1(z) && star_oh(z) ){ - *(--z) = 'e'; - } - } - - /* Step 1c */ - if( z[0]=='y' && hasVowel(z+1) ){ - z[0] = 'i'; - } - - /* Step 2 */ - switch( z[1] ){ - case 'a': - stem(&z, "lanoita", "ate", m_gt_0) || - stem(&z, "lanoit", "tion", m_gt_0); - break; - case 'c': - stem(&z, "icne", "ence", m_gt_0) || - stem(&z, "icna", "ance", m_gt_0); - break; - case 'e': - stem(&z, "rezi", "ize", m_gt_0); - break; - case 'g': - stem(&z, "igol", "log", m_gt_0); - break; - case 'l': - stem(&z, "ilb", "ble", m_gt_0) || - stem(&z, "illa", "al", m_gt_0) || - stem(&z, "iltne", "ent", m_gt_0) || - stem(&z, "ile", "e", m_gt_0) || - stem(&z, "ilsuo", "ous", m_gt_0); - break; - case 'o': - stem(&z, "noitazi", "ize", m_gt_0) || - stem(&z, "noita", "ate", m_gt_0) || - stem(&z, "rota", "ate", m_gt_0); - break; - case 's': - stem(&z, "msila", "al", m_gt_0) || - stem(&z, "ssenevi", "ive", m_gt_0) || - stem(&z, "ssenluf", "ful", m_gt_0) || - stem(&z, "ssensuo", "ous", m_gt_0); - break; - case 't': - stem(&z, "itila", "al", m_gt_0) || - stem(&z, "itivi", "ive", m_gt_0) || - stem(&z, "itilib", "ble", m_gt_0); - break; - } - - /* Step 3 */ - switch( z[0] ){ - case 'e': - stem(&z, "etaci", "ic", m_gt_0) || - stem(&z, "evita", "", m_gt_0) || - stem(&z, "ezila", "al", m_gt_0); - break; - case 'i': - stem(&z, "itici", "ic", m_gt_0); - break; - case 'l': - stem(&z, "laci", "ic", m_gt_0) || - stem(&z, "luf", "", m_gt_0); - break; - case 's': - stem(&z, "ssen", "", m_gt_0); - break; - } - - /* Step 4 */ - switch( z[1] ){ - case 'a': - if( z[0]=='l' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'c': - if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'e': - if( z[0]=='r' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'i': - if( z[0]=='c' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'l': - if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'n': - if( z[0]=='t' ){ - if( z[2]=='a' ){ - if( m_gt_1(z+3) ){ - z += 3; - } - }else if( z[2]=='e' ){ - stem(&z, "tneme", "", m_gt_1) || - stem(&z, "tnem", "", m_gt_1) || - stem(&z, "tne", "", m_gt_1); - } - } - break; - case 'o': - if( z[0]=='u' ){ - if( m_gt_1(z+2) ){ - z += 2; - } - }else if( z[3]=='s' || z[3]=='t' ){ - stem(&z, "noi", "", m_gt_1); - } - break; - case 's': - if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 't': - stem(&z, "eta", "", m_gt_1) || - stem(&z, "iti", "", m_gt_1); - break; - case 'u': - if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 'v': - case 'z': - if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - } - - /* Step 5a */ - if( z[0]=='e' ){ - if( m_gt_1(z+1) ){ - z++; - }else if( m_eq_1(z+1) && !star_oh(z+1) ){ - z++; - } - } - - /* Step 5b */ - if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ - z++; - } - - /* z[] is now the stemmed word in reverse order. Flip it back - ** around into forward order and return. - */ - *pnOut = i = strlen(z); - zOut[i] = 0; - while( *z ){ - zOut[--i] = *(z++); - } -} - -/* -** Characters that can be part of a token. We assume any character -** whose value is greater than 0x80 (any UTF character) can be -** part of a token. In other words, delimiters all must have -** values of 0x7f or lower. -*/ -static const char porterIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30])) - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to porterOpen(). -*/ -static int porterNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ - const char **pzToken, /* OUT: *pzToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - const char *z = c->zInput; - - while( c->iOffsetnInput ){ - int iStartOffset, ch; - - /* Scan past delimiter characters */ - while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int n = c->iOffset-iStartOffset; - if( n>c->nAllocated ){ - c->nAllocated = n+20; - c->zToken = sqlite3_realloc(c->zToken, c->nAllocated); - if( c->zToken==NULL ) return SQLITE_NOMEM; - } - porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); - *pzToken = c->zToken; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the porter-stemmer tokenizer -*/ -static const sqlite3_tokenizer_module porterTokenizerModule = { - 0, - porterCreate, - porterDestroy, - porterOpen, - porterClose, - porterNext, -}; - -/* -** Allocate a new porter tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts2PorterTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &porterTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/third_party/sqlite3/fts2_tokenizer.c b/third_party/sqlite3/fts2_tokenizer.c deleted file mode 100644 index 8c9aa8abc..000000000 --- a/third_party/sqlite3/fts2_tokenizer.c +++ /dev/null @@ -1,375 +0,0 @@ -/* -** 2007 June 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This is part of an SQLite module implementing full-text search. -** This particular file implements the generic tokenizer interface. -*/ -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - -/* clang-format off */ - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 - -#include "fts2_hash.h" -#include "fts2_tokenizer.h" -#include - -/* -** Implementation of the SQL scalar function for accessing the underlying -** hash table. This function may be called as follows: -** -** SELECT (); -** SELECT (, ); -** -** where is the name passed as the second argument -** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer'). -** -** If the argument is specified, it must be a blob value -** containing a pointer to be stored as the hash data corresponding -** to the string . If is not specified, then -** the string must already exist in the has table. Otherwise, -** an error is returned. -** -** Whether or not the argument is specified, the value returned -** is a blob containing the pointer stored as the hash data corresponding -** to string (after the hash-table is updated, if applicable). -*/ -static void scalarFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - fts2Hash *pHash; - void *pPtr = 0; - const unsigned char *zName; - int nName; - - assert( argc==1 || argc==2 ); - - pHash = (fts2Hash *)sqlite3_user_data(context); - - zName = sqlite3_value_text(argv[0]); - nName = sqlite3_value_bytes(argv[0])+1; - - if( argc==2 ){ - void *pOld; - int n = sqlite3_value_bytes(argv[1]); - if( n!=sizeof(pPtr) ){ - sqlite3_result_error(context, "argument type mismatch", -1); - return; - } - pPtr = *(void **)sqlite3_value_blob(argv[1]); - pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr); - if( pOld==pPtr ){ - sqlite3_result_error(context, "out of memory", -1); - return; - } - }else{ - pPtr = sqlite3Fts2HashFind(pHash, zName, nName); - if( !pPtr ){ - char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); - return; - } - } - - sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); -} - -#ifdef SQLITE_TEST - -#if defined(INCLUDE_SQLITE_TCL_H) -# include "sqlite_tcl.h" -#else -# include "tcl.h" -#endif -#include - -/* -** Implementation of a special SQL scalar function for testing tokenizers -** designed to be used in concert with the Tcl testing framework. This -** function must be called with two arguments: -** -** SELECT (, ); -** SELECT (, ); -** -** where is the name passed as the second argument -** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer') -** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test'). -** -** The return value is a string that may be interpreted as a Tcl -** list. For each token in the , three elements are -** added to the returned list. The first is the token position, the -** second is the token text (folded, stemmed, etc.) and the third is the -** substring of associated with the token. For example, -** using the built-in "simple" tokenizer: -** -** SELECT fts_tokenizer_test('simple', 'I don't see how'); -** -** will return the string: -** -** "{0 i I 1 dont don't 2 see see 3 how how}" -** -*/ -static void testFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - fts2Hash *pHash; - sqlite3_tokenizer_module *p; - sqlite3_tokenizer *pTokenizer = 0; - sqlite3_tokenizer_cursor *pCsr = 0; - - const char *zErr = 0; - - const char *zName; - int nName; - const char *zInput; - int nInput; - - const char *zArg = 0; - - const char *zToken; - int nToken; - int iStart; - int iEnd; - int iPos; - - Tcl_Obj *pRet; - - assert( argc==2 || argc==3 ); - - nName = sqlite3_value_bytes(argv[0]); - zName = (const char *)sqlite3_value_text(argv[0]); - nInput = sqlite3_value_bytes(argv[argc-1]); - zInput = (const char *)sqlite3_value_text(argv[argc-1]); - - if( argc==3 ){ - zArg = (const char *)sqlite3_value_text(argv[1]); - } - - pHash = (fts2Hash *)sqlite3_user_data(context); - p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1); - - if( !p ){ - char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); - return; - } - - pRet = Tcl_NewObj(); - Tcl_IncrRefCount(pRet); - - if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){ - zErr = "error in xCreate()"; - goto finish; - } - pTokenizer->pModule = p; - if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ - zErr = "error in xOpen()"; - goto finish; - } - pCsr->pTokenizer = pTokenizer; - - while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - zToken = &zInput[iStart]; - nToken = iEnd-iStart; - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - } - - if( SQLITE_OK!=p->xClose(pCsr) ){ - zErr = "error in xClose()"; - goto finish; - } - if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ - zErr = "error in xDestroy()"; - goto finish; - } - -finish: - if( zErr ){ - sqlite3_result_error(context, zErr, -1); - }else{ - sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); - } - Tcl_DecrRefCount(pRet); -} - -static -int registerTokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module *p -){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts2_tokenizer(?, ?)"; - - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); - sqlite3_step(pStmt); - - return sqlite3_finalize(pStmt); -} - -static -int queryFts2Tokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module **pp -){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts2_tokenizer(?)"; - - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); - } - } - - return sqlite3_finalize(pStmt); -} - -void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); - -/* -** Implementation of the scalar function fts2_tokenizer_internal_test(). -** This function is used for testing only, it is not included in the -** build unless SQLITE_TEST is defined. -** -** The purpose of this is to test that the fts2_tokenizer() function -** can be used as designed by the C-code in the queryFts2Tokenizer and -** registerTokenizer() functions above. These two functions are repeated -** in the README.tokenizer file as an example, so it is important to -** test them. -** -** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar -** function with no arguments. An assert() will fail if a problem is -** detected. i.e.: -** -** SELECT fts2_tokenizer_internal_test(); -** -*/ -static void intTestFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - int rc; - const sqlite3_tokenizer_module *p1; - const sqlite3_tokenizer_module *p2; - sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); - - /* Test the query function */ - sqlite3Fts2SimpleTokenizerModule(&p1); - rc = queryFts2Tokenizer(db, "simple", &p2); - assert( rc==SQLITE_OK ); - assert( p1==p2 ); - rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_ERROR ); - assert( p2==0 ); - assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); - - /* Test the storage function */ - rc = registerTokenizer(db, "nosuchtokenizer", p1); - assert( rc==SQLITE_OK ); - rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_OK ); - assert( p2==p1 ); - - sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); -} - -#endif - -/* -** Set up SQL objects in database db used to access the contents of -** the hash table pointed to by argument pHash. The hash table must -** been initialized to use string keys, and to take a private copy -** of the key when a value is inserted. i.e. by a call similar to: -** -** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); -** -** This function adds a scalar function (see header comment above -** scalarFunc() in this file for details) and, if ENABLE_TABLE is -** defined at compilation time, a temporary virtual table (see header -** comment above struct HashTableVtab) to the database schema. Both -** provide read/write access to the contents of *pHash. -** -** The third argument to this function, zName, is used as the name -** of both the scalar and, if created, the virtual table. -*/ -int sqlite3Fts2InitHashTable( - sqlite3 *db, - fts2Hash *pHash, - const char *zName -){ - int rc = SQLITE_OK; - void *p = (void *)pHash; - const int any = SQLITE_ANY; - char *zTest = 0; - char *zTest2 = 0; - -#ifdef SQLITE_TEST - void *pdb = (void *)db; - zTest = sqlite3_mprintf("%s_test", zName); - zTest2 = sqlite3_mprintf("%s_internal_test", zName); - if( !zTest || !zTest2 ){ - rc = SQLITE_NOMEM; - } -#endif - - if( rc!=SQLITE_OK - || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0)) - || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0)) -#ifdef SQLITE_TEST - || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0)) - || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0)) - || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0)) -#endif - ); - - sqlite3_free(zTest); - sqlite3_free(zTest2); - return rc; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/third_party/sqlite3/fts2_tokenizer.h b/third_party/sqlite3/fts2_tokenizer.h deleted file mode 100644 index 126f6b321..000000000 --- a/third_party/sqlite3/fts2_tokenizer.h +++ /dev/null @@ -1,147 +0,0 @@ -/* -** 2006 July 10 -** -** The author disclaims copyright to this source code. -** -************************************************************************* -** Defines the interface to tokenizers used by fulltext-search. There -** are three basic components: -** -** sqlite3_tokenizer_module is a singleton defining the tokenizer -** interface functions. This is essentially the class structure for -** tokenizers. -** -** sqlite3_tokenizer is used to define a particular tokenizer, perhaps -** including customization information defined at creation time. -** -** sqlite3_tokenizer_cursor is generated by a tokenizer to generate -** tokens from a particular input. -*/ -#ifndef _FTS2_TOKENIZER_H_ -#define _FTS2_TOKENIZER_H_ - -/* clang-format off */ - -/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. -** If tokenizers are to be allowed to call sqlite3_*() functions, then -** we will need a way to register the API consistently. -*/ -#include "sqlite3.h" - -/* -** Structures used by the tokenizer interface. When a new tokenizer -** implementation is registered, the caller provides a pointer to -** an sqlite3_tokenizer_module containing pointers to the callback -** functions that make up an implementation. -** -** When an fts2 table is created, it passes any arguments passed to -** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the -** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer -** implementation. The xCreate() function in turn returns an -** sqlite3_tokenizer structure representing the specific tokenizer to -** be used for the fts2 table (customized by the tokenizer clause arguments). -** -** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() -** method is called. It returns an sqlite3_tokenizer_cursor object -** that may be used to tokenize a specific input buffer based on -** the tokenization rules supplied by a specific sqlite3_tokenizer -** object. -*/ -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; -typedef struct sqlite3_tokenizer sqlite3_tokenizer; -typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; - -struct sqlite3_tokenizer_module { - - /* - ** Structure version. Should always be set to 0. - */ - int iVersion; - - /* - ** Create a new tokenizer. The values in the argv[] array are the - ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL - ** TABLE statement that created the fts2 table. For example, if - ** the following SQL is executed: - ** - ** CREATE .. USING fts2( ... , tokenizer arg1 arg2) - ** - ** then argc is set to 2, and the argv[] array contains pointers - ** to the strings "arg1" and "arg2". - ** - ** This method should return either SQLITE_OK (0), or an SQLite error - ** code. If SQLITE_OK is returned, then *ppTokenizer should be set - ** to point at the newly created tokenizer structure. The generic - ** sqlite3_tokenizer.pModule variable should not be initialized by - ** this callback. The caller will do so. - */ - int (*xCreate)( - int argc, /* Size of argv array */ - const char *const*argv, /* Tokenizer argument strings */ - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ - ); - - /* - ** Destroy an existing tokenizer. The fts2 module calls this method - ** exactly once for each successful call to xCreate(). - */ - int (*xDestroy)(sqlite3_tokenizer *pTokenizer); - - /* - ** Create a tokenizer cursor to tokenize an input buffer. The caller - ** is responsible for ensuring that the input buffer remains valid - ** until the cursor is closed (using the xClose() method). - */ - int (*xOpen)( - sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ - const char *pInput, int nBytes, /* Input buffer */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ - ); - - /* - ** Destroy an existing tokenizer cursor. The fts2 module calls this - ** method exactly once for each successful call to xOpen(). - */ - int (*xClose)(sqlite3_tokenizer_cursor *pCursor); - - /* - ** Retrieve the next token from the tokenizer cursor pCursor. This - ** method should either return SQLITE_OK and set the values of the - ** "OUT" variables identified below, or SQLITE_DONE to indicate that - ** the end of the buffer has been reached, or an SQLite error code. - ** - ** *ppToken should be set to point at a buffer containing the - ** normalized version of the token (i.e. after any case-folding and/or - ** stemming has been performed). *pnBytes should be set to the length - ** of this buffer in bytes. The input text that generated the token is - ** identified by the byte offsets returned in *piStartOffset and - ** *piEndOffset. - ** - ** The buffer *ppToken is set to point at is managed by the tokenizer - ** implementation. It is only required to be valid until the next call - ** to xNext() or xClose(). - */ - /* TODO(shess) current implementation requires pInput to be - ** nul-terminated. This should either be fixed, or pInput/nBytes - ** should be converted to zInput. - */ - int (*xNext)( - sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ - const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ - int *piStartOffset, /* OUT: Byte offset of token in input buffer */ - int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ - int *piPosition /* OUT: Number of tokens returned before this one */ - ); -}; - -struct sqlite3_tokenizer { - const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ - /* Tokenizer implementations will typically add additional fields */ -}; - -struct sqlite3_tokenizer_cursor { - sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ - /* Tokenizer implementations will typically add additional fields */ -}; - -#endif /* _FTS2_TOKENIZER_H_ */ diff --git a/third_party/sqlite3/fts2_tokenizer1.c b/third_party/sqlite3/fts2_tokenizer1.c deleted file mode 100644 index bb3261118..000000000 --- a/third_party/sqlite3/fts2_tokenizer1.c +++ /dev/null @@ -1,233 +0,0 @@ -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** Implementation of the "simple" full-text-search tokenizer. -*/ -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - -/* clang-format off */ - -#include -#include -#include -#include - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 -#include "fts2_tokenizer.h" - -typedef struct simple_tokenizer { - sqlite3_tokenizer base; - char delim[128]; /* flag ASCII delimiters */ -} simple_tokenizer; - -typedef struct simple_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *pInput; /* input we are tokenizing */ - int nBytes; /* size of the input */ - int iOffset; /* current position in pInput */ - int iToken; /* index of next token to be returned */ - char *pToken; /* storage for current token */ - int nTokenAllocated; /* space allocated to zToken buffer */ -} simple_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module simpleTokenizerModule; - -static int simpleDelim(simple_tokenizer *t, unsigned char c){ - return c<0x80 && t->delim[c]; -} - -/* -** Create a new tokenizer instance. -*/ -static int simpleCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - simple_tokenizer *t; - - t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); - - /* TODO(shess) Delimiters need to remain the same from run to run, - ** else we need to reindex. One solution would be a meta-table to - ** track such information in the database, then we'd only want this - ** information on the initial create. - */ - if( argc>1 ){ - int i, n = strlen(argv[1]); - for(i=0; i=0x80 ){ - sqlite3_free(t); - return SQLITE_ERROR; - } - t->delim[ch] = 1; - } - } else { - /* Mark non-alphanumeric ASCII characters as delimiters */ - int i; - for(i=1; i<0x80; i++){ - t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') || - (i>='a' && i<='z')); - } - } - - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int simpleOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *pInput, int nBytes, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - simple_tokenizer_cursor *c; - - c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->pInput = pInput; - if( pInput==0 ){ - c->nBytes = 0; - }else if( nBytes<0 ){ - c->nBytes = (int)strlen(pInput); - }else{ - c->nBytes = nBytes; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->pToken = NULL; /* no space allocated, yet. */ - c->nTokenAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. -*/ -static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - sqlite3_free(c->pToken); - sqlite3_free(c); - return SQLITE_OK; -} - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to simpleOpen(). -*/ -static int simpleNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; - unsigned char *p = (unsigned char *)c->pInput; - - while( c->iOffsetnBytes ){ - int iStartOffset; - - /* Scan past delimiter characters */ - while( c->iOffsetnBytes && simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnBytes && !simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int i, n = c->iOffset-iStartOffset; - if( n>c->nTokenAllocated ){ - c->nTokenAllocated = n+20; - c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated); - if( c->pToken==NULL ) return SQLITE_NOMEM; - } - for(i=0; ipToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch; - } - *ppToken = c->pToken; - *pnBytes = n; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the simple tokenizer -*/ -static const sqlite3_tokenizer_module simpleTokenizerModule = { - 0, - simpleCreate, - simpleDestroy, - simpleOpen, - simpleClose, - simpleNext, -}; - -/* -** Allocate a new simple tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts2SimpleTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &simpleTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/third_party/sqlite3/fts3.c b/third_party/sqlite3/fts3.c index cedd5ca5d..f1dae05ec 100644 --- a/third_party/sqlite3/fts3.c +++ b/third_party/sqlite3/fts3.c @@ -289,7 +289,7 @@ */ /* clang-format off */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) @@ -300,7 +300,7 @@ #include "libc/mem/mem.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" -#include "third_party/sqlite3/fts3.h" +#include "third_party/sqlite3/fts3.inc" #ifndef SQLITE_CORE #include "third_party/sqlite3/sqlite3ext.h" SQLITE_EXTENSION_INIT1 diff --git a/third_party/sqlite3/fts3.h b/third_party/sqlite3/fts3.inc similarity index 100% rename from third_party/sqlite3/fts3.h rename to third_party/sqlite3/fts3.inc diff --git a/third_party/sqlite3/fts3Int.h b/third_party/sqlite3/fts3Int.inc similarity index 99% rename from third_party/sqlite3/fts3Int.h rename to third_party/sqlite3/fts3Int.inc index 0d2a77173..52e481a60 100644 --- a/third_party/sqlite3/fts3Int.h +++ b/third_party/sqlite3/fts3Int.inc @@ -42,8 +42,8 @@ SQLITE_EXTENSION_INIT3 #endif -#include "third_party/sqlite3/fts3_hash.h" -#include "third_party/sqlite3/fts3_tokenizer.h" +#include "third_party/sqlite3/fts3_hash.inc" +#include "third_party/sqlite3/fts3_tokenizer.inc" #include "third_party/sqlite3/sqlite3.h" /* diff --git a/third_party/sqlite3/fts3_aux.c b/third_party/sqlite3/fts3_aux.c index b9240c0d5..012edf6d3 100644 --- a/third_party/sqlite3/fts3_aux.c +++ b/third_party/sqlite3/fts3_aux.c @@ -12,7 +12,7 @@ ** */ /* clang-format off */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/assert.h" diff --git a/third_party/sqlite3/fts3_expr.c b/third_party/sqlite3/fts3_expr.c index 44ae012f2..35fc10953 100644 --- a/third_party/sqlite3/fts3_expr.c +++ b/third_party/sqlite3/fts3_expr.c @@ -15,7 +15,7 @@ ** syntax is relatively simple, the whole tokenizer/parser system is ** hand-coded. */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) /* clang-format off */ diff --git a/third_party/sqlite3/fts3_hash.c b/third_party/sqlite3/fts3_hash.c index 46c2a2df4..8fa342002 100644 --- a/third_party/sqlite3/fts3_hash.c +++ b/third_party/sqlite3/fts3_hash.c @@ -24,13 +24,13 @@ ** * The FTS3 module is being built into the core of ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/assert.h" #include "libc/mem/mem.h" #include "libc/str/str.h" -#include "third_party/sqlite3/fts3_hash.h" +#include "third_party/sqlite3/fts3_hash.inc" /* ** Malloc and Free functions diff --git a/third_party/sqlite3/fts3_hash.h b/third_party/sqlite3/fts3_hash.inc similarity index 100% rename from third_party/sqlite3/fts3_hash.h rename to third_party/sqlite3/fts3_hash.inc diff --git a/third_party/sqlite3/fts3_icu.c b/third_party/sqlite3/fts3_icu.c index 2bb31efdb..f864e9c07 100644 --- a/third_party/sqlite3/fts3_icu.c +++ b/third_party/sqlite3/fts3_icu.c @@ -11,7 +11,7 @@ ************************************************************************* ** This file implements a tokenizer for fts3 based on the ICU library. */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #ifdef SQLITE_ENABLE_ICU /* clang-format off */ @@ -19,7 +19,7 @@ #include "libc/assert.h" #include "libc/str/str.h" #include "libc/unicode/unicode.h" -#include "third_party/sqlite3/fts3_tokenizer.h" +#include "third_party/sqlite3/fts3_tokenizer.inc" typedef struct IcuTokenizer IcuTokenizer; typedef struct IcuCursor IcuCursor; diff --git a/third_party/sqlite3/fts3_porter.c b/third_party/sqlite3/fts3_porter.c index b1713f99c..415905913 100644 --- a/third_party/sqlite3/fts3_porter.c +++ b/third_party/sqlite3/fts3_porter.c @@ -23,14 +23,14 @@ ** * The FTS3 module is being built into the core of ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/assert.h" #include "libc/mem/mem.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" -#include "third_party/sqlite3/fts3_tokenizer.h" +#include "third_party/sqlite3/fts3_tokenizer.inc" /* ** Class derived from sqlite3_tokenizer diff --git a/third_party/sqlite3/fts3_snippet.c b/third_party/sqlite3/fts3_snippet.c index 0931ea59e..8ab5a78ae 100644 --- a/third_party/sqlite3/fts3_snippet.c +++ b/third_party/sqlite3/fts3_snippet.c @@ -12,7 +12,7 @@ */ /* clang-format off */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/assert.h" diff --git a/third_party/sqlite3/fts3_tokenize_vtab.c b/third_party/sqlite3/fts3_tokenize_vtab.c index 1abcac613..79a43b99f 100644 --- a/third_party/sqlite3/fts3_tokenize_vtab.c +++ b/third_party/sqlite3/fts3_tokenize_vtab.c @@ -38,7 +38,7 @@ ** pos: Token offset of token within input. ** */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) /* clang-format off */ diff --git a/third_party/sqlite3/fts3_tokenizer.c b/third_party/sqlite3/fts3_tokenizer.c index 97b50a3ff..617351754 100644 --- a/third_party/sqlite3/fts3_tokenizer.c +++ b/third_party/sqlite3/fts3_tokenizer.c @@ -24,7 +24,7 @@ ** * The FTS3 module is being built into the core of ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/assert.h" @@ -226,12 +226,6 @@ int sqlite3Fts3InitTokenizer( #ifdef SQLITE_TEST - -#if defined(INCLUDE_SQLITE_TCL_H) -#include "third_party/sqlite3/sqlite_tcl.h" -#else -#include "third_party/sqlite3/tcl.h" -#endif #include "libc/str/str.h" /* diff --git a/third_party/sqlite3/fts3_tokenizer.h b/third_party/sqlite3/fts3_tokenizer.inc similarity index 100% rename from third_party/sqlite3/fts3_tokenizer.h rename to third_party/sqlite3/fts3_tokenizer.inc diff --git a/third_party/sqlite3/fts3_tokenizer1.c b/third_party/sqlite3/fts3_tokenizer1.c index fe06ba770..348ab6d69 100644 --- a/third_party/sqlite3/fts3_tokenizer1.c +++ b/third_party/sqlite3/fts3_tokenizer1.c @@ -23,14 +23,14 @@ ** * The FTS3 module is being built into the core of ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/assert.h" #include "libc/mem/mem.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" -#include "third_party/sqlite3/fts3_tokenizer.h" +#include "third_party/sqlite3/fts3_tokenizer.inc" typedef struct simple_tokenizer { sqlite3_tokenizer base; diff --git a/third_party/sqlite3/fts3_unicode.c b/third_party/sqlite3/fts3_unicode.c index 4d422f8b3..648f1361a 100644 --- a/third_party/sqlite3/fts3_unicode.c +++ b/third_party/sqlite3/fts3_unicode.c @@ -16,14 +16,14 @@ #ifndef SQLITE_DISABLE_FTS3_UNICODE -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/assert.h" #include "libc/mem/mem.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" -#include "third_party/sqlite3/fts3_tokenizer.h" +#include "third_party/sqlite3/fts3_tokenizer.inc" /* ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied diff --git a/third_party/sqlite3/fts3_write.c b/third_party/sqlite3/fts3_write.c index cd9f6fd09..fa5aae5fe 100644 --- a/third_party/sqlite3/fts3_write.c +++ b/third_party/sqlite3/fts3_write.c @@ -18,7 +18,7 @@ */ /* clang-format off */ -#include "third_party/sqlite3/fts3Int.h" +#include "third_party/sqlite3/fts3Int.inc" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include "libc/alg/alg.h" diff --git a/third_party/sqlite3/fts5.h b/third_party/sqlite3/fts5.inc similarity index 100% rename from third_party/sqlite3/fts5.h rename to third_party/sqlite3/fts5.inc diff --git a/third_party/sqlite3/func.c b/third_party/sqlite3/func.c index dc2c43599..7eb0b492b 100644 --- a/third_party/sqlite3/func.c +++ b/third_party/sqlite3/func.c @@ -15,11 +15,11 @@ */ #include "libc/assert.h" #include "libc/mem/mem.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #ifndef SQLITE_OMIT_FLOATING_POINT #include "libc/math.h" #endif -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/geopoly.c b/third_party/sqlite3/geopoly.inc similarity index 99% rename from third_party/sqlite3/geopoly.c rename to third_party/sqlite3/geopoly.inc index 83533adae..7904f6b01 100644 --- a/third_party/sqlite3/geopoly.c +++ b/third_party/sqlite3/geopoly.inc @@ -16,11 +16,7 @@ ** This file is #include-ed onto the end of "rtree.c" so that it has ** access to all of the R-Tree internals. */ -#include "libc/fmt/conv.h" -#include "libc/mem/mem.h" #include "third_party/gdtoa/gdtoa.h" -#include "third_party/sqlite3/rtree.h" -#include "third_party/sqlite3/sqlite3.h" /* clang-format off */ /* Enable -DGEOPOLY_ENABLE_DEBUG for debugging facilities */ diff --git a/third_party/sqlite3/global.c b/third_party/sqlite3/global.c index bf96ced8e..703533cf5 100644 --- a/third_party/sqlite3/global.c +++ b/third_party/sqlite3/global.c @@ -12,7 +12,7 @@ ** ** This file contains definitions of global variables and constants. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* An array to map all upper-case characters into their corresponding @@ -307,7 +307,7 @@ int sqlite3PendingByte = 0x40000000; u32 sqlite3SelectTrace = 0; u32 sqlite3WhereTrace = 0; -#include "third_party/sqlite3/opcodes.h" +#include "third_party/sqlite3/opcodes.inc" /* ** Properties of opcodes. The OPFLG_INITIALIZER macro is ** created by mkopcodeh.awk during compilation. Data is obtained diff --git a/third_party/sqlite3/hash.c b/third_party/sqlite3/hash.c index 773f76da7..3f1244043 100644 --- a/third_party/sqlite3/hash.c +++ b/third_party/sqlite3/hash.c @@ -13,7 +13,7 @@ ** used in SQLite. */ #include "libc/assert.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* Turn bulk memory into a hash table object by initializing the diff --git a/third_party/sqlite3/hash.h b/third_party/sqlite3/hash.inc similarity index 100% rename from third_party/sqlite3/hash.h rename to third_party/sqlite3/hash.inc diff --git a/third_party/sqlite3/hwtime.h b/third_party/sqlite3/hwtime.inc similarity index 100% rename from third_party/sqlite3/hwtime.h rename to third_party/sqlite3/hwtime.inc diff --git a/third_party/sqlite3/insert.c b/third_party/sqlite3/insert.c index 7a5824e5c..783a60125 100644 --- a/third_party/sqlite3/insert.c +++ b/third_party/sqlite3/insert.c @@ -12,7 +12,7 @@ ** This file contains C code routines that are called by the parser ** to handle INSERT statements in SQLite. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/inttypes.inc b/third_party/sqlite3/inttypes.inc new file mode 100644 index 000000000..a8462e8fa --- /dev/null +++ b/third_party/sqlite3/inttypes.inc @@ -0,0 +1,62 @@ +#ifndef COSMOPOLITAN_THIRD_PARTY_SQLITE3_INTTYPES_H_ +#define COSMOPOLITAN_THIRD_PARTY_SQLITE3_INTTYPES_H_ +#include "third_party/sqlite3/sqlite3.h" +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +/* +** Integers of known sizes. These typedefs might change for architectures +** where the sizes very. Preprocessor macros are available so that the +** types can be conveniently redefined at compile-type. Like this: +** +** cc '-DUINTPTR_TYPE=long long int' ... +*/ +#ifndef UINT32_TYPE +#ifdef HAVE_UINT32_T +#define UINT32_TYPE uint32_t +#else +#define UINT32_TYPE unsigned int +#endif +#endif +#ifndef UINT16_TYPE +#ifdef HAVE_UINT16_T +#define UINT16_TYPE uint16_t +#else +#define UINT16_TYPE unsigned short int +#endif +#endif +#ifndef INT16_TYPE +#ifdef HAVE_INT16_T +#define INT16_TYPE int16_t +#else +#define INT16_TYPE short int +#endif +#endif +#ifndef UINT8_TYPE +#ifdef HAVE_UINT8_T +#define UINT8_TYPE uint8_t +#else +#define UINT8_TYPE unsigned char +#endif +#endif +#ifndef INT8_TYPE +#ifdef HAVE_INT8_T +#define INT8_TYPE int8_t +#else +#define INT8_TYPE signed char +#endif +#endif +#ifndef LONGDOUBLE_TYPE +#define LONGDOUBLE_TYPE long double +#endif +typedef sqlite_int64 i64; /* 8-byte signed integer */ +typedef sqlite_uint64 u64; /* 8-byte unsigned integer */ +typedef UINT32_TYPE u32; /* 4-byte unsigned integer */ +typedef UINT16_TYPE u16; /* 2-byte unsigned integer */ +typedef INT16_TYPE i16; /* 2-byte signed integer */ +typedef UINT8_TYPE u8; /* 1-byte unsigned integer */ +typedef INT8_TYPE i8; /* 1-byte signed integer */ + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_THIRD_PARTY_SQLITE3_INTTYPES_H_ */ diff --git a/third_party/sqlite3/keywordhash.h b/third_party/sqlite3/keywordhash.inc similarity index 100% rename from third_party/sqlite3/keywordhash.h rename to third_party/sqlite3/keywordhash.inc diff --git a/third_party/sqlite3/legacy.c b/third_party/sqlite3/legacy.c index 430ff1179..70d9dc062 100644 --- a/third_party/sqlite3/legacy.c +++ b/third_party/sqlite3/legacy.c @@ -16,7 +16,7 @@ */ /* clang-format off */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* ** Execute SQL code. Return one of the SQLITE_ success/failure diff --git a/third_party/sqlite3/loadext.c b/third_party/sqlite3/loadext.c index c7a1829f3..794217714 100644 --- a/third_party/sqlite3/loadext.c +++ b/third_party/sqlite3/loadext.c @@ -18,7 +18,7 @@ #define SQLITE_CORE 1 /* Disable the API redefinition in sqlite3ext.h */ #endif #include "third_party/sqlite3/sqlite3ext.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #ifndef SQLITE_OMIT_LOAD_EXTENSION /* diff --git a/third_party/sqlite3/main.c b/third_party/sqlite3/main.c index 9c78646bf..31673e193 100644 --- a/third_party/sqlite3/main.c +++ b/third_party/sqlite3/main.c @@ -14,17 +14,17 @@ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifdef SQLITE_ENABLE_FTS3 -#include "third_party/sqlite3/fts3.h" +#include "third_party/sqlite3/fts3.inc" #endif #ifdef SQLITE_ENABLE_RTREE -#include "third_party/sqlite3/rtree.h" +#include "third_party/sqlite3/rtree.inc" #endif #if defined(SQLITE_ENABLE_ICU) || defined(SQLITE_ENABLE_ICU_COLLATIONS) -#include "third_party/sqlite3/sqliteicu.h" +#include "third_party/sqlite3/sqliteicu.inc" #endif /* diff --git a/third_party/sqlite3/malloc.c b/third_party/sqlite3/malloc.c index 76ec456c0..d1175ce24 100644 --- a/third_party/sqlite3/malloc.c +++ b/third_party/sqlite3/malloc.c @@ -14,7 +14,7 @@ */ /* clang-format off */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* ** Attempt to release up to n bytes of non-essential memory currently diff --git a/third_party/sqlite3/mem0.c b/third_party/sqlite3/mem0.c index 4f2b73151..52ac7fc02 100644 --- a/third_party/sqlite3/mem0.c +++ b/third_party/sqlite3/mem0.c @@ -16,7 +16,7 @@ ** are merely placeholders. Real drivers must be substituted using ** sqlite3_config() before SQLite will operate. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/mem1.c b/third_party/sqlite3/mem1.c index a988240eb..4c7dd182a 100644 --- a/third_party/sqlite3/mem1.c +++ b/third_party/sqlite3/mem1.c @@ -41,7 +41,7 @@ ** be necessary when compiling for Delphi, ** for example. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/mem2.c b/third_party/sqlite3/mem2.c index 9fdbdfc36..3249f81fc 100644 --- a/third_party/sqlite3/mem2.c +++ b/third_party/sqlite3/mem2.c @@ -19,7 +19,7 @@ ** This file contains implementations of the low-level memory allocation ** routines specified in the sqlite3_mem_methods object. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/mem3.c b/third_party/sqlite3/mem3.c index 770d024f0..90a367729 100644 --- a/third_party/sqlite3/mem3.c +++ b/third_party/sqlite3/mem3.c @@ -23,7 +23,7 @@ ** This version of the memory allocation subsystem is included ** in the build only if SQLITE_ENABLE_MEMSYS3 is defined. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/mem5.c b/third_party/sqlite3/mem5.c index 77f2d82e8..cb754e5c1 100644 --- a/third_party/sqlite3/mem5.c +++ b/third_party/sqlite3/mem5.c @@ -48,7 +48,7 @@ ** The sqlite3_status() logic tracks the maximum values of n and M so ** that an application can, at any time, verify this constraint. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/memdb.c b/third_party/sqlite3/memdb.c index 7cabb03b3..0b3939acb 100644 --- a/third_party/sqlite3/memdb.c +++ b/third_party/sqlite3/memdb.c @@ -16,7 +16,7 @@ ** This file also implements interface sqlite3_serialize() and ** sqlite3_deserialize(). */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #ifdef SQLITE_ENABLE_DESERIALIZE /* clang-format off */ diff --git a/third_party/sqlite3/memjournal.c b/third_party/sqlite3/memjournal.c index fac909462..45dd2cd14 100644 --- a/third_party/sqlite3/memjournal.c +++ b/third_party/sqlite3/memjournal.c @@ -23,7 +23,7 @@ ** in the common case, they are usually small and no file I/O needs to ** occur. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* Forward references to internal structures */ diff --git a/third_party/sqlite3/msvc.h b/third_party/sqlite3/msvc.inc similarity index 100% rename from third_party/sqlite3/msvc.h rename to third_party/sqlite3/msvc.inc diff --git a/third_party/sqlite3/mutex.c b/third_party/sqlite3/mutex.c index 6b1302123..256287739 100644 --- a/third_party/sqlite3/mutex.c +++ b/third_party/sqlite3/mutex.c @@ -13,7 +13,7 @@ ** ** This file contains code that is common across all mutex implementations. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #if defined(SQLITE_DEBUG) && !defined(SQLITE_MUTEX_OMIT) diff --git a/third_party/sqlite3/mutex.h b/third_party/sqlite3/mutex.inc similarity index 100% rename from third_party/sqlite3/mutex.h rename to third_party/sqlite3/mutex.inc diff --git a/third_party/sqlite3/mutex_noop.c b/third_party/sqlite3/mutex_noop.c index 0d6a54907..6802b7888 100644 --- a/third_party/sqlite3/mutex_noop.c +++ b/third_party/sqlite3/mutex_noop.c @@ -25,7 +25,7 @@ ** that does error checking on mutexes to make sure they are being ** called correctly. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifndef SQLITE_MUTEX_OMIT diff --git a/third_party/sqlite3/mutex_unix.c b/third_party/sqlite3/mutex_unix.c index 3eb82febb..2228e07f5 100644 --- a/third_party/sqlite3/mutex_unix.c +++ b/third_party/sqlite3/mutex_unix.c @@ -11,7 +11,7 @@ ************************************************************************* ** This file contains the C functions that implement mutexes for pthreads */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/mutex_w32.c b/third_party/sqlite3/mutex_w32.c deleted file mode 100644 index 54e65408f..000000000 --- a/third_party/sqlite3/mutex_w32.c +++ /dev/null @@ -1,400 +0,0 @@ -/* -** 2007 August 14 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file contains the C functions that implement mutexes for Win32. -*/ -#include "third_party/sqlite3/sqliteInt.h" -/* clang-format off */ - -#if SQLITE_OS_WIN -/* -** Include code that is common to all os_*.c files -*/ -#include "third_party/sqlite3/os_common.h" - -/* -** Include the header file for the Windows VFS. -*/ -#include "third_party/sqlite3/os_win.h" -#endif - -/* -** The code in this file is only used if we are compiling multithreaded -** on a Win32 system. -*/ -#ifdef SQLITE_MUTEX_W32 - -/* -** Each recursive mutex is an instance of the following structure. -*/ -struct sqlite3_mutex { - CRITICAL_SECTION mutex; /* Mutex controlling the lock */ - int id; /* Mutex type */ -#ifdef SQLITE_DEBUG - volatile int nRef; /* Number of enterances */ - volatile DWORD owner; /* Thread holding this mutex */ - volatile LONG trace; /* True to trace changes */ -#endif -}; - -/* -** These are the initializer values used when declaring a "static" mutex -** on Win32. It should be noted that all mutexes require initialization -** on the Win32 platform. -*/ -#define SQLITE_W32_MUTEX_INITIALIZER { 0 } - -#ifdef SQLITE_DEBUG -#define SQLITE3_MUTEX_INITIALIZER(id) { SQLITE_W32_MUTEX_INITIALIZER, id, \ - 0L, (DWORD)0, 0 } -#else -#define SQLITE3_MUTEX_INITIALIZER(id) { SQLITE_W32_MUTEX_INITIALIZER, id } -#endif - -#ifdef SQLITE_DEBUG -/* -** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are -** intended for use only inside assert() statements. -*/ -static int winMutexHeld(sqlite3_mutex *p){ - return p->nRef!=0 && p->owner==GetCurrentThreadId(); -} - -static int winMutexNotheld2(sqlite3_mutex *p, DWORD tid){ - return p->nRef==0 || p->owner!=tid; -} - -static int winMutexNotheld(sqlite3_mutex *p){ - DWORD tid = GetCurrentThreadId(); - return winMutexNotheld2(p, tid); -} -#endif - -/* -** Try to provide a memory barrier operation, needed for initialization -** and also for the xShmBarrier method of the VFS in cases when SQLite is -** compiled without mutexes (SQLITE_THREADSAFE=0). -*/ -void sqlite3MemoryBarrier(void){ -#if defined(SQLITE_MEMORY_BARRIER) - SQLITE_MEMORY_BARRIER; -#elif defined(__GNUC__) - __sync_synchronize(); -#elif MSVC_VERSION>=1300 - _ReadWriteBarrier(); -#elif defined(MemoryBarrier) - MemoryBarrier(); -#endif -} - -/* -** Initialize and deinitialize the mutex subsystem. -*/ -static sqlite3_mutex winMutex_staticMutexes[] = { - SQLITE3_MUTEX_INITIALIZER(2), - SQLITE3_MUTEX_INITIALIZER(3), - SQLITE3_MUTEX_INITIALIZER(4), - SQLITE3_MUTEX_INITIALIZER(5), - SQLITE3_MUTEX_INITIALIZER(6), - SQLITE3_MUTEX_INITIALIZER(7), - SQLITE3_MUTEX_INITIALIZER(8), - SQLITE3_MUTEX_INITIALIZER(9), - SQLITE3_MUTEX_INITIALIZER(10), - SQLITE3_MUTEX_INITIALIZER(11), - SQLITE3_MUTEX_INITIALIZER(12), - SQLITE3_MUTEX_INITIALIZER(13) -}; - -static int winMutex_isInit = 0; -static int winMutex_isNt = -1; /* <0 means "need to query" */ - -/* As the winMutexInit() and winMutexEnd() functions are called as part -** of the sqlite3_initialize() and sqlite3_shutdown() processing, the -** "interlocked" magic used here is probably not strictly necessary. -*/ -static LONG SQLITE_WIN32_VOLATILE winMutex_lock = 0; - -int sqlite3_win32_is_nt(void); /* os_win.c */ -void sqlite3_win32_sleep(DWORD milliseconds); /* os_win.c */ - -static int winMutexInit(void){ - /* The first to increment to 1 does actual initialization */ - if( InterlockedCompareExchange(&winMutex_lock, 1, 0)==0 ){ - int i; - for(i=0; i -**
  • SQLITE_MUTEX_FAST -**
  • SQLITE_MUTEX_RECURSIVE -**
  • SQLITE_MUTEX_STATIC_MAIN -**
  • SQLITE_MUTEX_STATIC_MEM -**
  • SQLITE_MUTEX_STATIC_OPEN -**
  • SQLITE_MUTEX_STATIC_PRNG -**
  • SQLITE_MUTEX_STATIC_LRU -**
  • SQLITE_MUTEX_STATIC_PMEM -**
  • SQLITE_MUTEX_STATIC_APP1 -**
  • SQLITE_MUTEX_STATIC_APP2 -**
  • SQLITE_MUTEX_STATIC_APP3 -**
  • SQLITE_MUTEX_STATIC_VFS1 -**
  • SQLITE_MUTEX_STATIC_VFS2 -**
  • SQLITE_MUTEX_STATIC_VFS3 -** -** -** The first two constants cause sqlite3_mutex_alloc() to create -** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE -** is used but not necessarily so when SQLITE_MUTEX_FAST is used. -** The mutex implementation does not need to make a distinction -** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does -** not want to. But SQLite will only request a recursive mutex in -** cases where it really needs one. If a faster non-recursive mutex -** implementation is available on the host platform, the mutex subsystem -** might return such a mutex in response to SQLITE_MUTEX_FAST. -** -** The other allowed parameters to sqlite3_mutex_alloc() each return -** a pointer to a static preexisting mutex. Six static mutexes are -** used by the current version of SQLite. Future versions of SQLite -** may add additional static mutexes. Static mutexes are for internal -** use by SQLite only. Applications that use SQLite mutexes should -** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or -** SQLITE_MUTEX_RECURSIVE. -** -** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST -** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() -** returns a different mutex on every call. But for the static -** mutex types, the same mutex is returned on every call that has -** the same type number. -*/ -static sqlite3_mutex *winMutexAlloc(int iType){ - sqlite3_mutex *p; - - switch( iType ){ - case SQLITE_MUTEX_FAST: - case SQLITE_MUTEX_RECURSIVE: { - p = sqlite3MallocZero( sizeof(*p) ); - if( p ){ - p->id = iType; -#ifdef SQLITE_DEBUG -#ifdef SQLITE_WIN32_MUTEX_TRACE_DYNAMIC - p->trace = 1; -#endif -#endif -#if SQLITE_OS_WINRT - InitializeCriticalSectionEx(&p->mutex, 0, 0); -#else - InitializeCriticalSection(&p->mutex); -#endif - } - break; - } - default: { -#ifdef SQLITE_ENABLE_API_ARMOR - if( iType-2<0 || iType-2>=ArraySize(winMutex_staticMutexes) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } -#endif - p = &winMutex_staticMutexes[iType-2]; -#ifdef SQLITE_DEBUG -#ifdef SQLITE_WIN32_MUTEX_TRACE_STATIC - InterlockedCompareExchange(&p->trace, 1, 0); -#endif -#endif - break; - } - } - assert( p==0 || p->id==iType ); - return p; -} - - -/* -** This routine deallocates a previously -** allocated mutex. SQLite is careful to deallocate every -** mutex that it allocates. -*/ -static void winMutexFree(sqlite3_mutex *p){ - assert( p ); - assert( p->nRef==0 && p->owner==0 ); - if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ){ - DeleteCriticalSection(&p->mutex); - sqlite3_free(p); - }else{ -#ifdef SQLITE_ENABLE_API_ARMOR - (void)SQLITE_MISUSE_BKPT; -#endif - } -} - -/* -** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt -** to enter a mutex. If another thread is already within the mutex, -** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return -** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK -** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can -** be entered multiple times by the same thread. In such cases the, -** mutex must be exited an equal number of times before another thread -** can enter. If the same thread tries to enter any other kind of mutex -** more than once, the behavior is undefined. -*/ -static void winMutexEnter(sqlite3_mutex *p){ -#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) - DWORD tid = GetCurrentThreadId(); -#endif -#ifdef SQLITE_DEBUG - assert( p ); - assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); -#else - assert( p ); -#endif - assert( winMutex_isInit==1 ); - EnterCriticalSection(&p->mutex); -#ifdef SQLITE_DEBUG - assert( p->nRef>0 || p->owner==0 ); - p->owner = tid; - p->nRef++; - if( p->trace ){ - OSTRACE(("ENTER-MUTEX tid=%lu, mutex(%d)=%p (%d), nRef=%d\n", - tid, p->id, p, p->trace, p->nRef)); - } -#endif -} - -static int winMutexTry(sqlite3_mutex *p){ -#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) - DWORD tid = GetCurrentThreadId(); -#endif - int rc = SQLITE_BUSY; - assert( p ); - assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); - /* - ** The sqlite3_mutex_try() routine is very rarely used, and when it - ** is used it is merely an optimization. So it is OK for it to always - ** fail. - ** - ** The TryEnterCriticalSection() interface is only available on WinNT. - ** And some windows compilers complain if you try to use it without - ** first doing some #defines that prevent SQLite from building on Win98. - ** For that reason, we will omit this optimization for now. See - ** ticket #2685. - */ -#if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0400 - assert( winMutex_isInit==1 ); - assert( winMutex_isNt>=-1 && winMutex_isNt<=1 ); - if( winMutex_isNt<0 ){ - winMutex_isNt = sqlite3_win32_is_nt(); - } - assert( winMutex_isNt==0 || winMutex_isNt==1 ); - if( winMutex_isNt && TryEnterCriticalSection(&p->mutex) ){ -#ifdef SQLITE_DEBUG - p->owner = tid; - p->nRef++; -#endif - rc = SQLITE_OK; - } -#else - UNUSED_PARAMETER(p); -#endif -#ifdef SQLITE_DEBUG - if( p->trace ){ - OSTRACE(("TRY-MUTEX tid=%lu, mutex(%d)=%p (%d), owner=%lu, nRef=%d, rc=%s\n", - tid, p->id, p, p->trace, p->owner, p->nRef, sqlite3ErrName(rc))); - } -#endif - return rc; -} - -/* -** The sqlite3_mutex_leave() routine exits a mutex that was -** previously entered by the same thread. The behavior -** is undefined if the mutex is not currently entered or -** is not currently allocated. SQLite will never do either. -*/ -static void winMutexLeave(sqlite3_mutex *p){ -#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) - DWORD tid = GetCurrentThreadId(); -#endif - assert( p ); -#ifdef SQLITE_DEBUG - assert( p->nRef>0 ); - assert( p->owner==tid ); - p->nRef--; - if( p->nRef==0 ) p->owner = 0; - assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE ); -#endif - assert( winMutex_isInit==1 ); - LeaveCriticalSection(&p->mutex); -#ifdef SQLITE_DEBUG - if( p->trace ){ - OSTRACE(("LEAVE-MUTEX tid=%lu, mutex(%d)=%p (%d), nRef=%d\n", - tid, p->id, p, p->trace, p->nRef)); - } -#endif -} - -sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ - static const sqlite3_mutex_methods sMutex = { - winMutexInit, - winMutexEnd, - winMutexAlloc, - winMutexFree, - winMutexEnter, - winMutexTry, - winMutexLeave, -#ifdef SQLITE_DEBUG - winMutexHeld, - winMutexNotheld -#else - 0, - 0 -#endif - }; - return &sMutex; -} - -#endif /* SQLITE_MUTEX_W32 */ diff --git a/third_party/sqlite3/notify.c b/third_party/sqlite3/notify.c index 9e79c7a92..66f7fb7d1 100644 --- a/third_party/sqlite3/notify.c +++ b/third_party/sqlite3/notify.c @@ -13,8 +13,8 @@ ** This file contains the implementation of the sqlite3_unlock_notify() ** API method and its associated functionality. */ -#include "third_party/sqlite3/btreeInt.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/btreeInt.inc" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* Omit this entire file if SQLITE_ENABLE_UNLOCK_NOTIFY is not defined. */ diff --git a/third_party/sqlite3/opcodes.h b/third_party/sqlite3/opcodes.inc similarity index 100% rename from third_party/sqlite3/opcodes.h rename to third_party/sqlite3/opcodes.inc diff --git a/third_party/sqlite3/os.c b/third_party/sqlite3/os.c index d66b7c359..dc1fdfb79 100644 --- a/third_party/sqlite3/os.c +++ b/third_party/sqlite3/os.c @@ -13,7 +13,7 @@ ** This file contains OS interface code that is common to all ** architectures. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/os.h b/third_party/sqlite3/os.inc similarity index 99% rename from third_party/sqlite3/os.h rename to third_party/sqlite3/os.inc index b3d0fbebd..248074373 100644 --- a/third_party/sqlite3/os.h +++ b/third_party/sqlite3/os.inc @@ -25,7 +25,7 @@ ** Attempt to automatically detect the operating system and setup the ** necessary pre-processor macros for it. */ -#include "third_party/sqlite3/os_setup.h" +#include "third_party/sqlite3/os_setup.inc" /* If the SET_FULLSYNC macro is not defined above, then make it ** a no-op diff --git a/third_party/sqlite3/os_common.h b/third_party/sqlite3/os_common.inc similarity index 98% rename from third_party/sqlite3/os_common.h rename to third_party/sqlite3/os_common.inc index f6b2dc377..fc2b12f28 100644 --- a/third_party/sqlite3/os_common.h +++ b/third_party/sqlite3/os_common.inc @@ -40,7 +40,7 @@ ** hwtime.h contains inline assembler code for implementing ** high-performance timing routines. */ -#include "third_party/sqlite3/hwtime.h" +#include "third_party/sqlite3/hwtime.inc" static sqlite_uint64 g_start; static sqlite_uint64 g_elapsed; diff --git a/third_party/sqlite3/os_setup.h b/third_party/sqlite3/os_setup.inc similarity index 100% rename from third_party/sqlite3/os_setup.h rename to third_party/sqlite3/os_setup.inc diff --git a/third_party/sqlite3/os_unix.c b/third_party/sqlite3/os_unix.c index b6825a7e7..ec1cc62dd 100644 --- a/third_party/sqlite3/os_unix.c +++ b/third_party/sqlite3/os_unix.c @@ -43,7 +43,7 @@ ** * Definitions of sqlite3_vfs objects for all locking methods ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #if SQLITE_OS_UNIX /* This file is used on unix only */ /* clang-format off */ @@ -102,15 +102,7 @@ #include "libc/sysv/consts/prot.h" #include "libc/time/struct/tm.h" #include "libc/time/time.h" -#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE > 0 #include "libc/mem/mem.h" -#endif - -#if SQLITE_ENABLE_LOCKING_STYLE -# include -# include -# include -#endif /* SQLITE_ENABLE_LOCKING_STYLE */ /* ** Try to determine if gethostuuid() is available based on standard @@ -305,7 +297,7 @@ static pid_t randomnessPid = 0; /* ** Include code that is common to all os_*.c files */ -#include "third_party/sqlite3/os_common.h" +#include "third_party/sqlite3/os_common.inc" /* ** Define various macros that are missing from some systems. diff --git a/third_party/sqlite3/os_win.c b/third_party/sqlite3/os_win.c index 4b56d662c..26eaa9b89 100644 --- a/third_party/sqlite3/os_win.c +++ b/third_party/sqlite3/os_win.c @@ -12,19 +12,14 @@ ** ** This file contains code that is specific to Windows. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #if SQLITE_OS_WIN /* This file is used for Windows only */ /* clang-format off */ /* ** Include code that is common to all os_*.c files */ -#include "third_party/sqlite3/os_common.h" - -/* -** Include the header file for the Windows VFS. -*/ -#include "third_party/sqlite3/os_win.h" +#include "third_party/sqlite3/os_common.inc" /* ** Compiling and using WAL mode requires several APIs that are only diff --git a/third_party/sqlite3/os_win.h b/third_party/sqlite3/os_win.h deleted file mode 100644 index a51285b65..000000000 --- a/third_party/sqlite3/os_win.h +++ /dev/null @@ -1,90 +0,0 @@ -/* -** 2013 November 25 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This file contains code that is specific to Windows. -*/ -#ifndef SQLITE_OS_WIN_H -#define SQLITE_OS_WIN_H -/* clang-format off */ - -/* -** Include the primary Windows SDK header file. -*/ -#include "third_party/sqlite3/windows.h" - -#ifdef __CYGWIN__ -#include - -#include "libc/errno.h" /* amalgamator: dontcache */ -#endif - -/* -** Determine if we are dealing with Windows NT. -** -** We ought to be able to determine if we are compiling for Windows 9x or -** Windows NT using the _WIN32_WINNT macro as follows: -** -** #if defined(_WIN32_WINNT) -** # define SQLITE_OS_WINNT 1 -** #else -** # define SQLITE_OS_WINNT 0 -** #endif -** -** However, Visual Studio 2005 does not set _WIN32_WINNT by default, as -** it ought to, so the above test does not work. We'll just assume that -** everything is Windows NT unless the programmer explicitly says otherwise -** by setting SQLITE_OS_WINNT to 0. -*/ -#if SQLITE_OS_WIN && !defined(SQLITE_OS_WINNT) -# define SQLITE_OS_WINNT 1 -#endif - -/* -** Determine if we are dealing with Windows CE - which has a much reduced -** API. -*/ -#if defined(_WIN32_WCE) -# define SQLITE_OS_WINCE 1 -#else -# define SQLITE_OS_WINCE 0 -#endif - -/* -** Determine if we are dealing with WinRT, which provides only a subset of -** the full Win32 API. -*/ -#if !defined(SQLITE_OS_WINRT) -# define SQLITE_OS_WINRT 0 -#endif - -/* -** For WinCE, some API function parameters do not appear to be declared as -** volatile. -*/ -#if SQLITE_OS_WINCE -# define SQLITE_WIN32_VOLATILE -#else -# define SQLITE_WIN32_VOLATILE volatile -#endif - -/* -** For some Windows sub-platforms, the _beginthreadex() / _endthreadex() -** functions are not available (e.g. those not using MSVC, Cygwin, etc). -*/ -#if SQLITE_OS_WIN && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \ - SQLITE_THREADSAFE>0 && !defined(__CYGWIN__) -# define SQLITE_OS_WIN_THREADS 1 -#else -# define SQLITE_OS_WIN_THREADS 0 -#endif - -#endif /* SQLITE_OS_WIN_H */ diff --git a/third_party/sqlite3/pager.c b/third_party/sqlite3/pager.c index ff4be22ad..88ad7272a 100644 --- a/third_party/sqlite3/pager.c +++ b/third_party/sqlite3/pager.c @@ -19,8 +19,8 @@ ** another is writing. */ #ifndef SQLITE_OMIT_DISKIO -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/wal.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/wal.inc" /* clang-format off */ diff --git a/third_party/sqlite3/pager.h b/third_party/sqlite3/pager.inc similarity index 100% rename from third_party/sqlite3/pager.h rename to third_party/sqlite3/pager.inc diff --git a/third_party/sqlite3/parse.c b/third_party/sqlite3/parse.c index 08f25a185..5fe825ced 100644 --- a/third_party/sqlite3/parse.c +++ b/third_party/sqlite3/parse.c @@ -23,7 +23,7 @@ #line 58 "parse.y" /* clang-format off */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* ** Disable all error recovery processing in the parser push-down diff --git a/third_party/sqlite3/parse.h b/third_party/sqlite3/parse.inc similarity index 100% rename from third_party/sqlite3/parse.h rename to third_party/sqlite3/parse.inc diff --git a/third_party/sqlite3/pcache.c b/third_party/sqlite3/pcache.c index 7f3b9093d..00f601979 100644 --- a/third_party/sqlite3/pcache.c +++ b/third_party/sqlite3/pcache.c @@ -11,7 +11,7 @@ ************************************************************************* ** This file implements that page cache. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/pcache.h b/third_party/sqlite3/pcache.inc similarity index 100% rename from third_party/sqlite3/pcache.h rename to third_party/sqlite3/pcache.inc diff --git a/third_party/sqlite3/pcache1.c b/third_party/sqlite3/pcache1.c index 8f7b75843..11109b170 100644 --- a/third_party/sqlite3/pcache1.c +++ b/third_party/sqlite3/pcache1.c @@ -80,7 +80,7 @@ ** show that method (3) with N==100 provides about a 5% performance boost for ** common workloads. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ typedef struct PCache1 PCache1; diff --git a/third_party/sqlite3/pragma.c b/third_party/sqlite3/pragma.c index 9642ff030..b22edbd8a 100644 --- a/third_party/sqlite3/pragma.c +++ b/third_party/sqlite3/pragma.c @@ -11,7 +11,7 @@ ************************************************************************* ** This file contains code used to implement the PRAGMA command. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #if !defined(SQLITE_ENABLE_LOCKING_STYLE) @@ -29,7 +29,7 @@ ** lexicographical order to facility a binary search of the pragma name. ** Do not edit pragma.h directly. Edit and rerun the script in at ** ../tool/mkpragmatab.tcl. */ -#include "third_party/sqlite3/pragma.h" +#include "third_party/sqlite3/pragma.inc" /* ** Interpret the given string as a safety level. Return 0 for OFF, diff --git a/third_party/sqlite3/pragma.h b/third_party/sqlite3/pragma.inc similarity index 100% rename from third_party/sqlite3/pragma.h rename to third_party/sqlite3/pragma.inc diff --git a/third_party/sqlite3/prepare.c b/third_party/sqlite3/prepare.c index 0653a7267..c7575105e 100644 --- a/third_party/sqlite3/prepare.c +++ b/third_party/sqlite3/prepare.c @@ -13,7 +13,7 @@ ** interface, and routines that contribute to loading the database schema ** from disk. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/printf.c b/third_party/sqlite3/printf.c index dbf540bda..f916d9f54 100644 --- a/third_party/sqlite3/printf.c +++ b/third_party/sqlite3/printf.c @@ -9,7 +9,7 @@ ** library, though the implementation here has enhancements to support ** SQLite. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/random.c b/third_party/sqlite3/random.c index 0eb6b8ff8..2c613b53d 100644 --- a/third_party/sqlite3/random.c +++ b/third_party/sqlite3/random.c @@ -15,7 +15,7 @@ ** Random numbers are used by some of the database backends in order ** to generate random integer keys for tables or random filenames. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/resolve.c b/third_party/sqlite3/resolve.c index ab68d2516..4876c90fc 100644 --- a/third_party/sqlite3/resolve.c +++ b/third_party/sqlite3/resolve.c @@ -14,7 +14,7 @@ ** resolve all identifiers by associating them with a particular ** table and column. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/rowset.c b/third_party/sqlite3/rowset.c index e18e62f9d..5f0a9b8ff 100644 --- a/third_party/sqlite3/rowset.c +++ b/third_party/sqlite3/rowset.c @@ -61,7 +61,7 @@ ** be possible, but the feature was not used, so it was removed in order ** to simplify the code. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ diff --git a/third_party/sqlite3/rtree.c b/third_party/sqlite3/rtree.c index 5edc89588..916d31739 100644 --- a/third_party/sqlite3/rtree.c +++ b/third_party/sqlite3/rtree.c @@ -66,7 +66,7 @@ SQLITE_EXTENSION_INIT1 int sqlite3GetToken(const unsigned char*,int*); /* In the SQLite core */ #ifndef SQLITE_AMALGAMATION -#include "third_party/sqlite3/sqlite3rtree.h" +#include "third_party/sqlite3/sqlite3rtree.inc" typedef sqlite3_int64 i64; typedef sqlite3_uint64 u64; typedef unsigned char u8; @@ -4367,7 +4367,7 @@ static void rtreecheck( /* Conditionally include the geopoly code */ #ifdef SQLITE_ENABLE_GEOPOLY -#include "third_party/sqlite3/geopoly.c" +#include "third_party/sqlite3/geopoly.inc" #endif /* diff --git a/third_party/sqlite3/rtree.h b/third_party/sqlite3/rtree.inc similarity index 100% rename from third_party/sqlite3/rtree.h rename to third_party/sqlite3/rtree.inc diff --git a/third_party/sqlite3/select.c b/third_party/sqlite3/select.c index 7375221e1..91f51f954 100644 --- a/third_party/sqlite3/select.c +++ b/third_party/sqlite3/select.c @@ -12,7 +12,7 @@ ** This file contains C code routines that are called by the parser ** to handle SELECT statements in SQLite. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/shell.c b/third_party/sqlite3/shell.c index b60655f57..eef5d12f2 100644 --- a/third_party/sqlite3/shell.c +++ b/third_party/sqlite3/shell.c @@ -95,7 +95,7 @@ typedef sqlite3_int64 i64; typedef sqlite3_uint64 u64; typedef unsigned char u8; #if SQLITE_USER_AUTHENTICATION -#include "third_party/sqlite3/sqlite3userauth.h" +#include "third_party/sqlite3/sqlite3userauth.inc" #endif #include "libc/str/str.h" @@ -139,7 +139,6 @@ typedef unsigned char u8; #elif HAVE_LINENOISE -#include "third_party/sqlite3/linenoise.h" #define shell_add_history(X) linenoiseHistoryAdd(X) #define shell_read_history(X) linenoiseHistoryLoad(X) #define shell_write_history(X) linenoiseHistorySave(X) @@ -1058,14 +1057,10 @@ static void shellAddSchemaName( #define WIN32_LEAN_AND_MEAN #endif -#include "third_party/sqlite3/windows.h" - /* ** We need several support functions from the SQLite core. */ -/* #include "third_party/sqlite3/sqlite3.h" */ - /* ** We need several things from the ANSI and MSVCRT headers. */ @@ -1208,7 +1203,7 @@ extern INT closedir(LPDIR dirp); */ #if defined(_WIN32) && defined(_MSC_VER) -/* #include "third_party/sqlite3/test_windirent.h" */ +/* #include "third_party/sqlite3/test_windirent.inc" */ /* ** Implementation of the POSIX getenv() function using the Win32 API. diff --git a/third_party/sqlite3/shell.c.in b/third_party/sqlite3/shell.c.in index c96d3ecf2..d15293d24 100644 --- a/third_party/sqlite3/shell.c.in +++ b/third_party/sqlite3/shell.c.in @@ -74,7 +74,7 @@ typedef sqlite3_int64 i64; typedef sqlite3_uint64 u64; typedef unsigned char u8; #if SQLITE_USER_AUTHENTICATION -# include "sqlite3userauth.h" +# include "sqlite3userauth.inc" #endif #include #include @@ -120,7 +120,7 @@ typedef unsigned char u8; #elif HAVE_LINENOISE -# include "linenoise.h" +# include "linenoise.inc" # define shell_add_history(X) linenoiseHistoryAdd(X) # define shell_read_history(X) linenoiseHistoryLoad(X) # define shell_write_history(X) linenoiseHistorySave(X) diff --git a/third_party/sqlite3/sqlite3.h b/third_party/sqlite3/sqlite3.h index 284b958c2..b0701b550 100644 --- a/third_party/sqlite3/sqlite3.h +++ b/third_party/sqlite3/sqlite3.h @@ -1,14 +1,7 @@ +#ifndef SQLITE3_H +#define SQLITE3_H +/* clang-format off */ /* -** 2001-09-15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* ** This header file defines the interface that the SQLite library ** presents to client programs. If a C-function, structure, datatype, ** or constant definition does not appear in this file, then it is @@ -30,10 +23,6 @@ ** the version number) and changes its name to "sqlite3.h" as ** part of the build process. */ -#ifndef SQLITE3_H -#define SQLITE3_H -/* Needed for the definition of va_list */ -/* clang-format off */ /* ** Make sure we can call this stuff from C++. @@ -252,35 +241,11 @@ typedef struct sqlite3 sqlite3; /* ** CAPI3REF: 64-Bit Integer Types ** KEYWORDS: sqlite_int64 sqlite_uint64 -** -** Because there is no cross-platform way to specify 64-bit integer types -** SQLite includes typedefs for 64-bit signed and unsigned integers. -** -** The sqlite3_int64 and sqlite3_uint64 are the preferred type definitions. -** The sqlite_int64 and sqlite_uint64 types are supported for backwards -** compatibility only. -** -** ^The sqlite3_int64 and sqlite_int64 types can store integer values -** between -9223372036854775808 and +9223372036854775807 inclusive. ^The -** sqlite3_uint64 and sqlite_uint64 types can store integer values -** between 0 and +18446744073709551615 inclusive. */ -#ifdef SQLITE_INT64_TYPE - typedef SQLITE_INT64_TYPE sqlite_int64; -# ifdef SQLITE_UINT64_TYPE - typedef SQLITE_UINT64_TYPE sqlite_uint64; -# else - typedef unsigned SQLITE_INT64_TYPE sqlite_uint64; -# endif -#elif defined(_MSC_VER) || defined(__BORLANDC__) - typedef __int64 sqlite_int64; - typedef unsigned __int64 sqlite_uint64; -#else - typedef long long int sqlite_int64; - typedef unsigned long long int sqlite_uint64; -#endif -typedef sqlite_int64 sqlite3_int64; -typedef sqlite_uint64 sqlite3_uint64; +#define sqlite_int64 int64_t +#define sqlite_uint64 uint64_t +#define sqlite3_int64 int64_t +#define sqlite3_uint64 uint64_t /* ** If compiling for a processor that lacks floating point support, diff --git a/third_party/sqlite3/sqlite3.mk b/third_party/sqlite3/sqlite3.mk index 3b009baac..831b23a8d 100644 --- a/third_party/sqlite3/sqlite3.mk +++ b/third_party/sqlite3/sqlite3.mk @@ -8,11 +8,13 @@ THIRD_PARTY_SQLITE3 = $(THIRD_PARTY_SQLITE3_A_DEPS) $(THIRD_PARTY_SQLITE3_A) $(T THIRD_PARTY_SQLITE3_A = o/$(MODE)/third_party/sqlite3/libsqlite3.a THIRD_PARTY_SQLITE3_A_FILES := $(wildcard third_party/sqlite3/*) THIRD_PARTY_SQLITE3_A_HDRS = $(filter %.h,$(THIRD_PARTY_SQLITE3_A_FILES)) -THIRD_PARTY_SQLITE3_A_SRCS_C = $(filter-out %/geopoly.c,$(filter %.c,$(THIRD_PARTY_SQLITE3_A_FILES))) +THIRD_PARTY_SQLITE3_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_SQLITE3_A_FILES)) +THIRD_PARTY_SQLITE3_A_SRCS_T = $(filter %.inc,$(THIRD_PARTY_SQLITE3_A_FILES)) THIRD_PARTY_SQLITE3_BINS = $(THIRD_PARTY_SQLITE3_COMS) $(THIRD_PARTY_SQLITE3_COMS:%=%.dbg) THIRD_PARTY_SQLITE3_A_SRCS = \ - $(THIRD_PARTY_SQLITE3_A_SRCS_C) + $(THIRD_PARTY_SQLITE3_A_SRCS_C) \ + $(THIRD_PARTY_SQLITE3_A_SRCS_T) THIRD_PARTY_SQLITE3_A_OBJS = \ $(THIRD_PARTY_SQLITE3_A_SRCS_C:%.c=o/$(MODE)/%.o) @@ -66,13 +68,23 @@ $(THIRD_PARTY_SQLITE3_A).pkg: \ $(foreach x,$(THIRD_PARTY_SQLITE3_A_DIRECTDEPS),$($(x)_A).pkg) $(THIRD_PARTY_SQLITE3_A_OBJS): \ - OVERRIDE_CFLAGS = \ - -DSTACK_FRAME_UNLIMITED \ + OVERRIDE_CFLAGS += \ + -DNDEBUG \ -DSQLITE_CORE \ - -DSQLITE_OS_UNIX=1 \ - -DBUILD_sqlite -DNDEBUG \ + -DSQLITE_OS_UNIX \ + -DBUILD_sqlite \ + -DHAVE_READLINE=0 \ + -DHAVE_EDITLINE=0 \ -DSQLITE_THREADSAFE=0 \ - -DSQLITE_OMIT_LOAD_EXTENSION=1 \ + -DSQLITE_HAVE_ZLIB \ + -DHAVE_MALLOC_USABLE_SIZE \ + -DHAVE_FDATASYNC \ + -DHAVE_STRCHRNUL \ + -DHAVE_LOCALTIME_R \ + -DHAVE_GMTIME_R \ + -DHAVE_USLEEP \ + -DSQLITE_HAVE_C99_MATH_FUNCS \ + -DSQLITE_OMIT_LOAD_EXTENSION \ -DSQLITE_ENABLE_MATH_FUNCTIONS \ -DSQLITE_ENABLE_FTS3 \ -DSQLITE_ENABLE_FTS4 \ @@ -82,9 +94,7 @@ $(THIRD_PARTY_SQLITE3_A_OBJS): \ -DSQLITE_ENABLE_RTREE \ -DSQLITE_ENABLE_SESSION \ -DSQLITE_ENABLE_PREUPDATE_HOOK \ - -DSQLITE_TEMP_STORE=1 \ - -DHAVE_READLINE=0 \ - -DHAVE_EDITLINE=0 \ + -DSQLITE_TEMP_STORE \ -DSQLITE_ENABLE_JSON1 \ -DSQLITE_ENABLE_RTREE \ -DSQLITE_ENABLE_EXPLAIN_COMMENTS \ @@ -96,6 +106,16 @@ $(THIRD_PARTY_SQLITE3_A_OBJS): \ -DSQLITE_ENABLE_OFFSET_SQL_FUNC \ -DSQLITE_ENABLE_DESERIALIZE +o/$(MODE)/third_party/sqlite3/shell.o: \ + OVERRIDE_CFLAGS += \ + -DSTACK_FRAME_UNLIMITED + +THIRD_PARTY_SQLITE3_LIBS = $(foreach x,$(THIRD_PARTY_SQLITE3_ARTIFACTS),$($(x))) +THIRD_PARTY_SQLITE3_SRCS = $(foreach x,$(THIRD_PARTY_SQLITE3_ARTIFACTS),$($(x)_SRCS)) +THIRD_PARTY_SQLITE3_CHECKS = $(foreach x,$(THIRD_PARTY_SQLITE3_ARTIFACTS),$($(x)_CHECKS)) +THIRD_PARTY_SQLITE3_OBJS = $(foreach x,$(THIRD_PARTY_SQLITE3_ARTIFACTS),$($(x)_OBJS)) +$(THIRD_PARTY_SQLITE3_OBJS): third_party/sqlite3/sqlite3.mk + .PHONY: o/$(MODE)/third_party/sqlite3 o/$(MODE)/third_party/sqlite3: \ $(THIRD_PARTY_SQLITE3_BINS) \ diff --git a/third_party/sqlite3/sqlite3ext.h b/third_party/sqlite3/sqlite3ext.h index c35a3fd38..c836347fc 100644 --- a/third_party/sqlite3/sqlite3ext.h +++ b/third_party/sqlite3/sqlite3ext.h @@ -1,24 +1,15 @@ +#ifndef SQLITE3EXT_H +#define SQLITE3EXT_H +#include "third_party/sqlite3/sqlite3.h" +/* clang-format off */ + /* -** 2006 June 7 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* ** This header file defines the SQLite interface for use by ** shared libraries that want to be imported as extensions into ** an SQLite instance. Shared libraries that intend to be loaded ** as extensions by SQLite should #include this file instead of ** sqlite3.h. */ -#ifndef SQLITE3EXT_H -#define SQLITE3EXT_H -#include "third_party/sqlite3/sqlite3.h" -/* clang-format off */ /* ** The following structure holds pointers to all of the SQLite API diff --git a/third_party/sqlite3/sqlite3rbu.c b/third_party/sqlite3/sqlite3rbu.c index 2c8de1b3d..7a90de084 100644 --- a/third_party/sqlite3/sqlite3rbu.c +++ b/third_party/sqlite3/sqlite3rbu.c @@ -88,11 +88,7 @@ #include "third_party/sqlite3/sqlite3.h" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RBU) -#include "third_party/sqlite3/sqlite3rbu.h" - -#if defined(_WIN32_WCE) -#include "third_party/sqlite3/windows.h" -#endif +#include "third_party/sqlite3/sqlite3rbu.inc" /* Maximum number of prepared UPDATE statements held by this module */ #define SQLITE_RBU_UPDATE_CACHESIZE 16 diff --git a/third_party/sqlite3/sqlite3rbu.h b/third_party/sqlite3/sqlite3rbu.inc similarity index 100% rename from third_party/sqlite3/sqlite3rbu.h rename to third_party/sqlite3/sqlite3rbu.inc diff --git a/third_party/sqlite3/sqlite3rtree.h b/third_party/sqlite3/sqlite3rtree.inc similarity index 100% rename from third_party/sqlite3/sqlite3rtree.h rename to third_party/sqlite3/sqlite3rtree.inc diff --git a/third_party/sqlite3/sqlite3session.c b/third_party/sqlite3/sqlite3session.c index 1bf4415df..8c155af70 100644 --- a/third_party/sqlite3/sqlite3session.c +++ b/third_party/sqlite3/sqlite3session.c @@ -2,11 +2,11 @@ #if defined(SQLITE_ENABLE_SESSION) && defined(SQLITE_ENABLE_PREUPDATE_HOOK) #include "libc/assert.h" #include "libc/str/str.h" -#include "third_party/sqlite3/sqlite3session.h" +#include "third_party/sqlite3/sqlite3session.inc" #ifndef SQLITE_AMALGAMATION -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" #endif typedef struct SessionTable SessionTable; diff --git a/third_party/sqlite3/sqlite3session.h b/third_party/sqlite3/sqlite3session.inc similarity index 100% rename from third_party/sqlite3/sqlite3session.h rename to third_party/sqlite3/sqlite3session.inc diff --git a/third_party/sqlite3/sqlite3userauth.h b/third_party/sqlite3/sqlite3userauth.inc similarity index 100% rename from third_party/sqlite3/sqlite3userauth.h rename to third_party/sqlite3/sqlite3userauth.inc diff --git a/third_party/sqlite3/sqliteInt.h b/third_party/sqlite3/sqliteInt.inc similarity index 99% rename from third_party/sqlite3/sqliteInt.h rename to third_party/sqlite3/sqliteInt.inc index c7b2b0f73..092529e43 100644 --- a/third_party/sqlite3/sqliteInt.h +++ b/third_party/sqlite3/sqliteInt.inc @@ -57,12 +57,12 @@ ** compiler warnings due to subsequent content in this file and other files ** that are included by this file. */ -#include "third_party/sqlite3/msvc.h" +#include "third_party/sqlite3/msvc.inc" /* ** Special setup for VxWorks */ -#include "third_party/sqlite3/vxworks.h" +#include "third_party/sqlite3/vxworks.inc" /* ** These #defines should enable >2GB file support on POSIX if the @@ -150,26 +150,6 @@ # define deliberate_fall_through #endif -/* -** For MinGW, check to see if we can include the header file containing its -** version information, among other things. Normally, this internal MinGW -** header file would [only] be included automatically by other MinGW header -** files; however, the contained version information is now required by this -** header file to work around binary compatibility issues (see below) and -** this is the only known way to reliably obtain it. This entire #if block -** would be completely unnecessary if there was any other way of detecting -** MinGW via their preprocessor (e.g. if they customized their GCC to define -** some MinGW-specific macros). When compiling for MinGW, either the -** _HAVE_MINGW_H or _HAVE__MINGW_H (note the extra underscore) macro must be -** defined; otherwise, detection of conditions specific to MinGW will be -** disabled. -*/ -#if defined(_HAVE_MINGW_H) -#include "third_party/sqlite3/mingw.h" -#elif defined(_HAVE__MINGW_H) -#include "third_party/sqlite3/_mingw.h" -#endif - /* ** For MinGW version 4.x (and higher), check to see if the _USE_32BIT_TIME_T ** define is required to maintain binary compatibility with the MSVC runtime @@ -188,16 +168,7 @@ */ #include "third_party/sqlite3/sqlite3.h" -/* -** Include the configuration header output by 'configure' if we're using the -** autoconf-based build -*/ -#if defined(_HAVE_SQLITE_CONFIG_H) && !defined(SQLITECONFIG_H) -#include "third_party/sqlite3/config.h" -#define SQLITECONFIG_H 1 -#endif - -#include "third_party/sqlite3/sqliteLimit.h" +#include "third_party/sqlite3/sqliteLimit.inc" /* Disable nuisance warnings on Borland compilers */ #if defined(__BORLANDC__) @@ -588,8 +559,8 @@ #include "libc/mem/mem.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" -#include "third_party/sqlite3/hash.h" -#include "third_party/sqlite3/parse.h" +#include "third_party/sqlite3/hash.inc" +#include "third_party/sqlite3/parse.inc" /* ** Use a macro to replace memcpy() if compiled with SQLITE_INLINE_MEMCPY. @@ -1227,12 +1198,12 @@ typedef u32 Pgno; ** "BusyHandler" typedefs. vdbe.h also requires a few of the opaque ** pointer types (i.e. FuncDef) defined above. */ -#include "third_party/sqlite3/btree.h" -#include "third_party/sqlite3/mutex.h" -#include "third_party/sqlite3/os.h" -#include "third_party/sqlite3/pager.h" -#include "third_party/sqlite3/pcache.h" -#include "third_party/sqlite3/vdbe.h" +#include "third_party/sqlite3/btree.inc" +#include "third_party/sqlite3/mutex.inc" +#include "third_party/sqlite3/os.inc" +#include "third_party/sqlite3/pager.inc" +#include "third_party/sqlite3/pcache.inc" +#include "third_party/sqlite3/vdbe.inc" /* The SQLITE_EXTRA_DURABLE compile-time option used to set the default ** synchronous setting to EXTRA. It is no longer supported. diff --git a/third_party/sqlite3/sqliteLimit.h b/third_party/sqlite3/sqliteLimit.inc similarity index 100% rename from third_party/sqlite3/sqliteLimit.h rename to third_party/sqlite3/sqliteLimit.inc diff --git a/third_party/sqlite3/sqliteicu.h b/third_party/sqlite3/sqliteicu.inc similarity index 100% rename from third_party/sqlite3/sqliteicu.h rename to third_party/sqlite3/sqliteicu.inc diff --git a/third_party/sqlite3/status.c b/third_party/sqlite3/status.c index b78ef0415..560ec436c 100644 --- a/third_party/sqlite3/status.c +++ b/third_party/sqlite3/status.c @@ -13,8 +13,8 @@ ** This module implements the sqlite3_status() interface and related ** functionality. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/table.c b/third_party/sqlite3/table.c index 6289a1685..e1b98c316 100644 --- a/third_party/sqlite3/table.c +++ b/third_party/sqlite3/table.c @@ -16,7 +16,7 @@ ** These routines are in a separate files so that they will not be linked ** if they are not used. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_GET_TABLE diff --git a/third_party/sqlite3/threads.c b/third_party/sqlite3/threads.c index 8b37ea8fc..45069e542 100644 --- a/third_party/sqlite3/threads.c +++ b/third_party/sqlite3/threads.c @@ -25,10 +25,7 @@ ** of multiple cores can do so, while also allowing applications to stay ** single-threaded if desired. */ -#include "third_party/sqlite3/sqliteInt.h" -#if SQLITE_OS_WIN -#include "third_party/sqlite3/os_win.h" -#endif +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #if SQLITE_MAX_WORKER_THREADS>0 diff --git a/third_party/sqlite3/tokenize.c b/third_party/sqlite3/tokenize.c index a26973a4d..5faf087b8 100644 --- a/third_party/sqlite3/tokenize.c +++ b/third_party/sqlite3/tokenize.c @@ -16,7 +16,7 @@ ** parser for analysis. */ #include "libc/mem/mem.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* Character classes for tokenizing @@ -145,7 +145,7 @@ const unsigned char ebcdicToAscii[] = { ** named keywordhash.h and then included into this source file by ** the #include below. */ -#include "third_party/sqlite3/keywordhash.h" +#include "third_party/sqlite3/keywordhash.inc" /* diff --git a/third_party/sqlite3/fts1_tokenizer.h b/third_party/sqlite3/tokenizer.inc similarity index 100% rename from third_party/sqlite3/fts1_tokenizer.h rename to third_party/sqlite3/tokenizer.inc diff --git a/third_party/sqlite3/treeview.c b/third_party/sqlite3/treeview.c index 3bdf7e1e4..a04a57bb3 100644 --- a/third_party/sqlite3/treeview.c +++ b/third_party/sqlite3/treeview.c @@ -17,7 +17,7 @@ ** The interfaces in this file is only available when compiling ** with SQLITE_DEBUG. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #ifdef SQLITE_DEBUG /* clang-format off */ diff --git a/third_party/sqlite3/trigger.c b/third_party/sqlite3/trigger.c index 46bdbba68..30710bccc 100644 --- a/third_party/sqlite3/trigger.c +++ b/third_party/sqlite3/trigger.c @@ -10,7 +10,7 @@ ************************************************************************* ** This file contains the implementation for TRIGGERs */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_TRIGGER diff --git a/third_party/sqlite3/update.c b/third_party/sqlite3/update.c index c25c45bf6..621e142b5 100644 --- a/third_party/sqlite3/update.c +++ b/third_party/sqlite3/update.c @@ -12,7 +12,7 @@ ** This file contains C code routines that are called by the parser ** to handle UPDATE statements. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_VIRTUALTABLE diff --git a/third_party/sqlite3/upsert.c b/third_party/sqlite3/upsert.c index 041d64466..ebee3f14f 100644 --- a/third_party/sqlite3/upsert.c +++ b/third_party/sqlite3/upsert.c @@ -12,7 +12,7 @@ ** This file contains code to implement various aspects of UPSERT ** processing and handling of the Upsert object. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_UPSERT diff --git a/third_party/sqlite3/userauth.c b/third_party/sqlite3/userauth.c index e9e5b9cc3..335c56863 100644 --- a/third_party/sqlite3/userauth.c +++ b/third_party/sqlite3/userauth.c @@ -23,7 +23,7 @@ */ #ifdef SQLITE_USER_AUTHENTICATION #ifndef SQLITEINT_H -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #endif /* clang-format off */ diff --git a/third_party/sqlite3/utf.c b/third_party/sqlite3/utf.c index e781139d7..903d37ece 100644 --- a/third_party/sqlite3/utf.c +++ b/third_party/sqlite3/utf.c @@ -34,8 +34,8 @@ ** */ #include "libc/assert.h" -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ #if !defined(SQLITE_AMALGAMATION) && SQLITE_BYTEORDER==0 diff --git a/third_party/sqlite3/util.c b/third_party/sqlite3/util.c index d32cf6383..0740f7724 100644 --- a/third_party/sqlite3/util.c +++ b/third_party/sqlite3/util.c @@ -17,7 +17,7 @@ */ /* clang-format off */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #ifndef SQLITE_OMIT_FLOATING_POINT #include "libc/math.h" #endif diff --git a/third_party/sqlite3/vacuum.c b/third_party/sqlite3/vacuum.c index dc916616e..a5c50dcaa 100644 --- a/third_party/sqlite3/vacuum.c +++ b/third_party/sqlite3/vacuum.c @@ -14,8 +14,8 @@ ** Most of the code in this file may be omitted by defining the ** SQLITE_OMIT_VACUUM macro. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) diff --git a/third_party/sqlite3/vdbe.c b/third_party/sqlite3/vdbe.c index 07de94253..853ea0d91 100644 --- a/third_party/sqlite3/vdbe.c +++ b/third_party/sqlite3/vdbe.c @@ -18,8 +18,8 @@ ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ /* @@ -599,7 +599,7 @@ void sqlite3VdbeRegisterDump(Vdbe *v){ ** hwtime.h contains inline assembler code for implementing ** high-performance timing routines. */ -#include "third_party/sqlite3/hwtime.h" +#include "third_party/sqlite3/hwtime.inc" #endif diff --git a/third_party/sqlite3/vdbe.h b/third_party/sqlite3/vdbe.inc similarity index 99% rename from third_party/sqlite3/vdbe.h rename to third_party/sqlite3/vdbe.inc index 51db23e09..f07f9f12a 100644 --- a/third_party/sqlite3/vdbe.h +++ b/third_party/sqlite3/vdbe.inc @@ -167,7 +167,7 @@ typedef struct VdbeOpList VdbeOpList; ** The makefile scans the vdbe.c source file and creates the "opcodes.h" ** header file that defines a number for each opcode used by the VDBE. */ -#include "third_party/sqlite3/opcodes.h" +#include "third_party/sqlite3/opcodes.inc" /* ** Additional non-public SQLITE_PREPARE_* flags diff --git a/third_party/sqlite3/vdbeInt.h b/third_party/sqlite3/vdbeInt.inc similarity index 100% rename from third_party/sqlite3/vdbeInt.h rename to third_party/sqlite3/vdbeInt.inc diff --git a/third_party/sqlite3/vdbeapi.c b/third_party/sqlite3/vdbeapi.c index b1b99d39e..cc7a071a7 100644 --- a/third_party/sqlite3/vdbeapi.c +++ b/third_party/sqlite3/vdbeapi.c @@ -13,8 +13,8 @@ ** This file contains code use to implement APIs that are part of the ** VDBE. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_DEPRECATED diff --git a/third_party/sqlite3/vdbeaux.c b/third_party/sqlite3/vdbeaux.c index 13334c588..60ff9b7ae 100644 --- a/third_party/sqlite3/vdbeaux.c +++ b/third_party/sqlite3/vdbeaux.c @@ -12,8 +12,8 @@ ** This file contains code used for creating, destroying, and populating ** a VDBE (or an "sqlite3_stmt" as it is known to the outside world.) */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ /* Forward references */ diff --git a/third_party/sqlite3/vdbeblob.c b/third_party/sqlite3/vdbeblob.c index eaf07f14a..aebb94652 100644 --- a/third_party/sqlite3/vdbeblob.c +++ b/third_party/sqlite3/vdbeblob.c @@ -14,8 +14,8 @@ */ /* clang-format off */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" #ifndef SQLITE_OMIT_INCRBLOB diff --git a/third_party/sqlite3/vdbemem.c b/third_party/sqlite3/vdbemem.c index 03ec6450c..d3bef6568 100644 --- a/third_party/sqlite3/vdbemem.c +++ b/third_party/sqlite3/vdbemem.c @@ -15,8 +15,8 @@ ** only within the VDBE. Interface routines refer to a Mem using the ** name sqlite_value */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ /* True if X is a power of two. 0 is considered a power of two here. diff --git a/third_party/sqlite3/vdbesort.c b/third_party/sqlite3/vdbesort.c index ef2a48e55..d1c4d64bd 100644 --- a/third_party/sqlite3/vdbesort.c +++ b/third_party/sqlite3/vdbesort.c @@ -135,8 +135,8 @@ ** thread to merge the output of each of the others to a single PMA for ** the main thread to read from. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/vdbetrace.c b/third_party/sqlite3/vdbetrace.c index 896a813d7..09f148f11 100644 --- a/third_party/sqlite3/vdbetrace.c +++ b/third_party/sqlite3/vdbetrace.c @@ -15,8 +15,8 @@ ** ** The Vdbe parse-tree explainer is also found here. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_TRACE diff --git a/third_party/sqlite3/vdbevtab.c b/third_party/sqlite3/vdbevtab.c index d80058ecf..1eb68aa60 100644 --- a/third_party/sqlite3/vdbevtab.c +++ b/third_party/sqlite3/vdbevtab.c @@ -13,9 +13,9 @@ ** This file implements virtual-tables for examining the bytecode content ** of a prepared statement. */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #if defined(SQLITE_ENABLE_BYTECODE_VTAB) && !defined(SQLITE_OMIT_VIRTUALTABLE) -#include "third_party/sqlite3/vdbeInt.h" +#include "third_party/sqlite3/vdbeInt.inc" /* clang-format off */ /* An instance of the bytecode() table-valued function. diff --git a/third_party/sqlite3/vtab.c b/third_party/sqlite3/vtab.c index e72e7034b..85cb3fbef 100644 --- a/third_party/sqlite3/vtab.c +++ b/third_party/sqlite3/vtab.c @@ -12,7 +12,7 @@ ** This file contains code used to help implement virtual tables. */ #ifndef SQLITE_OMIT_VIRTUALTABLE -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/vxworks.h b/third_party/sqlite3/vxworks.inc similarity index 100% rename from third_party/sqlite3/vxworks.h rename to third_party/sqlite3/vxworks.inc diff --git a/third_party/sqlite3/wal.c b/third_party/sqlite3/wal.c index 8d43638b3..f20ea704b 100644 --- a/third_party/sqlite3/wal.c +++ b/third_party/sqlite3/wal.c @@ -247,7 +247,7 @@ #ifndef SQLITE_OMIT_WAL /* clang-format off */ -#include "third_party/sqlite3/wal.h" +#include "third_party/sqlite3/wal.inc" /* ** Trace output macros diff --git a/third_party/sqlite3/wal.h b/third_party/sqlite3/wal.inc similarity index 99% rename from third_party/sqlite3/wal.h rename to third_party/sqlite3/wal.inc index 0ea24e0ad..462ad9e46 100644 --- a/third_party/sqlite3/wal.h +++ b/third_party/sqlite3/wal.inc @@ -18,7 +18,7 @@ #ifndef SQLITE_WAL_H #define SQLITE_WAL_H -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* Macros for extracting appropriate sync flags for either transaction ** commits (WAL_SYNC_FLAGS(X)) or for checkpoint ops (CKPT_SYNC_FLAGS(X)): diff --git a/third_party/sqlite3/walker.c b/third_party/sqlite3/walker.c index 54f09ba74..48f4c883f 100644 --- a/third_party/sqlite3/walker.c +++ b/third_party/sqlite3/walker.c @@ -14,7 +14,7 @@ */ #include "libc/mem/mem.h" #include "libc/str/str.h" -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" #if !defined(SQLITE_OMIT_WINDOWFUNC) /* diff --git a/third_party/sqlite3/where.c b/third_party/sqlite3/where.c index 99896d4a9..f7ba564b1 100644 --- a/third_party/sqlite3/where.c +++ b/third_party/sqlite3/where.c @@ -16,8 +16,8 @@ ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/whereInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/whereInt.inc" /* clang-format off */ /* diff --git a/third_party/sqlite3/whereInt.h b/third_party/sqlite3/whereInt.inc similarity index 100% rename from third_party/sqlite3/whereInt.h rename to third_party/sqlite3/whereInt.inc diff --git a/third_party/sqlite3/wherecode.c b/third_party/sqlite3/wherecode.c index 9888d1830..33063771b 100644 --- a/third_party/sqlite3/wherecode.c +++ b/third_party/sqlite3/wherecode.c @@ -17,8 +17,8 @@ ** that actually generate the bulk of the WHERE loop code. The original where.c ** file retains the code that does query planning and analysis. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/whereInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/whereInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_EXPLAIN diff --git a/third_party/sqlite3/whereexpr.c b/third_party/sqlite3/whereexpr.c index 7676c7c27..dc3a968d8 100644 --- a/third_party/sqlite3/whereexpr.c +++ b/third_party/sqlite3/whereexpr.c @@ -16,8 +16,8 @@ ** readability and editabiliity. This file contains utility routines for ** analyzing Expr objects in the WHERE clause. */ -#include "third_party/sqlite3/sqliteInt.h" -#include "third_party/sqlite3/whereInt.h" +#include "third_party/sqlite3/sqliteInt.inc" +#include "third_party/sqlite3/whereInt.inc" /* clang-format off */ /* Forward declarations */ diff --git a/third_party/sqlite3/window.c b/third_party/sqlite3/window.c index 401ed1a1a..c8f8bb8c3 100644 --- a/third_party/sqlite3/window.c +++ b/third_party/sqlite3/window.c @@ -10,7 +10,7 @@ ** ************************************************************************* */ -#include "third_party/sqlite3/sqliteInt.h" +#include "third_party/sqlite3/sqliteInt.inc" /* clang-format off */ #ifndef SQLITE_OMIT_WINDOWFUNC