From 66072d32dd0c1cedfd1c98e2bd4a88e5e6537986 Mon Sep 17 00:00:00 2001 From: sowle Date: Tue, 13 Aug 2019 19:58:31 +0300 Subject: [PATCH] lmdb update: 0.9.18 -> 0.9.24 --- contrib/db/liblmdb/CHANGES | 60 +- contrib/db/liblmdb/COPYRIGHT | 2 +- contrib/db/liblmdb/Doxyfile | 2 +- contrib/db/liblmdb/Makefile | 11 +- contrib/db/liblmdb/intro.doc | 2 +- contrib/db/liblmdb/lmdb.h | 78 +- contrib/db/liblmdb/mdb.c | 2284 +++++++++++------------------ contrib/db/liblmdb/mdb_copy.1 | 11 +- contrib/db/liblmdb/mdb_copy.c | 6 +- contrib/db/liblmdb/mdb_dump.1 | 10 +- contrib/db/liblmdb/mdb_dump.c | 19 +- contrib/db/liblmdb/mdb_load.1 | 11 +- contrib/db/liblmdb/mdb_load.c | 64 +- contrib/db/liblmdb/mdb_stat.1 | 10 +- contrib/db/liblmdb/mdb_stat.c | 33 +- contrib/db/liblmdb/midl.c | 65 +- contrib/db/liblmdb/midl.h | 22 +- contrib/db/liblmdb/mtest.c | 2 +- contrib/db/liblmdb/mtest2.c | 2 +- contrib/db/liblmdb/mtest3.c | 2 +- contrib/db/liblmdb/mtest4.c | 2 +- contrib/db/liblmdb/mtest5.c | 2 +- contrib/db/liblmdb/mtest6.c | 2 +- contrib/db/liblmdb/sample-bdb.txt | 2 +- contrib/db/liblmdb/sample-mdb.txt | 2 +- 25 files changed, 967 insertions(+), 1739 deletions(-) diff --git a/contrib/db/liblmdb/CHANGES b/contrib/db/liblmdb/CHANGES index aabb1adf..f00efbb9 100644 --- a/contrib/db/liblmdb/CHANGES +++ b/contrib/db/liblmdb/CHANGES @@ -1,14 +1,70 @@ LMDB 0.9 Change Log -LMDB 0.9.18 Release Engineering +LMDB 0.9.24 Release (2019/07/24) + ITS#8969 Tweak mdb_page_split + ITS#8975 WIN32 fix writemap set_mapsize crash + ITS#9007 Fix loose pages in WRITEMAP + +LMDB 0.9.23 Release (2018/12/19) + ITS#8756 Fix loose pages in dirty list + ITS#8831 Fix mdb_load flag init + ITS#8844 Fix mdb_env_close in forked process + Documentation + ITS#8857 mdb_cursor_del doesn't invalidate cursor + ITS#8908 GET_MULTIPLE etc don't change passed in key + +LMDB 0.9.22 Release (2018/03/22) + Fix MDB_DUPSORT alignment bug (ITS#8819) + Fix regression with new db from 0.9.19 (ITS#8760) + Fix liblmdb to build on Solaris (ITS#8612) + Fix delete behavior with DUPSORT DB (ITS#8622) + Fix mdb_cursor_get/mdb_cursor_del behavior (ITS#8722) + +LMDB 0.9.21 Release (2017/06/01) + Fix xcursor after cursor_del (ITS#8622) + +LMDB 0.9.20 (Withdrawn) + Fix mdb_load with escaped plaintext (ITS#8558) + Fix mdb_cursor_last / mdb_put interaction (ITS#8557) + +LMDB 0.9.19 Release (2016/12/28) + Fix mdb_env_cwalk cursor init (ITS#8424) + Fix robust mutexes on Solaris 10/11 (ITS#8339) + Tweak Win32 error message buffer + Fix MDB_GET_BOTH on non-dup record (ITS#8393) + Optimize mdb_drop + Fix xcursors after mdb_cursor_del (ITS#8406) + Fix MDB_NEXT_DUP after mdb_cursor_del (ITS#8412) + Fix mdb_cursor_put resetting C_EOF (ITS#8489) + Fix mdb_env_copyfd2 to return EPIPE on SIGPIPE (ITS#8504) + Fix mdb_env_copy with empty DB (ITS#8209) + Fix behaviors with fork (ITS#8505) + Fix mdb_dbi_open with mainDB cursors (ITS#8542) + Fix robust mutexes on kFreeBSD (ITS#8554) + Fix utf8_to_utf16 error checks (ITS#7992) + Fix F_NOCACHE on MacOS, error is non-fatal (ITS#7682) + Build + Make shared lib suffix overridable (ITS#8481) + Documentation + Cleanup doxygen nits + Note reserved vs actual mem/disk usage + + +LMDB 0.9.18 Release (2016/02/05) Fix robust mutex detection on glibc 2.10-11 (ITS#8330) + Fix page_search_root assert on FreeDB (ITS#8336) + Fix MDB_APPENDDUP vs. rewrite(single item) (ITS#8334) + Fix mdb_copy of large files on Windows + Fix subcursor move after delete (ITS#8355) + Fix mdb_midl_shirnk off-by-one (ITS#8363) Check for utf8_to_utf16 failures (ITS#7992) Catch strdup failure in mdb_dbi_open Build Additional makefile var tweaks (ITS#8169) Documentation Add Getting Started page - + Update WRITEMAP description + LMDB 0.9.17 Release (2015/11/30) Fix ITS#7377 catch calloc failure diff --git a/contrib/db/liblmdb/COPYRIGHT b/contrib/db/liblmdb/COPYRIGHT index 722d1a51..f076556e 100644 --- a/contrib/db/liblmdb/COPYRIGHT +++ b/contrib/db/liblmdb/COPYRIGHT @@ -1,4 +1,4 @@ -Copyright 2011-2015 Howard Chu, Symas Corp. +Copyright 2011-2019 Howard Chu, Symas Corp. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/Doxyfile b/contrib/db/liblmdb/Doxyfile index 5047c0bb..5ca2cfe8 100644 --- a/contrib/db/liblmdb/Doxyfile +++ b/contrib/db/liblmdb/Doxyfile @@ -253,7 +253,7 @@ IDL_PROPERTY_SUPPORT = YES # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. -DISTRIBUTE_GROUP_DOC = NO +DISTRIBUTE_GROUP_DOC = YES # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a diff --git a/contrib/db/liblmdb/Makefile b/contrib/db/liblmdb/Makefile index f3c93a2f..f254511f 100644 --- a/contrib/db/liblmdb/Makefile +++ b/contrib/db/liblmdb/Makefile @@ -8,7 +8,7 @@ # platforms; you should not need to change any of these. # Read their descriptions in mdb.c if you do: # -# - MDB_USE_POSIX_MUTEX, MDB_USE_POSIX_SEM, MDB_USE_SYSV_SEM +# - MDB_USE_POSIX_SEM # - MDB_DSYNC # - MDB_FDATASYNC # - MDB_FDATASYNC_WORKS @@ -24,8 +24,9 @@ W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized THREADS = -pthread OPT = -O2 -g CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) -LDLIBS = # -lntdll # Windows needs ntdll -SOLIBS = # -lntdll +LDLIBS = +SOLIBS = +SOEXT = .so prefix = /usr/local exec_prefix = $(prefix) bindir = $(exec_prefix)/bin @@ -37,7 +38,7 @@ mandir = $(datarootdir)/man ######################################################################## IHDRS = lmdb.h -ILIBS = liblmdb.a liblmdb.so +ILIBS = liblmdb.a liblmdb$(SOEXT) IPROGS = mdb_stat mdb_copy mdb_dump mdb_load IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1 PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5 @@ -63,7 +64,7 @@ test: all liblmdb.a: mdb.o midl.o $(AR) rs $@ mdb.o midl.o -liblmdb.so: mdb.lo midl.lo +liblmdb$(SOEXT): mdb.lo midl.lo # $(CC) $(LDFLAGS) -pthread -shared -Wl,-Bsymbolic -o $@ mdb.o midl.o $(SOLIBS) $(CC) $(LDFLAGS) -pthread -shared -o $@ mdb.lo midl.lo $(SOLIBS) diff --git a/contrib/db/liblmdb/intro.doc b/contrib/db/liblmdb/intro.doc index 870c7bb8..64dfcaad 100644 --- a/contrib/db/liblmdb/intro.doc +++ b/contrib/db/liblmdb/intro.doc @@ -1,5 +1,5 @@ /* - * Copyright 2015 Howard Chu, Symas Corp. + * Copyright 2015-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/lmdb.h b/contrib/db/liblmdb/lmdb.h index 0bca3eb7..2f55290d 100644 --- a/contrib/db/liblmdb/lmdb.h +++ b/contrib/db/liblmdb/lmdb.h @@ -53,15 +53,14 @@ * * Fix: Check for stale readers periodically, using the * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. - * Stale writers will be cleared automatically on most systems: + * Stale writers will be cleared automatically on some systems: * - Windows - automatic - * - BSD, systems using SysV semaphores - automatic * - Linux, systems using POSIX mutexes with Robust option - automatic + * - not on BSD, systems using POSIX semaphores. * Otherwise just make all programs using the database close it; * the lockfile is always reset on first open of the environment. * - * - On BSD systems or others configured with MDB_USE_SYSV_SEM or - * MDB_USE_POSIX_SEM, + * - On BSD systems or others configured with MDB_USE_POSIX_SEM, * startup can fail due to semaphores owned by another userid. * * Fix: Open and close the database as the user which owns the @@ -97,11 +96,12 @@ * transactions. Each transaction belongs to one thread. See below. * The #MDB_NOTLS flag changes this for read-only transactions. * - * - Use an MDB_env* in the process which opened it, without fork()ing. + * - Use an MDB_env* in the process which opened it, not after fork(). * * - Do not have open an LMDB database twice in the same process at * the same time. Not even from a plain open() call - close()ing it - * breaks flock() advisory locking. + * breaks fcntl() advisory locking. (It is OK to reopen it after + * fork() - exec*(), since the lockfile has FD_CLOEXEC set.) * * - Avoid long-lived transactions. Read transactions prevent * reuse of pages freed by newer write transactions, thus the @@ -135,7 +135,7 @@ * * @author Howard Chu, Symas Corporation. * - * @copyright Copyright 2011-2016 Howard Chu, Symas Corp. All rights reserved. + * @copyright Copyright 2011-2019 Howard Chu, Symas Corp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP @@ -166,7 +166,6 @@ #define _LMDB_H_ #include -#include #ifdef __cplusplus extern "C" { @@ -179,13 +178,6 @@ typedef int mdb_mode_t; typedef mode_t mdb_mode_t; #endif -#ifdef MDB_VL32 -typedef uint64_t mdb_size_t; -#define mdb_env_create mdb_env_create_vl32 /**< Prevent mixing with non-VL32 builds */ -#else -typedef size_t mdb_size_t; -#endif - /** An abstraction for a file handle. * On POSIX systems file handles are small integers. On Windows * they're opaque pointers. @@ -208,7 +200,7 @@ typedef int mdb_filehandle_t; /** Library minor version */ #define MDB_VERSION_MINOR 9 /** Library patch version */ -#define MDB_VERSION_PATCH 70 +#define MDB_VERSION_PATCH 24 /** Combine args a,b,c into a single integer for easy version comparisons */ #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) @@ -218,7 +210,7 @@ typedef int mdb_filehandle_t; MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) /** The release date of this library version */ -#define MDB_VERSION_DATE "December 19, 2015" +#define MDB_VERSION_DATE "July 24, 2019" /** A stringifier for the version info */ #define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")" @@ -311,8 +303,6 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_NORDAHEAD 0x800000 /** don't initialize malloc'd memory before writing to datafile */ #define MDB_NOMEMINIT 0x1000000 - /** use the previous snapshot rather than the latest one */ -#define MDB_PREVSNAPSHOT 0x2000000 /** @} */ /** @defgroup mdb_dbi_open Database Flags @@ -380,7 +370,7 @@ typedef enum MDB_cursor_op { MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ MDB_GET_CURRENT, /**< Return key/data at current cursor position */ - MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items + MDB_GET_MULTIPLE, /**< Return up to a page of duplicate data items from current cursor position. Move cursor to prepare for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ MDB_LAST, /**< Position at last key/data item */ @@ -389,7 +379,7 @@ typedef enum MDB_cursor_op { MDB_NEXT, /**< Position at next data item */ MDB_NEXT_DUP, /**< Position at next data item of current key. Only for #MDB_DUPSORT */ - MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items + MDB_NEXT_MULTIPLE, /**< Return up to a page of duplicate data items from next cursor position. Move cursor to prepare for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ MDB_NEXT_NODUP, /**< Position at first data item of next key */ @@ -400,7 +390,7 @@ typedef enum MDB_cursor_op { MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ - MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to + MDB_PREV_MULTIPLE /**< Position at previous page and return up to a page of duplicate data items. Only for #MDB_DUPFIXED */ } MDB_cursor_op; @@ -467,18 +457,18 @@ typedef struct MDB_stat { unsigned int ms_psize; /**< Size of a database page. This is currently the same for all databases. */ unsigned int ms_depth; /**< Depth (height) of the B-tree */ - mdb_size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ - mdb_size_t ms_leaf_pages; /**< Number of leaf pages */ - mdb_size_t ms_overflow_pages; /**< Number of overflow pages */ - mdb_size_t ms_entries; /**< Number of data items */ + size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ + size_t ms_leaf_pages; /**< Number of leaf pages */ + size_t ms_overflow_pages; /**< Number of overflow pages */ + size_t ms_entries; /**< Number of data items */ } MDB_stat; /** @brief Information about the environment */ typedef struct MDB_envinfo { void *me_mapaddr; /**< Address of map, if fixed */ - mdb_size_t me_mapsize; /**< Size of the data memory map */ - mdb_size_t me_last_pgno; /**< ID of the last used page */ - mdb_size_t me_last_txnid; /**< ID of the last committed transaction */ + size_t me_mapsize; /**< Size of the data memory map */ + size_t me_last_pgno; /**< ID of the last used page */ + size_t me_last_txnid; /**< ID of the last committed transaction */ unsigned int me_maxreaders; /**< max reader slots in the environment */ unsigned int me_numreaders; /**< max reader slots used in the environment */ } MDB_envinfo; @@ -624,12 +614,6 @@ int mdb_env_create(MDB_env **env); * caller is expected to overwrite all of the memory that was * reserved in that case. * This flag may be changed at any time using #mdb_env_set_flags(). - *
  • #MDB_PREVSNAPSHOT - * Open the environment with the previous snapshot rather than the latest - * one. This loses the latest transaction, but may help work around some - * types of corruption. If opened with write access, this must be the - * only process using the environment. This flag is automatically reset - * after a write transaction is successfully committed. * * @param[in] mode The UNIX permissions to set on created files and semaphores. * This parameter is ignored on Windows. @@ -696,6 +680,7 @@ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); *
  • #MDB_CP_COMPACT - Perform compaction while copying: omit free * pages and sequentially renumber all pages in output. This option * consumes more CPU and runs more slowly than the default. + * Currently it fails if the environment has suffered a page leak. * * @return A non-zero error value on failure and 0 on success. */ @@ -810,6 +795,10 @@ int mdb_env_get_flags(MDB_env *env, unsigned int *flags); int mdb_env_get_path(MDB_env *env, const char **path); /** @brief Return the filedescriptor for the given environment. + * + * This function may be called after fork(), so the descriptor can be + * closed before exec*(). Other LMDB file descriptors have FD_CLOEXEC. + * (Until LMDB 0.9.18, only the lockfile had that.) * * @param[in] env An environment handle returned by #mdb_env_create() * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor. @@ -853,7 +842,7 @@ int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd); * an active write transaction. * */ -int mdb_env_set_mapsize(MDB_env *env, mdb_size_t size); +int mdb_env_set_mapsize(MDB_env *env, size_t size); /** @brief Set the maximum number of threads/reader slots for the environment. * @@ -966,10 +955,6 @@ int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func); *
      *
    • #MDB_RDONLY * This transaction will not perform any write operations. - *
    • #MDB_NOSYNC - * Don't flush system buffers to disk when committing this transaction. - *
    • #MDB_NOMETASYNC - * Flush system buffers but omit metadata flush when committing this transaction. *
    * @param[out] txn Address where the new #MDB_txn handle will be stored * @return A non-zero error value on failure and 0 on success. Some possible @@ -1002,7 +987,7 @@ MDB_env *mdb_txn_env(MDB_txn *txn); * @param[in] txn A transaction handle returned by #mdb_txn_begin() * @return A transaction ID, valid if input is an active transaction. */ -mdb_size_t mdb_txn_id(MDB_txn *txn); +size_t mdb_txn_id(MDB_txn *txn); /** @brief Commit all the operations of a transaction into the database. * @@ -1118,8 +1103,9 @@ int mdb_txn_renew(MDB_txn *txn); * This flag may only be used in combination with #MDB_DUPSORT. This option * tells the library that the data items for this database are all the same * size, which allows further optimizations in storage and retrieval. When - * all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE - * cursor operations may be used to retrieve multiple items at once. + * all data items are the same size, the #MDB_GET_MULTIPLE, #MDB_NEXT_MULTIPLE + * and #MDB_PREV_MULTIPLE cursor operations may be used to retrieve multiple + * items at once. *
  • #MDB_INTEGERDUP * This option specifies that duplicate data items are binary integers, * similar to #MDB_INTEGERKEY keys. @@ -1524,6 +1510,10 @@ int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, /** @brief Delete current key/data pair * * This function deletes the key/data pair to which the cursor refers. + * This does not invalidate the cursor, so operations such as MDB_NEXT + * can still be used on it. + * Both MDB_NEXT and MDB_GET_CURRENT will return the same record after + * this operation. * @param[in] cursor A cursor handle returned by #mdb_cursor_open() * @param[in] flags Options for this operation. This parameter * must be set to 0 or one of the values described here. @@ -1552,7 +1542,7 @@ int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags); *
  • EINVAL - cursor is not initialized, or an invalid parameter was specified. * */ -int mdb_cursor_count(MDB_cursor *cursor, mdb_size_t *countp); +int mdb_cursor_count(MDB_cursor *cursor, size_t *countp); /** @brief Compare two data items according to a particular database. * diff --git a/contrib/db/liblmdb/mdb.c b/contrib/db/liblmdb/mdb.c index ca9f3b12..692feaa3 100644 --- a/contrib/db/liblmdb/mdb.c +++ b/contrib/db/liblmdb/mdb.c @@ -5,7 +5,7 @@ * BerkeleyDB API, but much simplified. */ /* - * Copyright 2011-2016 Howard Chu, Symas Corp. + * Copyright 2011-2019 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,50 +35,13 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif -#if defined(MDB_VL32) || defined(__WIN64__) +#if defined(__WIN64__) #define _FILE_OFFSET_BITS 64 #endif #ifdef _WIN32 #include #include -#include - -#ifndef _NTDEF_ -typedef _Return_type_success_(return >= 0) LONG NTSTATUS; -typedef NTSTATUS *PNTSTATUS; -#endif - -/** Visual studio big files support -*/ - -/* We use native NT APIs to setup the memory map, so that we can - * let the DB file grow incrementally instead of always preallocating - * the full size. These APIs are defined in and - * but those headers are meant for driver-level development and - * conflict with the regular user-level headers, so we explicitly - * declare them here. Using these APIs also means we must link to - * ntdll.dll, which is not linked by default in user code. - */ -NTSTATUS WINAPI -NtCreateSection(OUT PHANDLE sh, IN ACCESS_MASK acc, - IN void * oa OPTIONAL, - IN PLARGE_INTEGER ms OPTIONAL, - IN ULONG pp, IN ULONG aa, IN HANDLE fh OPTIONAL); - -typedef enum _SECTION_INHERIT { - ViewShare = 1, - ViewUnmap = 2 -} SECTION_INHERIT; - -NTSTATUS WINAPI -NtMapViewOfSection(IN PHANDLE sh, IN HANDLE ph, - IN OUT PVOID *addr, IN ULONG_PTR zbits, - IN SIZE_T cs, IN OUT PLARGE_INTEGER off OPTIONAL, - IN OUT PSIZE_T vs, IN SECTION_INHERIT ih, - IN ULONG at, IN ULONG pp); - -NTSTATUS WINAPI -NtClose(HANDLE h); +#include /* get wcscpy() */ /** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it * as int64 which is wrong. MSVC doesn't define it at all, so just @@ -130,13 +93,6 @@ extern int cacheflush(char *addr, int nbytes, int cache); #define BROKEN_FDATASYNC #endif -#ifdef _WIN32 -typedef int64_t off64_t; -#else -typedef off_t off64_t; -#endif - - #include #include #include @@ -153,10 +109,14 @@ typedef SSIZE_T ssize_t; #include #endif -#if defined(__sun) || defined(__ANDROID__) +#if defined(__sun) || defined(ANDROID) /* Most platforms have posix_memalign, older may only have memalign */ #define HAVE_MEMALIGN 1 #include +/* On Solaris, we need the POSIX sigwait function */ +#if defined (__sun) +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif #endif #if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) @@ -164,36 +124,25 @@ typedef SSIZE_T ssize_t; #include /* defines BYTE_ORDER on HPUX and Solaris */ #endif -#if defined(__APPLE__) || defined (BSD) -# if !(defined(MDB_USE_POSIX_MUTEX) || defined(MDB_USE_POSIX_SEM)) -# define MDB_USE_SYSV_SEM 1 -# endif +#if defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__) +# define MDB_USE_POSIX_SEM 1 # define MDB_FDATASYNC fsync -#elif defined(__ANDROID__) +#elif defined(ANDROID) # define MDB_FDATASYNC fsync #endif #ifndef _WIN32 #include +#include #ifdef MDB_USE_POSIX_SEM # define MDB_USE_HASH 1 #include -#elif defined(MDB_USE_SYSV_SEM) -#include -#include -#ifdef _SEM_SEMUN_UNDEFINED -union semun { - int val; - struct semid_ds *buf; - unsigned short *array; -}; -#endif /* _SEM_SEMUN_UNDEFINED */ #else #define MDB_USE_POSIX_MUTEX 1 -#endif /* MDB_USE_POSIX_SEM */ -#endif /* !_WIN32 */ +#endif +#endif -#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) + defined(MDB_USE_SYSV_SEM) \ +#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) \ + defined(MDB_USE_POSIX_MUTEX) != 1 # error "Ambiguous shared-lock implementation" #endif @@ -295,8 +244,6 @@ union semun { #define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10) #ifdef _WIN32 #define MDB_OWNERDEAD ((int) WAIT_ABANDONED) -#elif defined MDB_USE_SYSV_SEM -#define MDB_OWNERDEAD (MDB_LAST_ERRCODE + 11) #elif defined(MDB_USE_POSIX_MUTEX) && defined(EOWNERDEAD) #define MDB_OWNERDEAD EOWNERDEAD /**< #LOCK_MUTEX0() result if dead owner */ #endif @@ -307,28 +254,31 @@ union semun { /** Some platforms define the EOWNERDEAD error code * even though they don't support Robust Mutexes. * Compile with -DMDB_USE_ROBUST=0, or use some other - * mechanism like -DMDB_USE_SYSV_SEM instead of - * -DMDB_USE_POSIX_MUTEX. (SysV semaphores are - * also Robust, but some systems don't support them - * either.) + * mechanism like -DMDB_USE_POSIX_SEM instead of + * -DMDB_USE_POSIX_MUTEX. + * (Posix semaphores are not robust.) */ #ifndef MDB_USE_ROBUST /* Android currently lacks Robust Mutex support. So does glibc < 2.4. */ -# if defined(MDB_USE_POSIX_MUTEX) && (defined(__ANDROID__) || \ +# if defined(MDB_USE_POSIX_MUTEX) && (defined(ANDROID) || \ (defined(__GLIBC__) && GLIBC_VER < 0x020004)) # define MDB_USE_ROBUST 0 # else # define MDB_USE_ROBUST 1 +# endif +#endif /* !MDB_USE_ROBUST */ + +#if defined(MDB_USE_POSIX_MUTEX) && (MDB_USE_ROBUST) /* glibc < 2.12 only provided _np API */ -# if defined(__GLIBC__) && GLIBC_VER < 0x02000c +# if (defined(__GLIBC__) && GLIBC_VER < 0x02000c) || \ + (defined(PTHREAD_MUTEX_ROBUST_NP) && !defined(PTHREAD_MUTEX_ROBUST)) # define PTHREAD_MUTEX_ROBUST PTHREAD_MUTEX_ROBUST_NP # define pthread_mutexattr_setrobust(attr, flag) pthread_mutexattr_setrobust_np(attr, flag) # define pthread_mutex_consistent(mutex) pthread_mutex_consistent_np(mutex) # endif -# endif -#endif /* MDB_USE_ROBUST */ +#endif /* MDB_USE_POSIX_MUTEX && MDB_USE_ROBUST */ -#if defined(MDB_OWNERDEAD) && MDB_USE_ROBUST +#if defined(MDB_OWNERDEAD) && (MDB_USE_ROBUST) #define MDB_ROBUST_SUPPORTED 1 #endif @@ -351,8 +301,10 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t; #define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE) #define pthread_cond_signal(x) SetEvent(*x) #define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) -#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) -#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) +#define THREAD_CREATE(thr,start,arg) \ + (((thr) = CreateThread(NULL, 0, start, arg, 0, NULL)) ? 0 : ErrCode()) +#define THREAD_FINISH(thr) \ + (WaitForSingleObject(thr, INFINITE) ? ErrCode() : 0) #define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE) #define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex) #define mdb_mutex_consistent(mutex) 0 @@ -392,50 +344,16 @@ mdb_sem_wait(sem_t *sem) return rc; } -#elif defined MDB_USE_SYSV_SEM - -typedef struct mdb_mutex { - int semid; - int semnum; - int *locked; -} mdb_mutex_t[1], *mdb_mutexref_t; - -#define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex) -#define UNLOCK_MUTEX(mutex) do { \ - struct sembuf sb = { 0, 1, SEM_UNDO }; \ - sb.sem_num = (mutex)->semnum; \ - *(mutex)->locked = 0; \ - semop((mutex)->semid, &sb, 1); \ -} while(0) - -static int -mdb_sem_wait(mdb_mutexref_t sem) -{ - int rc, *locked = sem->locked; - struct sembuf sb = { 0, -1, SEM_UNDO }; - sb.sem_num = sem->semnum; - do { - if (!semop(sem->semid, &sb, 1)) { - rc = *locked ? MDB_OWNERDEAD : MDB_SUCCESS; - *locked = 1; - break; - } - } while ((rc = errno) == EINTR); - return rc; -} - -#define mdb_mutex_consistent(mutex) 0 - #else /* MDB_USE_POSIX_MUTEX: */ - /** Shared mutex/semaphore as it is stored (mdb_mutex_t), and as - * local variables keep it (mdb_mutexref_t). + /** Shared mutex/semaphore as the original is stored. * - * An mdb_mutex_t can be assigned to an mdb_mutexref_t. They can - * be the same, or an array[size 1] and a pointer. - * @{ + * Not for copies. Instead it can be assigned to an #mdb_mutexref_t. + * When mdb_mutexref_t is a pointer and mdb_mutex_t is not, then it + * is array[size 1] so it can be assigned to the pointer. */ -typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; - /* @} */ +typedef pthread_mutex_t mdb_mutex_t[1]; + /** Reference to an #mdb_mutex_t */ +typedef pthread_mutex_t *mdb_mutexref_t; /** Lock the reader or writer mutex. * Returns 0 or a code to give #mdb_mutex_failed(), as in #LOCK_MUTEX(). */ @@ -446,7 +364,7 @@ typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; /** Mark mutex-protected data as repaired, after death of previous owner. */ #define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex) -#endif /* MDB_USE_POSIX_SEM || MDB_USE_SYSV_SEM */ +#endif /* MDB_USE_POSIX_SEM */ /** Get the error code for the last failed system function. */ @@ -471,30 +389,12 @@ typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; #define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) #endif -#ifdef MDB_VL32 -#ifdef _WIN32 -#define Y "I64" -#else -#define Y "ll" -#endif -#else -#define Y Z -#endif - #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) #define MNAME_LEN 32 -#elif defined(MDB_USE_SYSV_SEM) -#define MNAME_LEN (sizeof(int)) #else #define MNAME_LEN (sizeof(pthread_mutex_t)) #endif -#ifdef MDB_USE_SYSV_SEM -#define SYSV_SEM_FLAG 1 /**< SysV sems in lockfile format */ -#else -#define SYSV_SEM_FLAG 0 -#endif - /** @} */ #ifdef MDB_ROBUST_SUPPORTED @@ -636,7 +536,7 @@ static txnid_t mdb_debug_start; /** The version number for a database's datafile format. */ #define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1) /** The version number for a database's lockfile format. */ -#define MDB_LOCK_VERSION ((MDB_DEVEL) ? 999 : 1) +#define MDB_LOCK_VERSION 1 /** @brief The max size of a key we can write, or 0 for computed max. * @@ -825,6 +725,14 @@ typedef struct MDB_txbody { uint32_t mtb_magic; /** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */ uint32_t mtb_format; +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) + char mtb_rmname[MNAME_LEN]; +#else + /** Mutex protecting access to this table. + * This is the reader table lock used with LOCK_MUTEX(). + */ + mdb_mutex_t mtb_rmutex; +#endif /** The ID of the last transaction committed to the database. * This is recorded here only for convenience; the value can always * be determined by reading the main database meta pages. @@ -835,17 +743,6 @@ typedef struct MDB_txbody { * when readers release their slots. */ volatile unsigned mtb_numreaders; -#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) - char mtb_rmname[MNAME_LEN]; -#elif defined(MDB_USE_SYSV_SEM) - int mtb_semid; - int mtb_rlocked; -#else - /** Mutex protecting access to this table. - * This is the reader table lock used with LOCK_MUTEX(). - */ - mdb_mutex_t mtb_rmutex; -#endif } MDB_txbody; /** The actual reader table definition. */ @@ -858,19 +755,12 @@ typedef struct MDB_txninfo { #define mti_rmname mt1.mtb.mtb_rmname #define mti_txnid mt1.mtb.mtb_txnid #define mti_numreaders mt1.mtb.mtb_numreaders -#ifdef MDB_USE_SYSV_SEM -#define mti_semid mt1.mtb.mtb_semid -#define mti_rlocked mt1.mtb.mtb_rlocked -#endif char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; } mt1; union { #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) char mt2_wmname[MNAME_LEN]; #define mti_wmname mt2.mt2_wmname -#elif defined MDB_USE_SYSV_SEM - int mt2_wlocked; -#define mti_wlocked mt2.mt2_wlocked #else mdb_mutex_t mt2_wmutex; #define mti_wmutex mt2.mt2_wmutex @@ -885,13 +775,26 @@ typedef struct MDB_txninfo { ((uint32_t) \ ((MDB_LOCK_VERSION) \ /* Flags which describe functionality */ \ - + (SYSV_SEM_FLAG << 18) \ + (((MDB_PIDLOCK) != 0) << 16))) /** @} */ -/** Common header for all page types. - * Overflow records occupy a number of contiguous pages with no - * headers on any page after the first. +/** Common header for all page types. The page type depends on #mp_flags. + * + * #P_BRANCH and #P_LEAF pages have unsorted '#MDB_node's at the end, with + * sorted #mp_ptrs[] entries referring to them. Exception: #P_LEAF2 pages + * omit mp_ptrs and pack sorted #MDB_DUPFIXED values after the page header. + * + * #P_OVERFLOW records occupy one or more contiguous pages where only the + * first has a page header. They hold the real data of #F_BIGDATA nodes. + * + * #P_SUBP sub-pages are small leaf "pages" with duplicate data. + * A node with flag #F_DUPDATA but not #F_SUBDATA contains a sub-page. + * (Duplicate data can also go in sub-databases, which use normal pages.) + * + * #P_META pages contain #MDB_meta, the start point of an LMDB snapshot. + * + * Each non-metapage up to #MDB_meta.%mm_last_pg is reachable exactly once + * in the snapshot: Either used by a database or listed in a freeDB record. */ typedef struct MDB_page { #define mp_pgno mp_p.p_pgno @@ -900,7 +803,7 @@ typedef struct MDB_page { pgno_t p_pgno; /**< page number */ struct MDB_page *p_next; /**< for in-memory list of freed pages */ } mp_p; - uint16_t mp_pad; + uint16_t mp_pad; /**< key size if this is a LEAF2 page */ /** @defgroup mdb_page Page Flags * @ingroup internal * Flags for the page headers. @@ -967,25 +870,34 @@ typedef struct MDB_page { /** The number of overflow pages needed to store the given size. */ #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) - /** Link in #MDB_txn.%mt_loose_pgs list */ + /** Link in #MDB_txn.%mt_loose_pgs list. + * Kept outside the page header, which is needed when reusing the page. + */ #define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2)) /** Header for a single key/data pair within a page. * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. * We guarantee 2-byte alignment for 'MDB_node's. + * + * #mn_lo and #mn_hi are used for data size on leaf nodes, and for child + * pgno on branch nodes. On 64 bit platforms, #mn_flags is also used + * for pgno. (Branch nodes have no flags). Lo and hi are in host byte + * order in case some accesses can be optimized to 32-bit word access. + * + * Leaf node flags describe node contents. #F_BIGDATA says the node's + * data part is the page number of an overflow page with actual data. + * #F_DUPDATA and #F_SUBDATA can be combined giving duplicate data in + * a sub-page/sub-database, and named databases (just #F_SUBDATA). */ typedef struct MDB_node { - /** lo and hi are used for data size on leaf nodes and for - * child pgno on branch nodes. On 64 bit platforms, flags - * is also used for pgno. (Branch nodes have no flags). - * They are in host byte order in case that lets some - * accesses be optimized into a 32-bit word access. - */ + /** part of data size or pgno + * @{ */ #if BYTE_ORDER == LITTLE_ENDIAN - unsigned short mn_lo, mn_hi; /**< part of data size or pgno */ + unsigned short mn_lo, mn_hi; #else unsigned short mn_hi, mn_lo; #endif + /** @} */ /** @defgroup mdb_node Node Flags * @ingroup internal * Flags for node headers. @@ -1091,13 +1003,13 @@ typedef struct MDB_db { pgno_t md_branch_pages; /**< number of internal pages */ pgno_t md_leaf_pages; /**< number of leaf pages */ pgno_t md_overflow_pages; /**< number of overflow pages */ - mdb_size_t md_entries; /**< number of data items */ + size_t md_entries; /**< number of data items */ pgno_t md_root; /**< the root page of this tree */ } MDB_db; - /** mdb_dbi_open flags */ #define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */ #define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) + /** #mdb_dbi_open() flags */ #define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\ MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE) @@ -1121,22 +1033,17 @@ typedef struct MDB_meta { uint32_t mm_magic; /** Version number of this file. Must be set to #MDB_DATA_VERSION. */ uint32_t mm_version; -#ifdef MDB_VL32 - union { /* always zero since we don't support fixed mapping in MDB_VL32 */ - MDB_ID mmun_ull; - void *mmun_address; - } mm_un; -#define mm_address mm_un.mmun_address -#else void *mm_address; /**< address for fixed mapping */ -#endif - pgno_t mm_mapsize; /**< size of mmap region */ + size_t mm_mapsize; /**< size of mmap region */ MDB_db mm_dbs[CORE_DBS]; /**< first is free space, 2nd is main db */ /** The size of pages used in this DB */ #define mm_psize mm_dbs[FREE_DBI].md_pad /** Any persistent environment flags. @ref mdb_env */ #define mm_flags mm_dbs[FREE_DBI].md_flags - pgno_t mm_last_pg; /**< last used page in file */ + /** Last used page in the datafile. + * Actually the file may be shorter if the freeDB lists the final pages. + */ + pgno_t mm_last_pg; volatile txnid_t mm_txnid; /**< txnid that committed this page */ } MDB_meta; @@ -1173,9 +1080,6 @@ struct MDB_txn { /** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */ MDB_txn *mt_child; pgno_t mt_next_pgno; /**< next unallocated page */ -#ifdef MDB_VL32 - pgno_t mt_last_pgno; /**< last written page */ -#endif /** The ID of this transaction. IDs are integers incrementing from 1. * Only committed write transactions increment the ID. If a transaction * aborts, the ID may be re-used by the next writer. @@ -1189,7 +1093,7 @@ struct MDB_txn { * in this transaction, linked through #NEXT_LOOSE_PAGE(page). */ MDB_page *mt_loose_pgs; - /* #Number of loose pages (#mt_loose_pgs) */ + /** Number of loose pages (#mt_loose_pgs) */ int mt_loose_count; /** The sorted list of dirty pages we temporarily wrote to disk * because the dirty list was full. page numbers in here are @@ -1212,29 +1116,17 @@ struct MDB_txn { * @ingroup internal * @{ */ -#define DB_DIRTY 0x01 /**< DB was modified or is DUPSORT data */ +#define DB_DIRTY 0x01 /**< DB was written in this txn */ #define DB_STALE 0x02 /**< Named-DB record is older than txnID */ #define DB_NEW 0x04 /**< Named-DB handle opened in this txn */ #define DB_VALID 0x08 /**< DB handle is valid, see also #MDB_VALID */ #define DB_USRVALID 0x10 /**< As #DB_VALID, but not set for #FREE_DBI */ +#define DB_DUPDATA 0x20 /**< DB is #MDB_DUPSORT data */ /** @} */ /** In write txns, array of cursors for each DB */ MDB_cursor **mt_cursors; /** Array of flags for each DB */ unsigned char *mt_dbflags; -#ifdef MDB_VL32 - /** List of read-only pages (actually chunks) */ - MDB_ID3L mt_rpages; - /** We map chunks of 16 pages. Even though Windows uses 4KB pages, all - * mappings must begin on 64KB boundaries. So we round off all pgnos to - * a chunk boundary. We do the same on Linux for symmetry, and also to - * reduce the frequency of mmap/munmap calls. - */ -#define MDB_RPAGE_CHUNK 16 -#define MDB_TRPAGE_SIZE 4096 /**< size of #mt_rpages array of chunks */ -#define MDB_TRPAGE_MAX (MDB_TRPAGE_SIZE-1) /**< maximum chunk index */ - unsigned int mt_rpcheck; /**< threshold for reclaiming unref'd chunks */ -#endif /** Number of DB records in use, or 0 when the txn is finished. * This number only ever increments until the txn finishes; we * don't decrement it when individual DB handles are closed. @@ -1246,9 +1138,7 @@ struct MDB_txn { * @{ */ /** #mdb_txn_begin() flags */ -#define MDB_TXN_BEGIN_FLAGS (MDB_NOMETASYNC|MDB_NOSYNC|MDB_RDONLY) -#define MDB_TXN_NOMETASYNC MDB_NOMETASYNC /**< don't sync meta for this txn on commit */ -#define MDB_TXN_NOSYNC MDB_NOSYNC /**< don't sync this txn on commit */ +#define MDB_TXN_BEGIN_FLAGS MDB_RDONLY #define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */ /* internal txn flags */ #define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */ @@ -1314,19 +1204,10 @@ struct MDB_cursor { #define C_SUB 0x04 /**< Cursor is a sub-cursor */ #define C_DEL 0x08 /**< last op was a cursor_del */ #define C_UNTRACK 0x40 /**< Un-track cursor when closing */ -#define C_WRITEMAP MDB_TXN_WRITEMAP /**< Copy of txn flag */ -/** Read-only cursor into the txn's original snapshot in the map. - * Set for read-only txns, and in #mdb_page_alloc() for #FREE_DBI when - * #MDB_DEVEL & 2. Only implements code which is necessary for this. - */ -#define C_ORIG_RDONLY MDB_TXN_RDONLY /** @} */ unsigned int mc_flags; /**< @ref mdb_cursor */ MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */ indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */ -#ifdef MDB_VL32 - MDB_page *mc_ovpg; /**< a referenced overflow page */ -#endif }; /** Context for sorted-dup records. @@ -1345,6 +1226,23 @@ typedef struct MDB_xcursor { unsigned char mx_dbflag; } MDB_xcursor; + /** Check if there is an inited xcursor */ +#define XCURSOR_INITED(mc) \ + ((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + + /** Update the xcursor's sub-page pointer, if any, in \b mc. Needed + * when the node which contains the sub-page may have moved. Called + * with leaf page \b mp = mc->mc_pg[\b top]. + */ +#define XCURSOR_REFRESH(mc, top, mp) do { \ + MDB_page *xr_pg = (mp); \ + MDB_node *xr_node; \ + if (!XCURSOR_INITED(mc) || (mc)->mc_ki[top] >= NUMKEYS(xr_pg)) break; \ + xr_node = NODEPTR(xr_pg, (mc)->mc_ki[top]); \ + if ((xr_node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) \ + (mc)->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(xr_node); \ +} while (0) + /** State of FreeDB old pages, stored in the MDB_env */ typedef struct MDB_pgstate { pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */ @@ -1355,10 +1253,7 @@ typedef struct MDB_pgstate { struct MDB_env { HANDLE me_fd; /**< The main data file */ HANDLE me_lfd; /**< The lock file */ - HANDLE me_mfd; /**< just for writing the meta pages */ -#if defined(MDB_VL32) && defined(_WIN32) - HANDLE me_fmh; /**< File Mapping handle */ -#endif + HANDLE me_mfd; /**< For writing and syncing the meta pages */ /** Failed to update the meta page. Probably an I/O error. */ #define MDB_FATAL_ERROR 0x80000000U /** Some fields are initialized. */ @@ -1383,8 +1278,8 @@ struct MDB_env { void *me_pbuf; /**< scratch area for DUPSORT put() */ MDB_txn *me_txn; /**< current write transaction */ MDB_txn *me_txn0; /**< prealloc'd write transaction */ - mdb_size_t me_mapsize; /**< size of the data memory map */ - off64_t me_size; /**< current file size */ + size_t me_mapsize; /**< size of the data memory map */ + off_t me_size; /**< current file size */ pgno_t me_maxpg; /**< me_mapsize / me_psize */ MDB_dbx *me_dbxs; /**< array of static DB info */ uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */ @@ -1416,13 +1311,6 @@ struct MDB_env { #else mdb_mutex_t me_rmutex; mdb_mutex_t me_wmutex; -#endif -#ifdef MDB_VL32 - MDB_ID3L me_rpages; /**< like #mt_rpages, but global to env */ - pthread_mutex_t me_rpmutex; /**< control access to #me_rpages */ -#define MDB_ERPAGE_SIZE 16384 -#define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1) - unsigned int me_rpcheck; #endif void *me_userctx; /**< User-settable context */ MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ @@ -1484,7 +1372,7 @@ static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, unsigned int nflags); -static int mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta); +static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static MDB_meta *mdb_env_pick_meta(const MDB_env *env); static int mdb_env_write_meta(MDB_txn *txn); #ifdef MDB_USE_POSIX_MUTEX /* Drop unused excl arg */ @@ -1543,7 +1431,8 @@ static SECURITY_DESCRIPTOR mdb_null_sd; static SECURITY_ATTRIBUTES mdb_all_sa; static int mdb_sec_inited; -static int utf8_to_utf16(const char *src, int srcsize, wchar_t **dst, int *dstsize); +struct MDB_name; +static int utf8_to_utf16(const char *src, struct MDB_name *dst, int xtra); #endif /** Return the library version info. */ @@ -1722,20 +1611,20 @@ mdb_page_list(MDB_page *mp) case P_LEAF|P_LEAF2: type = "LEAF2 page"; break; case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break; case P_OVERFLOW: - fprintf(stderr, "Overflow page %"Y"u pages %u%s\n", + fprintf(stderr, "Overflow page %"Z"u pages %u%s\n", pgno, mp->mp_pages, state); return; case P_META: - fprintf(stderr, "Meta-page %"Y"u txnid %"Y"u\n", + fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n", pgno, ((MDB_meta *)METADATA(mp))->mm_txnid); return; default: - fprintf(stderr, "Bad page %"Y"u flags 0x%u\n", pgno, mp->mp_flags); + fprintf(stderr, "Bad page %"Z"u flags 0x%X\n", pgno, mp->mp_flags); return; } nkeys = NUMKEYS(mp); - fprintf(stderr, "%s %"Y"u numkeys %d%s\n", type, pgno, nkeys, state); + fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state); for (i=0; imn_data; nsize = NODESIZE + key.mv_size; if (IS_BRANCH(mp)) { - fprintf(stderr, "key %d: page %"Y"u, %s\n", i, NODEPGNO(node), + fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node), DKEY(&key)); total += nsize; } else { @@ -1785,7 +1674,7 @@ mdb_cursor_chk(MDB_cursor *mc) } if (mc->mc_ki[i] >= NUMKEYS(mc->mc_pg[i])) printf("ack!\n"); - if (mc->mc_xcursor && (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { + if (XCURSOR_INITED(mc)) { node = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) && mc->mc_xcursor->mx_cursor.mc_pg[0] != NODEDATA(node)) { @@ -1846,7 +1735,7 @@ static void mdb_audit(MDB_txn *txn) } } if (freecount + count + NUM_METAS != txn->mt_next_pgno) { - fprintf(stderr, "audit: %"Y"u freecount: %"Y"u count: %"Y"u total: %"Y"u next_pgno: %"Y"u\n", + fprintf(stderr, "audit: %"Z"u freecount: %"Z"u count: %"Z"u total: %"Z"u next_pgno: %"Z"u\n", txn->mt_txnid, freecount, count+NUM_METAS, freecount+count+NUM_METAS, txn->mt_next_pgno); } @@ -1863,8 +1752,8 @@ int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) { MDB_cmp_func *dcmp = txn->mt_dbxs[dbi].md_dcmp; -#if UINT_MAX < SIZE_MAX || defined(MDB_VL32) - if (dcmp == mdb_cmp_int && a->mv_size == sizeof(mdb_size_t)) +#if UINT_MAX < SIZE_MAX + if (dcmp == mdb_cmp_int && a->mv_size == sizeof(size_t)) dcmp = mdb_cmp_clong; #endif return dcmp(a, b); @@ -1872,6 +1761,7 @@ mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) /** Allocate memory for a page. * Re-use old malloc'd pages first for singletons, otherwise just malloc. + * Set #MDB_TXN_ERROR on failure. */ static MDB_page * mdb_page_malloc(MDB_txn *txn, unsigned num) @@ -1946,47 +1836,6 @@ mdb_dlist_free(MDB_txn *txn) dl[0].mid = 0; } -#ifdef MDB_VL32 -static void -mdb_page_unref(MDB_txn *txn, MDB_page *mp) -{ - pgno_t pgno; - MDB_ID3L tl = txn->mt_rpages; - unsigned x, rem; - if (mp->mp_flags & (P_SUBP|P_DIRTY)) - return; - rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1); - pgno = mp->mp_pgno ^ rem; - x = mdb_mid3l_search(tl, pgno); - if (x != tl[0].mid && tl[x+1].mid == mp->mp_pgno) - x++; - if (tl[x].mref) - tl[x].mref--; -} -#define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp) - -static void -mdb_cursor_unref(MDB_cursor *mc) -{ - int i; - if (mc->mc_txn->mt_rpages[0].mid) { - if (!mc->mc_snum || !mc->mc_pg[0] || IS_SUBP(mc->mc_pg[0])) - return; - for (i=0; imc_snum; i++) - mdb_page_unref(mc->mc_txn, mc->mc_pg[i]); - if (mc->mc_ovpg) { - mdb_page_unref(mc->mc_txn, mc->mc_ovpg); - mc->mc_ovpg = 0; - } - } - mc->mc_snum = mc->mc_top = 0; - mc->mc_pg[0] = NULL; - mc->mc_flags &= ~C_INITIALIZED; -} -#else -#define MDB_PAGE_UNREF(txn, mp) -#endif /* MDB_VL32 */ - /** Loosen or free a single page. * Saves single pages to a list for future reuse * in this same txn. It has been pulled from the freeDB @@ -2028,7 +1877,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) } } if (loose) { - DPRINTF(("loosen db %d page %"Y"u", DDBI(mc), + DPRINTF(("loosen db %d page %"Z"u", DDBI(mc), mp->mp_pgno)); NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs; txn->mt_loose_pgs = mp; @@ -2280,15 +2129,13 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp) } /** Allocate page numbers and memory for writing. Maintain me_pglast, - * me_pghead and mt_next_pgno. + * me_pghead and mt_next_pgno. Set #MDB_TXN_ERROR on failure. * * If there are free pages available from older transactions, they * are re-used first. Otherwise allocate a new page at mt_next_pgno. * Do not modify the freedB, just merge freeDB records into me_pghead[] * and move me_pglast to say which records were consumed. Only this * function can create me_pghead and move me_pglast/mt_next_pgno. - * When #MDB_DEVEL & 2, it is not affected by #mdb_freelist_save(): it - * then uses the transaction's original snapshot of the freeDB. * @param[in] mc cursor A cursor handle identifying the transaction and * database for which we are allocating. * @param[in] num the number of pages to allocate. @@ -2326,7 +2173,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) np = txn->mt_loose_pgs; txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np); txn->mt_loose_count--; - DPRINTF(("db %d use loose page %"Y"u", DDBI(mc), + DPRINTF(("db %d use loose page %"Z"u", DDBI(mc), np->mp_pgno)); *mp = np; return MDB_SUCCESS; @@ -2364,14 +2211,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) last = env->me_pglast; oldest = env->me_pgoldest; mdb_cursor_init(&m2, txn, FREE_DBI, NULL); -#if (MDB_DEVEL) & 2 /* "& 2" so MDB_DEVEL=1 won't hide bugs breaking freeDB */ - /* Use original snapshot. TODO: Should need less care in code - * which modifies the database. Maybe we can delete some code? - */ - m2.mc_flags |= C_ORIG_RDONLY; - m2.mc_db = &env->me_metas[(txn->mt_txnid-1) & 1]->mm_dbs[FREE_DBI]; - m2.mc_dbflag = (unsigned char *)""; /* probably unnecessary */ -#endif if (last) { op = MDB_SET_RANGE; key.mv_data = &last; /* will look up last+1 */ @@ -2413,7 +2252,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) np = m2.mc_pg[m2.mc_top]; leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); if ((rc = mdb_node_read(&m2, leaf, &data)) != MDB_SUCCESS) - return rc; + goto fail; idl = (MDB_ID *) data.mv_data; i = idl[0]; @@ -2429,10 +2268,10 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) } env->me_pglast = last; #if (MDB_DEBUG) > 1 - DPRINTF(("IDL read txn %"Y"u root %"Y"u num %u", + DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u", last, txn->mt_dbs[FREE_DBI].md_root, i)); for (j = i; j; j--) - DPRINTF(("IDL %"Y"u", idl[j])); + DPRINTF(("IDL %"Z"u", idl[j])); #endif /* Merge in descending sorted order */ mdb_midl_xmerge(mop, idl); @@ -2447,20 +2286,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) rc = MDB_MAP_FULL; goto fail; } -#if defined(_WIN32) && !defined(MDB_VL32) - if (!(env->me_flags & MDB_RDONLY)) { - void *p; - p = (MDB_page *)(env->me_map + env->me_psize * pgno); - p = VirtualAlloc(p, env->me_psize * num, MEM_COMMIT, - (env->me_flags & MDB_WRITEMAP) ? PAGE_READWRITE: - PAGE_READONLY); - if (!p) { - DPUTS("VirtualAlloc failed"); - rc = ErrCode(); - goto fail; - } - } -#endif search_done: if (env->me_flags & MDB_WRITEMAP) { @@ -2577,6 +2402,7 @@ mdb_page_unspill(MDB_txn *txn, MDB_page *mp, MDB_page **ret) } /** Touch a page: make it dirty and re-insert into tree with updated pgno. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc cursor pointing to the page to be touched * @return 0 on success, non-zero on failure. */ @@ -2602,7 +2428,7 @@ mdb_page_touch(MDB_cursor *mc) (rc = mdb_page_alloc(mc, 1, &np))) goto fail; pgno = np->mp_pgno; - DPRINTF(("touched db %d page %"Y"u -> %"Y"u", DDBI(mc), + DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc), mp->mp_pgno, pgno)); mdb_cassert(mc, mp->mp_pgno != pgno); mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); @@ -2665,18 +2491,11 @@ done: if (m2 == mc) continue; if (m2->mc_pg[mc->mc_top] == mp) { m2->mc_pg[mc->mc_top] = np; - if ((mc->mc_db->md_flags & MDB_DUPSORT) && - IS_LEAF(np) && - (m2->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - { - MDB_node *leaf = NODEPTR(np, m2->mc_ki[mc->mc_top]); - if ((leaf->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - } + if (IS_LEAF(np)) + XCURSOR_REFRESH(m2, mc->mc_top, np); } } } - MDB_PAGE_UNREF(mc->mc_txn, mp); return 0; fail: @@ -2685,7 +2504,7 @@ fail: } int -mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs) +mdb_env_sync(MDB_env *env, int force) { int rc = 0; if (env->me_flags & MDB_RDONLY) @@ -2694,7 +2513,7 @@ mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs) if (env->me_flags & MDB_WRITEMAP) { int flags = ((env->me_flags & MDB_MAPASYNC) && !force) ? MS_ASYNC : MS_SYNC; - if (MDB_MSYNC(env->me_map, env->me_psize * numpgs, flags)) + if (MDB_MSYNC(env->me_map, env->me_mapsize, flags)) rc = ErrCode(); #ifdef _WIN32 else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) @@ -2714,13 +2533,6 @@ mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs) return rc; } -int -mdb_env_sync(MDB_env *env, int force) -{ - MDB_meta *m = mdb_env_pick_meta(env); - return mdb_env_sync0(env, force, m->mm_last_pg+1); -} - /** Back up parent txn's cursors, then grab the originals for tracking */ static int mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst) @@ -2965,9 +2777,6 @@ mdb_txn_renew0(MDB_txn *txn) /* Moved to here to avoid a data race in read TXNs */ txn->mt_next_pgno = meta->mm_last_pg+1; -#ifdef MDB_VL32 - txn->mt_last_pgno = txn->mt_next_pgno - 1; -#endif txn->mt_flags = flags; @@ -3003,7 +2812,7 @@ mdb_txn_renew(MDB_txn *txn) rc = mdb_txn_renew0(txn); if (rc == MDB_SUCCESS) { - DPRINTF(("renew txn %"Y"u%c %p on mdbenv %p, root page %"Y"u", + DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root)); } @@ -3046,17 +2855,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) DPRINTF(("calloc: %s", strerror(errno))); return ENOMEM; } -#ifdef MDB_VL32 - if (!parent) { - txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); - if (!txn->mt_rpages) { - free(txn); - return ENOMEM; - } - txn->mt_rpages[0].mid = 0; - txn->mt_rpcheck = MDB_TRPAGE_SIZE/2; - } -#endif txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs; @@ -3084,9 +2882,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) parent->mt_child = txn; txn->mt_parent = parent; txn->mt_numdbs = parent->mt_numdbs; -#ifdef MDB_VL32 - txn->mt_rpages = parent->mt_rpages; -#endif memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); /* Copy parent's mt_dbflags, but clear DB_NEW */ for (i=0; imt_numdbs; i++) @@ -3112,16 +2907,12 @@ renew: rc = mdb_txn_renew0(txn); } if (rc) { - if (txn != env->me_txn0) { -#ifdef MDB_VL32 - free(txn->mt_rpages); -#endif + if (txn != env->me_txn0) free(txn); - } } else { txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */ *ret = txn; - DPRINTF(("begin txn %"Y"u%c %p on mdbenv %p, root page %"Y"u", + DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", txn->mt_txnid, (flags & MDB_RDONLY) ? 'r' : 'w', (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root)); } @@ -3136,7 +2927,7 @@ mdb_txn_env(MDB_txn *txn) return txn->mt_env; } -mdb_size_t +size_t mdb_txn_id(MDB_txn *txn) { if(!txn) return 0; @@ -3188,7 +2979,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) /* Export or close DBI handles opened in this txn */ mdb_dbis_update(txn, mode & MDB_END_UPDATE); - DPRINTF(("%s txn %"Y"u%c %p on mdbenv %p, root page %"Y"u", + DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", names[mode & MDB_END_OPMASK], txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root)); @@ -3242,31 +3033,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) mdb_midl_free(pghead); } -#ifdef MDB_VL32 - if (!txn->mt_parent) { - MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages; - unsigned i, x, n = tl[0].mid; - pthread_mutex_lock(&env->me_rpmutex); - for (i = 1; i <= n; i++) { - if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) { - /* tmp overflow pages that we didn't share in env */ - munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); - } else { - x = mdb_mid3l_search(el, tl[i].mid); - if (tl[i].mptr == el[x].mptr) { - el[x].mref--; - } else { - /* another tmp overflow page */ - munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); - } - } - } - pthread_mutex_unlock(&env->me_rpmutex); - tl[0].mid = 0; - if (mode & MDB_END_FREE) - free(tl); - } -#endif + if (mode & MDB_END_FREE) free(txn); } @@ -3298,9 +3065,6 @@ mdb_txn_abort(MDB_txn *txn) /** Save the freelist as of this transaction to the freeDB. * This changes the freelist. Keep trying until it stabilizes. - * - * When (MDB_DEVEL) & 2, the changes do not affect #mdb_page_alloc(), - * it then uses the transaction's original snapshot of the freeDB. */ static int mdb_freelist_save(MDB_txn *txn) @@ -3330,10 +3094,41 @@ mdb_freelist_save(MDB_txn *txn) * we may be unable to return them to me_pghead. */ MDB_page *mp = txn->mt_loose_pgs; + MDB_ID2 *dl = txn->mt_u.dirty_list; + unsigned x; if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0) return rc; - for (; mp; mp = NEXT_LOOSE_PAGE(mp)) + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) { mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + /* must also remove from dirty list */ + if (txn->mt_flags & MDB_TXN_WRITEMAP) { + for (x=1; x<=dl[0].mid; x++) + if (dl[x].mid == mp->mp_pgno) + break; + mdb_tassert(txn, x <= dl[0].mid); + } else { + x = mdb_mid2l_search(dl, mp->mp_pgno); + mdb_tassert(txn, dl[x].mid == mp->mp_pgno); + mdb_dpage_free(env, mp); + } + dl[x].mptr = NULL; + } + { + /* squash freed slots out of the dirty list */ + unsigned y; + for (y=1; dl[y].mptr && y <= dl[0].mid; y++); + if (y <= dl[0].mid) { + for(x=y, y++;;) { + while (!dl[y].mptr && y <= dl[0].mid) y++; + if (y > dl[0].mid) break; + dl[x++] = dl[y++]; + } + dl[0].mid = x-1; + } else { + /* all slots freed */ + dl[0].mid = 0; + } + } txn->mt_loose_pgs = NULL; txn->mt_loose_count = 0; } @@ -3389,10 +3184,10 @@ mdb_freelist_save(MDB_txn *txn) #if (MDB_DEBUG) > 1 { unsigned int i = free_pgs[0]; - DPRINTF(("IDL write txn %"Y"u root %"Y"u num %u", + DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u", txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i)); for (; i; i--) - DPRINTF(("IDL %"Y"u", free_pgs[i])); + DPRINTF(("IDL %"Z"u", free_pgs[i])); } #endif continue; @@ -3503,16 +3298,15 @@ mdb_page_flush(MDB_txn *txn, int keep) MDB_ID2L dl = txn->mt_u.dirty_list; unsigned psize = env->me_psize, j; int i, pagecount = dl[0].mid, rc; - size_t size = 0; - off64_t pos = 0; + size_t size = 0, pos = 0; pgno_t pgno = 0; MDB_page *dp = NULL; #ifdef _WIN32 OVERLAPPED ov; #else struct iovec iov[MDB_COMMIT_PAGES]; - ssize_t wsize = 0, wres; - off64_t wpos = 0, next_pos = 1; /* impossible pos, so pos != next_pos */ + ssize_t wpos = 0, wsize = 0, wres; + size_t next_pos = 1; /* impossible pos, so pos != next_pos */ int n = 0; #endif @@ -3611,7 +3405,7 @@ retry_seek: wpos = pos; wsize = 0; } - DPRINTF(("committing page %"Y"u", pgno)); + DPRINTF(("committing page %"Z"u", pgno)); next_pos = pos + size; iov[n].iov_len = size; iov[n].iov_base = (char *)dp; @@ -3619,10 +3413,6 @@ retry_seek: n++; #endif /* _WIN32 */ } -#ifdef MDB_VL32 - if (pgno > txn->mt_last_pgno) - txn->mt_last_pgno = pgno; -#endif /* MIPS has cache coherency issues, this is a no-op everywhere else * Note: for any size >= on-chip cache size, entire on-chip cache is @@ -3648,8 +3438,6 @@ done: return MDB_SUCCESS; } -static int ESECT mdb_env_share_locks(MDB_env *env, int *excl); - int mdb_txn_commit(MDB_txn *txn) { @@ -3826,7 +3614,7 @@ mdb_txn_commit(MDB_txn *txn) !(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS))) goto done; - DPRINTF(("committing txn %"Y"u %p on mdbenv %p, root page %"Y"u", + DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u", txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root)); /* Update DB root pointers */ @@ -3864,23 +3652,11 @@ mdb_txn_commit(MDB_txn *txn) mdb_audit(txn); #endif - if ((rc = mdb_page_flush(txn, 0))) - goto fail; - if (!F_ISSET(txn->mt_flags, MDB_TXN_NOSYNC) && - (rc = mdb_env_sync0(env, 0, txn->mt_next_pgno))) - goto fail; - if ((rc = mdb_env_write_meta(txn))) + if ((rc = mdb_page_flush(txn, 0)) || + (rc = mdb_env_sync(env, 0)) || + (rc = mdb_env_write_meta(txn))) goto fail; end_mode = MDB_END_COMMITTED|MDB_END_UPDATE; - if (env->me_flags & MDB_PREVSNAPSHOT) { - if (!(env->me_flags & MDB_NOLOCK)) { - int excl; - rc = mdb_env_share_locks(env, &excl); - if (rc) - goto fail; - } - env->me_flags ^= MDB_PREVSNAPSHOT; - } done: mdb_txn_end(txn, end_mode); @@ -3894,12 +3670,11 @@ fail: /** Read the environment parameters of a DB environment before * mapping it into memory. * @param[in] env the environment handle - * @param[in] prev whether to read the backup meta page * @param[out] meta address of where to store the meta information * @return 0 on success, non-zero on failure. */ static int ESECT -mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta) +mdb_env_read_header(MDB_env *env, MDB_meta *meta) { MDB_metabuf pbuf; MDB_page *p; @@ -3934,7 +3709,7 @@ mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta) p = (MDB_page *)&pbuf; if (!F_ISSET(p->mp_flags, P_META)) { - DPRINTF(("page %"Y"u not a meta page", p->mp_pgno)); + DPRINTF(("page %"Z"u not a meta page", p->mp_pgno)); return MDB_INVALID; } @@ -3950,7 +3725,7 @@ mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta) return MDB_VERSION_MISMATCH; } - if (off == 0 || (prev ? m->mm_txnid < meta->mm_txnid : m->mm_txnid > meta->mm_txnid)) + if (off == 0 || m->mm_txnid > meta->mm_txnid) *meta = *m; } return 0; @@ -4004,6 +3779,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) p = calloc(NUM_METAS, psize); if (!p) return ENOMEM; + p->mp_pgno = 0; p->mp_flags = P_META; *(MDB_meta *)METADATA(p) = *meta; @@ -4034,8 +3810,8 @@ mdb_env_write_meta(MDB_txn *txn) MDB_env *env; MDB_meta meta, metab, *mp; unsigned flags; - mdb_size_t mapsize; - off64_t off; + size_t mapsize; + off_t off; int rc, len, toggle; char *ptr; HANDLE mfd; @@ -4046,11 +3822,11 @@ mdb_env_write_meta(MDB_txn *txn) #endif toggle = txn->mt_txnid & 1; - DPRINTF(("writing meta page %d for root page %"Y"u", + DPRINTF(("writing meta page %d for root page %"Z"u", toggle, txn->mt_dbs[MAIN_DBI].md_root)); env = txn->mt_env; - flags = txn->mt_flags | env->me_flags; + flags = env->me_flags; mp = env->me_metas[toggle]; mapsize = env->me_metas[toggle ^ 1]->mm_mapsize; /* Persist any increases of mapsize config */ @@ -4098,7 +3874,10 @@ mdb_env_write_meta(MDB_txn *txn) len = sizeof(MDB_meta) - off; off += (char *)mp - env->me_map; - /* Write to the SYNC fd */ + /* Write to the SYNC fd unless MDB_NOSYNC/MDB_NOMETASYNC. + * (me_mfd goes to the same file as me_fd, but writing to it + * also syncs to disk. Avoids a separate fdatasync() call.) + */ mfd = (flags & (MDB_NOSYNC|MDB_NOMETASYNC)) ? env->me_fd : env->me_mfd; #ifdef _WIN32 { @@ -4159,8 +3938,7 @@ static MDB_meta * mdb_env_pick_meta(const MDB_env *env) { MDB_meta *const *metas = env->me_metas; - return metas[ (metas[0]->mm_txnid < metas[1]->mm_txnid) ^ - ((env->me_flags & MDB_PREVSNAPSHOT) != 0) ]; + return metas[ metas[0]->mm_txnid < metas[1]->mm_txnid ]; } int ESECT @@ -4180,9 +3958,6 @@ mdb_env_create(MDB_env **env) #ifdef MDB_USE_POSIX_SEM e->me_rmutex = SEM_FAILED; e->me_wmutex = SEM_FAILED; -#elif defined MDB_USE_SYSV_SEM - e->me_rmutex->semid = -1; - e->me_wmutex->semid = -1; #endif e->me_pid = getpid(); GET_PAGESIZE(e->me_os_psize); @@ -4191,19 +3966,6 @@ mdb_env_create(MDB_env **env) return MDB_SUCCESS; } -#ifdef _WIN32 -/** @brief Map a result from an NTAPI call to WIN32. */ -static DWORD -mdb_nt2win32(NTSTATUS st) -{ - OVERLAPPED o = {0}; - DWORD br; - o.Internal = st; - GetOverlappedResult(NULL, &o, &br, FALSE); - return GetLastError(); -} -#endif - static int ESECT mdb_env_map(MDB_env *env, void *addr) { @@ -4211,51 +3973,42 @@ mdb_env_map(MDB_env *env, void *addr) unsigned int flags = env->me_flags; #ifdef _WIN32 int rc; - int access = SECTION_MAP_READ; HANDLE mh; - void *map; - SIZE_T msize; - ULONG pageprot = PAGE_READONLY, secprot, alloctype; + LONG sizelo, sizehi; + size_t msize; - if (flags & MDB_WRITEMAP) { - access |= SECTION_MAP_WRITE; - pageprot = PAGE_READWRITE; - } if (flags & MDB_RDONLY) { - secprot = PAGE_READONLY; + /* Don't set explicit map size, use whatever exists */ msize = 0; - alloctype = 0; + sizelo = 0; + sizehi = 0; } else { - secprot = PAGE_READWRITE; msize = env->me_mapsize; - alloctype = MEM_RESERVE; + sizelo = msize & 0xffffffff; + sizehi = msize >> 16 >> 16; /* only needed on Win64 */ + + /* Windows won't create mappings for zero length files. + * and won't map more than the file size. + * Just set the maxsize right now. + */ + if (!(flags & MDB_WRITEMAP) && (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo + || !SetEndOfFile(env->me_fd) + || SetFilePointer(env->me_fd, 0, NULL, 0) != 0)) + return ErrCode(); } - rc = NtCreateSection(&mh, access, NULL, NULL, secprot, SEC_RESERVE, env->me_fd); - if (rc) - return mdb_nt2win32(rc); - map = addr; -#ifdef MDB_VL32 - msize = NUM_METAS * env->me_psize; -#endif - rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot); -#ifdef MDB_VL32 - env->me_fmh = mh; -#else - NtClose(mh); -#endif - if (rc) - return mdb_nt2win32(rc); - env->me_map = map; -#else -#ifdef MDB_VL32 - (void) flags; - env->me_map = mmap(addr, NUM_METAS * env->me_psize, PROT_READ, MAP_SHARED, - env->me_fd, 0); - if (env->me_map == MAP_FAILED) { - env->me_map = NULL; + mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ? + PAGE_READWRITE : PAGE_READONLY, + sizehi, sizelo, NULL); + if (!mh) return ErrCode(); - } + env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? + FILE_MAP_WRITE : FILE_MAP_READ, + 0, 0, msize, addr); + rc = env->me_map ? 0 : ErrCode(); + CloseHandle(mh); + if (rc) + return rc; #else int prot = PROT_READ; if (flags & MDB_WRITEMAP) { @@ -4289,7 +4042,6 @@ mdb_env_map(MDB_env *env, void *addr) */ if (addr && env->me_map != addr) return EBUSY; /* TODO: Make a new MDB_* error code? */ -#endif p = (MDB_page *)env->me_map; env->me_metas[0] = METADATA(p); @@ -4299,17 +4051,15 @@ mdb_env_map(MDB_env *env, void *addr) } int ESECT -mdb_env_set_mapsize(MDB_env *env, mdb_size_t size) +mdb_env_set_mapsize(MDB_env *env, size_t size) { /* If env is already open, caller is responsible for making * sure there are no active txns. */ if (env->me_map) { - MDB_meta *meta; -#ifndef MDB_VL32 - void *old; int rc; -#endif + MDB_meta *meta; + void *old; if (env->me_txn) return EINVAL; meta = mdb_env_pick_meta(env); @@ -4317,21 +4067,16 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size) size = meta->mm_mapsize; { /* Silently round up to minimum if the size is too small */ - mdb_size_t minsize = (meta->mm_last_pg + 1) * env->me_psize; + size_t minsize = (meta->mm_last_pg + 1) * env->me_psize; if (size < minsize) size = minsize; } -#ifndef MDB_VL32 - /* For MDB_VL32 this bit is a noop since we dynamically remap - * chunks of the DB anyway. - */ munmap(env->me_map, env->me_mapsize); env->me_mapsize = size; old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL; rc = mdb_env_map(env, old); if (rc) return rc; -#endif /* !MDB_VL32 */ } env->me_mapsize = size; if (env->me_psize) @@ -4367,7 +4112,7 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) } static int ESECT -mdb_fsize(HANDLE fd, mdb_size_t *size) +mdb_fsize(HANDLE fd, size_t *size) { #ifdef _WIN32 LARGE_INTEGER fsize; @@ -4387,6 +4132,189 @@ mdb_fsize(HANDLE fd, mdb_size_t *size) return MDB_SUCCESS; } + +#ifdef _WIN32 +typedef wchar_t mdb_nchar_t; +# define MDB_NAME(str) L##str +# define mdb_name_cpy wcscpy +#else +/** Character type for file names: char on Unix, wchar_t on Windows */ +typedef char mdb_nchar_t; +# define MDB_NAME(str) str /**< #mdb_nchar_t[] string literal */ +# define mdb_name_cpy strcpy /**< Copy name (#mdb_nchar_t string) */ +#endif + +/** Filename - string of #mdb_nchar_t[] */ +typedef struct MDB_name { + int mn_len; /**< Length */ + int mn_alloced; /**< True if #mn_val was malloced */ + mdb_nchar_t *mn_val; /**< Contents */ +} MDB_name; + +/** Filename suffixes [datafile,lockfile][without,with MDB_NOSUBDIR] */ +static const mdb_nchar_t *const mdb_suffixes[2][2] = { + { MDB_NAME("/data.mdb"), MDB_NAME("") }, + { MDB_NAME("/lock.mdb"), MDB_NAME("-lock") } +}; + +#define MDB_SUFFLEN 9 /**< Max string length in #mdb_suffixes[] */ + +/** Set up filename + scratch area for filename suffix, for opening files. + * It should be freed with #mdb_fname_destroy(). + * On Windows, paths are converted from char *UTF-8 to wchar_t *UTF-16. + * + * @param[in] path Pathname for #mdb_env_open(). + * @param[in] envflags Whether a subdir and/or lockfile will be used. + * @param[out] fname Resulting filename, with room for a suffix if necessary. + */ +static int ESECT +mdb_fname_init(const char *path, unsigned envflags, MDB_name *fname) +{ + int no_suffix = F_ISSET(envflags, MDB_NOSUBDIR|MDB_NOLOCK); + fname->mn_alloced = 0; +#ifdef _WIN32 + return utf8_to_utf16(path, fname, no_suffix ? 0 : MDB_SUFFLEN); +#else + fname->mn_len = strlen(path); + if (no_suffix) + fname->mn_val = (char *) path; + else if ((fname->mn_val = malloc(fname->mn_len + MDB_SUFFLEN+1)) != NULL) { + fname->mn_alloced = 1; + strcpy(fname->mn_val, path); + } + else + return ENOMEM; + return MDB_SUCCESS; +#endif +} + +/** Destroy \b fname from #mdb_fname_init() */ +#define mdb_fname_destroy(fname) \ + do { if ((fname).mn_alloced) free((fname).mn_val); } while (0) + +#ifdef O_CLOEXEC /* POSIX.1-2008: Set FD_CLOEXEC atomically at open() */ +# define MDB_CLOEXEC O_CLOEXEC +#else +# define MDB_CLOEXEC 0 +#endif + +/** File type, access mode etc. for #mdb_fopen() */ +enum mdb_fopen_type { +#ifdef _WIN32 + MDB_O_RDONLY, MDB_O_RDWR, MDB_O_META, MDB_O_COPY, MDB_O_LOCKS +#else + /* A comment in mdb_fopen() explains some O_* flag choices. */ + MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */ + MDB_O_RDWR = O_RDWR |O_CREAT, /**< for me_fd */ + MDB_O_META = O_WRONLY|MDB_DSYNC |MDB_CLOEXEC, /**< for me_mfd */ + MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL|MDB_CLOEXEC, /**< for #mdb_env_copy() */ + /** Bitmask for open() flags in enum #mdb_fopen_type. The other bits + * distinguish otherwise-equal MDB_O_* constants from each other. + */ + MDB_O_MASK = MDB_O_RDWR|MDB_CLOEXEC | MDB_O_RDONLY|MDB_O_META|MDB_O_COPY, + MDB_O_LOCKS = MDB_O_RDWR|MDB_CLOEXEC | ((MDB_O_MASK+1) & ~MDB_O_MASK) /**< for me_lfd */ +#endif +}; + +/** Open an LMDB file. + * @param[in] env The LMDB environment. + * @param[in,out] fname Path from from #mdb_fname_init(). A suffix is + * appended if necessary to create the filename, without changing mn_len. + * @param[in] which Determines file type, access mode, etc. + * @param[in] mode The Unix permissions for the file, if we create it. + * @param[out] res Resulting file handle. + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_fopen(const MDB_env *env, MDB_name *fname, + enum mdb_fopen_type which, mdb_mode_t mode, + HANDLE *res) +{ + int rc = MDB_SUCCESS; + HANDLE fd; +#ifdef _WIN32 + DWORD acc, share, disp, attrs; +#else + int flags; +#endif + + if (fname->mn_alloced) /* modifiable copy */ + mdb_name_cpy(fname->mn_val + fname->mn_len, + mdb_suffixes[which==MDB_O_LOCKS][F_ISSET(env->me_flags, MDB_NOSUBDIR)]); + + /* The directory must already exist. Usually the file need not. + * MDB_O_META requires the file because we already created it using + * MDB_O_RDWR. MDB_O_COPY must not overwrite an existing file. + * + * With MDB_O_COPY we do not want the OS to cache the writes, since + * the source data is already in the OS cache. + * + * The lockfile needs FD_CLOEXEC (close file descriptor on exec*()) + * to avoid the flock() issues noted under Caveats in lmdb.h. + * Also set it for other filehandles which the user cannot get at + * and close himself, which he may need after fork(). I.e. all but + * me_fd, which programs do use via mdb_env_get_fd(). + */ + +#ifdef _WIN32 + acc = GENERIC_READ|GENERIC_WRITE; + share = FILE_SHARE_READ|FILE_SHARE_WRITE; + disp = OPEN_ALWAYS; + attrs = FILE_ATTRIBUTE_NORMAL; + switch (which) { + case MDB_O_RDONLY: /* read-only datafile */ + acc = GENERIC_READ; + disp = OPEN_EXISTING; + break; + case MDB_O_META: /* for writing metapages */ + acc = GENERIC_WRITE; + disp = OPEN_EXISTING; + attrs = FILE_ATTRIBUTE_NORMAL|FILE_FLAG_WRITE_THROUGH; + break; + case MDB_O_COPY: /* mdb_env_copy() & co */ + acc = GENERIC_WRITE; + share = 0; + disp = CREATE_NEW; + attrs = FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH; + break; + default: break; /* silence gcc -Wswitch (not all enum values handled) */ + } + fd = CreateFileW(fname->mn_val, acc, share, NULL, disp, attrs, NULL); +#else + fd = open(fname->mn_val, which & MDB_O_MASK, mode); +#endif + + if (fd == INVALID_HANDLE_VALUE) + rc = ErrCode(); +#ifndef _WIN32 + else { + if (which != MDB_O_RDONLY && which != MDB_O_RDWR) { + /* Set CLOEXEC if we could not pass it to open() */ + if (!MDB_CLOEXEC && (flags = fcntl(fd, F_GETFD)) != -1) + (void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC); + } + if (which == MDB_O_COPY && env->me_psize >= env->me_os_psize) { + /* This may require buffer alignment. There is no portable + * way to ask how much, so we require OS pagesize alignment. + */ +# ifdef F_NOCACHE /* __APPLE__ */ + (void) fcntl(fd, F_NOCACHE, 1); +# elif defined O_DIRECT + /* open(...O_DIRECT...) would break on filesystems without + * O_DIRECT support (ITS#7682). Try to set it here instead. + */ + if ((flags = fcntl(fd, F_GETFL)) != -1) + (void) fcntl(fd, F_SETFL, flags | O_DIRECT); +# endif + } + } +#endif /* !_WIN32 */ + + *res = fd; + return rc; +} + + #ifdef BROKEN_FDATASYNC #include #include @@ -4395,7 +4323,7 @@ mdb_fsize(HANDLE fd, mdb_size_t *size) /** Further setup required for opening an LMDB environment */ static int ESECT -mdb_env_open2(MDB_env *env, int prev) +mdb_env_open2(MDB_env *env) { unsigned int flags = env->me_flags; int i, newenv = 0, rc; @@ -4458,7 +4386,7 @@ mdb_env_open2(MDB_env *env, int prev) } #endif - if ((i = mdb_env_read_header(env, prev, &meta)) != 0) { + if ((i = mdb_env_read_header(env, &meta)) != 0) { if (i != ENOENT) return i; DPUTS("new mdbenv"); @@ -4481,7 +4409,7 @@ mdb_env_open2(MDB_env *env, int prev) /* Make sure mapsize >= committed data size. Even when using * mm_mapsize, which could be broken in old files (ITS#7789). */ - mdb_size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; + size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; if (env->me_mapsize < minsize) env->me_mapsize = minsize; } @@ -4500,18 +4428,6 @@ mdb_env_open2(MDB_env *env, int prev) return rc; newenv = 0; } -#ifdef _WIN32 - /* For FIXEDMAP, make sure the file is non-empty before we attempt to map it */ - if (newenv) { - char dummy = 0; - DWORD len; - rc = WriteFile(env->me_fd, &dummy, 1, &len, NULL); - if (!rc) { - rc = ErrCode(); - return rc; - } - } -#endif rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL); if (rc) @@ -4534,9 +4450,6 @@ mdb_env_open2(MDB_env *env, int prev) #endif env->me_maxpg = env->me_mapsize / env->me_psize; - if (env->me_txns) - env->me_txns->mti_txnid = meta.mm_txnid; - #if MDB_DEBUG { MDB_meta *meta = mdb_env_pick_meta(env); @@ -4546,11 +4459,11 @@ mdb_env_open2(MDB_env *env, int prev) meta->mm_version, env->me_psize)); DPRINTF(("using meta page %d", (int) (meta->mm_txnid & 1))); DPRINTF(("depth: %u", db->md_depth)); - DPRINTF(("entries: %"Y"u", db->md_entries)); - DPRINTF(("branch pages: %"Y"u", db->md_branch_pages)); - DPRINTF(("leaf pages: %"Y"u", db->md_leaf_pages)); - DPRINTF(("overflow pages: %"Y"u", db->md_overflow_pages)); - DPRINTF(("root: %"Y"u", db->md_root)); + DPRINTF(("entries: %"Z"u", db->md_entries)); + DPRINTF(("branch pages: %"Z"u", db->md_branch_pages)); + DPRINTF(("leaf pages: %"Z"u", db->md_leaf_pages)); + DPRINTF(("overflow pages: %"Z"u", db->md_overflow_pages)); + DPRINTF(("root: %"Z"u", db->md_root)); } #endif @@ -4567,7 +4480,11 @@ mdb_env_reader_dest(void *ptr) { MDB_reader *reader = ptr; - reader->mr_pid = 0; +#ifndef _WIN32 + if (reader->mr_pid == getpid()) /* catch pthread_exit() in child process */ +#endif + /* We omit the mutex, so do this atomically (i.e. skip mr_txnid) */ + reader->mr_pid = 0; } #ifdef _WIN32 @@ -4632,6 +4549,9 @@ static int ESECT mdb_env_share_locks(MDB_env *env, int *excl) { int rc = 0; + MDB_meta *meta = mdb_env_pick_meta(env); + + env->me_txns->mti_txnid = meta->mm_txnid; #ifdef _WIN32 { @@ -4804,56 +4724,30 @@ mdb_hash_enc(MDB_val *val, char *encbuf) /** Open and/or initialize the lock region for the environment. * @param[in] env The LMDB environment. - * @param[in] lpath The pathname of the file used for the lock region. + * @param[in] fname Filename + scratch area, from #mdb_fname_init(). * @param[in] mode The Unix permissions for the file, if we create it. * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive * @return 0 on success, non-zero on failure. */ static int ESECT -mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) +mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl) { #ifdef _WIN32 # define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT #else # define MDB_ERRCODE_ROFS EROFS -#ifdef O_CLOEXEC /* Linux: Open file and set FD_CLOEXEC atomically */ -# define MDB_CLOEXEC O_CLOEXEC -#else - int fdflags; -# define MDB_CLOEXEC 0 -#endif -#endif -#ifdef MDB_USE_SYSV_SEM - int semid; - union semun semu; #endif int rc; - off64_t size, rsize; + off_t size, rsize; -#ifdef _WIN32 - wchar_t *wlpath; - rc = utf8_to_utf16(lpath, -1, &wlpath, NULL); - if (rc) - return rc; - env->me_lfd = CreateFileW(wlpath, GENERIC_READ|GENERIC_WRITE, - FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, - FILE_ATTRIBUTE_NORMAL, NULL); - free(wlpath); -#else - env->me_lfd = open(lpath, O_RDWR|O_CREAT|MDB_CLOEXEC, mode); -#endif - if (env->me_lfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); + rc = mdb_fopen(env, fname, MDB_O_LOCKS, mode, &env->me_lfd); + if (rc) { + /* Omit lockfile if read-only env on read-only filesystem */ if (rc == MDB_ERRCODE_ROFS && (env->me_flags & MDB_RDONLY)) { return MDB_SUCCESS; } - goto fail_errno; + goto fail; } -#if ! ((MDB_CLOEXEC) || defined(_WIN32)) - /* Lose record locks when exec*() */ - if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0) - fcntl(env->me_lfd, F_SETFD, fdflags); -#endif if (!(env->me_flags & MDB_NOTLS)) { rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest); @@ -4978,31 +4872,29 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) env->me_wmutex = sem_open(env->me_txns->mti_wmname, O_CREAT|O_EXCL, mode, 1); if (env->me_wmutex == SEM_FAILED) goto fail_errno; -#elif defined(MDB_USE_SYSV_SEM) - unsigned short vals[2] = {1, 1}; - key_t key = ftok(lpath, 'M'); - if (key == -1) - goto fail_errno; - semid = semget(key, 2, (mode & 0777) | IPC_CREAT); - if (semid < 0) - goto fail_errno; - semu.array = vals; - if (semctl(semid, 0, SETALL, semu) < 0) - goto fail_errno; - env->me_txns->mti_semid = semid; #else /* MDB_USE_POSIX_MUTEX: */ pthread_mutexattr_t mattr; - if ((rc = pthread_mutexattr_init(&mattr)) - || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) -#ifdef MDB_ROBUST_SUPPORTED - || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST)) -#endif - || (rc = pthread_mutex_init(env->me_txns->mti_rmutex, &mattr)) - || (rc = pthread_mutex_init(env->me_txns->mti_wmutex, &mattr))) + /* Solaris needs this before initing a robust mutex. Otherwise + * it may skip the init and return EBUSY "seems someone already + * inited" or EINVAL "it was inited differently". + */ + memset(env->me_txns->mti_rmutex, 0, sizeof(*env->me_txns->mti_rmutex)); + memset(env->me_txns->mti_wmutex, 0, sizeof(*env->me_txns->mti_wmutex)); + + if ((rc = pthread_mutexattr_init(&mattr))) goto fail; + + rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED); +#ifdef MDB_ROBUST_SUPPORTED + if (!rc) rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST); +#endif + if (!rc) rc = pthread_mutex_init(env->me_txns->mti_rmutex, &mattr); + if (!rc) rc = pthread_mutex_init(env->me_txns->mti_wmutex, &mattr); pthread_mutexattr_destroy(&mattr); -#endif /* _WIN32 || ... */ + if (rc) + goto fail; +#endif /* _WIN32 || MDB_USE_POSIX_SEM */ env->me_txns->mti_magic = MDB_MAGIC; env->me_txns->mti_format = MDB_LOCK_FORMAT; @@ -5010,9 +4902,6 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) env->me_txns->mti_numreaders = 0; } else { -#ifdef MDB_USE_SYSV_SEM - struct semid_ds buf; -#endif if (env->me_txns->mti_magic != MDB_MAGIC) { DPUTS("lock region has invalid magic"); rc = MDB_INVALID; @@ -5038,33 +4927,8 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) if (env->me_rmutex == SEM_FAILED) goto fail_errno; env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0); if (env->me_wmutex == SEM_FAILED) goto fail_errno; -#elif defined(MDB_USE_SYSV_SEM) - semid = env->me_txns->mti_semid; - semu.buf = &buf; - /* check for read access */ - if (semctl(semid, 0, IPC_STAT, semu) < 0) - goto fail_errno; - /* check for write access */ - if (semctl(semid, 0, IPC_SET, semu) < 0) - goto fail_errno; #endif } -#ifdef MDB_USE_SYSV_SEM - env->me_rmutex->semid = semid; - env->me_wmutex->semid = semid; - env->me_rmutex->semnum = 0; - env->me_wmutex->semnum = 1; - env->me_rmutex->locked = &env->me_txns->mti_rlocked; - env->me_wmutex->locked = &env->me_txns->mti_wlocked; -#endif -#ifdef MDB_VL32 -#ifdef _WIN32 - env->me_rpmutex = CreateMutex(NULL, FALSE, NULL); -#else - pthread_mutex_init(&env->me_rpmutex, NULL); -#endif -#endif - return MDB_SUCCESS; fail_errno: @@ -5073,19 +4937,13 @@ fail: return rc; } - /** The name of the lock file in the DB environment */ -#define LOCKNAME "/lock.mdb" - /** The name of the data file in the DB environment */ -#define DATANAME "/data.mdb" - /** The suffix of the lock file when no subdir is used */ -#define LOCKSUFF "-lock" /** Only a subset of the @ref mdb_env flags can be changed * at runtime. Changing other flags requires closing the * environment and re-opening it with the new flags. */ #define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT) #define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \ - MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD|MDB_PREVSNAPSHOT) + MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD) #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS) # error "Persistent DB flags & env flags overlap, but both go in mm_flags" @@ -5094,47 +4952,18 @@ fail: int ESECT mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) { - int oflags, rc, len, excl = -1; - char *lpath, *dpath; -#ifdef _WIN32 - wchar_t *wpath; -#endif + int rc, excl = -1; + MDB_name fname; if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) return EINVAL; -#ifdef MDB_VL32 - if (flags & MDB_WRITEMAP) { - /* silently ignore WRITEMAP in 32 bit mode */ - flags ^= MDB_WRITEMAP; - } - if (flags & MDB_FIXEDMAP) { - /* cannot support FIXEDMAP */ - return EINVAL; - } -#endif - - len = strlen(path); - if (flags & MDB_NOSUBDIR) { - rc = len + sizeof(LOCKSUFF) + len + 1; - } else { - rc = len + sizeof(LOCKNAME) + len + sizeof(DATANAME); - } - lpath = malloc(rc); - if (!lpath) - return ENOMEM; - if (flags & MDB_NOSUBDIR) { - dpath = lpath + len + sizeof(LOCKSUFF); - sprintf(lpath, "%s" LOCKSUFF, path); - strcpy(dpath, path); - } else { - dpath = lpath + len + sizeof(LOCKNAME); - sprintf(lpath, "%s" LOCKNAME, path); - sprintf(dpath, "%s" DATANAME, path); - } - - rc = MDB_SUCCESS; flags |= env->me_flags; + + rc = mdb_fname_init(path, flags, &fname); + if (rc) + return rc; + if (flags & MDB_RDONLY) { /* silently ignore WRITEMAP when we're only getting read access */ flags &= ~MDB_WRITEMAP; @@ -5143,17 +4972,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) rc = ENOMEM; } -#ifdef MDB_VL32 - if (!rc) { - env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3)); - if (!env->me_rpages) { - rc = ENOMEM; - goto leave; - } - env->me_rpages[0].mid = 0; - env->me_rpcheck = MDB_ERPAGE_SIZE/2; - } -#endif env->me_flags = flags |= MDB_ENV_ACTIVE; if (rc) goto leave; @@ -5170,76 +4988,34 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode /* For RDONLY, get lockfile after we know datafile exists */ if (!(flags & (MDB_RDONLY|MDB_NOLOCK))) { - rc = mdb_env_setup_locks(env, lpath, mode, &excl); + rc = mdb_env_setup_locks(env, &fname, mode, &excl); if (rc) goto leave; } -#ifdef _WIN32 - if (F_ISSET(flags, MDB_RDONLY)) { - oflags = GENERIC_READ; - len = OPEN_EXISTING; - } else { - oflags = GENERIC_READ|GENERIC_WRITE; - len = OPEN_ALWAYS; - } - mode = FILE_ATTRIBUTE_NORMAL; - rc = utf8_to_utf16(dpath, -1, &wpath, NULL); + rc = mdb_fopen(env, &fname, + (flags & MDB_RDONLY) ? MDB_O_RDONLY : MDB_O_RDWR, + mode, &env->me_fd); if (rc) goto leave; - env->me_fd = CreateFileW(wpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, - NULL, len, mode, NULL); - free(wpath); -#else - if (F_ISSET(flags, MDB_RDONLY)) - oflags = O_RDONLY; - else - oflags = O_RDWR | O_CREAT; - - env->me_fd = open(dpath, oflags, mode); -#endif - if (env->me_fd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; - } if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) { - rc = mdb_env_setup_locks(env, lpath, mode, &excl); + rc = mdb_env_setup_locks(env, &fname, mode, &excl); if (rc) goto leave; - if ((flags & MDB_PREVSNAPSHOT) && !excl) { - rc = EAGAIN; - goto leave; - } } - if ((rc = mdb_env_open2(env, flags & MDB_PREVSNAPSHOT)) == MDB_SUCCESS) { - if (flags & (MDB_RDONLY|MDB_WRITEMAP)) { - env->me_mfd = env->me_fd; - } else { + if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { + if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) { /* Synchronous fd for meta writes. Needed even with * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. */ -#ifdef _WIN32 - len = OPEN_EXISTING; - rc = utf8_to_utf16(dpath, -1, &wpath, NULL); + rc = mdb_fopen(env, &fname, MDB_O_META, mode, &env->me_mfd); if (rc) goto leave; - env->me_mfd = CreateFileW(wpath, oflags, - FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, - mode | FILE_FLAG_WRITE_THROUGH, NULL); - free(wpath); -#else - oflags &= ~O_CREAT; - env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode); -#endif - if (env->me_mfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; - } } DPRINTF(("opened dbenv %p", (void *) env)); - if (excl > 0 && !(flags & MDB_PREVSNAPSHOT)) { + if (excl > 0) { rc = mdb_env_share_locks(env, &excl); if (rc) goto leave; @@ -5256,16 +5032,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); txn->mt_env = env; -#ifdef MDB_VL32 - txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); - if (!txn->mt_rpages) { - free(txn); - rc = ENOMEM; - goto leave; - } - txn->mt_rpages[0].mid = 0; - txn->mt_rpcheck = MDB_TRPAGE_SIZE/2; -#endif txn->mt_dbxs = env->me_dbxs; txn->mt_flags = MDB_TXN_FINISHED; env->me_txn0 = txn; @@ -5279,7 +5045,7 @@ leave: if (rc) { mdb_env_close0(env, excl); } - free(lpath); + mdb_fname_destroy(fname); return rc; } @@ -5304,15 +5070,6 @@ mdb_env_close0(MDB_env *env, int excl) free(env->me_dbflags); free(env->me_path); free(env->me_dirty_list); -#ifdef MDB_VL32 - if (env->me_txn0 && env->me_txn0->mt_rpages) - free(env->me_txn0->mt_rpages); - { unsigned int x; - for (x=1; x<=env->me_rpages[0].mid; x++) - munmap(env->me_rpages[x].mptr, env->me_rpages[x].mcnt * env->me_psize); - } - free(env->me_rpages); -#endif free(env->me_txn0); mdb_midl_free(env->me_free_pgs); @@ -5330,18 +5087,14 @@ mdb_env_close0(MDB_env *env, int excl) } if (env->me_map) { -#ifdef MDB_VL32 - munmap(env->me_map, NUM_METAS*env->me_psize); -#else munmap(env->me_map, env->me_mapsize); -#endif } - if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) + if (env->me_mfd != INVALID_HANDLE_VALUE) (void) close(env->me_mfd); if (env->me_fd != INVALID_HANDLE_VALUE) (void) close(env->me_fd); if (env->me_txns) { - MDB_PID_T pid = env->me_pid; + MDB_PID_T pid = getpid(); /* Clearing readers is done in this function because * me_txkey with its destructor must be disabled first. * @@ -5375,16 +5128,6 @@ mdb_env_close0(MDB_env *env, int excl) sem_unlink(env->me_txns->mti_wmname); } } -#elif defined(MDB_USE_SYSV_SEM) - if (env->me_rmutex->semid != -1) { - /* If we have the filelock: If we are the - * only remaining user, clean up semaphores. - */ - if (excl == 0) - mdb_env_excl_lock(env, &excl); - if (excl > 0) - semctl(env->me_rmutex->semid, 0, IPC_RMID); - } #endif munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); } @@ -5399,14 +5142,6 @@ mdb_env_close0(MDB_env *env, int excl) #endif (void) close(env->me_lfd); } -#ifdef MDB_VL32 -#ifdef _WIN32 - if (env->me_fmh) CloseHandle(env->me_fmh); - if (env->me_rpmutex) CloseHandle(env->me_rpmutex); -#else - pthread_mutex_destroy(&env->me_rpmutex); -#endif -#endif env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); } @@ -5430,18 +5165,18 @@ mdb_env_close(MDB_env *env) free(env); } -/** Compare two items pointing at aligned mdb_size_t's */ +/** Compare two items pointing at aligned size_t's */ static int mdb_cmp_long(const MDB_val *a, const MDB_val *b) { - return (*(mdb_size_t *)a->mv_data < *(mdb_size_t *)b->mv_data) ? -1 : - *(mdb_size_t *)a->mv_data > *(mdb_size_t *)b->mv_data; + return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 : + *(size_t *)a->mv_data > *(size_t *)b->mv_data; } /** Compare two items pointing at aligned unsigned int's. * * This is also set as #MDB_INTEGERDUP|#MDB_DUPFIXED's #MDB_dbx.%md_dcmp, - * but #mdb_cmp_clong() is called instead if the data type is mdb_size_t. + * but #mdb_cmp_clong() is called instead if the data type is size_t. */ static int mdb_cmp_int(const MDB_val *a, const MDB_val *b) @@ -5546,7 +5281,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) nkeys = NUMKEYS(mp); - DPRINTF(("searching %u keys in %s %spage %"Y"u", + DPRINTF(("searching %u keys in %s %spage %"Z"u", nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "", mdb_dbg_pgno(mp))); @@ -5558,7 +5293,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) * alignment is guaranteed. Use faster mdb_cmp_int. */ if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) { - if (NODEPTR(mp, 1)->mn_ksize == sizeof(mdb_size_t)) + if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t)) cmp = mdb_cmp_long; else cmp = mdb_cmp_int; @@ -5594,7 +5329,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) DPRINTF(("found leaf index %u [%s], rc = %i", i, DKEY(&nodekey), rc)); else - DPRINTF(("found branch index %u [%s -> %"Y"u], rc = %i", + DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i", i, DKEY(&nodekey), NODEPGNO(node), rc)); #endif if (rc == 0) @@ -5642,7 +5377,7 @@ static void mdb_cursor_pop(MDB_cursor *mc) { if (mc->mc_snum) { - DPRINTF(("popping page %"Y"u off db %d cursor %p", + DPRINTF(("popping page %"Z"u off db %d cursor %p", mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *) mc)); mc->mc_snum--; @@ -5654,11 +5389,13 @@ mdb_cursor_pop(MDB_cursor *mc) } } -/** Push a page onto the top of the cursor's stack. */ +/** Push a page onto the top of the cursor's stack. + * Set #MDB_TXN_ERROR on failure. + */ static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) { - DPRINTF(("pushing page %"Y"u on db %d cursor %p", mp->mp_pgno, + DPRINTF(("pushing page %"Z"u on db %d cursor %p", mp->mp_pgno, DDBI(mc), (void *) mc)); if (mc->mc_snum >= CURSOR_STACK) { @@ -5673,295 +5410,8 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) return MDB_SUCCESS; } -#ifdef MDB_VL32 -/** Map a read-only page. - * There are two levels of tracking in use, a per-txn list and a per-env list. - * ref'ing and unref'ing the per-txn list is faster since it requires no - * locking. Pages are cached in the per-env list for global reuse, and a lock - * is required. Pages are not immediately unmapped when their refcnt goes to - * zero; they hang around in case they will be reused again soon. - * - * When the per-txn list gets full, all pages with refcnt=0 are purged from the - * list and their refcnts in the per-env list are decremented. - * - * When the per-env list gets full, all pages with refcnt=0 are purged from the - * list and their pages are unmapped. - * - * @note "full" means the list has reached its respective rpcheck threshold. - * This threshold slowly raises if no pages could be purged on a given check, - * and returns to its original value when enough pages were purged. - * - * If purging doesn't free any slots, filling the per-txn list will return - * MDB_TXN_FULL, and filling the per-env list returns MDB_MAP_FULL. - * - * Reference tracking in a txn is imperfect, pages can linger with non-zero - * refcnt even without active references. It was deemed to be too invasive - * to add unrefs in every required location. However, all pages are unref'd - * at the end of the transaction. This guarantees that no stale references - * linger in the per-env list. - * - * Usually we map chunks of 16 pages at a time, but if an overflow page begins - * at the tail of the chunk we extend the chunk to include the entire overflow - * page. Unfortunately, pages can be turned into overflow pages after their - * chunk was already mapped. In that case we must remap the chunk if the - * overflow page is referenced. If the chunk's refcnt is 0 we can just remap - * it, otherwise we temporarily map a new chunk just for the overflow page. - * - * @note this chunk handling means we cannot guarantee that a data item - * returned from the DB will stay alive for the duration of the transaction: - * We unref pages as soon as a cursor moves away from the page - * A subsequent op may cause a purge, which may unmap any unref'd chunks - * The caller must copy the data if it must be used later in the same txn. - * - * Also - our reference counting revolves around cursors, but overflow pages - * aren't pointed to by a cursor's page stack. We have to remember them - * explicitly, in the added mc_ovpg field. A single cursor can only hold a - * reference to one overflow page at a time. - * - * @param[in] txn the transaction for this access. - * @param[in] pgno the page number for the page to retrieve. - * @param[out] ret address of a pointer where the page's address will be stored. - * @return 0 on success, non-zero on failure. - */ -static int -mdb_rpage_get(MDB_txn *txn, pgno_t pg0, MDB_page **ret) -{ - MDB_env *env = txn->mt_env; - MDB_page *p; - MDB_ID3L tl = txn->mt_rpages; - MDB_ID3L el = env->me_rpages; - MDB_ID3 id3; - unsigned x, rem; - pgno_t pgno; - int rc, retries = 1; -#ifdef _WIN32 - LARGE_INTEGER off; - SIZE_T len; -#define SET_OFF(off,val) off.QuadPart = val -#define MAP(rc,env,addr,len,off) \ - addr = NULL; \ - rc = NtMapViewOfSection(env->me_fmh, GetCurrentProcess(), &addr, 0, \ - len, &off, &len, ViewUnmap, (env->me_flags & MDB_RDONLY) ? 0 : MEM_RESERVE, PAGE_READONLY); \ - if (rc) rc = mdb_nt2win32(rc) -#else - off64_t off; - size_t len; -#define SET_OFF(off,val) off = val -#define MAP(rc,env,addr,len,off) \ - addr = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); \ - rc = (addr == MAP_FAILED) ? errno : 0 -#endif - - /* remember the offset of the actual page number, so we can - * return the correct pointer at the end. - */ - rem = pg0 & (MDB_RPAGE_CHUNK-1); - pgno = pg0 ^ rem; - - id3.mid = 0; - x = mdb_mid3l_search(tl, pgno); - if (x <= tl[0].mid && tl[x].mid == pgno) { - if (x != tl[0].mid && tl[x+1].mid == pg0) - x++; - /* check for overflow size */ - p = (MDB_page *)((char *)tl[x].mptr + rem * env->me_psize); - if (IS_OVERFLOW(p) && p->mp_pages + rem > tl[x].mcnt) { - id3.mcnt = p->mp_pages + rem; - len = id3.mcnt * env->me_psize; - SET_OFF(off, pgno * env->me_psize); - MAP(rc, env, id3.mptr, len, off); - if (rc) - return rc; - /* check for local-only page */ - if (rem) { - mdb_tassert(txn, tl[x].mid != pg0); - /* hope there's room to insert this locally. - * setting mid here tells later code to just insert - * this id3 instead of searching for a match. - */ - id3.mid = pg0; - goto notlocal; - } else { - /* ignore the mapping we got from env, use new one */ - tl[x].mptr = id3.mptr; - tl[x].mcnt = id3.mcnt; - /* if no active ref, see if we can replace in env */ - if (!tl[x].mref) { - unsigned i; - pthread_mutex_lock(&env->me_rpmutex); - i = mdb_mid3l_search(el, tl[x].mid); - if (el[i].mref == 1) { - /* just us, replace it */ - munmap(el[i].mptr, el[i].mcnt * env->me_psize); - el[i].mptr = tl[x].mptr; - el[i].mcnt = tl[x].mcnt; - } else { - /* there are others, remove ourself */ - el[i].mref--; - } - pthread_mutex_unlock(&env->me_rpmutex); - } - } - } - id3.mptr = tl[x].mptr; - id3.mcnt = tl[x].mcnt; - tl[x].mref++; - goto ok; - } - -notlocal: - if (tl[0].mid >= MDB_TRPAGE_MAX - txn->mt_rpcheck) { - unsigned i, y; - /* purge unref'd pages from our list and unref in env */ - pthread_mutex_lock(&env->me_rpmutex); -retry: - y = 0; - for (i=1; i<=tl[0].mid; i++) { - if (!tl[i].mref) { - if (!y) y = i; - /* tmp overflow pages don't go to env */ - if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) { - munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); - continue; - } - x = mdb_mid3l_search(el, tl[i].mid); - el[x].mref--; - } - } - pthread_mutex_unlock(&env->me_rpmutex); - if (!y) { - /* we didn't find any unref'd chunks. - * if we're out of room, fail. - */ - if (tl[0].mid >= MDB_TRPAGE_MAX) - return MDB_TXN_FULL; - /* otherwise, raise threshold for next time around - * and let this go. - */ - txn->mt_rpcheck /= 2; - } else { - /* we found some unused; consolidate the list */ - for (i=y+1; i<= tl[0].mid; i++) - if (tl[i].mref) - tl[y++] = tl[i]; - tl[0].mid = y-1; - /* decrease the check threshold toward its original value */ - if (!txn->mt_rpcheck) - txn->mt_rpcheck = 1; - while (txn->mt_rpcheck < tl[0].mid && txn->mt_rpcheck < MDB_TRPAGE_SIZE/2) - txn->mt_rpcheck *= 2; - } - } - if (tl[0].mid < MDB_TRPAGE_SIZE) { - id3.mref = 1; - if (id3.mid) - goto found; - /* don't map past last written page in read-only envs */ - if ((env->me_flags & MDB_RDONLY) && pgno + MDB_RPAGE_CHUNK-1 > txn->mt_last_pgno) - id3.mcnt = txn->mt_last_pgno + 1 - pgno; - else - id3.mcnt = MDB_RPAGE_CHUNK; - len = id3.mcnt * env->me_psize; - id3.mid = pgno; - - /* search for page in env */ - pthread_mutex_lock(&env->me_rpmutex); - x = mdb_mid3l_search(el, pgno); - if (x <= el[0].mid && el[x].mid == pgno) { - id3.mptr = el[x].mptr; - id3.mcnt = el[x].mcnt; - /* check for overflow size */ - p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); - if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) { - id3.mcnt = p->mp_pages + rem; - len = id3.mcnt * env->me_psize; - SET_OFF(off, pgno * env->me_psize); - MAP(rc, env, id3.mptr, len, off); - if (rc) - goto fail; - if (!el[x].mref) { - munmap(el[x].mptr, env->me_psize * el[x].mcnt); - el[x].mptr = id3.mptr; - el[x].mcnt = id3.mcnt; - } else { - id3.mid = pg0; - pthread_mutex_unlock(&env->me_rpmutex); - goto found; - } - } - el[x].mref++; - pthread_mutex_unlock(&env->me_rpmutex); - goto found; - } - if (el[0].mid >= MDB_ERPAGE_MAX - env->me_rpcheck) { - /* purge unref'd pages */ - unsigned i, y = 0; - for (i=1; i<=el[0].mid; i++) { - if (!el[i].mref) { - if (!y) y = i; - munmap(el[i].mptr, env->me_psize * el[i].mcnt); - } - } - if (!y) { - if (retries) { - /* see if we can unref some local pages */ - retries--; - id3.mid = 0; - goto retry; - } - if (el[0].mid >= MDB_ERPAGE_MAX) { - pthread_mutex_unlock(&env->me_rpmutex); - return MDB_MAP_FULL; - } - env->me_rpcheck /= 2; - } else { - for (i=y+1; i<= el[0].mid; i++) - if (el[i].mref) - el[y++] = el[i]; - el[0].mid = y-1; - if (!env->me_rpcheck) - env->me_rpcheck = 1; - while (env->me_rpcheck < el[0].mid && env->me_rpcheck < MDB_ERPAGE_SIZE/2) - env->me_rpcheck *= 2; - } - } - SET_OFF(off, pgno * env->me_psize); - MAP(rc, env, id3.mptr, len, off); - if (rc) { -fail: - pthread_mutex_unlock(&env->me_rpmutex); - return rc; - } - /* check for overflow size */ - p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); - if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) { - id3.mcnt = p->mp_pages + rem; - munmap(id3.mptr, len); - len = id3.mcnt * env->me_psize; - MAP(rc, env, id3.mptr, len, off); - if (rc) - goto fail; - } - mdb_mid3l_insert(el, &id3); - pthread_mutex_unlock(&env->me_rpmutex); -found: - mdb_mid3l_insert(tl, &id3); - } else { - return MDB_TXN_FULL; - } -ok: - p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); -#if MDB_DEBUG /* we don't need this check any more */ - if (IS_OVERFLOW(p)) { - mdb_tassert(txn, p->mp_pages + rem <= id3.mcnt); - } -#endif - *ret = p; - return MDB_SUCCESS; -} -#endif - /** Find the address of the page corresponding to a given page number. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc the cursor accessing the page. * @param[in] pgno the page number for the page to retrieve. * @param[out] ret address of a pointer where the page's address will be stored. @@ -5972,13 +5422,11 @@ static int mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl) { MDB_txn *txn = mc->mc_txn; -#ifndef MDB_VL32 MDB_env *env = txn->mt_env; -#endif MDB_page *p = NULL; int level; - if (! (mc->mc_flags & (C_ORIG_RDONLY|C_WRITEMAP))) { + if (! (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_WRITEMAP))) { MDB_txn *tx2 = txn; level = 1; do { @@ -5993,13 +5441,7 @@ mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl) MDB_ID pn = pgno << 1; x = mdb_midl_search(tx2->mt_spill_pgs, pn); if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { -#ifdef MDB_VL32 - int rc = mdb_rpage_get(txn, pgno, &p); - if (rc) - return rc; -#else p = (MDB_page *)(env->me_map + env->me_psize * pgno); -#endif goto done; } } @@ -6016,17 +5458,9 @@ mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl) if (pgno < txn->mt_next_pgno) { level = 0; -#ifdef MDB_VL32 - { - int rc = mdb_rpage_get(txn, pgno, &p); - if (rc) - return rc; - } -#else p = (MDB_page *)(env->me_map + env->me_psize * pgno); -#endif } else { - DPRINTF(("page %"Y"u not found", pgno)); + DPRINTF(("page %"Z"u not found", pgno)); txn->mt_flags |= MDB_TXN_ERROR; return MDB_PAGE_NOTFOUND; } @@ -6052,18 +5486,27 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) MDB_node *node; indx_t i; - DPRINTF(("branch page %"Y"u has %u keys", mp->mp_pgno, NUMKEYS(mp))); + DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp))); /* Don't assert on branch pages in the FreeDB. We can get here * while in the process of rebalancing a FreeDB branch page; we must * let that proceed. ITS#8336 */ mdb_cassert(mc, !mc->mc_dbi || NUMKEYS(mp) > 1); - DPRINTF(("found index 0 to page %"Y"u", NODEPGNO(NODEPTR(mp, 0)))); + DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0)))); if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) { i = 0; - if (flags & MDB_PS_LAST) + if (flags & MDB_PS_LAST) { i = NUMKEYS(mp) - 1; + /* if already init'd, see if we're already in right place */ + if (mc->mc_flags & C_INITIALIZED) { + if (mc->mc_ki[mc->mc_top] == i) { + mc->mc_top = mc->mc_snum++; + mp = mc->mc_pg[mc->mc_top]; + goto ready; + } + } + } } else { int exact; node = mdb_node_search(mc, key, &exact); @@ -6089,6 +5532,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) if ((rc = mdb_cursor_push(mc, mp))) return rc; +ready: if (flags & MDB_PS_MODIFY) { if ((rc = mdb_page_touch(mc)) != 0) return rc; @@ -6103,7 +5547,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) return MDB_CORRUPTED; } - DPRINTF(("found leaf page %"Y"u for key [%s]", mp->mp_pgno, + DPRINTF(("found leaf page %"Z"u for key [%s]", mp->mp_pgno, key ? DKEY(key) : "null")); mc->mc_flags |= C_INITIALIZED; mc->mc_flags &= ~C_EOF; @@ -6199,26 +5643,14 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) } mdb_cassert(mc, root > 1); - if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) { -#ifdef MDB_VL32 - if (mc->mc_pg[0]) - MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[0]); -#endif + if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) if ((rc = mdb_page_get(mc, root, &mc->mc_pg[0], NULL)) != 0) return rc; - } -#ifdef MDB_VL32 - { - int i; - for (i=1; imc_snum; i++) - MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[i]); - } -#endif mc->mc_snum = 1; mc->mc_top = 0; - DPRINTF(("db %d root page %"Y"u has flags 0x%X", + DPRINTF(("db %d root page %"Z"u has flags 0x%X", DDBI(mc), root, mc->mc_pg[0]->mp_flags)); if (flags & MDB_PS_MODIFY) { @@ -6243,7 +5675,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) MDB_ID pn = pg << 1; int rc; - DPRINTF(("free ov page %"Y"u (%d)", pg, ovpages)); + DPRINTF(("free ov page %"Z"u (%d)", pg, ovpages)); /* If the page is dirty or on the spill list we just acquired it, * so we should give it back to our current free list, if any. * Otherwise put it onto the list of pages we freed in this txn. @@ -6304,10 +5736,6 @@ release: if (rc) return rc; } -#ifdef MDB_VL32 - if (mc->mc_ovpg == mp) - mc->mc_ovpg = NULL; -#endif mc->mc_db->md_overflow_pages -= ovpages; return 0; } @@ -6325,12 +5753,6 @@ mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data) pgno_t pgno; int rc; -#ifdef MDB_VL32 - if (mc->mc_ovpg) { - MDB_PAGE_UNREF(mc->mc_txn, mc->mc_ovpg); - mc->mc_ovpg = 0; - } -#endif if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) { data->mv_size = NODEDSZ(leaf); data->mv_data = NODEDATA(leaf); @@ -6342,13 +5764,10 @@ mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data) data->mv_size = NODEDSZ(leaf); memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); if ((rc = mdb_page_get(mc, pgno, &omp, NULL)) != 0) { - DPRINTF(("read overflow page %"Y"u failed", pgno)); + DPRINTF(("read overflow page %"Z"u failed", pgno)); return rc; } data->mv_data = METADATA(omp); -#ifdef MDB_VL32 - mc->mc_ovpg = omp; -#endif return MDB_SUCCESS; } @@ -6359,7 +5778,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, { MDB_cursor mc; MDB_xcursor mx; - int exact = 0, rc; + int exact = 0; DKBUF; DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key))); @@ -6371,16 +5790,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, return MDB_BAD_TXN; mdb_cursor_init(&mc, txn, dbi, &mx); - rc = mdb_cursor_set(&mc, key, data, MDB_SET, &exact); -#ifdef MDB_VL32 - { - /* unref all the pages - caller must copy the data - * before doing anything else - */ - mdb_cursor_unref(&mc); - } -#endif - return rc; + return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); } /** Find a sibling for a page. @@ -6397,19 +5807,13 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) int rc; MDB_node *indx; MDB_page *mp; -#ifdef MDB_VL32 - MDB_page *op; -#endif if (mc->mc_snum < 2) { return MDB_NOTFOUND; /* root has no siblings */ } -#ifdef MDB_VL32 - op = mc->mc_pg[mc->mc_top]; -#endif mdb_cursor_pop(mc); - DPRINTF(("parent page is page %"Y"u, index %u", + DPRINTF(("parent page is page %"Z"u, index %u", mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top])); if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top])) @@ -6432,8 +5836,6 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) } mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); - MDB_PAGE_UNREF(mc->mc_txn, op); - indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if ((rc = mdb_page_get(mc, NODEPGNO(indx), &mp, NULL)) != 0) { /* mc will be inconsistent if caller does mc_snum++ as above */ @@ -6456,14 +5858,20 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) MDB_node *leaf; int rc; - if (mc->mc_flags & C_EOF) { + if ((mc->mc_flags & C_DEL && op == MDB_NEXT_DUP)) return MDB_NOTFOUND; - } - mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); + if (!(mc->mc_flags & C_INITIALIZED)) + return mdb_cursor_first(mc, key, data); mp = mc->mc_pg[mc->mc_top]; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mp)-1) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + if (mc->mc_db->md_flags & MDB_DUPSORT) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -6475,13 +5883,6 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) return rc; } } -#ifdef MDB_VL32 - else { - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); - } - } -#endif } else { mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); if (op == MDB_NEXT_DUP) @@ -6489,7 +5890,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } } - DPRINTF(("cursor_next: top page is %"Y"u in cursor %p", + DPRINTF(("cursor_next: top page is %"Z"u in cursor %p", mdb_dbg_pgno(mp), (void *) mc)); if (mc->mc_flags & C_DEL) { mc->mc_flags ^= C_DEL; @@ -6503,12 +5904,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) return rc; } mp = mc->mc_pg[mc->mc_top]; - DPRINTF(("next page is %"Y"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); + DPRINTF(("next page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); } else mc->mc_ki[mc->mc_top]++; skip: - DPRINTF(("==> cursor points to page %"Y"u with %u keys, key index %u", + DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); if (IS_LEAF2(mp)) { @@ -6546,7 +5947,12 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) MDB_node *leaf; int rc; - mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); + if (!(mc->mc_flags & C_INITIALIZED)) { + rc = mdb_cursor_last(mc, key, data); + if (rc) + return rc; + mc->mc_ki[mc->mc_top]++; + } mp = mc->mc_pg[mc->mc_top]; @@ -6563,13 +5969,6 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) return rc; } } -#ifdef MDB_VL32 - else { - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); - } - } -#endif } else { mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); if (op == MDB_PREV_DUP) @@ -6577,7 +5976,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } } - DPRINTF(("cursor_prev: top page is %"Y"u in cursor %p", + DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p", mdb_dbg_pgno(mp), (void *) mc)); mc->mc_flags &= ~(C_EOF|C_DEL); @@ -6589,13 +5988,11 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } mp = mc->mc_pg[mc->mc_top]; mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; - DPRINTF(("prev page is %"Y"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); + DPRINTF(("prev page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); } else mc->mc_ki[mc->mc_top]--; - mc->mc_flags &= ~C_EOF; - - DPRINTF(("==> cursor points to page %"Y"u with %u keys, key index %u", + DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); if (IS_LEAF2(mp)) { @@ -6638,14 +6035,8 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (key->mv_size == 0) return MDB_BAD_VALSIZE; - if (mc->mc_xcursor) { -#ifdef MDB_VL32 - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); - } -#endif + if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - } /* See if we're already on the right page */ if (mc->mc_flags & C_INITIALIZED) { @@ -6711,6 +6102,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, } } rc = 0; + mc->mc_flags &= ~C_EOF; goto set2; } } @@ -6802,8 +6194,8 @@ set1: if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS) return rc; dcmp = mc->mc_dbx->md_dcmp; -#if UINT_MAX < SIZE_MAX || defined(MDB_VL32) - if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(mdb_size_t)) +#if UINT_MAX < SIZE_MAX + if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t)) dcmp = mdb_cmp_clong; #endif rc = dcmp(data, &olddata); @@ -6837,14 +6229,8 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) int rc; MDB_node *leaf; - if (mc->mc_xcursor) { -#ifdef MDB_VL32 - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); - } -#endif + if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - } if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); @@ -6887,25 +6273,16 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) int rc; MDB_node *leaf; - if (mc->mc_xcursor) { -#ifdef MDB_VL32 - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); - } -#endif + if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_LAST); + if (rc != MDB_SUCCESS) + return rc; } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - if (!(mc->mc_flags & C_EOF)) { - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = mdb_page_search(mc, NULL, MDB_PS_LAST); - if (rc != MDB_SUCCESS) - return rc; - } - mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - - } mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; mc->mc_flags |= C_INITIALIZED|C_EOF; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); @@ -7019,10 +6396,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, rc = MDB_INCOMPATIBLE; break; } - if (!(mc->mc_flags & C_INITIALIZED)) - rc = mdb_cursor_first(mc, key, data); - else - rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); + rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); if (rc == MDB_SUCCESS) { if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { MDB_cursor *mx; @@ -7064,21 +6438,11 @@ fetchm: case MDB_NEXT: case MDB_NEXT_DUP: case MDB_NEXT_NODUP: - if (!(mc->mc_flags & C_INITIALIZED)) - rc = mdb_cursor_first(mc, key, data); - else - rc = mdb_cursor_next(mc, key, data, op); + rc = mdb_cursor_next(mc, key, data, op); break; case MDB_PREV: case MDB_PREV_DUP: case MDB_PREV_NODUP: - if (!(mc->mc_flags & C_INITIALIZED)) { - rc = mdb_cursor_last(mc, key, data); - if (rc) - break; - mc->mc_flags |= C_INITIALIZED; - mc->mc_ki[mc->mc_top]++; - } rc = mdb_cursor_prev(mc, key, data, op); break; case MDB_FIRST: @@ -7095,6 +6459,11 @@ fetchm: rc = MDB_INCOMPATIBLE; break; } + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) { + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); + rc = MDB_NOTFOUND; + break; + } { MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -7136,7 +6505,8 @@ mdb_cursor_touch(MDB_cursor *mc) { int rc = MDB_SUCCESS; - if (mc->mc_dbi >= CORE_DBS && !(*mc->mc_dbflag & DB_DIRTY)) { + if (mc->mc_dbi >= CORE_DBS && !(*mc->mc_dbflag & (DB_DIRTY|DB_DUPDATA))) { + /* Touch DB record of named DB */ MDB_cursor mc2; MDB_xcursor mcx; if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi)) @@ -7361,8 +6731,8 @@ more: if (flags == MDB_CURRENT) goto current; dcmp = mc->mc_dbx->md_dcmp; -#if UINT_MAX < SIZE_MAX || defined(MDB_VL32) - if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(mdb_size_t)) +#if UINT_MAX < SIZE_MAX + if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t)) dcmp = mdb_cmp_clong; #endif /* does data match? */ @@ -7460,8 +6830,9 @@ prep_subDB: } else { memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, olddata.mv_size - fp->mp_upper - PAGEBASE); + memcpy((char *)(&mp->mp_ptrs), (char *)(&fp->mp_ptrs), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0])); for (i=0; imp_ptrs[i] = fp->mp_ptrs[i] + offset; + mp->mp_ptrs[i] += offset; } } @@ -7515,8 +6886,13 @@ current: /* Note - this page is already counted in parent's dirty_room */ rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); mdb_cassert(mc, rc2 == 0); + /* Currently we make the page look as with put() in the + * parent txn, in case the user peeks at MDB_RESERVEd + * or unused parts. Some users treat ovpages specially. + */ if (!(flags & MDB_RESERVE)) { - /* Copy end of page, adjusting alignment so + /* Skip the part where LMDB will put *data. + * Copy end of page, adjusting alignment so * compiler may copy words instead of bytes. */ off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t); @@ -7585,11 +6961,7 @@ new_sub: if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) { m3->mc_ki[i]++; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { - MDB_node *n2 = NODEPTR(mp, m3->mc_ki[i]); - if ((n2->mn_flags & (F_SUBDATA|F_DUPDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); - } + XCURSOR_REFRESH(m3, i, mp); } } } @@ -7602,7 +6974,7 @@ new_sub: */ if (do_sub) { int xflags, new_dupdata; - mdb_size_t ecount; + size_t ecount; put_sub: xdata.mv_size = 0; xdata.mv_data = ""; @@ -7631,7 +7003,6 @@ put_sub: MDB_xcursor *mx = mc->mc_xcursor; unsigned i = mc->mc_top; MDB_page *mp = mc->mc_pg[i]; - int nkeys = NUMKEYS(mp); for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; @@ -7639,10 +7010,8 @@ put_sub: if (m2->mc_pg[i] == mp) { if (m2->mc_ki[i] == mc->mc_ki[i]) { mdb_xcursor_init2(m2, mx, new_dupdata); - } else if (!insert_key && m2->mc_ki[i] < nkeys) { - MDB_node *n2 = NODEPTR(mp, m2->mc_ki[i]); - if ((n2->mn_flags & (F_SUBDATA|F_DUPDATA)) == F_DUPDATA) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + } else if (!insert_key) { + XCURSOR_REFRESH(m2, i, mp); } } } @@ -7747,13 +7116,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; if (m2->mc_pg[mc->mc_top] == mp) { - if (m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) { - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - } else { - MDB_node *n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]); - if (!(n2->mn_flags & F_SUBDATA)) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); - } + XCURSOR_REFRESH(m2, mc->mc_top, mp); } } } @@ -7798,6 +7161,7 @@ fail: } /** Allocate and initialize new pages for a database. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc a cursor on the database being added to. * @param[in] flags flags defining what type of page is being allocated. * @param[in] num the number of pages to allocate. This is usually 1, @@ -7813,7 +7177,7 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) if ((rc = mdb_page_alloc(mc, num, &np))) return rc; - DPRINTF(("allocated new mpage %"Y"u, page size %u", + DPRINTF(("allocated new mpage %"Z"u, page size %u", np->mp_pgno, mc->mc_txn->mt_env->me_psize)); np->mp_flags = flags | P_DIRTY; np->mp_lower = (PAGEHDRSZ-PAGEBASE); @@ -7883,6 +7247,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key) } /** Add a node to the page pointed to by the cursor. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc The cursor for this operation. * @param[in] indx The index on the page where the new node should be added. * @param[in] key The key for the new node. @@ -7913,7 +7278,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx, mdb_cassert(mc, mp->mp_upper >= mp->mp_lower); - DPRINTF(("add to %s %spage %"Y"u index %i, data size %"Z"u key size %"Z"u [%s]", + DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]", IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "", mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0, @@ -7954,7 +7319,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx, goto full; if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp))) return rc; - DPRINTF(("allocated overflow page %"Y"u", ofp->mp_pgno)); + DPRINTF(("allocated overflow page %"Z"u", ofp->mp_pgno)); flags |= F_BIGDATA; goto update; } else { @@ -8011,7 +7376,7 @@ update: return MDB_SUCCESS; full: - DPRINTF(("not enough room in page %"Y"u, got %u ptrs", + DPRINTF(("not enough room in page %"Z"u, got %u ptrs", mdb_dbg_pgno(mp), NUMKEYS(mp))); DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room)); DPRINTF(("node size = %"Z"u", node_size)); @@ -8034,7 +7399,7 @@ mdb_node_del(MDB_cursor *mc, int ksize) MDB_node *node; char *base; - DPRINTF(("delete node %u on %s page %"Y"u", indx, + DPRINTF(("delete node %u on %s page %"Z"u", indx, IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp))); numkeys = NUMKEYS(mp); mdb_cassert(mc, indx < numkeys); @@ -8143,10 +7508,7 @@ mdb_xcursor_init0(MDB_cursor *mc) mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; mx->mx_cursor.mc_snum = 0; mx->mx_cursor.mc_top = 0; -#ifdef MDB_VL32 - mx->mx_cursor.mc_ovpg = 0; -#endif - mx->mx_cursor.mc_flags = C_SUB | (mc->mc_flags & (C_ORIG_RDONLY|C_WRITEMAP)); + mx->mx_cursor.mc_flags = C_SUB; mx->mx_dbx.md_name.mv_size = 0; mx->mx_dbx.md_name.mv_data = NULL; mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; @@ -8165,12 +7527,12 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) { MDB_xcursor *mx = mc->mc_xcursor; - mx->mx_cursor.mc_flags &= C_SUB|C_ORIG_RDONLY|C_WRITEMAP; if (node->mn_flags & F_SUBDATA) { memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db)); mx->mx_cursor.mc_pg[0] = 0; mx->mx_cursor.mc_snum = 0; mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_SUB; } else { MDB_page *fp = NODEDATA(node); mx->mx_db.md_pad = 0; @@ -8183,7 +7545,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno); mx->mx_cursor.mc_snum = 1; mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags |= C_INITIALIZED; + mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB; mx->mx_cursor.mc_pg[0] = fp; mx->mx_cursor.mc_ki[0] = 0; if (mc->mc_db->md_flags & MDB_DUPFIXED) { @@ -8193,11 +7555,11 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) mx->mx_db.md_flags |= MDB_INTEGERKEY; } } - DPRINTF(("Sub-db -%u root page %"Y"u", mx->mx_cursor.mc_dbi, + DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, mx->mx_db.md_root)); - mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ -#if UINT_MAX < SIZE_MAX || defined(MDB_VL32) - if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(mdb_size_t)) + mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA; +#if UINT_MAX < SIZE_MAX + if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) mx->mx_dbx.md_cmp = mdb_cmp_clong; #endif } @@ -8221,7 +7583,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata) mx->mx_cursor.mc_top = 0; mx->mx_cursor.mc_flags |= C_INITIALIZED; mx->mx_cursor.mc_ki[0] = 0; - mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ + mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA; #if UINT_MAX < SIZE_MAX mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp; #endif @@ -8230,7 +7592,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata) } mx->mx_db = src_mx->mx_db; mx->mx_cursor.mc_pg[0] = src_mx->mx_cursor.mc_pg[0]; - DPRINTF(("Sub-db -%u root page %"Y"u", mx->mx_cursor.mc_dbi, + DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, mx->mx_db.md_root)); } @@ -8249,10 +7611,7 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) mc->mc_top = 0; mc->mc_pg[0] = 0; mc->mc_ki[0] = 0; -#ifdef MDB_VL32 - mc->mc_ovpg = 0; -#endif - mc->mc_flags = txn->mt_flags & (C_ORIG_RDONLY|C_WRITEMAP); + mc->mc_flags = 0; if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { mdb_tassert(txn, mx != NULL); mc->mc_xcursor = mx; @@ -8317,7 +7676,7 @@ mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) /* Return the count of duplicate data items for the current key */ int -mdb_cursor_count(MDB_cursor *mc, mdb_size_t *countp) +mdb_cursor_count(MDB_cursor *mc, size_t *countp) { MDB_node *leaf; @@ -8333,9 +7692,15 @@ mdb_cursor_count(MDB_cursor *mc, mdb_size_t *countp) if (!(mc->mc_flags & C_INITIALIZED)) return EINVAL; - if (!mc->mc_snum || (mc->mc_flags & C_EOF)) + if (!mc->mc_snum) return MDB_NOTFOUND; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { *countp = 1; @@ -8351,11 +7716,6 @@ mdb_cursor_count(MDB_cursor *mc, mdb_size_t *countp) void mdb_cursor_close(MDB_cursor *mc) { -#ifdef MDB_VL32 - if (mc) { - mdb_cursor_unref(mc); - } -#endif if (mc && !mc->mc_backup) { /* remove from txn, if tracked */ if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { @@ -8382,6 +7742,7 @@ mdb_cursor_dbi(MDB_cursor *mc) } /** Replace the key for a branch node with a new key. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc Cursor pointing to the node to operate on. * @param[in] key The new key to use. * @return 0 on success, non-zero on failure. @@ -8407,7 +7768,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) char kbuf2[DKBUF_MAXKEYSIZE*2+1]; k2.mv_data = NODEKEY(node); k2.mv_size = node->mn_ksize; - DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Y"u", + DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u", indx, ptr, mdb_dkey(&k2, kbuf2), DKEY(key), @@ -8555,7 +7916,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) return rc; } - DPRINTF(("moving %s node %u [%s] on page %"Y"u to node %u on page %"Y"u", + DPRINTF(("moving %s node %u [%s] on page %"Z"u to node %u on page %"Z"u", IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", csrc->mc_ki[csrc->mc_top], DKEY(&key), @@ -8601,12 +7962,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; m3->mc_ki[csrc->mc_top-1]++; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(mps)) { - MDB_node *node = NODEPTR(m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } else /* Adding on the right, bump others down */ @@ -8627,12 +7984,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } else { m3->mc_ki[csrc->mc_top]--; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(mps)) { - MDB_node *node = NODEPTR(m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } } @@ -8649,7 +8002,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) key.mv_size = NODEKSZ(srcnode); key.mv_data = NODEKEY(srcnode); } - DPRINTF(("update separator for source page %"Y"u to [%s]", + DPRINTF(("update separator for source page %"Z"u to [%s]", csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key))); mdb_cursor_copy(csrc, &mn); mn.mc_snum--; @@ -8680,7 +8033,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) key.mv_size = NODEKSZ(srcnode); key.mv_data = NODEKEY(srcnode); } - DPRINTF(("update separator for destination page %"Y"u to [%s]", + DPRINTF(("update separator for destination page %"Z"u to [%s]", cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key))); mdb_cursor_copy(cdst, &mn); mn.mc_snum--; @@ -8726,7 +8079,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) psrc = csrc->mc_pg[csrc->mc_top]; pdst = cdst->mc_pg[cdst->mc_top]; - DPRINTF(("merging page %"Y"u into %"Y"u", psrc->mp_pgno, pdst->mp_pgno)); + DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno)); mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */ mdb_cassert(csrc, cdst->mc_snum > 1); @@ -8783,7 +8136,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) } } - DPRINTF(("dst page %"Y"u now has %u keys (%.1f%% filled)", + DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)", pdst->mp_pgno, NUMKEYS(pdst), (float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10)); @@ -8833,12 +8186,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) m3->mc_ki[top-1] > csrc->mc_ki[top-1]) { m3->mc_ki[top-1]--; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(psrc)) { - MDB_node *node = NODEPTR(m3->mc_pg[top], m3->mc_ki[top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (IS_LEAF(psrc)) + XCURSOR_REFRESH(m3, top, m3->mc_pg[top]); } } { @@ -8871,9 +8220,6 @@ mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst) cdst->mc_snum = csrc->mc_snum; cdst->mc_top = csrc->mc_top; cdst->mc_flags = csrc->mc_flags; -#ifdef MDB_VL32 - cdst->mc_ovpg = csrc->mc_ovpg; -#endif for (i=0; imc_snum; i++) { cdst->mc_pg[i] = csrc->mc_pg[i]; @@ -8902,14 +8248,14 @@ mdb_rebalance(MDB_cursor *mc) minkeys = 1; thresh = FILL_THRESHOLD; } - DPRINTF(("rebalancing %s page %"Y"u (has %u keys, %.1f%% full)", + DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)", IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10)); if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= thresh && NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) { - DPRINTF(("no need to rebalance page %"Y"u, above fill threshold", + DPRINTF(("no need to rebalance page %"Z"u, above fill threshold", mdb_dbg_pgno(mc->mc_pg[mc->mc_top]))); return MDB_SUCCESS; } @@ -9038,7 +8384,7 @@ mdb_rebalance(MDB_cursor *mc) fromleft = 1; } - DPRINTF(("found neighbor page %"Y"u (%u keys, %.1f%% full)", + DPRINTF(("found neighbor page %"Z"u (%u keys, %.1f%% full)", mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10)); @@ -9095,16 +8441,15 @@ mdb_cursor_del0(MDB_cursor *mc) if (m3->mc_pg[mc->mc_top] == mp) { if (m3->mc_ki[mc->mc_top] == ki) { m3->mc_flags |= C_DEL; - if (mc->mc_db->md_flags & MDB_DUPSORT) - m3->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED; + if (mc->mc_db->md_flags & MDB_DUPSORT) { + /* Sub-cursor referred into dataset which is gone */ + m3->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + } + continue; } else if (m3->mc_ki[mc->mc_top] > ki) { m3->mc_ki[mc->mc_top]--; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { - MDB_node *node = NODEPTR(m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + XCURSOR_REFRESH(m3, mc->mc_top, mp); } } } @@ -9130,11 +8475,32 @@ mdb_cursor_del0(MDB_cursor *mc) continue; if (m3->mc_pg[mc->mc_top] == mp) { /* if m3 points past last node in page, find next sibling */ - if (m3->mc_ki[mc->mc_top] >= nkeys) { - rc = mdb_cursor_sibling(m3, 1); - if (rc == MDB_NOTFOUND) { - m3->mc_flags |= C_EOF; - rc = MDB_SUCCESS; + if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) { + if (m3->mc_ki[mc->mc_top] >= nkeys) { + rc = mdb_cursor_sibling(m3, 1); + if (rc == MDB_NOTFOUND) { + m3->mc_flags |= C_EOF; + rc = MDB_SUCCESS; + continue; + } + } + if (mc->mc_db->md_flags & MDB_DUPSORT) { + MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); + /* If this node has dupdata, it may need to be reinited + * because its data has moved. + * If the xcursor was not initd it must be reinited. + * Else if node points to a subDB, nothing is needed. + * Else (xcursor was initd, not a subDB) needs mc_pg[0] reset. + */ + if (node->mn_flags & F_DUPDATA) { + if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + if (!(node->mn_flags & F_SUBDATA)) + m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); + } else { + mdb_xcursor_init1(m3, node); + m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL; + } + } } } } @@ -9209,6 +8575,7 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, } /** Split a page and insert a new node. + * Set #MDB_TXN_ERROR on failure. * @param[in,out] mc Cursor pointing to the page and desired insertion index. * The cursor will be updated to point to the actual page and index where * the node got inserted after the split. @@ -9240,7 +8607,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno newindx = mc->mc_ki[mc->mc_top]; nkeys = NUMKEYS(mp); - DPRINTF(("-----> splitting %s page %"Y"u and adding [%s] at index %i/%i", + DPRINTF(("-----> splitting %s page %"Z"u and adding [%s] at index %i/%i", IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, DKEY(newkey), mc->mc_ki[mc->mc_top], nkeys)); @@ -9248,7 +8615,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp))) return rc; rp->mp_pad = mp->mp_pad; - DPRINTF(("new right sibling: page %"Y"u", rp->mp_pgno)); + DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno)); /* Usually when splitting the root page, the cursor * height is 1. But when called from mdb_update_key, @@ -9266,7 +8633,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno mc->mc_pg[0] = pp; mc->mc_ki[0] = 0; mc->mc_db->md_root = pp->mp_pgno; - DPRINTF(("root split! new root = %"Y"u", pp->mp_pgno)); + DPRINTF(("root split! new root = %"Z"u", pp->mp_pgno)); new_root = mc->mc_db->md_depth++; /* Add left (implicit) pointer. */ @@ -9283,7 +8650,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno ptop = 0; } else { ptop = mc->mc_top-1; - DPRINTF(("parent branch page is %"Y"u", mc->mc_pg[ptop]->mp_pgno)); + DPRINTF(("parent branch page is %"Z"u", mc->mc_pg[ptop]->mp_pgno)); } mdb_cursor_copy(mc, &mn); @@ -9382,7 +8749,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno * the split so the new page is emptier than the old page. * This yields better packing during sequential inserts. */ - if (nkeys < 20 || nsize > pmax/16 || newindx >= nkeys) { + if (nkeys < 32 || nsize > pmax/16 || newindx >= nkeys) { /* Find split point */ psize = 0; if (newindx <= split_indx || newindx >= nkeys) { @@ -9618,12 +8985,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { m3->mc_ki[ptop]++; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(mp)) { - MDB_node *node = NODEPTR(m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (IS_LEAF(mp)) + XCURSOR_REFRESH(m3, mc->mc_top, m3->mc_pg[mc->mc_top]); } } DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp))); @@ -9664,23 +9027,26 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, #ifndef MDB_WBUF #define MDB_WBUF (1024*1024) #endif +#define MDB_EOF 0x10 /**< #mdb_env_copyfd1() is done reading */ - /** State needed for a compacting copy. */ + /** State needed for a double-buffering compacting copy. */ typedef struct mdb_copy { - pthread_mutex_t mc_mutex; - pthread_cond_t mc_cond; - char *mc_wbuf[2]; - char *mc_over[2]; MDB_env *mc_env; MDB_txn *mc_txn; + pthread_mutex_t mc_mutex; + pthread_cond_t mc_cond; /**< Condition variable for #mc_new */ + char *mc_wbuf[2]; + char *mc_over[2]; int mc_wlen[2]; int mc_olen[2]; pgno_t mc_next_pgno; HANDLE mc_fd; - int mc_status; - volatile int mc_new; - int mc_toggle; - + int mc_toggle; /**< Buffer number in provider */ + int mc_new; /**< (0-2 buffers to write) | (#MDB_EOF at end) */ + /** Error code. Never cleared if set. Both threads can set nonzero + * to fail the copy. Not mutex-protected, LMDB expects atomic int. + */ + volatile int mc_error; } mdb_copy; /** Dedicated writer thread for compacting copy. */ @@ -9696,26 +9062,38 @@ mdb_env_copythr(void *arg) #else int len; #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#ifdef SIGPIPE + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGPIPE); + if ((rc = pthread_sigmask(SIG_BLOCK, &set, NULL)) != 0) + my->mc_error = rc; +#endif #endif pthread_mutex_lock(&my->mc_mutex); - my->mc_new = 0; - pthread_cond_signal(&my->mc_cond); for(;;) { while (!my->mc_new) pthread_cond_wait(&my->mc_cond, &my->mc_mutex); - if (my->mc_new < 0) { - my->mc_new = 0; + if (my->mc_new == 0 + MDB_EOF) /* 0 buffers, just EOF */ break; - } - my->mc_new = 0; wsize = my->mc_wlen[toggle]; ptr = my->mc_wbuf[toggle]; again: - while (wsize > 0) { + rc = MDB_SUCCESS; + while (wsize > 0 && !my->mc_error) { DO_WRITE(rc, my->mc_fd, ptr, wsize, len); if (!rc) { rc = ErrCode(); +#if defined(SIGPIPE) && !defined(_WIN32) + if (rc == EPIPE) { + /* Collect the pending SIGPIPE, otherwise at least OS X + * gives it to the process on thread-exit (ITS#8504). + */ + int tmp; + sigwait(&set, &tmp); + } +#endif break; } else if (len > 0) { rc = MDB_SUCCESS; @@ -9728,8 +9106,7 @@ again: } } if (rc) { - my->mc_status = rc; - break; + my->mc_error = rc; } /* If there's an overflow page tail, write it too */ if (my->mc_olen[toggle]) { @@ -9740,38 +9117,45 @@ again: } my->mc_wlen[toggle] = 0; toggle ^= 1; + /* Return the empty buffer to provider */ + my->mc_new--; pthread_cond_signal(&my->mc_cond); } - pthread_cond_signal(&my->mc_cond); pthread_mutex_unlock(&my->mc_mutex); return (THREAD_RET)0; #undef DO_WRITE } - /** Tell the writer thread there's a buffer ready to write */ + /** Give buffer and/or #MDB_EOF to writer thread, await unused buffer. + * + * @param[in] my control structure. + * @param[in] adjust (1 to hand off 1 buffer) | (MDB_EOF when ending). + */ static int ESECT -mdb_env_cthr_toggle(mdb_copy *my, int st) +mdb_env_cthr_toggle(mdb_copy *my, int adjust) { - int toggle = my->mc_toggle ^ 1; pthread_mutex_lock(&my->mc_mutex); - if (my->mc_status) { - pthread_mutex_unlock(&my->mc_mutex); - return my->mc_status; - } - while (my->mc_new == 1) - pthread_cond_wait(&my->mc_cond, &my->mc_mutex); - my->mc_new = st; - my->mc_toggle = toggle; + my->mc_new += adjust; pthread_cond_signal(&my->mc_cond); + while (my->mc_new & 2) /* both buffers in use */ + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); pthread_mutex_unlock(&my->mc_mutex); - return 0; + + my->mc_toggle ^= (adjust & 1); + /* Both threads reset mc_wlen, to be safe from threading errors */ + my->mc_wlen[my->mc_toggle] = 0; + return my->mc_error; } - /** Depth-first tree traversal for compacting copy. */ + /** Depth-first tree traversal for compacting copy. + * @param[in] my control structure. + * @param[in,out] pg database root. + * @param[in] flags includes #F_DUPDATA if it is a sorted-duplicate sub-DB. + */ static int ESECT mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) { - MDB_cursor mc; + MDB_cursor mc = {0}; MDB_node *ni; MDB_page *mo, *mp, *leaf; char *buf, *ptr; @@ -9783,7 +9167,6 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) return MDB_SUCCESS; mc.mc_snum = 1; - mc.mc_top = 0; mc.mc_txn = my->mc_txn; rc = mdb_page_get(&mc, *pg, &mc.mc_pg[0], NULL); @@ -9830,6 +9213,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) } memcpy(&pg, NODEDATA(ni), sizeof(pg)); + memcpy(NODEDATA(ni), &my->mc_next_pgno, sizeof(pgno_t)); rc = mdb_page_get(&mc, pg, &omp, NULL); if (rc) goto done; @@ -9852,7 +9236,6 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) goto done; toggle = my->mc_toggle; } - memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t)); } else if (ni->mn_flags & F_SUBDATA) { MDB_db db; @@ -9930,47 +9313,56 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) { MDB_meta *mm; MDB_page *mp; - mdb_copy my; + mdb_copy my = {0}; MDB_txn *txn = NULL; pthread_t thr; - int rc; + pgno_t root, new_root; + int rc = MDB_SUCCESS; #ifdef _WIN32 - my.mc_mutex = CreateMutex(NULL, FALSE, NULL); - my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!(my.mc_mutex = CreateMutex(NULL, FALSE, NULL)) || + !(my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL))) { + rc = ErrCode(); + goto done; + } my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize); - if (my.mc_wbuf[0] == NULL) - return errno; + if (my.mc_wbuf[0] == NULL) { + /* _aligned_malloc() sets errno, but we use Windows error codes */ + rc = ERROR_NOT_ENOUGH_MEMORY; + goto done; + } #else - pthread_mutex_init(&my.mc_mutex, NULL); - pthread_cond_init(&my.mc_cond, NULL); + if ((rc = pthread_mutex_init(&my.mc_mutex, NULL)) != 0) + return rc; + if ((rc = pthread_cond_init(&my.mc_cond, NULL)) != 0) + goto done2; #ifdef HAVE_MEMALIGN my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2); - if (my.mc_wbuf[0] == NULL) - return errno; + if (my.mc_wbuf[0] == NULL) { + rc = errno; + goto done; + } #else - rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2); - if (rc) - return rc; + { + void *p; + if ((rc = posix_memalign(&p, env->me_os_psize, MDB_WBUF*2)) != 0) + goto done; + my.mc_wbuf[0] = p; + } #endif #endif memset(my.mc_wbuf[0], 0, MDB_WBUF*2); my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF; - my.mc_wlen[0] = 0; - my.mc_wlen[1] = 0; - my.mc_olen[0] = 0; - my.mc_olen[1] = 0; my.mc_next_pgno = NUM_METAS; - my.mc_status = 0; - my.mc_new = 1; - my.mc_toggle = 0; my.mc_env = env; my.mc_fd = fd; - THREAD_CREATE(thr, mdb_env_copythr, &my); + rc = THREAD_CREATE(thr, mdb_env_copythr, &my); + if (rc) + goto done; rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) - return rc; + goto finish; mp = (MDB_page *)my.mc_wbuf[0]; memset(mp, 0, NUM_METAS * env->me_psize); @@ -9986,57 +9378,64 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) *(MDB_meta *)METADATA(mp) = *mm; mm = (MDB_meta *)METADATA(mp); - /* Count the number of free pages, subtract from lastpg to find - * number of active pages - */ - { + /* Set metapage 1 with current main DB */ + root = new_root = txn->mt_dbs[MAIN_DBI].md_root; + if (root != P_INVALID) { + /* Count free pages + freeDB pages. Subtract from last_pg + * to find the new last_pg, which also becomes the new root. + */ MDB_ID freecount = 0; MDB_cursor mc; MDB_val key, data; mdb_cursor_init(&mc, txn, FREE_DBI, NULL); while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) freecount += *(MDB_ID *)data.mv_data; + if (rc != MDB_NOTFOUND) + goto finish; freecount += txn->mt_dbs[FREE_DBI].md_branch_pages + txn->mt_dbs[FREE_DBI].md_leaf_pages + txn->mt_dbs[FREE_DBI].md_overflow_pages; - /* Set metapage 1 */ - mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; + new_root = txn->mt_next_pgno - 1 - freecount; + mm->mm_last_pg = new_root; mm->mm_dbs[MAIN_DBI] = txn->mt_dbs[MAIN_DBI]; - if (mm->mm_last_pg > NUM_METAS-1) { - mm->mm_dbs[MAIN_DBI].md_root = mm->mm_last_pg; - mm->mm_txnid = 1; - } else { - mm->mm_dbs[MAIN_DBI].md_root = P_INVALID; - } + mm->mm_dbs[MAIN_DBI].md_root = new_root; + } else { + /* When the DB is empty, handle it specially to + * fix any breakage like page leaks from ITS#8174. + */ + mm->mm_dbs[MAIN_DBI].md_flags = txn->mt_dbs[MAIN_DBI].md_flags; } + if (root != P_INVALID || mm->mm_dbs[MAIN_DBI].md_flags) { + mm->mm_txnid = 1; /* use metapage 1 */ + } + my.mc_wlen[0] = env->me_psize * NUM_METAS; my.mc_txn = txn; - pthread_mutex_lock(&my.mc_mutex); - while(my.mc_new) - pthread_cond_wait(&my.mc_cond, &my.mc_mutex); - pthread_mutex_unlock(&my.mc_mutex); - rc = mdb_env_cwalk(&my, &txn->mt_dbs[MAIN_DBI].md_root, 0); - if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) - rc = mdb_env_cthr_toggle(&my, 1); - mdb_env_cthr_toggle(&my, -1); - pthread_mutex_lock(&my.mc_mutex); - while(my.mc_new) - pthread_cond_wait(&my.mc_cond, &my.mc_mutex); - pthread_mutex_unlock(&my.mc_mutex); - THREAD_FINISH(thr); + rc = mdb_env_cwalk(&my, &root, 0); + if (rc == MDB_SUCCESS && root != new_root) { + rc = MDB_INCOMPATIBLE; /* page leak or corrupt DB */ + } +finish: + if (rc) + my.mc_error = rc; + mdb_env_cthr_toggle(&my, 1 | MDB_EOF); + rc = THREAD_FINISH(thr); mdb_txn_abort(txn); + +done: #ifdef _WIN32 - CloseHandle(my.mc_cond); - CloseHandle(my.mc_mutex); - _aligned_free(my.mc_wbuf[0]); + if (my.mc_wbuf[0]) _aligned_free(my.mc_wbuf[0]); + if (my.mc_cond) CloseHandle(my.mc_cond); + if (my.mc_mutex) CloseHandle(my.mc_mutex); #else - pthread_cond_destroy(&my.mc_cond); - pthread_mutex_destroy(&my.mc_mutex); free(my.mc_wbuf[0]); + pthread_cond_destroy(&my.mc_cond); +done2: + pthread_mutex_destroy(&my.mc_mutex); #endif - return rc; + return rc ? rc : my.mc_error; } /** Copy environment as-is. */ @@ -10046,7 +9445,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) MDB_txn *txn = NULL; mdb_mutexref_t wmutex = NULL; int rc; - mdb_size_t wsize, w3; + size_t wsize, w3; char *ptr; #ifdef _WIN32 DWORD len, w2; @@ -10107,7 +9506,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) w3 = txn->mt_next_pgno * env->me_psize; { - mdb_size_t fsize = 0; + size_t fsize = 0; if ((rc = mdb_fsize(env->me_fd, &fsize))) goto leave; if (w3 > fsize) @@ -10157,67 +9556,20 @@ mdb_env_copyfd(MDB_env *env, HANDLE fd) int ESECT mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) { - int rc, len; - char *lpath; + int rc; + MDB_name fname; HANDLE newfd = INVALID_HANDLE_VALUE; -#ifdef _WIN32 - wchar_t *wpath; -#endif - if (env->me_flags & MDB_NOSUBDIR) { - lpath = (char *)path; - } else { - len = strlen(path); - len += sizeof(DATANAME); - lpath = malloc(len); - if (!lpath) - return ENOMEM; - sprintf(lpath, "%s" DATANAME, path); + rc = mdb_fname_init(path, env->me_flags | MDB_NOLOCK, &fname); + if (rc == MDB_SUCCESS) { + rc = mdb_fopen(env, &fname, MDB_O_COPY, 0666, &newfd); + mdb_fname_destroy(fname); } - - /* The destination path must exist, but the destination file must not. - * We don't want the OS to cache the writes, since the source data is - * already in the OS cache. - */ -#ifdef _WIN32 - rc = utf8_to_utf16(lpath, -1, &wpath, NULL); - if (rc) - goto leave; - newfd = CreateFileW(wpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, - FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); - free(wpath); -#else - newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); -#endif - if (newfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; - } - - if (env->me_psize >= env->me_os_psize) { -#ifdef O_DIRECT - /* Set O_DIRECT if the file system supports it */ - if ((rc = fcntl(newfd, F_GETFL)) != -1) - (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); -#endif -#ifdef F_NOCACHE /* __APPLE__ */ - rc = fcntl(newfd, F_NOCACHE, 1); - if (rc) { - rc = ErrCode(); - goto leave; - } -#endif - } - - rc = mdb_env_copyfd2(env, newfd, flags); - -leave: - if (!(env->me_flags & MDB_NOSUBDIR)) - free(lpath); - if (newfd != INVALID_HANDLE_VALUE) + if (rc == MDB_SUCCESS) { + rc = mdb_env_copyfd2(env, newfd, flags); if (close(newfd) < 0 && rc == MDB_SUCCESS) rc = ErrCode(); - + } return rc; } @@ -10439,8 +9791,11 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA) return MDB_INCOMPATIBLE; - } else if (! (rc == MDB_NOTFOUND && (flags & MDB_CREATE))) { - return rc; + } else { + if (rc != MDB_NOTFOUND || !(flags & MDB_CREATE)) + return rc; + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; } /* Done here so we cannot fail after creating a new DB */ @@ -10454,7 +9809,8 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db memset(&dummy, 0, sizeof(dummy)); dummy.md_root = P_INVALID; dummy.md_flags = flags & PERSISTENT_FLAGS; - rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); + WITH_CURSOR_TRACKING(mc, + rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA)); dbflag |= DB_DIRTY; } @@ -10554,11 +9910,6 @@ mdb_drop0(MDB_cursor *mc, int subs) mdb_cursor_pop(mc); mdb_cursor_copy(mc, &mx); -#ifdef MDB_VL32 - /* bump refcount for mx's pages */ - for (i=0; imc_snum; i++) - mdb_page_get(&mx, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL); -#endif while (mc->mc_snum > 0) { MDB_page *mp = mc->mc_pg[mc->mc_top]; unsigned n = NUMKEYS(mp); @@ -10624,10 +9975,6 @@ pop: done: if (rc) txn->mt_flags |= MDB_TXN_ERROR; -#ifdef MDB_VL32 - /* drop refcount for mx's pages */ - mdb_cursor_unref(&mx); -#endif } else if (rc == MDB_NOTFOUND) { rc = MDB_SUCCESS; } @@ -10747,7 +10094,7 @@ mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) if (mr[i].mr_pid) { txnid_t txnid = mr[i].mr_txnid; sprintf(buf, txnid == (txnid_t)-1 ? - "%10d %"Z"x -\n" : "%10d %"Z"x %"Y"u\n", + "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n", (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid); if (first) { first = 0; @@ -10816,7 +10163,7 @@ mdb_reader_check(MDB_env *env, int *dead) return env->me_txns ? mdb_reader_check0(env, 0, dead) : MDB_SUCCESS; } -/** As #mdb_reader_check(). rlocked = . */ +/** As #mdb_reader_check(). \b rlocked is set if caller locked #me_rmutex. */ static int ESECT mdb_reader_check0(MDB_env *env, int rlocked, int *dead) { @@ -10852,7 +10199,7 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead) } for (; jmn_alloced = 1; + dst->mn_len = need - 1; + dst->mn_val = result; + return MDB_SUCCESS; + } } #endif /* defined(_WIN32) */ +/** @} */ diff --git a/contrib/db/liblmdb/mdb_copy.1 b/contrib/db/liblmdb/mdb_copy.1 index 401e47ab..594ff124 100644 --- a/contrib/db/liblmdb/mdb_copy.1 +++ b/contrib/db/liblmdb/mdb_copy.1 @@ -1,5 +1,5 @@ -.TH MDB_COPY 1 "2014/06/20" "LMDB 0.9.14" -.\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. +.TH MDB_COPY 1 "2014/07/01" "LMDB 0.9.14" +.\" Copyright 2012-2018 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. .SH NAME mdb_copy \- LMDB environment copy tool @@ -11,8 +11,6 @@ mdb_copy \- LMDB environment copy tool .BR \-c ] [\c .BR \-n ] -[\c -.BR \-v ] .B srcpath [\c .BR dstpath ] @@ -38,13 +36,10 @@ Write the library version number to the standard output, and exit. Compact while copying. Only current data pages will be copied; freed or unused pages will be omitted from the copy. This option will slow down the backup process as it is more CPU-intensive. +Currently it fails if the environment has suffered a page leak. .TP .BR \-n Open LDMB environment(s) which do not use subdirectories. -.TP -.BR \-v -Use the previous environment state instead of the latest state. -This may be useful if the latest state has been corrupted. .SH DIAGNOSTICS Exit status is zero if no errors occur. diff --git a/contrib/db/liblmdb/mdb_copy.c b/contrib/db/liblmdb/mdb_copy.c index 95a6e713..1b89396e 100644 --- a/contrib/db/liblmdb/mdb_copy.c +++ b/contrib/db/liblmdb/mdb_copy.c @@ -1,6 +1,6 @@ /* mdb_copy.c - memory-mapped database backup tool */ /* - * Copyright 2012-2015 Howard Chu, Symas Corp. + * Copyright 2012-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,8 +38,6 @@ int main(int argc,char * argv[]) for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) { if (argv[1][1] == 'n' && argv[1][2] == '\0') flags |= MDB_NOSUBDIR; - else if (argv[1][1] == 'v' && argv[1][2] == '\0') - flags |= MDB_PREVSNAPSHOT; else if (argv[1][1] == 'c' && argv[1][2] == '\0') cpflags |= MDB_CP_COMPACT; else if (argv[1][1] == 'V' && argv[1][2] == '\0') { @@ -50,7 +48,7 @@ int main(int argc,char * argv[]) } if (argc<2 || argc>3) { - fprintf(stderr, "usage: %s [-V] [-c] [-n] [-v] srcpath [dstpath]\n", progname); + fprintf(stderr, "usage: %s [-V] [-c] [-n] srcpath [dstpath]\n", progname); exit(EXIT_FAILURE); } diff --git a/contrib/db/liblmdb/mdb_dump.1 b/contrib/db/liblmdb/mdb_dump.1 index a25fb92e..72cf6ca8 100644 --- a/contrib/db/liblmdb/mdb_dump.1 +++ b/contrib/db/liblmdb/mdb_dump.1 @@ -1,5 +1,5 @@ -.TH MDB_DUMP 1 "2014/06/20" "LMDB 0.9.14" -.\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. +.TH MDB_DUMP 1 "2015/09/30" "LMDB 0.9.17" +.\" Copyright 2014-2018 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. .SH NAME mdb_dump \- LMDB environment export tool @@ -14,8 +14,6 @@ mdb_dump \- LMDB environment export tool [\c .BR \-n ] [\c -.BR \-v ] -[\c .BR \-p ] [\c .BR \-a \ | @@ -44,10 +42,6 @@ names will be listed, no data will be output. .BR \-n Dump an LMDB database which does not use subdirectories. .TP -.BR \-v -Use the previous environment state instead of the latest state. -This may be useful if the latest state has been corrupted. -.TP .BR \-p If characters in either the key or data items are printing characters (as defined by isprint(3)), output them directly. This option permits users to diff --git a/contrib/db/liblmdb/mdb_dump.c b/contrib/db/liblmdb/mdb_dump.c index 7a42bc0b..9df5dc0b 100644 --- a/contrib/db/liblmdb/mdb_dump.c +++ b/contrib/db/liblmdb/mdb_dump.c @@ -1,6 +1,6 @@ /* mdb_dump.c - memory-mapped database dump tool */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,15 +25,6 @@ #else #define Z "z" #endif -#ifdef MDB_VL32 -#ifdef _WIN32 -#define Y "I64" -#else -#define Y "ll" -#endif -#else -#define Y Z -#endif #define PRINT 1 static int mode; @@ -124,7 +115,7 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) if (name) printf("database=%s\n", name); printf("type=btree\n"); - printf("mapsize=%" Y "u\n", info.me_mapsize); + printf("mapsize=%" Z "u\n", info.me_mapsize); if (info.me_mapaddr) printf("mapaddr=%p\n", info.me_mapaddr); printf("maxreaders=%u\n", info.me_maxreaders); @@ -164,7 +155,7 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) static void usage(char *prog) { - fprintf(stderr, "usage: %s [-V] [-f output] [-l] [-n] [-p] [-v] [-a|-s subdb] dbpath\n", prog); + fprintf(stderr, "usage: %s [-V] [-f output] [-l] [-n] [-p] [-a|-s subdb] dbpath\n", prog); exit(EXIT_FAILURE); } @@ -188,7 +179,6 @@ int main(int argc, char *argv[]) * -n: use NOSUBDIR flag on env_open * -p: use printable characters * -f: write to file instead of stdout - * -v: use previous snapshot * -V: print version and exit * (default) dump only the main DB */ @@ -216,9 +206,6 @@ int main(int argc, char *argv[]) case 'n': envflags |= MDB_NOSUBDIR; break; - case 'v': - envflags |= MDB_PREVSNAPSHOT; - break; case 'p': mode |= PRINT; break; diff --git a/contrib/db/liblmdb/mdb_load.1 b/contrib/db/liblmdb/mdb_load.1 index ede3702d..998acc12 100644 --- a/contrib/db/liblmdb/mdb_load.1 +++ b/contrib/db/liblmdb/mdb_load.1 @@ -1,5 +1,5 @@ -.TH MDB_LOAD 1 "2014/06/20" "LMDB 0.9.14" -.\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. +.TH MDB_LOAD 1 "2015/09/30" "LMDB 0.9.17" +.\" Copyright 2014-2018 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. .SH NAME mdb_load \- LMDB environment import tool @@ -37,13 +37,6 @@ option below. .BR \-V Write the library version number to the standard output, and exit. .TP -.BR \-a -Append all records in the order they appear in the input. The input is assumed to already be -in correctly sorted order and no sorting or checking for redundant values will be performed. -This option must be used to reload data that was produced by running -.B mdb_dump -on a database that uses custom compare functions. -.TP .BR \-f \ file Read from the specified file instead of from the standard input. .TP diff --git a/contrib/db/liblmdb/mdb_load.c b/contrib/db/liblmdb/mdb_load.c index 797c2f97..0f177f1e 100644 --- a/contrib/db/liblmdb/mdb_load.c +++ b/contrib/db/liblmdb/mdb_load.c @@ -1,6 +1,6 @@ /* mdb_load.c - memory-mapped database load tool */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,22 +37,12 @@ static int Eof; static MDB_envinfo info; static MDB_val kbuf, dbuf; -static MDB_val k0buf; #ifdef _WIN32 #define Z "I" #else #define Z "z" #endif -#ifdef MDB_VL32 -#ifdef _WIN32 -#define Y "I64" -#else -#define Y "ll" -#endif -#else -#define Y Z -#endif #define STRLENOF(s) (sizeof(s)-1) @@ -78,6 +68,7 @@ static void readhdr(void) { char *ptr; + flags = 0; while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) { lineno++; if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { @@ -122,7 +113,7 @@ static void readhdr(void) int i; ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); if (ptr) *ptr = '\0'; - i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Y "u", &info.me_mapsize); + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); if (i != 1) { fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); @@ -258,7 +249,8 @@ badend: c2 += 2; } } else { - c1++; c2++; + /* copies are redundant when no escapes were used */ + *c1++ = *c2++; } } } else { @@ -286,15 +278,10 @@ badend: static void usage(void) { - fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog); + fprintf(stderr, "usage: %s [-V] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog); exit(EXIT_FAILURE); } -static int greater(const MDB_val *a, const MDB_val *b) -{ - return 1; -} - int main(int argc, char *argv[]) { int i, rc; @@ -304,8 +291,7 @@ int main(int argc, char *argv[]) MDB_dbi dbi; char *envname; int envflags = 0, putflags = 0; - int dohdr = 0, append = 0; - MDB_val prevk; + int dohdr = 0; prog = argv[0]; @@ -313,23 +299,19 @@ int main(int argc, char *argv[]) usage(); } - /* -a: append records in input order - * -f: load file instead of stdin + /* -f: load file instead of stdin * -n: use NOSUBDIR flag on env_open * -s: load into named subDB * -N: use NOOVERWRITE on puts * -T: read plaintext * -V: print version and exit */ - while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) { + while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) { switch(i) { case 'V': printf("%s\n", MDB_VERSION_STRING); exit(0); break; - case 'a': - append = 1; - break; case 'f': if (freopen(optarg, "r", stdin) == NULL) { fprintf(stderr, "%s: %s: reopen: %s\n", @@ -388,17 +370,11 @@ int main(int argc, char *argv[]) } kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; - kbuf.mv_data = malloc(kbuf.mv_size * 2); - k0buf.mv_size = kbuf.mv_size; - k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size; - prevk.mv_size = 0; - prevk.mv_data = k0buf.mv_data; + kbuf.mv_data = malloc(kbuf.mv_size); while(!Eof) { MDB_val key, data; int batch = 0; - flags = 0; - int appflag; if (!dohdr) { dohdr = 1; @@ -416,11 +392,6 @@ int main(int argc, char *argv[]) fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } - if (append) { - mdb_set_compare(txn, dbi, greater); - if (flags & MDB_DUPSORT) - mdb_set_dupsort(txn, dbi, greater); - } rc = mdb_cursor_open(txn, dbi, &mc); if (rc) { @@ -439,20 +410,7 @@ int main(int argc, char *argv[]) goto txn_abort; } - if (append) { - appflag = MDB_APPEND; - if (flags & MDB_DUPSORT) { - if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size)) - appflag = MDB_APPENDDUP; - else { - memcpy(prevk.mv_data, key.mv_data, key.mv_size); - prevk.mv_size = key.mv_size; - } - } - } else { - appflag = 0; - } - rc = mdb_cursor_put(mc, &key, &data, putflags|appflag); + rc = mdb_cursor_put(mc, &key, &data, putflags); if (rc == MDB_KEYEXIST && putflags) continue; if (rc) { diff --git a/contrib/db/liblmdb/mdb_stat.1 b/contrib/db/liblmdb/mdb_stat.1 index bf49bd3b..7c3f2846 100644 --- a/contrib/db/liblmdb/mdb_stat.1 +++ b/contrib/db/liblmdb/mdb_stat.1 @@ -1,5 +1,5 @@ -.TH MDB_STAT 1 "2014/06/20" "LMDB 0.9.14" -.\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. +.TH MDB_STAT 1 "2015/09/30" "LMDB 0.9.17" +.\" Copyright 2012-2018 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. .SH NAME mdb_stat \- LMDB environment status tool @@ -14,8 +14,6 @@ mdb_stat \- LMDB environment status tool [\c .BR \-n ] [\c -.BR \-v ] -[\c .BR \-r [ r ]] [\c .BR \-a \ | @@ -41,10 +39,6 @@ If \fB\-fff\fP is given, display the full list of page IDs in the freelist. .BR \-n Display the status of an LMDB database which does not use subdirectories. .TP -.BR \-v -Use the previous environment state instead of the latest state. -This may be useful if the latest state has been corrupted. -.TP .BR \-r Display information about the environment reader table. Shows the process ID, thread ID, and transaction ID for each active diff --git a/contrib/db/liblmdb/mdb_stat.c b/contrib/db/liblmdb/mdb_stat.c index 30ec81fe..f4c0dc1e 100644 --- a/contrib/db/liblmdb/mdb_stat.c +++ b/contrib/db/liblmdb/mdb_stat.c @@ -1,6 +1,6 @@ /* mdb_stat.c - memory-mapped database status tool */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -22,15 +22,6 @@ #else #define Z "z" #endif -#ifdef MDB_VL32 -#ifdef _WIN32 -#define Y "I64" -#else -#define Y "ll" -#endif -#else -#define Y Z -#endif static void prstat(MDB_stat *ms) { @@ -38,15 +29,15 @@ static void prstat(MDB_stat *ms) printf(" Page size: %u\n", ms->ms_psize); #endif printf(" Tree depth: %u\n", ms->ms_depth); - printf(" Branch pages: %"Y"u\n", ms->ms_branch_pages); - printf(" Leaf pages: %"Y"u\n", ms->ms_leaf_pages); - printf(" Overflow pages: %"Y"u\n", ms->ms_overflow_pages); - printf(" Entries: %"Y"u\n", ms->ms_entries); + printf(" Branch pages: %"Z"u\n", ms->ms_branch_pages); + printf(" Leaf pages: %"Z"u\n", ms->ms_leaf_pages); + printf(" Overflow pages: %"Z"u\n", ms->ms_overflow_pages); + printf(" Entries: %"Z"u\n", ms->ms_entries); } static void usage(char *prog) { - fprintf(stderr, "usage: %s [-V] [-n] [-e] [-r[r]] [-f[f[f]]] [-v] [-a|-s subdb] dbpath\n", prog); + fprintf(stderr, "usage: %s [-V] [-n] [-e] [-r[r]] [-f[f[f]]] [-a|-s subdb] dbpath\n", prog); exit(EXIT_FAILURE); } @@ -73,7 +64,6 @@ int main(int argc, char *argv[]) * -f: print freelist info * -r: print reader info * -n: use NOSUBDIR flag on env_open - * -v: use previous snapshot * -V: print version and exit * (default) print stat of only the main DB */ @@ -97,9 +87,6 @@ int main(int argc, char *argv[]) case 'n': envflags |= MDB_NOSUBDIR; break; - case 'v': - envflags |= MDB_PREVSNAPSHOT; - break; case 'r': rdrinfo++; break; @@ -138,11 +125,11 @@ int main(int argc, char *argv[]) (void)mdb_env_info(env, &mei); printf("Environment Info\n"); printf(" Map address: %p\n", mei.me_mapaddr); - printf(" Map size: %"Y"u\n", mei.me_mapsize); + printf(" Map size: %"Z"u\n", mei.me_mapsize); printf(" Page size: %u\n", mst.ms_psize); - printf(" Max pages: %"Y"u\n", mei.me_mapsize / mst.ms_psize); - printf(" Number of pages used: %"Y"u\n", mei.me_last_pgno+1); - printf(" Last transaction ID: %"Y"u\n", mei.me_last_txnid); + printf(" Max pages: %"Z"u\n", mei.me_mapsize / mst.ms_psize); + printf(" Number of pages used: %"Z"u\n", mei.me_last_pgno+1); + printf(" Last transaction ID: %"Z"u\n", mei.me_last_txnid); printf(" Max readers: %u\n", mei.me_maxreaders); printf(" Number of readers used: %u\n", mei.me_numreaders); } diff --git a/contrib/db/liblmdb/midl.c b/contrib/db/liblmdb/midl.c index 152a1ec0..75420c41 100644 --- a/contrib/db/liblmdb/midl.c +++ b/contrib/db/liblmdb/midl.c @@ -3,7 +3,8 @@ /* $OpenLDAP$ */ /* This work is part of OpenLDAP Software . * - * Copyright 2000-2015 The OpenLDAP Foundation. + * Copyright 2000-2019 The OpenLDAP Foundation. + * Portions Copyright 2001-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -354,67 +355,5 @@ int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ) return 0; } -#ifdef MDB_VL32 -unsigned mdb_mid3l_search( MDB_ID3L ids, MDB_ID id ) -{ - /* - * binary search of id in ids - * if found, returns position of id - * if not found, returns first position greater than id - */ - unsigned base = 0; - unsigned cursor = 1; - int val = 0; - unsigned n = (unsigned)ids[0].mid; - - while( 0 < n ) { - unsigned pivot = n >> 1; - cursor = base + pivot + 1; - val = CMP( id, ids[cursor].mid ); - - if( val < 0 ) { - n = pivot; - - } else if ( val > 0 ) { - base = cursor; - n -= pivot + 1; - - } else { - return cursor; - } - } - - if( val > 0 ) { - ++cursor; - } - return cursor; -} - -int mdb_mid3l_insert( MDB_ID3L ids, MDB_ID3 *id ) -{ - unsigned x, i; - - x = mdb_mid3l_search( ids, id->mid ); - - if( x < 1 ) { - /* internal error */ - return -2; - } - - if ( x <= ids[0].mid && ids[x].mid == id->mid ) { - /* duplicate */ - return -1; - } - - /* insert id */ - ids[0].mid++; - for (i=(unsigned)ids[0].mid; i>x; i--) - ids[i] = ids[i-1]; - ids[x] = *id; - - return 0; -} -#endif /* MDB_VL32 */ - /** @} */ /** @} */ diff --git a/contrib/db/liblmdb/midl.h b/contrib/db/liblmdb/midl.h index 1555ecb1..462c3497 100644 --- a/contrib/db/liblmdb/midl.h +++ b/contrib/db/liblmdb/midl.h @@ -11,7 +11,8 @@ /* $OpenLDAP$ */ /* This work is part of OpenLDAP Software . * - * Copyright 2000-2015 The OpenLDAP Foundation. + * Copyright 2000-2019 The OpenLDAP Foundation. + * Portions Copyright 2001-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +28,6 @@ #define _MDB_MIDL_H_ #include -#include #ifdef __cplusplus extern "C" { @@ -43,11 +43,7 @@ extern "C" { /** A generic unsigned ID number. These were entryIDs in back-bdb. * Preferably it should have the same size as a pointer. */ -#ifdef MDB_VL32 -typedef uint64_t MDB_ID; -#else typedef size_t MDB_ID; -#endif /** An IDL is an ID List, a sorted array of IDs. The first * element of the array is a counter for how many actual @@ -182,20 +178,6 @@ int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ); */ int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ); -#ifdef MDB_VL32 -typedef struct MDB_ID3 { - MDB_ID mid; /**< The ID */ - void *mptr; /**< The pointer */ - unsigned int mcnt; /**< Number of pages */ - unsigned int mref; /**< Refcounter */ -} MDB_ID3; - -typedef MDB_ID3 *MDB_ID3L; - -unsigned mdb_mid3l_search( MDB_ID3L ids, MDB_ID id ); -int mdb_mid3l_insert( MDB_ID3L ids, MDB_ID3 *id ); - -#endif /* MDB_VL32 */ /** @} */ /** @} */ #ifdef __cplusplus diff --git a/contrib/db/liblmdb/mtest.c b/contrib/db/liblmdb/mtest.c index 9d15088b..6fc5840c 100644 --- a/contrib/db/liblmdb/mtest.c +++ b/contrib/db/liblmdb/mtest.c @@ -1,6 +1,6 @@ /* mtest.c - memory-mapped database tester/toy */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/mtest2.c b/contrib/db/liblmdb/mtest2.c index eacbe59d..64b742aa 100644 --- a/contrib/db/liblmdb/mtest2.c +++ b/contrib/db/liblmdb/mtest2.c @@ -1,6 +1,6 @@ /* mtest2.c - memory-mapped database tester/toy */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/mtest3.c b/contrib/db/liblmdb/mtest3.c index 9db79e62..81e4bbf9 100644 --- a/contrib/db/liblmdb/mtest3.c +++ b/contrib/db/liblmdb/mtest3.c @@ -1,6 +1,6 @@ /* mtest3.c - memory-mapped database tester/toy */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/mtest4.c b/contrib/db/liblmdb/mtest4.c index 6df890e2..c355cf10 100644 --- a/contrib/db/liblmdb/mtest4.c +++ b/contrib/db/liblmdb/mtest4.c @@ -1,6 +1,6 @@ /* mtest4.c - memory-mapped database tester/toy */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/mtest5.c b/contrib/db/liblmdb/mtest5.c index 14e3c0da..95793ec1 100644 --- a/contrib/db/liblmdb/mtest5.c +++ b/contrib/db/liblmdb/mtest5.c @@ -1,6 +1,6 @@ /* mtest5.c - memory-mapped database tester/toy */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/mtest6.c b/contrib/db/liblmdb/mtest6.c index ae3c7f26..cb0d4d73 100644 --- a/contrib/db/liblmdb/mtest6.c +++ b/contrib/db/liblmdb/mtest6.c @@ -1,6 +1,6 @@ /* mtest6.c - memory-mapped database tester/toy */ /* - * Copyright 2011-2015 Howard Chu, Symas Corp. + * Copyright 2011-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/sample-bdb.txt b/contrib/db/liblmdb/sample-bdb.txt index 563807a2..97220f0e 100644 --- a/contrib/db/liblmdb/sample-bdb.txt +++ b/contrib/db/liblmdb/sample-bdb.txt @@ -3,7 +3,7 @@ * Do a line-by-line comparison of this and sample-mdb.txt */ /* - * Copyright 2012-2015 Howard Chu, Symas Corp. + * Copyright 2012-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/contrib/db/liblmdb/sample-mdb.txt b/contrib/db/liblmdb/sample-mdb.txt index 10a25687..1d20ed3d 100644 --- a/contrib/db/liblmdb/sample-mdb.txt +++ b/contrib/db/liblmdb/sample-mdb.txt @@ -3,7 +3,7 @@ * Do a line-by-line comparison of this and sample-bdb.txt */ /* - * Copyright 2012-2015 Howard Chu, Symas Corp. + * Copyright 2012-2018 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without