2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid[] = "@(#)hash.c 10.36 (Sleepycat) 1/8/98";
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
73 static int __ham_c_close __P((DBC *));
74 static int __ham_c_del __P((DBC *, int));
75 static int __ham_c_get __P((DBC *, DBT *, DBT *, int));
76 static int __ham_c_put __P((DBC *, DBT *, DBT *, int));
77 static int __ham_c_init __P((DB *, DB_TXN *, DBC **));
78 static int __ham_cursor __P((DB *, DB_TXN *, DBC **));
79 static int __ham_delete __P((DB *, DB_TXN *, DBT *, int));
80 static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
81 static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
82 static void __ham_init_htab __P((HTAB *, u_int));
83 static int __ham_lookup __P((HTAB *,
84 HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
85 static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
86 static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
87 static int __ham_sync __P((DB *, int));
89 /************************** INTERFACE ROUTINES ***************************/
95 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
98 __ham_open(dbp, dbinfo)
105 int file_existed, ret;
109 if ((hashp = (HTAB *)__db_calloc(1, sizeof(HTAB))) == NULL)
113 /* Set the hash function if specified by the user. */
114 if (dbinfo != NULL && dbinfo->h_hash != NULL)
115 hashp->hash = dbinfo->h_hash;
118 * Initialize the remaining fields of the dbp. The type, close and
119 * fd functions are all set in db_open.
121 dbp->internal = hashp;
122 dbp->cursor = __ham_cursor;
123 dbp->del = __ham_delete;
124 dbp->get = __ham_get;
125 dbp->put = __ham_put;
126 dbp->sync = __ham_sync;
128 /* If locking is turned on, lock the meta data page. */
129 if (F_ISSET(dbp, DB_AM_LOCKING)) {
130 dbp->lock.pgno = BUCKET_INVALID;
131 if ((ret = lock_get(dbenv->lk_info, dbp->locker,
132 0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
140 * Now, we can try to read the meta-data page and figure out
141 * if we set up locking and got the meta-data page properly.
142 * If this is a new file, initialize it, and put it back dirty.
144 if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
147 /* Initialize the hashp structure */
148 if (hashp->hdr->magic == DB_HASHMAGIC) {
150 /* File exists, verify the data in the header. */
151 if (hashp->hash == NULL)
153 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
154 if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
155 hashp->hdr->h_charkey) {
156 __db_err(hashp->dbp->dbenv,
157 "hash: incompatible hash function");
161 if (F_ISSET(hashp->hdr, DB_HASH_DUP))
162 F_SET(dbp, DB_AM_DUP);
165 * File does not exist; we must initialize the header. If
166 * locking is enabled, that means getting a write lock first.
169 if (F_ISSET(dbp, DB_AM_LOCKING) &&
170 ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
171 (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
172 &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
178 hashp->hdr->ffactor =
179 dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
180 __ham_init_htab(hashp, dbinfo != NULL ? dbinfo->h_nelem : 0);
181 if (F_ISSET(dbp, DB_AM_DUP))
182 F_SET(hashp->hdr, DB_HASH_DUP);
183 if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
187 /* Initialize the default cursor. */
188 __ham_c_init(dbp, NULL, &curs);
189 TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
191 /* Allocate memory for our split buffer. */
192 if ((hashp->split_buf = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
197 #ifdef NO_STATISTICS_FOR_DB_ERR
199 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
200 "TABLE POINTER ", (long)hashp,
201 "BUCKET SIZE ", (long)hashp->hdr->pagesize,
202 "FILL FACTOR ", (long)hashp->hdr->ffactor,
203 "MAX BUCKET ", (long)hashp->hdr->max_bucket,
204 "OVFL POINT ", (long)hashp->hdr->ovfl_point,
205 "LAST FREED ", (long)hashp->hdr->last_freed,
206 "HIGH MASK ", (long)hashp->hdr->high_mask,
207 "LOW MASK ", (long)hashp->hdr->low_mask,
208 "NELEM ", (long)hashp->hdr->nelem,
209 "FLAGS ", (long)hashp->hdr->flags);
212 /* Release the meta data page */
213 (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
214 if (F_ISSET(dbp, DB_AM_LOCKING) &&
215 (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
223 /* Sync the file so that we know the meta-data has made it to disk. */
224 if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
228 out: (void)__ham_close(dbp);
233 * PUBLIC: int __ham_close __P((DB *));
242 DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
243 hashp = (HTAB *)dbp->internal;
246 /* Free the split page. */
247 if (hashp->split_buf)
248 FREE(hashp->split_buf, dbp->pgsize);
250 if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
251 (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
253 if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
254 hashp->hlock)) != 0 && ret == 0)
257 FREE(hashp, sizeof(HTAB));
258 dbp->internal = NULL;
262 /************************** LOCAL CREATION ROUTINES **********************/
264 * Initialize the hash table header of a new table.
267 __ham_init_htab(hashp, nelem)
271 int32_t l2, nbuckets;
273 hashp->hdr->nelem = 0;
274 hashp->hdr->pagesize = hashp->dbp->pgsize;
275 ZERO_LSN(hashp->hdr->lsn);
276 hashp->hdr->magic = DB_HASHMAGIC;
277 hashp->hdr->version = DB_HASHVERSION;
278 if (hashp->hash == NULL)
280 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
281 hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
282 if (nelem != 0 && hashp->hdr->ffactor != 0) {
283 nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
284 l2 = __db_log2(nelem > 2 ? nelem : 2);
290 hashp->hdr->spares[l2] = 0;
291 hashp->hdr->spares[l2 + 1] = 0;
292 hashp->hdr->ovfl_point = l2;
293 hashp->hdr->last_freed = PGNO_INVALID;
295 hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
296 hashp->hdr->low_mask = (nbuckets >> 1) - 1;
297 memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
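/*
 * Illustrative sizing example (added commentary, not in the original):
 * with dbinfo->h_nelem == 1000 and dbinfo->h_ffactor == 10, the code
 * above computes nelem = (1000 - 1) / 10 + 1 = 100 and
 * l2 = __db_log2(100) = 7, so the table presumably starts with
 * 1 << 7 == 128 buckets; that leaves max_bucket == high_mask == 127
 * and low_mask == 63 for __ham_call_hash to use.
 */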
300 /********************** DESTROY/CLOSE ROUTINES ************************/
304 * Write modified pages to disk
311 __ham_sync(dbp, flags)
317 DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
318 if ((ret = __db_syncchk(dbp, flags)) != 0)
320 if (F_ISSET(dbp, DB_AM_RDONLY))
323 if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
329 /****************************** SEARCH ROUTINES *****************************/
331 * All the access routines return
335 * 1 to indicate an external ERROR (e.g., key not found)
336 * -1 to indicate an internal ERROR (e.g., out of memory)
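/*
 * A minimal caller-side sketch (added, not part of the original):
 * applications reach these routines through the dbp method table set
 * up in __ham_open, and typically separate "key absent" from a hard
 * failure, e.g.
 *
 *	switch (dbp->get(dbp, txn, &key, &data, 0)) {
 *	case 0:			... use data ...	break;
 *	case DB_NOTFOUND:	... key absent ...	break;
 *	default:		... handle error ...	break;
 *	}
 */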
340 __ham_get(dbp, txn, key, data, flags)
353 DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
354 if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
358 if (F_ISSET(dbp, DB_AM_THREAD) &&
359 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
362 hashp = (HTAB *)ldbp->internal;
363 SET_LOCKER(ldbp, txn);
364 GET_META(ldbp, hashp);
365 cp = TAILQ_FIRST(&ldbp->curs_queue);
367 hashp->hash_accesses++;
368 hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
369 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
370 if (F_ISSET(hcp, H_OK))
371 ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
372 else /* Key was not found */
375 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
377 RELEASE_META(ldbp, hashp);
378 if (F_ISSET(dbp, DB_AM_THREAD))
379 __db_puthandle(ldbp);
384 __ham_put(dbp, txn, key, data, flags)
398 DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
399 if ((ret = __db_putchk(dbp, key, data,
400 flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
404 if (F_ISSET(dbp, DB_AM_THREAD) &&
405 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
408 hashp = (HTAB *)ldbp->internal;
409 SET_LOCKER(ldbp, txn);
410 GET_META(ldbp, hashp);
411 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
413 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
414 HKEYDATA_PSIZE(key->size)) +
415 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
416 HKEYDATA_PSIZE(data->size));
418 hashp->hash_accesses++;
419 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
421 if (ret == DB_NOTFOUND) {
423 if (hcp->seek_found_page != PGNO_INVALID &&
424 hcp->seek_found_page != hcp->pgno) {
425 if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
427 hcp->pgno = hcp->seek_found_page;
428 hcp->bndx = NDX_INVALID;
431 if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
433 * Doing a partial put, but the key does not exist
434 * and we are not beginning the write at 0. We
435 * must create a data item padded up to doff and
436 * then write the new bytes represented by val.
438 ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
439 &hcp->big_data, &hcp->big_datalen);
441 memset(tmp_val.data, 0, data->doff);
442 memcpy((u_int8_t *)tmp_val.data + data->doff,
443 data->data, data->size);
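/*
 * Added layout note (not in the original): for a partial put with
 * data->doff == 10 and data->size == 5 against a key that does not
 * exist, tmp_val ends up holding 10 + 5 == 15 bytes: ten zero bytes
 * of padding followed by the five caller-supplied bytes, and that
 * whole buffer becomes the new data item added below.
 */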
450 ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
451 } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
452 if (flags == DB_NOOVERWRITE)
454 else if (F_ISSET(ldbp, DB_AM_DUP))
455 ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
457 ret = __ham_overwrite(hashp, hcp, data);
460 /* Free up all the cursor pages. */
461 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
463 /* Now check if we have to grow. */
464 out: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
465 ret = __ham_expand_table(hashp);
466 F_CLR(hcp, H_EXPAND);
469 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
471 RELEASE_META(ldbp, hashp);
472 if (F_ISSET(dbp, DB_AM_THREAD))
473 __db_puthandle(ldbp);
478 __ham_cursor(dbp, txnid, dbcp)
485 DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
486 if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
490 TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
491 DB_THREAD_UNLOCK(dbp);
496 __ham_c_init(dbp, txnid, dbcp)
502 HASH_CURSOR *new_curs;
504 if ((db_curs = (DBC *)__db_calloc(sizeof(DBC), 1)) == NULL)
508 (HASH_CURSOR *)__db_calloc(sizeof(struct cursor_t), 1)) == NULL) {
509 FREE(db_curs, sizeof(DBC));
513 db_curs->internal = new_curs;
514 db_curs->c_close = __ham_c_close;
515 db_curs->c_del = __ham_c_del;
516 db_curs->c_get = __ham_c_get;
517 db_curs->c_put = __ham_c_put;
518 db_curs->txn = txnid;
521 new_curs->db_cursor = db_curs;
522 __ham_item_init(new_curs);
530 __ham_delete(dbp, txn, key, flags)
541 DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
542 if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
546 if (F_ISSET(dbp, DB_AM_THREAD) &&
547 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
549 hashp = (HTAB *)ldbp->internal;
550 SET_LOCKER(ldbp, txn);
551 GET_META(ldbp, hashp);
552 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
554 hashp->hash_accesses++;
555 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
556 if (F_ISSET(hcp, H_OK))
557 ret = __ham_del_pair(hashp, hcp, 1);
561 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
563 RELEASE_META(ldbp, hashp);
564 if (F_ISSET(dbp, DB_AM_THREAD))
565 __db_puthandle(ldbp);
569 /* ****************** CURSORS ********************************** */
571 __ham_c_close(cursor)
577 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
579 * If the pagep, dpagep, and lock fields of the cursor are all NULL,
580 * then there really isn't a need to get a handle here. However,
581 * the normal case is that at least one of those fields is non-NULL,
582 * and putting those checks in here would couple the ham_item_done
583 * functionality with cursor close which would be pretty disgusting.
584 * Instead, we pay the overhead here of always getting the handle.
587 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
588 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
591 ret = __ham_c_iclose(ldbp, cursor);
593 if (F_ISSET(ldbp, DB_AM_THREAD))
594 __db_puthandle(ldbp);
600 * Internal cursor close routine; assumes it is being passed the correct
601 * handle, rather than getting and putting a handle.
603 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
606 __ham_c_iclose(dbp, dbc)
614 hashp = (HTAB *)dbp->internal;
615 hcp = (HASH_CURSOR *)dbc->internal;
616 ret = __ham_item_done(hashp, hcp, 0);
619 FREE(hcp->big_key, hcp->big_keylen);
621 FREE(hcp->big_data, hcp->big_datalen);
624 * All cursors (except the default ones) are linked off the master.
625 * Therefore, when we close the cursor, we have to remove it from
626 * the master, not the local one.
627 * XXX I am always removing from the master; what about local cursors?
629 DB_THREAD_LOCK(dbc->dbp);
630 TAILQ_REMOVE(&dbc->dbp->curs_queue, dbc, links);
631 DB_THREAD_UNLOCK(dbc->dbp);
633 FREE(hcp, sizeof(HASH_CURSOR));
634 FREE(dbc, sizeof(DBC));
640 __ham_c_del(cursor, flags)
647 HASH_CURSOR save_curs;
648 db_pgno_t ppgno, chg_pgno;
651 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
653 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
654 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
656 hashp = (HTAB *)ldbp->internal;
657 hcp = (HASH_CURSOR *)cursor->internal;
659 if ((ret = __db_cdelchk(ldbp, flags,
660 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
662 if (F_ISSET(hcp, H_DELETED))
663 return (DB_NOTFOUND);
665 SET_LOCKER(hashp->dbp, cursor->txn);
666 GET_META(hashp->dbp, hashp);
667 hashp->hash_accesses++;
668 if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
670 if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
672 * We are about to remove a duplicate from offpage.
675 * 1. We will remove an item on a page, but there are more
676 * items on that page.
677 * 2. We will remove the last item on a page, but there is a
678 * following page of duplicates.
679 * 3. We will remove the last item on a page; this page was the
680 * last page in a duplicate set, but there were dups before it.
682 * 4. We will remove the last item on a page, removing the last
684 * In case 1 hcp->dpagep is unchanged.
685 * In case 2 hcp->dpagep comes back pointing to the next dup page.
687 * In case 3 hcp->dpagep comes back NULL.
688 * In case 4 hcp->dpagep comes back NULL.
690 * Case 4 results in deleting the pair off the master page.
691 * The normal code for doing this knows how to delete the
692 * duplicates, so we will handle this case in the normal code.
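/*
 * Added summary (commentary only, not in the original): after
 * __db_drem below, the code distinguishes the cases as follows:
 *	dpagep != NULL, PGNO(dpagep) == dpgno	-> case 1
 *	dpagep != NULL, PGNO(dpagep) != dpgno	-> case 2
 *	dpagep == NULL, ppgno != PGNO_INVALID	-> case 3
 *	dpagep == NULL, ppgno == PGNO_INVALID	-> case 4
 */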
694 ppgno = PREV_PGNO(hcp->dpagep);
695 if (ppgno == PGNO_INVALID &&
696 NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
697 NUM_ENT(hcp->dpagep) == 1)
700 /* Remove item from duplicate page. */
701 chg_pgno = hcp->dpgno;
702 if ((ret = __db_drem(hashp->dbp,
703 &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
706 if (hcp->dpagep == NULL) {
707 if (ppgno != PGNO_INVALID) { /* Case 3 */
709 if ((ret = __ham_get_cpage(hashp, hcp,
712 hcp->dndx = NUM_ENT(hcp->dpagep);
713 F_SET(hcp, H_DELETED);
714 } else { /* Case 4 */
715 ret = __ham_del_pair(hashp, hcp, 1);
716 hcp->dpgno = PGNO_INVALID;
718 * Delpair updated the cursor queue, so we
719 * don't have to do that here.
721 chg_pgno = PGNO_INVALID;
723 } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
724 hcp->dndx = 0; /* Case 2 */
725 hcp->dpgno = PGNO(hcp->dpagep);
726 if (ppgno == PGNO_INVALID)
727 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
728 H_DATAINDEX(hcp->bndx))),
729 &hcp->dpgno, sizeof(db_pgno_t));
730 F_SET(hcp, H_DELETED);
732 F_SET(hcp, H_DELETED);
733 if (chg_pgno != PGNO_INVALID)
734 __ham_c_update(hcp, chg_pgno, 0, 0, 1);
735 } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */
736 if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
737 LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
738 ret = __ham_del_pair(hashp, hcp, 1);
743 F_SET(&repldbt, DB_DBT_PARTIAL);
744 repldbt.doff = hcp->dup_off;
745 repldbt.dlen = DUP_SIZE(hcp->dup_len);
747 ret = __ham_replpair(hashp, hcp, &repldbt, 0);
748 hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
749 F_SET(hcp, H_DELETED);
750 __ham_c_update(hcp, hcp->pgno,
751 DUP_SIZE(hcp->dup_len), 0, 1);
755 /* Not a duplicate */
756 normal: ret = __ham_del_pair(hashp, hcp, 1);
758 out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
762 RELEASE_META(hashp->dbp, hashp);
763 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
764 __db_puthandle(ldbp);
769 __ham_c_get(cursor, key, data, flags)
777 HASH_CURSOR *hcp, save_curs;
778 int get_key, ret, t_ret;
780 DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
781 flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
784 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
785 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
787 hashp = (HTAB *)(ldbp->internal);
788 hcp = (HASH_CURSOR *)cursor->internal;
791 __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
794 SET_LOCKER(hashp->dbp, cursor->txn);
795 GET_META(hashp->dbp, hashp);
796 hashp->hash_accesses++;
804 if (hcp->bucket != BUCKET_INVALID) {
805 ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
810 ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
813 ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
816 if (hcp->bucket == BUCKET_INVALID)
818 ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
822 ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
826 if (F_ISSET(hcp, H_DELETED)) {
831 ret = __ham_item(hashp, hcp, DB_LOCK_READ);
836 * Must always enter this loop to do error handling and
837 * check for a big key/data pair.
840 if (ret != 0 && ret != DB_NOTFOUND)
842 else if (F_ISSET(hcp, H_OK)) {
844 if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
845 H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
846 &hcp->big_keylen)) != 0)
849 ret = __ham_dup_return(hashp, hcp, data, flags);
851 } else if (!F_ISSET(hcp, H_NOMORE)) {
857 * Ran out of entries in a bucket; change buckets.
862 ret = __ham_item_done(hashp, hcp, 0);
863 if (hcp->bucket == 0) {
868 hcp->bndx = NDX_INVALID;
870 ret = __ham_item_prev(hashp,
875 ret = __ham_item_done(hashp, hcp, 0);
876 hcp->bndx = NDX_INVALID;
878 hcp->pgno = PGNO_INVALID;
880 if (hcp->bucket > hashp->hdr->max_bucket) {
885 ret = __ham_item_next(hashp,
895 out1: if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
899 RELEASE_META(hashp->dbp, hashp);
900 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
901 __db_puthandle(ldbp);
906 __ham_c_put(cursor, key, data, flags)
914 HASH_CURSOR *hcp, save_curs;
918 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
919 flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
922 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
923 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
925 hashp = (HTAB *)(ldbp->internal);
926 hcp = (HASH_CURSOR *)cursor->internal;
929 if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
930 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
932 if (F_ISSET(hcp, H_DELETED))
933 return (DB_NOTFOUND);
935 SET_LOCKER(hashp->dbp, cursor->txn);
936 GET_META(hashp->dbp, hashp);
942 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
943 HKEYDATA_PSIZE(key->size)) +
944 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
945 HKEYDATA_PSIZE(data->size));
946 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
951 ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
956 if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
957 ret = __ham_overwrite(hashp, hcp, data);
959 ret = __ham_add_dup(hashp, hcp, data, flags);
962 if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
963 ret = __ham_expand_table(hashp);
964 F_CLR(hcp, H_EXPAND);
967 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
971 RELEASE_META(hashp->dbp, hashp);
972 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
973 __db_puthandle(ldbp);
977 /********************************* UTILITIES ************************/
980 * __ham_expand_table --
982 * PUBLIC: int __ham_expand_table __P((HTAB *));
985 __ham_expand_table(hashp)
989 u_int32_t old_bucket, new_bucket, spare_ndx;
993 DIRTY_META(hashp, ret);
998 * If the split point is about to increase, make sure that we
999 * have enough extra pages. The calculation here is weird.
1000 * We'd like to do this after we've upped max_bucket, but it's
1001 * too late then because we've logged the meta-data split. What
1002 * we'll do between then and now is increment max bucket and then
1003 * see what the log of one greater than that is; here we have to
1004 * look at the log of max + 2. VERY NASTY STUFF.
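/*
 * Added worked example (not in the original): __db_log2 acts as a
 * ceiling log2 here, so with max_bucket == 7 and ovfl_point == 3 we
 * get __db_log2(7 + 2) == 4 > 3, meaning the increment of max_bucket
 * below will push the split point from 3 to 4, and any extra overflow
 * pages must be reserved now, before the meta-data split is logged.
 */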
1006 if (__db_log2(hashp->hdr->max_bucket + 2) > hashp->hdr->ovfl_point) {
1008 * We are about to shift the split point. Make sure that
1009 * if the next doubling is going to be big (more than 8
1010 * pages), we have some extra pages around.
1012 if (hashp->hdr->max_bucket + 1 >= 8 &&
1013 hashp->hdr->spares[hashp->hdr->ovfl_point] <
1014 hashp->hdr->spares[hashp->hdr->ovfl_point - 1] +
1015 hashp->hdr->ovfl_point + 1)
1016 __ham_init_ovflpages(hashp);
1019 /* Now we can log the meta-data split. */
1020 if (DB_LOGGING(hashp->dbp)) {
1021 if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
1022 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1023 hashp->dbp->log_fileid,
1024 hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
1025 hashp->hdr->spares[hashp->hdr->ovfl_point],
1026 &hashp->hdr->lsn)) != 0)
1029 hashp->hdr->lsn = new_lsn;
1032 hashp->hash_expansions++;
1033 new_bucket = ++hashp->hdr->max_bucket;
1034 old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
1037 * If the split point is increasing, copy the current contents
1038 * of the spare split bucket to the next bucket.
1040 spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
1041 if (spare_ndx > hashp->hdr->ovfl_point) {
1042 hashp->hdr->spares[spare_ndx] =
1043 hashp->hdr->spares[hashp->hdr->ovfl_point];
1044 hashp->hdr->ovfl_point = spare_ndx;
1047 if (new_bucket > hashp->hdr->high_mask) {
1048 /* Starting a new doubling */
1049 hashp->hdr->low_mask = hashp->hdr->high_mask;
1050 hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
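/*
 * Added example (commentary only): continuing the doubling from 8 to
 * 16 buckets, new_bucket == 8 exceeds the old high_mask of 7, so
 * low_mask becomes 7 and high_mask becomes 8 | 7 == 15; hash values
 * are then taken modulo 16, falling back to modulo 8 for buckets that
 * have not been split yet (see __ham_call_hash below).
 */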
1053 if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
1054 __db_err(hashp->dbp->dbenv,
1055 "hash: Cannot allocate new bucket. Pages exhausted.");
1059 /* Relocate records to the new bucket */
1060 return (__ham_split_page(hashp, old_bucket, new_bucket));
1064 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1067 __ham_call_hash(hashp, k, len)
1072 u_int32_t n, bucket;
1074 n = (u_int32_t)hashp->hash(k, len);
1075 bucket = n & hashp->hdr->high_mask;
1076 if (bucket > hashp->hdr->max_bucket)
1077 bucket = bucket & hashp->hdr->low_mask;
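/*
 * Added example (not in the original): with max_bucket == 10,
 * high_mask == 15 and low_mask == 7, a hash value of 29 maps to
 * 29 & 15 == 13, which does not exist yet, so the bucket becomes
 * 13 & 7 == 5; a hash value of 25 maps to 25 & 15 == 9, which is a
 * real bucket and is used as is.
 */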
1082 * Check for duplicates, and call __db_ret appropriately. Release
1083 * everything held by the cursor.
1086 __ham_dup_return(hashp, hcp, val, flags)
1093 DBT *myval, tmp_val;
1100 /* Check for duplicate and return the first one. */
1101 ndx = H_DATAINDEX(hcp->bndx);
1102 type = HPAGE_TYPE(hcp->pagep, ndx);
1107 * There are 3 cases:
1108 * 1. We are not in a duplicate set; simply call db_ret.
1109 * 2. We are looking at keys and stumbled onto a duplicate.
1110 * 3. We are in the middle of a duplicate set. (ISDUP set)
1114 * Here we check for the case where we just stumbled onto a
1115 * duplicate. In this case, we do initialization and then
1116 * let the normal duplicate code handle it.
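/*
 * Added note (commentary only): "stumbling onto a duplicate" below
 * means the data item we just located has type H_DUPLICATE (the dups
 * are stored inline in the item, so dup_off, dup_len and dup_tlen get
 * initialized) or H_OFFDUP (the dups live on their own pages, so
 * dpgno, dndx and dpagep get initialized); once that state is set up,
 * the common duplicate code can treat all three cases alike.
 */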
1118 if (!F_ISSET(hcp, H_ISDUP))
1119 if (type == H_DUPLICATE) {
1120 F_SET(hcp, H_ISDUP);
1121 hcp->dup_tlen = LEN_HDATA(hcp->pagep,
1122 hashp->hdr->pagesize, hcp->bndx);
1123 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1124 if (flags == DB_LAST || flags == DB_PREV) {
1129 HKEYDATA_DATA(hk) + hcp->dup_off,
1131 hcp->dup_off += DUP_SIZE(len);
1133 } while (hcp->dup_off < hcp->dup_tlen);
1134 hcp->dup_off -= DUP_SIZE(len);
1138 HKEYDATA_DATA(hk), sizeof(db_indx_t));
1143 } else if (type == H_OFFDUP) {
1144 F_SET(hcp, H_ISDUP);
1145 memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
1147 if (flags == DB_LAST || flags == DB_PREV) {
1148 indx = (int)hcp->dndx;
1149 if ((ret = __db_dend(hashp->dbp,
1150 pgno, &hcp->dpagep)) != 0)
1152 hcp->dpgno = PGNO(hcp->dpagep);
1153 hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
1154 } else if ((ret = __ham_next_cpage(hashp,
1155 hcp, pgno, 0, H_ISDUP)) != 0)
1161 * Now, everything is initialized, grab a duplicate if necessary.
1164 if (F_ISSET(hcp, H_ISDUP))
1165 if (hcp->dpgno != PGNO_INVALID) {
1170 * Copy the DBT in case we are retrieving into
1171 * user memory and we need the parameters for
1174 memcpy(&tmp_val, val, sizeof(*val));
1175 F_SET(&tmp_val, DB_DBT_PARTIAL);
1176 tmp_val.dlen = hcp->dup_len;
1177 tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
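/*
 * Added layout note (an assumption based on the length copies above,
 * not stated here): each on-page duplicate appears to be stored as a
 * db_indx_t length, the element bytes, and the length again (hence
 * DUP_SIZE), which is why doff skips sizeof(db_indx_t) to land on the
 * dup_len bytes being returned.
 */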
1183 * Finally, if we had a duplicate, pp, ndx, and myval should be
1184 * set appropriately.
1186 if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
1187 &hcp->big_datalen)) != 0)
1191 * In case we sent a temporary off to db_ret, set the real return values.
1194 val->data = myval->data;
1195 val->size = myval->size;
1201 __ham_overwrite(hashp, hcp, nval)
1206 DBT *myval, tmp_val;
1209 if (F_ISSET(hashp->dbp, DB_AM_DUP))
1210 return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
1211 else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1213 memcpy(&tmp_val, nval, sizeof(*nval));
1214 F_SET(&tmp_val, DB_DBT_PARTIAL);
1216 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1217 if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1218 memcpy(&tmp_val.dlen,
1219 HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1221 tmp_val.dlen = LEN_HDATA(hcp->pagep,
1222 hashp->hdr->pagesize, hcp->bndx);
1224 } else /* Regular partial put */
1227 return (__ham_replpair(hashp, hcp, myval, 0));
1231 * Given a key and a cursor, sets the cursor to the page/ndx on which
1232 * the key resides. If the key is found, the cursor H_OK flag is set
1233 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1234 * If the key is not found, the H_OK flag is not set. If the sought
1235 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1236 * are set indicating where an add might take place. If it is 0,
1237 * none of the cursor pointer fields are valid.
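/*
 * A short usage sketch (added commentary, not part of the original):
 * callers follow this contract literally; __ham_get above passes
 * sought == 0 because a read never needs room for an add, then checks
 * F_ISSET(hcp, H_OK) to decide between returning the data and
 * reporting that the key was not found, e.g.
 *
 *	if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
 *		ret = F_ISSET(hcp, H_OK) ?
 *		    __ham_dup_return(hashp, hcp, data, DB_FIRST) :
 *		    DB_NOTFOUND;
 */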
1240 __ham_lookup(hashp, hcp, key, sought, mode)
1249 int match, ret, t_ret;
1253 * Set up cursor so that we're looking for space to add an item
1254 * as we cycle through the pages looking for the key.
1256 if ((ret = __ham_item_reset(hashp, hcp)) != 0)
1258 hcp->seek_size = sought;
1260 hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
1262 if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
1265 if (F_ISSET(hcp, H_NOMORE))
1268 hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1269 switch (HPAGE_PTYPE(hk)) {
1271 memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1272 if (tlen == key->size) {
1274 HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1275 match = __db_moff(hashp->dbp, key, pgno);
1283 if (key->size == LEN_HKEY(hcp->pagep,
1284 hashp->hdr->pagesize, hcp->bndx) &&
1286 HKEYDATA_DATA(hk), key->size) == 0) {
1294 * These are errors because keys are never
1295 * duplicated, only data items are.
1297 return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
1299 hashp->hash_collisions++;
1303 * Item was not found; adjust the cursor properly.
1309 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
1315 * Initialize a dbt using some possibly already allocated storage
1317 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1320 __ham_init_dbt(dbt, size, bufp, sizep)
1326 memset(dbt, 0, sizeof(*dbt));
1327 if (*sizep < size) {
1328 if ((*bufp = (void *)(*bufp == NULL ?
1329 __db_malloc(size) : __db_realloc(*bufp, size))) == NULL) {
1341 * Adjust the cursor after an insert or delete. The cursor passed is
1342 * the one that was operated upon; we just need to check any of the others.
1345 * len indicates the length of the item added/deleted
1346 * add indicates if the item indicated by the cursor has just been
1347 * added (add == 1) or deleted (add == 0).
1348 * dup indicates if the addition occurred into a duplicate set.
1350 * PUBLIC: void __ham_c_update
1351 * PUBLIC: __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1354 __ham_c_update(hcp, chg_pgno, len, add, is_dup)
1366 * Regular adds are always at the end of a given page, so we never
1367 * have to adjust anyone's cursor after a regular add.
1373 * Determine if a page was deleted. If this is a regular update
1374 * (i.e., not is_dup) then the deleted page's number will be that in
1375 * chg_pgno, and the pgno in the cursor will be different. If this
1376 * was an onpage-duplicate, then the same conditions apply. If this
1377 * was an off-page duplicate, then we need to verify if hcp->dpgno
1378 * is the same (no delete) or different (delete) than chg_pgno.
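/*
 * Added example (commentary only, not in the original): suppose the
 * cursor passed in just deleted the pair at bndx 2 on some page.  A
 * sibling cursor on that page at bndx 3 or greater is presumably
 * shifted down one slot so it still refers to the same pair, while a
 * sibling sitting exactly at bndx 2 is flagged H_DELETED so a later
 * operation on it knows the item it referenced is gone.
 */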
1380 if (!is_dup || hcp->dpgno == PGNO_INVALID)
1382 chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
1385 chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
1387 hp = hcp->db_cursor->dbp->master->internal;
1388 DB_THREAD_LOCK(hp->dbp);
1390 for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
1391 cp = TAILQ_NEXT(cp, links)) {
1392 if (cp->internal == hcp)
1395 lcp = (HASH_CURSOR *)cp->internal;
1397 if (!is_dup && lcp->pgno != chg_pgno)
1401 if (F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1403 if (!F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1409 lcp->dpgno = hcp->dpgno;
1410 lcp->dndx = hcp->dndx;
1412 lcp->pgno = hcp->pgno;
1413 lcp->bndx = hcp->bndx;
1414 lcp->bucket = hcp->bucket;
1416 F_CLR(lcp, H_ISDUP);
1420 if (!is_dup && lcp->bndx > hcp->bndx)
1422 else if (!is_dup && lcp->bndx == hcp->bndx)
1423 F_SET(lcp, H_DELETED);
1424 else if (is_dup && lcp->bndx == hcp->bndx) {
1425 /* Assign dpgno in case there was page conversion. */
1426 lcp->dpgno = hcp->dpgno;
1427 if (add && lcp->dndx >= hcp->dndx)
1429 else if (!add && lcp->dndx > hcp->dndx)
1431 else if (!add && lcp->dndx == hcp->dndx)
1432 F_SET(lcp, H_DELETED);
1434 /* Now adjust on-page information. */
1435 if (lcp->dpgno == PGNO_INVALID)
1437 lcp->dup_tlen += len;
1438 if (lcp->dndx > hcp->dndx)
1439 lcp->dup_off += len;
1441 lcp->dup_tlen -= len;
1442 if (lcp->dndx > hcp->dndx)
1443 lcp->dup_off -= len;
1447 DB_THREAD_UNLOCK(hp->dbp);
1452 * This function gets called when we create a duplicate handle for a
1453 * threaded DB. It should create the private part of the DB structure.
1454 * PUBLIC: int __ham_hdup __P((DB *, DB *));
1457 __ham_hdup(orig, new)
1464 if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL)
1467 new->internal = hashp;
1472 hashp->hash = ((HTAB *)orig->internal)->hash;
1473 if ((hashp->split_buf = (PAGE *)__db_malloc(orig->pgsize)) == NULL)
1475 hashp->local_errno = 0;
1476 hashp->hash_accesses = 0;
1477 hashp->hash_collisions = 0;
1478 hashp->hash_expansions = 0;
1479 hashp->hash_overflows = 0;
1480 hashp->hash_bigpages = 0;
1481 /* Initialize the cursor queue. */
1482 ret = __ham_c_init(new, NULL, &curs);
1483 TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);