/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997
 *     Sleepycat Software.  All rights reserved.
 *
 * Copyright (c) 1990, 1993, 1994
 *     Margo Seltzer.  All rights reserved.
 *
 * Copyright (c) 1990, 1993, 1994
 *     The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
static const char sccsid[] = "@(#)hash.c 10.27 (Sleepycat) 9/15/97";

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

static int  __ham_c_close __P((DBC *));
static int  __ham_c_del __P((DBC *, int));
static int  __ham_c_get __P((DBC *, DBT *, DBT *, int));
static int  __ham_c_put __P((DBC *, DBT *, DBT *, int));
static int  __ham_c_init __P((DB *, DB_TXN *, DBC **));
static int  __ham_cursor __P((DB *, DB_TXN *, DBC **));
static int  __ham_delete __P((DB *, DB_TXN *, DBT *, int));
static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
static void __ham_init_htab __P((HTAB *));
static int  __ham_lookup __P((HTAB *,
                HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
static int  __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
static int  __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
static int  __ham_sync __P((DB *, int));
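/*
 * A short orientation note on the declarations above: these static routines
 * are the hash-specific access methods and are not called directly by
 * applications.  __ham_open() below installs them on the generic DB handle
 * (dbp->get, dbp->put, dbp->del, dbp->cursor, dbp->sync), and the cursor
 * routines are installed on each DBC by __ham_c_init().
 */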
/************************** INTERFACE ROUTINES ***************************/
/*
 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
 */
__ham_open(dbp, dbinfo)
    int file_existed, ret;

    if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL)

    /* Set the hash function if specified by the user. */
    if (dbinfo != NULL && dbinfo->h_hash != NULL)
        hashp->hash = dbinfo->h_hash;

    /*
     * Initialize the remaining fields of the dbp.  The type, close and
     * fd functions are all set in db_open.
     */
    dbp->internal = hashp;
    dbp->cursor = __ham_cursor;
    dbp->del = __ham_delete;
    dbp->get = __ham_get;
    dbp->put = __ham_put;
    dbp->sync = __ham_sync;

    /* If locking is turned on, lock the meta data page. */
    if (F_ISSET(dbp, DB_AM_LOCKING)) {
        dbp->lock.pgno = BUCKET_INVALID;
        if ((ret = lock_get(dbenv->lk_info, dbp->locker,
            0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {

    /*
     * Now, we can try to read the meta-data page and figure out
     * if we set up locking and get the meta-data page properly.
     * If this is a new file, initialize it, and put it back dirty.
     */
    if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)

    /* Initialize the hashp structure */
    if (hashp->hdr->magic == DB_HASHMAGIC) {
        /* File exists, verify the data in the header. */
        if (hashp->hash == NULL)
            hashp->hash =
                hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
        if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
            hashp->hdr->h_charkey) {
            __db_err(hashp->dbp->dbenv,
                "hash: incompatible hash function");
        if (F_ISSET(hashp->hdr, DB_HASH_DUP))
            F_SET(dbp, DB_AM_DUP);
        /*
         * File does not exist, we must initialize the header.  If
         * locking is enabled that means getting a write lock first.
         */
        if (F_ISSET(dbp, DB_AM_LOCKING) &&
            ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
            (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
            &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {

        hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0;
        hashp->hdr->ffactor =
            dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
        __ham_init_htab(hashp);
        if (F_ISSET(dbp, DB_AM_DUP))
            F_SET(hashp->hdr, DB_HASH_DUP);
        if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)

    /* Initialize the default cursor. */
    __ham_c_init(dbp, NULL, &curs);
    TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);

    /* Allocate memory for our split buffer. */
    if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) {

#ifdef NO_STATISTICS_FOR_DB_ERR
200 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
201 "TABLE POINTER ", (long)hashp,
202 "BUCKET SIZE ", (long)hashp->hdr->pagesize,
203 "FILL FACTOR ", (long)hashp->hdr->ffactor,
204 "MAX BUCKET ", (long)hashp->hdr->max_bucket,
205 "OVFL POINT ", (long)hashp->hdr->ovfl_point,
206 "LAST FREED ", (long)hashp->hdr->last_freed,
207 "HIGH MASK ", (long)hashp->hdr->high_mask,
208 "LOW MASK ", (long)hashp->hdr->low_mask,
209 "NELEM ", (long)hashp->hdr->nelem,
210 "FLAGS ", (long)hashp->hdr->flags);
    /* Release the meta data page */
    (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
    if (F_ISSET(dbp, DB_AM_LOCKING) &&
        (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {

    /* Sync the file so that we know that the meta data goes to disk. */
    if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)

out:    (void)__ham_close(dbp);

/*
 * PUBLIC: int __ham_close __P((DB *));
 */
    DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
    hashp = (HTAB *)dbp->internal;

    /* Free the split page. */
    if (hashp->split_buf)
        FREE(hashp->split_buf, dbp->pgsize);

    if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
        (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)

    if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
        hashp->hlock)) != 0 && ret == 0)

    FREE(hashp, sizeof(HTAB));
    dbp->internal = NULL;
/************************** LOCAL CREATION ROUTINES **********************/
/*
 * Returns 0 on no error.
 */
__ham_init_htab(hashp)
    int32_t l2, nbuckets;

    nelem = hashp->hdr->nelem;
    hashp->hdr->pagesize = hashp->dbp->pgsize;
    ZERO_LSN(hashp->hdr->lsn);
    hashp->hdr->magic = DB_HASHMAGIC;
    hashp->hdr->version = DB_HASHVERSION;
    if (hashp->hash == NULL)
        hashp->hash =
            hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
    hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
    if (nelem != 0 && hashp->hdr->ffactor != 0) {
        nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
        l2 = __db_log2(nelem > 2 ? nelem : 2);

    hashp->hdr->spares[l2] = 0;
    hashp->hdr->spares[l2 + 1] = 0;
    hashp->hdr->ovfl_point = l2;
    hashp->hdr->last_freed = PGNO_INVALID;

    hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
    hashp->hdr->low_mask = (nbuckets >> 1) - 1;
    memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
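    /*
     * A worked example of the sizing above (a sketch only; it assumes the
     * elided code sets nbuckets to 1 << l2 and that __db_log2() rounds up
     * to the next power of two): with nelem == 1000 and ffactor == 10,
     * nelem becomes (1000 - 1) / 10 + 1 == 100, l2 == 7 and nbuckets == 128,
     * so the table starts with max_bucket == high_mask == 127 and
     * low_mask == 63.
     */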
/********************** DESTROY/CLOSE ROUTINES ************************/
/*
 * Write modified pages to disk
 */
__ham_sync(dbp, flags)
    DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
    if ((ret = __db_syncchk(dbp, flags)) != 0)
    if (F_ISSET(dbp, DB_AM_RDONLY))

    if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)

/******************************* SEARCH ROUTINES *****************************/
/*
 * All the access routines return
 *   0 to indicate SUCCESS
 *   1 to indicate an external ERROR (e.g., key not found)
 *  -1 to indicate an internal ERROR (e.g., out of memory)
 */
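/*
 * A minimal caller-side sketch of the convention above (illustrative only;
 * in this version a missing key surfaces through the generic handle as
 * DB_NOTFOUND):
 *
 *      DBT key, data;
 *
 *      memset(&key, 0, sizeof(key));
 *      memset(&data, 0, sizeof(data));
 *      key.data = "fruit";
 *      key.size = 5;
 *      switch (dbp->get(dbp, NULL, &key, &data, 0)) {
 *      case 0:                 ... use data.data and data.size ...
 *      case DB_NOTFOUND:       ... the key is not in the table ...
 *      default:                ... internal error ...
 *      }
 */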
__ham_get(dbp, txn, key, data, flags)
    DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
    if ((ret = __db_getchk(dbp, key, data, flags)) != 0)

    if (F_ISSET(dbp, DB_AM_THREAD) &&
        (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)

    hashp = (HTAB *)ldbp->internal;
    SET_LOCKER(ldbp, txn);
    GET_META(ldbp, hashp);
    cp = TAILQ_FIRST(&ldbp->curs_queue);

    hashp->hash_accesses++;
    hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
    if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
        if (F_ISSET(hcp, H_OK))
            ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
        else /* Key was not found */

    if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)

    RELEASE_META(ldbp, hashp);
    if (F_ISSET(dbp, DB_AM_THREAD))
        __db_puthandle(ldbp);

__ham_put(dbp, txn, key, data, flags)
    DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
    if ((ret = __db_putchk(dbp, key, data,
        flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)

    if (F_ISSET(dbp, DB_AM_THREAD) &&
        (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)

    hashp = (HTAB *)ldbp->internal;
    SET_LOCKER(ldbp, txn);
    GET_META(ldbp, hashp);
    hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;

    nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
        HKEYDATA_PSIZE(key->size)) +
        (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
        HKEYDATA_PSIZE(data->size));
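    /*
     * nbytes is only a space estimate for the pair about to be written: an
     * item small enough to live on the bucket page is charged its packed
     * on-page size (HKEYDATA_PSIZE), while an item that ISBIG() says must be
     * stored off-page is charged just the fixed size of the on-page
     * reference that points to it (HOFFPAGE_PSIZE).  The estimate is handed
     * to __ham_lookup() below so it can remember a page with enough free
     * space for a subsequent add.
     */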
    hashp->hash_accesses++;
    ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);

    if (ret == DB_NOTFOUND) {
        if (hcp->seek_found_page != PGNO_INVALID &&
            hcp->seek_found_page != hcp->pgno) {
            if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
            hcp->pgno = hcp->seek_found_page;
            hcp->bndx = NDX_INVALID;

        if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
            /*
             * Doing a partial put, but the key does not exist
             * and we are not beginning the write at 0.  We
             * must create a data item padded up to doff and
             * then write the new bytes represented by val.
             */
            ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
                &hcp->big_data, &hcp->big_datalen);
            memset(tmp_val.data, 0, data->doff);
            memcpy((u_int8_t *)tmp_val.data + data->doff,
                data->data, data->size);
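            /*
             * For example, a partial put with doff == 100 and size == 25
             * against a missing key builds a 125-byte item: 100 bytes of
             * zero padding followed by the caller's 25 bytes.
             */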
        ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
    } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
        if (flags == DB_NOOVERWRITE)
        else if (F_ISSET(ldbp, DB_AM_DUP))
            ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
            ret = __ham_overwrite(hashp, hcp, data);

    /* Free up all the cursor pages. */
    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)

    /* Now check if we have to grow. */
out:    if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
        ret = __ham_expand_table(hashp);
        F_CLR(hcp, H_EXPAND);

    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)

    RELEASE_META(ldbp, hashp);
    if (F_ISSET(dbp, DB_AM_THREAD))
        __db_puthandle(ldbp);

__ham_cursor(dbp, txnid, dbcp)
    DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
    if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)

    TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
    DB_THREAD_UNLOCK(dbp);

__ham_c_init(dbp, txnid, dbcp)
    HASH_CURSOR *new_curs;

    if ((db_curs = (DBC *)calloc(sizeof(DBC), 1)) == NULL)
        (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1)) == NULL) {
        FREE(db_curs, sizeof(DBC));

    db_curs->internal = new_curs;
    db_curs->c_close = __ham_c_close;
    db_curs->c_del = __ham_c_del;
    db_curs->c_get = __ham_c_get;
    db_curs->c_put = __ham_c_put;
    db_curs->txn = txnid;

    new_curs->db_cursor = db_curs;
    __ham_item_init(new_curs);
__ham_delete(dbp, txn, key, flags)
    DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
    if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)

    if (F_ISSET(dbp, DB_AM_THREAD) &&
        (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)

    hashp = (HTAB *)ldbp->internal;
    SET_LOCKER(ldbp, txn);
    GET_META(ldbp, hashp);
    hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;

    hashp->hash_accesses++;
    if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
        if (F_ISSET(hcp, H_OK))
            ret = __ham_del_pair(hashp, hcp);

    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)

    RELEASE_META(ldbp, hashp);
    if (F_ISSET(dbp, DB_AM_THREAD))
        __db_puthandle(ldbp);

/* ****************** CURSORS ********************************** */
__ham_c_close(cursor)
    DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
    /*
     * If the pagep, dpagep, and lock fields of the cursor are all NULL,
     * then there really isn't a need to get a handle here.  However,
     * the normal case is that at least one of those fields is non-NULL,
     * and putting those checks in here would couple the ham_item_done
     * functionality with cursor close which would be pretty disgusting.
     * Instead, we pay the overhead here of always getting the handle.
     */
    if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
        (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)

    ret = __ham_c_iclose(ldbp, cursor);

    if (F_ISSET(ldbp, DB_AM_THREAD))
        __db_puthandle(ldbp);

/*
 * Internal cursor close routine; assumes it is being passed the correct
 * handle, rather than getting and putting a handle.
 *
 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
 */
__ham_c_iclose(dbp, dbc)
    hashp = (HTAB *)dbp->internal;
    hcp = (HASH_CURSOR *)dbc->internal;
    ret = __ham_item_done(hashp, hcp, 0);

        FREE(hcp->big_key, hcp->big_keylen);
        FREE(hcp->big_data, hcp->big_datalen);

    /*
     * All cursors (except the default ones) are linked off the master.
     * Therefore, when we close the cursor, we have to remove it from
     * the master, not the local one.
     * XXX I am always removing from the master; what about local cursors?
     */
    DB_THREAD_LOCK(dbc->dbp);
    TAILQ_REMOVE(&dbc->dbp->curs_queue, dbc, links);
    DB_THREAD_UNLOCK(dbc->dbp);

    FREE(hcp, sizeof(HASH_CURSOR));
    FREE(dbc, sizeof(DBC));
__ham_c_del(cursor, flags)
    HASH_CURSOR save_curs;
    db_pgno_t ppgno, chg_pgno;

    DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);

    if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
        (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)

    hashp = (HTAB *)ldbp->internal;
    hcp = (HASH_CURSOR *)cursor->internal;

    if ((ret = __db_cdelchk(ldbp, flags,
        F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
    if (F_ISSET(hcp, H_DELETED))
        return (DB_NOTFOUND);

    SET_LOCKER(hashp->dbp, cursor->txn);
    GET_META(hashp->dbp, hashp);
    hashp->hash_accesses++;
    if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
    if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
        ppgno = PREV_PGNO(hcp->dpagep);

        /* Remove item from duplicate page. */
        chg_pgno = hcp->dpgno;
        if ((ret = __db_drem(hashp->dbp,
            &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
        /*
         * 1. We removed an item on a page, but nothing else changed.
         * 2. We removed the last item on a page, but there is a
         *    following page of duplicates.
         * 3. We removed the last item on a page, this page was the
         *    last page in a duplicate set, but there were dups before
         *    it.
         * 4. We removed the last item on a page, removing the last
         *    duplicate.
         * In case 1 hcp->dpagep is unchanged.
         * In case 2 hcp->dpagep comes back pointing to the next dup
         *    page.
         * In case 3 hcp->dpagep comes back NULL.
         * In case 4 hcp->dpagep comes back NULL.
         */
        if (hcp->dpagep == NULL) {
            if (ppgno != PGNO_INVALID) {        /* Case 3 */
                if ((ret = __ham_get_cpage(hashp, hcp,
                hcp->dndx = NUM_ENT(hcp->dpagep);
                F_SET(hcp, H_DELETED);
            } else {                            /* Case 4 */
                ret = __ham_del_pair(hashp, hcp);
                hcp->dpgno = PGNO_INVALID;
                /*
                 * Delpair updated the cursor queue, so we
                 * don't have to do that here.
                 */
                chg_pgno = PGNO_INVALID;
        } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
            hcp->dndx = 0;                      /* Case 2 */
            hcp->dpgno = PGNO(hcp->dpagep);
            if (ppgno == PGNO_INVALID)
                memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
                    H_DATAINDEX(hcp->bndx))),
                    &hcp->dpgno, sizeof(db_pgno_t));
            F_SET(hcp, H_DELETED);
            F_SET(hcp, H_DELETED);
        if (chg_pgno != PGNO_INVALID)
            __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1);
    } else if (F_ISSET(hcp, H_ISDUP)) {         /* on page */
        if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
            LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
            ret = __ham_del_pair(hashp, hcp);

            F_SET(&repldbt, DB_DBT_PARTIAL);
            repldbt.doff = hcp->dup_off;
            repldbt.dlen = DUP_SIZE(hcp->dup_len);

            ret = __ham_replpair(hashp, hcp, &repldbt, 0);
            hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
            __ham_c_update(hashp, hcp, hcp->pgno,
                DUP_SIZE(hcp->dup_len), 0, 1);
            F_SET(hcp, H_DELETED);

    /* Not a duplicate */
        ret = __ham_del_pair(hashp, hcp);

out:    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)

    RELEASE_META(hashp->dbp, hashp);
    if (F_ISSET(cursor->dbp, DB_AM_THREAD))
        __db_puthandle(ldbp);
__ham_c_get(cursor, key, data, flags)
    HASH_CURSOR *hcp, save_curs;
    int get_key, ret, t_ret;

    DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
        flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,

    if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
        (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)

    hashp = (HTAB *)(ldbp->internal);
    hcp = (HASH_CURSOR *)cursor->internal;
        __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)

    SET_LOCKER(hashp->dbp, cursor->txn);
    GET_META(hashp->dbp, hashp);
    hashp->hash_accesses++;

        if (hcp->bucket != BUCKET_INVALID) {
            ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
            ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
        ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
        if (hcp->bucket == BUCKET_INVALID)
            ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
        ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
        if (F_ISSET(hcp, H_DELETED)) {
        ret = __ham_item(hashp, hcp, DB_LOCK_READ);

    /*
     * Must always enter this loop to do error handling and
     * check for big key/data pair.
     */
        if (ret != 0 && ret != DB_NOTFOUND)
        else if (F_ISSET(hcp, H_OK)) {
            if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
                H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
                &hcp->big_keylen)) != 0)
            ret = __ham_dup_return(hashp, hcp, data, flags);
        } else if (!F_ISSET(hcp, H_NOMORE)) {
            /*
             * Ran out of entries in a bucket; change buckets.
             */
            ret = __ham_item_done(hashp, hcp, 0);
            if (hcp->bucket == 0) {
                hcp->bndx = NDX_INVALID;
                ret = __ham_item_prev(hashp,
            ret = __ham_item_done(hashp, hcp, 0);
            hcp->bndx = NDX_INVALID;
            hcp->pgno = PGNO_INVALID;
            if (hcp->bucket > hashp->hdr->max_bucket) {
                ret = __ham_item_next(hashp,

out1:   if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)

    RELEASE_META(hashp->dbp, hashp);
    if (F_ISSET(cursor->dbp, DB_AM_THREAD))
        __db_puthandle(ldbp);

__ham_c_put(cursor, key, data, flags)
    HASH_CURSOR *hcp, save_curs;

    DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
        flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,

    if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
        (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)

    hashp = (HTAB *)(ldbp->internal);
    hcp = (HASH_CURSOR *)cursor->internal;

    if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
        F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
    if (F_ISSET(hcp, H_DELETED))
        return (DB_NOTFOUND);

    SET_LOCKER(hashp->dbp, cursor->txn);
    GET_META(hashp->dbp, hashp);

        nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
            HKEYDATA_PSIZE(key->size)) +
            (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
            HKEYDATA_PSIZE(data->size));
        ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
        ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);

        if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
            ret = __ham_overwrite(hashp, hcp, data);
            ret = __ham_add_dup(hashp, hcp, data, flags);

    if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
        ret = __ham_expand_table(hashp);
        F_CLR(hcp, H_EXPAND);

    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)

    RELEASE_META(hashp->dbp, hashp);
    if (F_ISSET(cursor->dbp, DB_AM_THREAD))
        __db_puthandle(ldbp);
/********************************* UTILITIES ************************/

/*
 * __ham_expand_table --
 *
 * PUBLIC: int __ham_expand_table __P((HTAB *));
 */
__ham_expand_table(hashp)
    u_int32_t old_bucket, new_bucket;

    DIRTY_META(hashp, ret);

    if (DB_LOGGING(hashp->dbp)) {
        if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
            (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
            hashp->dbp->log_fileid,
            hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
            hashp->hdr->spares[hashp->hdr->ovfl_point],
            &hashp->hdr->lsn)) != 0)

        hashp->hdr->lsn = new_lsn;

    hashp->hash_expansions++;
    new_bucket = ++hashp->hdr->max_bucket;
    old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
    /*
     * If the split point is increasing (hdr.max_bucket's log base 2
     * increases), make sure that we have enough extra pages, then
     * copy the current contents of the spare split bucket to the
     * next bucket.
     */
    spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
    if (spare_ndx > hashp->hdr->ovfl_point) {
        /*
         * We are about to shift the split point.  Make sure that
         * if the next doubling is going to be big (more than 8
         * pages), we have some extra pages around.
         */
        if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 &&
            __ham_init_ovflpages(hashp);

        hashp->hdr->spares[spare_ndx] =
            hashp->hdr->spares[hashp->hdr->ovfl_point];
        hashp->hdr->ovfl_point = spare_ndx;

    if (new_bucket > hashp->hdr->high_mask) {
        /* Starting a new doubling */
        hashp->hdr->low_mask = hashp->hdr->high_mask;
        hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
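        /*
         * A small worked example of the doubling above: when max_bucket
         * grows from 3 to 4, new_bucket == 4 exceeds the old high_mask
         * of 3, so low_mask becomes 3 and high_mask becomes 4 | 3 == 7;
         * buckets 4 through 7 make up the new doubling.
         */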
    if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
        __db_err(hashp->dbp->dbenv,
            "hash: Cannot allocate new bucket.  Pages exhausted.");

    /* Relocate records to the new bucket */
    return (__ham_split_page(hashp, old_bucket, new_bucket));

/*
 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
 */
__ham_call_hash(hashp, k, len)
    u_int32_t n, bucket;

    n = (u_int32_t)hashp->hash(k, len);
    bucket = n & hashp->hdr->high_mask;
    if (bucket > hashp->hdr->max_bucket)
        bucket = bucket & hashp->hdr->low_mask;
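    /*
     * For example, with max_bucket == 5, high_mask == 7 and low_mask == 3:
     * a hash value of 14 masks to bucket 6, which does not exist yet, so it
     * is masked again with low_mask and lands in bucket 2; a hash value of
     * 12 masks to bucket 4 and is used directly.
     */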
/*
 * Check for duplicates, and call __db_ret appropriately.  Release
 * everything held by the cursor.
 */
__ham_dup_return(hashp, hcp, val, flags)
    DBT *myval, tmp_val;

    /* Check for duplicate and return the first one. */
    ndx = H_DATAINDEX(hcp->bndx);
    type = HPAGE_TYPE(hcp->pagep, ndx);

    /*
     * There are 3 cases:
     * 1. We are not in a duplicate set; simply call db_ret.
     * 2. We are looking at keys and stumbled onto a duplicate.
     * 3. We are in the middle of a duplicate set. (ISDUP set)
     */

    /*
     * Here we check for the case where we just stumbled onto a
     * duplicate.  In this case, we do initialization and then
     * let the normal duplicate code handle it.
     */
    if (!F_ISSET(hcp, H_ISDUP))
        if (type == H_DUPLICATE) {
            F_SET(hcp, H_ISDUP);
            hcp->dup_tlen = LEN_HDATA(hcp->pagep,
                hashp->hdr->pagesize, hcp->bndx);
            hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
            if (flags == DB_LAST || flags == DB_PREV) {
                        HKEYDATA_DATA(hk) + hcp->dup_off,
                    hcp->dup_off += DUP_SIZE(len);
                } while (hcp->dup_off < hcp->dup_tlen);
                hcp->dup_off -= DUP_SIZE(len);
                    HKEYDATA_DATA(hk), sizeof(db_indx_t));
        } else if (type == H_OFFDUP) {
            F_SET(hcp, H_ISDUP);
            memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
            if (flags == DB_LAST || flags == DB_PREV) {
                indx = (int)hcp->dndx;
                if ((ret = __db_dend(hashp->dbp,
                    pgno, &hcp->dpagep)) != 0)
                hcp->dpgno = PGNO(hcp->dpagep);
                hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
            } else if ((ret = __ham_next_cpage(hashp,
                hcp, pgno, 0, H_ISDUP)) != 0)

    /*
     * Now, everything is initialized, grab a duplicate if
     * necessary.
     */
    if (F_ISSET(hcp, H_ISDUP))
        if (hcp->dpgno != PGNO_INVALID) {
            /*
             * Copy the DBT in case we are retrieving into
             * user memory and we need the parameters for
             * it.
             */
            memcpy(&tmp_val, val, sizeof(*val));
            F_SET(&tmp_val, DB_DBT_PARTIAL);
            tmp_val.dlen = hcp->dup_len;
            tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
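            /*
             * The sizeof(db_indx_t) offset above reflects how on-page
             * duplicates appear to be laid out (an inference from
             * DUP_SIZE() and the length copies elsewhere in this file):
             * each element is stored as a length word, the data bytes,
             * and a trailing length word, so the caller's doff must skip
             * the leading length word to reach the actual data.
             */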
    /*
     * Finally, if we had a duplicate, pp, ndx, and myval should be
     * set appropriately.
     */
    if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
        &hcp->big_datalen)) != 0)

    /*
     * In case we sent a temporary off to db_ret, set the real
     * return values.
     */
    val->data = myval->data;
    val->size = myval->size;

__ham_overwrite(hashp, hcp, nval)
    DBT *myval, tmp_val;

    if (F_ISSET(hashp->dbp, DB_AM_DUP))
        return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
    else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
        memcpy(&tmp_val, nval, sizeof(*nval));
        F_SET(&tmp_val, DB_DBT_PARTIAL);
        hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
        if (HPAGE_PTYPE(hk) == H_OFFPAGE)
            memcpy(&tmp_val.dlen,
                HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
            tmp_val.dlen = LEN_HDATA(hcp->pagep,
                hashp->hdr->pagesize, hcp->bndx);
    } else /* Regular partial put */
        return (__ham_replpair(hashp, hcp, myval, 0));

/*
 * Given a key and a cursor, sets the cursor to the page/ndx on which
 * the key resides.  If the key is found, the cursor H_OK flag is set
 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
 * If the key is not found, the H_OK flag is not set.  If the sought
 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
 * are set indicating where an add might take place.  If it is 0,
 * none of the cursor pointer fields are valid.
 */
__ham_lookup(hashp, hcp, key, sought, mode)
    int match, ret, t_ret;

    /*
     * Set up cursor so that we're looking for space to add an item
     * as we cycle through the pages looking for the key.
     */
    if ((ret = __ham_item_reset(hashp, hcp)) != 0)
    hcp->seek_size = sought;

    hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
        if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)

        if (F_ISSET(hcp, H_NOMORE))

        hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
        switch (HPAGE_PTYPE(hk)) {
            memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
            if (tlen == key->size) {
                    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
                match = __db_moff(hashp->dbp, key, pgno);
            if (key->size == LEN_HKEY(hcp->pagep,
                hashp->hdr->pagesize, hcp->bndx) &&
                HKEYDATA_DATA(hk), key->size) == 0) {
            /*
             * These are errors because keys are never
             * duplicated, only data items are.
             */
            return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
        hashp->hash_collisions++;

    /*
     * Item was not found, adjust cursor properly.
     */

    if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)

/*
 * Initialize a dbt using some possibly already allocated storage
 *
 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
 */
__ham_init_dbt(dbt, size, bufp, sizep)
    memset(dbt, 0, sizeof(*dbt));
    if (*sizep < size) {
        if ((*bufp = (void *)(*bufp == NULL ?
            malloc(size) : realloc(*bufp, size))) == NULL) {
/*
 * Adjust the cursor after an insert or delete.  The cursor passed is
 * the one that was operated upon; we just need to check any of the
 * others.
 *
 * len indicates the length of the item added/deleted
 * add indicates if the item indicated by the cursor has just been
 * added (add == 1) or deleted (add == 0).
 * dup indicates if the addition occurred into a duplicate set.
 *
 * PUBLIC: void __ham_c_update __P((HTAB *,
 * PUBLIC:     HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
 */
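/*
 * An illustrative example of the adjustment (a sketch of the common case
 * only): suppose the pair at index 2 of page P is deleted through one
 * cursor.  Every other cursor open on P with bndx greater than 2 slides
 * down one slot, a cursor sitting exactly on index 2 is marked H_DELETED,
 * and for on-page duplicate sets the dup_off/dup_tlen bookkeeping below is
 * shrunk by the deleted element's size instead.
 */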
__ham_c_update(hashp, hcp, chg_pgno, len, add, dup)
    /*
     * Regular adds are always at the end of a given page,
     * so we never have to adjust anyone's cursor after
     * an add.
     */
    page_deleted = chg_pgno != PGNO_INVALID &&
        ((!dup && chg_pgno != hcp->pgno) ||
        (dup && chg_pgno != hcp->dpgno));
    hp = hcp->db_cursor->dbp->master->internal;
    DB_THREAD_LOCK(hp->dbp);

    for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
        cp = TAILQ_NEXT(cp, links)) {
        if (cp->internal == hcp)

        lcp = (HASH_CURSOR *)cp->internal;

        if (!dup && lcp->pgno != chg_pgno)

        if (dup && F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)

        if (dup && !F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)

            lcp->dpgno = hcp->dpgno;
            lcp->dndx = hcp->dndx;
            lcp->pgno = hcp->pgno;
            lcp->bndx = hcp->bndx;
            lcp->bucket = hcp->bucket;
            F_CLR(lcp, H_ISDUP);
1391 if (!dup && lcp->bndx > hcp->bndx)
1393 else if (!dup && lcp->bndx == hcp->bndx)
1394 F_SET(lcp, H_DELETED);
1395 else if (dup && lcp->bndx == hcp->bndx) {
1396 /* Assign dpgno in case there was page conversion. */
1397 lcp->dpgno = hcp->dpgno;
1398 if (add && lcp->dndx >= hcp->dndx )
1400 else if (!add && lcp->dndx > hcp->dndx)
1402 else if (!add && lcp->dndx == hcp->dndx)
1403 F_SET(lcp, H_DELETED);
            /* Now adjust on-page information. */
            if (lcp->dpgno == PGNO_INVALID)
                    lcp->dup_tlen += len;
                    if (lcp->dndx > hcp->dndx)
                        lcp->dup_off += len;
                    lcp->dup_tlen -= len;
                    if (lcp->dndx > hcp->dndx)
                        lcp->dup_off -= len;

    DB_THREAD_UNLOCK(hp->dbp);
/*
 * This function gets called when we create a duplicate handle for a
 * threaded DB.  It should create the private part of the DB structure.
 *
 * PUBLIC: int __ham_hdup __P((DB *, DB *));
 */
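/*
 * A sketch of the split of ownership, inferred from the assignments below:
 * the duplicate handle gets its own HTAB, its own split buffer and cursor
 * queue, and fresh statistics counters, but it reuses the original handle's
 * hash function so both handles address the same buckets.
 */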
__ham_hdup(orig, new)
    if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL)

    new->internal = hashp;

    hashp->hash = ((HTAB *)orig->internal)->hash;
    if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL)

    hashp->local_errno = 0;
    hashp->hash_accesses = 0;
    hashp->hash_collisions = 0;
    hashp->hash_expansions = 0;
    hashp->hash_overflows = 0;
    hashp->hash_bigpages = 0;

    /* Initialize the cursor queue. */
    ret = __ham_c_init(new, NULL, &curs);
    TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);