2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid[] = "@(#)hash_page.c 10.31 (Sleepycat) 1/8/98";
57 * Page manipulation for hashing package.
69 #ifndef NO_SYSTEM_INCLUDES
70 #include <sys/types.h>
84 static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t));
87 static void __account_page(HTAB *, db_pgno_t, int);
91 * PUBLIC: int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
94 __ham_item(hashp, cursorp, mode)
102 if (F_ISSET(cursorp, H_DELETED))
104 F_CLR(cursorp, H_OK | H_NOMORE);
106 /* Check if we need to get a page for this cursor. */
107 if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
110 /* Check if we are looking for space in which to insert an item. */
111 if (cursorp->seek_size && cursorp->seek_found_page == PGNO_INVALID
112 && cursorp->seek_size < P_FREESPACE(cursorp->pagep))
113 cursorp->seek_found_page = cursorp->pgno;
115 /* Check if we need to go on to the next page. */
116 if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno == PGNO_INVALID)
118 * ISDUP is set, and offset is at the beginning of the datum.
119 * We need to grab the length of the datum, then set the datum
120 * pointer to be the beginning of the datum.
122 memcpy(&cursorp->dup_len,
123 HKEYDATA_DATA(H_PAIRDATA(cursorp->pagep, cursorp->bndx)) +
124 cursorp->dup_off, sizeof(db_indx_t));
125 else if (F_ISSET(cursorp, H_ISDUP)) {
126 /* Make sure we're not about to run off the page. */
127 if (cursorp->dpagep == NULL && (ret = __ham_get_page(hashp->dbp,
128 cursorp->dpgno, &cursorp->dpagep)) != 0)
131 if (cursorp->dndx >= NUM_ENT(cursorp->dpagep)) {
132 if (NEXT_PGNO(cursorp->dpagep) == PGNO_INVALID) {
133 if ((ret = __ham_put_page(hashp->dbp,
134 cursorp->dpagep, 0)) != 0)
136 F_CLR(cursorp, H_ISDUP);
137 cursorp->dpagep = NULL;
138 cursorp->dpgno = PGNO_INVALID;
139 cursorp->dndx = NDX_INVALID;
141 } else if ((ret = __ham_next_cpage(hashp, cursorp,
142 NEXT_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
147 if (cursorp->bndx >= (db_indx_t)H_NUMPAIRS(cursorp->pagep)) {
148 /* Fetch next page. */
149 if (NEXT_PGNO(cursorp->pagep) == PGNO_INVALID) {
150 F_SET(cursorp, H_NOMORE);
151 if (cursorp->dpagep != NULL &&
152 (ret = __ham_put_page(hashp->dbp,
153 cursorp->dpagep, 0)) != 0)
155 cursorp->dpgno = PGNO_INVALID;
156 return (DB_NOTFOUND);
158 next_pgno = NEXT_PGNO(cursorp->pagep);
160 if ((ret = __ham_next_cpage(hashp,
161 cursorp, next_pgno, 0, 0)) != 0)
165 F_SET(cursorp, H_OK);
170 * PUBLIC: int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
173 __ham_item_reset(hashp, cursorp)
175 HASH_CURSOR *cursorp;
180 ret = __ham_put_page(hashp->dbp, cursorp->pagep, 0);
184 __ham_item_init(cursorp);
189 * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
192 __ham_item_init(cursorp)
193 HASH_CURSOR *cursorp;
195 cursorp->pagep = NULL;
196 cursorp->bucket = BUCKET_INVALID;
198 cursorp->bndx = NDX_INVALID;
199 cursorp->pgno = PGNO_INVALID;
200 cursorp->dpgno = PGNO_INVALID;
201 cursorp->dndx = NDX_INVALID;
202 cursorp->dpagep = NULL;
204 cursorp->seek_size = 0;
205 cursorp->seek_found_page = PGNO_INVALID;
209 * PUBLIC: int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
212 __ham_item_done(hashp, cursorp, dirty)
214 HASH_CURSOR *cursorp;
222 ret = __ham_put_page(hashp->dbp, cursorp->pagep,
223 dirty && cursorp->dpagep == NULL);
224 cursorp->pagep = NULL;
227 t_ret = __ham_put_page(hashp->dbp, cursorp->dpagep, dirty);
228 cursorp->dpagep = NULL;
230 if (ret == 0 && t_ret != 0)
234 * If we are running with transactions, then we must
235 * not relinquish locks explicitly.
237 if (cursorp->lock && hashp->dbp->txn == NULL)
238 t_ret = lock_put(hashp->dbp->dbenv->lk_info, cursorp->lock);
243 * We don't throw out the page number since we might want to
244 * continue getting on this page.
246 return (ret != 0 ? ret : t_ret);
250 * Returns the last item in a bucket.
252 * PUBLIC: int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
255 __ham_item_last(hashp, cursorp, mode)
257 HASH_CURSOR *cursorp;
262 if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
265 cursorp->bucket = hashp->hdr->max_bucket;
266 F_SET(cursorp, H_OK);
267 return (__ham_item_prev(hashp, cursorp, mode));
270 * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
273 __ham_item_first(hashp, cursorp, mode)
275 HASH_CURSOR *cursorp;
280 if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
282 F_SET(cursorp, H_OK);
284 return (__ham_item_next(hashp, cursorp, mode));
288 * Returns a pointer to key/data pair on a page. In the case of bigkeys,
289 * just returns the page number and index of the bigkey pointer pair.
291 * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
294 __ham_item_prev(hashp, cursorp, mode)
296 HASH_CURSOR *cursorp;
303 * There are N cases for backing up in a hash file.
304 * Case 1: In the middle of a page, no duplicates, just dec the index.
305 * Case 2: In the middle of a duplicate set, back up one.
306 * Case 3: At the beginning of a duplicate set, get out of set and
307 * back up to next key.
308 * Case 4: At the beginning of a page; go to previous page.
309 * Case 5: At the beginning of a bucket; go to prev bucket.
311 F_CLR(cursorp, H_OK | H_NOMORE | H_DELETED);
314 * First handle the duplicates. Either you'll get the key here
315 * or you'll exit the duplicate set and drop into the code below
316 * to handle backing up through keys.
318 if (F_ISSET(cursorp, H_ISDUP)) {
319 if (cursorp->dpgno == PGNO_INVALID) {
320 /* Duplicates are on-page. */
321 if (cursorp->dup_off != 0)
322 if ((ret = __ham_get_cpage(hashp,
323 cursorp, mode)) != 0)
328 memcpy(&h->dup_len, HKEYDATA_DATA(
329 H_PAIRDATA(h->pagep, h->bndx))
330 + h->dup_off - sizeof(db_indx_t),
333 DUP_SIZE(cursorp->dup_len);
335 return (__ham_item(hashp,
338 } else if (cursorp->dndx > 0) { /* Duplicates are off-page. */
340 return (__ham_item(hashp, cursorp, mode));
341 } else if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
343 else if (PREV_PGNO(cursorp->dpagep) == PGNO_INVALID) {
344 F_CLR(cursorp, H_ISDUP); /* End of dups */
345 cursorp->dpgno = PGNO_INVALID;
346 if (cursorp->dpagep != NULL)
347 (void)__ham_put_page(hashp->dbp,
349 cursorp->dpagep = NULL;
350 } else if ((ret = __ham_next_cpage(hashp, cursorp,
351 PREV_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
354 cursorp->dndx = NUM_ENT(cursorp->pagep) - 1;
355 return (__ham_item(hashp, cursorp, mode));
360 * If we get here, we are not in a duplicate set, and just need
361 * to back up the cursor. There are still three cases:
362 * midpage, beginning of page, beginning of bucket.
365 if (cursorp->bndx == 0) { /* Beginning of page. */
366 if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
368 cursorp->pgno = PREV_PGNO(cursorp->pagep);
369 if (cursorp->pgno == PGNO_INVALID) {
370 /* Beginning of bucket. */
371 F_SET(cursorp, H_NOMORE);
372 return (DB_NOTFOUND);
373 } else if ((ret = __ham_next_cpage(hashp,
374 cursorp, cursorp->pgno, 0, 0)) != 0)
377 cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
381 * Either we've got the cursor set up to be decremented, or we
382 * have to find the end of a bucket.
384 if (cursorp->bndx == NDX_INVALID) {
385 if (cursorp->pagep == NULL)
386 next_pgno = BUCKET_TO_PAGE(hashp, cursorp->bucket);
391 if ((ret = __ham_next_cpage(hashp,
392 cursorp, next_pgno, 0, 0)) != 0)
394 got_page: next_pgno = NEXT_PGNO(cursorp->pagep);
395 cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
396 } while (next_pgno != PGNO_INVALID);
398 if (cursorp->bndx == 0) {
399 /* Bucket was empty. */
400 F_SET(cursorp, H_NOMORE);
401 return (DB_NOTFOUND);
407 return (__ham_item(hashp, cursorp, mode));
411 * Sets the cursor to the next key/data pair on a page.
413 * PUBLIC: int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
416 __ham_item_next(hashp, cursorp, mode)
418 HASH_CURSOR *cursorp;
422 * Deleted on-page duplicates are a weird case. If we delete the last
423 * one, then our cursor is at the very end of a duplicate set and
424 * we actually need to go on to the next key.
426 if (F_ISSET(cursorp, H_DELETED)) {
427 if (cursorp->bndx != NDX_INVALID &&
428 F_ISSET(cursorp, H_ISDUP) &&
429 cursorp->dpgno == PGNO_INVALID &&
430 cursorp->dup_tlen == cursorp->dup_off) {
431 F_CLR(cursorp, H_ISDUP);
432 cursorp->dpgno = PGNO_INVALID;
435 F_CLR(cursorp, H_DELETED);
436 } else if (cursorp->bndx == NDX_INVALID) {
438 cursorp->dpgno = PGNO_INVALID;
439 F_CLR(cursorp, H_ISDUP);
440 } else if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno != PGNO_INVALID)
442 else if (F_ISSET(cursorp, H_ISDUP)) {
444 cursorp->dup_off += DUP_SIZE(cursorp->dup_len);
445 if (cursorp->dup_off >= cursorp->dup_tlen) {
446 F_CLR(cursorp, H_ISDUP);
447 cursorp->dpgno = PGNO_INVALID;
453 return (__ham_item(hashp, cursorp, mode));
457 * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
459 * This is a little bit sleazy in that we're overloading the meaning
460 * of the H_OFFPAGE type here. When we recover deletes, we have the
461 * entire entry instead of having only the DBT, so we'll pass type
462 * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
463 * an H_KEYDATA around it.
466 __ham_putitem(p, dbt, type)
475 /* Put the item element on the page. */
476 if (type == H_OFFPAGE) {
477 off = HOFFSET(p) - dbt->size;
478 HOFFSET(p) = p->inp[n] = off;
479 memcpy(P_ENTRY(p, n), dbt->data, dbt->size);
481 off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
482 HOFFSET(p) = p->inp[n] = off;
483 PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type);
486 /* Adjust page info. */
492 * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
493 * XXX TODO: if the item is an offdup, delete the other pages and
494 * then remove the pair. If the offpage page is 0, then you can
495 * just remove the pair.
498 __ham_del_pair(hashp, cursorp, reclaim_page)
500 HASH_CURSOR *cursorp;
503 DBT data_dbt, key_dbt;
505 DB_LSN new_lsn, *n_lsn, tmp_lsn;
508 db_pgno_t chg_pgno, pgno;
511 dbenv = hashp->dbp->dbenv;
513 if (cursorp->pagep == NULL && (ret =
514 __ham_get_page(hashp->dbp, cursorp->pgno, &cursorp->pagep)) != 0)
520 * We optimize for the normal case which is when neither the key nor
521 * the data are large. In this case, we write a single log record
522 * and do the delete. If either is large, we'll call __big_delete
523 * to remove the big item and then update the page to remove the
524 * entry referring to the big item.
527 if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) {
528 memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))),
530 ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
534 switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) {
537 HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
539 ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
543 HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
545 ret = __db_ddup(hashp->dbp, pgno, __ham_del_page);
546 F_CLR(cursorp, H_ISDUP);
550 * If we delete a pair that is/was a duplicate, then
551 * we had better clear the flag so that we update the
552 * cursor appropriately.
554 F_CLR(cursorp, H_ISDUP);
561 /* Now log the delete off this page. */
562 if (DB_LOGGING(hashp->dbp)) {
563 key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
565 LEN_HITEM(p, hashp->hdr->pagesize, H_KEYINDEX(ndx));
566 data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
568 LEN_HITEM(p, hashp->hdr->pagesize, H_DATAINDEX(ndx));
570 if ((ret = __ham_insdel_log(dbenv->lg_info,
571 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPAIR,
572 hashp->dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
573 &LSN(p), &key_dbt, &data_dbt)) != 0)
576 /* Move lsn onto page. */
580 __ham_dpair(hashp->dbp, p, ndx);
583 * If we are locking, we will not maintain this.
584 * XXXX perhaps we can retain incremental numbers and apply them
587 if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
591 * If we need to reclaim the page, then check if the page is empty.
592 * There are two cases. If it's empty and it's not the first page
593 * in the bucket (i.e., the bucket page) then we can simply remove
594 * it. If it is the first chain in the bucket, then we need to copy
595 * the second page into it and remove the second page.
597 if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
598 NEXT_PGNO(p) != PGNO_INVALID) {
599 PAGE *n_pagep, *nn_pagep;
603 * First page in chain is empty and we know that there
604 * are more pages in the chain.
607 __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0)
610 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
612 __ham_get_page(hashp->dbp, NEXT_PGNO(n_pagep),
614 (void) __ham_put_page(hashp->dbp, n_pagep, 0);
619 if (DB_LOGGING(hashp->dbp)) {
620 key_dbt.data = n_pagep;
621 key_dbt.size = hashp->hdr->pagesize;
622 if ((ret = __ham_copypage_log(dbenv->lg_info,
623 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
624 hashp->dbp->log_fileid, PGNO(p), &LSN(p),
625 PGNO(n_pagep), &LSN(n_pagep), NEXT_PGNO(n_pagep),
626 NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL :
627 &LSN(nn_pagep), &key_dbt)) != 0)
630 /* Move lsn onto page. */
631 LSN(p) = new_lsn; /* Structure assignment. */
632 LSN(n_pagep) = new_lsn;
633 if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
634 LSN(nn_pagep) = new_lsn;
636 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
637 PREV_PGNO(nn_pagep) = PGNO(p);
638 (void)__ham_put_page(hashp->dbp, nn_pagep, 1);
643 memcpy(p, n_pagep, hashp->hdr->pagesize);
646 PREV_PGNO(p) = PGNO_INVALID;
649 * Cursor is advanced to the beginning of the next page.
651 cursorp->bndx = NDX_INVALID;
652 cursorp->pgno = PGNO(p);
654 if ((ret = __ham_dirty_page(hashp, p)) != 0 ||
655 (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0)
657 } else if (reclaim_page &&
658 NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
659 PAGE *n_pagep, *p_pagep;
662 __ham_get_page(hashp->dbp, PREV_PGNO(p), &p_pagep)) != 0)
665 if (NEXT_PGNO(p) != PGNO_INVALID) {
666 if ((ret = __ham_get_page(hashp->dbp,
667 NEXT_PGNO(p), &n_pagep)) != 0) {
668 (void)__ham_put_page(hashp->dbp, p_pagep, 0);
671 n_lsn = &LSN(n_pagep);
677 NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
679 PREV_PGNO(n_pagep) = PGNO(p_pagep);
681 if (DB_LOGGING(hashp->dbp)) {
682 if ((ret = __ham_newpage_log(dbenv->lg_info,
683 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELOVFL,
684 hashp->dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
685 PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
688 /* Move lsn onto page. */
689 LSN(p_pagep) = new_lsn; /* Structure assignment. */
691 LSN(n_pagep) = new_lsn;
694 cursorp->pgno = NEXT_PGNO(p);
697 * Since we are about to delete the cursor page and we have
698 * just moved the cursor, we need to make sure that the
699 * old page pointer isn't left hanging around in the cursor.
701 cursorp->pagep = NULL;
703 ret = __ham_del_page(hashp->dbp, p);
704 if ((tret = __ham_put_page(hashp->dbp, p_pagep, 1)) != 0 &&
707 if (n_pagep != NULL &&
708 (tret = __ham_put_page(hashp->dbp, n_pagep, 1)) != 0 &&
715 * Mark item deleted so that we don't try to return it, and
716 * so that we update the cursor correctly on the next call
719 F_SET(cursorp, H_DELETED);
720 chg_pgno = cursorp->pgno;
721 ret = __ham_dirty_page(hashp, p);
723 __ham_c_update(cursorp, chg_pgno, 0, 0, 0);
726 * Since we just deleted a pair from the master page, anything
727 * in cursorp->dpgno should be cleared.
729 cursorp->dpgno = PGNO_INVALID;
731 F_CLR(cursorp, H_OK);
737 * Given the key data indicated by the cursor, replace part/all of it
738 * according to the fields in the dbt.
740 * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
743 __ham_replpair(hashp, hcp, dbt, make_dup)
749 DBT old_dbt, tdata, tmp;
753 int is_big, ret, type;
754 u_int8_t *beg, *dest, *end, *hk, *src;
757 * Big item replacements are handled in generic code.
758 * Items that fit on the current page fall into 4 classes.
759 * 1. On-page element, same size
760 * 2. On-page element, new is bigger (fits)
761 * 3. On-page element, new is bigger (does not fit)
762 * 4. On-page element, old is bigger
763 * Numbers 1, 2, and 4 are essentially the same (and should
764 * be the common case). We handle case 3 as a delete and
769 * We need to compute the number of bytes that we are adding or
770 * removing from the entry. Normally, we can simply substract
771 * the number of bytes we are replacing (dbt->dlen) from the
772 * number of bytes we are inserting (dbt->size). However, if
773 * we are doing a partial put off the end of a record, then this
774 * formula doesn't work, because we are essentially adding
777 change = dbt->size - dbt->dlen;
779 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
780 is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
783 memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
785 len = LEN_HKEYDATA(hcp->pagep,
786 hashp->dbp->pgsize, H_DATAINDEX(hcp->bndx));
788 if (dbt->doff + dbt->dlen > len)
789 change += dbt->doff + dbt->dlen - len;
792 if (change > (int)P_FREESPACE(hcp->pagep) || is_big) {
794 * Case 3 -- two subcases.
795 * A. This is not really a partial operation, but an overwrite.
796 * Simple del and add works.
797 * B. This is a partial and we need to construct the data that
798 * we are really inserting (yuck).
799 * In both cases, we need to grab the key off the page (in
800 * some cases we could do this outside of this routine; for
801 * cleanliness we do it here. If you happen to be on a big
802 * key, this could be a performance hit).
805 F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL);
807 __db_ret(hashp->dbp, hcp->pagep, H_KEYINDEX(hcp->bndx),
808 &tmp, &hcp->big_key, &hcp->big_keylen)) != 0)
811 if (dbt->doff == 0 && dbt->dlen == len) {
812 ret = __ham_del_pair(hashp, hcp, 0);
814 ret = __ham_add_el(hashp,
815 hcp, &tmp, dbt, H_KEYDATA);
816 } else { /* Case B */
817 type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
818 HPAGE_PTYPE(hk) : H_KEYDATA;
820 F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL);
822 if ((ret = __db_ret(hashp->dbp, hcp->pagep,
823 H_DATAINDEX(hcp->bndx), &tdata, &hcp->big_data,
824 &hcp->big_datalen)) != 0)
827 /* Now we can delete the item. */
828 if ((ret = __ham_del_pair(hashp, hcp, 0)) != 0) {
829 __db_free(tdata.data);
833 /* Now shift old data around to make room for new. */
835 tdata.data = (void *)__db_realloc(tdata.data,
836 tdata.size + change);
837 memset((u_int8_t *)tdata.data + tdata.size,
840 if (tdata.data == NULL)
842 end = (u_int8_t *)tdata.data + tdata.size;
844 src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
845 if (src < end && tdata.size > dbt->doff + dbt->dlen) {
846 len = tdata.size - dbt->doff - dbt->dlen;
848 memmove(dest, src, len);
850 memcpy((u_int8_t *)tdata.data + dbt->doff,
851 dbt->data, dbt->size);
852 tdata.size += change;
854 /* Now add the pair. */
855 ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type);
856 __db_free(tdata.data);
858 err: __db_free(tmp.data);
863 * Set up pointer into existing data. Do it before the log
864 * message so we can use it inside of the log setup.
866 beg = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
870 * If we are going to have to move bytes at all, figure out
871 * all the parameters here. Then log the call before moving
874 if (DB_LOGGING(hashp->dbp)) {
876 old_dbt.size = dbt->dlen;
877 if ((ret = __ham_replace_log(hashp->dbp->dbenv->lg_info,
878 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
879 hashp->dbp->log_fileid, PGNO(hcp->pagep),
880 (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep),
881 (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
884 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
887 __ham_onpage_replace(hcp->pagep, hashp->dbp->pgsize,
888 (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt);
894 * Replace data on a page with new data, possibly growing or shrinking what's
895 * there. This is called on two different occasions. On one (from replpair)
896 * we are interested in changing only the data. On the other (from recovery)
897 * we are replacing the entire data (header and all) with a new element. In
898 * the latter case, the off argument is negative.
899 * pagep: the page that we're changing
900 * ndx: page index of the element that is growing/shrinking.
901 * off: Offset at which we are beginning the replacement.
902 * change: the number of bytes (+ or -) that the element is growing/shrinking.
903 * dbt: the new data that gets written at beg.
904 * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
905 * PUBLIC: int32_t, DBT *));
908 __ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
918 u_int8_t *src, *dest;
923 src = (u_int8_t *)(pagep) + HOFFSET(pagep);
925 len = pagep->inp[ndx] - HOFFSET(pagep);
926 else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
927 len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) +
928 LEN_HKEYDATA(pagep, pgsize, ndx) - src;
931 len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src;
933 memmove(dest, src, len);
935 memset(dest + len, 0, change);
937 /* Now update the indices. */
938 for (i = ndx; i < NUM_ENT(pagep); i++)
939 pagep->inp[i] -= change;
940 HOFFSET(pagep) -= change;
943 memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off,
944 dbt->data, dbt->size);
946 memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
950 * PUBLIC: int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
953 __ham_split_page(hashp, obucket, nbucket)
955 u_int32_t obucket, nbucket;
957 DBT key, val, page_dbt;
960 PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
962 db_pgno_t bucket_pgno, next_pgno;
963 u_int32_t big_len, len;
967 dbenv = hashp->dbp->dbenv;
968 temp_pagep = old_pagep = new_pagep = NULL;
970 bucket_pgno = BUCKET_TO_PAGE(hashp, obucket);
971 if ((ret = __ham_get_page(hashp->dbp, bucket_pgno, &old_pagep)) != 0)
973 if ((ret = __ham_new_page(hashp, BUCKET_TO_PAGE(hashp, nbucket), P_HASH,
977 temp_pagep = hashp->split_buf;
978 memcpy(temp_pagep, old_pagep, hashp->hdr->pagesize);
980 if (DB_LOGGING(hashp->dbp)) {
981 page_dbt.size = hashp->hdr->pagesize;
982 page_dbt.data = old_pagep;
983 if ((ret = __ham_splitdata_log(dbenv->lg_info,
984 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
985 hashp->dbp->log_fileid, SPLITOLD, PGNO(old_pagep),
986 &page_dbt, &LSN(old_pagep))) != 0)
990 P_INIT(old_pagep, hashp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID,
991 PGNO_INVALID, 0, P_HASH);
993 if (DB_LOGGING(hashp->dbp))
994 LSN(old_pagep) = new_lsn; /* Structure assignment. */
998 val.flags = key.flags = 0;
999 while (temp_pagep != NULL) {
1000 for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
1002 __db_ret(hashp->dbp, temp_pagep, H_KEYINDEX(n),
1003 &key, &big_buf, &big_len)) != 0)
1006 if (__ham_call_hash(hashp, key.data, key.size)
1013 * Figure out how many bytes we need on the new
1014 * page to store the key/data pair.
1017 len = LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
1019 LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
1021 2 * sizeof(db_indx_t);
1023 if (P_FREESPACE(*pp) < len) {
1024 if (DB_LOGGING(hashp->dbp)) {
1025 page_dbt.size = hashp->hdr->pagesize;
1026 page_dbt.data = *pp;
1027 if ((ret = __ham_splitdata_log(
1029 (DB_TXN *)hashp->dbp->txn,
1031 hashp->dbp->log_fileid, SPLITNEW,
1032 PGNO(*pp), &page_dbt,
1037 if ((ret = __ham_add_ovflpage(hashp,
1041 __ham_copy_item(hashp, temp_pagep, H_KEYINDEX(n), *pp);
1042 __ham_copy_item(hashp, temp_pagep, H_DATAINDEX(n), *pp);
1044 next_pgno = NEXT_PGNO(temp_pagep);
1046 /* Clear temp_page; if it's a link overflow page, free it. */
1047 if (PGNO(temp_pagep) != bucket_pgno && (ret =
1048 __ham_del_page(hashp->dbp, temp_pagep)) != 0)
1051 if (next_pgno == PGNO_INVALID)
1054 __ham_get_page(hashp->dbp, next_pgno, &temp_pagep)) != 0)
1057 if (temp_pagep != NULL && DB_LOGGING(hashp->dbp)) {
1058 page_dbt.size = hashp->hdr->pagesize;
1059 page_dbt.data = temp_pagep;
1060 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1061 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1062 hashp->dbp->log_fileid, SPLITOLD, PGNO(temp_pagep),
1063 &page_dbt, &LSN(temp_pagep))) != 0)
1065 LSN(temp_pagep) = new_lsn;
1068 if (big_buf != NULL)
1072 * If the original bucket spanned multiple pages, then we've got
1073 * a pointer to a page that used to be on the bucket chain. It
1074 * should be deleted.
1076 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
1077 (ret = __ham_del_page(hashp->dbp, temp_pagep)) != 0)
1081 * Write new buckets out.
1083 if (DB_LOGGING(hashp->dbp)) {
1084 page_dbt.size = hashp->hdr->pagesize;
1085 page_dbt.data = old_pagep;
1086 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1087 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1088 hashp->dbp->log_fileid, SPLITNEW, PGNO(old_pagep),
1089 &page_dbt, &LSN(old_pagep))) != 0)
1091 LSN(old_pagep) = new_lsn;
1093 page_dbt.data = new_pagep;
1094 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1095 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1096 hashp->dbp->log_fileid, SPLITNEW, PGNO(new_pagep),
1097 &page_dbt, &LSN(new_pagep))) != 0)
1099 LSN(new_pagep) = new_lsn;
1101 ret = __ham_put_page(hashp->dbp, old_pagep, 1);
1102 if ((tret = __ham_put_page(hashp->dbp, new_pagep, 1)) != 0 &&
1107 if (old_pagep != NULL)
1108 (void)__ham_put_page(hashp->dbp, old_pagep, 1);
1109 if (new_pagep != NULL)
1110 (void)__ham_put_page(hashp->dbp, new_pagep, 1);
1111 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
1112 (void)__ham_put_page(hashp->dbp, temp_pagep, 1);
1118 * Add the given pair to the page. The page in question may already be
1119 * held (i.e. it was already gotten). If it is, then the page is passed
1120 * in via the pagep parameter. On return, pagep will contain the page
1121 * to which we just added something. This allows us to link overflow
1122 * pages and return the new page having correctly put the last page.
1124 * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
1128 __ham_add_el(hashp, hcp, key, val, type)
1131 const DBT *key, *val;
1134 const DBT *pkey, *pdata;
1135 DBT key_dbt, data_dbt;
1137 HOFFPAGE doff, koff;
1138 db_pgno_t next_pgno;
1139 u_int32_t data_size, key_size, pairsize;
1140 int do_expand, is_keybig, is_databig, rectype, ret;
1141 int key_type, data_type;
1145 if (hcp->pagep == NULL && (ret = __ham_get_page(hashp->dbp,
1146 hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page :
1147 hcp->pgno, &hcp->pagep)) != 0)
1150 key_size = HKEYDATA_PSIZE(key->size);
1151 data_size = HKEYDATA_PSIZE(val->size);
1152 is_keybig = ISBIG(hashp, key->size);
1153 is_databig = ISBIG(hashp, val->size);
1155 key_size = HOFFPAGE_PSIZE;
1157 data_size = HOFFPAGE_PSIZE;
1159 pairsize = key_size + data_size;
1161 /* Advance to first page in chain with room for item. */
1162 while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) !=
1165 * This may not be the end of the chain, but the pair may fit
1166 * anyway. Check if it's a bigpair that fits or a regular
1169 if (P_FREESPACE(hcp->pagep) >= pairsize)
1171 next_pgno = NEXT_PGNO(hcp->pagep);
1173 __ham_next_cpage(hashp, hcp, next_pgno, 0, 0)) != 0)
1178 * Check if we need to allocate a new page.
1180 if (P_FREESPACE(hcp->pagep) < pairsize) {
1182 if ((ret = __ham_add_ovflpage(hashp,
1183 hcp->pagep, 1, &hcp->pagep)) != 0)
1185 hcp->pgno = PGNO(hcp->pagep);
1191 hcp->bndx = H_NUMPAIRS(hcp->pagep);
1192 F_CLR(hcp, H_DELETED);
1194 if ((ret = __db_poff(hashp->dbp,
1195 key, &koff.pgno, __ham_overflow_page)) != 0)
1197 koff.type = H_OFFPAGE;
1198 koff.tlen = key->size;
1199 key_dbt.data = &koff;
1200 key_dbt.size = sizeof(koff);
1202 key_type = H_OFFPAGE;
1205 key_type = H_KEYDATA;
1209 if ((ret = __db_poff(hashp->dbp,
1210 val, &doff.pgno, __ham_overflow_page)) != 0)
1212 doff.type = H_OFFPAGE;
1213 doff.tlen = val->size;
1214 data_dbt.data = &doff;
1215 data_dbt.size = sizeof(doff);
1217 data_type = H_OFFPAGE;
1223 if (DB_LOGGING(hashp->dbp)) {
1226 rectype |= PAIR_DATAMASK;
1228 rectype |= PAIR_KEYMASK;
1230 if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
1231 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
1232 hashp->dbp->log_fileid, PGNO(hcp->pagep),
1233 (u_int32_t)H_NUMPAIRS(hcp->pagep),
1234 &LSN(hcp->pagep), pkey, pdata)) != 0)
1237 /* Move lsn onto page. */
1238 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
1241 __ham_putitem(hcp->pagep, pkey, key_type);
1242 __ham_putitem(hcp->pagep, pdata, data_type);
1245 * For splits, we are going to update item_info's page number
1246 * field, so that we can easily return to the same page the
1247 * next time we come in here. For other operations, this shouldn't
1248 * matter, since odds are this is the last thing that happens before
1249 * we return to the user program.
1251 hcp->pgno = PGNO(hcp->pagep);
1254 * XXX Maybe keep incremental numbers here
1256 if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
1257 hashp->hdr->nelem++;
1259 if (do_expand || (hashp->hdr->ffactor != 0 &&
1260 (u_int32_t)H_NUMPAIRS(hcp->pagep) > hashp->hdr->ffactor))
1261 F_SET(hcp, H_EXPAND);
1267 * Special __putitem call used in splitting -- copies one entry to
1268 * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
1269 * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
1270 * do not need to do any logging here.
1271 * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
1274 __ham_copy_item(hashp, src_page, src_ndx, dest_page)
1284 * Copy the key and data entries onto this new page.
1286 src = P_ENTRY(src_page, src_ndx);
1288 /* Set up space on dest. */
1289 len = LEN_HITEM(src_page, hashp->hdr->pagesize, src_ndx);
1290 HOFFSET(dest_page) -= len;
1291 dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
1292 dest = P_ENTRY(dest_page, NUM_ENT(dest_page));
1293 NUM_ENT(dest_page)++;
1295 memcpy(dest, src, len);
1301 * pointer on success
1304 * PUBLIC: int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
1307 __ham_add_ovflpage(hashp, pagep, release, pp)
1318 dbenv = hashp->dbp->dbenv;
1320 if ((ret = __ham_overflow_page(hashp->dbp, P_HASH, &new_pagep)) != 0)
1323 if (DB_LOGGING(hashp->dbp)) {
1324 if ((ret = __ham_newpage_log(dbenv->lg_info,
1325 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, PUTOVFL,
1326 hashp->dbp->log_fileid, PGNO(pagep), &LSN(pagep),
1327 PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
1330 /* Move lsn onto page. */
1331 LSN(pagep) = LSN(new_pagep) = new_lsn;
1333 NEXT_PGNO(pagep) = PGNO(new_pagep);
1334 PREV_PGNO(new_pagep) = PGNO(pagep);
1337 ret = __ham_put_page(hashp->dbp, pagep, 1);
1339 hashp->hash_overflows++;
1346 * PUBLIC: int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
1349 __ham_new_page(hashp, addr, type, pp)
1351 u_int32_t addr, type;
1357 if ((ret = memp_fget(hashp->dbp->mpf,
1358 &addr, DB_MPOOL_CREATE, &pagep)) != 0)
1362 __account_page(hashp, addr, 1);
1364 /* This should not be necessary because page-in should do it. */
1366 hashp->hdr->pagesize, addr, PGNO_INVALID, PGNO_INVALID, 0, type);
1373 * PUBLIC: int __ham_del_page __P((DB *, PAGE *));
1376 __ham_del_page(dbp, pagep)
1384 hashp = (HTAB *)dbp->internal;
1386 DIRTY_META(hashp, ret);
1389 __db_err(hashp->dbp->dbenv,
1390 "free_ovflpage: unable to lock meta data page %s\n",
1393 * If we are going to return an error, then we should free
1394 * the page, so it doesn't stay pinned forever.
1396 (void)__ham_put_page(hashp->dbp, pagep, 0);
1400 if (DB_LOGGING(hashp->dbp)) {
1401 if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
1402 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPGNO,
1403 hashp->dbp->log_fileid, PGNO(pagep), hashp->hdr->last_freed,
1404 (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID,
1405 &LSN(pagep), &hashp->hdr->lsn)) != 0)
1408 hashp->hdr->lsn = new_lsn;
1409 LSN(pagep) = new_lsn;
1416 __pgno = pagep->pgno;
1418 memset(pagep, 0xff, dbp->pgsize);
1419 pagep->pgno = __pgno;
1423 TYPE(pagep) = P_INVALID;
1424 NEXT_PGNO(pagep) = hashp->hdr->last_freed;
1425 hashp->hdr->last_freed = PGNO(pagep);
1427 return (__ham_put_page(hashp->dbp, pagep, 1));
1432 * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
1435 __ham_put_page(dbp, pagep, is_dirty)
1441 __account_page((HTAB *)dbp->cookie,
1442 ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
1444 return (memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0)));
1448 * __ham_dirty_page --
1449 * Mark a page dirty.
1451 * PUBLIC: int __ham_dirty_page __P((HTAB *, PAGE *));
1454 __ham_dirty_page(hashp, pagep)
1458 return (memp_fset(hashp->dbp->mpf, pagep, DB_MPOOL_DIRTY));
1462 * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
1465 __ham_get_page(dbp, addr, pagep)
1472 ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep);
1475 __account_page((HTAB *)dbp->internal, addr, 1);
1481 * PUBLIC: int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
1484 __ham_overflow_page(dbp, type, pp)
1489 DB_LSN *lsnp, new_lsn;
1492 db_pgno_t new_addr, next_free, newalloc_flag;
1493 u_int32_t offset, splitnum;
1496 hashp = (HTAB *)dbp->internal;
1499 DIRTY_META(hashp, ret);
1504 * This routine is split up into two parts. First we have
1505 * to figure out the address of the new page that we are
1506 * allocating. Then we have to log the allocation. Only
1507 * after the log do we get to complete allocation of the
1510 new_addr = hashp->hdr->last_freed;
1511 if (new_addr != PGNO_INVALID) {
1512 if ((ret = __ham_get_page(hashp->dbp, new_addr, &p)) != 0)
1514 next_free = NEXT_PGNO(p);
1518 splitnum = hashp->hdr->ovfl_point;
1519 hashp->hdr->spares[splitnum]++;
1520 offset = hashp->hdr->spares[splitnum] -
1521 (splitnum ? hashp->hdr->spares[splitnum - 1] : 0);
1522 new_addr = PGNO_OF(hashp, hashp->hdr->ovfl_point, offset);
1523 if (new_addr > MAX_PAGES(hashp)) {
1524 __db_err(hashp->dbp->dbenv, "hash: out of file pages");
1525 hashp->hdr->spares[splitnum]--;
1528 next_free = PGNO_INVALID;
1534 if (DB_LOGGING(hashp->dbp)) {
1535 if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
1536 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, ALLOCPGNO,
1537 hashp->dbp->log_fileid, new_addr, next_free,
1538 0, newalloc_flag, type, lsnp, &hashp->hdr->lsn)) != 0)
1541 hashp->hdr->lsn = new_lsn;
1547 /* We just took something off the free list, initialize it. */
1548 hashp->hdr->last_freed = next_free;
1549 P_INIT(p, hashp->hdr->pagesize, PGNO(p), PGNO_INVALID,
1550 PGNO_INVALID, 0, (u_int8_t)type);
1552 /* Get the new page. */
1553 if ((ret = __ham_new_page(hashp, new_addr, type, &p)) != 0)
1556 if (DB_LOGGING(hashp->dbp))
1565 * PUBLIC: #ifdef DEBUG
1566 * PUBLIC: int __bucket_to_page __P((HTAB *, int));
1570 __bucket_to_page(hashp, n)
1578 ret_val += hashp->hdr->spares[__db_log2(n + 1) - 1];
1585 * Create a bunch of overflow pages at the current split point.
1586 * PUBLIC: void __ham_init_ovflpages __P((HTAB *));
1589 __ham_init_ovflpages(hp)
1594 db_pgno_t last_pgno, new_pgno;
1595 u_int32_t i, curpages, numpages;
1597 curpages = hp->hdr->spares[hp->hdr->ovfl_point] -
1598 hp->hdr->spares[hp->hdr->ovfl_point - 1];
1599 numpages = hp->hdr->ovfl_point + 1 - curpages;
1601 last_pgno = hp->hdr->last_freed;
1602 new_pgno = PGNO_OF(hp, hp->hdr->ovfl_point, curpages + 1);
1603 if (DB_LOGGING(hp->dbp)) {
1604 (void)__ham_ovfl_log(hp->dbp->dbenv->lg_info,
1605 (DB_TXN *)hp->dbp->txn, &new_lsn, 0,
1606 hp->dbp->log_fileid, new_pgno,
1607 numpages, last_pgno, hp->hdr->ovfl_point, &hp->hdr->lsn);
1608 hp->hdr->lsn = new_lsn;
1612 hp->hdr->spares[hp->hdr->ovfl_point] += numpages;
1613 for (i = numpages; i > 0; i--) {
1614 if (__ham_new_page(hp,
1615 PGNO_OF(hp, hp->hdr->ovfl_point, curpages + i),
1616 P_INVALID, &p) != 0)
1619 NEXT_PGNO(p) = last_pgno;
1620 last_pgno = PGNO(p);
1621 (void)__ham_put_page(hp->dbp, p, 1);
1623 hp->hdr->last_freed = last_pgno;
1627 * PUBLIC: int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
1630 __ham_get_cpage(hashp, hcp, mode)
1637 if (hcp->lock == 0 && F_ISSET(hashp->dbp, DB_AM_LOCKING) &&
1638 (ret = __ham_lock_bucket(hashp->dbp, hcp, mode)) != 0)
1641 if (hcp->pagep == NULL) {
1642 if (hcp->pgno == PGNO_INVALID) {
1643 hcp->pgno = BUCKET_TO_PAGE(hashp, hcp->bucket);
1648 __ham_get_page(hashp->dbp, hcp->pgno, &hcp->pagep)) != 0)
1652 if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL)
1654 __ham_get_page(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
1660 * Get a new page at the cursor, putting the last page if necessary.
1661 * If the flag is set to H_ISDUP, then we are talking about the
1662 * duplicate page, not the main page.
1663 * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
1664 * PUBLIC: int, int));
1667 __ham_next_cpage(hashp, hcp, pgno, dirty, flags)
1677 if (flags & H_ISDUP && hcp->dpagep != NULL &&
1678 (ret = __ham_put_page(hashp->dbp, hcp->dpagep, dirty)) != 0)
1680 else if (!(flags & H_ISDUP) && hcp->pagep != NULL &&
1681 (ret = __ham_put_page(hashp->dbp, hcp->pagep, dirty)) != 0)
1684 if ((ret = __ham_get_page(hashp->dbp, pgno, &p)) != 0)
1687 if (flags & H_ISDUP) {
1701 * __ham_lock_bucket --
1702 * Get the lock on a particular bucket.
1705 __ham_lock_bucket(dbp, hcp, mode)
1713 * What a way to trounce on the memory system. It might be
1714 * worth copying the lk_info into the hashp.
1717 dbp->lock.pgno = (db_pgno_t)(hcp->bucket);
1718 ret = lock_get(dbp->dbenv->lk_info,
1719 dbp->txn == NULL ? dbp->locker : dbp->txn->txnid, 0,
1720 &dbp->lock_dbt, mode, &hcp->lock);
1722 return (ret < 0 ? EAGAIN : ret);
1727 * Delete a pair on a page, paying no attention to what the pair
1728 * represents. The caller is responsible for freeing up duplicates
1729 * or offpage entries that might be referenced by this pair.
1731 * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
1734 __ham_dpair(dbp, p, pndx)
1740 u_int8_t *dest, *src;
1743 * Compute "delta", the amount we have to shift all of the
1744 * offsets. To find the delta, we just need to calculate
1745 * the size of the pair of elements we are removing.
1747 delta = H_PAIRSIZE(p, dbp->pgsize, pndx);
1750 * The hard case: we want to remove something other than
1751 * the last item on the page. We need to shift data and
1754 if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) {
1756 * Move the data: src is the first occupied byte on
1757 * the page. (Length is delta.)
1759 src = (u_int8_t *)p + HOFFSET(p);
1762 * Destination is delta bytes beyond src. This might
1763 * be an overlapping copy, so we have to use memmove.
1766 memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p));
1769 /* Adjust the offsets. */
1770 for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) {
1771 p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta;
1772 p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta;
1775 /* Adjust page metadata. */
1776 HOFFSET(p) = HOFFSET(p) + delta;
1777 NUM_ENT(p) = NUM_ENT(p) - 2;
1782 __account_page(hashp, pgno, inout)
1794 if (inout == -1) /* XXX: Kluge */
1797 /* Find page in list. */
1798 for (i = 0; i < last; i++)
1799 if (list[i].pgno == pgno)
1803 list[last].times = inout;
1804 list[last].pgno = pgno;
1807 list[i].times = inout;
1808 if (list[i].times == 0) {
1809 for (j = i; j < last; j++)
1810 list[j] = list[j + 1];
1813 for (i = 0; i < last; i++, list[i].times++)
1814 if (list[i].times > 20 &&
1815 !__is_bitmap_pgno(hashp, list[i].pgno))
1816 (void)fprintf(stderr,
1817 "Warning: pg %lu has been out for %d times\n",
1818 (u_long)list[i].pgno, list[i].times);
1820 #endif /* DEBUG_SLOW */