2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid[] = "@(#)hash_page.c 10.29 (Sleepycat) 11/2/97";
57 * Page manipulation for hashing package.
69 #ifndef NO_SYSTEM_INCLUDES
70 #include <sys/types.h>
84 static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t));
87 static void __account_page(HTAB *, db_pgno_t, int);
91 * PUBLIC: int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
94 __ham_item(hashp, cursorp, mode)
102 if (F_ISSET(cursorp, H_DELETED))
104 F_CLR(cursorp, H_OK | H_NOMORE);
106 /* Check if we need to get a page for this cursor. */
107 if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
110 /* Check if we are looking for space in which to insert an item. */
111 if (cursorp->seek_size && cursorp->seek_found_page == PGNO_INVALID
112 && cursorp->seek_size < P_FREESPACE(cursorp->pagep))
113 cursorp->seek_found_page = cursorp->pgno;
115 /* Check if we need to go on to the next page. */
116 if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno == PGNO_INVALID)
118 * ISDUP is set, and offset is at the beginning of the datum.
119 * We need to grab the length of the datum, then set the datum
120 * pointer to be the beginning of the datum.
122 memcpy(&cursorp->dup_len,
123 HKEYDATA_DATA(H_PAIRDATA(cursorp->pagep, cursorp->bndx)) +
124 cursorp->dup_off, sizeof(db_indx_t));
125 else if (F_ISSET(cursorp, H_ISDUP)) {
126 /* Make sure we're not about to run off the page. */
127 if (cursorp->dpagep == NULL && (ret = __ham_get_page(hashp->dbp,
128 cursorp->dpgno, &cursorp->dpagep)) != 0)
131 if (cursorp->dndx >= NUM_ENT(cursorp->dpagep)) {
132 if (NEXT_PGNO(cursorp->dpagep) == PGNO_INVALID) {
133 if ((ret = __ham_put_page(hashp->dbp,
134 cursorp->dpagep, 0)) != 0)
136 F_CLR(cursorp, H_ISDUP);
137 cursorp->dpagep = NULL;
138 cursorp->dpgno = PGNO_INVALID;
139 cursorp->dndx = NDX_INVALID;
141 } else if ((ret = __ham_next_cpage(hashp, cursorp,
142 NEXT_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
147 if (cursorp->bndx >= (db_indx_t)H_NUMPAIRS(cursorp->pagep)) {
148 /* Fetch next page. */
149 if (NEXT_PGNO(cursorp->pagep) == PGNO_INVALID) {
150 F_SET(cursorp, H_NOMORE);
151 if (cursorp->dpagep != NULL &&
152 (ret = __ham_put_page(hashp->dbp,
153 cursorp->dpagep, 0)) != 0)
155 cursorp->dpgno = PGNO_INVALID;
156 return (DB_NOTFOUND);
158 next_pgno = NEXT_PGNO(cursorp->pagep);
160 if ((ret = __ham_next_cpage(hashp,
161 cursorp, next_pgno, 0, 0)) != 0)
165 F_SET(cursorp, H_OK);
170 * PUBLIC: int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
173 __ham_item_reset(hashp, cursorp)
175 HASH_CURSOR *cursorp;
180 ret = __ham_put_page(hashp->dbp, cursorp->pagep, 0);
184 __ham_item_init(cursorp);
189 * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
192 __ham_item_init(cursorp)
193 HASH_CURSOR *cursorp;
195 cursorp->pagep = NULL;
196 cursorp->bucket = BUCKET_INVALID;
198 cursorp->bndx = NDX_INVALID;
199 cursorp->pgno = PGNO_INVALID;
200 cursorp->dpgno = PGNO_INVALID;
201 cursorp->dndx = NDX_INVALID;
202 cursorp->dpagep = NULL;
204 cursorp->seek_size = 0;
205 cursorp->seek_found_page = PGNO_INVALID;
209 * PUBLIC: int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
212 __ham_item_done(hashp, cursorp, dirty)
214 HASH_CURSOR *cursorp;
222 ret = __ham_put_page(hashp->dbp, cursorp->pagep,
223 dirty && cursorp->dpagep == NULL);
224 cursorp->pagep = NULL;
227 t_ret = __ham_put_page(hashp->dbp, cursorp->dpagep, dirty);
228 cursorp->dpagep = NULL;
230 if (ret == 0 && t_ret != 0)
234 * If we are running with transactions, then we must
235 * not relinquish locks explicitly.
237 if (cursorp->lock && hashp->dbp->txn == NULL)
238 t_ret = lock_put(hashp->dbp->dbenv->lk_info, cursorp->lock);
243 * We don't throw out the page number since we might want to
244 * continue getting on this page.
246 return (ret != 0 ? ret : t_ret);
250 * Returns the last item in a bucket.
252 * PUBLIC: int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
255 __ham_item_last(hashp, cursorp, mode)
257 HASH_CURSOR *cursorp;
262 if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
265 cursorp->bucket = hashp->hdr->max_bucket;
266 F_SET(cursorp, H_OK);
267 return (__ham_item_prev(hashp, cursorp, mode));
270 * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
273 __ham_item_first(hashp, cursorp, mode)
275 HASH_CURSOR *cursorp;
280 if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
282 F_SET(cursorp, H_OK);
284 return (__ham_item_next(hashp, cursorp, mode));
288 * Returns a pointer to key/data pair on a page. In the case of bigkeys,
289 * just returns the page number and index of the bigkey pointer pair.
291 * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
294 __ham_item_prev(hashp, cursorp, mode)
296 HASH_CURSOR *cursorp;
303 * There are N cases for backing up in a hash file.
304 * Case 1: In the middle of a page, no duplicates, just dec the index.
305 * Case 2: In the middle of a duplicate set, back up one.
306 * Case 3: At the beginning of a duplicate set, get out of set and
307 * back up to next key.
308 * Case 4: At the beginning of a page; go to previous page.
309 * Case 5: At the beginning of a bucket; go to prev bucket.
311 F_CLR(cursorp, H_OK | H_NOMORE | H_DELETED);
314 * First handle the duplicates. Either you'll get the key here
315 * or you'll exit the duplicate set and drop into the code below
316 * to handle backing up through keys.
318 if (F_ISSET(cursorp, H_ISDUP)) {
319 if (cursorp->dpgno == PGNO_INVALID) {
320 /* Duplicates are on-page. */
321 if (cursorp->dup_off != 0)
322 if ((ret = __ham_get_cpage(hashp,
323 cursorp, mode)) != 0)
328 memcpy(&h->dup_len, HKEYDATA_DATA(
329 H_PAIRDATA(h->pagep, h->bndx))
330 + h->dup_off - sizeof(db_indx_t),
333 DUP_SIZE(cursorp->dup_len);
335 return (__ham_item(hashp,
338 } else if (cursorp->dndx > 0) { /* Duplicates are off-page. */
340 return (__ham_item(hashp, cursorp, mode));
341 } else if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
343 else if (PREV_PGNO(cursorp->dpagep) == PGNO_INVALID) {
344 F_CLR(cursorp, H_ISDUP); /* End of dups */
345 cursorp->dpgno = PGNO_INVALID;
346 if (cursorp->dpagep != NULL)
347 (void)__ham_put_page(hashp->dbp,
349 cursorp->dpagep = NULL;
350 } else if ((ret = __ham_next_cpage(hashp, cursorp,
351 PREV_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
354 cursorp->dndx = NUM_ENT(cursorp->pagep) - 1;
355 return (__ham_item(hashp, cursorp, mode));
360 * If we get here, we are not in a duplicate set, and just need
361 * to back up the cursor. There are still three cases:
362 * midpage, beginning of page, beginning of bucket.
365 if (cursorp->bndx == 0) { /* Beginning of page. */
366 if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
368 cursorp->pgno = PREV_PGNO(cursorp->pagep);
369 if (cursorp->pgno == PGNO_INVALID) {
370 /* Beginning of bucket. */
371 F_SET(cursorp, H_NOMORE);
372 return (DB_NOTFOUND);
373 } else if ((ret = __ham_next_cpage(hashp,
374 cursorp, cursorp->pgno, 0, 0)) != 0)
377 cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
381 * Either we've got the cursor set up to be decremented, or we
382 * have to find the end of a bucket.
384 if (cursorp->bndx == NDX_INVALID) {
385 if (cursorp->pagep == NULL)
386 next_pgno = BUCKET_TO_PAGE(hashp, cursorp->bucket);
391 if ((ret = __ham_next_cpage(hashp,
392 cursorp, next_pgno, 0, 0)) != 0)
394 got_page: next_pgno = NEXT_PGNO(cursorp->pagep);
395 cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
396 } while (next_pgno != PGNO_INVALID);
398 if (cursorp->bndx == 0) {
399 /* Bucket was empty. */
400 F_SET(cursorp, H_NOMORE);
401 return (DB_NOTFOUND);
407 return (__ham_item(hashp, cursorp, mode));
411 * Sets the cursor to the next key/data pair on a page.
413 * PUBLIC: int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
416 __ham_item_next(hashp, cursorp, mode)
418 HASH_CURSOR *cursorp;
422 * Deleted on-page duplicates are a weird case. If we delete the last
423 * one, then our cursor is at the very end of a duplicate set and
424 * we actually need to go on to the next key.
426 if (F_ISSET(cursorp, H_DELETED)) {
427 if (cursorp->bndx != NDX_INVALID &&
428 F_ISSET(cursorp, H_ISDUP) &&
429 cursorp->dpgno == PGNO_INVALID &&
430 cursorp->dup_tlen == cursorp->dup_off) {
431 F_CLR(cursorp, H_ISDUP);
432 cursorp->dpgno = PGNO_INVALID;
435 F_CLR(cursorp, H_DELETED);
436 } else if (cursorp->bndx == NDX_INVALID) {
438 cursorp->dpgno = PGNO_INVALID;
439 F_CLR(cursorp, H_ISDUP);
440 } else if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno != PGNO_INVALID)
442 else if (F_ISSET(cursorp, H_ISDUP)) {
444 cursorp->dup_off += DUP_SIZE(cursorp->dup_len);
445 if (cursorp->dup_off >= cursorp->dup_tlen) {
446 F_CLR(cursorp, H_ISDUP);
447 cursorp->dpgno = PGNO_INVALID;
453 return (__ham_item(hashp, cursorp, mode));
457 * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
459 * This is a little bit sleazy in that we're overloading the meaning
460 * of the H_OFFPAGE type here. When we recover deletes, we have the
461 * entire entry instead of having only the DBT, so we'll pass type
462 * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
463 * an H_KEYDATA around it.
466 __ham_putitem(p, dbt, type)
475 /* Put the item element on the page. */
476 if (type == H_OFFPAGE) {
477 off = HOFFSET(p) - dbt->size;
478 HOFFSET(p) = p->inp[n] = off;
479 memcpy(P_ENTRY(p, n), dbt->data, dbt->size);
481 off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
482 HOFFSET(p) = p->inp[n] = off;
483 PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type);
486 /* Adjust page info. */
492 * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
493 * XXX TODO: if the item is an offdup, delete the other pages and
494 * then remove the pair. If the offpage page is 0, then you can
495 * just remove the pair.
498 __ham_del_pair(hashp, cursorp, reclaim_page)
500 HASH_CURSOR *cursorp;
503 DBT data_dbt, key_dbt;
505 DB_LSN new_lsn, *n_lsn, tmp_lsn;
508 db_pgno_t chg_pgno, pgno;
511 dbenv = hashp->dbp->dbenv;
513 if (cursorp->pagep == NULL && (ret =
514 __ham_get_page(hashp->dbp, cursorp->pgno, &cursorp->pagep)) != 0)
520 * We optimize for the normal case which is when neither the key nor
521 * the data are large. In this case, we write a single log record
522 * and do the delete. If either is large, we'll call __big_delete
523 * to remove the big item and then update the page to remove the
524 * entry referring to the big item.
527 if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) {
528 memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))),
530 ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
534 switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) {
537 HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
539 ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
543 HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
545 ret = __db_ddup(hashp->dbp, pgno, __ham_del_page);
546 F_CLR(cursorp, H_ISDUP);
550 * If we delete a pair that is/was a duplicate, then
551 * we had better clear the flag so that we update the
552 * cursor appropriately.
554 F_CLR(cursorp, H_ISDUP);
561 /* Now log the delete off this page. */
562 if (DB_LOGGING(hashp->dbp)) {
563 key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
565 LEN_HITEM(p, hashp->hdr->pagesize, H_KEYINDEX(ndx));
566 data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
568 LEN_HITEM(p, hashp->hdr->pagesize, H_DATAINDEX(ndx));
570 if ((ret = __ham_insdel_log(dbenv->lg_info,
571 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPAIR,
572 hashp->dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
573 &LSN(p), &key_dbt, &data_dbt)) != 0)
576 /* Move lsn onto page. */
580 __ham_dpair(hashp->dbp, p, ndx);
583 * If we are locking, we will not maintain this.
584 * XXXX perhaps we can retain incremental numbers and apply them
587 if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
591 * If we need to reclaim the page, then check if the page is empty.
592 * There are two cases. If it's empty and it's not the first page
593 * in the bucket (i.e., the bucket page) then we can simply remove
594 * it. If it is the first chain in the bucket, then we need to copy
595 * the second page into it and remove the second page.
597 if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
598 NEXT_PGNO(p) != PGNO_INVALID) {
599 PAGE *n_pagep, *nn_pagep;
603 * First page in chain is empty and we know that there
604 * are more pages in the chain.
607 __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0)
610 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
612 __ham_get_page(hashp->dbp, NEXT_PGNO(n_pagep),
614 (void) __ham_put_page(hashp->dbp, n_pagep, 0);
619 if (DB_LOGGING(hashp->dbp)) {
620 key_dbt.data = n_pagep;
621 key_dbt.size = hashp->hdr->pagesize;
622 if ((ret = __ham_copypage_log(dbenv->lg_info,
623 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
624 hashp->dbp->log_fileid, PGNO(p), &LSN(p),
625 PGNO(n_pagep), &LSN(n_pagep), NEXT_PGNO(n_pagep),
626 NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL :
627 &LSN(nn_pagep), &key_dbt)) != 0)
630 /* Move lsn onto page. */
631 LSN(p) = new_lsn; /* Structure assignment. */
632 LSN(n_pagep) = new_lsn;
633 if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
634 LSN(nn_pagep) = new_lsn;
636 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
637 PREV_PGNO(nn_pagep) = PGNO(p);
638 (void)__ham_put_page(hashp->dbp, nn_pagep, 1);
643 memcpy(p, n_pagep, hashp->hdr->pagesize);
646 PREV_PGNO(p) = PGNO_INVALID;
649 * Cursor is advanced to the beginning of the next page.
651 cursorp->bndx = NDX_INVALID;
652 cursorp->pgno = PGNO(p);
654 if ((ret = __ham_dirty_page(hashp, p)) != 0 ||
655 (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0)
657 } else if (reclaim_page &&
658 NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
659 PAGE *n_pagep, *p_pagep;
662 __ham_get_page(hashp->dbp, PREV_PGNO(p), &p_pagep)) != 0)
665 if (NEXT_PGNO(p) != PGNO_INVALID) {
666 if ((ret = __ham_get_page(hashp->dbp,
667 NEXT_PGNO(p), &n_pagep)) != 0) {
668 (void)__ham_put_page(hashp->dbp, p_pagep, 0);
671 n_lsn = &LSN(n_pagep);
677 NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
679 PREV_PGNO(n_pagep) = PGNO(p_pagep);
681 if (DB_LOGGING(hashp->dbp)) {
682 if ((ret = __ham_newpage_log(dbenv->lg_info,
683 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELOVFL,
684 hashp->dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
685 PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
688 /* Move lsn onto page. */
689 LSN(p_pagep) = new_lsn; /* Structure assignment. */
691 LSN(n_pagep) = new_lsn;
694 cursorp->pgno = NEXT_PGNO(p);
697 * Since we are about to delete the cursor page and we have
698 * just moved the cursor, we need to make sure that the
699 * old page pointer isn't left hanging around in the cursor.
701 cursorp->pagep = NULL;
703 ret = __ham_del_page(hashp->dbp, p);
704 if ((tret = __ham_put_page(hashp->dbp, p_pagep, 1)) != 0 &&
707 if (n_pagep != NULL &&
708 (tret = __ham_put_page(hashp->dbp, n_pagep, 1)) != 0 &&
715 * Mark item deleted so that we don't try to return it, and
716 * so that we update the cursor correctly on the next call
719 F_SET(cursorp, H_DELETED);
720 chg_pgno = cursorp->pgno;
721 ret = __ham_dirty_page(hashp, p);
723 __ham_c_update(hashp, cursorp, chg_pgno, 0, 0, 0);
726 * Since we just deleted a pair from the master page, anything
727 * in cursorp->dpgno should be cleared.
729 cursorp->dpgno = PGNO_INVALID;
731 F_CLR(cursorp, H_OK);
737 * Given the key data indicated by the cursor, replace part/all of it
738 * according to the fields in the dbt.
740 * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
743 __ham_replpair(hashp, hcp, dbt, make_dup)
749 DBT old_dbt, tdata, tmp;
753 int is_big, ret, type;
754 u_int8_t *beg, *dest, *end, *hk, *src;
757 * Big item replacements are handled in generic code.
758 * Items that fit on the current page fall into 4 classes.
759 * 1. On-page element, same size
760 * 2. On-page element, new is bigger (fits)
761 * 3. On-page element, new is bigger (does not fit)
762 * 4. On-page element, old is bigger
763 * Numbers 1, 2, and 4 are essentially the same (and should
764 * be the common case). We handle case 3 as a delete and
769 * We need to compute the number of bytes that we are adding or
770 * removing from the entry. Normally, we can simply substract
771 * the number of bytes we are replacing (dbt->dlen) from the
772 * number of bytes we are inserting (dbt->size). However, if
773 * we are doing a partial put off the end of a record, then this
774 * formula doesn't work, because we are essentially adding
777 change = dbt->size - dbt->dlen;
779 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
780 is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
783 memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
785 len = LEN_HKEYDATA(hcp->pagep,
786 hashp->dbp->pgsize, H_DATAINDEX(hcp->bndx));
788 if (dbt->doff + dbt->dlen > len)
789 change += dbt->doff + dbt->dlen - len;
792 if (change > (int)P_FREESPACE(hcp->pagep) || is_big) {
794 * Case 3 -- two subcases.
795 * A. This is not really a partial operation, but an overwrite.
796 * Simple del and add works.
797 * B. This is a partial and we need to construct the data that
798 * we are really inserting (yuck).
799 * In both cases, we need to grab the key off the page (in
800 * some cases we could do this outside of this routine; for
801 * cleanliness we do it here. If you happen to be on a big
802 * key, this could be a performance hit).
805 F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL);
807 __db_ret(hashp->dbp, hcp->pagep, H_KEYINDEX(hcp->bndx),
808 &tmp, &hcp->big_key, &hcp->big_keylen)) != 0)
811 if (dbt->doff == 0 && dbt->dlen == len) {
812 ret = __ham_del_pair(hashp, hcp, 0);
814 ret = __ham_add_el(hashp,
815 hcp, &tmp, dbt, H_KEYDATA);
816 } else { /* Case B */
817 type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
818 HPAGE_PTYPE(hk) : H_KEYDATA;
820 F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL);
822 if ((ret = __db_ret(hashp->dbp, hcp->pagep,
823 H_DATAINDEX(hcp->bndx), &tdata, &hcp->big_data,
824 &hcp->big_datalen)) != 0)
827 /* Now we can delete the item. */
828 if ((ret = __ham_del_pair(hashp, hcp, 0)) != 0) {
829 __db_free(tdata.data);
833 /* Now shift old data around to make room for new. */
835 tdata.data = (void *)__db_realloc(tdata.data,
836 tdata.size + change);
837 memset((u_int8_t *)tdata.data + tdata.size,
840 if (tdata.data == NULL)
842 end = (u_int8_t *)tdata.data + tdata.size;
844 src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
845 if (src < end && tdata.size > dbt->doff + dbt->dlen) {
846 len = tdata.size - dbt->doff - dbt->dlen;
848 memmove(dest, src, len);
850 memcpy((u_int8_t *)tdata.data + dbt->doff,
851 dbt->data, dbt->size);
852 tdata.size += change;
854 /* Now add the pair. */
855 ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type);
856 __db_free(tdata.data);
858 err: __db_free(tmp.data);
863 * Set up pointer into existing data. Do it before the log
864 * message so we can use it inside of the log setup.
866 beg = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
870 * If we are going to have to move bytes at all, figure out
871 * all the parameters here. Then log the call before moving
874 if (DB_LOGGING(hashp->dbp)) {
876 old_dbt.size = dbt->dlen;
877 if ((ret = __ham_replace_log(hashp->dbp->dbenv->lg_info,
878 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
879 hashp->dbp->log_fileid, PGNO(hcp->pagep),
880 (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep),
881 (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
884 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
887 __ham_onpage_replace(hcp->pagep, hashp->dbp->pgsize,
888 (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt);
894 * Replace data on a page with new data, possibly growing or shrinking what's
895 * there. This is called on two different occasions. On one (from replpair)
896 * we are interested in changing only the data. On the other (from recovery)
897 * we are replacing the entire data (header and all) with a new element. In
898 * the latter case, the off argument is negative.
899 * pagep: the page that we're changing
900 * ndx: page index of the element that is growing/shrinking.
901 * off: Offset at which we are beginning the replacement.
902 * change: the number of bytes (+ or -) that the element is growing/shrinking.
903 * dbt: the new data that gets written at beg.
904 * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
905 * PUBLIC: int32_t, DBT *));
908 __ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
918 u_int8_t *src, *dest;
923 src = (u_int8_t *)(pagep) + HOFFSET(pagep);
925 len = pagep->inp[ndx] - HOFFSET(pagep);
926 else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
927 len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) +
928 LEN_HKEYDATA(pagep, pgsize, ndx) - src;
931 len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src;
933 memmove(dest, src, len);
935 memset(dest + len, 0, change);
937 /* Now update the indices. */
938 for (i = ndx; i < NUM_ENT(pagep); i++)
939 pagep->inp[i] -= change;
940 HOFFSET(pagep) -= change;
943 memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off,
944 dbt->data, dbt->size);
946 memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
950 * PUBLIC: int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
953 __ham_split_page(hashp, obucket, nbucket)
955 u_int32_t obucket, nbucket;
957 DBT key, val, page_dbt;
960 PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
962 db_pgno_t bucket_pgno, next_pgno;
963 u_int32_t big_len, len;
967 dbenv = hashp->dbp->dbenv;
968 temp_pagep = old_pagep = new_pagep = NULL;
970 bucket_pgno = BUCKET_TO_PAGE(hashp, obucket);
971 if ((ret = __ham_get_page(hashp->dbp, bucket_pgno, &old_pagep)) != 0)
973 if ((ret = __ham_new_page(hashp, BUCKET_TO_PAGE(hashp, nbucket), P_HASH,
977 temp_pagep = hashp->split_buf;
978 memcpy(temp_pagep, old_pagep, hashp->hdr->pagesize);
980 if (DB_LOGGING(hashp->dbp)) {
981 page_dbt.size = hashp->hdr->pagesize;
982 page_dbt.data = old_pagep;
983 if ((ret = __ham_splitdata_log(dbenv->lg_info,
984 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
985 hashp->dbp->log_fileid, SPLITOLD, PGNO(old_pagep),
986 &page_dbt, &LSN(old_pagep))) != 0)
990 P_INIT(old_pagep, hashp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID,
991 PGNO_INVALID, 0, P_HASH);
993 if (DB_LOGGING(hashp->dbp))
994 LSN(old_pagep) = new_lsn; /* Structure assignment. */
998 val.flags = key.flags = 0;
999 while (temp_pagep != NULL) {
1000 for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
1002 __db_ret(hashp->dbp, temp_pagep, H_KEYINDEX(n),
1003 &key, &big_buf, &big_len)) != 0)
1006 if (__ham_call_hash(hashp, key.data, key.size)
1013 * Figure out how many bytes we need on the new
1014 * page to store the key/data pair.
1017 len = LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
1019 LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
1021 2 * sizeof(db_indx_t);
1023 if (P_FREESPACE(*pp) < len) {
1024 if (DB_LOGGING(hashp->dbp)) {
1025 page_dbt.size = hashp->hdr->pagesize;
1026 page_dbt.data = *pp;
1027 if ((ret = __ham_splitdata_log(
1029 (DB_TXN *)hashp->dbp->txn,
1031 hashp->dbp->log_fileid, SPLITNEW,
1032 PGNO(*pp), &page_dbt,
1037 if ((ret = __ham_add_ovflpage(hashp,
1041 __ham_copy_item(hashp, temp_pagep, H_KEYINDEX(n), *pp);
1042 __ham_copy_item(hashp, temp_pagep, H_DATAINDEX(n), *pp);
1044 next_pgno = NEXT_PGNO(temp_pagep);
1046 /* Clear temp_page; if it's a link overflow page, free it. */
1047 if (PGNO(temp_pagep) != bucket_pgno && (ret =
1048 __ham_del_page(hashp->dbp, temp_pagep)) != 0)
1051 if (next_pgno == PGNO_INVALID)
1054 __ham_get_page(hashp->dbp, next_pgno, &temp_pagep)) != 0)
1057 if (temp_pagep != NULL && DB_LOGGING(hashp->dbp)) {
1058 page_dbt.size = hashp->hdr->pagesize;
1059 page_dbt.data = temp_pagep;
1060 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1061 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1062 hashp->dbp->log_fileid, SPLITOLD, PGNO(temp_pagep),
1063 &page_dbt, &LSN(temp_pagep))) != 0)
1065 LSN(temp_pagep) = new_lsn;
1068 if (big_buf != NULL)
1072 * If the original bucket spanned multiple pages, then we've got
1073 * a pointer to a page that used to be on the bucket chain. It
1074 * should be deleted.
1076 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
1077 (ret = __ham_del_page(hashp->dbp, temp_pagep)) != 0)
1081 * Write new buckets out.
1083 if (DB_LOGGING(hashp->dbp)) {
1084 page_dbt.size = hashp->hdr->pagesize;
1085 page_dbt.data = old_pagep;
1086 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1087 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1088 hashp->dbp->log_fileid, SPLITNEW, PGNO(old_pagep),
1089 &page_dbt, &LSN(old_pagep))) != 0)
1091 LSN(old_pagep) = new_lsn;
1093 page_dbt.data = new_pagep;
1094 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1095 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1096 hashp->dbp->log_fileid, SPLITNEW, PGNO(new_pagep),
1097 &page_dbt, &LSN(new_pagep))) != 0)
1099 LSN(new_pagep) = new_lsn;
1101 ret = __ham_put_page(hashp->dbp, old_pagep, 1);
1102 if ((tret = __ham_put_page(hashp->dbp, new_pagep, 1)) != 0 &&
1107 if (old_pagep != NULL)
1108 (void)__ham_put_page(hashp->dbp, old_pagep, 1);
1109 if (new_pagep != NULL)
1110 (void)__ham_put_page(hashp->dbp, new_pagep, 1);
1111 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
1112 (void)__ham_put_page(hashp->dbp, temp_pagep, 1);
1118 * Add the given pair to the page. The page in question may already be
1119 * held (i.e. it was already gotten). If it is, then the page is passed
1120 * in via the pagep parameter. On return, pagep will contain the page
1121 * to which we just added something. This allows us to link overflow
1122 * pages and return the new page having correctly put the last page.
1124 * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
1128 __ham_add_el(hashp, hcp, key, val, type)
1131 const DBT *key, *val;
1134 DBT *pkey, *pdata, key_dbt, data_dbt;
1136 HOFFPAGE doff, koff;
1137 db_pgno_t next_pgno;
1138 u_int32_t data_size, key_size, pairsize;
1139 int do_expand, is_keybig, is_databig, rectype, ret;
1140 int key_type, data_type;
1144 if (hcp->pagep == NULL && (ret = __ham_get_page(hashp->dbp,
1145 hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page :
1146 hcp->pgno, &hcp->pagep)) != 0)
1149 key_size = HKEYDATA_PSIZE(key->size);
1150 data_size = HKEYDATA_PSIZE(val->size);
1151 is_keybig = ISBIG(hashp, key->size);
1152 is_databig = ISBIG(hashp, val->size);
1154 key_size = HOFFPAGE_PSIZE;
1156 data_size = HOFFPAGE_PSIZE;
1158 pairsize = key_size + data_size;
1160 /* Advance to first page in chain with room for item. */
1161 while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) !=
1164 * This may not be the end of the chain, but the pair may fit
1165 * anyway. Check if it's a bigpair that fits or a regular
1168 if (P_FREESPACE(hcp->pagep) >= pairsize)
1170 next_pgno = NEXT_PGNO(hcp->pagep);
1172 __ham_next_cpage(hashp, hcp, next_pgno, 0, 0)) != 0)
1177 * Check if we need to allocate a new page.
1179 if (P_FREESPACE(hcp->pagep) < pairsize) {
1181 if ((ret = __ham_add_ovflpage(hashp,
1182 hcp->pagep, 1, &hcp->pagep)) != 0)
1184 hcp->pgno = PGNO(hcp->pagep);
1190 hcp->bndx = H_NUMPAIRS(hcp->pagep);
1191 F_CLR(hcp, H_DELETED);
1193 if ((ret = __db_poff(hashp->dbp,
1194 key, &koff.pgno, __ham_overflow_page)) != 0)
1196 koff.type = H_OFFPAGE;
1197 koff.tlen = key->size;
1198 key_dbt.data = &koff;
1199 key_dbt.size = sizeof(koff);
1201 key_type = H_OFFPAGE;
1204 key_type = H_KEYDATA;
1208 if ((ret = __db_poff(hashp->dbp,
1209 val, &doff.pgno, __ham_overflow_page)) != 0)
1211 doff.type = H_OFFPAGE;
1212 doff.tlen = val->size;
1213 data_dbt.data = &doff;
1214 data_dbt.size = sizeof(doff);
1216 data_type = H_OFFPAGE;
1222 if (DB_LOGGING(hashp->dbp)) {
1225 rectype |= PAIR_DATAMASK;
1227 rectype |= PAIR_KEYMASK;
1229 if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
1230 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
1231 hashp->dbp->log_fileid, PGNO(hcp->pagep),
1232 (u_int32_t)H_NUMPAIRS(hcp->pagep),
1233 &LSN(hcp->pagep), pkey, pdata)) != 0)
1236 /* Move lsn onto page. */
1237 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
1240 __ham_putitem(hcp->pagep, pkey, key_type);
1241 __ham_putitem(hcp->pagep, pdata, data_type);
1244 * For splits, we are going to update item_info's page number
1245 * field, so that we can easily return to the same page the
1246 * next time we come in here. For other operations, this shouldn't
1247 * matter, since odds are this is the last thing that happens before
1248 * we return to the user program.
1250 hcp->pgno = PGNO(hcp->pagep);
1253 * XXX Maybe keep incremental numbers here
1255 if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
1256 hashp->hdr->nelem++;
1258 if (do_expand || (hashp->hdr->ffactor != 0 &&
1259 (u_int32_t)H_NUMPAIRS(hcp->pagep) > hashp->hdr->ffactor))
1260 F_SET(hcp, H_EXPAND);
1266 * Special __putitem call used in splitting -- copies one entry to
1267 * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
1268 * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
1269 * do not need to do any logging here.
1270 * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
1273 __ham_copy_item(hashp, src_page, src_ndx, dest_page)
1283 * Copy the key and data entries onto this new page.
1285 src = P_ENTRY(src_page, src_ndx);
1287 /* Set up space on dest. */
1288 len = LEN_HITEM(src_page, hashp->hdr->pagesize, src_ndx);
1289 HOFFSET(dest_page) -= len;
1290 dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
1291 dest = P_ENTRY(dest_page, NUM_ENT(dest_page));
1292 NUM_ENT(dest_page)++;
1294 memcpy(dest, src, len);
1300 * pointer on success
1303 * PUBLIC: int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
1306 __ham_add_ovflpage(hashp, pagep, release, pp)
1317 dbenv = hashp->dbp->dbenv;
1319 if ((ret = __ham_overflow_page(hashp->dbp, P_HASH, &new_pagep)) != 0)
1322 if (DB_LOGGING(hashp->dbp)) {
1323 if ((ret = __ham_newpage_log(dbenv->lg_info,
1324 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, PUTOVFL,
1325 hashp->dbp->log_fileid, PGNO(pagep), &LSN(pagep),
1326 PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
1329 /* Move lsn onto page. */
1330 LSN(pagep) = LSN(new_pagep) = new_lsn;
1332 NEXT_PGNO(pagep) = PGNO(new_pagep);
1333 PREV_PGNO(new_pagep) = PGNO(pagep);
1336 ret = __ham_put_page(hashp->dbp, pagep, 1);
1338 hashp->hash_overflows++;
1345 * PUBLIC: int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
1348 __ham_new_page(hashp, addr, type, pp)
1350 u_int32_t addr, type;
1356 if ((ret = memp_fget(hashp->dbp->mpf,
1357 &addr, DB_MPOOL_CREATE, &pagep)) != 0)
1361 __account_page(hashp, addr, 1);
1363 /* This should not be necessary because page-in should do it. */
1365 hashp->hdr->pagesize, addr, PGNO_INVALID, PGNO_INVALID, 0, type);
1372 * PUBLIC: int __ham_del_page __P((DB *, PAGE *));
1375 __ham_del_page(dbp, pagep)
1383 hashp = (HTAB *)dbp->internal;
1385 DIRTY_META(hashp, ret);
1388 __db_err(hashp->dbp->dbenv,
1389 "free_ovflpage: unable to lock meta data page %s\n",
1392 * If we are going to return an error, then we should free
1393 * the page, so it doesn't stay pinned forever.
1395 (void)__ham_put_page(hashp->dbp, pagep, 0);
1399 if (DB_LOGGING(hashp->dbp)) {
1400 if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
1401 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPGNO,
1402 hashp->dbp->log_fileid, PGNO(pagep), hashp->hdr->last_freed,
1403 (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID,
1404 &LSN(pagep), &hashp->hdr->lsn)) != 0)
1407 hashp->hdr->lsn = new_lsn;
1408 LSN(pagep) = new_lsn;
1415 __pgno = pagep->pgno;
1417 memset(pagep, 0xff, dbp->pgsize);
1418 pagep->pgno = __pgno;
1422 TYPE(pagep) = P_INVALID;
1423 NEXT_PGNO(pagep) = hashp->hdr->last_freed;
1424 hashp->hdr->last_freed = PGNO(pagep);
1426 return (__ham_put_page(hashp->dbp, pagep, 1));
1431 * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
1434 __ham_put_page(dbp, pagep, is_dirty)
1440 __account_page((HTAB *)dbp->cookie,
1441 ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
1443 return (memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0)));
1447 * __ham_dirty_page --
1448 * Mark a page dirty.
1450 * PUBLIC: int __ham_dirty_page __P((HTAB *, PAGE *));
1453 __ham_dirty_page(hashp, pagep)
1457 return (memp_fset(hashp->dbp->mpf, pagep, DB_MPOOL_DIRTY));
1461 * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
1464 __ham_get_page(dbp, addr, pagep)
1471 ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep);
1474 __account_page((HTAB *)dbp->internal, addr, 1);
1480 * PUBLIC: int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
1483 __ham_overflow_page(dbp, type, pp)
1488 DB_LSN *lsnp, new_lsn;
1491 db_pgno_t new_addr, next_free, newalloc_flag;
1492 u_int32_t offset, splitnum;
1495 hashp = (HTAB *)dbp->internal;
1498 DIRTY_META(hashp, ret);
1503 * This routine is split up into two parts. First we have
1504 * to figure out the address of the new page that we are
1505 * allocating. Then we have to log the allocation. Only
1506 * after the log do we get to complete allocation of the
1509 new_addr = hashp->hdr->last_freed;
1510 if (new_addr != PGNO_INVALID) {
1511 if ((ret = __ham_get_page(hashp->dbp, new_addr, &p)) != 0)
1513 next_free = NEXT_PGNO(p);
1517 splitnum = hashp->hdr->ovfl_point;
1518 hashp->hdr->spares[splitnum]++;
1519 offset = hashp->hdr->spares[splitnum] -
1520 (splitnum ? hashp->hdr->spares[splitnum - 1] : 0);
1521 new_addr = PGNO_OF(hashp, hashp->hdr->ovfl_point, offset);
1522 if (new_addr > MAX_PAGES(hashp)) {
1523 __db_err(hashp->dbp->dbenv, "hash: out of file pages");
1524 hashp->hdr->spares[splitnum]--;
1527 next_free = PGNO_INVALID;
1533 if (DB_LOGGING(hashp->dbp)) {
1534 if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
1535 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, ALLOCPGNO,
1536 hashp->dbp->log_fileid, new_addr, next_free,
1537 0, newalloc_flag, type, lsnp, &hashp->hdr->lsn)) != 0)
1540 hashp->hdr->lsn = new_lsn;
1546 /* We just took something off the free list, initialize it. */
1547 hashp->hdr->last_freed = next_free;
1548 P_INIT(p, hashp->hdr->pagesize, PGNO(p), PGNO_INVALID,
1549 PGNO_INVALID, 0, (u_int8_t)type);
1551 /* Get the new page. */
1552 if ((ret = __ham_new_page(hashp, new_addr, type, &p)) != 0)
1555 if (DB_LOGGING(hashp->dbp))
1564 * PUBLIC: #ifdef DEBUG
1565 * PUBLIC: int __bucket_to_page __P((HTAB *, int));
1569 __bucket_to_page(hashp, n)
1577 ret_val += hashp->hdr->spares[__db_log2(n + 1) - 1];
1584 * Create a bunch of overflow pages at the current split point.
1585 * PUBLIC: void __ham_init_ovflpages __P((HTAB *));
1588 __ham_init_ovflpages(hp)
1593 db_pgno_t last_pgno, new_pgno;
1594 u_int32_t i, curpages, numpages;
1596 curpages = hp->hdr->spares[hp->hdr->ovfl_point] -
1597 hp->hdr->spares[hp->hdr->ovfl_point - 1];
1598 numpages = hp->hdr->ovfl_point + 1 - curpages;
1600 last_pgno = hp->hdr->last_freed;
1601 new_pgno = PGNO_OF(hp, hp->hdr->ovfl_point, curpages + 1);
1602 if (DB_LOGGING(hp->dbp)) {
1603 (void)__ham_ovfl_log(hp->dbp->dbenv->lg_info,
1604 (DB_TXN *)hp->dbp->txn, &new_lsn, 0,
1605 hp->dbp->log_fileid, new_pgno,
1606 numpages, last_pgno, hp->hdr->ovfl_point, &hp->hdr->lsn);
1607 hp->hdr->lsn = new_lsn;
1611 hp->hdr->spares[hp->hdr->ovfl_point] += numpages;
1612 for (i = numpages; i > 0; i--) {
1613 if (__ham_new_page(hp,
1614 PGNO_OF(hp, hp->hdr->ovfl_point, curpages + i),
1615 P_INVALID, &p) != 0)
1618 NEXT_PGNO(p) = last_pgno;
1619 last_pgno = PGNO(p);
1620 (void)__ham_put_page(hp->dbp, p, 1);
1622 hp->hdr->last_freed = last_pgno;
1626 * PUBLIC: int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
1629 __ham_get_cpage(hashp, hcp, mode)
1636 if (hcp->lock == 0 && F_ISSET(hashp->dbp, DB_AM_LOCKING) &&
1637 (ret = __ham_lock_bucket(hashp->dbp, hcp, mode)) != 0)
1640 if (hcp->pagep == NULL) {
1641 if (hcp->pgno == PGNO_INVALID) {
1642 hcp->pgno = BUCKET_TO_PAGE(hashp, hcp->bucket);
1647 __ham_get_page(hashp->dbp, hcp->pgno, &hcp->pagep)) != 0)
1651 if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL)
1653 __ham_get_page(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
1659 * Get a new page at the cursor, putting the last page if necessary.
1660 * If the flag is set to H_ISDUP, then we are talking about the
1661 * duplicate page, not the main page.
1662 * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
1663 * PUBLIC: int, int));
1666 __ham_next_cpage(hashp, hcp, pgno, dirty, flags)
1676 if (flags & H_ISDUP && hcp->dpagep != NULL &&
1677 (ret = __ham_put_page(hashp->dbp, hcp->dpagep, dirty)) != 0)
1679 else if (!(flags & H_ISDUP) && hcp->pagep != NULL &&
1680 (ret = __ham_put_page(hashp->dbp, hcp->pagep, dirty)) != 0)
1683 if ((ret = __ham_get_page(hashp->dbp, pgno, &p)) != 0)
1686 if (flags & H_ISDUP) {
1700 * __ham_lock_bucket --
1701 * Get the lock on a particular bucket.
1704 __ham_lock_bucket(dbp, hcp, mode)
1712 * What a way to trounce on the memory system. It might be
1713 * worth copying the lk_info into the hashp.
1716 dbp->lock.pgno = (db_pgno_t)(hcp->bucket);
1717 ret = lock_get(dbp->dbenv->lk_info,
1718 dbp->txn == NULL ? dbp->locker : dbp->txn->txnid, 0,
1719 &dbp->lock_dbt, mode, &hcp->lock);
1721 return (ret < 0 ? EAGAIN : ret);
1726 * Delete a pair on a page, paying no attention to what the pair
1727 * represents. The caller is responsible for freeing up duplicates
1728 * or offpage entries that might be referenced by this pair.
1730 * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
1733 __ham_dpair(dbp, p, pndx)
1739 u_int8_t *dest, *src;
1742 * Compute "delta", the amount we have to shift all of the
1743 * offsets. To find the delta, we just need to calculate
1744 * the size of the pair of elements we are removing.
1746 delta = H_PAIRSIZE(p, dbp->pgsize, pndx);
1749 * The hard case: we want to remove something other than
1750 * the last item on the page. We need to shift data and
1753 if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) {
1755 * Move the data: src is the first occupied byte on
1756 * the page. (Length is delta.)
1758 src = (u_int8_t *)p + HOFFSET(p);
1761 * Destination is delta bytes beyond src. This might
1762 * be an overlapping copy, so we have to use memmove.
1765 memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p));
1768 /* Adjust the offsets. */
1769 for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) {
1770 p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta;
1771 p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta;
1774 /* Adjust page metadata. */
1775 HOFFSET(p) = HOFFSET(p) + delta;
1776 NUM_ENT(p) = NUM_ENT(p) - 2;
1781 __account_page(hashp, pgno, inout)
1793 if (inout == -1) /* XXX: Kluge */
1796 /* Find page in list. */
1797 for (i = 0; i < last; i++)
1798 if (list[i].pgno == pgno)
1802 list[last].times = inout;
1803 list[last].pgno = pgno;
1806 list[i].times = inout;
1807 if (list[i].times == 0) {
1808 for (j = i; j < last; j++)
1809 list[j] = list[j + 1];
1812 for (i = 0; i < last; i++, list[i].times++)
1813 if (list[i].times > 20 &&
1814 !__is_bitmap_pgno(hashp, list[i].pgno))
1815 (void)fprintf(stderr,
1816 "Warning: pg %lu has been out for %d times\n",
1817 (u_long)list[i].pgno, list[i].times);
1819 #endif /* DEBUG_SLOW */