2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid[] = "@(#)hash_page.c 10.24 (Sleepycat) 9/17/97";
57 * Page manipulation for hashing package.
69 #ifndef NO_SYSTEM_INCLUDES
70 #include <sys/types.h>
84 static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t));
87 static void __account_page(HTAB *, db_pgno_t, int);
91 * PUBLIC: int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
94 __ham_item(hashp, cursorp, mode)
102 if (F_ISSET(cursorp, H_DELETED))
104 F_CLR(cursorp, H_OK | H_NOMORE);
106 /* Check if we need to get a page for this cursor. */
107 if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
110 /* Check if we are looking for space in which to insert an item. */
111 if (cursorp->seek_size && cursorp->seek_found_page == PGNO_INVALID
112 && cursorp->seek_size < P_FREESPACE(cursorp->pagep))
113 cursorp->seek_found_page = cursorp->pgno;
115 /* Check if we need to go on to the next page. */
116 if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno == PGNO_INVALID)
118 * ISDUP is set, and offset is at the beginning of the datum.
119 * We need to grab the length of the datum, then set the datum
120 * pointer to be the beginning of the datum.
122 memcpy(&cursorp->dup_len,
123 HKEYDATA_DATA(H_PAIRDATA(cursorp->pagep, cursorp->bndx)) +
124 cursorp->dup_off, sizeof(db_indx_t));
125 else if (F_ISSET(cursorp, H_ISDUP)) {
126 /* Make sure we're not about to run off the page. */
127 if (cursorp->dpagep == NULL && (ret = __ham_get_page(hashp->dbp,
128 cursorp->dpgno, &cursorp->dpagep)) != 0)
131 if (cursorp->dndx >= NUM_ENT(cursorp->dpagep)) {
132 if (NEXT_PGNO(cursorp->dpagep) == PGNO_INVALID) {
133 if ((ret = __ham_put_page(hashp->dbp,
134 cursorp->dpagep, 0)) != 0)
136 F_CLR(cursorp, H_ISDUP);
137 cursorp->dpagep = NULL;
138 cursorp->dpgno = PGNO_INVALID;
139 cursorp->dndx = NDX_INVALID;
141 } else if ((ret = __ham_next_cpage(hashp, cursorp,
142 NEXT_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
147 if (cursorp->bndx >= (db_indx_t)H_NUMPAIRS(cursorp->pagep)) {
148 /* Fetch next page. */
149 if (NEXT_PGNO(cursorp->pagep) == PGNO_INVALID) {
150 F_SET(cursorp, H_NOMORE);
151 if (cursorp->dpagep != NULL &&
152 (ret = __ham_put_page(hashp->dbp,
153 cursorp->dpagep, 0)) != 0)
155 cursorp->dpgno = PGNO_INVALID;
156 return (DB_NOTFOUND);
158 next_pgno = NEXT_PGNO(cursorp->pagep);
160 if ((ret = __ham_next_cpage(hashp,
161 cursorp, next_pgno, 0, 0)) != 0)
165 F_SET(cursorp, H_OK);
170 * PUBLIC: int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
173 __ham_item_reset(hashp, cursorp)
175 HASH_CURSOR *cursorp;
180 ret = __ham_put_page(hashp->dbp, cursorp->pagep, 0);
184 __ham_item_init(cursorp);
189 * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
192 __ham_item_init(cursorp)
193 HASH_CURSOR *cursorp;
195 cursorp->pagep = NULL;
196 cursorp->bucket = BUCKET_INVALID;
198 cursorp->bndx = NDX_INVALID;
199 cursorp->pgno = PGNO_INVALID;
200 cursorp->dpgno = PGNO_INVALID;
201 cursorp->dndx = NDX_INVALID;
202 cursorp->dpagep = NULL;
204 cursorp->seek_size = 0;
205 cursorp->seek_found_page = PGNO_INVALID;
209 * PUBLIC: int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
212 __ham_item_done(hashp, cursorp, dirty)
214 HASH_CURSOR *cursorp;
222 ret = __ham_put_page(hashp->dbp, cursorp->pagep,
223 dirty && cursorp->dpagep == NULL);
224 cursorp->pagep = NULL;
227 t_ret = __ham_put_page(hashp->dbp, cursorp->dpagep, dirty);
228 cursorp->dpagep = NULL;
230 if (ret == 0 && t_ret != 0)
234 * If we are running with transactions, then we must
235 * not relinquish locks explicitly.
237 if (cursorp->lock && hashp->dbp->txn == NULL)
238 t_ret = lock_put(hashp->dbp->dbenv->lk_info, cursorp->lock);
243 * We don't throw out the page number since we might want to
244 * continue getting on this page.
246 return (ret != 0 ? ret : t_ret);
250 * Returns the last item in a bucket.
252 * PUBLIC: int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
255 __ham_item_last(hashp, cursorp, mode)
257 HASH_CURSOR *cursorp;
262 if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
265 cursorp->bucket = hashp->hdr->max_bucket;
266 F_SET(cursorp, H_OK);
267 return (__ham_item_prev(hashp, cursorp, mode));
270 * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
273 __ham_item_first(hashp, cursorp, mode)
275 HASH_CURSOR *cursorp;
280 if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
282 F_SET(cursorp, H_OK);
284 return (__ham_item_next(hashp, cursorp, mode));
288 * Returns a pointer to key/data pair on a page. In the case of bigkeys,
289 * just returns the page number and index of the bigkey pointer pair.
291 * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
294 __ham_item_prev(hashp, cursorp, mode)
296 HASH_CURSOR *cursorp;
303 * There are N cases for backing up in a hash file.
304 * Case 1: In the middle of a page, no duplicates, just dec the index.
305 * Case 2: In the middle of a duplicate set, back up one.
306 * Case 3: At the beginning of a duplicate set, get out of set and
307 * back up to next key.
308 * Case 4: At the beginning of a page; go to previous page.
309 * Case 5: At the beginning of a bucket; go to prev bucket.
311 F_CLR(cursorp, H_OK | H_NOMORE | H_DELETED);
314 * First handle the duplicates. Either you'll get the key here
315 * or you'll exit the duplicate set and drop into the code below
316 * to handle backing up through keys.
318 if (F_ISSET(cursorp, H_ISDUP)) {
319 if (cursorp->dpgno == PGNO_INVALID) {
320 /* Duplicates are on-page. */
321 if (cursorp->dup_off != 0)
322 if ((ret = __ham_get_cpage(hashp,
323 cursorp, mode)) != 0)
328 memcpy(&h->dup_len, HKEYDATA_DATA(
329 H_PAIRDATA(h->pagep, h->bndx))
330 + h->dup_off - sizeof(db_indx_t),
333 DUP_SIZE(cursorp->dup_len);
335 return (__ham_item(hashp,
338 } else if (cursorp->dndx > 0) { /* Duplicates are off-page. */
340 return (__ham_item(hashp, cursorp, mode));
341 } else if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
343 else if (PREV_PGNO(cursorp->dpagep) == PGNO_INVALID) {
344 F_CLR(cursorp, H_ISDUP); /* End of dups */
345 cursorp->dpgno = PGNO_INVALID;
346 if (cursorp->dpagep != NULL)
347 (void)__ham_put_page(hashp->dbp,
349 cursorp->dpagep = NULL;
350 } else if ((ret = __ham_next_cpage(hashp, cursorp,
351 PREV_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
354 cursorp->dndx = NUM_ENT(cursorp->pagep) - 1;
355 return (__ham_item(hashp, cursorp, mode));
360 * If we get here, we are not in a duplicate set, and just need
361 * to back up the cursor. There are still three cases:
362 * midpage, beginning of page, beginning of bucket.
365 if (cursorp->bndx == 0) { /* Beginning of page. */
366 if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
368 cursorp->pgno = PREV_PGNO(cursorp->pagep);
369 if (cursorp->pgno == PGNO_INVALID) {
370 /* Beginning of bucket. */
371 F_SET(cursorp, H_NOMORE);
372 return (DB_NOTFOUND);
373 } else if ((ret = __ham_next_cpage(hashp,
374 cursorp, cursorp->pgno, 0, 0)) != 0)
377 cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
381 * Either we've got the cursor set up to be decremented, or we
382 * have to find the end of a bucket.
384 if (cursorp->bndx == NDX_INVALID) {
385 if (cursorp->pagep == NULL)
386 next_pgno = BUCKET_TO_PAGE(hashp, cursorp->bucket);
391 if ((ret = __ham_next_cpage(hashp,
392 cursorp, next_pgno, 0, 0)) != 0)
394 got_page: next_pgno = NEXT_PGNO(cursorp->pagep);
395 cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
396 } while (next_pgno != PGNO_INVALID);
398 if (cursorp->bndx == 0) {
399 /* Bucket was empty. */
400 F_SET(cursorp, H_NOMORE);
401 return (DB_NOTFOUND);
407 return (__ham_item(hashp, cursorp, mode));
411 * Sets the cursor to the next key/data pair on a page.
413 * PUBLIC: int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
416 __ham_item_next(hashp, cursorp, mode)
418 HASH_CURSOR *cursorp;
422 * Deleted on-page duplicates are a weird case. If we delete the last
423 * one, then our cursor is at the very end of a duplicate set and
424 * we actually need to go on to the next key.
426 if (F_ISSET(cursorp, H_DELETED)) {
427 if (cursorp->bndx != NDX_INVALID &&
428 F_ISSET(cursorp, H_ISDUP) &&
429 cursorp->dpgno == PGNO_INVALID &&
430 cursorp->dup_tlen == cursorp->dup_off) {
431 F_CLR(cursorp, H_ISDUP);
432 cursorp->dpgno = PGNO_INVALID;
435 F_CLR(cursorp, H_DELETED);
436 } else if (cursorp->bndx == NDX_INVALID) {
438 cursorp->dpgno = PGNO_INVALID;
439 F_CLR(cursorp, H_ISDUP);
440 } else if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno != PGNO_INVALID)
442 else if (F_ISSET(cursorp, H_ISDUP)) {
444 cursorp->dup_off += DUP_SIZE(cursorp->dup_len);
445 if (cursorp->dup_off >= cursorp->dup_tlen) {
446 F_CLR(cursorp, H_ISDUP);
447 cursorp->dpgno = PGNO_INVALID;
453 return (__ham_item(hashp, cursorp, mode));
457 * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
459 * This is a little bit sleazy in that we're overloading the meaning
460 * of the H_OFFPAGE type here. When we recover deletes, we have the
461 * entire entry instead of having only the DBT, so we'll pass type
462 * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
463 * an H_KEYDATA around it.
466 __ham_putitem(p, dbt, type)
475 /* Put the item element on the page. */
476 if (type == H_OFFPAGE) {
477 off = HOFFSET(p) - dbt->size;
478 HOFFSET(p) = p->inp[n] = off;
479 memcpy(P_ENTRY(p, n), dbt->data, dbt->size);
481 off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
482 HOFFSET(p) = p->inp[n] = off;
483 PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type);
486 /* Adjust page info. */
492 * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *));
493 * XXX TODO: if the item is an offdup, delete the other pages and
494 * then remove the pair. If the offpage page is 0, then you can
495 * just remove the pair.
498 __ham_del_pair(hashp, cursorp)
500 HASH_CURSOR *cursorp;
502 DBT data_dbt, key_dbt;
504 DB_LSN new_lsn, *n_lsn;
507 db_pgno_t chg_pgno, pgno;
510 dbenv = hashp->dbp->dbenv;
512 if (cursorp->pagep == NULL && (ret =
513 __ham_get_page(hashp->dbp, cursorp->pgno, &cursorp->pagep)) != 0)
519 * We optimize for the normal case which is when neither the key nor
520 * the data are large. In this case, we write a single log record
521 * and do the delete. If either is large, we'll call __big_delete
522 * to remove the big item and then update the page to remove the
523 * entry referring to the big item.
526 if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) {
527 memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))),
529 ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
533 switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) {
536 HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
538 ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
542 HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
544 ret = __db_ddup(hashp->dbp, pgno, __ham_del_page);
551 /* Now log the delete off this page. */
552 if (DB_LOGGING(hashp->dbp)) {
553 key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
555 LEN_HITEM(p, hashp->hdr->pagesize, H_KEYINDEX(ndx));
556 data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
558 LEN_HITEM(p, hashp->hdr->pagesize, H_DATAINDEX(ndx));
560 if ((ret = __ham_insdel_log(dbenv->lg_info,
561 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPAIR,
562 hashp->dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
563 &LSN(p), &key_dbt, &data_dbt)) != 0)
566 /* Move lsn onto page. */
570 __ham_dpair(hashp->dbp, p, ndx);
573 * If we are locking, we will not maintain this.
574 * XXXX perhaps we can retain incremental numbers and apply them
577 if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
581 * Check if the page is empty. There are two cases. If it's
582 * empty and it's not the first chain in the bucket (i.e., the
583 * bucket page) then we can simply remove it. If it is the first
584 * chain in the bucket, then we need to copy the second page into
585 * it and remove the second page.
587 if (NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
588 NEXT_PGNO(p) != PGNO_INVALID) {
589 PAGE *n_pagep, *nn_pagep;
593 * First page in chain is empty and we know that there
594 * are more pages in the chain.
595 * XXX Need to log this.
598 __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0)
601 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
603 __ham_get_page(hashp->dbp, NEXT_PGNO(n_pagep),
605 (void) __ham_put_page(hashp->dbp, n_pagep, 0);
608 PREV_PGNO(nn_pagep) = PGNO(p);
609 (void)__ham_put_page(hashp->dbp, nn_pagep, 1);
613 memcpy(p, n_pagep, hashp->hdr->pagesize);
615 PREV_PGNO(p) = PGNO_INVALID;
618 * Cursor is advanced to the beginning of the next page.
620 cursorp->bndx = NDX_INVALID;
621 cursorp->pgno = PGNO(p);
623 if ((ret = __ham_dirty_page(hashp, p)) != 0 ||
624 (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0)
626 } else if (NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
627 PAGE *n_pagep, *p_pagep;
630 __ham_get_page(hashp->dbp, PREV_PGNO(p), &p_pagep)) != 0)
633 if (NEXT_PGNO(p) != PGNO_INVALID) {
634 if ((ret = __ham_get_page(hashp->dbp,
635 NEXT_PGNO(p), &n_pagep)) != 0) {
636 (void)__ham_put_page(hashp->dbp, p_pagep, 0);
639 n_lsn = &LSN(n_pagep);
645 NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
647 PREV_PGNO(n_pagep) = PGNO(p_pagep);
649 if (DB_LOGGING(hashp->dbp)) {
650 if ((ret = __ham_newpage_log(dbenv->lg_info,
651 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELOVFL,
652 hashp->dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
653 PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
656 /* Move lsn onto page. */
657 LSN(p_pagep) = new_lsn; /* Structure assignment. */
659 LSN(n_pagep) = new_lsn;
662 cursorp->pgno = NEXT_PGNO(p);
665 * Since we are about to delete the cursor page and we have
666 * just moved the cursor, we need to make sure that the
667 * old page pointer isn't left hanging around in the cursor.
669 cursorp->pagep = NULL;
671 ret = __ham_del_page(hashp->dbp, p);
672 if ((tret = __ham_put_page(hashp->dbp, p_pagep, 1)) != 0 &&
675 if (n_pagep != NULL &&
676 (tret = __ham_put_page(hashp->dbp, n_pagep, 1)) != 0 &&
683 * Mark item deleted so that we don't try to return it, and
684 * so that we update the cursor correctly on the next call
687 F_SET(cursorp, H_DELETED);
688 chg_pgno = cursorp->pgno;
689 ret = __ham_dirty_page(hashp, p);
691 __ham_c_update(hashp, cursorp, chg_pgno, 0, 0, 0);
693 F_CLR(cursorp, H_OK);
697 * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
698 * Given the key data indicated by the cursor, replace part/all of it
699 * according to the fields in the dbt.
702 __ham_replpair(hashp, hcp, dbt, make_dup)
708 DBT old_dbt, tdata, tmp;
712 int is_big, ret, type;
713 u_int8_t *beg, *dest, *end, *hk, *src;
716 * Big item replacements are handled in generic code.
717 * Items that fit on the current page fall into 4 classes.
718 * 1. On-page element, same size
719 * 2. On-page element, new is bigger (fits)
720 * 3. On-page element, new is bigger (does not fit)
721 * 4. On-page element, old is bigger
722 * Numbers 1, 2, and 4 are essentially the same (and should
723 * be the common case). We handle case 3 as a delete and
728 * We need to compute the number of bytes that we are adding or
729 * removing from the entry. Normally, we can simply substract
730 * the number of bytes we are replacing (dbt->dlen) from the
731 * number of bytes we are inserting (dbt->size). However, if
732 * we are doing a partial put off the end of a record, then this
733 * formula doesn't work, because we are essentially adding
736 change = dbt->size - dbt->dlen;
738 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
739 is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
742 memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
744 len = LEN_HKEYDATA(hcp->pagep,
745 hashp->dbp->pgsize, H_DATAINDEX(hcp->bndx));
747 if (dbt->doff + dbt->dlen > len)
748 change += dbt->doff + dbt->dlen - len;
751 if (change > (int)P_FREESPACE(hcp->pagep) || is_big) {
753 * Case 3 -- two subcases.
754 * A. This is not really a partial operation, but an overwrite.
755 * Simple del and add works.
756 * B. This is a partial and we need to construct the data that
757 * we are really inserting (yuck).
758 * In both cases, we need to grab the key off the page (in
759 * some cases we could do this outside of this routine; for
760 * cleanliness we do it here. If you happen to be on a big
761 * key, this could be a performance hit).
764 F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL);
766 __db_ret(hashp->dbp, hcp->pagep, H_KEYINDEX(hcp->bndx),
767 &tmp, &hcp->big_key, &hcp->big_keylen)) != 0)
770 if (dbt->doff == 0 && dbt->dlen == len) {
771 ret = __ham_del_pair(hashp, hcp);
773 ret = __ham_add_el(hashp,
774 hcp, &tmp, dbt, H_KEYDATA);
775 } else { /* Case B */
776 type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
777 HPAGE_PTYPE(hk) : H_KEYDATA;
779 F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL);
781 if ((ret = __db_ret(hashp->dbp, hcp->pagep,
782 H_DATAINDEX(hcp->bndx), &tdata, &hcp->big_data,
783 &hcp->big_datalen)) != 0)
786 /* Now we can delete the item. */
787 if ((ret = __ham_del_pair(hashp, hcp)) != 0) {
792 /* Now shift old data around to make room for new. */
794 tdata.data = (void *)
795 realloc(tdata.data, tdata.size + change);
796 memset((u_int8_t *)tdata.data + tdata.size,
799 if (tdata.data == NULL)
801 end = (u_int8_t *)tdata.data + tdata.size;
803 src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
804 if (src < end && tdata.size > dbt->doff + dbt->dlen) {
805 len = tdata.size - dbt->doff - dbt->dlen;
807 memmove(dest, src, len);
809 memcpy((u_int8_t *)tdata.data + dbt->doff,
810 dbt->data, dbt->size);
811 tdata.size += change;
813 /* Now add the pair. */
814 ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type);
822 * Set up pointer into existing data. Do it before the log
823 * message so we can use it inside of the log setup.
825 beg = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
829 * If we are going to have to move bytes at all, figure out
830 * all the parameters here. Then log the call before moving
833 if (DB_LOGGING(hashp->dbp)) {
835 old_dbt.size = dbt->dlen;
836 if ((ret = __ham_replace_log(hashp->dbp->dbenv->lg_info,
837 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
838 hashp->dbp->log_fileid, PGNO(hcp->pagep),
839 (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep),
840 (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
843 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
846 __ham_onpage_replace(hcp->pagep, hashp->dbp->pgsize,
847 (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt);
853 * Replace data on a page with new data, possibly growing or shrinking what's
854 * there. This is called on two different occasions. On one (from replpair)
855 * we are interested in changing only the data. On the other (from recovery)
856 * we are replacing the entire data (header and all) with a new element. In
857 * the latter case, the off argument is negative.
858 * pagep: the page that we're changing
859 * ndx: page index of the element that is growing/shrinking.
860 * off: Offset at which we are beginning the replacement.
861 * change: the number of bytes (+ or -) that the element is growing/shrinking.
862 * dbt: the new data that gets written at beg.
863 * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
864 * PUBLIC: int32_t, DBT *));
867 __ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
877 u_int8_t *src, *dest;
882 src = (u_int8_t *)(pagep) + HOFFSET(pagep);
884 len = pagep->inp[ndx] - HOFFSET(pagep);
885 else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
886 len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) +
887 LEN_HKEYDATA(pagep, pgsize, ndx) - src;
890 len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src;
892 memmove(dest, src, len);
894 memset(dest + len, 0, change);
896 /* Now update the indices. */
897 for (i = ndx; i < NUM_ENT(pagep); i++)
898 pagep->inp[i] -= change;
899 HOFFSET(pagep) -= change;
902 memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off,
903 dbt->data, dbt->size);
905 memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
909 * PUBLIC: int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
912 __ham_split_page(hashp, obucket, nbucket)
914 u_int32_t obucket, nbucket;
916 DBT key, val, page_dbt;
919 PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
921 db_pgno_t bucket_pgno, next_pgno;
922 u_int32_t big_len, len;
926 dbenv = hashp->dbp->dbenv;
927 temp_pagep = old_pagep = new_pagep = NULL;
929 bucket_pgno = BUCKET_TO_PAGE(hashp, obucket);
930 if ((ret = __ham_get_page(hashp->dbp, bucket_pgno, &old_pagep)) != 0)
932 if ((ret = __ham_new_page(hashp, BUCKET_TO_PAGE(hashp, nbucket), P_HASH,
936 temp_pagep = hashp->split_buf;
937 memcpy(temp_pagep, old_pagep, hashp->hdr->pagesize);
939 if (DB_LOGGING(hashp->dbp)) {
940 page_dbt.size = hashp->hdr->pagesize;
941 page_dbt.data = old_pagep;
942 if ((ret = __ham_splitdata_log(dbenv->lg_info,
943 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
944 hashp->dbp->log_fileid, SPLITOLD, PGNO(old_pagep),
945 &page_dbt, &LSN(old_pagep))) != 0)
949 P_INIT(old_pagep, hashp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID,
950 PGNO_INVALID, 0, P_HASH);
952 if (DB_LOGGING(hashp->dbp))
953 LSN(old_pagep) = new_lsn; /* Structure assignment. */
957 val.flags = key.flags = 0;
958 while (temp_pagep != NULL) {
959 for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
961 __db_ret(hashp->dbp, temp_pagep, H_KEYINDEX(n),
962 &key, &big_buf, &big_len)) != 0)
965 if (__ham_call_hash(hashp, key.data, key.size)
972 * Figure out how many bytes we need on the new
973 * page to store the key/data pair.
976 len = LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
978 LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
980 2 * sizeof(db_indx_t);
982 if (P_FREESPACE(*pp) < len) {
983 if (DB_LOGGING(hashp->dbp)) {
984 page_dbt.size = hashp->hdr->pagesize;
986 if ((ret = __ham_splitdata_log(
988 (DB_TXN *)hashp->dbp->txn,
990 hashp->dbp->log_fileid, SPLITNEW,
991 PGNO(*pp), &page_dbt,
996 if ((ret = __ham_add_ovflpage(hashp,
1000 __ham_copy_item(hashp, temp_pagep, H_KEYINDEX(n), *pp);
1001 __ham_copy_item(hashp, temp_pagep, H_DATAINDEX(n), *pp);
1003 next_pgno = NEXT_PGNO(temp_pagep);
1005 /* Clear temp_page; if it's a link overflow page, free it. */
1006 if (PGNO(temp_pagep) != bucket_pgno && (ret =
1007 __ham_del_page(hashp->dbp, temp_pagep)) != 0)
1010 if (next_pgno == PGNO_INVALID)
1013 __ham_get_page(hashp->dbp, next_pgno, &temp_pagep)) != 0)
1016 if (temp_pagep != NULL && DB_LOGGING(hashp->dbp)) {
1017 page_dbt.size = hashp->hdr->pagesize;
1018 page_dbt.data = temp_pagep;
1019 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1020 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1021 hashp->dbp->log_fileid, SPLITOLD, PGNO(temp_pagep),
1022 &page_dbt, &LSN(temp_pagep))) != 0)
1024 LSN(temp_pagep) = new_lsn;
1027 if (big_buf != NULL)
1031 * If the original bucket spanned multiple pages, then we've got
1032 * a pointer to a page that used to be on the bucket chain. It
1033 * should be deleted.
1035 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
1036 (ret = __ham_del_page(hashp->dbp, temp_pagep)) != 0)
1040 * Write new buckets out.
1042 if (DB_LOGGING(hashp->dbp)) {
1043 page_dbt.size = hashp->hdr->pagesize;
1044 page_dbt.data = old_pagep;
1045 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1046 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1047 hashp->dbp->log_fileid, SPLITNEW, PGNO(old_pagep),
1048 &page_dbt, &LSN(old_pagep))) != 0)
1050 LSN(old_pagep) = new_lsn;
1052 page_dbt.data = new_pagep;
1053 if ((ret = __ham_splitdata_log(dbenv->lg_info,
1054 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1055 hashp->dbp->log_fileid, SPLITNEW, PGNO(new_pagep),
1056 &page_dbt, &LSN(new_pagep))) != 0)
1058 LSN(new_pagep) = new_lsn;
1060 ret = __ham_put_page(hashp->dbp, old_pagep, 1);
1061 if ((tret = __ham_put_page(hashp->dbp, new_pagep, 1)) != 0 &&
1066 if (old_pagep != NULL)
1067 (void)__ham_put_page(hashp->dbp, old_pagep, 1);
1068 if (new_pagep != NULL)
1069 (void)__ham_put_page(hashp->dbp, new_pagep, 1);
1070 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
1071 (void)__ham_put_page(hashp->dbp, temp_pagep, 1);
1077 * Add the given pair to the page. The page in question may already be
1078 * held (i.e. it was already gotten). If it is, then the page is passed
1079 * in via the pagep parameter. On return, pagep will contain the page
1080 * to which we just added something. This allows us to link overflow
1081 * pages and return the new page having correctly put the last page.
1083 * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
1087 __ham_add_el(hashp, hcp, key, val, type)
1090 const DBT *key, *val;
1093 DBT *pkey, *pdata, key_dbt, data_dbt;
1095 HOFFPAGE doff, koff;
1096 db_pgno_t next_pgno;
1097 u_int32_t data_size, key_size, pairsize;
1098 int do_expand, is_keybig, is_databig, rectype, ret;
1099 int key_type, data_type;
1103 if (hcp->pagep == NULL && (ret = __ham_get_page(hashp->dbp,
1104 hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page :
1105 hcp->pgno, &hcp->pagep)) != 0)
1108 key_size = HKEYDATA_PSIZE(key->size);
1109 data_size = HKEYDATA_PSIZE(val->size);
1110 is_keybig = ISBIG(hashp, key->size);
1111 is_databig = ISBIG(hashp, val->size);
1113 key_size = HOFFPAGE_PSIZE;
1115 data_size = HOFFPAGE_PSIZE;
1117 pairsize = key_size + data_size;
1119 /* Advance to first page in chain with room for item. */
1120 while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) !=
1123 * This may not be the end of the chain, but the pair may fit
1124 * anyway. Check if it's a bigpair that fits or a regular
1127 if (P_FREESPACE(hcp->pagep) >= pairsize)
1129 next_pgno = NEXT_PGNO(hcp->pagep);
1131 __ham_next_cpage(hashp, hcp, next_pgno, 0, 0)) != 0)
1136 * Check if we need to allocate a new page.
1138 if (P_FREESPACE(hcp->pagep) < pairsize) {
1140 if ((ret = __ham_add_ovflpage(hashp,
1141 hcp->pagep, 1, &hcp->pagep)) != 0)
1143 hcp->pgno = PGNO(hcp->pagep);
1149 hcp->bndx = H_NUMPAIRS(hcp->pagep);
1150 F_CLR(hcp, H_DELETED);
1152 if ((ret = __db_poff(hashp->dbp,
1153 key, &koff.pgno, __ham_overflow_page)) != 0)
1155 koff.type = H_OFFPAGE;
1156 koff.tlen = key->size;
1157 key_dbt.data = &koff;
1158 key_dbt.size = sizeof(koff);
1160 key_type = H_OFFPAGE;
1163 key_type = H_KEYDATA;
1167 if ((ret = __db_poff(hashp->dbp,
1168 val, &doff.pgno, __ham_overflow_page)) != 0)
1170 doff.type = H_OFFPAGE;
1171 doff.tlen = val->size;
1172 data_dbt.data = &doff;
1173 data_dbt.size = sizeof(doff);
1175 data_type = H_OFFPAGE;
1181 if (DB_LOGGING(hashp->dbp)) {
1184 rectype |= PAIR_DATAMASK;
1186 rectype |= PAIR_KEYMASK;
1188 if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
1189 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
1190 hashp->dbp->log_fileid, PGNO(hcp->pagep),
1191 (u_int32_t)H_NUMPAIRS(hcp->pagep),
1192 &LSN(hcp->pagep), pkey, pdata)) != 0)
1195 /* Move lsn onto page. */
1196 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
1199 __ham_putitem(hcp->pagep, pkey, key_type);
1200 __ham_putitem(hcp->pagep, pdata, data_type);
1203 * For splits, we are going to update item_info's page number
1204 * field, so that we can easily return to the same page the
1205 * next time we come in here. For other operations, this shouldn't
1206 * matter, since odds are this is the last thing that happens before
1207 * we return to the user program.
1209 hcp->pgno = PGNO(hcp->pagep);
1212 * XXX Maybe keep incremental numbers here
1214 if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
1215 hashp->hdr->nelem++;
1217 if (do_expand || (hashp->hdr->ffactor != 0 &&
1218 (u_int32_t)H_NUMPAIRS(hcp->pagep) > hashp->hdr->ffactor))
1219 F_SET(hcp, H_EXPAND);
1225 * Special __putitem call used in splitting -- copies one entry to
1226 * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
1227 * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
1228 * do not need to do any logging here.
1229 * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
1232 __ham_copy_item(hashp, src_page, src_ndx, dest_page)
1242 * Copy the key and data entries onto this new page.
1244 src = P_ENTRY(src_page, src_ndx);
1246 /* Set up space on dest. */
1247 len = LEN_HITEM(src_page, hashp->hdr->pagesize, src_ndx);
1248 HOFFSET(dest_page) -= len;
1249 dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
1250 dest = P_ENTRY(dest_page, NUM_ENT(dest_page));
1251 NUM_ENT(dest_page)++;
1253 memcpy(dest, src, len);
1259 * pointer on success
1262 * PUBLIC: int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
1265 __ham_add_ovflpage(hashp, pagep, release, pp)
1276 dbenv = hashp->dbp->dbenv;
1278 if ((ret = __ham_overflow_page(hashp->dbp, P_HASH, &new_pagep)) != 0)
1281 if (DB_LOGGING(hashp->dbp)) {
1282 if ((ret = __ham_newpage_log(dbenv->lg_info,
1283 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, PUTOVFL,
1284 hashp->dbp->log_fileid, PGNO(pagep), &LSN(pagep),
1285 PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
1288 /* Move lsn onto page. */
1289 LSN(pagep) = LSN(new_pagep) = new_lsn;
1291 NEXT_PGNO(pagep) = PGNO(new_pagep);
1292 PREV_PGNO(new_pagep) = PGNO(pagep);
1295 ret = __ham_put_page(hashp->dbp, pagep, 1);
1297 hashp->hash_overflows++;
1304 * PUBLIC: int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
1307 __ham_new_page(hashp, addr, type, pp)
1309 u_int32_t addr, type;
1315 if ((ret = memp_fget(hashp->dbp->mpf,
1316 &addr, DB_MPOOL_CREATE, &pagep)) != 0)
1320 __account_page(hashp, addr, 1);
1322 /* This should not be necessary because page-in should do it. */
1324 hashp->hdr->pagesize, addr, PGNO_INVALID, PGNO_INVALID, 0, type);
1331 * PUBLIC: int __ham_del_page __P((DB *, PAGE *));
1334 __ham_del_page(dbp, pagep)
1342 hashp = (HTAB *)dbp->internal;
1344 DIRTY_META(hashp, ret);
1347 __db_err(hashp->dbp->dbenv,
1348 "free_ovflpage: unable to lock meta data page %s\n",
1351 * If we are going to return an error, then we should free
1352 * the page, so it doesn't stay pinned forever.
1354 (void)__ham_put_page(hashp->dbp, pagep, 0);
1358 if (DB_LOGGING(hashp->dbp)) {
1359 if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
1360 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPGNO,
1361 hashp->dbp->log_fileid, PGNO(pagep), hashp->hdr->last_freed,
1362 (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID,
1363 &LSN(pagep), &hashp->hdr->lsn)) != 0)
1366 hashp->hdr->lsn = new_lsn;
1367 LSN(pagep) = new_lsn;
1374 __pgno = pagep->pgno;
1376 memset(pagep, 0xff, dbp->pgsize);
1377 pagep->pgno = __pgno;
1381 TYPE(pagep) = P_INVALID;
1382 NEXT_PGNO(pagep) = hashp->hdr->last_freed;
1383 hashp->hdr->last_freed = PGNO(pagep);
1385 return (__ham_put_page(hashp->dbp, pagep, 1));
1390 * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
1393 __ham_put_page(dbp, pagep, is_dirty)
1399 __account_page((HTAB *)dbp->cookie,
1400 ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
1402 return (memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0)));
1406 * __ham_dirty_page --
1407 * Mark a page dirty.
1409 * PUBLIC: int __ham_dirty_page __P((HTAB *, PAGE *));
1412 __ham_dirty_page(hashp, pagep)
1416 return (memp_fset(hashp->dbp->mpf, pagep, DB_MPOOL_DIRTY));
1420 * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
1423 __ham_get_page(dbp, addr, pagep)
1430 ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep);
1433 __account_page((HTAB *)dbp->internal, addr, 1);
1439 * PUBLIC: int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
1442 __ham_overflow_page(dbp, type, pp)
1447 DB_LSN *lsnp, new_lsn;
1450 db_pgno_t new_addr, next_free, newalloc_flag;
1451 u_int32_t offset, splitnum;
1454 hashp = (HTAB *)dbp->internal;
1457 DIRTY_META(hashp, ret);
1462 * This routine is split up into two parts. First we have
1463 * to figure out the address of the new page that we are
1464 * allocating. Then we have to log the allocation. Only
1465 * after the log do we get to complete allocation of the
1468 new_addr = hashp->hdr->last_freed;
1469 if (new_addr != PGNO_INVALID) {
1470 if ((ret = __ham_get_page(hashp->dbp, new_addr, &p)) != 0)
1472 next_free = NEXT_PGNO(p);
1476 splitnum = hashp->hdr->ovfl_point;
1477 hashp->hdr->spares[splitnum]++;
1478 offset = hashp->hdr->spares[splitnum] -
1479 (splitnum ? hashp->hdr->spares[splitnum - 1] : 0);
1480 new_addr = PGNO_OF(hashp, hashp->hdr->ovfl_point, offset);
1481 if (new_addr > MAX_PAGES(hashp)) {
1482 __db_err(hashp->dbp->dbenv, "hash: out of file pages");
1483 hashp->hdr->spares[splitnum]--;
1486 next_free = PGNO_INVALID;
1492 if (DB_LOGGING(hashp->dbp)) {
1493 if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
1494 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, ALLOCPGNO,
1495 hashp->dbp->log_fileid, new_addr, next_free,
1496 0, newalloc_flag, type, lsnp, &hashp->hdr->lsn)) != 0)
1499 hashp->hdr->lsn = new_lsn;
1505 /* We just took something off the free list, initialize it. */
1506 hashp->hdr->last_freed = next_free;
1507 P_INIT(p, hashp->hdr->pagesize, PGNO(p), PGNO_INVALID,
1508 PGNO_INVALID, 0, (u_int8_t)type);
1510 /* Get the new page. */
1511 if ((ret = __ham_new_page(hashp, new_addr, type, &p)) != 0)
1514 if (DB_LOGGING(hashp->dbp))
1523 * PUBLIC: #ifdef DEBUG
1524 * PUBLIC: int __bucket_to_page __P((HTAB *, int));
1528 __bucket_to_page(hashp, n)
1536 ret_val += hashp->hdr->spares[__db_log2(n + 1) - 1];
1543 * Create a bunch of overflow pages at the current split point.
1544 * PUBLIC: void __ham_init_ovflpages __P((HTAB *));
1547 __ham_init_ovflpages(hp)
1552 db_pgno_t last_pgno;
1553 u_int32_t i, numpages;
1555 numpages = hp->hdr->ovfl_point + 1;
1557 last_pgno = hp->hdr->last_freed;
1558 if (DB_LOGGING(hp->dbp)) {
1559 (void)__ham_ovfl_log(hp->dbp->dbenv->lg_info,
1560 (DB_TXN *)hp->dbp->txn, &new_lsn, 0,
1561 hp->dbp->log_fileid, PGNO_OF(hp, hp->hdr->ovfl_point, 1),
1562 numpages, last_pgno, &hp->hdr->lsn);
1563 hp->hdr->lsn = new_lsn;
1567 hp->hdr->spares[hp->hdr->ovfl_point] += numpages;
1568 for (i = numpages; i > 0; i--) {
1569 if (__ham_new_page(hp,
1570 PGNO_OF(hp, hp->hdr->ovfl_point, i), P_INVALID, &p) != 0)
1573 NEXT_PGNO(p) = last_pgno;
1574 last_pgno = PGNO(p);
1575 (void)__ham_put_page(hp->dbp, p, 1);
1577 hp->hdr->last_freed = last_pgno;
1581 * PUBLIC: int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
1584 __ham_get_cpage(hashp, hcp, mode)
1591 if (hcp->lock == 0 && F_ISSET(hashp->dbp, DB_AM_LOCKING) &&
1592 (ret = __ham_lock_bucket(hashp->dbp, hcp, mode)) != 0)
1595 if (hcp->pagep == NULL) {
1596 if (hcp->pgno == PGNO_INVALID) {
1597 hcp->pgno = BUCKET_TO_PAGE(hashp, hcp->bucket);
1602 __ham_get_page(hashp->dbp, hcp->pgno, &hcp->pagep)) != 0)
1606 if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL)
1608 __ham_get_page(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
1614 * Get a new page at the cursor, putting the last page if necessary.
1615 * If the flag is set to H_ISDUP, then we are talking about the
1616 * duplicate page, not the main page.
1617 * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
1618 * PUBLIC: int, int));
1621 __ham_next_cpage(hashp, hcp, pgno, dirty, flags)
1631 if (flags & H_ISDUP && hcp->dpagep != NULL &&
1632 (ret = __ham_put_page(hashp->dbp, hcp->dpagep, dirty)) != 0)
1634 else if (!(flags & H_ISDUP) && hcp->pagep != NULL &&
1635 (ret = __ham_put_page(hashp->dbp, hcp->pagep, dirty)) != 0)
1638 if ((ret = __ham_get_page(hashp->dbp, pgno, &p)) != 0)
1641 if (flags & H_ISDUP) {
1655 * __ham_lock_bucket --
1656 * Get the lock on a particular bucket.
1659 __ham_lock_bucket(dbp, hcp, mode)
1667 * What a way to trounce on the memory system. It might be
1668 * worth copying the lk_info into the hashp.
1671 dbp->lock.pgno = (db_pgno_t)(hcp->bucket);
1672 ret = lock_get(dbp->dbenv->lk_info,
1673 dbp->txn == NULL ? dbp->locker : dbp->txn->txnid, 0,
1674 &dbp->lock_dbt, mode, &hcp->lock);
1676 return (ret < 0 ? EAGAIN : ret);
1681 * Delete a pair on a page, paying no attention to what the pair
1682 * represents. The caller is responsible for freeing up duplicates
1683 * or offpage entries that might be referenced by this pair.
1685 * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
1688 __ham_dpair(dbp, p, pndx)
1694 u_int8_t *dest, *src;
1697 * Compute "delta", the amount we have to shift all of the
1698 * offsets. To find the delta, we just need to calculate
1699 * the size of the pair of elements we are removing.
1701 delta = H_PAIRSIZE(p, dbp->pgsize, pndx);
1704 * The hard case: we want to remove something other than
1705 * the last item on the page. We need to shift data and
1708 if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) {
1710 * Move the data: src is the first occupied byte on
1711 * the page. (Length is delta.)
1713 src = (u_int8_t *)p + HOFFSET(p);
1716 * Destination is delta bytes beyond src. This might
1717 * be an overlapping copy, so we have to use memmove.
1720 memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p));
1723 /* Adjust the offsets. */
1724 for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) {
1725 p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta;
1726 p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta;
1729 /* Adjust page metadata. */
1730 HOFFSET(p) = HOFFSET(p) + delta;
1731 NUM_ENT(p) = NUM_ENT(p) - 2;
1736 __account_page(hashp, pgno, inout)
1748 if (inout == -1) /* XXX: Kluge */
1751 /* Find page in list. */
1752 for (i = 0; i < last; i++)
1753 if (list[i].pgno == pgno)
1757 list[last].times = inout;
1758 list[last].pgno = pgno;
1761 list[i].times = inout;
1762 if (list[i].times == 0) {
1763 for (j = i; j < last; j++)
1764 list[j] = list[j + 1];
1767 for (i = 0; i < last; i++, list[i].times++)
1768 if (list[i].times > 20 &&
1769 !__is_bitmap_pgno(hashp, list[i].pgno))
1770 (void)fprintf(stderr,
1771 "Warning: pg %lu has been out for %d times\n",
1772 (u_long)list[i].pgno, list[i].times);
1774 #endif /* DEBUG_SLOW */