2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * The Regents of the University of California. All rights reserved.
11 * This code is derived from software contributed to Berkeley by
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 static const char sccsid[] = "@(#)hash_dup.c 10.8 (Sleepycat) 10/14/97";
52 * Manipulation of duplicates for the hash package.
61 #ifndef NO_SYSTEM_INCLUDES
62 #include <sys/types.h>
75 static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t));
76 static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *));
77 static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *));
80 * Called from hash_access to add a duplicate key. nval is the new
81 * value that we want to add. The flags correspond to the flag values
82 * to cursor_put indicating where to add the new element.
84 * Case 1: The existing duplicate set already resides on a separate page.
85 * We can use common code for this.
86 * Case 2: The element is small enough to just be added to the existing set.
87 * Case 3: The element is large enough to be a big item, so we're going to
88 * have to push the set onto a new page.
89 * Case 4: The element is large enough to push the duplicate set onto a
92 * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
95 __ham_add_dup(hashp, hcp, nval, flags)
102 u_int32_t del_len, new_size;
106 if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID)
107 del_len = hcp->dup_len;
111 if ((ret = __ham_check_move(hashp, hcp,
112 (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0)
116 * Check if resulting duplicate set is going to need to go
117 * onto a separate duplicate page. If so, convert the
118 * duplicate set and add the new one. After conversion,
119 * hcp->dndx is the first free ndx or the index of the
120 * current pointer into the duplicate set.
122 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
123 new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep,
124 hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
127 * We convert to off-page duplicates if the item is a big item,
128 * the addition of the new item will make the set large, or
129 * if there isn't enough room on this page to add the next item.
131 if (HPAGE_PTYPE(hk) != H_OFFDUP &&
132 (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hashp, new_size) ||
133 DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) {
135 if ((ret = __ham_dup_convert(hashp, hcp)) != 0)
138 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
141 /* There are two separate cases here: on page and off page. */
142 if (HPAGE_PTYPE(hk) != H_OFFDUP) {
143 if (HPAGE_PTYPE(hk) != H_DUPLICATE) {
144 HPAGE_PTYPE(hk) = H_DUPLICATE;
146 pval.data = HKEYDATA_DATA(hk);
147 pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize,
150 __ham_make_dup(&pval, &tmp_val, &hcp->big_data,
151 &hcp->big_datalen)) != 0 || (ret =
152 __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0)
156 /* Now make the new entry a duplicate. */
157 if ((ret = __ham_make_dup(nval,
158 &tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0)
162 switch (flags) { /* On page. */
167 tmp_val.doff = LEN_HDATA(hcp->pagep,
168 hashp->hdr->pagesize, hcp->bndx);
171 tmp_val.doff = hcp->dup_off;
172 tmp_val.dlen = DUP_SIZE(hcp->dup_len);
175 tmp_val.doff = hcp->dup_off;
178 tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len);
181 /* Add the duplicate. */
182 ret = __ham_replpair(hashp, hcp, &tmp_val, 0);
184 ret = __ham_dirty_page(hashp, hcp->pagep);
185 __ham_c_update(hashp, hcp, hcp->pgno, tmp_val.size, 1, 1);
189 /* If we get here, then we're on duplicate pages. */
190 if (hcp->dpgno == PGNO_INVALID) {
191 memcpy(&hcp->dpgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t));
198 * The only way that we are already on a dup page is
199 * if we just converted the on-page representation.
200 * In that case, we've only got one page of duplicates.
202 if (hcp->dpagep == NULL && (ret =
203 __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
208 if (hcp->dpagep == NULL && (ret =
209 __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
211 hcp->dpgno = PGNO(hcp->dpagep);
212 hcp->dndx = NUM_ENT(hcp->dpagep);
215 if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx,
216 BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len)))
220 case DB_BEFORE: /* The default behavior is correct. */
227 ret = __db_dput(hashp->dbp,
228 nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page);
229 hcp->pgno = PGNO(hcp->pagep);
230 __ham_c_update(hashp, hcp, hcp->pgno, nval->size, 1, 1);
235 * Convert an on-page set of duplicates to an offpage set of duplicates.
238 __ham_dup_convert(hashp, hcp)
250 * Create a new page for the duplicates.
253 __ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0)
255 hcp->dpagep->type = P_DUPLICATE;
256 hcp->dpgno = PGNO(hcp->dpagep);
259 * Now put the duplicates onto the new page.
262 switch (HPAGE_PTYPE(H_PAIRDATA(hcp->pagep, hcp->bndx))) {
264 /* Simple case, one key on page; move it to dup page. */
267 LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
268 dbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
269 ret = __db_pitem(hashp->dbp, hcp->dpagep,
270 (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt);
272 __ham_dirty_page(hashp, hcp->dpagep);
275 /* Simple case, one key on page; move it to dup page. */
278 P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE);
279 B_TSET(bo.type, ho.type, 0);
282 dbt.size = BOVERFLOW_SIZE;
285 ret = __db_pitem(hashp->dbp, hcp->dpagep,
286 (u_int32_t)dndx, dbt.size, &dbt, NULL);
288 __ham_dirty_page(hashp, hcp->dpagep);
291 p = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
293 LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
295 for (dndx = 0; p < pend; dndx++) {
296 memcpy(&len, p, sizeof(db_indx_t));
298 p += sizeof(db_indx_t);
300 p += len + sizeof(db_indx_t);
301 ret = __db_dput(hashp->dbp, &dbt,
302 &hcp->dpagep, &dndx, __ham_overflow_page);
308 ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno);
312 * Now attach this to the source page in place of
313 * the old duplicate item.
315 __ham_move_offpage(hashp, hcp->pagep,
316 (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno);
318 /* Can probably just do a "put" here. */
319 ret = __ham_dirty_page(hashp, hcp->pagep);
321 (void)__ham_del_page(hashp->dbp, hcp->dpagep);
328 __ham_make_dup(notdup, dup, bufp, sizep)
334 db_indx_t tsize, item_size;
338 item_size = (db_indx_t)notdup->size;
339 tsize = DUP_SIZE(item_size);
340 if ((ret = __ham_init_dbt(dup, tsize, bufp, sizep)) != 0)
344 dup->flags = notdup->flags;
345 F_SET(dup, DB_DBT_PARTIAL);
348 memcpy(p, &item_size, sizeof(db_indx_t));
349 p += sizeof(db_indx_t);
350 memcpy(p, notdup->data, notdup->size);
352 memcpy(p, &item_size, sizeof(db_indx_t));
355 dup->dlen = notdup->size;
361 __ham_check_move(hashp, hcp, add_len)
371 u_int32_t new_datalen, old_len;
375 * Check if we can do whatever we need to on this page. If not,
376 * then we'll have to move the current element to a new page.
378 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
381 * If the item is already off page duplicates or an offpage item,
382 * then we know we can do whatever we need to do in-place
384 if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE)
388 LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
389 new_datalen = old_len - HKEYDATA_SIZE(0) + add_len;
392 * We need to add a new page under two conditions:
393 * 1. The addition makes the total data length cross the BIG
394 * threshold and the OFFDUP structure won't fit on this page.
395 * 2. The addition does not make the total data cross the
396 * threshold, but the new data won't fit on the page.
397 * If neither of these is true, then we can return.
399 if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE ||
400 HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep)))
403 if (!ISBIG(hashp, new_datalen) &&
404 add_len <= (int32_t)P_FREESPACE(hcp->pagep))
408 * If we get here, then we need to move the item to a new page.
409 * Check if there are more pages in the chain.
412 new_datalen = ISBIG(hashp, new_datalen) ?
413 HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
416 for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID;
417 next_pgno = NEXT_PGNO(next_pagep)) {
418 if (next_pagep != NULL &&
419 (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0)
422 if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
425 if (P_FREESPACE(next_pagep) >= new_datalen)
429 /* No more pages, add one. */
430 if (next_pagep == NULL &&
431 (ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0)
434 /* Add new page at the end of the chain. */
435 if (P_FREESPACE(next_pagep) < new_datalen &&
436 (ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0)
439 /* Copy the item to the new page. */
440 if (DB_LOGGING(hashp->dbp)) {
445 H_PAIRKEY(hcp->pagep, hcp->bndx)) == H_OFFPAGE) {
446 rectype |= PAIR_KEYMASK;
447 k.data = H_PAIRKEY(hcp->pagep, hcp->bndx);
448 k.size = HOFFPAGE_SIZE;
451 HKEYDATA_DATA(H_PAIRKEY(hcp->pagep, hcp->bndx));
452 k.size = LEN_HKEY(hcp->pagep,
453 hashp->hdr->pagesize, hcp->bndx);
456 if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
457 rectype |= PAIR_DATAMASK;
458 d.data = H_PAIRDATA(hcp->pagep, hcp->bndx);
459 d.size = HOFFPAGE_SIZE;
462 HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
463 d.size = LEN_HDATA(hcp->pagep,
464 hashp->hdr->pagesize, hcp->bndx);
468 if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
469 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
470 hashp->dbp->log_fileid, PGNO(next_pagep),
471 (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep),
475 /* Move lsn onto page. */
476 LSN(next_pagep) = new_lsn; /* Structure assignment. */
479 __ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep);
480 __ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep);
482 /* Now delete the pair from the current page. */
483 ret = __ham_del_pair(hashp, hcp, 0);
485 (void)__ham_put_page(hashp->dbp, hcp->pagep, 1);
486 hcp->pagep = next_pagep;
487 hcp->pgno = PGNO(hcp->pagep);
488 hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1;
489 F_SET(hcp, H_EXPAND);
494 * Replace an onpage set of duplicates with the OFFDUP structure that
495 * references the duplicate page.
496 * XXX This is really just a special case of __onpage_replace; we should
497 * probably combine them.
498 * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
501 __ham_move_offpage(hashp, pagep, ndx, pgno)
517 if (DB_LOGGING(hashp->dbp)) {
519 new_dbt.size = HOFFDUP_SIZE;
520 old_dbt.data = P_ENTRY(pagep, ndx);
521 old_dbt.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx);
522 (void)__ham_replace_log(hashp->dbp->dbenv->lg_info,
523 (DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0,
524 hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx,
525 &LSN(pagep), -1, &old_dbt, &new_dbt, 0);
529 LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE;
533 src = (u_int8_t *)(pagep) + HOFFSET(pagep);
534 memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep));
535 HOFFSET(pagep) += shrink;
537 /* Update index table. */
538 for (i = ndx; i < NUM_ENT(pagep); i++)
539 pagep->inp[i] += shrink;
542 /* Now copy the offdup entry onto the page. */
543 memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE);