2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
11 static const char sccsid[] = "@(#)bt_rec.c 10.14 (Sleepycat) 9/6/97";
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
30 #include "db_dispatch.h"
31 #include "common_ext.h"
34 * __bam_pg_alloc_recover --
35 * Recovery function for pg_alloc.
37 * PUBLIC: int __bam_pg_alloc_recover
38 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * __bam_pg_alloc_recover: redo or undo one logged page allocation.
 *
 * Two pages are fetched: the metadata page (plain memp_fget) and the
 * allocated page argp->pgno (memp_fget with DB_MPOOL_CREATE).  If the
 * second fetch fails the metadata page is put back before leaving.
 *
 * For each page, cmp_p is log_compare(page LSN, LSN saved in the log
 * record) and cmp_n is log_compare(*lsnp, page LSN); a zero result is
 * what triggers the fix-up.
 *
 *   Allocated page
 *     redo, and (zero LSN on the page or cmp_p == 0): P_INIT the page with
 *       type argp->ptype.
 *     undo, and (zero LSN on the page or cmp_n == 0): P_INIT the page as
 *       P_INVALID, passing meta->free, then set its LSN to argp->page_lsn.
 *   Metadata page
 *     redo, cmp_p == 0: meta->free = argp->next.
 *     undo, cmp_n == 0: meta->lsn = argp->meta_lsn, meta->free = argp->pgno.
 *
 * Each page is handed back through memp_fput with DB_MPOOL_DIRTY when
 * the local flag `modified` is non-zero; a failing memp_fput leads to
 * __db_panic.  The visible tail of the function stores argp->prev_lsn
 * into *lsnp.
 *
 * NOTE(review): the undo path passes meta->free as read from the fetched
 * metadata page, before the metadata fix-up further down, whereas
 * __bam_pg_free_recover passes argp->next in the same P_INIT position.
 * Confirm that meta->free is the intended value when the metadata page
 * itself still needs undoing.
 */
41 __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
48 __bam_pg_alloc_args *argp;
54 int cmp_n, cmp_p, created, modified, ret;
56 REC_PRINT(__bam_pg_alloc_print);
57 REC_INTRO(__bam_pg_alloc_read);
60 * Fix up the allocated page. If we're redoing the operation, we have
61 * to get the page (creating it if it doesn't exist), and update its
62 * LSN. If we're undoing the operation, we have to reset the page's
63 * LSN and put it on the free list.
65 * Fix up the metadata page. If we're redoing the operation, we have
66 * to get the metadata page and update its LSN and its free pointer.
67 * If we're undoing the operation and the page was ever created, we put
71 if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
72 (void)__db_pgerr(file_dbp, pgno);
75 if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
76 (void)__db_pgerr(file_dbp, argp->pgno);
77 (void)memp_fput(mpf, meta, 0);
81 /* Fix up the allocated page. */
82 created = IS_ZERO_LSN(LSN(pagep));
84 cmp_n = log_compare(lsnp, &LSN(pagep));
85 cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
86 if ((created || cmp_p == 0) && redo) {
87 /* Need to redo update described. */
88 P_INIT(pagep, file_dbp->pgsize,
89 argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
93 } else if ((created || cmp_n == 0) && !redo) {
94 /* Need to undo update described. */
95 P_INIT(pagep, file_dbp->pgsize,
96 argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
98 pagep->lsn = argp->page_lsn;
101 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
102 (void)__db_panic(file_dbp);
103 (void)memp_fput(mpf, meta, 0);
107 /* Fix up the metadata page. */
109 cmp_n = log_compare(lsnp, &LSN(meta));
110 cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
111 if (cmp_p == 0 && redo) {
112 /* Need to redo update described. */
114 meta->free = argp->next;
116 } else if (cmp_n == 0 && !redo) {
117 /* Need to undo update described. */
118 meta->lsn = argp->meta_lsn;
119 meta->free = argp->pgno;
122 if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
123 (void)__db_panic(file_dbp);
127 *lsnp = argp->prev_lsn;
134 * __bam_pg_free_recover --
135 * Recovery function for pg_free.
137 * PUBLIC: int __bam_pg_free_recover
138 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * __bam_pg_free_recover: redo or undo one logged page free.
 *
 * The freed page (argp->pgno) is handled first and put back before the
 * metadata page (PGNO_METADATA) is fetched, so only one page is held at
 * a time.
 *
 *   Freed page
 *     redo, page LSN compares equal to the LSN inside the logged header
 *       (argp->header.data): P_INIT the page as P_INVALID, keeping its
 *       own pgno and passing argp->next.
 *     undo, page LSN compares equal to *lsnp: memcpy the logged header
 *       (argp->header.size bytes) back over the start of the page.
 *   Metadata page
 *     redo, LSN compares equal to argp->meta_lsn: meta->free = argp->pgno.
 *     undo, LSN compares equal to *lsnp: meta->free = argp->next and
 *       meta->lsn = argp->meta_lsn.
 *
 * Each page is returned through memp_fput, dirty when `modified` is
 * non-zero; a failing memp_fput leads to __db_panic.  The visible tail
 * of the function stores argp->prev_lsn into *lsnp.
 */
141 __bam_pg_free_recover(logp, dbtp, lsnp, redo, info)
148 __bam_pg_free_args *argp;
154 int cmp_n, cmp_p, modified, ret;
156 REC_PRINT(__bam_pg_free_print);
157 REC_INTRO(__bam_pg_free_read);
160 * Fix up the freed page. If we're redoing the operation we get the
161 * page and explicitly discard its contents, then update its LSN. If
162 * we're undoing the operation, we get the page and restore its header.
164 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
165 (void)__db_pgerr(file_dbp, argp->pgno);
169 cmp_n = log_compare(lsnp, &LSN(pagep));
170 cmp_p = log_compare(&LSN(pagep), &LSN(argp->header.data));
171 if (cmp_p == 0 && redo) {
172 /* Need to redo update described. */
173 P_INIT(pagep, file_dbp->pgsize,
174 pagep->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
178 } else if (cmp_n == 0 && !redo) {
179 /* Need to undo update described. */
180 memcpy(pagep, argp->header.data, argp->header.size);
184 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
185 (void)__db_panic(file_dbp);
190 * Fix up the metadata page. If we're redoing or undoing the operation
191 * we get the page and update its LSN and free pointer.
193 pgno = PGNO_METADATA;
194 if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
195 (void)__db_pgerr(file_dbp, pgno);
200 cmp_n = log_compare(lsnp, &LSN(meta));
201 cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
202 if (cmp_p == 0 && redo) {
203 /* Need to redo update described. */
204 meta->free = argp->pgno;
208 } else if (cmp_n == 0 && !redo) {
209 /* Need to undo update described. */
210 meta->free = argp->next;
212 meta->lsn = argp->meta_lsn;
215 if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
216 (void)__db_panic(file_dbp);
220 *lsnp = argp->prev_lsn;
227 * __bam_split_recover --
228 * Recovery function for split.
230 * PUBLIC: int __bam_split_recover
231 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * __bam_split_recover: redo or undo one logged page split.
 *
 * Pages involved
 *   pp  the split page (pgno); for a root split this is PGNO_ROOT.
 *   lp  left child, argp->left.   rp  right child, argp->right.
 *       Both are fetched up front without an error report; later code
 *       tests them against NULL, so a missing child is tolerated.
 *   np  the page that followed the split page, argp->npgno; touched only
 *       for a non-root split whose logged argp->nlsn is non-zero.
 *   sp  source image the children are rebuilt from (assigned on a line
 *       outside this view -- presumably the logged page in argp->pg).
 *
 * Redo
 *   A page needs rebuilding when its LSN compares equal to the LSN saved
 *   in the log record (argp->pg for the split page, argp->llsn,
 *   argp->rlsn) or, for a child, when it could not be fetched.  If
 *   nothing needs rebuilding the work is skipped.  Otherwise two scratch
 *   pages _lp/_rp are malloc-ed, initialised with P_INIT, filled by
 *   __bam_copy from sp split at argp->indx, and memcpy-ed over the stale
 *   child pages (created with DB_MPOOL_CREATE if absent).  For a root
 *   split with p_update set, the root is re-initialised one level above
 *   _lp as P_IBTREE (DB_BTREE) or P_IRECNO.  Finally PREV_PGNO of the
 *   following page is pointed at argp->right when its LSN equals
 *   argp->nlsn.
 *
 * Undo
 *   The split page is overwritten with the logged image when its LSN
 *   equals *lsnp; children whose LSN equals *lsnp get their LSN reset to
 *   argp->llsn / argp->rlsn; the following page gets PREV_PGNO set back
 *   to argp->left and its LSN reset to argp->nlsn.
 *
 * Exit
 *   fatal: calls __db_panic.  out: puts back, clean, every page pointer
 *   that is still non-NULL, keeping the first error in ret.
 */
234 __bam_split_recover(logp, dbtp, lsnp, redo, info)
241 __bam_split_args *argp;
244 PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
246 int l_update, p_update, r_update, ret, rootsplit, t_ret;
248 REC_PRINT(__bam_split_print);
251 _lp = lp = np = pp = _rp = rp = NULL;
253 REC_INTRO(__bam_split_read);
256 * There are two kinds of splits that we have to recover from. The
257 * first is a root-page split, where the root page is split from a
258 * leaf page into an internal page and two new leaf pages are created.
259 * The second is where a page is split into two pages, and a new key
260 * is inserted into the parent page.
264 rootsplit = pgno == PGNO_ROOT;
265 if (memp_fget(mpf, &argp->left, 0, &lp) != 0)
267 if (memp_fget(mpf, &argp->right, 0, &rp) != 0)
271 l_update = r_update = p_update = 0;
273 * Decide if we need to resplit the page.
275 * If this is a root split, then the root has to exist, it's
276 * the page we're splitting and it gets modified. If this is
277 * not a root split, then the left page has to exist, for the
281 if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
282 (void)__db_pgerr(file_dbp, pgno);
287 log_compare(&LSN(pp), &LSN(argp->pg.data)) == 0;
290 (void)__db_pgerr(file_dbp, argp->left);
293 if (lp == NULL || log_compare(&LSN(lp), &argp->llsn) == 0)
295 if (rp == NULL || log_compare(&LSN(rp), &argp->rlsn) == 0)
297 if (!p_update && !l_update && !r_update)
300 /* Allocate and initialize new left/right child pages. */
301 if ((_lp = (PAGE *)malloc(file_dbp->pgsize)) == NULL ||
302 (_rp = (PAGE *)malloc(file_dbp->pgsize)) == NULL) {
304 __db_err(file_dbp->dbenv, "%s", strerror(ret));
308 P_INIT(_lp, file_dbp->pgsize, argp->left,
310 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
311 LEVEL(sp), TYPE(sp));
312 P_INIT(_rp, file_dbp->pgsize, argp->right,
313 ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
314 PGNO_INVALID, LEVEL(sp), TYPE(sp));
316 P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
317 ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
318 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
319 LEVEL(sp), TYPE(sp));
320 P_INIT(_rp, file_dbp->pgsize, argp->right,
321 ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
322 ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
323 LEVEL(sp), TYPE(sp));
326 /* Split the page. */
327 if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
328 (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
332 /* If the left child is wrong, update it. */
333 if (lp == NULL && (ret =
334 memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
335 (void)__db_pgerr(file_dbp, argp->left);
340 memcpy(lp, _lp, file_dbp->pgsize);
342 if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
347 /* If the right child is wrong, update it. */
348 if (rp == NULL && (ret = memp_fget(mpf,
349 &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
350 (void)__db_pgerr(file_dbp, argp->right);
355 memcpy(rp, _rp, file_dbp->pgsize);
357 if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
363 * If the parent page is wrong, update it. This is of interest
364 * only if it was a root split, since root splits create parent
365 * pages. All other splits modify a parent page, but those are
366 * separately logged and recovered.
368 if (rootsplit && p_update) {
369 if (file_dbp->type == DB_BTREE)
370 P_INIT(pp, file_dbp->pgsize,
371 PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
372 _lp->level + 1, P_IBTREE);
374 P_INIT(pp, file_dbp->pgsize,
375 PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
376 _lp->level + 1, P_IRECNO);
378 file_dbp->type == DB_RECNO ||
379 F_ISSET(file_dbp, DB_BT_RECNUM) ?
380 __bam_total(_lp) + __bam_total(_rp) : 0);
382 if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
388 * Finally, redo the next-page link if necessary. This is of
389 * interest only if it wasn't a root split -- inserting a new
390 * page in the tree requires that any following page have its
391 * previous-page pointer updated to our new page. The next
392 * page had better exist.
394 if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
395 if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
396 (void)__db_pgerr(file_dbp, argp->npgno);
400 if (log_compare(&LSN(np), &argp->nlsn) == 0) {
401 PREV_PGNO(np) = argp->right;
403 if ((ret = memp_fput(mpf,
404 np, DB_MPOOL_DIRTY)) != 0)
411 * If the split page is wrong, replace its contents with the
412 * logged page contents. The split page had better exist.
414 if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
415 (void)__db_pgerr(file_dbp, pgno);
419 if (log_compare(lsnp, &LSN(pp)) == 0) {
420 memcpy(pp, argp->pg.data, argp->pg.size);
421 if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
427 * If it's a root split and the left child ever existed, put
428 * it on the free list. (If it's not a root split, we just
429 * updated the left page -- it's the same as the split page.)
430 * If the right child ever existed, root split or not, put it
433 if ((rootsplit && lp != NULL) || rp != NULL) {
434 if (rootsplit && lp != NULL &&
435 log_compare(lsnp, &LSN(lp)) == 0) {
436 lp->lsn = argp->llsn;
438 memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
443 log_compare(lsnp, &LSN(rp)) == 0) {
444 rp->lsn = argp->rlsn;
446 memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
453 * Finally, undo the next-page link if necessary. This is of
454 * interest only if it wasn't a root split -- inserting a new
455 * page in the tree requires that any following page have its
456 * previous-page pointer updated to our new page. The next
457 * page had better exist.
459 if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
460 if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
461 (void)__db_pgerr(file_dbp, argp->npgno);
465 if (log_compare(lsnp, &LSN(np)) == 0) {
466 PREV_PGNO(np) = argp->left;
467 np->lsn = argp->nlsn;
/* Keep the error code in ret, as every other memp_fput call in this function does, so a failure here is not reported as success. */
468 if ((ret = memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0)
476 *lsnp = argp->prev_lsn;
479 fatal: (void)__db_panic(file_dbp);
481 out: /* Free any pages that weren't dirtied. */
482 if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0)
484 if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0)
486 if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0)
488 if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0)
491 /* Free any allocated space. */
501 * __bam_rsplit_recover --
502 * Recovery function for a reverse split.
504 * PUBLIC: int __bam_rsplit_recover
505 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * __bam_rsplit_recover: redo or undo one logged reverse split, in which
 * the contents of page argp->pgno were copied over the root page.
 *
 *   Root page
 *     redo, LSN compares equal to argp->rootlsn: memcpy the logged page
 *       image (argp->pgdbt) over the root and force its pgno to PGNO_ROOT.
 *     undo, LSN compares equal to *lsnp: P_INIT the root one level above
 *       its current level, keeping its current type, re-insert the logged
 *       root entry (argp->rootent) at index 0 with __db_pitem, and reset
 *       the LSN to argp->rootlsn.
 *   Page argp->pgno
 *     redo, LSN compares equal to the LSN inside the logged image: the
 *       redo statements fall on lines outside this view.
 *     undo, LSN compares equal to *lsnp: memcpy the logged image back.
 *
 * Each page is returned through memp_fput, dirty when `modified` is
 * non-zero; a failing memp_fput leads to __db_panic.  The visible tail
 * of the function stores argp->prev_lsn into *lsnp.
 */
508 __bam_rsplit_recover(logp, dbtp, lsnp, redo, info)
515 __bam_rsplit_args *argp;
520 int cmp_n, cmp_p, modified, ret;
522 REC_PRINT(__bam_rsplit_print);
523 REC_INTRO(__bam_rsplit_read);
525 /* Fix the root page. */
527 if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
528 (void)__db_pgerr(file_dbp, pgno);
533 cmp_n = log_compare(lsnp, &LSN(pagep));
534 cmp_p = log_compare(&LSN(pagep), &argp->rootlsn);
535 if (cmp_p == 0 && redo) {
536 /* Need to redo update described. */
537 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
538 pagep->pgno = PGNO_ROOT;
541 } else if (cmp_n == 0 && !redo) {
542 /* Need to undo update described. */
543 P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT,
544 PGNO_INVALID, PGNO_INVALID, pagep->level + 1, TYPE(pagep));
545 if ((ret = __db_pitem(file_dbp, pagep, 0,
546 argp->rootent.size, &argp->rootent, NULL)) != 0)
548 pagep->lsn = argp->rootlsn;
551 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
552 (void)__db_panic(file_dbp);
556 /* Fix the page copied over the root page. */
557 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
558 (void)__db_pgerr(file_dbp, argp->pgno);
563 cmp_n = log_compare(lsnp, &LSN(pagep));
564 cmp_p = log_compare(&LSN(pagep), &LSN(argp->pgdbt.data));
565 if (cmp_p == 0 && redo) {
566 /* Need to redo update described. */
569 } else if (cmp_n == 0 && !redo) {
570 /* Need to undo update described. */
571 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
574 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
575 (void)__db_panic(file_dbp);
580 *lsnp = argp->prev_lsn;
586 * __bam_adj_recover --
587 * Recovery function for adj.
589 * PUBLIC: int __bam_adj_recover
590 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * __bam_adj_recover: redo or undo one logged index adjustment on page
 * argp->pgno.
 *
 *   redo, page LSN compares equal to argp->lsn: call __bam_adjindx with
 *     argp->indx, argp->indx_copy and argp->is_insert.
 *   undo, page LSN compares equal to *lsnp: call __bam_adjindx with the
 *     same indices and the insert flag inverted, then reset the page LSN
 *     to argp->lsn.
 *
 * The page is returned through memp_fput, dirty when `modified` is
 * non-zero, and *lsnp is moved to argp->prev_lsn only when that
 * memp_fput returns 0.  The err label puts the page back without the
 * dirty flag.
 */
593 __bam_adj_recover(logp, dbtp, lsnp, redo, info)
600 __bam_adj_args *argp;
604 int cmp_n, cmp_p, modified, ret;
606 REC_PRINT(__bam_adj_print);
607 REC_INTRO(__bam_adj_read);
609 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
610 (void)__db_pgerr(file_dbp, argp->pgno);
616 cmp_n = log_compare(lsnp, &LSN(pagep));
617 cmp_p = log_compare(&LSN(pagep), &argp->lsn);
618 if (cmp_p == 0 && redo) {
619 /* Need to redo update described. */
620 if ((ret = __bam_adjindx(file_dbp,
621 pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
626 } else if (cmp_n == 0 && !redo) {
627 /* Need to undo update described. */
628 if ((ret = __bam_adjindx(file_dbp,
629 pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
632 LSN(pagep) = argp->lsn;
635 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
636 *lsnp = argp->prev_lsn;
639 err: (void)memp_fput(mpf, pagep, 0);
645 * __bam_cadjust_recover --
646 * Recovery function for the adjust of a count change in an internal
649 * PUBLIC: int __bam_cadjust_recover
650 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * __bam_cadjust_recover: redo or undo one logged record-count adjustment
 * on the internal page argp->pgno.
 *
 * Which entry layout is touched depends on the file type:
 *   DB_BTREE with DB_BT_RECNUM set -> GET_BINTERNAL(pagep, argp->indx)
 *   DB_RECNO                       -> GET_RINTERNAL(pagep, argp->indx)
 *
 *   redo, page LSN compares equal to argp->lsn: add argp->adjust to the
 *     entry nrecs; when argp->total is set and the page is PGNO_ROOT,
 *     apply the same delta through RE_NREC_ADJ.
 *   undo, page LSN compares equal to *lsnp: subtract argp->adjust from
 *     the entry nrecs; when argp->total is set and the page is PGNO_ROOT,
 *     apply the negated delta through RE_NREC_ADJ; then reset the page
 *     LSN to argp->lsn.
 *
 * The page is returned through memp_fput, dirty when `modified` is
 * non-zero, and *lsnp is moved to argp->prev_lsn only when that
 * memp_fput returns 0.
 */
653 __bam_cadjust_recover(logp, dbtp, lsnp, redo, info)
660 __bam_cadjust_args *argp;
664 int cmp_n, cmp_p, modified, ret;
666 REC_PRINT(__bam_cadjust_print);
667 REC_INTRO(__bam_cadjust_read);
669 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
670 (void)__db_pgerr(file_dbp, argp->pgno);
676 cmp_n = log_compare(lsnp, &LSN(pagep));
677 cmp_p = log_compare(&LSN(pagep), &argp->lsn);
678 if (cmp_p == 0 && redo) {
679 /* Need to redo update described. */
680 if (file_dbp->type == DB_BTREE &&
681 F_ISSET(file_dbp, DB_BT_RECNUM)) {
682 GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
683 if (argp->total && PGNO(pagep) == PGNO_ROOT)
684 RE_NREC_ADJ(pagep, argp->adjust);
686 if (file_dbp->type == DB_RECNO) {
687 GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
688 if (argp->total && PGNO(pagep) == PGNO_ROOT)
689 RE_NREC_ADJ(pagep, argp->adjust);
694 } else if (cmp_n == 0 && !redo) {
695 /* Need to undo update described. */
696 if (file_dbp->type == DB_BTREE &&
697 F_ISSET(file_dbp, DB_BT_RECNUM)) {
698 GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
699 if (argp->total && PGNO(pagep) == PGNO_ROOT)
/* Undo has to take the delta back out of the root total, exactly as the DB_RECNO branch below does. */
700 RE_NREC_ADJ(pagep, -(argp->adjust));
702 if (file_dbp->type == DB_RECNO) {
703 GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
704 if (argp->total && PGNO(pagep) == PGNO_ROOT)
705 RE_NREC_ADJ(pagep, -(argp->adjust));
707 LSN(pagep) = argp->lsn;
710 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
711 *lsnp = argp->prev_lsn;
717 * __bam_cdel_recover --
718 * Recovery function for the intent-to-delete of a cursor record.
720 * PUBLIC: int __bam_cdel_recover
721 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * __bam_cdel_recover: redo or undo one logged cursor intent-to-delete
 * mark on page argp->pgno.
 *
 * The item affected is GET_BKEYDATA(pagep, argp->indx + O_INDX); only
 * its type field is changed.
 *
 *   redo, page LSN compares equal to argp->lsn: B_DSET on the type field
 *     (presumably sets the deleted bit -- confirm against the macro).
 *   undo, page LSN compares equal to *lsnp: B_DCLR on the type field,
 *     then reset the page LSN to argp->lsn.
 *
 * The page is returned through memp_fput, dirty when `modified` is
 * non-zero, and *lsnp is moved to argp->prev_lsn only when that
 * memp_fput returns 0.
 */
724 __bam_cdel_recover(logp, dbtp, lsnp, redo, info)
731 __bam_cdel_args *argp;
735 int cmp_n, cmp_p, modified, ret;
737 REC_PRINT(__bam_cdel_print);
738 REC_INTRO(__bam_cdel_read);
740 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
741 (void)__db_pgerr(file_dbp, argp->pgno);
747 cmp_n = log_compare(lsnp, &LSN(pagep));
748 cmp_p = log_compare(&LSN(pagep), &argp->lsn);
749 if (cmp_p == 0 && redo) {
750 /* Need to redo update described. */
751 B_DSET(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type);
755 } else if (cmp_n == 0 && !redo) {
756 /* Need to undo update described. */
757 B_DCLR(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type);
759 LSN(pagep) = argp->lsn;
762 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
763 *lsnp = argp->prev_lsn;