2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
11 static const char sccsid[] = "@(#)lock.c 10.52 (Sleepycat) 5/10/98";
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
26 #include "common_ext.h"
29 static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
30 static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
31 static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, u_int32_t,
32 const DBT *, db_lockmode_t, struct __db_lock **));
33 static int __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
34 static void __lock_remove_waiter
35 __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
45 if (lt->region->id >= DB_LOCK_MAXID)
47 id = ++lt->region->id;
48 UNLOCK_LOCKREGION(lt);
55 lock_vec(lt, locker, flags, list, nlist, elistp)
57 u_int32_t locker, flags;
59 DB_LOCKREQ *list, **elistp;
62 DB_LOCKOBJ *sh_obj, *sh_locker;
65 /* Validate arguments. */
67 __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
72 if ((ret = __lock_validate_region(lt)) != 0) {
73 UNLOCK_LOCKREGION(lt);
78 for (i = 0; i < nlist && ret == 0; i++) {
81 ret = __lock_get_internal(lt, locker, flags,
82 list[i].obj, list[i].mode, &lp);
84 list[i].lock = LOCK_TO_OFFSET(lt, lp);
85 lt->region->nrequests++;
89 lp = OFFSET_TO_LOCK(lt, list[i].lock);
90 if (lp->holder != locker) {
91 ret = DB_LOCK_NOTHELD;
94 list[i].mode = lp->mode;
96 /* XXX Need to copy the object. ??? */
97 ret = __lock_put_internal(lt, lp, 0);
100 /* Find the locker. */
101 if ((ret = __lock_getobj(lt, locker,
102 NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
105 for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
107 lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
108 if ((ret = __lock_put_internal(lt, lp, 1)) != 0)
111 __lock_freeobj(lt, sh_locker);
112 lt->region->nlockers--;
114 case DB_LOCK_PUT_OBJ:
116 /* Look up the object in the hash table. */
117 HASHLOOKUP(lt->hashtab, __db_lockobj, links,
118 list[i].obj, sh_obj, lt->region->table_size,
119 __lock_ohash, __lock_cmp);
120 if (sh_obj == NULL) {
125 * Release waiters first, because they won't cause
126 * anyone else to be awakened. If we release the
127 * lockers first, all the waiters get awakened
130 for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
132 lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) {
133 lt->region->nreleases += lp->refcount;
134 __lock_remove_waiter(lt, sh_obj, lp,
136 __lock_checklocker(lt, lp, 1);
139 for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
141 lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
143 lt->region->nreleases += lp->refcount;
144 SH_LIST_REMOVE(lp, locker_links, __db_lock);
145 SH_TAILQ_REMOVE(&sh_obj->holders, lp, links,
147 lp->status = DB_LSTAT_FREE;
148 SH_TAILQ_INSERT_HEAD(<->region->free_locks,
149 lp, links, __db_lock);
152 /* Now free the object. */
153 __lock_freeobj(lt, sh_obj);
157 /* Find the locker. */
158 if ((ret = __lock_getobj(lt, locker,
159 NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
162 for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
164 lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) {
165 __lock_printlock(lt, lp, 1);
169 __lock_freeobj(lt, sh_locker);
170 lt->region->nlockers--;
180 if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
182 lt->region->need_dd = 0;
186 UNLOCK_LOCKREGION(lt);
188 if (ret == 0 && run_dd)
189 lock_detect(lt, 0, lt->region->detect);
191 if (elistp && ret != 0)
192 *elistp = &list[i - 1];
197 lock_get(lt, locker, flags, obj, lock_mode, lock)
199 u_int32_t locker, flags;
201 db_lockmode_t lock_mode;
204 struct __db_lock *lockp;
207 /* Validate arguments. */
209 __db_fchk(lt->dbenv, "lock_get", flags, DB_LOCK_NOWAIT)) != 0)
214 ret = __lock_validate_region(lt);
215 if (ret == 0 && (ret = __lock_get_internal(lt,
216 locker, flags, obj, lock_mode, &lockp)) == 0) {
217 *lock = LOCK_TO_OFFSET(lt, lockp);
218 lt->region->nrequests++;
221 UNLOCK_LOCKREGION(lt);
230 struct __db_lock *lockp;
235 if ((ret = __lock_validate_region(lt)) != 0)
238 lockp = OFFSET_TO_LOCK(lt, lock);
239 ret = __lock_put_internal(lt, lockp, 0);
242 __lock_checklocker(lt, lockp, 0);
244 if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
246 lt->region->need_dd = 0;
250 UNLOCK_LOCKREGION(lt);
252 if (ret == 0 && run_dd)
253 lock_detect(lt, 0, lt->region->detect);
259 __lock_put_internal(lt, lockp, do_all)
261 struct __db_lock *lockp;
264 struct __db_lock *lp_w, *lp_h, *next_waiter;
268 if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD &&
269 lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) {
270 __db_err(lt->dbenv, "lock_put: invalid lock %lu",
271 (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region));
276 lt->region->nreleases += lockp->refcount;
278 lt->region->nreleases++;
279 if (do_all == 0 && lockp->refcount > 1) {
284 /* Get the object associated with this lock. */
285 sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
287 /* Remove lock from locker list. */
288 SH_LIST_REMOVE(lockp, locker_links, __db_lock);
290 /* Remove this lock from its holders/waitlist. */
291 if (lockp->status != DB_LSTAT_HELD)
292 __lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE);
294 SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);
297 * We need to do lock promotion. We also need to determine if
298 * we're going to need to run the deadlock detector again. If
299 * we release locks, and there are waiters, but no one gets promoted,
300 * then we haven't fundamentally changed the lockmgr state, so
301 * we may still have a deadlock and we have to run again. However,
302 * if there were no waiters, or we actually promoted someone, then
303 * we are OK and we don't have to run it immediately.
305 for (lp_w = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock),
306 state_changed = lp_w == NULL;
308 lp_w = next_waiter) {
309 next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);
310 for (lp_h = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
312 lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
313 if (CONFLICTS(lt, lp_h->mode, lp_w->mode) &&
314 lp_h->holder != lp_w->holder)
317 if (lp_h != NULL) /* Found a conflict. */
320 /* No conflict, promote the waiting lock. */
321 SH_TAILQ_REMOVE(&sh_obj->waiters, lp_w, links, __db_lock);
322 lp_w->status = DB_LSTAT_PENDING;
323 SH_TAILQ_INSERT_TAIL(&sh_obj->holders, lp_w, links);
325 /* Wake up waiter. */
326 (void)__db_mutex_unlock(&lp_w->mutex, lt->reginfo.fd);
330 /* Check if object should be reclaimed. */
331 if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
332 HASHREMOVE_EL(lt->hashtab, __db_lockobj,
333 links, sh_obj, lt->region->table_size, __lock_lhash);
334 if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
335 __db_shalloc_free(lt->mem,
336 SH_DBT_PTR(&sh_obj->lockobj));
337 SH_TAILQ_INSERT_HEAD(<->region->free_objs, sh_obj, links,
343 lockp->status = DB_LSTAT_FREE;
344 SH_TAILQ_INSERT_HEAD(<->region->free_locks, lockp, links, __db_lock);
347 * If we did not promote anyone; we need to run the deadlock
350 if (state_changed == 0)
351 lt->region->need_dd = 1;
357 __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
359 u_int32_t locker, flags;
361 db_lockmode_t lock_mode;
362 struct __db_lock **lockp;
364 struct __db_lock *newl, *lp;
365 DB_LOCKOBJ *sh_obj, *sh_locker;
372 * Check that lock mode is valid.
376 if ((u_int32_t)lock_mode >= lrp->nmodes) {
378 "lock_get: invalid lock mode %lu\n", (u_long)lock_mode);
382 /* Allocate a new lock. Optimize for the common case of a grant. */
383 if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) {
384 if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0)
387 newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
389 newl_off = LOCK_TO_OFFSET(lt, newl);
391 /* Optimize for common case of granting a lock. */
392 SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock);
394 newl->mode = lock_mode;
395 newl->status = DB_LSTAT_HELD;
396 newl->holder = locker;
399 if ((ret = __lock_getobj(lt, 0, obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0)
402 lrp = lt->region; /* getobj might have grown */
403 newl = OFFSET_TO_LOCK(lt, newl_off);
405 /* Now make new lock point to object */
406 newl->obj = SH_PTR_TO_OFF(newl, sh_obj);
409 * Now we have a lock and an object and we need to see if we should
410 * grant the lock. We use a FIFO ordering so we can only grant a
411 * new lock if it does not conflict with anyone on the holders list
412 * OR anyone on the waiters list. The reason that we don't grant if
413 * there's a conflict is that this can lead to starvation (a writer
414 * waiting on a popularly read item will never be granted). The
415 * downside of this is that a waiting reader can prevent an upgrade
416 * from reader to writer, which is not uncommon.
418 * There is one exception to the no-conflict rule. If a lock is held
419 * by the requesting locker AND the new lock does not conflict with
420 * any other holders, then we grant the lock. The most common place
421 * this happens is when the holder has a WRITE lock and a READ lock
422 * request comes in for the same locker. If we do not grant the read
423 * lock, then we guarantee deadlock.
425 * In case of conflict, we put the new lock on the end of the waiters
429 for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
431 lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
432 if (locker == lp->holder) {
433 if (lp->mode == lock_mode &&
434 lp->status == DB_LSTAT_HELD) {
435 /* Lock is held, just inc the ref count. */
437 SH_TAILQ_INSERT_HEAD(&lrp->free_locks,
438 newl, links, __db_lock);
443 } else if (CONFLICTS(lt, lp->mode, lock_mode))
447 if (lp == NULL && !ihold)
448 for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
450 lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
451 if (CONFLICTS(lt, lp->mode, lock_mode) &&
452 locker != lp->holder)
456 SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links);
457 else if (!(flags & DB_LOCK_NOWAIT))
458 SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links);
460 /* Free the lock and return an error. */
461 newl->status = DB_LSTAT_FREE;
462 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
463 return (DB_LOCK_NOTGRANTED);
467 * This is really a blocker for the process, so initialize it
468 * set. That way the current process will block when it tries
469 * to get it and the waking process will release it.
471 (void)__db_mutex_init(&newl->mutex,
472 MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
473 (void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);
476 * Now, insert the lock onto its locker's list.
479 __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
483 SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);
486 newl->status = DB_LSTAT_WAITING;
489 * We are about to wait; must release the region mutex.
490 * Then, when we wakeup, we need to reacquire the region
491 * mutex before continuing.
493 if (lrp->detect == DB_LOCK_NORUN)
494 lt->region->need_dd = 1;
495 UNLOCK_LOCKREGION(lt);
498 * We are about to wait; before waiting, see if the deadlock
499 * detector should be run.
501 if (lrp->detect != DB_LOCK_NORUN)
502 ret = lock_detect(lt, 0, lrp->detect);
504 (void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);
507 if (newl->status != DB_LSTAT_PENDING) {
508 /* Return to free list. */
509 __lock_checklocker(lt, newl, 0);
510 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
512 switch (newl->status) {
513 case DB_LSTAT_ABORTED:
514 ret = DB_LOCK_DEADLOCK;
516 case DB_LSTAT_NOGRANT:
517 ret = DB_LOCK_NOTGRANTED;
523 newl->status = DB_LSTAT_FREE;
526 newl->status = DB_LSTAT_HELD;
534 * __lock_is_locked --
536 * PUBLIC: int __lock_is_locked
537 * PUBLIC: __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t));
540 __lock_is_locked(lt, locker, dbt, mode)
546 struct __db_lock *lp;
552 /* Look up the object in the hash table. */
553 HASHLOOKUP(lt->hashtab, __db_lockobj, links,
554 dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp);
558 for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
560 lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
561 if (lp->holder == locker && lp->mode == mode)
569 * __lock_printlock --
571 * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
574 __lock_printlock(lt, lp, ispgno)
576 struct __db_lock *lp;
583 const char *mode, *status;
608 switch (lp->status) {
609 case DB_LSTAT_ABORTED:
621 case DB_LSTAT_NOGRANT:
624 case DB_LSTAT_WAITING:
627 case DB_LSTAT_PENDING:
634 printf("\t%lx\t%s\t%lu\t%s\t",
635 (u_long)lp->holder, mode, (u_long)lp->refcount, status);
637 lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
638 ptr = SH_DBT_PTR(&lockobj->lockobj);
640 /* Assume this is a DBT lock. */
641 memcpy(&pgno, ptr, sizeof(db_pgno_t));
642 printf("page %lu\n", (u_long)pgno);
644 obj = (u_int8_t *)lp + lp->obj - (u_int8_t *)lt->region;
645 printf("0x%lx ", (u_long)obj);
646 __db_pr(ptr, lockobj->lockobj.size);
652 * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *,
653 * PUBLIC: u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **));
656 __lock_getobj(lt, locker, dbt, type, objp)
658 u_int32_t locker, type;
670 /* Look up the object in the hash table. */
671 if (type == DB_LOCK_OBJTYPE) {
672 HASHLOOKUP(lt->hashtab, __db_lockobj, links, dbt, sh_obj,
673 lrp->table_size, __lock_ohash, __lock_cmp);
674 obj_size = dbt->size;
676 HASHLOOKUP(lt->hashtab, __db_lockobj, links, locker,
677 sh_obj, lrp->table_size, __lock_locker_hash,
679 obj_size = sizeof(locker);
683 * If we found the object, then we can just return it. If
684 * we didn't find the object, then we need to create it.
686 if (sh_obj == NULL) {
687 /* Create new object and then insert it into hash table. */
689 SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) {
690 if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
693 sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
697 * If we can fit this object in the structure, do so instead
698 * of shalloc-ing space for it.
700 if (obj_size <= sizeof(sh_obj->objdata))
704 __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
705 if ((ret = __lock_grow_region(lt,
706 DB_LOCK_MEM, obj_size)) != 0)
709 /* Reacquire the head of the list. */
710 sh_obj = SH_TAILQ_FIRST(&lrp->free_objs,
712 (void)__db_shalloc(lt->mem, obj_size, 0, &p);
715 src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
716 memcpy(p, src, obj_size);
719 SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);
721 SH_TAILQ_INIT(&sh_obj->waiters);
722 if (type == DB_LOCK_LOCKER)
723 SH_LIST_INIT(&sh_obj->heldby);
725 SH_TAILQ_INIT(&sh_obj->holders);
726 sh_obj->lockobj.size = obj_size;
727 sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);
729 HASHINSERT(lt->hashtab,
730 __db_lockobj, links, sh_obj, lrp->table_size, __lock_lhash);
732 if (type == DB_LOCK_LOCKER)
741 * Any lock on the waitlist has a process waiting for it. Therefore, we
742 * can't return the lock to the freelist immediately. Instead, we can
743 * remove the lock from the list of waiters, set the status field of the
744 * lock, and then let the process waking up return the lock to the
748 __lock_remove_waiter(lt, sh_obj, lockp, status)
751 struct __db_lock *lockp;
754 SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
755 lockp->status = status;
757 /* Wake whoever is waiting on this lock. */
758 (void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd);
762 __lock_checklocker(lt, lockp, do_remove)
764 struct __db_lock *lockp;
767 DB_LOCKOBJ *sh_locker;
770 SH_LIST_REMOVE(lockp, locker_links, __db_lock);
772 /* if the locker list is NULL, free up the object. */
773 if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker)
774 == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) {
775 __lock_freeobj(lt, sh_locker);
776 lt->region->nlockers--;
781 __lock_freeobj(lt, obj)
785 HASHREMOVE_EL(lt->hashtab,
786 __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
787 if (obj->lockobj.size > sizeof(obj->objdata))
788 __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
789 SH_TAILQ_INSERT_HEAD(<->region->free_objs, obj, links, __db_lockobj);