Update to db 2.3.10.
[kopensolaris-gnu/glibc.git] / db2 / lock / lock.c
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997
5  *      Sleepycat Software.  All rights reserved.
6  */
7
8 #include "config.h"
9
10 #ifndef lint
11 static const char sccsid[] = "@(#)lock.c        10.36 (Sleepycat) 9/24/97";
12 #endif /* not lint */
13
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stddef.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 #endif
27
28 #include "db_int.h"
29 #include "shqueue.h"
30 #include "db_page.h"
31 #include "db_shash.h"
32 #include "lock.h"
33 #include "common_ext.h"
34 #include "db_am.h"
35
36 static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
37 static int  __lock_count_locks __P((DB_LOCKREGION *));
38 static int  __lock_count_objs __P((DB_LOCKREGION *));
39 static int  __lock_create __P((const char *, int, DB_ENV *));
40 static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
41 static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, int, const DBT *,
42     db_lockmode_t, struct __db_lock **));
43 static int  __lock_grow_region __P((DB_LOCKTAB *, int, size_t));
44 static int  __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
45 static void __lock_remove_waiter
46     __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
47 static void __lock_reset_region __P((DB_LOCKTAB *));
48 static int  __lock_validate_region __P((DB_LOCKTAB *));
49 #ifdef DEBUG
50 static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKOBJ *));
51 static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *));
52 static void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
53 #endif
54
55 /*
56  * Create and initialize a lock region in shared memory.
57  */
58
59 /*
60  * __lock_create --
61  *      Create the lock region.  Returns an errno.  In most cases,
62  * the errno should be that returned by __db_ropen, in which case
63  * an EAGAIN means that we should retry, and an EEXIST means that
64  * the region exists and we didn't need to create it.  Any other
65  * sort of errno should be treated as a system error, leading to a
66  * failure of the original interface call.
67  */
68 static int
69 __lock_create(path, mode, dbenv)
70         const char *path;
71         int mode;
72         DB_ENV *dbenv;
73 {
74         struct __db_lock *lp;
75         struct lock_header *tq_head;
76         struct obj_header *obj_head;
77         DB_LOCKOBJ *op;
78         DB_LOCKREGION *lrp;
79         u_int maxlocks;
80         u_int32_t i;
81         int fd, lock_modes, nelements, ret;
82         u_int8_t *conflicts, *curaddr;
83
84         maxlocks = dbenv == NULL || dbenv->lk_max == 0 ?
85             DB_LOCK_DEFAULT_N : dbenv->lk_max;
86         lock_modes = dbenv == NULL || dbenv->lk_modes == 0 ?
87             DB_LOCK_RW_N : dbenv->lk_modes;
88         conflicts = dbenv == NULL || dbenv->lk_conflicts == NULL ?
89             (u_int8_t *)db_rw_conflicts : dbenv->lk_conflicts;
90
91         if ((ret =
92             __db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode,
93             LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)),
94             &fd, &lrp)) != 0)
95                 return (ret);
96
97         /* Region exists; now initialize it. */
98         lrp->table_size = __db_tablesize(maxlocks);
99         lrp->magic = DB_LOCKMAGIC;
100         lrp->version = DB_LOCKVERSION;
101         lrp->id = 0;
102         lrp->maxlocks = maxlocks;
103         lrp->need_dd = 0;
104         lrp->detect = DB_LOCK_NORUN;
105         lrp->numobjs = maxlocks;
106         lrp->nlockers = 0;
107         lrp->mem_bytes = ALIGN(STRING_SIZE(maxlocks), sizeof(size_t));
108         lrp->increment = lrp->hdr.size / 2;
109         lrp->nmodes = lock_modes;
110         lrp->nconflicts = 0;
111         lrp->nrequests = 0;
112         lrp->nreleases = 0;
113         lrp->ndeadlocks = 0;
114
115         /*
116          * As we write the region, we've got to maintain the alignment
117          * for the structures that follow each chunk.  This information
118          * ends up being encapsulated both in here as well as in the
119          * lock.h file for the XXX_SIZE macros.
120          */
121         /* Initialize conflict matrix. */
122         curaddr = (u_int8_t *)lrp + sizeof(DB_LOCKREGION);
123         memcpy(curaddr, conflicts, lock_modes * lock_modes);
124         curaddr += lock_modes * lock_modes;
125
126         /*
127          * Initialize hash table.
128          */
129         curaddr = (u_int8_t *)ALIGNP(curaddr, LOCK_HASH_ALIGN);
130         lrp->hash_off = curaddr - (u_int8_t *)lrp;
131         nelements = lrp->table_size;
132         __db_hashinit(curaddr, nelements);
133         curaddr += nelements * sizeof(DB_HASHTAB);
134
135         /*
136          * Initialize locks onto a free list. Since locks contains mutexes,
137          * we need to make sure that each lock is aligned on a MUTEX_ALIGNMENT
138          * boundary.
139          */
140         curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
141         tq_head = &lrp->free_locks;
142         SH_TAILQ_INIT(tq_head);
143
144         for (i = 0; i++ < maxlocks;
145             curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
146                 lp = (struct __db_lock *)curaddr;
147                 lp->status = DB_LSTAT_FREE;
148                 SH_TAILQ_INSERT_HEAD(tq_head, lp, links, __db_lock);
149         }
150
151         /* Initialize objects onto a free list.  */
152         obj_head = &lrp->free_objs;
153         SH_TAILQ_INIT(obj_head);
154
155         for (i = 0; i++ < maxlocks; curaddr += sizeof(DB_LOCKOBJ)) {
156                 op = (DB_LOCKOBJ *)curaddr;
157                 SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
158         }
159
160         /*
161          * Initialize the string space; as for all shared memory allocation
162          * regions, this requires size_t alignment, since we store the
163          * lengths of malloc'd areas in the area..
164          */
165         curaddr = (u_int8_t *)ALIGNP(curaddr, sizeof(size_t));
166         lrp->mem_off = curaddr - (u_int8_t *)lrp;
167         __db_shalloc_init(curaddr, lrp->mem_bytes);
168
169         /* Release the lock. */
170         (void)__db_mutex_unlock(&lrp->hdr.lock, fd);
171
172         /* Now unmap the region. */
173         if ((ret = __db_rclose(dbenv, fd, lrp)) != 0) {
174                 (void)lock_unlink(path, 1 /* force */, dbenv);
175                 return (ret);
176         }
177
178         return (0);
179 }
180
181 int
182 lock_open(path, flags, mode, dbenv, ltp)
183         const char *path;
184         int flags, mode;
185         DB_ENV *dbenv;
186         DB_LOCKTAB **ltp;
187 {
188         DB_LOCKTAB *lt;
189         int ret, retry_cnt;
190
191         /* Validate arguments. */
192 #ifdef HAVE_SPINLOCKS
193 #define OKFLAGS (DB_CREATE | DB_THREAD)
194 #else
195 #define OKFLAGS (DB_CREATE)
196 #endif
197         if ((ret = __db_fchk(dbenv, "lock_open", flags, OKFLAGS)) != 0)
198                 return (ret);
199
200         /*
201          * Create the lock table structure.
202          */
203         if ((lt = (DB_LOCKTAB *)calloc(1, sizeof(DB_LOCKTAB))) == NULL) {
204                 __db_err(dbenv, "%s", strerror(ENOMEM));
205                 return (ENOMEM);
206         }
207         lt->dbenv = dbenv;
208
209         /*
210          * Now, create the lock region if it doesn't already exist.
211          */
212         retry_cnt = 0;
213 retry:  if (LF_ISSET(DB_CREATE) &&
214             (ret = __lock_create(path, mode, dbenv)) != 0)
215                 if (ret == EAGAIN && ++retry_cnt < 3) {
216                         (void)__db_sleep(1, 0);
217                         goto retry;
218                 } else if (ret == EEXIST) /* We did not create the region */
219                         LF_CLR(DB_CREATE);
220                 else
221                         goto out;
222
223         /*
224          * Finally, open the region, map it in, and increment the
225          * reference count.
226          */
227         retry_cnt = 0;
228 retry1: if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE,
229             LF_ISSET(~(DB_CREATE | DB_THREAD)), &lt->fd, &lt->region)) != 0) {
230                 if (ret == EAGAIN && ++retry_cnt < 3) {
231                         (void)__db_sleep(1, 0);
232                         goto retry1;
233                 }
234                 goto out;
235          }
236
237         if (lt->region->magic != DB_LOCKMAGIC) {
238                 __db_err(dbenv, "lock_open: Bad magic number");
239                 ret = EINVAL;
240                 goto out;
241         }
242
243         /* Check for automatic deadlock detection. */
244         if (dbenv->lk_detect != DB_LOCK_NORUN) {
245                 if (lt->region->detect != DB_LOCK_NORUN &&
246                     dbenv->lk_detect != DB_LOCK_DEFAULT &&
247                     lt->region->detect != dbenv->lk_detect) {
248                         __db_err(dbenv,
249                             "lock_open: incompatible deadlock detector mode");
250                         ret = EINVAL;
251                         goto out;
252                 }
253                 if (lt->region->detect == DB_LOCK_NORUN)
254                         lt->region->detect = dbenv->lk_detect;
255         }
256
257         /* Set up remaining pointers into region. */
258         lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
259         lt->hashtab =
260             (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
261         lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
262         lt->reg_size = lt->region->hdr.size;
263
264         *ltp = lt;
265         return (0);
266
267 /* Error handling. */
268 out:    if (lt->region != NULL)
269                 (void)__db_rclose(lt->dbenv, lt->fd, lt->region);
270         if (LF_ISSET(DB_CREATE))
271                 (void)lock_unlink(path, 1, lt->dbenv);
272         free(lt);
273         return (ret);
274 }
275
276 int
277 lock_id (lt, idp)
278         DB_LOCKTAB *lt;
279         u_int32_t *idp;
280 {
281         u_int32_t id;
282
283         LOCK_LOCKREGION(lt);
284         if (lt->region->id >= DB_LOCK_MAXID)
285                 lt->region->id = 0;
286         id = ++lt->region->id;
287         UNLOCK_LOCKREGION(lt);
288
289         *idp = id;
290         return (0);
291 }
292
293 int
294 lock_vec(lt, locker, flags, list, nlist, elistp)
295         DB_LOCKTAB *lt;
296         u_int32_t locker;
297         int flags, nlist;
298         DB_LOCKREQ *list, **elistp;
299 {
300         struct __db_lock *lp;
301         DB_LOCKOBJ *sh_obj, *sh_locker;
302         int i, ret, run_dd;
303
304         /* Validate arguments. */
305         if ((ret =
306             __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
307                 return (ret);
308
309         LOCK_LOCKREGION(lt);
310
311         if ((ret = __lock_validate_region(lt)) != 0) {
312                 UNLOCK_LOCKREGION(lt);
313                 return (ret);
314         }
315
316         ret = 0;
317         for (i = 0; i < nlist && ret == 0; i++) {
318                 switch (list[i].op) {
319                 case DB_LOCK_GET:
320                         ret = __lock_get_internal(lt, locker, flags,
321                             list[i].obj, list[i].mode, &lp);
322                         if (ret == 0) {
323                                 list[i].lock = LOCK_TO_OFFSET(lt, lp);
324                                 lt->region->nrequests++;
325                         }
326                         break;
327                 case DB_LOCK_PUT:
328                         lp = OFFSET_TO_LOCK(lt, list[i].lock);
329                         if (lp->holder != locker) {
330                                 ret = DB_LOCK_NOTHELD;
331                                 break;
332                         }
333                         list[i].mode = lp->mode;
334
335                         /* XXX Need to copy the object. ??? */
336                         ret = __lock_put_internal(lt, lp, 0);
337                         break;
338                 case DB_LOCK_PUT_ALL:
339                         /* Find the locker. */
340                         if ((ret = __lock_getobj(lt, locker,
341                             NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
342                                 break;
343
344                         for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
345                             lp != NULL;
346                             lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
347                                 if ((ret = __lock_put_internal(lt, lp, 0)) != 0)
348                                         break;
349                         }
350                         __lock_freeobj(lt, sh_locker);
351                         lt->region->nlockers--;
352                         break;
353                 case DB_LOCK_PUT_OBJ:
354
355                         /* Look up the object in the hash table. */
356                         HASHLOOKUP(lt->hashtab, __db_lockobj, links,
357                             list[i].obj, sh_obj, lt->region->table_size,
358                             __lock_ohash, __lock_cmp);
359                         if (sh_obj == NULL) {
360                                 ret = EINVAL;
361                                 break;
362                         }
363                         /*
364                          * Release waiters first, because they won't cause
365                          * anyone else to be awakened.  If we release the
366                          * lockers first, all the waiters get awakened
367                          * needlessly.
368                          */
369                         for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
370                             lp != NULL;
371                             lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) {
372                                 lt->region->nreleases += lp->refcount;
373                                 __lock_remove_waiter(lt, sh_obj, lp,
374                                     DB_LSTAT_NOGRANT);
375                                 __lock_checklocker(lt, lp, 1);
376                         }
377
378                         for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
379                             lp != NULL;
380                             lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
381
382                                 lt->region->nreleases += lp->refcount;
383                                 SH_LIST_REMOVE(lp, locker_links, __db_lock);
384                                 SH_TAILQ_REMOVE(&sh_obj->holders, lp, links,
385                                     __db_lock);
386                                 lp->status = DB_LSTAT_FREE;
387                                 SH_TAILQ_INSERT_HEAD(&lt->region->free_locks,
388                                     lp, links, __db_lock);
389                         }
390
391                         /* Now free the object. */
392                         __lock_freeobj(lt, sh_obj);
393                         break;
394 #ifdef DEBUG
395                 case DB_LOCK_DUMP:
396                         /* Find the locker. */
397                         if ((ret = __lock_getobj(lt, locker,
398                             NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
399                                 break;
400
401                         for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
402                             lp != NULL;
403                             lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) {
404                                 __lock_printlock(lt, lp, 1);
405                                 ret = EINVAL;
406                         }
407                         if (ret == 0) {
408                                 __lock_freeobj(lt, sh_locker);
409                                 lt->region->nlockers--;
410                         }
411                         break;
412 #endif
413                 default:
414                         ret = EINVAL;
415                         break;
416                 }
417         }
418
419         if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
420                 run_dd = 1;
421                 lt->region->need_dd = 0;
422         } else
423                 run_dd = 0;
424
425         UNLOCK_LOCKREGION(lt);
426
427         if (ret == 0 && run_dd)
428                 lock_detect(lt, 0, lt->region->detect);
429
430         if (elistp && ret != 0)
431                 *elistp = &list[i - 1];
432         return (ret);
433 }
434
435 int
436 lock_get(lt, locker, flags, obj, lock_mode, lock)
437         DB_LOCKTAB *lt;
438         u_int32_t locker;
439         int flags;
440         const DBT *obj;
441         db_lockmode_t lock_mode;
442         DB_LOCK *lock;
443 {
444         struct __db_lock *lockp;
445         int ret;
446
447         /* Validate arguments. */
448         if ((ret =
449             __db_fchk(lt->dbenv, "lock_get", flags, DB_LOCK_NOWAIT)) != 0)
450                 return (ret);
451
452         LOCK_LOCKREGION(lt);
453
454         ret = __lock_validate_region(lt);
455         if (ret == 0 && (ret = __lock_get_internal(lt,
456             locker, flags, obj, lock_mode, &lockp)) == 0) {
457                 *lock = LOCK_TO_OFFSET(lt, lockp);
458                 lt->region->nrequests++;
459         }
460
461         UNLOCK_LOCKREGION(lt);
462         return (ret);
463 }
464
465 int
466 lock_put(lt, lock)
467         DB_LOCKTAB *lt;
468         DB_LOCK lock;
469 {
470         struct __db_lock *lockp;
471         int ret, run_dd;
472
473         LOCK_LOCKREGION(lt);
474
475         if ((ret = __lock_validate_region(lt)) != 0)
476                 return (ret);
477         else {
478                 lockp = OFFSET_TO_LOCK(lt, lock);
479                 ret = __lock_put_internal(lt, lockp, 0);
480         }
481
482         __lock_checklocker(lt, lockp, 0);
483
484         if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
485                 run_dd = 1;
486                 lt->region->need_dd = 0;
487         } else
488                 run_dd = 0;
489
490         UNLOCK_LOCKREGION(lt);
491
492         if (ret == 0 && run_dd)
493                 lock_detect(lt, 0, lt->region->detect);
494
495         return (ret);
496 }
497
498 int
499 lock_close(lt)
500         DB_LOCKTAB *lt;
501 {
502         int ret;
503
504         if ((ret = __db_rclose(lt->dbenv, lt->fd, lt->region)) != 0)
505                 return (ret);
506
507         /* Free lock table. */
508         free(lt);
509         return (0);
510 }
511
512 int
513 lock_unlink(path, force, dbenv)
514         const char *path;
515         int force;
516         DB_ENV *dbenv;
517 {
518         return (__db_runlink(dbenv,
519             DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, force));
520 }
521
522 /*
523  * XXX This looks like it could be void, but I'm leaving it returning
524  * an int because I think it will have to when we go through and add
525  * the appropriate error checking for the EINTR on mutexes.
526  */
527 static int
528 __lock_put_internal(lt, lockp, do_all)
529         DB_LOCKTAB *lt;
530         struct __db_lock *lockp;
531         int do_all;
532 {
533         struct __db_lock *lp_w, *lp_h, *next_waiter;
534         DB_LOCKOBJ *sh_obj;
535         int state_changed;
536
537         if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD &&
538             lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) {
539                 __db_err(lt->dbenv, "lock_put: invalid lock %lu",
540                     (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region));
541                 return (EINVAL);
542         }
543
544         if (do_all)
545                 lt->region->nreleases += lockp->refcount;
546         else
547                 lt->region->nreleases++;
548         if (do_all == 0 && lockp->refcount > 1) {
549                 lockp->refcount--;
550                 return (0);
551         }
552
553         /* Get the object associated with this lock. */
554         sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
555
556         /* Remove lock from locker list. */
557         SH_LIST_REMOVE(lockp, locker_links, __db_lock);
558
559         /* Remove this lock from its holders/waitlist. */
560         if (lockp->status != DB_LSTAT_HELD)
561                 __lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE);
562         else
563                 SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);
564
565         /*
566          * We need to do lock promotion.  We also need to determine if
567          * we're going to need to run the deadlock detector again.  If
568          * we release locks, and there are waiters, but no one gets promoted,
569          * then we haven't fundamentally changed the lockmgr state, so
570          * we may still have a deadlock and we have to run again.  However,
571          * if there were no waiters, or we actually promoted someone, then
572          * we are OK and we don't have to run it immediately.
573          */
574         for (lp_w = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock),
575             state_changed = lp_w == NULL;
576             lp_w != NULL;
577             lp_w = next_waiter) {
578                 next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);
579                 for (lp_h = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
580                     lp_h != NULL;
581                     lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
582                         if (CONFLICTS(lt, lp_h->mode, lp_w->mode) &&
583                             lp_h->holder != lp_w->holder)
584                                 break;
585                 }
586                 if (lp_h != NULL)       /* Found a conflict. */
587                         break;
588
589                 /* No conflict, promote the waiting lock. */
590                 SH_TAILQ_REMOVE(&sh_obj->waiters, lp_w, links, __db_lock);
591                 lp_w->status = DB_LSTAT_PENDING;
592                 SH_TAILQ_INSERT_TAIL(&sh_obj->holders, lp_w, links);
593
594                 /* Wake up waiter. */
595                 (void)__db_mutex_unlock(&lp_w->mutex, lt->fd);
596                 state_changed = 1;
597         }
598
599         /* Check if object should be reclaimed. */
600         if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
601                 HASHREMOVE_EL(lt->hashtab, __db_lockobj,
602                     links, sh_obj, lt->region->table_size, __lock_lhash);
603                 __db_shalloc_free(lt->mem, SH_DBT_PTR(&sh_obj->lockobj));
604                 SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
605                     __db_lockobj);
606                 state_changed = 1;
607         }
608
609         /* Free lock. */
610         lockp->status = DB_LSTAT_FREE;
611         SH_TAILQ_INSERT_HEAD(&lt->region->free_locks, lockp, links, __db_lock);
612
613         /*
614          * If we did not promote anyone; we need to run the deadlock
615          * detector again.
616          */
617         if (state_changed == 0)
618                 lt->region->need_dd = 1;
619
620         return (0);
621 }
622
623 static int
624 __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
625         DB_LOCKTAB *lt;
626         u_int32_t locker;
627         int flags;
628         const DBT *obj;
629         db_lockmode_t lock_mode;
630         struct __db_lock **lockp;
631 {
632         struct __db_lock *newl, *lp;
633         DB_LOCKOBJ *sh_obj, *sh_locker;
634         DB_LOCKREGION *lrp;
635         size_t newl_off;
636         int ret;
637
638         ret = 0;
639         /*
640          * Check that lock mode is valid.
641          */
642
643         lrp = lt->region;
644         if ((u_int32_t)lock_mode >= lrp->nmodes) {
645                 __db_err(lt->dbenv,
646                     "lock_get: invalid lock mode %lu\n", (u_long)lock_mode);
647                 return (EINVAL);
648         }
649
650         /* Allocate a new lock.  Optimize for the common case of a grant. */
651         if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) {
652                 if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0)
653                         return (ret);
654                 lrp = lt->region;
655                 newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
656         }
657         newl_off = LOCK_TO_OFFSET(lt, newl);
658
659         /* Optimize for common case of granting a lock. */
660         SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock);
661
662         newl->mode = lock_mode;
663         newl->status = DB_LSTAT_HELD;
664         newl->holder = locker;
665         newl->refcount = 1;
666
667         if ((ret =
668             __lock_getobj(lt, 0, (DBT *)obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0)
669                 return (ret);
670
671         lrp = lt->region;                       /* getobj might have grown */
672         newl = OFFSET_TO_LOCK(lt, newl_off);
673
674         /* Now make new lock point to object */
675         newl->obj = SH_PTR_TO_OFF(newl, sh_obj);
676
677         /*
678          * Now we have a lock and an object and we need to see if we should
679          * grant the lock.  We use a FIFO ordering so we can only grant a
680          * new lock if it does not conflict with anyone on the holders list
681          * OR anyone on the waiters list.  The reason that we don't grant if
682          * there's a conflict is that this can lead to starvation (a writer
683          * waiting on a popularly read item will never ben granted).  The
684          * downside of this is that a waiting reader can prevent an upgrade
685          * from reader to writer, which is not uncommon.  In case of conflict,
686          * we put the new lock on the end of the waiters list.
687          */
688         for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
689             lp != NULL;
690             lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
691                 if (CONFLICTS(lt, lp->mode, lock_mode) &&
692                     locker != lp->holder)
693                         break;
694                 else if (lp->holder == locker && lp->mode == lock_mode &&
695                     lp->status == DB_LSTAT_HELD) {
696                         /* Lock is already held, just inc the ref count. */
697                         lp->refcount++;
698                         SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
699                             __db_lock);
700                         *lockp = lp;
701                         return (0);
702                 }
703         }
704
705         if (lp == NULL)
706                 for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
707                     lp != NULL;
708                     lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
709                         if (CONFLICTS(lt, lp->mode, lock_mode) &&
710                             locker != lp->holder)
711                                 break;
712                 }
713         if (lp == NULL)
714                 SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links);
715         else if (!(flags & DB_LOCK_NOWAIT))
716                 SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links);
717         else {
718                 /* Free the lock and return an error. */
719                 newl->status = DB_LSTAT_FREE;
720                 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
721                 return (DB_LOCK_NOTGRANTED);
722         }
723
724         /*
725          * This is really a blocker for the process, so initialize it
726          * set.  That way the current process will block when it tries
727          * to get it and the waking process will release it.
728          */
729         (void)__db_mutex_init(&newl->mutex,
730             MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
731         (void)__db_mutex_lock(&newl->mutex, lt->fd,
732             lt->dbenv == NULL ? NULL : lt->dbenv->db_yield);
733
734         /*
735          * Now, insert the lock onto its locker's list.
736          */
737         if ((ret =
738             __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
739                 return (ret);
740
741         lrp = lt->region;
742         SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);
743
744         if (lp != NULL) {
745                 newl->status = DB_LSTAT_WAITING;
746                 lrp->nconflicts++;
747                 /*
748                  * We are about to wait; must release the region mutex.
749                  * Then, when we wakeup, we need to reacquire the region
750                  * mutex before continuing.
751                  */
752                 if (lrp->detect == DB_LOCK_NORUN)
753                         lt->region->need_dd = 1;
754                 UNLOCK_LOCKREGION(lt);
755
756                 /*
757                  * We are about to wait; before waiting, see if the deadlock
758                  * detector should be run.
759                  */
760                 if (lrp->detect != DB_LOCK_NORUN)
761                         ret = lock_detect(lt, 0, lrp->detect);
762
763                 (void)__db_mutex_lock(&newl->mutex,
764                     lt->fd, lt->dbenv == NULL ? NULL : lt->dbenv->db_yield);
765
766                 LOCK_LOCKREGION(lt);
767                 if (newl->status != DB_LSTAT_PENDING) {
768                         /* Return to free list. */
769                         __lock_checklocker(lt, newl, 0);
770                         SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
771                             __db_lock);
772                         switch (newl->status) {
773                                 case DB_LSTAT_ABORTED:
774                                         ret = DB_LOCK_DEADLOCK;
775                                         break;
776                                 case DB_LSTAT_NOGRANT:
777                                         ret = DB_LOCK_NOTGRANTED;
778                                         break;
779                                 default:
780                                         ret = EINVAL;
781                                         break;
782                         }
783                         newl->status = DB_LSTAT_FREE;
784                         newl = NULL;
785                 } else
786                         newl->status = DB_LSTAT_HELD;
787         }
788
789         *lockp = newl;
790         return (ret);
791 }
792
793 /*
794  * This is called at every interface to verify if the region
795  * has changed size, and if so, to remap the region in and
796  * reset the process pointers.
797  */
798 static int
799 __lock_validate_region(lt)
800         DB_LOCKTAB *lt;
801 {
802         int ret;
803
804         if (lt->reg_size == lt->region->hdr.size)
805                 return (0);
806
807         /* Grow the region. */
808         if ((ret = __db_rremap(lt->dbenv, lt->region,
809             lt->reg_size, lt->region->hdr.size, lt->fd, &lt->region)) != 0)
810                 return (ret);
811
812         __lock_reset_region(lt);
813
814         return (0);
815 }
816
817 /*
818  * We have run out of space; time to grow the region.
819  */
820 static int
821 __lock_grow_region(lt, which, howmuch)
822         DB_LOCKTAB *lt;
823         int which;
824         size_t howmuch;
825 {
826         struct __db_lock *newl;
827         struct lock_header *lock_head;
828         struct obj_header *obj_head;
829         DB_LOCKOBJ *op;
830         DB_LOCKREGION *lrp;
831         float lock_ratio, obj_ratio;
832         size_t incr, oldsize, used;
833         u_int32_t i, newlocks, newmem, newobjs;
834         int ret, usedlocks, usedmem, usedobjs;
835         u_int8_t *curaddr;
836
837         lrp = lt->region;
838         oldsize = lrp->hdr.size;
839         incr = lrp->increment;
840
841         /* Figure out how much of each sort of space we have. */
842         usedmem = lrp->mem_bytes - __db_shalloc_count(lt->mem);
843         usedobjs = lrp->numobjs - __lock_count_objs(lrp);
844         usedlocks = lrp->maxlocks - __lock_count_locks(lrp);
845
846         /*
847          * Figure out what fraction of the used space belongs to each
848          * different type of "thing" in the region.  Then partition the
849          * new space up according to this ratio.
850          */
851         used = usedmem +
852             usedlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) +
853             usedobjs * sizeof(DB_LOCKOBJ);
854
855         lock_ratio = usedlocks *
856             ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) / (float)used;
857         obj_ratio = usedobjs * sizeof(DB_LOCKOBJ) / (float)used;
858
859         newlocks = (u_int32_t)(lock_ratio *
860             incr / ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
861         newobjs = (u_int32_t)(obj_ratio * incr / sizeof(DB_LOCKOBJ));
862         newmem = incr -
863             (newobjs * sizeof(DB_LOCKOBJ) +
864             newlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
865
866         /*
867          * Make sure we allocate enough memory for the object being
868          * requested.
869          */
870         switch (which) {
871                 case DB_LOCK_LOCK:
872                         if (newlocks == 0) {
873                                 newlocks = 10;
874                                 incr += newlocks * sizeof(struct __db_lock);
875                         }
876                         break;
877                 case DB_LOCK_OBJ:
878                         if (newobjs == 0) {
879                                 newobjs = 10;
880                                 incr += newobjs * sizeof(DB_LOCKOBJ);
881                         }
882                         break;
883                 case DB_LOCK_MEM:
884                         if (newmem < howmuch * 2) {
885                                 incr += howmuch * 2 - newmem;
886                                 newmem = howmuch * 2;
887                         }
888                         break;
889         }
890
891         newmem += ALIGN(incr, sizeof(size_t)) - incr;
892         incr = ALIGN(incr, sizeof(size_t));
893
894         /*
895          * Since we are going to be allocating locks at the beginning of the
896          * new chunk, we need to make sure that the chunk is MUTEX_ALIGNMENT
897          * aligned.  We did not guarantee this when we created the region, so
898          * we may need to pad the old region by extra bytes to ensure this
899          * alignment.
900          */
901         incr += ALIGN(oldsize, MUTEX_ALIGNMENT) - oldsize;
902
903         __db_err(lt->dbenv,
904             "Growing lock region: %lu locks %lu objs %lu bytes",
905             (u_long)newlocks, (u_long)newobjs, (u_long)newmem);
906
907         if ((ret = __db_rgrow(lt->dbenv, lt->fd, incr)) != 0)
908                 return (ret);
909         if ((ret = __db_rremap(lt->dbenv,
910             lt->region, oldsize, oldsize + incr, lt->fd, &lt->region)) != 0)
911                 return (ret);
912         __lock_reset_region(lt);
913
914         /* Update region parameters. */
915         lrp = lt->region;
916         lrp->increment = incr << 1;
917         lrp->maxlocks += newlocks;
918         lrp->numobjs += newobjs;
919         lrp->mem_bytes += newmem;
920
921         curaddr = (u_int8_t *)lrp + oldsize;
922         curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
923
924         /* Put new locks onto the free list. */
925         lock_head = &lrp->free_locks;
926         for (i = 0; i++ < newlocks;
927             curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
928                 newl = (struct __db_lock *)curaddr;
929                 SH_TAILQ_INSERT_HEAD(lock_head, newl, links, __db_lock);
930         }
931
932         /* Put new objects onto the free list.  */
933         obj_head = &lrp->free_objs;
934         for (i = 0; i++ < newobjs; curaddr += sizeof(DB_LOCKOBJ)) {
935                 op = (DB_LOCKOBJ *)curaddr;
936                 SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
937         }
938
939         *((size_t *)curaddr) = newmem - sizeof(size_t);
940         curaddr += sizeof(size_t);
941         __db_shalloc_free(lt->mem, curaddr);
942
943         return (0);
944 }
945
946 #ifdef DEBUG
947 void
948 __lock_dump_region(lt, flags)
949         DB_LOCKTAB *lt;
950         unsigned long flags;
951 {
952         struct __db_lock *lp;
953         DB_LOCKOBJ *op;
954         DB_LOCKREGION *lrp;
955         u_int32_t i, j;
956
957         lrp = lt->region;
958
959         printf("Lock region parameters\n");
960         printf("%s:0x%x\t%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\t\n",
961             "magic      ", lrp->magic,
962             "version    ", (u_long)lrp->version,
963             "processes  ", (u_long)lrp->hdr.refcnt,
964             "maxlocks   ", (u_long)lrp->maxlocks,
965             "table size ", (u_long)lrp->table_size,
966             "nmodes     ", (u_long)lrp->nmodes,
967             "numobjs    ", (u_long)lrp->numobjs);
968         printf("%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\n",
969             "size       ", (u_long)lrp->hdr.size,
970             "nlockers   ", (u_long)lrp->nlockers,
971             "hash_off   ", (u_long)lrp->hash_off,
972             "increment  ", (u_long)lrp->increment,
973             "mem_off    ", (u_long)lrp->mem_off,
974             "mem_bytes  ", (u_long)lrp->mem_bytes);
975 #ifndef HAVE_SPINLOCKS
976         printf("Mutex: off %lu", (u_long)lrp->hdr.lock.off);
977 #endif
978 #ifdef MUTEX_STATISTICS
979         printf(" waits %lu nowaits %lu",
980             (u_long)lrp->hdr.lock.mutex_set_wait,
981             (u_long)lrp->hdr.lock.mutex_set_nowait);
982 #endif
983         printf("\n%s:%lu\t%s:%lu\t%s:%lu\t%s:%lu\n",
984             "nconflicts ", (u_long)lrp->nconflicts,
985             "nrequests  ", (u_long)lrp->nrequests,
986             "nreleases  ", (u_long)lrp->nreleases,
987             "ndeadlocks ", (u_long)lrp->ndeadlocks);
988         printf("need_dd    %lu\n", (u_long)lrp->need_dd);
989         if (flags & LOCK_DEBUG_CONF) {
990                 printf("\nConflict matrix\n");
991
992                 for (i = 0; i < lrp->nmodes; i++) {
993                         for (j = 0; j < lrp->nmodes; j++)
994                                 printf("%lu\t",
995                                     (u_long)lt->conflicts[i * lrp->nmodes + j]);
996                         printf("\n");
997                 }
998         }
999
1000         for (i = 0; i < lrp->table_size; i++) {
1001                 op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
1002                 if (op != NULL && flags & LOCK_DEBUG_BUCKET)
1003                         printf("Bucket %lu:\n", (unsigned long)i);
1004                 while (op != NULL) {
1005                         if (op->type == DB_LOCK_LOCKER &&
1006                             flags & LOCK_DEBUG_LOCKERS)
1007                                 __lock_dump_locker(lt, op);
1008                         else if (flags & LOCK_DEBUG_OBJECTS &&
1009                             op->type == DB_LOCK_OBJTYPE)
1010                                 __lock_dump_object(lt, op);
1011                         op = SH_TAILQ_NEXT(op, links, __db_lockobj);
1012                 }
1013         }
1014
1015         if (flags & LOCK_DEBUG_LOCK) {
1016                 printf("\nLock Free List\n");
1017                 for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
1018                     lp != NULL;
1019                     lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
1020                         printf("0x%x: %lu\t%lu\t%lu\t0x%x\n", (u_int)lp,
1021                             (u_long)lp->holder, (u_long)lp->mode,
1022                             (u_long)lp->status, (u_int)lp->obj);
1023                 }
1024         }
1025
1026         if (flags & LOCK_DEBUG_LOCK) {
1027                 printf("\nObject Free List\n");
1028                 for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
1029                     op != NULL;
1030                     op = SH_TAILQ_NEXT(op, links, __db_lockobj))
1031                         printf("0x%x\n", (u_int)op);
1032         }
1033
1034         if (flags & LOCK_DEBUG_MEM) {
1035                 printf("\nMemory Free List\n");
1036                 __db_shalloc_dump(stdout, lt->mem);
1037         }
1038 }
1039
1040 static void
1041 __lock_dump_locker(lt, op)
1042         DB_LOCKTAB *lt;
1043         DB_LOCKOBJ *op;
1044 {
1045         struct __db_lock *lp;
1046         u_int32_t locker;
1047         void *ptr;
1048
1049         ptr = SH_DBT_PTR(&op->lockobj);
1050         memcpy(&locker, ptr, sizeof(u_int32_t));
1051         printf("L %lx", (u_long)locker);
1052
1053         lp = SH_LIST_FIRST(&op->heldby, __db_lock);
1054         if (lp == NULL) {
1055                 printf("\n");
1056                 return;
1057         }
1058         for (; lp != NULL; lp = SH_LIST_NEXT(lp, locker_links, __db_lock))
1059                 __lock_printlock(lt, lp, 0);
1060 }
1061
1062 static void
1063 __lock_dump_object(lt, op)
1064         DB_LOCKTAB *lt;
1065         DB_LOCKOBJ *op;
1066 {
1067         struct __db_lock *lp;
1068         u_int32_t j;
1069         char *ptr;
1070
1071         ptr = SH_DBT_PTR(&op->lockobj);
1072         for (j = 0; j < op->lockobj.size; ptr++, j++)
1073                 printf("%c", (int)*ptr);
1074         printf("\n");
1075
1076         printf("H:");
1077         for (lp =
1078             SH_TAILQ_FIRST(&op->holders, __db_lock);
1079             lp != NULL;
1080             lp = SH_TAILQ_NEXT(lp, links, __db_lock))
1081                 __lock_printlock(lt, lp, 0);
1082         lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
1083         if (lp != NULL) {
1084                 printf("\nW:");
1085                 for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock))
1086                         __lock_printlock(lt, lp, 0);
1087         }
1088 }
1089
1090 int
1091 __lock_is_locked(lt, locker, dbt, mode)
1092         DB_LOCKTAB *lt;
1093         u_int32_t locker;
1094         DBT *dbt;
1095         db_lockmode_t mode;
1096 {
1097         struct __db_lock *lp;
1098         DB_LOCKOBJ *sh_obj;
1099         DB_LOCKREGION *lrp;
1100
1101         lrp = lt->region;
1102
1103         /* Look up the object in the hash table. */
1104         HASHLOOKUP(lt->hashtab, __db_lockobj, links,
1105             dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp);
1106         if (sh_obj == NULL)
1107                 return (0);
1108
1109         for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
1110             lp != NULL;
1111             lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
1112                 if (lp->holder == locker && lp->mode == mode)
1113                         return (1);
1114         }
1115
1116         return (0);
1117 }
1118
1119 static void
1120 __lock_printlock(lt, lp, ispgno)
1121         DB_LOCKTAB *lt;
1122         struct __db_lock *lp;
1123         int ispgno;
1124 {
1125         DB_LOCKOBJ *lockobj;
1126         db_pgno_t pgno;
1127         size_t obj;
1128         u_int8_t *ptr;
1129         char *mode, *stat;
1130
1131         switch (lp->mode) {
1132         case DB_LOCK_IREAD:
1133                 mode = "IREAD";
1134                 break;
1135         case DB_LOCK_IWR:
1136                 mode = "IWR";
1137                 break;
1138         case DB_LOCK_IWRITE:
1139                 mode = "IWRITE";
1140                 break;
1141         case DB_LOCK_NG:
1142                 mode = "NG";
1143                 break;
1144         case DB_LOCK_READ:
1145                 mode = "READ";
1146                 break;
1147         case DB_LOCK_WRITE:
1148                 mode = "WRITE";
1149                 break;
1150         default:
1151                 mode = "UNKNOWN";
1152                 break;
1153         }
1154         switch (lp->status) {
1155         case DB_LSTAT_ABORTED:
1156                 stat = "ABORT";
1157                 break;
1158         case DB_LSTAT_ERR:
1159                 stat = "ERROR";
1160                 break;
1161         case DB_LSTAT_FREE:
1162                 stat = "FREE";
1163                 break;
1164         case DB_LSTAT_HELD:
1165                 stat = "HELD";
1166                 break;
1167         case DB_LSTAT_NOGRANT:
1168                 stat = "NONE";
1169                 break;
1170         case DB_LSTAT_WAITING:
1171                 stat = "WAIT";
1172                 break;
1173         case DB_LSTAT_PENDING:
1174                 stat = "PENDING";
1175                 break;
1176         default:
1177                 stat = "UNKNOWN";
1178                 break;
1179         }
1180         printf("\t%lx\t%s\t%lu\t%s\t",
1181             (u_long)lp->holder, mode, (u_long)lp->refcount, stat);
1182
1183         lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
1184         ptr = SH_DBT_PTR(&lockobj->lockobj);
1185         if (ispgno) {
1186                 /* Assume this is a DBT lock. */
1187                 memcpy(&pgno, ptr, sizeof(db_pgno_t));
1188                 printf("page %lu\n", (u_long)pgno);
1189         } else {
1190                 obj = (u_int8_t *)lp + lp->obj - (u_int8_t *)lt->region;
1191                 printf("0x%lx ", (u_long)obj);
1192                 __db_pr(ptr, lockobj->lockobj.size);
1193                 printf("\n");
1194         }
1195 }
1196
1197 #endif
1198
1199 static int
1200 __lock_count_locks(lrp)
1201         DB_LOCKREGION *lrp;
1202 {
1203         struct __db_lock *newl;
1204         int count;
1205
1206         count = 0;
1207         for (newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
1208             newl != NULL;
1209             newl = SH_TAILQ_NEXT(newl, links, __db_lock))
1210                 count++;
1211
1212         return (count);
1213 }
1214
1215 static int
1216 __lock_count_objs(lrp)
1217         DB_LOCKREGION *lrp;
1218 {
1219         DB_LOCKOBJ *obj;
1220         int count;
1221
1222         count = 0;
1223         for (obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
1224             obj != NULL;
1225             obj = SH_TAILQ_NEXT(obj, links, __db_lockobj))
1226                 count++;
1227
1228         return (count);
1229 }
1230
1231 /*
1232  * PUBLIC: int __lock_getobj  __P((DB_LOCKTAB *,
1233  * PUBLIC:     u_int32_t, DBT *, u_int32_t type, DB_LOCKOBJ **));
1234  */
1235 int
1236 __lock_getobj(lt, locker, dbt, type, objp)
1237         DB_LOCKTAB *lt;
1238         u_int32_t locker, type;
1239         DBT *dbt;
1240         DB_LOCKOBJ **objp;
1241 {
1242         DB_LOCKREGION *lrp;
1243         DB_LOCKOBJ *sh_obj;
1244         u_int32_t obj_size;
1245         int ret;
1246         void *p, *src;
1247
1248         lrp = lt->region;
1249
1250         /* Look up the object in the hash table. */
1251         if (type == DB_LOCK_OBJTYPE) {
1252                 HASHLOOKUP(lt->hashtab, __db_lockobj, links, dbt, sh_obj,
1253                     lrp->table_size, __lock_ohash, __lock_cmp);
1254                 obj_size = dbt->size;
1255         } else {
1256                 HASHLOOKUP(lt->hashtab, __db_lockobj, links, locker,
1257                     sh_obj, lrp->table_size, __lock_locker_hash,
1258                     __lock_locker_cmp);
1259                 obj_size = sizeof(locker);
1260         }
1261
1262         /*
1263          * If we found the object, then we can just return it.  If
1264          * we didn't find the object, then we need to create it.
1265          */
1266         if (sh_obj == NULL) {
1267                 /* Create new object and then insert it into hash table. */
1268                 if ((sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj))
1269                     == NULL) {
1270                         if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
1271                                 return (ret);
1272                         lrp = lt->region;
1273                         sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
1274                 }
1275                 if ((ret = __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
1276                         if ((ret = __lock_grow_region(lt,
1277                             DB_LOCK_MEM, obj_size)) != 0)
1278                                 return (ret);
1279                         lrp = lt->region;
1280                         /* Reacquire the head of the list. */
1281                         sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
1282                         (void)__db_shalloc(lt->mem, obj_size, 0, &p);
1283                 }
1284                 sh_obj->type = type;
1285                 src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
1286                 memcpy(p, src, obj_size);
1287                 SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);
1288
1289                 SH_TAILQ_INIT(&sh_obj->waiters);
1290                 if (type == DB_LOCK_LOCKER)
1291                         SH_LIST_INIT(&sh_obj->heldby);
1292                 else
1293                         SH_TAILQ_INIT(&sh_obj->holders);
1294                 sh_obj->lockobj.size = obj_size;
1295                 sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);
1296
1297                 HASHINSERT(lt->hashtab,
1298                     __db_lockobj, links, sh_obj, lrp->table_size, __lock_lhash);
1299
1300                 if (type == DB_LOCK_LOCKER)
1301                         lrp->nlockers++;
1302         }
1303
1304         *objp = sh_obj;
1305         return (0);
1306 }
1307
1308 /*
1309  * Any lock on the waitlist has a process waiting for it.  Therefore, we
1310  * can't return the lock to the freelist immediately.  Instead, we can
1311  * remove the lock from the list of waiters, set the status field of the
1312  * lock, and then let the process waking up return the lock to the
1313  * free list.
1314  */
1315 static void
1316 __lock_remove_waiter(lt, sh_obj, lockp, status)
1317         DB_LOCKTAB *lt;
1318         DB_LOCKOBJ *sh_obj;
1319         struct __db_lock *lockp;
1320         db_status_t status;
1321 {
1322         SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
1323         lockp->status = status;
1324
1325         /* Wake whoever is waiting on this lock. */
1326         (void)__db_mutex_unlock(&lockp->mutex, lt->fd);
1327 }
1328
1329 static void
1330 __lock_freeobj(lt, obj)
1331         DB_LOCKTAB *lt;
1332         DB_LOCKOBJ *obj;
1333 {
1334         HASHREMOVE_EL(lt->hashtab,
1335             __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
1336         __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
1337         SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
1338 }
1339
1340 static void
1341 __lock_checklocker(lt, lockp, do_remove)
1342         DB_LOCKTAB *lt;
1343         struct __db_lock *lockp;
1344         int do_remove;
1345 {
1346         DB_LOCKOBJ *sh_locker;
1347
1348         if (do_remove)
1349                 SH_LIST_REMOVE(lockp, locker_links, __db_lock);
1350
1351         /* if the locker list is NULL, free up the object. */
1352         if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker)
1353             == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) {
1354                 __lock_freeobj(lt, sh_locker);
1355                 lt->region->nlockers--;
1356         }
1357 }
1358
1359 static void
1360 __lock_reset_region(lt)
1361         DB_LOCKTAB *lt;
1362 {
1363         lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
1364         lt->hashtab =
1365             (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
1366         lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
1367         lt->reg_size = lt->region->hdr.size;
1368 }