Update from db-2.3.16.
[kopensolaris-gnu/glibc.git] / db2 / lock / lock.c
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997
5  *      Sleepycat Software.  All rights reserved.
6  */
7
8 #include "config.h"
9
10 #ifndef lint
11 static const char sccsid[] = "@(#)lock.c        10.43 (Sleepycat) 1/8/98";
12 #endif /* not lint */
13
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stddef.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 #endif
27
28 #include "db_int.h"
29 #include "shqueue.h"
30 #include "db_page.h"
31 #include "db_shash.h"
32 #include "lock.h"
33 #include "common_ext.h"
34 #include "db_am.h"
35
36 static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
37 static int  __lock_count_locks __P((DB_LOCKREGION *));
38 static int  __lock_count_objs __P((DB_LOCKREGION *));
39 static int  __lock_create __P((const char *, int, DB_ENV *));
40 static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
41 static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, int, const DBT *,
42     db_lockmode_t, struct __db_lock **));
43 static int  __lock_grow_region __P((DB_LOCKTAB *, int, size_t));
44 static int  __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
45 static void __lock_remove_waiter
46     __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
47 static void __lock_reset_region __P((DB_LOCKTAB *));
48 static int  __lock_validate_region __P((DB_LOCKTAB *));
49 #ifdef DEBUG
50 static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKOBJ *));
51 static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *));
52 static void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
53 #endif
54
55 /*
56  * Create and initialize a lock region in shared memory.
57  */
58
59 /*
60  * __lock_create --
61  *      Create the lock region.  Returns an errno.  In most cases,
62  * the errno should be that returned by __db_ropen, in which case
63  * an EAGAIN means that we should retry, and an EEXIST means that
64  * the region exists and we didn't need to create it.  Any other
65  * sort of errno should be treated as a system error, leading to a
66  * failure of the original interface call.
67  */
68 static int
69 __lock_create(path, mode, dbenv)
70         const char *path;
71         int mode;
72         DB_ENV *dbenv;
73 {
74         struct __db_lock *lp;
75         struct lock_header *tq_head;
76         struct obj_header *obj_head;
77         DB_LOCKOBJ *op;
78         DB_LOCKREGION *lrp;
79         u_int maxlocks;
80         u_int32_t i;
81         int fd, lock_modes, nelements, ret;
82         const u_int8_t *conflicts;
83         u_int8_t *curaddr;
84
85         maxlocks = dbenv == NULL || dbenv->lk_max == 0 ?
86             DB_LOCK_DEFAULT_N : dbenv->lk_max;
87         lock_modes = dbenv == NULL || dbenv->lk_modes == 0 ?
88             DB_LOCK_RW_N : dbenv->lk_modes;
89         conflicts = dbenv == NULL || dbenv->lk_conflicts == NULL ?
90             db_rw_conflicts : dbenv->lk_conflicts;
91
92         if ((ret =
93             __db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode,
94             LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)),
95             0, &fd, &lrp)) != 0)
96                 return (ret);
97
98         /* Region exists; now initialize it. */
99         lrp->table_size = __db_tablesize(maxlocks);
100         lrp->magic = DB_LOCKMAGIC;
101         lrp->version = DB_LOCKVERSION;
102         lrp->id = 0;
103         lrp->maxlocks = maxlocks;
104         lrp->need_dd = 0;
105         lrp->detect = DB_LOCK_NORUN;
106         lrp->numobjs = maxlocks;
107         lrp->nlockers = 0;
108         lrp->mem_bytes = ALIGN(STRING_SIZE(maxlocks), sizeof(size_t));
109         lrp->increment = lrp->hdr.size / 2;
110         lrp->nmodes = lock_modes;
111         lrp->nconflicts = 0;
112         lrp->nrequests = 0;
113         lrp->nreleases = 0;
114         lrp->ndeadlocks = 0;
115
116         /*
117          * As we write the region, we've got to maintain the alignment
118          * for the structures that follow each chunk.  This information
119          * ends up being encapsulated both here and in the lock.h file
120          * for the XXX_SIZE macros.
121          */
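        /*
         * Illustrative sketch (not normative) of the layout laid down below,
         * in the order it is written; the authoritative sizes and alignments
         * come from the XXX_SIZE and ALIGN macros in lock.h:
         *
         *      DB_LOCKREGION header
         *      conflict matrix   (lock_modes * lock_modes bytes)
         *      hash table        (aligned to LOCK_HASH_ALIGN)
         *      lock structures   (each aligned to MUTEX_ALIGNMENT)
         *      lock objects      (DB_LOCKOBJ array)
         *      string space      (aligned to sizeof(size_t))
         */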
122         /* Initialize conflict matrix. */
123         curaddr = (u_int8_t *)lrp + sizeof(DB_LOCKREGION);
124         memcpy(curaddr, conflicts, lock_modes * lock_modes);
125         curaddr += lock_modes * lock_modes;
126
127         /*
128          * Initialize hash table.
129          */
130         curaddr = (u_int8_t *)ALIGNP(curaddr, LOCK_HASH_ALIGN);
131         lrp->hash_off = curaddr - (u_int8_t *)lrp;
132         nelements = lrp->table_size;
133         __db_hashinit(curaddr, nelements);
134         curaddr += nelements * sizeof(DB_HASHTAB);
135
136         /*
137          * Initialize locks onto a free list.  Since locks contain mutexes,
138          * we need to make sure that each lock is aligned on a MUTEX_ALIGNMENT
139          * boundary.
140          */
141         curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
142         tq_head = &lrp->free_locks;
143         SH_TAILQ_INIT(tq_head);
144
145         for (i = 0; i++ < maxlocks;
146             curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
147                 lp = (struct __db_lock *)curaddr;
148                 lp->status = DB_LSTAT_FREE;
149                 SH_TAILQ_INSERT_HEAD(tq_head, lp, links, __db_lock);
150         }
151
152         /* Initialize objects onto a free list.  */
153         obj_head = &lrp->free_objs;
154         SH_TAILQ_INIT(obj_head);
155
156         for (i = 0; i++ < maxlocks; curaddr += sizeof(DB_LOCKOBJ)) {
157                 op = (DB_LOCKOBJ *)curaddr;
158                 SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
159         }
160
161         /*
162          * Initialize the string space; as for all shared memory allocation
163          * regions, this requires size_t alignment, since we store the
164          * lengths of malloc'd areas in the area.
165          */
166         curaddr = (u_int8_t *)ALIGNP(curaddr, sizeof(size_t));
167         lrp->mem_off = curaddr - (u_int8_t *)lrp;
168         __db_shalloc_init(curaddr, lrp->mem_bytes);
169
170         /* Release the lock. */
171         (void)__db_mutex_unlock(&lrp->hdr.lock, fd);
172
173         /* Now unmap the region. */
174         if ((ret = __db_rclose(dbenv, fd, lrp)) != 0) {
175                 (void)lock_unlink(path, 1 /* force */, dbenv);
176                 return (ret);
177         }
178
179         return (0);
180 }
181
182 int
183 lock_open(path, flags, mode, dbenv, ltp)
184         const char *path;
185         int flags, mode;
186         DB_ENV *dbenv;
187         DB_LOCKTAB **ltp;
188 {
189         DB_LOCKTAB *lt;
190         int ret, retry_cnt;
191
192         /* Validate arguments. */
193 #ifdef HAVE_SPINLOCKS
194 #define OKFLAGS (DB_CREATE | DB_THREAD)
195 #else
196 #define OKFLAGS (DB_CREATE)
197 #endif
198         if ((ret = __db_fchk(dbenv, "lock_open", flags, OKFLAGS)) != 0)
199                 return (ret);
200
201         /*
202          * Create the lock table structure.
203          */
204         if ((lt = (DB_LOCKTAB *)__db_calloc(1, sizeof(DB_LOCKTAB))) == NULL) {
205                 __db_err(dbenv, "%s", strerror(ENOMEM));
206                 return (ENOMEM);
207         }
208         lt->dbenv = dbenv;
209
210         /*
211          * Now, create the lock region if it doesn't already exist.
212          */
213         retry_cnt = 0;
214 retry:  if (LF_ISSET(DB_CREATE) &&
215             (ret = __lock_create(path, mode, dbenv)) != 0)
216                 if (ret == EAGAIN && ++retry_cnt < 3) {
217                         (void)__db_sleep(1, 0);
218                         goto retry;
219                 } else if (ret == EEXIST) /* We did not create the region */
220                         LF_CLR(DB_CREATE);
221                 else
222                         goto out;
223
224         /*
225          * Finally, open the region, map it in, and increment the
226          * reference count.
227          */
228         retry_cnt = 0;
229 retry1: if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE,
230             LF_ISSET(~(DB_CREATE | DB_THREAD)), &lt->fd, &lt->region)) != 0) {
231                 if (ret == EAGAIN && ++retry_cnt < 3) {
232                         (void)__db_sleep(1, 0);
233                         goto retry1;
234                 }
235                 goto out;
236          }
237
238         if (lt->region->magic != DB_LOCKMAGIC) {
239                 __db_err(dbenv, "lock_open: Bad magic number");
240                 ret = EINVAL;
241                 goto out;
242         }
243
244         /* Check for automatic deadlock detection. */
245         if (dbenv->lk_detect != DB_LOCK_NORUN) {
246                 if (lt->region->detect != DB_LOCK_NORUN &&
247                     dbenv->lk_detect != DB_LOCK_DEFAULT &&
248                     lt->region->detect != dbenv->lk_detect) {
249                         __db_err(dbenv,
250                             "lock_open: incompatible deadlock detector mode");
251                         ret = EINVAL;
252                         goto out;
253                 }
254                 if (lt->region->detect == DB_LOCK_NORUN)
255                         lt->region->detect = dbenv->lk_detect;
256         }
257
258         /* Set up remaining pointers into region. */
259         lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
260         lt->hashtab =
261             (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
262         lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
263         lt->reg_size = lt->region->hdr.size;
264
265         *ltp = lt;
266         return (0);
267
268 /* Error handling. */
269 out:    if (lt->region != NULL)
270                 (void)__db_rclose(lt->dbenv, lt->fd, lt->region);
271         if (LF_ISSET(DB_CREATE))
272                 (void)lock_unlink(path, 1, lt->dbenv);
273         __db_free(lt);
274         return (ret);
275 }
276
277 int
278 lock_id(lt, idp)
279         DB_LOCKTAB *lt;
280         u_int32_t *idp;
281 {
282         u_int32_t id;
283
284         LOCK_LOCKREGION(lt);
285         if (lt->region->id >= DB_LOCK_MAXID)
286                 lt->region->id = 0;
287         id = ++lt->region->id;
288         UNLOCK_LOCKREGION(lt);
289
290         *idp = id;
291         return (0);
292 }
293
294 int
295 lock_vec(lt, locker, flags, list, nlist, elistp)
296         DB_LOCKTAB *lt;
297         u_int32_t locker;
298         int flags, nlist;
299         DB_LOCKREQ *list, **elistp;
300 {
301         struct __db_lock *lp;
302         DB_LOCKOBJ *sh_obj, *sh_locker;
303         int i, ret, run_dd;
304
305         /* Validate arguments. */
306         if ((ret =
307             __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
308                 return (ret);
309
310         LOCK_LOCKREGION(lt);
311
312         if ((ret = __lock_validate_region(lt)) != 0) {
313                 UNLOCK_LOCKREGION(lt);
314                 return (ret);
315         }
316
317         ret = 0;
318         for (i = 0; i < nlist && ret == 0; i++) {
319                 switch (list[i].op) {
320                 case DB_LOCK_GET:
321                         ret = __lock_get_internal(lt, locker, flags,
322                             list[i].obj, list[i].mode, &lp);
323                         if (ret == 0) {
324                                 list[i].lock = LOCK_TO_OFFSET(lt, lp);
325                                 lt->region->nrequests++;
326                         }
327                         break;
328                 case DB_LOCK_PUT:
329                         lp = OFFSET_TO_LOCK(lt, list[i].lock);
330                         if (lp->holder != locker) {
331                                 ret = DB_LOCK_NOTHELD;
332                                 break;
333                         }
334                         list[i].mode = lp->mode;
335
336                         /* XXX Need to copy the object. ??? */
337                         ret = __lock_put_internal(lt, lp, 0);
338                         break;
339                 case DB_LOCK_PUT_ALL:
340                         /* Find the locker. */
341                         if ((ret = __lock_getobj(lt, locker,
342                             NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
343                                 break;
344
345                         for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
346                             lp != NULL;
347                             lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
348                                 if ((ret = __lock_put_internal(lt, lp, 0)) != 0)
349                                         break;
350                         }
351                         __lock_freeobj(lt, sh_locker);
352                         lt->region->nlockers--;
353                         break;
354                 case DB_LOCK_PUT_OBJ:
355
356                         /* Look up the object in the hash table. */
357                         HASHLOOKUP(lt->hashtab, __db_lockobj, links,
358                             list[i].obj, sh_obj, lt->region->table_size,
359                             __lock_ohash, __lock_cmp);
360                         if (sh_obj == NULL) {
361                                 ret = EINVAL;
362                                 break;
363                         }
364                         /*
365                          * Release waiters first, because they won't cause
366                          * anyone else to be awakened.  If we release the
367                          * lockers first, all the waiters get awakened
368                          * needlessly.
369                          */
370                         for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
371                             lp != NULL;
372                             lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) {
373                                 lt->region->nreleases += lp->refcount;
374                                 __lock_remove_waiter(lt, sh_obj, lp,
375                                     DB_LSTAT_NOGRANT);
376                                 __lock_checklocker(lt, lp, 1);
377                         }
378
379                         for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
380                             lp != NULL;
381                             lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
382
383                                 lt->region->nreleases += lp->refcount;
384                                 SH_LIST_REMOVE(lp, locker_links, __db_lock);
385                                 SH_TAILQ_REMOVE(&sh_obj->holders, lp, links,
386                                     __db_lock);
387                                 lp->status = DB_LSTAT_FREE;
388                                 SH_TAILQ_INSERT_HEAD(&lt->region->free_locks,
389                                     lp, links, __db_lock);
390                         }
391
392                         /* Now free the object. */
393                         __lock_freeobj(lt, sh_obj);
394                         break;
395 #ifdef DEBUG
396                 case DB_LOCK_DUMP:
397                         /* Find the locker. */
398                         if ((ret = __lock_getobj(lt, locker,
399                             NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
400                                 break;
401
402                         for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
403                             lp != NULL;
404                             lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) {
405                                 __lock_printlock(lt, lp, 1);
406                                 ret = EINVAL;
407                         }
408                         if (ret == 0) {
409                                 __lock_freeobj(lt, sh_locker);
410                                 lt->region->nlockers--;
411                         }
412                         break;
413 #endif
414                 default:
415                         ret = EINVAL;
416                         break;
417                 }
418         }
419
420         if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
421                 run_dd = 1;
422                 lt->region->need_dd = 0;
423         } else
424                 run_dd = 0;
425
426         UNLOCK_LOCKREGION(lt);
427
428         if (ret == 0 && run_dd)
429                 lock_detect(lt, 0, lt->region->detect);
430
431         if (elistp && ret != 0)
432                 *elistp = &list[i - 1];
433         return (ret);
434 }
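/*
 * Illustrative sketch, not part of the build: lock_vec lets a caller batch
 * several requests under a single acquisition of the region lock.  The
 * DB_LOCKREQ fields used below (op, obj, mode) are the ones read in the
 * loop above; the key bytes are hypothetical and error handling is
 * abbreviated.  On failure, elist is left pointing at the failing request.
 *
 *      DB_LOCKREQ list[2], *elist;
 *      DBT obj;
 *      u_int32_t locker;
 *
 *      memset(&obj, 0, sizeof(obj));
 *      obj.data = "page-42";
 *      obj.size = sizeof("page-42") - 1;
 *      (void)lock_id(lt, &locker);
 *
 *      list[0].op = DB_LOCK_GET;
 *      list[0].obj = &obj;
 *      list[0].mode = DB_LOCK_READ;
 *      list[1].op = DB_LOCK_PUT_ALL;
 *      if (lock_vec(lt, locker, 0, list, 2, &elist) != 0)
 *              abort();
 */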
435
436 int
437 lock_get(lt, locker, flags, obj, lock_mode, lock)
438         DB_LOCKTAB *lt;
439         u_int32_t locker;
440         int flags;
441         const DBT *obj;
442         db_lockmode_t lock_mode;
443         DB_LOCK *lock;
444 {
445         struct __db_lock *lockp;
446         int ret;
447
448         /* Validate arguments. */
449         if ((ret =
450             __db_fchk(lt->dbenv, "lock_get", flags, DB_LOCK_NOWAIT)) != 0)
451                 return (ret);
452
453         LOCK_LOCKREGION(lt);
454
455         ret = __lock_validate_region(lt);
456         if (ret == 0 && (ret = __lock_get_internal(lt,
457             locker, flags, obj, lock_mode, &lockp)) == 0) {
458                 *lock = LOCK_TO_OFFSET(lt, lockp);
459                 lt->region->nrequests++;
460         }
461
462         UNLOCK_LOCKREGION(lt);
463         return (ret);
464 }
465
466 int
467 lock_put(lt, lock)
468         DB_LOCKTAB *lt;
469         DB_LOCK lock;
470 {
471         struct __db_lock *lockp;
472         int ret, run_dd;
473
474         LOCK_LOCKREGION(lt);
475
476         if ((ret = __lock_validate_region(lt)) != 0) {
477                 UNLOCK_LOCKREGION(lt);
478                 return (ret);
479         }
480         lockp = OFFSET_TO_LOCK(lt, lock);
481         ret = __lock_put_internal(lt, lockp, 0);
482
483         __lock_checklocker(lt, lockp, 0);
484
485         if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
486                 run_dd = 1;
487                 lt->region->need_dd = 0;
488         } else
489                 run_dd = 0;
490
491         UNLOCK_LOCKREGION(lt);
492
493         if (ret == 0 && run_dd)
494                 lock_detect(lt, 0, lt->region->detect);
495
496         return (ret);
497 }
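/*
 * Illustrative sketch, not part of the build: the simplest use of the
 * interfaces above is to allocate a locker id, acquire a single lock and
 * later release it.  The key bytes are hypothetical and error handling is
 * abbreviated.
 *
 *      DBT obj;
 *      DB_LOCK lock;
 *      u_int32_t locker;
 *      int ret;
 *
 *      memset(&obj, 0, sizeof(obj));
 *      obj.data = "my-key";
 *      obj.size = sizeof("my-key") - 1;
 *
 *      (void)lock_id(lt, &locker);
 *      ret = lock_get(lt, locker, DB_LOCK_NOWAIT, &obj, DB_LOCK_WRITE, &lock);
 *      if (ret == 0)
 *              ret = lock_put(lt, lock);
 */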
498
499 int
500 lock_close(lt)
501         DB_LOCKTAB *lt;
502 {
503         int ret;
504
505         if ((ret = __db_rclose(lt->dbenv, lt->fd, lt->region)) != 0)
506                 return (ret);
507
508         /* Free lock table. */
509         __db_free(lt);
510         return (0);
511 }
512
513 int
514 lock_unlink(path, force, dbenv)
515         const char *path;
516         int force;
517         DB_ENV *dbenv;
518 {
519         return (__db_runlink(dbenv,
520             DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, force));
521 }
522
523 /*
524  * XXX This looks like it could be void, but I'm leaving it returning
525  * an int because I think it will have to when we go through and add
526  * the appropriate error checking for the EINTR on mutexes.
527  */
528 static int
529 __lock_put_internal(lt, lockp, do_all)
530         DB_LOCKTAB *lt;
531         struct __db_lock *lockp;
532         int do_all;
533 {
534         struct __db_lock *lp_w, *lp_h, *next_waiter;
535         DB_LOCKOBJ *sh_obj;
536         int state_changed;
537
538         if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD &&
539             lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) {
540                 __db_err(lt->dbenv, "lock_put: invalid lock %lu",
541                     (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region));
542                 return (EINVAL);
543         }
544
545         if (do_all)
546                 lt->region->nreleases += lockp->refcount;
547         else
548                 lt->region->nreleases++;
549         if (do_all == 0 && lockp->refcount > 1) {
550                 lockp->refcount--;
551                 return (0);
552         }
553
554         /* Get the object associated with this lock. */
555         sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
556
557         /* Remove lock from locker list. */
558         SH_LIST_REMOVE(lockp, locker_links, __db_lock);
559
560         /* Remove this lock from its holders/waitlist. */
561         if (lockp->status != DB_LSTAT_HELD)
562                 __lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE);
563         else
564                 SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);
565
566         /*
567          * We need to do lock promotion.  We also need to determine if
568          * we're going to need to run the deadlock detector again.  If
569          * we release locks, and there are waiters, but no one gets promoted,
570          * then we haven't fundamentally changed the lockmgr state, so
571          * we may still have a deadlock and we have to run again.  However,
572          * if there were no waiters, or we actually promoted someone, then
573          * we are OK and we don't have to run it immediately.
574          */
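        /*
         * Concretely: if two readers hold the object and a writer waits,
         * releasing one reader promotes nothing (the writer still conflicts
         * with the remaining reader), so state_changed stays 0 and need_dd
         * is set below.  Releasing the last reader promotes the writer to
         * DB_LSTAT_PENDING and wakes it, and no re-run is needed.
         */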
575         for (lp_w = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock),
576             state_changed = lp_w == NULL;
577             lp_w != NULL;
578             lp_w = next_waiter) {
579                 next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);
580                 for (lp_h = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
581                     lp_h != NULL;
582                     lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
583                         if (CONFLICTS(lt, lp_h->mode, lp_w->mode) &&
584                             lp_h->holder != lp_w->holder)
585                                 break;
586                 }
587                 if (lp_h != NULL)       /* Found a conflict. */
588                         break;
589
590                 /* No conflict, promote the waiting lock. */
591                 SH_TAILQ_REMOVE(&sh_obj->waiters, lp_w, links, __db_lock);
592                 lp_w->status = DB_LSTAT_PENDING;
593                 SH_TAILQ_INSERT_TAIL(&sh_obj->holders, lp_w, links);
594
595                 /* Wake up waiter. */
596                 (void)__db_mutex_unlock(&lp_w->mutex, lt->fd);
597                 state_changed = 1;
598         }
599
600         /* Check if object should be reclaimed. */
601         if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
602                 HASHREMOVE_EL(lt->hashtab, __db_lockobj,
603                     links, sh_obj, lt->region->table_size, __lock_lhash);
604                 if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
605                         __db_shalloc_free(lt->mem,
606                             SH_DBT_PTR(&sh_obj->lockobj));
607                 SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
608                     __db_lockobj);
609                 state_changed = 1;
610         }
611
612         /* Free lock. */
613         lockp->status = DB_LSTAT_FREE;
614         SH_TAILQ_INSERT_HEAD(&lt->region->free_locks, lockp, links, __db_lock);
615
616         /*
617          * If we did not promote anyone, we need to run the deadlock
618          * detector again.
619          */
620         if (state_changed == 0)
621                 lt->region->need_dd = 1;
622
623         return (0);
624 }
625
626 static int
627 __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
628         DB_LOCKTAB *lt;
629         u_int32_t locker;
630         int flags;
631         const DBT *obj;
632         db_lockmode_t lock_mode;
633         struct __db_lock **lockp;
634 {
635         struct __db_lock *newl, *lp;
636         DB_LOCKOBJ *sh_obj, *sh_locker;
637         DB_LOCKREGION *lrp;
638         size_t newl_off;
639         int ihold, ret;
640
641         ret = 0;
642         /*
643          * Check that lock mode is valid.
644          */
645
646         lrp = lt->region;
647         if ((u_int32_t)lock_mode >= lrp->nmodes) {
648                 __db_err(lt->dbenv,
649                     "lock_get: invalid lock mode %lu\n", (u_long)lock_mode);
650                 return (EINVAL);
651         }
652
653         /* Allocate a new lock.  Optimize for the common case of a grant. */
654         if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) {
655                 if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0)
656                         return (ret);
657                 lrp = lt->region;
658                 newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
659         }
660         newl_off = LOCK_TO_OFFSET(lt, newl);
661
662         /* Optimize for common case of granting a lock. */
663         SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock);
664
665         newl->mode = lock_mode;
666         newl->status = DB_LSTAT_HELD;
667         newl->holder = locker;
668         newl->refcount = 1;
669
670         if ((ret = __lock_getobj(lt, 0, obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0)
671                 return (ret);
672
673         lrp = lt->region;                       /* getobj might have grown */
674         newl = OFFSET_TO_LOCK(lt, newl_off);
675
676         /* Now make new lock point to object */
677         newl->obj = SH_PTR_TO_OFF(newl, sh_obj);
678
679         /*
680          * Now we have a lock and an object and we need to see if we should
681          * grant the lock.  We use a FIFO ordering so we can only grant a
682          * new lock if it does not conflict with anyone on the holders list
683          * OR anyone on the waiters list.  The reason that we don't grant if
684          * OR anyone on the waiters list.  We also refuse to jump ahead of a
685          * conflicting waiter because that can lead to starvation (a writer
686          * waiting on a popularly read item would never be granted).  The
687          * downside of this is that a waiting reader can prevent an upgrade
688          *
689          * There is one exception to the no-conflict rule.  If a lock is held
690          * by the requesting locker AND the new lock does not conflict with
691          * any other holders, then we grant the lock.  The most common place
692          * this happens is when the holder has a WRITE lock and a READ lock
693          * request comes in for the same locker.  If we do not grant the read
694          * lock, then we guarantee deadlock.
695          *
696          * In case of conflict, we put the new lock on the end of the waiters
697          * list.
698          */
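        /*
         * For example, if locker A already holds a WRITE lock on the object
         * and requests a READ lock, the READ conflicts with no other holder,
         * so it is granted; refusing it would deadlock A against itself.  If
         * instead locker B requests the READ, it conflicts with A's WRITE and
         * is queued at the end of the waiters list (absent DB_LOCK_NOWAIT).
         */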
699         ihold = 0;
700         for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
701             lp != NULL;
702             lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
703                 if (locker == lp->holder) {
704                         if (lp->mode == lock_mode &&
705                             lp->status == DB_LSTAT_HELD) {
706                                 /* Lock is held, just inc the ref count. */
707                                 lp->refcount++;
708                                 SH_TAILQ_INSERT_HEAD(&lrp->free_locks,
709                                     newl, links, __db_lock);
710                                 *lockp = lp;
711                                 return (0);
712                         } else
713                                 ihold = 1;
714                 } else if (CONFLICTS(lt, lp->mode, lock_mode))
715                         break;
716         }
717
718         if (lp == NULL && !ihold)
719                 for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
720                     lp != NULL;
721                     lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
722                         if (CONFLICTS(lt, lp->mode, lock_mode) &&
723                             locker != lp->holder)
724                                 break;
725                 }
726         if (lp == NULL)
727                 SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links);
728         else if (!(flags & DB_LOCK_NOWAIT))
729                 SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links);
730         else {
731                 /* Free the lock and return an error. */
732                 newl->status = DB_LSTAT_FREE;
733                 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
734                 return (DB_LOCK_NOTGRANTED);
735         }
736
737         /*
738          * This mutex is really a blocker for the process, so initialize
739          * it in the set (locked) state.  That way the current process will
740          * block when it tries to get it and the waking process will release it.
741          */
742         (void)__db_mutex_init(&newl->mutex,
743             MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
744         (void)__db_mutex_lock(&newl->mutex, lt->fd);
745
746         /*
747          * Now, insert the lock onto its locker's list.
748          */
749         if ((ret =
750             __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
751                 return (ret);
752
753         lrp = lt->region;
754         SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);
755
756         if (lp != NULL) {
757                 newl->status = DB_LSTAT_WAITING;
758                 lrp->nconflicts++;
759                 /*
760                  * We are about to wait; must release the region mutex.
761                  * Then, when we wakeup, we need to reacquire the region
762                  * mutex before continuing.
763                  */
764                 if (lrp->detect == DB_LOCK_NORUN)
765                         lt->region->need_dd = 1;
766                 UNLOCK_LOCKREGION(lt);
767
768                 /*
769                  * We are about to wait; before waiting, see if the deadlock
770                  * detector should be run.
771                  */
772                 if (lrp->detect != DB_LOCK_NORUN)
773                         ret = lock_detect(lt, 0, lrp->detect);
774
775                 (void)__db_mutex_lock(&newl->mutex, lt->fd);
776
777                 LOCK_LOCKREGION(lt);
778                 if (newl->status != DB_LSTAT_PENDING) {
779                         /* Return to free list. */
780                         __lock_checklocker(lt, newl, 0);
781                         SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
782                             __db_lock);
783                         switch (newl->status) {
784                                 case DB_LSTAT_ABORTED:
785                                         ret = DB_LOCK_DEADLOCK;
786                                         break;
787                                 case DB_LSTAT_NOGRANT:
788                                         ret = DB_LOCK_NOTGRANTED;
789                                         break;
790                                 default:
791                                         ret = EINVAL;
792                                         break;
793                         }
794                         newl->status = DB_LSTAT_FREE;
795                         newl = NULL;
796                 } else
797                         newl->status = DB_LSTAT_HELD;
798         }
799
800         *lockp = newl;
801         return (ret);
802 }
803
804 /*
805  * This is called at every interface entry point to check whether the
806  * region has changed size, and if so, to remap the region and reset
807  * the per-process pointers.
808  */
809 static int
810 __lock_validate_region(lt)
811         DB_LOCKTAB *lt;
812 {
813         int ret;
814
815         if (lt->reg_size == lt->region->hdr.size)
816                 return (0);
817
818         /* Grow the region. */
819         if ((ret = __db_rremap(lt->dbenv, lt->region,
820             lt->reg_size, lt->region->hdr.size, lt->fd, &lt->region)) != 0)
821                 return (ret);
822
823         __lock_reset_region(lt);
824
825         return (0);
826 }
827
828 /*
829  * We have run out of space; time to grow the region.
830  */
831 static int
832 __lock_grow_region(lt, which, howmuch)
833         DB_LOCKTAB *lt;
834         int which;
835         size_t howmuch;
836 {
837         struct __db_lock *newl;
838         struct lock_header *lock_head;
839         struct obj_header *obj_head;
840         DB_LOCKOBJ *op;
841         DB_LOCKREGION *lrp;
842         float lock_ratio, obj_ratio;
843         size_t incr, oldsize, used;
844         u_int32_t i, newlocks, newmem, newobjs;
845         int ret, usedlocks, usedmem, usedobjs;
846         u_int8_t *curaddr;
847
848         lrp = lt->region;
849         oldsize = lrp->hdr.size;
850         incr = lrp->increment;
851
852         /* Figure out how much of each sort of space we have. */
853         usedmem = lrp->mem_bytes - __db_shalloc_count(lt->mem);
854         usedobjs = lrp->numobjs - __lock_count_objs(lrp);
855         usedlocks = lrp->maxlocks - __lock_count_locks(lrp);
856
857         /*
858          * Figure out what fraction of the used space belongs to each
859          * different type of "thing" in the region.  Then partition the
860          * new space up according to this ratio.
861          */
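        /*
         * For example, if the space in use were split 50% locks, 25% objects
         * and 25% string memory, then roughly half of the increment would be
         * carved into new locks and a quarter each into new objects and new
         * string space, with any rounding remainder going to newmem below.
         */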
862         used = usedmem +
863             usedlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) +
864             usedobjs * sizeof(DB_LOCKOBJ);
865
866         lock_ratio = usedlocks *
867             ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) / (float)used;
868         obj_ratio = usedobjs * sizeof(DB_LOCKOBJ) / (float)used;
869
870         newlocks = (u_int32_t)(lock_ratio *
871             incr / ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
872         newobjs = (u_int32_t)(obj_ratio * incr / sizeof(DB_LOCKOBJ));
873         newmem = incr -
874             (newobjs * sizeof(DB_LOCKOBJ) +
875             newlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
876
877         /*
878          * Make sure we allocate enough memory for the object being
879          * requested.
880          */
881         switch (which) {
882                 case DB_LOCK_LOCK:
883                         if (newlocks == 0) {
884                                 newlocks = 10;
885                                 incr += newlocks * sizeof(struct __db_lock);
886                         }
887                         break;
888                 case DB_LOCK_OBJ:
889                         if (newobjs == 0) {
890                                 newobjs = 10;
891                                 incr += newobjs * sizeof(DB_LOCKOBJ);
892                         }
893                         break;
894                 case DB_LOCK_MEM:
895                         if (newmem < howmuch * 2) {
896                                 incr += howmuch * 2 - newmem;
897                                 newmem = howmuch * 2;
898                         }
899                         break;
900         }
901
902         newmem += ALIGN(incr, sizeof(size_t)) - incr;
903         incr = ALIGN(incr, sizeof(size_t));
904
905         /*
906          * Since we are going to be allocating locks at the beginning of the
907          * new chunk, we need to make sure that the chunk is MUTEX_ALIGNMENT
908          * aligned.  We did not guarantee this when we created the region, so
909          * we may need to pad the old region by extra bytes to ensure this
910          * alignment.
911          */
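        /*
         * For example, with a hypothetical MUTEX_ALIGNMENT of 4 and an old
         * region size of 8190 bytes, ALIGN(8190, 4) is 8192, so 2 bytes of
         * padding are added to incr and the new locks land on an aligned
         * boundary.
         */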
912         incr += ALIGN(oldsize, MUTEX_ALIGNMENT) - oldsize;
913
914         __db_err(lt->dbenv,
915             "Growing lock region: %lu locks %lu objs %lu bytes",
916             (u_long)newlocks, (u_long)newobjs, (u_long)newmem);
917
918         if ((ret = __db_rgrow(lt->dbenv, lt->fd, incr)) != 0)
919                 return (ret);
920         if ((ret = __db_rremap(lt->dbenv,
921             lt->region, oldsize, oldsize + incr, lt->fd, &lt->region)) != 0)
922                 return (ret);
923         __lock_reset_region(lt);
924
925         /* Update region parameters. */
926         lrp = lt->region;
927         lrp->increment = incr << 1;
928         lrp->maxlocks += newlocks;
929         lrp->numobjs += newobjs;
930         lrp->mem_bytes += newmem;
931
932         curaddr = (u_int8_t *)lrp + oldsize;
933         curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
934
935         /* Put new locks onto the free list. */
936         lock_head = &lrp->free_locks;
937         for (i = 0; i++ < newlocks;
938             curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
939                 newl = (struct __db_lock *)curaddr;
940                 SH_TAILQ_INSERT_HEAD(lock_head, newl, links, __db_lock);
941         }
942
943         /* Put new objects onto the free list.  */
944         obj_head = &lrp->free_objs;
945         for (i = 0; i++ < newobjs; curaddr += sizeof(DB_LOCKOBJ)) {
946                 op = (DB_LOCKOBJ *)curaddr;
947                 SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
948         }
949
950         *((size_t *)curaddr) = newmem - sizeof(size_t);
951         curaddr += sizeof(size_t);
952         __db_shalloc_free(lt->mem, curaddr);
953
954         return (0);
955 }
956
957 #ifdef DEBUG
958 /*
959  * __lock_dump_region --
960  *
961  * PUBLIC: void __lock_dump_region __P((DB_LOCKTAB *, u_int));
962  */
963 void
964 __lock_dump_region(lt, flags)
965         DB_LOCKTAB *lt;
966         u_int flags;
967 {
968         struct __db_lock *lp;
969         DB_LOCKOBJ *op;
970         DB_LOCKREGION *lrp;
971         u_int32_t i, j;
972
973         lrp = lt->region;
974
975         printf("Lock region parameters\n");
976         printf("%s:0x%x\t%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\t\n",
977             "magic      ", lrp->magic,
978             "version    ", (u_long)lrp->version,
979             "processes  ", (u_long)lrp->hdr.refcnt,
980             "maxlocks   ", (u_long)lrp->maxlocks,
981             "table size ", (u_long)lrp->table_size,
982             "nmodes     ", (u_long)lrp->nmodes,
983             "numobjs    ", (u_long)lrp->numobjs);
984         printf("%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\n",
985             "size       ", (u_long)lrp->hdr.size,
986             "nlockers   ", (u_long)lrp->nlockers,
987             "hash_off   ", (u_long)lrp->hash_off,
988             "increment  ", (u_long)lrp->increment,
989             "mem_off    ", (u_long)lrp->mem_off,
990             "mem_bytes  ", (u_long)lrp->mem_bytes);
991 #ifndef HAVE_SPINLOCKS
992         printf("Mutex: off %lu", (u_long)lrp->hdr.lock.off);
993 #endif
994         printf(" waits %lu nowaits %lu",
995             (u_long)lrp->hdr.lock.mutex_set_wait,
996             (u_long)lrp->hdr.lock.mutex_set_nowait);
997         printf("\n%s:%lu\t%s:%lu\t%s:%lu\t%s:%lu\n",
998             "nconflicts ", (u_long)lrp->nconflicts,
999             "nrequests  ", (u_long)lrp->nrequests,
1000             "nreleases  ", (u_long)lrp->nreleases,
1001             "ndeadlocks ", (u_long)lrp->ndeadlocks);
1002         printf("need_dd    %lu\n", (u_long)lrp->need_dd);
1003         if (flags & LOCK_DEBUG_CONF) {
1004                 printf("\nConflict matrix\n");
1005
1006                 for (i = 0; i < lrp->nmodes; i++) {
1007                         for (j = 0; j < lrp->nmodes; j++)
1008                                 printf("%lu\t",
1009                                     (u_long)lt->conflicts[i * lrp->nmodes + j]);
1010                         printf("\n");
1011                 }
1012         }
1013
1014         for (i = 0; i < lrp->table_size; i++) {
1015                 op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
1016                 if (op != NULL && flags & LOCK_DEBUG_BUCKET)
1017                         printf("Bucket %lu:\n", (unsigned long)i);
1018                 while (op != NULL) {
1019                         if (op->type == DB_LOCK_LOCKER &&
1020                             flags & LOCK_DEBUG_LOCKERS)
1021                                 __lock_dump_locker(lt, op);
1022                         else if (flags & LOCK_DEBUG_OBJECTS &&
1023                             op->type == DB_LOCK_OBJTYPE)
1024                                 __lock_dump_object(lt, op);
1025                         op = SH_TAILQ_NEXT(op, links, __db_lockobj);
1026                 }
1027         }
1028
1029         if (flags & LOCK_DEBUG_LOCK) {
1030                 printf("\nLock Free List\n");
1031                 for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
1032                     lp != NULL;
1033                     lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
1034                         printf("0x%x: %lu\t%lu\t%lu\t0x%x\n", (u_int)lp,
1035                             (u_long)lp->holder, (u_long)lp->mode,
1036                             (u_long)lp->status, (u_int)lp->obj);
1037                 }
1038         }
1039
1040         if (flags & LOCK_DEBUG_LOCK) {
1041                 printf("\nObject Free List\n");
1042                 for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
1043                     op != NULL;
1044                     op = SH_TAILQ_NEXT(op, links, __db_lockobj))
1045                         printf("0x%x\n", (u_int)op);
1046         }
1047
1048         if (flags & LOCK_DEBUG_MEM) {
1049                 printf("\nMemory Free List\n");
1050                 __db_shalloc_dump(stdout, lt->mem);
1051         }
1052 }
1053
1054 static void
1055 __lock_dump_locker(lt, op)
1056         DB_LOCKTAB *lt;
1057         DB_LOCKOBJ *op;
1058 {
1059         struct __db_lock *lp;
1060         u_int32_t locker;
1061         void *ptr;
1062
1063         ptr = SH_DBT_PTR(&op->lockobj);
1064         memcpy(&locker, ptr, sizeof(u_int32_t));
1065         printf("L %lx", (u_long)locker);
1066
1067         lp = SH_LIST_FIRST(&op->heldby, __db_lock);
1068         if (lp == NULL) {
1069                 printf("\n");
1070                 return;
1071         }
1072         for (; lp != NULL; lp = SH_LIST_NEXT(lp, locker_links, __db_lock))
1073                 __lock_printlock(lt, lp, 0);
1074 }
1075
1076 static void
1077 __lock_dump_object(lt, op)
1078         DB_LOCKTAB *lt;
1079         DB_LOCKOBJ *op;
1080 {
1081         struct __db_lock *lp;
1082         u_int32_t j;
1083         char *ptr;
1084
1085         ptr = SH_DBT_PTR(&op->lockobj);
1086         for (j = 0; j < op->lockobj.size; ptr++, j++)
1087                 printf("%c", (int)*ptr);
1088         printf("\n");
1089
1090         printf("H:");
1091         for (lp =
1092             SH_TAILQ_FIRST(&op->holders, __db_lock);
1093             lp != NULL;
1094             lp = SH_TAILQ_NEXT(lp, links, __db_lock))
1095                 __lock_printlock(lt, lp, 0);
1096         lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
1097         if (lp != NULL) {
1098                 printf("\nW:");
1099                 for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock))
1100                         __lock_printlock(lt, lp, 0);
1101         }
1102 }
1103
1104 /*
1105  * __lock_is_locked --
1106  *
1107  * PUBLIC: int __lock_is_locked
1108  * PUBLIC:    __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t));
1109  */
1110 int
1111 __lock_is_locked(lt, locker, dbt, mode)
1112         DB_LOCKTAB *lt;
1113         u_int32_t locker;
1114         DBT *dbt;
1115         db_lockmode_t mode;
1116 {
1117         struct __db_lock *lp;
1118         DB_LOCKOBJ *sh_obj;
1119         DB_LOCKREGION *lrp;
1120
1121         lrp = lt->region;
1122
1123         /* Look up the object in the hash table. */
1124         HASHLOOKUP(lt->hashtab, __db_lockobj, links,
1125             dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp);
1126         if (sh_obj == NULL)
1127                 return (0);
1128
1129         for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
1130             lp != NULL;
1131             lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
1132                 if (lp->holder == locker && lp->mode == mode)
1133                         return (1);
1134         }
1135
1136         return (0);
1137 }
1138
1139 static void
1140 __lock_printlock(lt, lp, ispgno)
1141         DB_LOCKTAB *lt;
1142         struct __db_lock *lp;
1143         int ispgno;
1144 {
1145         DB_LOCKOBJ *lockobj;
1146         db_pgno_t pgno;
1147         size_t obj;
1148         u_int8_t *ptr;
1149         const char *mode, *status;
1150
1151         switch (lp->mode) {
1152         case DB_LOCK_IREAD:
1153                 mode = "IREAD";
1154                 break;
1155         case DB_LOCK_IWR:
1156                 mode = "IWR";
1157                 break;
1158         case DB_LOCK_IWRITE:
1159                 mode = "IWRITE";
1160                 break;
1161         case DB_LOCK_NG:
1162                 mode = "NG";
1163                 break;
1164         case DB_LOCK_READ:
1165                 mode = "READ";
1166                 break;
1167         case DB_LOCK_WRITE:
1168                 mode = "WRITE";
1169                 break;
1170         default:
1171                 mode = "UNKNOWN";
1172                 break;
1173         }
1174         switch (lp->status) {
1175         case DB_LSTAT_ABORTED:
1176                 status = "ABORT";
1177                 break;
1178         case DB_LSTAT_ERR:
1179                 status = "ERROR";
1180                 break;
1181         case DB_LSTAT_FREE:
1182                 status = "FREE";
1183                 break;
1184         case DB_LSTAT_HELD:
1185                 status = "HELD";
1186                 break;
1187         case DB_LSTAT_NOGRANT:
1188                 status = "NONE";
1189                 break;
1190         case DB_LSTAT_WAITING:
1191                 status = "WAIT";
1192                 break;
1193         case DB_LSTAT_PENDING:
1194                 status = "PENDING";
1195                 break;
1196         default:
1197                 status = "UNKNOWN";
1198                 break;
1199         }
1200         printf("\t%lx\t%s\t%lu\t%s\t",
1201             (u_long)lp->holder, mode, (u_long)lp->refcount, status);
1202
1203         lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
1204         ptr = SH_DBT_PTR(&lockobj->lockobj);
1205         if (ispgno) {
1206                 /* Assume this is a DBT lock. */
1207                 memcpy(&pgno, ptr, sizeof(db_pgno_t));
1208                 printf("page %lu\n", (u_long)pgno);
1209         } else {
1210                 obj = (u_int8_t *)lp + lp->obj - (u_int8_t *)lt->region;
1211                 printf("0x%lx ", (u_long)obj);
1212                 __db_pr(ptr, lockobj->lockobj.size);
1213                 printf("\n");
1214         }
1215 }
1216 #endif
1217
1218 static int
1219 __lock_count_locks(lrp)
1220         DB_LOCKREGION *lrp;
1221 {
1222         struct __db_lock *newl;
1223         int count;
1224
1225         count = 0;
1226         for (newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
1227             newl != NULL;
1228             newl = SH_TAILQ_NEXT(newl, links, __db_lock))
1229                 count++;
1230
1231         return (count);
1232 }
1233
1234 static int
1235 __lock_count_objs(lrp)
1236         DB_LOCKREGION *lrp;
1237 {
1238         DB_LOCKOBJ *obj;
1239         int count;
1240
1241         count = 0;
1242         for (obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
1243             obj != NULL;
1244             obj = SH_TAILQ_NEXT(obj, links, __db_lockobj))
1245                 count++;
1246
1247         return (count);
1248 }
1249
1250 /*
1251  * PUBLIC: int __lock_getobj  __P((DB_LOCKTAB *,
1252  * PUBLIC:     u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **));
1253  */
1254 int
1255 __lock_getobj(lt, locker, dbt, type, objp)
1256         DB_LOCKTAB *lt;
1257         u_int32_t locker, type;
1258         const DBT *dbt;
1259         DB_LOCKOBJ **objp;
1260 {
1261         DB_LOCKREGION *lrp;
1262         DB_LOCKOBJ *sh_obj;
1263         u_int32_t obj_size;
1264         int ret;
1265         void *p, *src;
1266
1267         lrp = lt->region;
1268
1269         /* Look up the object in the hash table. */
1270         if (type == DB_LOCK_OBJTYPE) {
1271                 HASHLOOKUP(lt->hashtab, __db_lockobj, links, dbt, sh_obj,
1272                     lrp->table_size, __lock_ohash, __lock_cmp);
1273                 obj_size = dbt->size;
1274         } else {
1275                 HASHLOOKUP(lt->hashtab, __db_lockobj, links, locker,
1276                     sh_obj, lrp->table_size, __lock_locker_hash,
1277                     __lock_locker_cmp);
1278                 obj_size = sizeof(locker);
1279         }
1280
1281         /*
1282          * If we found the object, then we can just return it.  If
1283          * we didn't find the object, then we need to create it.
1284          */
1285         if (sh_obj == NULL) {
1286                 /* Create new object and then insert it into hash table. */
1287                 if ((sh_obj =
1288                     SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) {
1289                         if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
1290                                 return (ret);
1291                         lrp = lt->region;
1292                         sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
1293                 }
1294
1295                 /*
1296                  * If we can fit this object in the structure, do so instead
1297                  * of shalloc-ing space for it.
1298                  */
1299                 if (obj_size <= sizeof(sh_obj->objdata))
1300                         p = sh_obj->objdata;
1301                 else
1302                         if ((ret =
1303                             __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
1304                                 if ((ret = __lock_grow_region(lt,
1305                                     DB_LOCK_MEM, obj_size)) != 0)
1306                                         return (ret);
1307                                 lrp = lt->region;
1308                                 /* Reacquire the head of the list. */
1309                                 sh_obj = SH_TAILQ_FIRST(&lrp->free_objs,
1310                                     __db_lockobj);
1311                                 (void)__db_shalloc(lt->mem, obj_size, 0, &p);
1312                         }
1313
1314                 src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
1315                 memcpy(p, src, obj_size);
1316
1317                 sh_obj->type = type;
1318                 SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);
1319
1320                 SH_TAILQ_INIT(&sh_obj->waiters);
1321                 if (type == DB_LOCK_LOCKER)
1322                         SH_LIST_INIT(&sh_obj->heldby);
1323                 else
1324                         SH_TAILQ_INIT(&sh_obj->holders);
1325                 sh_obj->lockobj.size = obj_size;
1326                 sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);
1327
1328                 HASHINSERT(lt->hashtab,
1329                     __db_lockobj, links, sh_obj, lrp->table_size, __lock_lhash);
1330
1331                 if (type == DB_LOCK_LOCKER)
1332                         lrp->nlockers++;
1333         }
1334
1335         *objp = sh_obj;
1336         return (0);
1337 }
1338
1339 /*
1340  * Any lock on the waitlist has a process waiting for it.  Therefore, we
1341  * can't return the lock to the freelist immediately.  Instead, we can
1342  * remove the lock from the list of waiters, set the status field of the
1343  * lock, and then let the process waking up return the lock to the
1344  * free list.
1345  */
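/*
 * The waking side of this handoff is in __lock_get_internal: the waiter
 * blocked in __db_mutex_lock on the lock's own mutex resumes, reacquires
 * the region lock, inspects the status set here (e.g. DB_LSTAT_NOGRANT or
 * DB_LSTAT_ABORTED) and returns the lock structure to the free list itself.
 */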
1346 static void
1347 __lock_remove_waiter(lt, sh_obj, lockp, status)
1348         DB_LOCKTAB *lt;
1349         DB_LOCKOBJ *sh_obj;
1350         struct __db_lock *lockp;
1351         db_status_t status;
1352 {
1353         SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
1354         lockp->status = status;
1355
1356         /* Wake whoever is waiting on this lock. */
1357         (void)__db_mutex_unlock(&lockp->mutex, lt->fd);
1358 }
1359
1360 static void
1361 __lock_freeobj(lt, obj)
1362         DB_LOCKTAB *lt;
1363         DB_LOCKOBJ *obj;
1364 {
1365         HASHREMOVE_EL(lt->hashtab,
1366             __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
1367         if (obj->lockobj.size > sizeof(obj->objdata))
1368                 __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
1369         SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
1370 }
1371
1372 static void
1373 __lock_checklocker(lt, lockp, do_remove)
1374         DB_LOCKTAB *lt;
1375         struct __db_lock *lockp;
1376         int do_remove;
1377 {
1378         DB_LOCKOBJ *sh_locker;
1379
1380         if (do_remove)
1381                 SH_LIST_REMOVE(lockp, locker_links, __db_lock);
1382
1383         /* If the locker no longer holds any locks, free up the locker object. */
1384         if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker)
1385             == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) {
1386                 __lock_freeobj(lt, sh_locker);
1387                 lt->region->nlockers--;
1388         }
1389 }
1390
1391 static void
1392 __lock_reset_region(lt)
1393         DB_LOCKTAB *lt;
1394 {
1395         lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
1396         lt->hashtab =
1397             (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
1398         lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
1399         lt->reg_size = lt->region->hdr.size;
1400 }