2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1995, 1996
9 * The President and Fellows of Harvard University. All rights reserved.
11 * This code is derived from software contributed to Harvard by
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * Identification string for this source file: file name, revision 10.15,
 * vendor and date.  The leading @(#) sequence is the marker that the
 * what(1) utility searches for in object files.
 */
46 static const char sccsid[] = "@(#)db_region.c 10.15 (Sleepycat) 10/25/97";
49 #ifndef NO_SYSTEM_INCLUDES
50 #include <sys/types.h>
62 #include "common_ext.h"
/*
 * Forward declaration of the file-local mapping helper.  It is called by
 * __db_rcreate, __db_ropen and __db_rremap in this file, all of which
 * appear before its definition.
 */
64 static int __db_rmap __P((DB_ENV *, int, size_t, void *));
69 * Common interface for creating a shared region. Handles synchronization
70 * across multiple processes.
72 * The dbenv contains the environment for this process, including naming
73 * information. The path argument represents the parameters passed to
74 * the open routines and may be either a file or a directory. If it is
75 * a directory, it must exist. If it is a file, then the file parameter
76 * must be NULL, otherwise, file is the name to be created inside the
79 * The function returns an int status (see the prototype below), not a
80 * pointer; the mapped region is presumably passed back through retp.
82 * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
83 * PUBLIC: const char *, const char *, int, size_t, int *, void *));
/*
 * __db_rcreate --
 *	Create a new region file, size it, map it and initialize it.
 *
 * Steps performed by the statements below, in order:
 *	1. __db_appname() resolves the file name into name.  It is also
 *	   given &fd, so it can hand back an already-open descriptor (the
 *	   temporary-file case described in the first comment).
 *	2. Only when no descriptor came back (fd == -1) is the file opened
 *	   here, with DB_CREATE | DB_EXCL, so that of two simultaneous
 *	   creators one open fails.
 *	3. __db_rgrow() is called with size as the growth increment.
 *	4. __db_rmap() maps size bytes of fd and stores the mapping in rp.
 *	5. __db_rinit() initializes the region with lock_region passed as 1.
 *
 * The last three calls unmap rp, unlink name and close fd, discarding
 * their return values.
 * NOTE(review): these look like the error-path cleanup -- confirm.
 */
86 __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
89 const char *path, *file;
102 * Get the filename -- note, if it's a temporary file, it will
103 * be created by the underlying temporary file creation code,
104 * so we have to check the file descriptor to be sure it's an
107 if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0)
111 * Now open the file. We need to make sure that multiple processes
112 * that attempt to create the region at the same time are properly
113 * ordered, so we open it O_EXCL and O_CREAT so two simultaneous
114 * attempts to create the region will return failure in one of the
117 if (fd == -1 && (ret = __db_open(name,
118 DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
121 "region create: %s: %s", name, strerror(ret));
126 /* Grow the region to the correct size. */
127 if ((ret = __db_rgrow(dbenv, fd, size)) != 0)
130 /* Map the region in. */
131 if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
134 /* Initialize the region. */
135 if ((ret = __db_rinit(dbenv, rp, fd, size, 1)) != 0)
146 (void)__db_unmap(rp, rp->size);
147 (void)__db_unlink(name);
148 (void)__db_close(fd);
157 * Initialize the region.
159 * PUBLIC: int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int));
162 __db_rinit(dbenv, rp, fd, size, lock_region)
171 * Initialize the common information.
174 * We have to order the region creates so that two processes don't try
175 * to simultaneously create the region and so that processes that are
176 * joining the region never see inconsistent data. We'd like to play
177 * file permissions games, but we can't because WNT filesystems won't
178 * open a file mode 0.
180 * If the lock_region flag is set, the process creating the region
181 * acquires the lock before setting the version number. Any
182 * process joining the region checks the version number before
183 * attempting to acquire the lock. (The lock_region flag may not be
184 * set -- the mpool code sometimes malloc's private regions but still
185 * needs to initialize them, specifically, the mutex for threads.)
187 * We have to check the version number first, because if the version
188 * number has not been written, it's possible that the mutex has not
189 * been initialized in which case an attempt to get it could lead to
190 * random behavior. If the version number isn't there (the file size
191 * is too small) or it's 0, we know that the region is being created.
/*
 * Order matters here: the region mutex is initialized first, it is then
 * acquired when lock_region is set, and db_version() fills in the version
 * fields last.  A process joining the region tests rp->majver before it
 * touches the mutex, so a non-zero version tells it that the mutex has
 * already been initialized.
 */
193 __db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
194 if (lock_region && (ret = __db_mutex_lock(&rp->lock, fd)) != 0)
200 db_version(&rp->majver, &rp->minver, &rp->patch);
207 * Construct the name of a file, open it and map it in.
209 * PUBLIC: int __db_ropen __P((DB_ENV *,
210 * PUBLIC: APPNAME, const char *, const char *, int, int *, void *));
/*
 * __db_ropen --
 *	Construct the region file name, open the file and map it in.
 *
 * What the statements below do, in order:
 *	- __db_appname() resolves the name and __db_open() opens it.
 *	- __db_ioinfo() reads the file size into size1, which is compared
 *	  with sizeof(RLAYOUT) to see whether the first block has been
 *	  written.
 *	- __db_rmap() maps size1 bytes into rp.
 *	- rp->majver is tested against 0 before the mutex is touched,
 *	  because the lock may not have been initialized yet.
 *	- The region lock is taken unless LF_ISSET(DB_MUTEXDEBUG).
 *	- __db_ioinfo() reads the size again into size2, and size1 != size2
 *	  detects a file that was only half-written at the first
 *	  measurement.
 *	- DB_R_DELETED is tested in case the file has just been deleted.
 *	- The lock is released, again unless LF_ISSET(DB_MUTEXDEBUG).
 *
 * Error labels: err1 releases the region lock (skipped under
 * DB_MUTEXDEBUG) and runs straight into err2, which unmaps rp when it is
 * non-NULL.  __db_close(fd) follows.
 */
213 __db_ropen(dbenv, appname, path, file, flags, fdp, retp)
216 const char *path, *file;
228 /* Get the filename. */
229 if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
233 if ((ret = __db_open(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
234 __db_err(dbenv, "region open: %s: %s", name, strerror(ret));
241 * Map the file in. We have to do things in a strange order so that
242 * we don't get into a situation where the file was just created and
243 * isn't yet initialized. See the comment in __db_rcreate() above.
246 * We'd like to test to see if the file is too big to mmap. Since we
247 * don't know what size or type off_t's or size_t's are, or the largest
248 * unsigned integral type is, or what random insanity the local C
249 * compiler will perpetrate, doing the comparison in a portable way is
250 * flatly impossible. Hope that mmap fails if the file is too large.
253 if ((ret = __db_ioinfo(name, fd, &size1, NULL)) != 0) {
254 __db_err(dbenv, "%s: %s", name, strerror(ret));
258 /* Check to make sure the first block has been written. */
259 if ((size_t)size1 < sizeof(RLAYOUT)) {
264 /* Map in whatever is there. */
265 if ((ret = __db_rmap(dbenv, fd, size1, &rp)) != 0)
269 * Check to make sure the region has been initialized. We can't just
270 * grab the lock because the lock may not have been initialized yet.
272 if (rp->majver == 0) {
277 /* Get the region lock. */
278 if (!LF_ISSET(DB_MUTEXDEBUG))
279 (void)__db_mutex_lock(&rp->lock, fd);
282 * The file may have been half-written if we were descheduled between
283 * getting the size of the file and checking the major version. Check
284 * to make sure we got the entire file.
286 if ((ret = __db_ioinfo(name, fd, &size2, NULL)) != 0) {
287 __db_err(dbenv, "%s: %s", name, strerror(ret));
290 if (size1 != size2) {
295 /* The file may have just been deleted. */
296 if (F_ISSET(rp, DB_R_DELETED)) {
301 /* Increment the reference count. */
304 /* Release the lock. */
305 if (!LF_ISSET(DB_MUTEXDEBUG))
306 (void)__db_mutex_unlock(&rp->lock, fd);
313 err1: if (!LF_ISSET(DB_MUTEXDEBUG))
314 (void)__db_mutex_unlock(&rp->lock, fd);
315 err2: if (rp != NULL)
316 (void)__db_unmap(rp, rp->size);
318 (void)__db_close(fd);
325 * Close a shared memory region.
327 * PUBLIC: int __db_rclose __P((DB_ENV *, int, void *));
/*
 * __db_rclose --
 *	Detach from a shared region: take the region lock, adjust the
 *	reference count, release the lock, unmap the memory and close fd.
 *
 * The unlock, unmap and close results are collected in t_ret.  Each of
 * those steps is recorded as the failure only while fail is still NULL,
 * so the message printed at err (region detach) names the first step
 * that went wrong rather than the last.
 */
330 __db_rclose(dbenv, fd, ptr)
343 if ((ret = __db_mutex_lock(&rp->lock, fd)) != 0) {
348 /* Decrement the reference count. */
351 /* Release the lock. */
352 if ((t_ret = __db_mutex_unlock(&rp->lock, fd)) != 0 && fail == NULL) {
354 fail = "lock release";
357 /* Discard the region. */
358 if ((t_ret = __db_unmap(ptr, rp->size)) != 0 && fail == NULL) {
363 if ((t_ret = __db_close(fd)) != 0 && fail == NULL) {
371 err: __db_err(dbenv, "region detach: %s: %s", fail, strerror(ret));
377 * Remove a shared memory region.
379 * PUBLIC: int __db_runlink __P((DB_ENV *,
380 * PUBLIC: APPNAME, const char *, const char *, int));
/*
 * __db_runlink --
 *	Remove a shared memory region file.
 *
 * Outline of the logic below:
 *	- Resolve the name; if __db_exists() returns non-zero the file is
 *	  taken to be absent and 0 is returned.
 *	- In the force case the file is simply unlinked, and ENOENT from
 *	  __db_unlink() is not treated as an error because another process
 *	  may have removed the file after the existence check.
 *	- Otherwise the region is opened with __db_ropen() and locked.
 *	  DB_R_DELETED already set (ret = ENOENT) and refcnt > 1 (someone
 *	  else is attached) are both failure cases.
 *	- DB_R_DELETED is then set so that later openers fail, the lock is
 *	  released and the region is detached with __db_rclose().
 *	- __db_unlink() is attempted in a loop of at most 5 iterations with
 *	  __db_sleep(0, 250000) between attempts.
 *	  NOTE(review): presumably 0 seconds plus 250000 microseconds --
 *	  confirm against __db_sleep.
 *	- If the unlink never succeeds, the region is reopened and
 *	  DB_R_DELETED is cleared again.
 *
 * Error labels: err2 unlocks and detaches the region, then runs into
 * err1, which reports the failure with the region unlink message.
 */
383 __db_runlink(dbenv, appname, path, file, force)
386 const char *path, *file;
390 int cnt, fd, ret, t_ret;
395 /* Get the filename. */
396 if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
399 /* If the file doesn't exist, we're done. */
400 if (__db_exists(name, NULL))
401 return (0); /* XXX: ENOENT? */
404 * If we're called with a force flag, try and unlink the file. This
405 * may not succeed if the file is currently open, but there's nothing
406 * we can do about that. There is a race condition between the check
407 * for existence above and the actual unlink. If someone else snuck
408 * in and removed it before we do the remove, then we might get an
409 * ENOENT error. If we get the ENOENT, we treat it as success, just
413 if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
419 /* Open and lock the region. */
420 if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
422 (void)__db_mutex_lock(&rp->lock, fd);
424 /* If the region is currently being deleted, fail. */
425 if (F_ISSET(rp, DB_R_DELETED)) {
426 ret = ENOENT; /* XXX: ENOENT? */
430 /* If the region is currently in use by someone else, fail. */
431 if (rp->refcnt > 1) {
436 /* Set the delete flag. */
437 F_SET(rp, DB_R_DELETED);
439 /* Release the lock and close the region. */
440 (void)__db_mutex_unlock(&rp->lock, fd);
441 if ((t_ret = __db_rclose(dbenv, fd, rp)) != 0 && ret == 0)
445 * Unlink the region. There's a race here -- other threads or
446 * processes might be opening the region while we're trying to
447 * remove it. They'll fail, because we've set the DELETED flag,
448 * but they could still stop us from succeeding in the unlink.
450 for (cnt = 5; cnt > 0; --cnt) {
451 if ((ret = __db_unlink(name)) == 0)
453 (void)__db_sleep(0, 250000);
460 /* Not a clue. Try to clear the DB_R_DELETED flag. */
461 if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
463 (void)__db_mutex_lock(&rp->lock, fd);
464 F_CLR(rp, DB_R_DELETED);
467 err2: (void)__db_mutex_unlock(&rp->lock, fd);
468 (void)__db_rclose(dbenv, fd, rp);
469 err1: __db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
475 * DB creates all regions on 4K boundaries so that we don't make the
476 * underlying VM unhappy.
/*
 * Region page size in bytes (4096).  __db_rgrow() uses it both as the size
 * of its zero-filled write buffer and as the unit to which the growth
 * increment is rounded up.
 */
478 #define __DB_VMPAGESIZE (4 * 1024)
482 * Extend a region by a specified amount.
484 * PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t));
/*
 * __db_rgrow --
 *	Extend the region file behind fd by incr bytes of zeros.
 *
 * The function seeks to the end of the file and zero-fills a buffer of
 * __DB_VMPAGESIZE bytes.  It then grows the file in one of two ways,
 * selected at compile time:
 *	- with MMAP_INIT_NEEDED defined, the buffer is written once per
 *	  page of incr, so every new byte is actually written;
 *	- in the other branch, incr is rounded up to a multiple of
 *	  __DB_VMPAGESIZE, the file offset is moved forward by incr minus
 *	  one page, and only that final page is written.
 *
 * After each write, nw is compared with sizeof(buf) so that a short write
 * is detected separately from a write error.  Failures are reported at err
 * with the region grow message.
 */
487 __db_rgrow(dbenv, fd, incr)
492 #ifdef MMAP_INIT_NEEDED
497 char buf[__DB_VMPAGESIZE];
499 /* Seek to the end of the region. */
500 if ((ret = __db_seek(fd, 0, 0, 0, SEEK_END)) != 0)
503 /* Write nuls to the new bytes. */
504 memset(buf, 0, sizeof(buf));
507 * Historically, some systems required that all of the bytes of the
508 * region be written before you could mmap it and access it randomly.
510 #ifdef MMAP_INIT_NEEDED
511 /* Extend the region by writing each new page. */
512 for (i = 0; i < incr; i += __DB_VMPAGESIZE) {
513 if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
515 if (nw != sizeof(buf))
520 * Extend the region by writing the last page.
522 * Round off the increment to the next page boundary.
524 incr += __DB_VMPAGESIZE - 1;
525 incr -= incr % __DB_VMPAGESIZE;
527 /* Write the last page, not the page after the last. */
528 if ((ret = __db_seek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
530 if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
532 if (nw != sizeof(buf))
538 err: __db_err(dbenv, "region grow: %s", strerror(ret));
544 * Unmap the old region and map in a new region of a new size. The
545 * prototype below returns int (the __db_rmap() result), not an address.
548 * PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
/*
 * __db_rremap --
 *	Replace a mapping: unmap oldsize bytes at ptr, then map newsize
 *	bytes of fd through __db_rmap(), passing retp along.  The int
 *	result of __db_rmap() becomes the return value.  An unmap failure
 *	is reported through __db_err() with the region remap message.
 */
551 __db_rremap(dbenv, ptr, oldsize, newsize, fd, retp)
554 size_t oldsize, newsize;
559 if ((ret = __db_unmap(ptr, oldsize)) != 0) {
560 __db_err(dbenv, "region remap: munmap: %s", strerror(ret));
564 return (__db_rmap(dbenv, fd, newsize, retp));
569 * Attach to a shared memory region.
/*
 * __db_rmap --
 *	Map size bytes of fd with __db_map(), receiving the mapping in rp.
 *	A non-zero result from __db_map() is reported through __db_err()
 *	together with its strerror() text.
 */
572 __db_rmap(dbenv, fd, size, retp)
581 if ((ret = __db_map(fd, size, 0, 0, (void **)&rp)) != 0) {
582 __db_err(dbenv, "region map: mmap %s", strerror(ret));