Update from db-2.3.16.
[kopensolaris-gnu/glibc.git] / db2 / hash / hash.c
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997
5  *      Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994
9  *      Margo Seltzer.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994
13  *      The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Margo Seltzer.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. All advertising materials mentioning features or use of this software
27  *    must display the following acknowledgement:
28  *      This product includes software developed by the University of
29  *      California, Berkeley and its contributors.
30  * 4. Neither the name of the University nor the names of its contributors
31  *    may be used to endorse or promote products derived from this software
32  *    without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46
47 #include "config.h"
48
49 #ifndef lint
50 static const char sccsid[] = "@(#)hash.c        10.36 (Sleepycat) 1/8/98";
51 #endif /* not lint */
52
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
55 #include <sys/stat.h>
56
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63 #endif
64
65 #include "shqueue.h"
66 #include "db_int.h"
67 #include "db_page.h"
68 #include "db_am.h"
69 #include "db_ext.h"
70 #include "hash.h"
71 #include "log.h"
72
73 static int  __ham_c_close __P((DBC *));
74 static int  __ham_c_del __P((DBC *, int));
75 static int  __ham_c_get __P((DBC *, DBT *, DBT *, int));
76 static int  __ham_c_put __P((DBC *, DBT *, DBT *, int));
77 static int  __ham_c_init __P((DB *, DB_TXN *, DBC **));
78 static int  __ham_cursor __P((DB *, DB_TXN *, DBC **));
79 static int  __ham_delete __P((DB *, DB_TXN *, DBT *, int));
80 static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
81 static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
82 static void __ham_init_htab __P((HTAB *, u_int));
83 static int  __ham_lookup __P((HTAB *,
84                 HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
85 static int  __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
86 static int  __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
87 static int  __ham_sync __P((DB *, int));
88
89 /************************** INTERFACE ROUTINES ***************************/
90 /* OPEN/CLOSE */
91
92 /*
93  * __ham_open --
94  *
95  * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
96  */
97 int
98 __ham_open(dbp, dbinfo)
99         DB *dbp;
100         DB_INFO *dbinfo;
101 {
102         DB_ENV *dbenv;
103         DBC *curs;
104         HTAB *hashp;
105         int file_existed, ret;
106
107         dbenv = dbp->dbenv;
108
109         if ((hashp = (HTAB *)__db_calloc(1, sizeof(HTAB))) == NULL)
110                 return (ENOMEM);
111         hashp->dbp = dbp;
112
113         /* Set the hash function if specified by the user. */
114         if (dbinfo != NULL && dbinfo->h_hash != NULL)
115                 hashp->hash = dbinfo->h_hash;
116
117         /*
118          * Initialize the remaining fields of the dbp.  The type, close and
119          * fd functions are all set in db_open.
120          */
121         dbp->internal = hashp;
122         dbp->cursor = __ham_cursor;
123         dbp->del = __ham_delete;
124         dbp->get = __ham_get;
125         dbp->put = __ham_put;
126         dbp->sync = __ham_sync;
127
128         /* If locking is turned on, lock the meta data page. */
129         if (F_ISSET(dbp, DB_AM_LOCKING)) {
130                 dbp->lock.pgno = BUCKET_INVALID;
131                 if ((ret = lock_get(dbenv->lk_info, dbp->locker,
132                     0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
133                         if (ret < 0)
134                                 ret = EAGAIN;
135                         goto out;
136                 }
137         }
138
139         /*
140          * Now, we can try to read the meta-data page and figure out
141          * if we set up locking and get the meta-data page properly.
142          * If this is a new file, initialize it, and put it back dirty.
143          */
144         if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
145                 goto out;
146
147         /* Initialize the hashp structure */
148         if (hashp->hdr->magic == DB_HASHMAGIC) {
149                 file_existed = 1;
150                 /* File exists, verify the data in the header. */
151                 if (hashp->hash == NULL)
152                         hashp->hash =
153                             hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
154                 if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
155                     hashp->hdr->h_charkey) {
156                         __db_err(hashp->dbp->dbenv,
157                             "hash: incompatible hash function");
158                         ret = EINVAL;
159                         goto out;
160                 }
161                 if (F_ISSET(hashp->hdr, DB_HASH_DUP))
162                         F_SET(dbp, DB_AM_DUP);
163         } else {
164                 /*
165                  * File does not exist, we must initialize the header.  If
166                  * locking is enabled that means getting a write lock first.
167                  */
168                 file_existed = 0;
169                 if (F_ISSET(dbp, DB_AM_LOCKING) &&
170                     ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
171                     (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
172                         &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
173                         if (ret < 0)
174                                 ret = EAGAIN;
175                         goto out;
176                 }
177
178                 hashp->hdr->ffactor =
179                     dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
180                 __ham_init_htab(hashp, dbinfo != NULL ? dbinfo->h_nelem : 0);
181                 if (F_ISSET(dbp, DB_AM_DUP))
182                         F_SET(hashp->hdr, DB_HASH_DUP);
183                 if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
184                         goto out;
185         }
186
187         /* Initialize the default cursor. */
188         __ham_c_init(dbp, NULL, &curs);
189         TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
190
191         /* Allocate memory for our split buffer. */
192         if ((hashp->split_buf = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
193                 ret = ENOMEM;
194                 goto out;
195         }
196
197 #ifdef NO_STATISTICS_FOR_DB_ERR
198         __db_err(dbp->dbenv,
199             "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
200             "TABLE POINTER   ", (long)hashp,
201             "BUCKET SIZE     ", (long)hashp->hdr->pagesize,
202             "FILL FACTOR     ", (long)hashp->hdr->ffactor,
203             "MAX BUCKET      ", (long)hashp->hdr->max_bucket,
204             "OVFL POINT      ", (long)hashp->hdr->ovfl_point,
205             "LAST FREED      ", (long)hashp->hdr->last_freed,
206             "HIGH MASK       ", (long)hashp->hdr->high_mask,
207             "LOW  MASK       ", (long)hashp->hdr->low_mask,
208             "NELEM           ", (long)hashp->hdr->nelem,
209             "FLAGS           ", (long)hashp->hdr->flags);
210 #endif
211
212         /* Release the meta data page */
213         (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
214         if (F_ISSET(dbp, DB_AM_LOCKING) &&
215             (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
216                 if (ret < 0)
217                         ret = EAGAIN;
218                 goto out;
219         }
220
221         hashp->hlock = 0;
222         hashp->hdr = NULL;
223         /* Sync the file so that we know that the meta data goes to disk. */
224         if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
225                 goto out;
226         return (0);
227
228 out:    (void)__ham_close(dbp);
229         return (ret);
230 }
231
232 /*
233  * PUBLIC: int  __ham_close __P((DB *));
234  */
235 int
236 __ham_close(dbp)
237         DB *dbp;
238 {
239         HTAB *hashp;
240         int ret, t_ret;
241
242         DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
243         hashp = (HTAB *)dbp->internal;
244         ret = 0;
245
246         /* Free the split page. */
247         if (hashp->split_buf)
248                 FREE(hashp->split_buf, dbp->pgsize);
249
250         if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
251             (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
252                 ret = t_ret;
253         if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
254             hashp->hlock)) != 0 && ret == 0)
255                 ret = t_ret;
256
257         FREE(hashp, sizeof(HTAB));
258         dbp->internal = NULL;
259         return (ret);
260 }
261
262 /************************** LOCAL CREATION ROUTINES **********************/
263 /*
264  * Returns 0 on No Error
265  */
266 static void
267 __ham_init_htab(hashp, nelem)
268         HTAB *hashp;
269         u_int nelem;
270 {
271         int32_t l2, nbuckets;
272
273         hashp->hdr->nelem = 0;
274         hashp->hdr->pagesize = hashp->dbp->pgsize;
275         ZERO_LSN(hashp->hdr->lsn);
276         hashp->hdr->magic = DB_HASHMAGIC;
277         hashp->hdr->version = DB_HASHVERSION;
278         if (hashp->hash == NULL)
279                 hashp->hash =
280                     hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
281         hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
282         if (nelem != 0 && hashp->hdr->ffactor != 0) {
283                 nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
284                 l2 = __db_log2(nelem > 2 ? nelem : 2);
285         } else
286                 l2 = 2;
287
288         nbuckets = 1 << l2;
289
290         hashp->hdr->spares[l2] = 0;
291         hashp->hdr->spares[l2 + 1] = 0;
292         hashp->hdr->ovfl_point = l2;
293         hashp->hdr->last_freed = PGNO_INVALID;
294
295         hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
296         hashp->hdr->low_mask = (nbuckets >> 1) - 1;
297         memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
298 }
299
300 /********************** DESTROY/CLOSE ROUTINES ************************/
301
302
303 /*
304  * Write modified pages to disk
305  *
306  * Returns:
307  *       0 == OK
308  *      -1 ERROR
309  */
310 static int
311 __ham_sync(dbp, flags)
312         DB *dbp;
313         int flags;
314 {
315         int ret;
316
317         DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
318         if ((ret = __db_syncchk(dbp, flags)) != 0)
319                 return (ret);
320         if (F_ISSET(dbp, DB_AM_RDONLY))
321                 return (0);
322
323         if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
324                 ret = 0;
325
326         return (ret);
327 }
328
329 /*******************************SEARCH ROUTINES *****************************/
330 /*
331  * All the access routines return
332  *
333  * Returns:
334  *       0 on SUCCESS
335  *       1 to indicate an external ERROR (i.e. key not found, etc)
336  *      -1 to indicate an internal ERROR (i.e. out of memory, etc)
337  */
338
339 static int
340 __ham_get(dbp, txn, key, data, flags)
341         DB *dbp;
342         DB_TXN *txn;
343         DBT *key;
344         DBT *data;
345         int flags;
346 {
347         DB *ldbp;
348         DBC *cp;
349         HTAB *hashp;
350         HASH_CURSOR *hcp;
351         int ret, t_ret;
352
353         DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
354         if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
355                 return (ret);
356
357         ldbp = dbp;
358         if (F_ISSET(dbp, DB_AM_THREAD) &&
359             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
360                 return (ret);
361
362         hashp = (HTAB *)ldbp->internal;
363         SET_LOCKER(ldbp, txn);
364         GET_META(ldbp, hashp);
365         cp = TAILQ_FIRST(&ldbp->curs_queue);
366
367         hashp->hash_accesses++;
368         hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
369         if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
370                 if (F_ISSET(hcp, H_OK))
371                         ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
372                 else /* Key was not found */
373                         ret = DB_NOTFOUND;
374
375         if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
376                 ret = t_ret;
377         RELEASE_META(ldbp, hashp);
378         if (F_ISSET(dbp, DB_AM_THREAD))
379                 __db_puthandle(ldbp);
380         return (ret);
381 }
382
383 static int
384 __ham_put(dbp, txn, key, data, flags)
385         DB *dbp;
386         DB_TXN *txn;
387         DBT *key;
388         DBT *data;
389         int flags;
390 {
391         DB *ldbp;
392         HTAB *hashp;
393         HASH_CURSOR *hcp;
394         DBT tmp_val, *myval;
395         int ret, t_ret;
396         u_int32_t nbytes;
397
398         DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
399         if ((ret = __db_putchk(dbp, key, data,
400             flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
401                 return (ret);
402
403         ldbp = dbp;
404         if (F_ISSET(dbp, DB_AM_THREAD) &&
405             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
406                 return (ret);
407
408         hashp = (HTAB *)ldbp->internal;
409         SET_LOCKER(ldbp, txn);
410         GET_META(ldbp, hashp);
411         hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
412
413         nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
414             HKEYDATA_PSIZE(key->size)) +
415             (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
416             HKEYDATA_PSIZE(data->size));
417
418         hashp->hash_accesses++;
419         ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
420
421         if (ret == DB_NOTFOUND) {
422                 ret = 0;
423                 if (hcp->seek_found_page != PGNO_INVALID &&
424                     hcp->seek_found_page != hcp->pgno) {
425                         if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
426                                 goto out;
427                         hcp->pgno = hcp->seek_found_page;
428                         hcp->bndx = NDX_INVALID;
429                 }
430
431                 if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
432                         /*
433                          * Doing a partial put, but the key does not exist
434                          * and we are not beginning the write at 0.  We
435                          * must create a data item padded up to doff and
436                          * then write the new bytes represented by val.
437                          */
438                         ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
439                             &hcp->big_data, &hcp->big_datalen);
440                         if (ret == 0) {
441                                 memset(tmp_val.data, 0, data->doff);
442                                 memcpy((u_int8_t *)tmp_val.data + data->doff,
443                                     data->data, data->size);
444                                 myval = &tmp_val;
445                         }
446                 } else
447                         myval = (DBT *)data;
448
449                 if (ret == 0)
450                         ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
451         } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
452                 if (flags == DB_NOOVERWRITE)
453                         ret = DB_KEYEXIST;
454                 else if (F_ISSET(ldbp, DB_AM_DUP))
455                         ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
456                 else
457                         ret = __ham_overwrite(hashp, hcp, data);
458         }
459
460         /* Free up all the cursor pages. */
461         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
462                 ret = t_ret;
463         /* Now check if we have to grow. */
464 out:    if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
465                 ret = __ham_expand_table(hashp);
466                 F_CLR(hcp, H_EXPAND);
467         }
468
469         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
470                 ret = t_ret;
471         RELEASE_META(ldbp, hashp);
472         if (F_ISSET(dbp, DB_AM_THREAD))
473                 __db_puthandle(ldbp);
474         return (ret);
475 }
476
477 static int
478 __ham_cursor(dbp, txnid, dbcp)
479         DB *dbp;
480         DB_TXN *txnid;
481         DBC **dbcp;
482 {
483         int ret;
484
485         DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
486         if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
487                 return (ret);
488
489         DB_THREAD_LOCK(dbp);
490         TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
491         DB_THREAD_UNLOCK(dbp);
492         return (ret);
493 }
494
495 static int
496 __ham_c_init(dbp, txnid, dbcp)
497         DB *dbp;
498         DB_TXN *txnid;
499         DBC **dbcp;
500 {
501         DBC *db_curs;
502         HASH_CURSOR *new_curs;
503
504         if ((db_curs = (DBC *)__db_calloc(sizeof(DBC), 1)) == NULL)
505                 return (ENOMEM);
506
507         if ((new_curs =
508             (HASH_CURSOR *)__db_calloc(sizeof(struct cursor_t), 1)) == NULL) {
509                 FREE(db_curs, sizeof(DBC));
510                 return (ENOMEM);
511         }
512
513         db_curs->internal = new_curs;
514         db_curs->c_close = __ham_c_close;
515         db_curs->c_del = __ham_c_del;
516         db_curs->c_get = __ham_c_get;
517         db_curs->c_put = __ham_c_put;
518         db_curs->txn = txnid;
519         db_curs->dbp = dbp;
520
521         new_curs->db_cursor = db_curs;
522         __ham_item_init(new_curs);
523
524         if (dbcp != NULL)
525                 *dbcp = db_curs;
526         return (0);
527 }
528
529 static int
530 __ham_delete(dbp, txn, key, flags)
531         DB *dbp;
532         DB_TXN *txn;
533         DBT *key;
534         int flags;
535 {
536         DB *ldbp;
537         HTAB *hashp;
538         HASH_CURSOR *hcp;
539         int ret, t_ret;
540
541         DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
542         if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
543                 return (ret);
544
545         ldbp = dbp;
546         if (F_ISSET(dbp, DB_AM_THREAD) &&
547             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
548                 return (ret);
549         hashp = (HTAB *)ldbp->internal;
550         SET_LOCKER(ldbp, txn);
551         GET_META(ldbp, hashp);
552         hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
553
554         hashp->hash_accesses++;
555         if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
556                 if (F_ISSET(hcp, H_OK))
557                         ret = __ham_del_pair(hashp, hcp, 1);
558                 else
559                         ret = DB_NOTFOUND;
560
561         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
562                 ret = t_ret;
563         RELEASE_META(ldbp, hashp);
564         if (F_ISSET(dbp, DB_AM_THREAD))
565                 __db_puthandle(ldbp);
566         return (ret);
567 }
568
569 /* ****************** CURSORS ********************************** */
570 static int
571 __ham_c_close(cursor)
572         DBC *cursor;
573 {
574         DB  *ldbp;
575         int ret;
576
577         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
578         /*
579          * If the pagep, dpagep, and lock fields of the cursor are all NULL,
580          * then there really isn't a need to get a handle here.  However,
581          * the normal case is that at least one of those fields is non-NULL,
582          * and putting those checks in here would couple the ham_item_done
583          * functionality with cursor close which would be pretty disgusting.
584          * Instead, we pay the overhead here of always getting the handle.
585          */
586         ldbp = cursor->dbp;
587         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
588             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
589                 return (ret);
590
591         ret = __ham_c_iclose(ldbp, cursor);
592
593         if (F_ISSET(ldbp, DB_AM_THREAD))
594                 __db_puthandle(ldbp);
595         return (ret);
596 }
597 /*
598  * __ham_c_iclose --
599  *
600  * Internal cursor close routine; assumes it is being passed the correct
601  * handle, rather than getting and putting a handle.
602  *
603  * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
604  */
605 int
606 __ham_c_iclose(dbp, dbc)
607         DB *dbp;
608         DBC *dbc;
609 {
610         HASH_CURSOR *hcp;
611         HTAB *hashp;
612         int ret;
613
614         hashp = (HTAB *)dbp->internal;
615         hcp = (HASH_CURSOR *)dbc->internal;
616         ret = __ham_item_done(hashp, hcp, 0);
617
618         if (hcp->big_key)
619                 FREE(hcp->big_key, hcp->big_keylen);
620         if (hcp->big_data)
621                 FREE(hcp->big_data, hcp->big_datalen);
622
623         /*
624          * All cursors (except the default ones) are linked off the master.
625          * Therefore, when we close the cursor, we have to remove it from
626          * the master, not the local one.
627          * XXX I am always removing from the master; what about local cursors?
628          */
629         DB_THREAD_LOCK(dbc->dbp);
630         TAILQ_REMOVE(&dbc->dbp->curs_queue, dbc, links);
631         DB_THREAD_UNLOCK(dbc->dbp);
632
633         FREE(hcp, sizeof(HASH_CURSOR));
634         FREE(dbc, sizeof(DBC));
635
636         return (ret);
637 }
638
639 static int
640 __ham_c_del(cursor, flags)
641         DBC *cursor;
642         int flags;
643 {
644         DB *ldbp;
645         HTAB *hashp;
646         HASH_CURSOR *hcp;
647         HASH_CURSOR save_curs;
648         db_pgno_t ppgno, chg_pgno;
649         int ret, t_ret;
650
651         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
652         ldbp = cursor->dbp;
653         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
654             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
655                 return (ret);
656         hashp = (HTAB *)ldbp->internal;
657         hcp = (HASH_CURSOR *)cursor->internal;
658         save_curs = *hcp;
659         if ((ret = __db_cdelchk(ldbp, flags,
660             F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
661                 return (ret);
662         if (F_ISSET(hcp, H_DELETED))
663                 return (DB_NOTFOUND);
664
665         SET_LOCKER(hashp->dbp, cursor->txn);
666         GET_META(hashp->dbp, hashp);
667         hashp->hash_accesses++;
668         if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
669                 goto out;
670         if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
671                 /*
672                  * We are about to remove a duplicate from offpage.
673                  *
674                  * There are 4 cases.
675                  * 1. We will remove an item on a page, but there are more
676                  *    items on that page.
677                  * 2. We will remove the last item on a page, but there is a
678                  *    following page of duplicates.
679                  * 3. We will remove the last item on a page, this page was the
680                  *    last page in a duplicate set, but there were dups before
681                  *    it.
682                  * 4. We will remove the last item on a page, removing the last
683                  *    duplicate.
684                  * In case 1 hcp->dpagep is unchanged.
685                  * In case 2 hcp->dpagep comes back pointing to the next dup
686                  *     page.
687                  * In case 3 hcp->dpagep comes back NULL.
688                  * In case 4 hcp->dpagep comes back NULL.
689                  *
690                  * Case 4 results in deleting the pair off the master page.
691                  * The normal code for doing this knows how to delete the
692                  * duplicates, so we will handle this case in the normal code.
693                  */
694                 ppgno = PREV_PGNO(hcp->dpagep);
695                 if (ppgno == PGNO_INVALID &&
696                     NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
697                     NUM_ENT(hcp->dpagep) == 1)
698                         goto normal;
699
700                 /* Remove item from duplicate page. */
701                 chg_pgno = hcp->dpgno;
702                 if ((ret = __db_drem(hashp->dbp,
703                     &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
704                         goto out;
705
706                 if (hcp->dpagep == NULL) {
707                         if (ppgno != PGNO_INVALID) {            /* Case 3 */
708                                 hcp->dpgno = ppgno;
709                                 if ((ret = __ham_get_cpage(hashp, hcp,
710                                     DB_LOCK_READ)) != 0)
711                                         goto out;
712                                 hcp->dndx = NUM_ENT(hcp->dpagep);
713                                 F_SET(hcp, H_DELETED);
714                         } else {                                /* Case 4 */
715                                 ret = __ham_del_pair(hashp, hcp, 1);
716                                 hcp->dpgno = PGNO_INVALID;
717                                 /*
718                                  * Delpair updated the cursor queue, so we
719                                  * don't have to do that here.
720                                  */
721                                 chg_pgno = PGNO_INVALID;
722                         }
723                 } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
724                         hcp->dndx = 0;                          /* Case 2 */
725                         hcp->dpgno = PGNO(hcp->dpagep);
726                         if (ppgno == PGNO_INVALID)
727                                 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
728                                     H_DATAINDEX(hcp->bndx))),
729                                     &hcp->dpgno, sizeof(db_pgno_t));
730                         F_SET(hcp, H_DELETED);
731                 } else                                          /* Case 1 */
732                         F_SET(hcp, H_DELETED);
733                 if (chg_pgno != PGNO_INVALID)
734                         __ham_c_update(hcp, chg_pgno, 0, 0, 1);
735         } else if (F_ISSET(hcp, H_ISDUP)) {                     /* on page */
736                 if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
737                     LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
738                         ret = __ham_del_pair(hashp, hcp, 1);
739                 else {
740                         DBT repldbt;
741
742                         repldbt.flags = 0;
743                         F_SET(&repldbt, DB_DBT_PARTIAL);
744                         repldbt.doff = hcp->dup_off;
745                         repldbt.dlen = DUP_SIZE(hcp->dup_len);
746                         repldbt.size = 0;
747                         ret = __ham_replpair(hashp, hcp, &repldbt, 0);
748                         hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
749                         F_SET(hcp, H_DELETED);
750                         __ham_c_update(hcp, hcp->pgno,
751                             DUP_SIZE(hcp->dup_len), 0, 1);
752                 }
753
754         } else
755                 /* Not a duplicate */
756 normal:         ret = __ham_del_pair(hashp, hcp, 1);
757
758 out:    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
759                 t_ret = ret;
760         if (ret != 0)
761                 *hcp = save_curs;
762         RELEASE_META(hashp->dbp, hashp);
763         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
764                 __db_puthandle(ldbp);
765         return (ret);
766 }
767
768 static int
769 __ham_c_get(cursor, key, data, flags)
770         DBC *cursor;
771         DBT *key;
772         DBT *data;
773         int flags;
774 {
775         DB *ldbp;
776         HTAB *hashp;
777         HASH_CURSOR *hcp, save_curs;
778         int get_key, ret, t_ret;
779
780         DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
781             flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
782             NULL, flags);
783         ldbp = cursor->dbp;
784         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
785             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
786                 return (ret);
787         hashp = (HTAB *)(ldbp->internal);
788         hcp = (HASH_CURSOR *)cursor->internal;
789         save_curs = *hcp;
790         if ((ret =
791             __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
792                 return (ret);
793
794         SET_LOCKER(hashp->dbp, cursor->txn);
795         GET_META(hashp->dbp, hashp);
796         hashp->hash_accesses++;
797
798         hcp->seek_size = 0;
799
800         ret = 0;
801         get_key = 1;
802         switch (flags) {
803         case DB_PREV:
804                 if (hcp->bucket != BUCKET_INVALID) {
805                         ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
806                         break;
807                 }
808                 /* FALL THROUGH */
809         case DB_LAST:
810                 ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
811                 break;
812         case DB_FIRST:
813                 ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
814                 break;
815         case DB_NEXT:
816                 if (hcp->bucket == BUCKET_INVALID)
817                         hcp->bucket = 0;
818                 ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
819                 break;
820         case DB_SET:
821         case DB_SET_RANGE:
822                 ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
823                 get_key = 0;
824                 break;
825         case DB_CURRENT:
826                 if (F_ISSET(hcp, H_DELETED)) {
827                         ret = DB_KEYEMPTY;
828                         goto out;
829                 }
830
831                 ret = __ham_item(hashp, hcp, DB_LOCK_READ);
832                 break;
833         }
834
835         /*
836          * Must always enter this loop to do error handling and
837          * check for big key/data pair.
838          */
839         while (1) {
840                 if (ret != 0 && ret != DB_NOTFOUND)
841                         goto out1;
842                 else if (F_ISSET(hcp, H_OK)) {
843                         /* Get the key. */
844                         if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
845                             H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
846                             &hcp->big_keylen)) != 0)
847                                 goto out1;
848
849                         ret = __ham_dup_return(hashp, hcp, data, flags);
850                         break;
851                 } else if (!F_ISSET(hcp, H_NOMORE)) {
852                         abort();
853                         break;
854                 }
855
856                 /*
857                  * Ran out of entries in a bucket; change buckets.
858                  */
859                 switch (flags) {
860                         case DB_LAST:
861                         case DB_PREV:
862                                 ret = __ham_item_done(hashp, hcp, 0);
863                                 if (hcp->bucket == 0) {
864                                         ret = DB_NOTFOUND;
865                                         goto out1;
866                                 }
867                                 hcp->bucket--;
868                                 hcp->bndx = NDX_INVALID;
869                                 if (ret == 0)
870                                         ret = __ham_item_prev(hashp,
871                                             hcp, DB_LOCK_READ);
872                                 break;
873                         case DB_FIRST:
874                         case DB_NEXT:
875                                 ret = __ham_item_done(hashp, hcp, 0);
876                                 hcp->bndx = NDX_INVALID;
877                                 hcp->bucket++;
878                                 hcp->pgno = PGNO_INVALID;
879                                 hcp->pagep = NULL;
880                                 if (hcp->bucket > hashp->hdr->max_bucket) {
881                                         ret = DB_NOTFOUND;
882                                         goto out1;
883                                 }
884                                 if (ret == 0)
885                                         ret = __ham_item_next(hashp,
886                                             hcp, DB_LOCK_READ);
887                                 break;
888                         case DB_SET:
889                         case DB_SET_RANGE:
890                                 /* Key not found. */
891                                 ret = DB_NOTFOUND;
892                                 goto out1;
893                 }
894         }
895 out1:   if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
896                 t_ret = ret;
897 out:    if (ret)
898                 *hcp = save_curs;
899         RELEASE_META(hashp->dbp, hashp);
900         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
901                 __db_puthandle(ldbp);
902         return (ret);
903 }
904
905 static int
906 __ham_c_put(cursor, key, data, flags)
907         DBC *cursor;
908         DBT *key;
909         DBT *data;
910         int flags;
911 {
912         DB *ldbp;
913         HTAB *hashp;
914         HASH_CURSOR *hcp, save_curs;
915         int ret, t_ret;
916         u_int32_t nbytes;
917
918         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
919             flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
920             NULL, flags);
921         ldbp = cursor->dbp;
922         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
923             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
924                 return (ret);
925         hashp = (HTAB *)(ldbp->internal);
926         hcp = (HASH_CURSOR *)cursor->internal;
927         save_curs = *hcp;
928
929         if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
930             F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
931                 return (ret);
932         if (F_ISSET(hcp, H_DELETED))
933                 return (DB_NOTFOUND);
934
935         SET_LOCKER(hashp->dbp, cursor->txn);
936         GET_META(hashp->dbp, hashp);
937         ret = 0;
938
939         switch (flags) {
940         case DB_KEYLAST:
941         case DB_KEYFIRST:
942                 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
943                     HKEYDATA_PSIZE(key->size)) +
944                     (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
945                     HKEYDATA_PSIZE(data->size));
946                 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
947                 break;
948         case DB_BEFORE:
949         case DB_AFTER:
950         case DB_CURRENT:
951                 ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
952                 break;
953         }
954
955         if (ret == 0) {
956                 if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
957                         ret = __ham_overwrite(hashp, hcp, data);
958                 else
959                         ret = __ham_add_dup(hashp, hcp, data, flags);
960         }
961
962         if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
963                 ret = __ham_expand_table(hashp);
964                 F_CLR(hcp, H_EXPAND);
965         }
966
967         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
968                 ret = t_ret;
969         if (ret != 0)
970                 *hcp = save_curs;
971         RELEASE_META(hashp->dbp, hashp);
972         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
973                 __db_puthandle(ldbp);
974         return (ret);
975 }
976
977 /********************************* UTILITIES ************************/
978
979 /*
980  * __ham_expand_table --
981  *
982  * PUBLIC: int __ham_expand_table __P((HTAB *));
983  */
984 int
985 __ham_expand_table(hashp)
986         HTAB *hashp;
987 {
988         DB_LSN new_lsn;
989         u_int32_t old_bucket, new_bucket, spare_ndx;
990         int ret;
991
992         ret = 0;
993         DIRTY_META(hashp, ret);
994         if (ret)
995                 return (ret);
996
997         /*
998          * If the split point is about to increase, make sure that we
999          * have enough extra pages.  The calculation here is weird.
1000          * We'd like to do this after we've upped max_bucket, but it's
1001          * too late then because we've logged the meta-data split.  What
1002          * we'll do between then and now is increment max bucket and then
1003          * see what the log of one greater than that is; here we have to
1004          * look at the log of max + 2.  VERY NASTY STUFF.
1005          */
1006         if (__db_log2(hashp->hdr->max_bucket + 2) > hashp->hdr->ovfl_point) {
1007                 /*
1008                  * We are about to shift the split point.  Make sure that
1009                  * if the next doubling is going to be big (more than 8
1010                  * pages), we have some extra pages around.
1011                  */
1012                 if (hashp->hdr->max_bucket + 1 >= 8 &&
1013                     hashp->hdr->spares[hashp->hdr->ovfl_point] <
1014                     hashp->hdr->spares[hashp->hdr->ovfl_point - 1] +
1015                     hashp->hdr->ovfl_point + 1)
1016                         __ham_init_ovflpages(hashp);
1017         }
1018
1019         /* Now we can log the meta-data split. */
1020         if (DB_LOGGING(hashp->dbp)) {
1021                 if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
1022                     (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1023                     hashp->dbp->log_fileid,
1024                     hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
1025                     hashp->hdr->spares[hashp->hdr->ovfl_point],
1026                     &hashp->hdr->lsn)) != 0)
1027                         return (ret);
1028
1029                 hashp->hdr->lsn = new_lsn;
1030         }
1031
1032         hashp->hash_expansions++;
1033         new_bucket = ++hashp->hdr->max_bucket;
1034         old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
1035
1036         /*
1037          * If the split point is increasing, copy the current contents
1038          * of the spare split bucket to the next bucket.
1039          */
1040         spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
1041         if (spare_ndx > hashp->hdr->ovfl_point) {
1042                 hashp->hdr->spares[spare_ndx] =
1043                     hashp->hdr->spares[hashp->hdr->ovfl_point];
1044                 hashp->hdr->ovfl_point = spare_ndx;
1045         }
1046
1047         if (new_bucket > hashp->hdr->high_mask) {
1048                 /* Starting a new doubling */
1049                 hashp->hdr->low_mask = hashp->hdr->high_mask;
1050                 hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
1051         }
1052
1053         if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
1054                 __db_err(hashp->dbp->dbenv,
1055                     "hash: Cannot allocate new bucket.  Pages exhausted.");
1056                 return (ENOSPC);
1057         }
1058
1059         /* Relocate records to the new bucket */
1060         return (__ham_split_page(hashp, old_bucket, new_bucket));
1061 }
1062
1063 /*
1064  * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1065  */
1066 u_int32_t
1067 __ham_call_hash(hashp, k, len)
1068         HTAB *hashp;
1069         u_int8_t *k;
1070         int32_t len;
1071 {
1072         u_int32_t n, bucket;
1073
1074         n = (u_int32_t)hashp->hash(k, len);
1075         bucket = n & hashp->hdr->high_mask;
1076         if (bucket > hashp->hdr->max_bucket)
1077                 bucket = bucket & hashp->hdr->low_mask;
1078         return (bucket);
1079 }
1080
1081 /*
1082  * Check for duplicates, and call __db_ret appropriately.  Release
1083  * everything held by the cursor.
1084  */
1085 static int
1086 __ham_dup_return(hashp, hcp, val, flags)
1087         HTAB *hashp;
1088         HASH_CURSOR *hcp;
1089         DBT *val;
1090         int flags;
1091 {
1092         PAGE *pp;
1093         DBT *myval, tmp_val;
1094         db_indx_t ndx;
1095         db_pgno_t pgno;
1096         u_int8_t *hk, type;
1097         int indx, ret;
1098         db_indx_t len;
1099
1100         /* Check for duplicate and return the first one. */
1101         ndx = H_DATAINDEX(hcp->bndx);
1102         type = HPAGE_TYPE(hcp->pagep, ndx);
1103         pp = hcp->pagep;
1104         myval = val;
1105
1106         /*
1107          * There are 3 cases:
1108          * 1. We are not in duplicate, simply call db_ret.
1109          * 2. We are looking at keys and stumbled onto a duplicate.
1110          * 3. We are in the middle of a duplicate set. (ISDUP set)
1111          */
1112
1113         /*
1114          * Here we check for the case where we just stumbled onto a
1115          * duplicate.  In this case, we do initialization and then
1116          * let the normal duplicate code handle it.
1117          */
1118         if (!F_ISSET(hcp, H_ISDUP))
1119                 if (type == H_DUPLICATE) {
1120                         F_SET(hcp, H_ISDUP);
1121                         hcp->dup_tlen = LEN_HDATA(hcp->pagep,
1122                             hashp->hdr->pagesize, hcp->bndx);
1123                         hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1124                         if (flags == DB_LAST || flags == DB_PREV) {
1125                                 hcp->dndx = 0;
1126                                 hcp->dup_off = 0;
1127                                 do {
1128                                         memcpy(&len,
1129                                             HKEYDATA_DATA(hk) + hcp->dup_off,
1130                                             sizeof(db_indx_t));
1131                                         hcp->dup_off += DUP_SIZE(len);
1132                                         hcp->dndx++;
1133                                 } while (hcp->dup_off < hcp->dup_tlen);
1134                                 hcp->dup_off -= DUP_SIZE(len);
1135                                 hcp->dndx--;
1136                         } else {
1137                                 memcpy(&len,
1138                                     HKEYDATA_DATA(hk), sizeof(db_indx_t));
1139                                 hcp->dup_off = 0;
1140                                 hcp->dndx = 0;
1141                         }
1142                         hcp->dup_len = len;
1143                 } else if (type == H_OFFDUP) {
1144                         F_SET(hcp, H_ISDUP);
1145                         memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
1146                             sizeof(db_pgno_t));
1147                         if (flags == DB_LAST || flags == DB_PREV) {
1148                                 indx = (int)hcp->dndx;
1149                                 if ((ret = __db_dend(hashp->dbp,
1150                                     pgno, &hcp->dpagep)) != 0)
1151                                         return (ret);
1152                                 hcp->dpgno = PGNO(hcp->dpagep);
1153                                 hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
1154                         } else if ((ret = __ham_next_cpage(hashp,
1155                             hcp, pgno, 0, H_ISDUP)) != 0)
1156                                 return (ret);
1157                 }
1158
1159
1160         /*
1161          * Now, everything is initialized, grab a duplicate if
1162          * necessary.
1163          */
1164         if (F_ISSET(hcp, H_ISDUP))
1165                 if (hcp->dpgno != PGNO_INVALID) {
1166                         pp = hcp->dpagep;
1167                         ndx = hcp->dndx;
1168                 } else {
1169                         /*
1170                          * Copy the DBT in case we are retrieving into
1171                          * user memory and we need the parameters for
1172                          * it.
1173                          */
1174                         memcpy(&tmp_val, val, sizeof(*val));
1175                         F_SET(&tmp_val, DB_DBT_PARTIAL);
1176                         tmp_val.dlen = hcp->dup_len;
1177                         tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1178                         myval = &tmp_val;
1179                 }
1180
1181
1182         /*
1183          * Finally, if we had a duplicate, pp, ndx, and myval should be
1184          * set appropriately.
1185          */
1186         if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
1187             &hcp->big_datalen)) != 0)
1188                 return (ret);
1189
1190         /*
1191          * In case we sent a temporary off to db_ret, set the real
1192          * return values.
1193          */
1194         val->data = myval->data;
1195         val->size = myval->size;
1196
1197         return (0);
1198 }
1199
1200 static int
1201 __ham_overwrite(hashp, hcp, nval)
1202         HTAB *hashp;
1203         HASH_CURSOR *hcp;
1204         DBT *nval;
1205 {
1206         DBT *myval, tmp_val;
1207         u_int8_t *hk;
1208
1209         if (F_ISSET(hashp->dbp, DB_AM_DUP))
1210                 return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
1211         else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1212                 /* Put/overwrite */
1213                 memcpy(&tmp_val, nval, sizeof(*nval));
1214                 F_SET(&tmp_val, DB_DBT_PARTIAL);
1215                 tmp_val.doff = 0;
1216                 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1217                 if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1218                         memcpy(&tmp_val.dlen,
1219                             HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1220                 else
1221                         tmp_val.dlen = LEN_HDATA(hcp->pagep,
1222                             hashp->hdr->pagesize,hcp->bndx);
1223                 myval = &tmp_val;
1224         } else /* Regular partial put */
1225                 myval = nval;
1226
1227         return (__ham_replpair(hashp, hcp, myval, 0));
1228 }
1229
1230 /*
1231  * Given a key and a cursor, sets the cursor to the page/ndx on which
1232  * the key resides.  If the key is found, the cursor H_OK flag is set
1233  * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1234  * If the key is not found, the H_OK flag is not set.  If the sought
1235  * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1236  * are set indicating where an add might take place.  If it is 0,
1237  * non of the cursor pointer field are valid.
1238  */
1239 static int
1240 __ham_lookup(hashp, hcp, key, sought, mode)
1241         HTAB *hashp;
1242         HASH_CURSOR *hcp;
1243         const DBT *key;
1244         u_int32_t sought;
1245         db_lockmode_t mode;
1246 {
1247         db_pgno_t pgno;
1248         u_int32_t tlen;
1249         int match, ret, t_ret;
1250         u_int8_t *hk;
1251
1252         /*
1253          * Set up cursor so that we're looking for space to add an item
1254          * as we cycle through the pages looking for the key.
1255          */
1256         if ((ret = __ham_item_reset(hashp, hcp)) != 0)
1257                 return (ret);
1258         hcp->seek_size = sought;
1259
1260         hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
1261         while (1) {
1262                 if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
1263                         return (ret);
1264
1265                 if (F_ISSET(hcp, H_NOMORE))
1266                         break;
1267
1268                 hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1269                 switch (HPAGE_PTYPE(hk)) {
1270                 case H_OFFPAGE:
1271                         memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1272                         if (tlen == key->size) {
1273                                 memcpy(&pgno,
1274                                     HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1275                                 match = __db_moff(hashp->dbp, key, pgno);
1276                                 if (match == 0) {
1277                                         F_SET(hcp, H_OK);
1278                                         return (0);
1279                                 }
1280                         }
1281                         break;
1282                 case H_KEYDATA:
1283                         if (key->size == LEN_HKEY(hcp->pagep,
1284                             hashp->hdr->pagesize, hcp->bndx) &&
1285                             memcmp(key->data,
1286                             HKEYDATA_DATA(hk), key->size) == 0) {
1287                                 F_SET(hcp, H_OK);
1288                                 return (0);
1289                         }
1290                         break;
1291                 case H_DUPLICATE:
1292                 case H_OFFDUP:
1293                         /*
1294                          * These are errors because keys are never
1295                          * duplicated, only data items are.
1296                          */
1297                         return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
1298                 }
1299                 hashp->hash_collisions++;
1300         }
1301
1302         /*
1303          * Item was not found, adjust cursor properly.
1304          */
1305
1306         if (sought != 0)
1307                 return (ret);
1308
1309         if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
1310                 ret = t_ret;
1311         return (ret);
1312 }
1313
1314 /*
1315  * Initialize a dbt using some possibly already allocated storage
1316  * for items.
1317  * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1318  */
1319 int
1320 __ham_init_dbt(dbt, size, bufp, sizep)
1321         DBT *dbt;
1322         u_int32_t size;
1323         void **bufp;
1324         u_int32_t *sizep;
1325 {
1326         memset(dbt, 0, sizeof(*dbt));
1327         if (*sizep < size) {
1328                 if ((*bufp = (void *)(*bufp == NULL ?
1329                     __db_malloc(size) : __db_realloc(*bufp, size))) == NULL) {
1330                         *sizep = 0;
1331                         return (ENOMEM);
1332                 }
1333                 *sizep = size;
1334         }
1335         dbt->data = *bufp;
1336         dbt->size = size;
1337         return (0);
1338 }
1339
1340 /*
1341  * Adjust the cursor after an insert or delete.  The cursor passed is
1342  * the one that was operated upon; we just need to check any of the
1343  * others.
1344  *
1345  * len indicates the length of the item added/deleted
1346  * add indicates if the item indicated by the cursor has just been
1347  * added (add == 1) or deleted (add == 0).
1348  * dup indicates if the addition occurred into a duplicate set.
1349  *
1350  * PUBLIC: void __ham_c_update
1351  * PUBLIC:    __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1352  */
1353 void
1354 __ham_c_update(hcp, chg_pgno, len, add, is_dup)
1355         HASH_CURSOR *hcp;
1356         db_pgno_t chg_pgno;
1357         u_int32_t len;
1358         int add, is_dup;
1359 {
1360         DBC *cp;
1361         HTAB *hp;
1362         HASH_CURSOR *lcp;
1363         int page_deleted;
1364
1365         /*
1366          * Regular adds are always at the end of a given page, so we never
1367          * have to adjust anyone's cursor after a regular add.
1368          */
1369         if (!is_dup && add)
1370                 return;
1371
1372         /*
1373          * Determine if a page was deleted.    If this is a regular update
1374          * (i.e., not is_dup) then the deleted page's number will be that in
1375          * chg_pgno, and the pgno in the cursor will be different.  If this
1376          * was an onpage-duplicate, then the same conditions apply.  If this
1377          * was an off-page duplicate, then we need to verify if hcp->dpgno
1378          * is the same (no delete) or different (delete) than chg_pgno.
1379          */
1380         if (!is_dup || hcp->dpgno == PGNO_INVALID)
1381                 page_deleted =
1382                     chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
1383         else
1384                 page_deleted =
1385                     chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
1386
1387         hp = hcp->db_cursor->dbp->master->internal;
1388         DB_THREAD_LOCK(hp->dbp);
1389
1390         for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
1391             cp = TAILQ_NEXT(cp, links)) {
1392                 if (cp->internal == hcp)
1393                         continue;
1394
1395                 lcp = (HASH_CURSOR *)cp->internal;
1396
1397                 if (!is_dup && lcp->pgno != chg_pgno)
1398                         continue;
1399
1400                 if (is_dup) {
1401                         if (F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1402                                 continue;
1403                         if (!F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1404                                 continue;
1405                 }
1406
1407                 if (page_deleted) {
1408                         if (is_dup) {
1409                                 lcp->dpgno = hcp->dpgno;
1410                                 lcp->dndx = hcp->dndx;
1411                         } else {
1412                                 lcp->pgno = hcp->pgno;
1413                                 lcp->bndx = hcp->bndx;
1414                                 lcp->bucket = hcp->bucket;
1415                         }
1416                         F_CLR(lcp, H_ISDUP);
1417                         continue;
1418                 }
1419
1420                 if (!is_dup && lcp->bndx > hcp->bndx)
1421                         lcp->bndx--;
1422                 else if (!is_dup && lcp->bndx == hcp->bndx)
1423                         F_SET(lcp, H_DELETED);
1424                 else if (is_dup && lcp->bndx == hcp->bndx) {
1425                         /* Assign dpgno in case there was page conversion. */
1426                         lcp->dpgno = hcp->dpgno;
1427                         if (add && lcp->dndx >= hcp->dndx )
1428                                 lcp->dndx++;
1429                         else if (!add && lcp->dndx > hcp->dndx)
1430                                 lcp->dndx--;
1431                         else if (!add && lcp->dndx == hcp->dndx)
1432                                 F_SET(lcp, H_DELETED);
1433
1434                         /* Now adjust on-page information. */
1435                         if (lcp->dpgno == PGNO_INVALID)
1436                                 if (add) {
1437                                         lcp->dup_tlen += len;
1438                                         if (lcp->dndx > hcp->dndx)
1439                                                 lcp->dup_off += len;
1440                                 } else {
1441                                         lcp->dup_tlen -= len;
1442                                         if (lcp->dndx > hcp->dndx)
1443                                                 lcp->dup_off -= len;
1444                                 }
1445                 }
1446         }
1447         DB_THREAD_UNLOCK(hp->dbp);
1448 }
1449
1450 /*
1451  * __ham_hdup --
1452  *      This function gets called when we create a duplicate handle for a
1453  *      threaded DB.  It should create the private part of the DB structure.
1454  * PUBLIC: int  __ham_hdup __P((DB *, DB *));
1455  */
1456 int
1457 __ham_hdup(orig, new)
1458         DB *orig, *new;
1459 {
1460         HTAB *hashp;
1461         DBC *curs;
1462         int ret;
1463
1464         if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL)
1465                 return (ENOMEM);
1466
1467         new->internal = hashp;
1468
1469         hashp->dbp = new;
1470         hashp->hlock = 0;
1471         hashp->hdr = NULL;
1472         hashp->hash = ((HTAB *)orig->internal)->hash;
1473         if ((hashp->split_buf = (PAGE *)__db_malloc(orig->pgsize)) == NULL)
1474                 return (ENOMEM);
1475         hashp->local_errno = 0;
1476         hashp->hash_accesses = 0;
1477         hashp->hash_collisions = 0;
1478         hashp->hash_expansions = 0;
1479         hashp->hash_overflows = 0;
1480         hashp->hash_bigpages = 0;
1481         /* Initialize the cursor queue. */
1482         ret = __ham_c_init(new, NULL, &curs);
1483         TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
1484         return (ret);
1485 }