6d8c40057d9fef63f2661ec04007feab2815ae72
[kopensolaris-gnu/glibc.git] / db2 / hash / hash.c
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997
5  *      Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994
9  *      Margo Seltzer.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994
13  *      The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Margo Seltzer.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. All advertising materials mentioning features or use of this software
27  *    must display the following acknowledgement:
28  *      This product includes software developed by the University of
29  *      California, Berkeley and its contributors.
30  * 4. Neither the name of the University nor the names of its contributors
31  *    may be used to endorse or promote products derived from this software
32  *    without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46
47 #include "config.h"
48
49 #ifndef lint
50 static const char sccsid[] = "@(#)hash.c        10.25 (Sleepycat) 8/24/97";
51 #endif /* not lint */
52
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
55 #include <sys/stat.h>
56
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63 #endif
64
65 #include "shqueue.h"
66 #include "db_int.h"
67 #include "db_page.h"
68 #include "db_am.h"
69 #include "db_ext.h"
70 #include "hash.h"
71 #include "log.h"
72
73 static int  __ham_c_close __P((DBC *));
74 static int  __ham_c_del __P((DBC *, int));
75 static int  __ham_c_get __P((DBC *, DBT *, DBT *, int));
76 static int  __ham_c_put __P((DBC *, DBT *, DBT *, int));
77 static int  __ham_c_init __P((DB *, DB_TXN *, DBC **));
78 static int  __ham_cursor __P((DB *, DB_TXN *, DBC **));
79 static int  __ham_delete __P((DB *, DB_TXN *, DBT *, int));
80 static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
81 static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
82 static void __ham_init_htab __P((HTAB *));
83 static int  __ham_lookup __P((HTAB *,
84                 HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
85 static int  __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
86 static int  __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
87 static int  __ham_sync __P((DB *, int));
88
89 /************************** INTERFACE ROUTINES ***************************/
90 /* OPEN/CLOSE */
91
92 /*
93  * __ham_open --
94  *
95  * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
96  */
97 int
98 __ham_open(dbp, dbinfo)
99         DB *dbp;
100         DB_INFO *dbinfo;
101 {
102         DB_ENV *dbenv;
103         DBC *curs;
104         HTAB *hashp;
105         int file_existed, ret;
106
107         dbenv = dbp->dbenv;
108
109         if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL)
110                 return (ENOMEM);
111         hashp->dbp = dbp;
112
113         /* Set the hash function if specified by the user. */
114         if (dbinfo != NULL && dbinfo->h_hash != NULL)
115                 hashp->hash = dbinfo->h_hash;
116
117         /*
118          * Initialize the remaining fields of the dbp.  The type, close and
119          * fd functions are all set in db_open.
120          */
121         dbp->internal = hashp;
122         dbp->cursor = __ham_cursor;
123         dbp->del = __ham_delete;
124         dbp->get = __ham_get;
125         dbp->put = __ham_put;
126         dbp->sync = __ham_sync;
127
128         /* If locking is turned on, lock the meta data page. */
129         if (F_ISSET(dbp, DB_AM_LOCKING)) {
130                 dbp->lock.pgno = BUCKET_INVALID;
131                 if ((ret = lock_get(dbenv->lk_info, dbp->locker,
132                     0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
133                         if (ret < 0)
134                                 ret = EAGAIN;
135                         goto out;
136                 }
137         }
138
139         /*
140          * Now, we can try to read the meta-data page and figure out
141          * if we set up locking and get the meta-data page properly.
142          * If this is a new file, initialize it, and put it back dirty.
143          */
144         if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
145                 goto out;
146
147         /* Initialize the hashp structure */
148         if (hashp->hdr->magic == DB_HASHMAGIC) {
149                 file_existed = 1;
150                 /* File exists, verify the data in the header. */
151                 if (hashp->hash == NULL)
152                         hashp->hash =
153                             hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
154                 if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
155                     hashp->hdr->h_charkey) {
156                         __db_err(hashp->dbp->dbenv,
157                             "hash: incompatible hash function");
158                         ret = EINVAL;
159                         goto out;
160                 }
161                 if (F_ISSET(hashp->hdr, DB_HASH_DUP))
162                         F_SET(dbp, DB_AM_DUP);
163         } else {
164                 /*
165                  * File does not exist, we must initialize the header.  If
166                  * locking is enabled that means getting a write lock first.
167                  */
168                 file_existed = 0;
169                 if (F_ISSET(dbp, DB_AM_LOCKING) &&
170                     ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
171                     (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
172                         &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
173                         if (ret < 0)
174                                 ret = EAGAIN;
175                         goto out;
176                 }
177
178                 hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0;
179                 hashp->hdr->ffactor =
180                     dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
181                 __ham_init_htab(hashp);
182                 if (F_ISSET(dbp, DB_AM_DUP))
183                         F_SET(hashp->hdr, DB_HASH_DUP);
184                 if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
185                         goto out;
186         }
187
188         /* Initialize the default cursor. */
189         __ham_c_init(dbp, NULL, &curs);
190         TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
191
192         /* Allocate memory for our split buffer. */
193         if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) {
194                 ret = ENOMEM;
195                 goto out;
196         }
197
198 #ifdef NO_STATISTICS_FOR_DB_ERR
199         __db_err(dbp->dbenv,
200             "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
201             "TABLE POINTER   ", (long)hashp,
202             "BUCKET SIZE     ", (long)hashp->hdr->pagesize,
203             "FILL FACTOR     ", (long)hashp->hdr->ffactor,
204             "MAX BUCKET      ", (long)hashp->hdr->max_bucket,
205             "OVFL POINT      ", (long)hashp->hdr->ovfl_point,
206             "LAST FREED      ", (long)hashp->hdr->last_freed,
207             "HIGH MASK       ", (long)hashp->hdr->high_mask,
208             "LOW  MASK       ", (long)hashp->hdr->low_mask,
209             "NELEM           ", (long)hashp->hdr->nelem,
210             "FLAGS           ", (long)hashp->hdr->flags);
211 #endif
212
213         /* Release the meta data page */
214         (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
215         if (F_ISSET(dbp, DB_AM_LOCKING) &&
216             (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
217                 if (ret < 0)
218                         ret = EAGAIN;
219                 goto out;
220         }
221
222         hashp->hlock = 0;
223         hashp->hdr = NULL;
224         /* Sync the file so that we know that the meta data goes to disk. */
225         if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
226                 goto out;
227         return (0);
228
229 out:    (void)__ham_close(dbp);
230         return (ret);
231 }
232
233 /*
234  * PUBLIC: int  __ham_close __P((DB *));
235  */
236 int
237 __ham_close(dbp)
238         DB *dbp;
239 {
240         HTAB *hashp;
241         int ret, t_ret;
242
243         DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
244         hashp = (HTAB *)dbp->internal;
245         ret = 0;
246
247         /* Free the split page. */
248         if (hashp->split_buf)
249                 FREE(hashp->split_buf, dbp->pgsize);
250
251         if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
252             (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
253                 ret = t_ret;
254         if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
255             hashp->hlock)) != 0 && ret == 0)
256                 ret = t_ret;
257
258         FREE(hashp, sizeof(HTAB));
259         dbp->internal = NULL;
260         return (ret);
261 }
262
263 /************************** LOCAL CREATION ROUTINES **********************/
264 /*
265  * Returns 0 on No Error
266  */
267 static void
268 __ham_init_htab(hashp)
269         HTAB *hashp;
270 {
271         u_int32_t nelem;
272         int32_t l2, nbuckets;
273
274         nelem = hashp->hdr->nelem;
275         hashp->hdr->pagesize = hashp->dbp->pgsize;
276         ZERO_LSN(hashp->hdr->lsn);
277         hashp->hdr->magic = DB_HASHMAGIC;
278         hashp->hdr->version = DB_HASHVERSION;
279         if (hashp->hash == NULL)
280                 hashp->hash =
281                     hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
282         hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
283         if (nelem != 0 && hashp->hdr->ffactor != 0) {
284                 nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
285                 l2 = __db_log2(nelem > 2 ? nelem : 2);
286         } else
287                 l2 = 2;
288
289         nbuckets = 1 << l2;
290
291         hashp->hdr->spares[l2] = 0;
292         hashp->hdr->spares[l2 + 1] = 0;
293         hashp->hdr->ovfl_point = l2;
294         hashp->hdr->last_freed = PGNO_INVALID;
295
296         hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
297         hashp->hdr->low_mask = (nbuckets >> 1) - 1;
298         memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
299 }
300
301 /********************** DESTROY/CLOSE ROUTINES ************************/
302
303
304 /*
305  * Write modified pages to disk
306  *
307  * Returns:
308  *       0 == OK
309  *      -1 ERROR
310  */
311 static int
312 __ham_sync(dbp, flags)
313         DB *dbp;
314         int flags;
315 {
316         int ret;
317
318         DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
319         if ((ret = __db_syncchk(dbp, flags)) != 0)
320                 return (ret);
321         if (F_ISSET(dbp, DB_AM_RDONLY))
322                 return (0);
323
324         if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
325                 ret = 0;
326
327         return (ret);
328 }
329
330 /*******************************SEARCH ROUTINES *****************************/
331 /*
332  * All the access routines return
333  *
334  * Returns:
335  *       0 on SUCCESS
336  *       1 to indicate an external ERROR (i.e. key not found, etc)
337  *      -1 to indicate an internal ERROR (i.e. out of memory, etc)
338  */
339
340 static int
341 __ham_get(dbp, txn, key, data, flags)
342         DB *dbp;
343         DB_TXN *txn;
344         DBT *key;
345         DBT *data;
346         int flags;
347 {
348         DB *ldbp;
349         DBC *cp;
350         HTAB *hashp;
351         HASH_CURSOR *hcp;
352         int ret, t_ret;
353
354         DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
355         if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
356                 return (ret);
357
358         ldbp = dbp;
359         if (F_ISSET(dbp, DB_AM_THREAD) &&
360             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
361                 return (ret);
362
363         hashp = (HTAB *)ldbp->internal;
364         SET_LOCKER(ldbp, txn);
365         GET_META(ldbp, hashp);
366         cp = TAILQ_FIRST(&ldbp->curs_queue);
367
368         hashp->hash_accesses++;
369         hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
370         if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
371                 if (F_ISSET(hcp, H_OK))
372                         ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
373                 else /* Key was not found */
374                         ret = DB_NOTFOUND;
375
376         if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
377                 ret = t_ret;
378         RELEASE_META(ldbp, hashp);
379         if (F_ISSET(dbp, DB_AM_THREAD))
380                 __db_puthandle(ldbp);
381         return (ret);
382 }
383
384 static int
385 __ham_put(dbp, txn, key, data, flags)
386         DB *dbp;
387         DB_TXN *txn;
388         DBT *key;
389         DBT *data;
390         int flags;
391 {
392         DB *ldbp;
393         HTAB *hashp;
394         HASH_CURSOR *hcp;
395         DBT tmp_val, *myval;
396         int ret, t_ret;
397         u_int32_t nbytes;
398
399         DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
400         if ((ret = __db_putchk(dbp, key, data,
401             flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
402                 return (ret);
403
404         ldbp = dbp;
405         if (F_ISSET(dbp, DB_AM_THREAD) &&
406             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
407                 return (ret);
408
409         hashp = (HTAB *)ldbp->internal;
410         SET_LOCKER(ldbp, txn);
411         GET_META(ldbp, hashp);
412         hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
413
414         nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
415             HKEYDATA_PSIZE(key->size)) +
416             (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
417             HKEYDATA_PSIZE(data->size));
418
419         hashp->hash_accesses++;
420         ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
421
422         if (ret == DB_NOTFOUND) {
423                 ret = 0;
424                 if (hcp->seek_found_page != PGNO_INVALID &&
425                     hcp->seek_found_page != hcp->pgno) {
426                         if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
427                                 goto out;
428                         hcp->pgno = hcp->seek_found_page;
429                         hcp->bndx = NDX_INVALID;
430                 }
431
432                 if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
433                         /*
434                          * Doing a partial put, but the key does not exist
435                          * and we are not beginning the write at 0.  We
436                          * must create a data item padded up to doff and
437                          * then write the new bytes represented by val.
438                          */
439                         ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
440                             &hcp->big_data, &hcp->big_datalen);
441                         if (ret == 0) {
442                                 memset(tmp_val.data, 0, data->doff);
443                                 memcpy((u_int8_t *)tmp_val.data + data->doff,
444                                     data->data, data->size);
445                                 myval = &tmp_val;
446                         }
447                 } else
448                         myval = (DBT *)data;
449
450                 if (ret == 0)
451                         ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
452         } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
453                 if (flags == DB_NOOVERWRITE)
454                         ret = DB_KEYEXIST;
455                 else if (F_ISSET(ldbp, DB_AM_DUP))
456                         ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
457                 else
458                         ret = __ham_overwrite(hashp, hcp, data);
459         }
460
461         /* Free up all the cursor pages. */
462         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
463                 ret = t_ret;
464         /* Now check if we have to grow. */
465 out:    if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
466                 ret = __ham_expand_table(hashp);
467                 F_CLR(hcp, H_EXPAND);
468         }
469
470         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
471                 ret = t_ret;
472         RELEASE_META(ldbp, hashp);
473         if (F_ISSET(dbp, DB_AM_THREAD))
474                 __db_puthandle(ldbp);
475         return (ret);
476 }
477
478 static int
479 __ham_cursor(dbp, txnid, dbcp)
480         DB *dbp;
481         DB_TXN *txnid;
482         DBC **dbcp;
483 {
484         int ret;
485
486         DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
487         if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
488                 return (ret);
489
490         DB_THREAD_LOCK(dbp);
491         TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
492         DB_THREAD_UNLOCK(dbp);
493         return (ret);
494 }
495
496 static int
497 __ham_c_init(dbp, txnid, dbcp)
498         DB *dbp;
499         DB_TXN *txnid;
500         DBC **dbcp;
501 {
502         DBC *db_curs;
503         HASH_CURSOR *new_curs;
504
505         if ((db_curs = (DBC *)calloc(sizeof(DBC), 1)) == NULL)
506                 return (ENOMEM);
507
508         if ((new_curs =
509             (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1)) == NULL) {
510                 FREE(db_curs, sizeof(DBC));
511                 return (ENOMEM);
512         }
513
514         db_curs->internal = new_curs;
515         db_curs->c_close = __ham_c_close;
516         db_curs->c_del = __ham_c_del;
517         db_curs->c_get = __ham_c_get;
518         db_curs->c_put = __ham_c_put;
519         db_curs->txn = txnid;
520         db_curs->dbp = dbp;
521
522         new_curs->db_cursor = db_curs;
523         __ham_item_init(new_curs);
524
525         if (dbcp != NULL)
526                 *dbcp = db_curs;
527         return (0);
528 }
529
530 static int
531 __ham_delete(dbp, txn, key, flags)
532         DB *dbp;
533         DB_TXN *txn;
534         DBT *key;
535         int flags;
536 {
537         DB *ldbp;
538         HTAB *hashp;
539         HASH_CURSOR *hcp;
540         int ret, t_ret;
541
542         DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
543         if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
544                 return (ret);
545
546         ldbp = dbp;
547         if (F_ISSET(dbp, DB_AM_THREAD) &&
548             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
549                 return (ret);
550         hashp = (HTAB *)ldbp->internal;
551         SET_LOCKER(ldbp, txn);
552         GET_META(ldbp, hashp);
553         hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
554
555         hashp->hash_accesses++;
556         if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
557                 if (F_ISSET(hcp, H_OK))
558                         ret = __ham_del_pair(hashp, hcp);
559                 else
560                         ret = DB_NOTFOUND;
561
562         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
563                 ret = t_ret;
564         RELEASE_META(ldbp, hashp);
565         if (F_ISSET(dbp, DB_AM_THREAD))
566                 __db_puthandle(ldbp);
567         return (ret);
568 }
569
570 /* ****************** CURSORS ********************************** */
571 static int
572 __ham_c_close(cursor)
573         DBC *cursor;
574 {
575         DB  *ldbp;
576         HTAB *hashp;
577         HASH_CURSOR *hcp;
578         int ret;
579
580         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
581         /*
582          * If the pagep, dpagep, and lock fields of the cursor are all NULL,
583          * then there really isn't a need to get a handle here.  However,
584          * the normal case is that at least one of those fields is non-NULL,
585          * and putting those checks in here would couple the ham_item_done
586          * functionality with cursor close which would be pretty disgusting.
587          * Instead, we pay the overhead here of always getting the handle.
588          */
589         ldbp = cursor->dbp;
590         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
591             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
592                 return (ret);
593         hashp = (HTAB *)ldbp->internal;
594         hcp = (HASH_CURSOR *)cursor->internal;
595         ret = __ham_item_done(hashp, hcp, 0);
596
597         if (hcp->big_key)
598                 FREE(hcp->big_key, hcp->big_keylen);
599         if (hcp->big_data)
600                 FREE(hcp->big_data, hcp->big_datalen);
601
602         /*
603          * All cursors (except the default ones) are linked off the master.
604          * Therefore, when we close the cursor, we have to remove it from
605          * the master, not the local one.  When we are closing the file in
606          * its entirety, then we clear the THREAD bit and the master and
607          * local are identical, so we remove the correct one.
608          */
609         DB_THREAD_LOCK(cursor->dbp);
610         TAILQ_REMOVE(&cursor->dbp->curs_queue, cursor, links);
611         DB_THREAD_UNLOCK(cursor->dbp);
612
613         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
614                 __db_puthandle(ldbp);
615
616         FREE(hcp, sizeof(HASH_CURSOR));
617         FREE(cursor, sizeof(DBC));
618         return (ret);
619 }
620
621 static int
622 __ham_c_del(cursor, flags)
623         DBC *cursor;
624         int flags;
625 {
626         DB *ldbp;
627         HTAB *hashp;
628         HASH_CURSOR *hcp;
629         HASH_CURSOR save_curs;
630         db_pgno_t ppgno, chg_pgno;
631         int ret, t_ret;
632
633         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
634         ldbp = cursor->dbp;
635         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
636             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
637                 return (ret);
638         hashp = (HTAB *)ldbp->internal;
639         hcp = (HASH_CURSOR *)cursor->internal;
640         save_curs = *hcp;
641         if ((ret = __db_cdelchk(ldbp, flags,
642             F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
643                 return (ret);
644         if (F_ISSET(hcp, H_DELETED))
645                 return (DB_NOTFOUND);
646
647         SET_LOCKER(hashp->dbp, cursor->txn);
648         GET_META(hashp->dbp, hashp);
649         hashp->hash_accesses++;
650         if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
651                 goto out;
652         if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
653                 ppgno = PREV_PGNO(hcp->dpagep);
654
655                 /* Remove item from duplicate page. */
656                 chg_pgno = hcp->dpgno;
657                 if ((ret = __db_drem(hashp->dbp,
658                     &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
659                         goto out;
660
661                 /*
662                  * There are 4 cases.
663                  * 1. We removed an item on a page, but nothing else changed.
664                  * 2. We removed the last item on a page, but there is a
665                  *    following page of duplicates.
666                  * 3. We removed the last item on a page, this page was the
667                  *    last page in a duplicate set, but there were dups before
668                  *    it.
669                  * 4. We removed the last item on a page, removing the last
670                  *    duplicate.
671                  * In case 1 hcp->dpagep is unchanged.
672                  * In case 2 hcp->dpagep comes back pointing to the next dup
673                  *     page.
674                  * In case 3 hcp->dpagep comes back NULL.
675                  * In case 4 hcp->dpagep comes back NULL.
676                  */
677                 if (hcp->dpagep == NULL) {
678                         if (ppgno != PGNO_INVALID) {            /* Case 3 */
679                                 hcp->dpgno = ppgno;
680                                 if ((ret = __ham_get_cpage(hashp, hcp,
681                                     DB_LOCK_READ)) != 0)
682                                         goto out;
683                                 hcp->dndx = NUM_ENT(hcp->dpagep);
684                                 F_SET(hcp, H_DELETED);
685                         } else {                                /* Case 4 */
686                                 ret = __ham_del_pair(hashp, hcp);
687                                 hcp->dpgno = PGNO_INVALID;
688                                 /*
689                                  * Delpair updated the cursor queue, so we
690                                  * don't have to do that here.
691                                  */
692                                 chg_pgno = PGNO_INVALID;
693                         }
694                 } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
695                         hcp->dndx = 0;                          /* Case 2 */
696                         hcp->dpgno = PGNO(hcp->dpagep);
697                         if (ppgno == PGNO_INVALID)
698                                 memcpy(P_ENTRY(hcp->pagep,
699                                     H_DATAINDEX(hcp->bndx)) +
700                                     SSZ(HOFFDUP, pgno), &hcp->dpgno,
701                                     sizeof(db_pgno_t));
702                         F_SET(hcp, H_DELETED);
703                 } else                                  /* Case 1 */
704                         F_SET(hcp, H_DELETED);
705                 if (chg_pgno != PGNO_INVALID)
706                         __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1);
707         } else if (F_ISSET(hcp, H_ISDUP)) {                     /* on page */
708                 if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
709                     LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
710                         ret = __ham_del_pair(hashp, hcp);
711                 else {
712                         DBT repldbt;
713
714                         repldbt.flags = 0;
715                         F_SET(&repldbt, DB_DBT_PARTIAL);
716                         repldbt.doff = hcp->dup_off;
717                         repldbt.dlen = DUP_SIZE(hcp->dup_len);
718                         repldbt.size = 0;
719                         ret = __ham_replpair(hashp, hcp, &repldbt, 0);
720                         hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
721                         __ham_c_update(hashp, hcp, hcp->pgno,
722                             DUP_SIZE(hcp->dup_len), 0, 1);
723                         F_SET(hcp, H_DELETED);
724                 }
725
726         } else
727                 /* Not a duplicate */
728                 ret = __ham_del_pair(hashp, hcp);
729
730 out:    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
731                 t_ret = ret;
732         if (ret != 0)
733                 *hcp = save_curs;
734         RELEASE_META(hashp->dbp, hashp);
735         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
736                 __db_puthandle(ldbp);
737         return (ret);
738 }
739
740 static int
741 __ham_c_get(cursor, key, data, flags)
742         DBC *cursor;
743         DBT *key;
744         DBT *data;
745         int flags;
746 {
747         DB *ldbp;
748         HTAB *hashp;
749         HASH_CURSOR *hcp, save_curs;
750         int get_key, ret, t_ret;
751
752         DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
753             flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
754             NULL, flags);
755         ldbp = cursor->dbp;
756         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
757             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
758                 return (ret);
759         hashp = (HTAB *)(ldbp->internal);
760         hcp = (HASH_CURSOR *)cursor->internal;
761         save_curs = *hcp;
762         if ((ret =
763             __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
764                 return (ret);
765
766         SET_LOCKER(hashp->dbp, cursor->txn);
767         GET_META(hashp->dbp, hashp);
768         hashp->hash_accesses++;
769
770         hcp->seek_size = 0;
771
772         ret = 0;
773         get_key = 1;
774         switch (flags) {
775         case DB_PREV:
776                 if (hcp->bucket != BUCKET_INVALID) {
777                         ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
778                         break;
779                 }
780                 /* FALL THROUGH */
781         case DB_LAST:
782                 ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
783                 break;
784         case DB_FIRST:
785                 ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
786                 break;
787         case DB_NEXT:
788                 if (hcp->bucket == BUCKET_INVALID)
789                         hcp->bucket = 0;
790                 ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
791                 break;
792         case DB_SET:
793         case DB_SET_RANGE:
794                 ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
795                 get_key = 0;
796                 break;
797         case DB_CURRENT:
798                 if (F_ISSET(hcp, H_DELETED)) {
799                         ret = DB_KEYEMPTY;
800                         goto out;
801                 }
802
803                 ret = __ham_item(hashp, hcp, DB_LOCK_READ);
804                 break;
805         }
806
807         /*
808          * Must always enter this loop to do error handling and
809          * check for big key/data pair.
810          */
811         while (1) {
812                 if (ret != 0 && ret != DB_NOTFOUND)
813                         goto out1;
814                 else if (F_ISSET(hcp, H_OK)) {
815                         /* Get the key. */
816                         if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
817                             H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
818                             &hcp->big_keylen)) != 0)
819                                 goto out1;
820
821                         ret = __ham_dup_return(hashp, hcp, data, flags);
822                         break;
823                 } else if (!F_ISSET(hcp, H_NOMORE)) {
824                         abort();
825                         break;
826                 }
827
828                 /*
829                  * Ran out of entries in a bucket; change buckets.
830                  */
831                 switch (flags) {
832                         case DB_LAST:
833                         case DB_PREV:
834                                 ret = __ham_item_done(hashp, hcp, 0);
835                                 if (hcp->bucket == 0) {
836                                         ret = DB_NOTFOUND;
837                                         goto out1;
838                                 }
839                                 hcp->bucket--;
840                                 hcp->bndx = NDX_INVALID;
841                                 if (ret == 0)
842                                         ret = __ham_item_prev(hashp,
843                                             hcp, DB_LOCK_READ);
844                                 break;
845                         case DB_FIRST:
846                         case DB_NEXT:
847                                 ret = __ham_item_done(hashp, hcp, 0);
848                                 hcp->bndx = NDX_INVALID;
849                                 hcp->bucket++;
850                                 hcp->pgno = PGNO_INVALID;
851                                 hcp->pagep = NULL;
852                                 if (hcp->bucket > hashp->hdr->max_bucket) {
853                                         ret = DB_NOTFOUND;
854                                         goto out1;
855                                 }
856                                 if (ret == 0)
857                                         ret = __ham_item_next(hashp,
858                                             hcp, DB_LOCK_READ);
859                                 break;
860                         case DB_SET:
861                         case DB_SET_RANGE:
862                                 /* Key not found. */
863                                 ret = DB_NOTFOUND;
864                                 goto out1;
865                 }
866         }
867 out1:   if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
868                 t_ret = ret;
869 out:    if (ret)
870                 *hcp = save_curs;
871         RELEASE_META(hashp->dbp, hashp);
872         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
873                 __db_puthandle(ldbp);
874         return (ret);
875 }
876
877 static int
878 __ham_c_put(cursor, key, data, flags)
879         DBC *cursor;
880         DBT *key;
881         DBT *data;
882         int flags;
883 {
884         DB *ldbp;
885         HTAB *hashp;
886         HASH_CURSOR *hcp, save_curs;
887         int ret, t_ret;
888         u_int32_t nbytes;
889
890         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
891             flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
892             NULL, flags);
893         ldbp = cursor->dbp;
894         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
895             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
896                 return (ret);
897         hashp = (HTAB *)(ldbp->internal);
898         hcp = (HASH_CURSOR *)cursor->internal;
899         save_curs = *hcp;
900
901         if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
902             F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
903                 return (ret);
904         if (F_ISSET(hcp, H_DELETED))
905                 return (DB_NOTFOUND);
906
907         SET_LOCKER(hashp->dbp, cursor->txn);
908         GET_META(hashp->dbp, hashp);
909         ret = 0;
910
911         switch (flags) {
912         case DB_KEYLAST:
913         case DB_KEYFIRST:
914                 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
915                     HKEYDATA_PSIZE(key->size)) +
916                     (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
917                     HKEYDATA_PSIZE(data->size));
918                 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
919                 break;
920         case DB_BEFORE:
921         case DB_AFTER:
922         case DB_CURRENT:
923                 ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
924                 break;
925         }
926
927         if (ret == 0) {
928                 if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
929                         ret = __ham_overwrite(hashp, hcp, data);
930                 else
931                         ret = __ham_add_dup(hashp, hcp, data, flags);
932         }
933
934         if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
935                 ret = __ham_expand_table(hashp);
936                 F_CLR(hcp, H_EXPAND);
937         }
938
939         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
940                 ret = t_ret;
941         if (ret != 0)
942                 *hcp = save_curs;
943         RELEASE_META(hashp->dbp, hashp);
944         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
945                 __db_puthandle(ldbp);
946         return (ret);
947 }
948
949 /********************************* UTILITIES ************************/
950
951 /*
952  * __ham_expand_table --
953  *
954  * PUBLIC: int __ham_expand_table __P((HTAB *));
955  */
956 int
957 __ham_expand_table(hashp)
958         HTAB *hashp;
959 {
960         u_int32_t old_bucket, new_bucket;
961         u_int32_t spare_ndx;
962         int ret;
963
964         ret = 0;
965         DIRTY_META(hashp, ret);
966         if (ret)
967                 return (ret);
968
969         if (DB_LOGGING(hashp->dbp)) {
970                 DB_LSN new_lsn;
971
972                 if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
973                     (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
974                     hashp->dbp->log_fileid,
975                     hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
976                     hashp->hdr->spares[hashp->hdr->ovfl_point],
977                     &hashp->hdr->lsn)) != 0)
978                         return (ret);
979
980                 hashp->hdr->lsn = new_lsn;
981         }
982
983         hashp->hash_expansions++;
984         new_bucket = ++hashp->hdr->max_bucket;
985         old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
986
987         /*
988          * If the split point is increasing (hdr.max_bucket's log base 2
989          * increases), max sure that we have enough extra pages, then
990          * copy the current contents of the spare split bucket to the
991          * next bucket.
992          */
993         spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
994         if (spare_ndx > hashp->hdr->ovfl_point) {
995                 /*
996                  * We are about to shift the split point.  Make sure that
997                  * if the next doubling is going to be big (more than 8
998                  * pages), we have some extra pages around.
999                  */
1000                 if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 &&
1001                     new_bucket >= 8)
1002                         __ham_init_ovflpages(hashp);
1003
1004                 hashp->hdr->spares[spare_ndx] =
1005                     hashp->hdr->spares[hashp->hdr->ovfl_point];
1006                 hashp->hdr->ovfl_point = spare_ndx;
1007         }
1008
1009         if (new_bucket > hashp->hdr->high_mask) {
1010                 /* Starting a new doubling */
1011                 hashp->hdr->low_mask = hashp->hdr->high_mask;
1012                 hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
1013         }
1014
1015         if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
1016                 __db_err(hashp->dbp->dbenv,
1017                     "hash: Cannot allocate new bucket.  Pages exhausted.");
1018                 return (ENOSPC);
1019         }
1020
1021         /* Relocate records to the new bucket */
1022         return (__ham_split_page(hashp, old_bucket, new_bucket));
1023 }
1024
1025 /*
1026  * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1027  */
1028 u_int32_t
1029 __ham_call_hash(hashp, k, len)
1030         HTAB *hashp;
1031         u_int8_t *k;
1032         int32_t len;
1033 {
1034         u_int32_t n, bucket;
1035
1036         n = (u_int32_t)hashp->hash(k, len);
1037         bucket = n & hashp->hdr->high_mask;
1038         if (bucket > hashp->hdr->max_bucket)
1039                 bucket = bucket & hashp->hdr->low_mask;
1040         return (bucket);
1041 }
1042
1043 /*
1044  * Check for duplicates, and call __db_ret appropriately.  Release
1045  * everything held by the cursor.
1046  */
1047 static int
1048 __ham_dup_return(hashp, hcp, val, flags)
1049         HTAB *hashp;
1050         HASH_CURSOR *hcp;
1051         DBT *val;
1052         int flags;
1053 {
1054         HKEYDATA *hk;
1055         PAGE *pp;
1056         DBT *myval, tmp_val;
1057         db_indx_t ndx;
1058         db_pgno_t pgno;
1059         u_int8_t type;
1060         int indx, ret;
1061         db_indx_t len;
1062
1063         /* Check for duplicate and return the first one. */
1064         ndx = H_DATAINDEX(hcp->bndx);
1065         type = GET_HKEYDATA(hcp->pagep, ndx)->type;
1066         pp = hcp->pagep;
1067         myval = val;
1068
1069         /*
1070          * There are 3 cases:
1071          * 1. We are not in duplicate, simply call db_ret.
1072          * 2. We are looking at keys and stumbled onto a duplicate.
1073          * 3. We are in the middle of a duplicate set. (ISDUP set)
1074          */
1075
1076         /*
1077          * Here we check for the case where we just stumbled onto a
1078          * duplicate.  In this case, we do initialization and then
1079          * let the normal duplicate code handle it.
1080          */
1081         if (!F_ISSET(hcp, H_ISDUP))
1082                 if (type == H_DUPLICATE) {
1083                         F_SET(hcp, H_ISDUP);
1084                         hcp->dup_tlen = LEN_HDATA(hcp->pagep,
1085                             hashp->hdr->pagesize, hcp->bndx);
1086                         hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1087                         if (flags == DB_LAST || flags == DB_PREV) {
1088                                 hcp->dndx = 0;
1089                                 hcp->dup_off = 0;
1090                                 do {
1091                                         memcpy(&len, hk->data + hcp->dup_off,
1092                                             sizeof(db_indx_t));
1093                                         hcp->dup_off += DUP_SIZE(len);
1094                                         hcp->dndx++;
1095                                 } while (hcp->dup_off < hcp->dup_tlen);
1096                                 hcp->dup_off -= DUP_SIZE(len);
1097                                 hcp->dndx--;
1098                         } else {
1099                                 memcpy(&len, hk->data, sizeof(db_indx_t));
1100                                 hcp->dup_off = 0;
1101                                 hcp->dndx = 0;
1102                         }
1103                         hcp->dup_len = len;
1104                 } else if (type == H_OFFDUP) {
1105                         F_SET(hcp, H_ISDUP);
1106                         memcpy(&pgno,
1107                             P_ENTRY(hcp->pagep, ndx) + SSZ(HOFFDUP, pgno),
1108                             sizeof(db_pgno_t));
1109                         if (flags == DB_LAST || flags == DB_PREV) {
1110                                 indx = (int)hcp->dndx;
1111                                 if ((ret = __db_dend(hashp->dbp,
1112                                     pgno, &hcp->dpagep)) != 0)
1113                                         return (ret);
1114                                 hcp->dpgno = PGNO(hcp->dpagep);
1115                                 hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
1116                         } else if ((ret = __ham_next_cpage(hashp,
1117                             hcp, pgno, 0, H_ISDUP)) != 0)
1118                                 return (ret);
1119                 }
1120
1121
1122         /*
1123          * Now, everything is initialized, grab a duplicate if
1124          * necessary.
1125          */
1126         if (F_ISSET(hcp, H_ISDUP))
1127                 if (hcp->dpgno != PGNO_INVALID) {
1128                         pp = hcp->dpagep;
1129                         ndx = hcp->dndx;
1130                 } else {
1131                         /*
1132                          * Copy the DBT in case we are retrieving into
1133                          * user memory and we need the parameters for
1134                          * it.
1135                          */
1136                         memcpy(&tmp_val, val, sizeof(*val));
1137                         F_SET(&tmp_val, DB_DBT_PARTIAL);
1138                         tmp_val.dlen = hcp->dup_len;
1139                         tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1140                         myval = &tmp_val;
1141                 }
1142
1143
1144         /*
1145          * Finally, if we had a duplicate, pp, ndx, and myval should be
1146          * set appropriately.
1147          */
1148         if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
1149             &hcp->big_datalen)) != 0)
1150                 return (ret);
1151
1152         /*
1153          * In case we sent a temporary off to db_ret, set the real
1154          * return values.
1155          */
1156         val->data = myval->data;
1157         val->size = myval->size;
1158
1159         return (0);
1160 }
1161
1162 static int
1163 __ham_overwrite(hashp, hcp, nval)
1164         HTAB *hashp;
1165         HASH_CURSOR *hcp;
1166         DBT *nval;
1167 {
1168         DBT *myval, tmp_val;
1169         HKEYDATA *hk;
1170
1171         if (F_ISSET(hashp->dbp, DB_AM_DUP))
1172                 return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
1173         else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1174                 /* Put/overwrite */
1175                 memcpy(&tmp_val, nval, sizeof(*nval));
1176                 F_SET(&tmp_val, DB_DBT_PARTIAL);
1177                 tmp_val.doff = 0;
1178                 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1179                 if (hk->type == H_OFFPAGE)
1180                         memcpy(&tmp_val.dlen,
1181                             (u_int8_t *)hk + SSZ(HOFFPAGE, tlen),
1182                             sizeof(u_int32_t));
1183                 else
1184                         tmp_val.dlen = LEN_HDATA(hcp->pagep,
1185                             hashp->hdr->pagesize,hcp->bndx);
1186                 myval = &tmp_val;
1187         } else /* Regular partial put */
1188                 myval = nval;
1189
1190         return (__ham_replpair(hashp, hcp, myval, 0));
1191 }
1192
1193 /*
1194  * Given a key and a cursor, sets the cursor to the page/ndx on which
1195  * the key resides.  If the key is found, the cursor H_OK flag is set
1196  * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1197  * If the key is not found, the H_OK flag is not set.  If the sought
1198  * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1199  * are set indicating where an add might take place.  If it is 0,
1200  * non of the cursor pointer field are valid.
1201  */
1202 static int
1203 __ham_lookup(hashp, hcp, key, sought, mode)
1204         HTAB *hashp;
1205         HASH_CURSOR *hcp;
1206         const DBT *key;
1207         u_int32_t sought;
1208         db_lockmode_t mode;
1209 {
1210         HKEYDATA *hk;
1211         db_pgno_t pgno;
1212         u_int32_t tlen;
1213         int match, ret, t_ret;
1214
1215         /*
1216          * Set up cursor so that we're looking for space to add an item
1217          * as we cycle through the pages looking for the key.
1218          */
1219         if ((ret = __ham_item_reset(hashp, hcp)) != 0)
1220                 return (ret);
1221         hcp->seek_size = sought;
1222
1223         hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
1224         while (1) {
1225                 if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
1226                         return (ret);
1227
1228                 if (F_ISSET(hcp, H_NOMORE))
1229                         break;
1230
1231                 hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1232                 switch (hk->type) {
1233                 case H_OFFPAGE:
1234                         memcpy(&tlen, (u_int8_t *)hk + SSZ(HOFFPAGE, tlen),
1235                             sizeof(u_int32_t));
1236                         if (tlen == key->size) {
1237                                 memcpy(&pgno,
1238                                     (u_int8_t *)hk + SSZ(HOFFPAGE, pgno),
1239                                     sizeof(db_pgno_t));
1240                                 match = __db_moff(hashp->dbp, key, pgno);
1241                                 if (match == 0) {
1242                                         F_SET(hcp, H_OK);
1243                                         return (0);
1244                                 }
1245                         }
1246                         break;
1247                 case H_KEYDATA:
1248                         if (key->size == LEN_HKEY(hcp->pagep,
1249                             hashp->hdr->pagesize, hcp->bndx) &&
1250                             memcmp(key->data, hk->data, key->size) == 0) {
1251                                 F_SET(hcp, H_OK);
1252                                 return (0);
1253                         }
1254                         break;
1255                 case H_DUPLICATE:
1256                 case H_OFFDUP:
1257                         /*
1258                          * These are errors because keys are never
1259                          * duplicated, only data items are.
1260                          */
1261                         return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
1262                 }
1263                 hashp->hash_collisions++;
1264         }
1265
1266         /*
1267          * Item was not found, adjust cursor properly.
1268          */
1269
1270         if (sought != 0)
1271                 return (ret);
1272
1273         if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
1274                 ret = t_ret;
1275         return (ret);
1276 }
1277
1278 /*
1279  * Initialize a dbt using some possibly already allocated storage
1280  * for items.
1281  * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1282  */
1283 int
1284 __ham_init_dbt(dbt, size, bufp, sizep)
1285         DBT *dbt;
1286         u_int32_t size;
1287         void **bufp;
1288         u_int32_t *sizep;
1289 {
1290         memset(dbt, 0, sizeof(*dbt));
1291         if (*sizep < size) {
1292                 if ((*bufp = (void *)(*bufp == NULL ?
1293                     malloc(size) : realloc(*bufp, size))) == NULL) {
1294                         *sizep = 0;
1295                         return (ENOMEM);
1296                 }
1297                 *sizep = size;
1298         }
1299         dbt->data = *bufp;
1300         dbt->size = size;
1301         return (0);
1302 }
1303
1304 /*
1305  * Adjust the cursor after an insert or delete.  The cursor passed is
1306  * the one that was operated upon; we just need to check any of the
1307  * others.
1308  *
1309  * len indicates the length of the item added/deleted
1310  * add indicates if the item indicated by the cursor has just been
1311  * added (add == 1) or deleted (add == 0).
1312  * dup indicates if the addition occurred into a duplicate set.
1313  *
1314  * PUBLIC: void __ham_c_update __P((HTAB *,
1315  * PUBLIC:    HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1316  */
1317 void
1318 __ham_c_update(hashp, hcp, chg_pgno, len, add, dup)
1319         HTAB *hashp;
1320         HASH_CURSOR *hcp;
1321         db_pgno_t chg_pgno;
1322         u_int32_t len;
1323         int add;
1324         int dup;
1325 {
1326         DBC *cp;
1327         HTAB *hp;
1328         HASH_CURSOR *lcp;
1329         int page_deleted;
1330
1331         /*
1332          * Regular adds are always at the end of a given page,
1333          * so we never have to adjust anyone's cursor after
1334          * a regular add.
1335          */
1336         if (!dup && add)
1337                 return;
1338
1339         page_deleted = chg_pgno != PGNO_INVALID &&
1340             ((!dup && chg_pgno != hcp->pgno) ||
1341             (dup && chg_pgno != hcp->dpgno));
1342
1343         hp = hcp->db_cursor->dbp->master->internal;
1344         DB_THREAD_LOCK(hp->dbp);
1345
1346         for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
1347             cp = TAILQ_NEXT(cp, links)) {
1348                 if (cp->internal == hcp)
1349                         continue;
1350
1351                 lcp = (HASH_CURSOR *)cp->internal;
1352
1353                 if (!dup && lcp->pgno != chg_pgno)
1354                         continue;
1355
1356                 if (dup && F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1357                         continue;
1358
1359                 if (dup && !F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1360                         continue;
1361
1362                 if (page_deleted) {
1363                         if (dup) {
1364                                 lcp->dpgno = hcp->dpgno;
1365                                 lcp->dndx = hcp->dndx;
1366                         } else {
1367                                 lcp->pgno = hcp->pgno;
1368                                 lcp->bndx = hcp->bndx;
1369                                 lcp->bucket = hcp->bucket;
1370                         }
1371                         F_CLR(lcp, H_ISDUP);
1372                         continue;
1373                 }
1374
1375                 if (!dup && lcp->bndx > hcp->bndx)
1376                         lcp->bndx--;
1377                 else if (!dup && lcp->bndx == hcp->bndx)
1378                         F_SET(lcp, H_DELETED);
1379                 else if (dup && lcp->bndx == hcp->bndx) {
1380                         /* Assign dpgno in case there was page conversion. */
1381                         lcp->dpgno = hcp->dpgno;
1382                         if (add && lcp->dndx >= hcp->dndx )
1383                                 lcp->dndx++;
1384                         else if (!add && lcp->dndx > hcp->dndx)
1385                                 lcp->dndx--;
1386                         else if (!add && lcp->dndx == hcp->dndx)
1387                                 F_SET(lcp, H_DELETED);
1388
1389                         /* Now adjust on-page information. */
1390                         if (lcp->dpgno == PGNO_INVALID)
1391                                 if (add) {
1392                                         lcp->dup_tlen += len;
1393                                         if (lcp->dndx > hcp->dndx)
1394                                                 lcp->dup_off += len;
1395                                 } else {
1396                                         lcp->dup_tlen -= len;
1397                                         if (lcp->dndx > hcp->dndx)
1398                                                 lcp->dup_off -= len;
1399                                 }
1400                 }
1401         }
1402         DB_THREAD_UNLOCK(hp->dbp);
1403 }
1404
1405 /*
1406  * __ham_hdup --
1407  *      This function gets called when we create a duplicate handle for a
1408  *      threaded DB.  It should create the private part of the DB structure.
1409  * PUBLIC: int  __ham_hdup __P((DB *, DB *));
1410  */
1411 int
1412 __ham_hdup(orig, new)
1413         DB *orig, *new;
1414 {
1415         HTAB *hashp;
1416         DBC *curs;
1417         int ret;
1418
1419         if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL)
1420                 return (ENOMEM);
1421
1422         new->internal = hashp;
1423
1424         hashp->dbp = new;
1425         hashp->hlock = 0;
1426         hashp->hdr = NULL;
1427         hashp->hash = ((HTAB *)orig->internal)->hash;
1428         if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL)
1429                 return (ENOMEM);
1430         hashp->local_errno = 0;
1431         hashp->hash_accesses = 0;
1432         hashp->hash_collisions = 0;
1433         hashp->hash_expansions = 0;
1434         hashp->hash_overflows = 0;
1435         hashp->hash_bigpages = 0;
1436         /* Initialize the cursor queue. */
1437         ret = __ham_c_init(new, NULL, &curs);
1438         TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
1439         return (ret);
1440 }