d986e08087021421ca7bc6e2e6dce6cb77b5ab79
[kopensolaris-gnu/glibc.git] / db2 / hash / hash.c
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997
5  *      Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994
9  *      Margo Seltzer.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994
13  *      The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Margo Seltzer.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. All advertising materials mentioning features or use of this software
27  *    must display the following acknowledgement:
28  *      This product includes software developed by the University of
29  *      California, Berkeley and its contributors.
30  * 4. Neither the name of the University nor the names of its contributors
31  *    may be used to endorse or promote products derived from this software
32  *    without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46
47 #include "config.h"
48
49 #ifndef lint
50 static const char sccsid[] = "@(#)hash.c        10.27 (Sleepycat) 9/15/97";
51 #endif /* not lint */
52
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
55 #include <sys/stat.h>
56
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63 #endif
64
65 #include "shqueue.h"
66 #include "db_int.h"
67 #include "db_page.h"
68 #include "db_am.h"
69 #include "db_ext.h"
70 #include "hash.h"
71 #include "log.h"
72
73 static int  __ham_c_close __P((DBC *));
74 static int  __ham_c_del __P((DBC *, int));
75 static int  __ham_c_get __P((DBC *, DBT *, DBT *, int));
76 static int  __ham_c_put __P((DBC *, DBT *, DBT *, int));
77 static int  __ham_c_init __P((DB *, DB_TXN *, DBC **));
78 static int  __ham_cursor __P((DB *, DB_TXN *, DBC **));
79 static int  __ham_delete __P((DB *, DB_TXN *, DBT *, int));
80 static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
81 static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
82 static void __ham_init_htab __P((HTAB *));
83 static int  __ham_lookup __P((HTAB *,
84                 HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
85 static int  __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
86 static int  __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
87 static int  __ham_sync __P((DB *, int));
88
89 /************************** INTERFACE ROUTINES ***************************/
90 /* OPEN/CLOSE */
91
92 /*
93  * __ham_open --
94  *
95  * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
96  */
97 int
98 __ham_open(dbp, dbinfo)
99         DB *dbp;
100         DB_INFO *dbinfo;
101 {
102         DB_ENV *dbenv;
103         DBC *curs;
104         HTAB *hashp;
105         int file_existed, ret;
106
107         dbenv = dbp->dbenv;
108
109         if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL)
110                 return (ENOMEM);
111         hashp->dbp = dbp;
112
113         /* Set the hash function if specified by the user. */
114         if (dbinfo != NULL && dbinfo->h_hash != NULL)
115                 hashp->hash = dbinfo->h_hash;
116
117         /*
118          * Initialize the remaining fields of the dbp.  The type, close and
119          * fd functions are all set in db_open.
120          */
121         dbp->internal = hashp;
122         dbp->cursor = __ham_cursor;
123         dbp->del = __ham_delete;
124         dbp->get = __ham_get;
125         dbp->put = __ham_put;
126         dbp->sync = __ham_sync;
127
128         /* If locking is turned on, lock the meta data page. */
129         if (F_ISSET(dbp, DB_AM_LOCKING)) {
130                 dbp->lock.pgno = BUCKET_INVALID;
131                 if ((ret = lock_get(dbenv->lk_info, dbp->locker,
132                     0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
133                         if (ret < 0)
134                                 ret = EAGAIN;
135                         goto out;
136                 }
137         }
138
139         /*
140          * Now, we can try to read the meta-data page and figure out
141          * if we set up locking and get the meta-data page properly.
142          * If this is a new file, initialize it, and put it back dirty.
143          */
144         if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
145                 goto out;
146
147         /* Initialize the hashp structure */
148         if (hashp->hdr->magic == DB_HASHMAGIC) {
149                 file_existed = 1;
150                 /* File exists, verify the data in the header. */
151                 if (hashp->hash == NULL)
152                         hashp->hash =
153                             hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
154                 if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
155                     hashp->hdr->h_charkey) {
156                         __db_err(hashp->dbp->dbenv,
157                             "hash: incompatible hash function");
158                         ret = EINVAL;
159                         goto out;
160                 }
161                 if (F_ISSET(hashp->hdr, DB_HASH_DUP))
162                         F_SET(dbp, DB_AM_DUP);
163         } else {
164                 /*
165                  * File does not exist, we must initialize the header.  If
166                  * locking is enabled that means getting a write lock first.
167                  */
168                 file_existed = 0;
169                 if (F_ISSET(dbp, DB_AM_LOCKING) &&
170                     ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
171                     (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
172                         &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
173                         if (ret < 0)
174                                 ret = EAGAIN;
175                         goto out;
176                 }
177
178                 hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0;
179                 hashp->hdr->ffactor =
180                     dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
181                 __ham_init_htab(hashp);
182                 if (F_ISSET(dbp, DB_AM_DUP))
183                         F_SET(hashp->hdr, DB_HASH_DUP);
184                 if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
185                         goto out;
186         }
187
188         /* Initialize the default cursor. */
189         __ham_c_init(dbp, NULL, &curs);
190         TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
191
192         /* Allocate memory for our split buffer. */
193         if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) {
194                 ret = ENOMEM;
195                 goto out;
196         }
197
198 #ifdef NO_STATISTICS_FOR_DB_ERR
199         __db_err(dbp->dbenv,
200             "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
201             "TABLE POINTER   ", (long)hashp,
202             "BUCKET SIZE     ", (long)hashp->hdr->pagesize,
203             "FILL FACTOR     ", (long)hashp->hdr->ffactor,
204             "MAX BUCKET      ", (long)hashp->hdr->max_bucket,
205             "OVFL POINT      ", (long)hashp->hdr->ovfl_point,
206             "LAST FREED      ", (long)hashp->hdr->last_freed,
207             "HIGH MASK       ", (long)hashp->hdr->high_mask,
208             "LOW  MASK       ", (long)hashp->hdr->low_mask,
209             "NELEM           ", (long)hashp->hdr->nelem,
210             "FLAGS           ", (long)hashp->hdr->flags);
211 #endif
212
213         /* Release the meta data page */
214         (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
215         if (F_ISSET(dbp, DB_AM_LOCKING) &&
216             (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
217                 if (ret < 0)
218                         ret = EAGAIN;
219                 goto out;
220         }
221
222         hashp->hlock = 0;
223         hashp->hdr = NULL;
224         /* Sync the file so that we know that the meta data goes to disk. */
225         if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
226                 goto out;
227         return (0);
228
229 out:    (void)__ham_close(dbp);
230         return (ret);
231 }
232
233 /*
234  * PUBLIC: int  __ham_close __P((DB *));
235  */
236 int
237 __ham_close(dbp)
238         DB *dbp;
239 {
240         HTAB *hashp;
241         int ret, t_ret;
242
243         DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
244         hashp = (HTAB *)dbp->internal;
245         ret = 0;
246
247         /* Free the split page. */
248         if (hashp->split_buf)
249                 FREE(hashp->split_buf, dbp->pgsize);
250
251         if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
252             (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
253                 ret = t_ret;
254         if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
255             hashp->hlock)) != 0 && ret == 0)
256                 ret = t_ret;
257
258         FREE(hashp, sizeof(HTAB));
259         dbp->internal = NULL;
260         return (ret);
261 }
262
263 /************************** LOCAL CREATION ROUTINES **********************/
264 /*
265  * Returns 0 on No Error
266  */
267 static void
268 __ham_init_htab(hashp)
269         HTAB *hashp;
270 {
271         u_int32_t nelem;
272         int32_t l2, nbuckets;
273
274         nelem = hashp->hdr->nelem;
275         hashp->hdr->pagesize = hashp->dbp->pgsize;
276         ZERO_LSN(hashp->hdr->lsn);
277         hashp->hdr->magic = DB_HASHMAGIC;
278         hashp->hdr->version = DB_HASHVERSION;
279         if (hashp->hash == NULL)
280                 hashp->hash =
281                     hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
282         hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
283         if (nelem != 0 && hashp->hdr->ffactor != 0) {
284                 nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
285                 l2 = __db_log2(nelem > 2 ? nelem : 2);
286         } else
287                 l2 = 2;
288
289         nbuckets = 1 << l2;
290
291         hashp->hdr->spares[l2] = 0;
292         hashp->hdr->spares[l2 + 1] = 0;
293         hashp->hdr->ovfl_point = l2;
294         hashp->hdr->last_freed = PGNO_INVALID;
295
296         hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
297         hashp->hdr->low_mask = (nbuckets >> 1) - 1;
298         memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
299 }
300
301 /********************** DESTROY/CLOSE ROUTINES ************************/
302
303
304 /*
305  * Write modified pages to disk
306  *
307  * Returns:
308  *       0 == OK
309  *      -1 ERROR
310  */
311 static int
312 __ham_sync(dbp, flags)
313         DB *dbp;
314         int flags;
315 {
316         int ret;
317
318         DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
319         if ((ret = __db_syncchk(dbp, flags)) != 0)
320                 return (ret);
321         if (F_ISSET(dbp, DB_AM_RDONLY))
322                 return (0);
323
324         if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
325                 ret = 0;
326
327         return (ret);
328 }
329
330 /*******************************SEARCH ROUTINES *****************************/
331 /*
332  * All the access routines return
333  *
334  * Returns:
335  *       0 on SUCCESS
336  *       1 to indicate an external ERROR (i.e. key not found, etc)
337  *      -1 to indicate an internal ERROR (i.e. out of memory, etc)
338  */
339
340 static int
341 __ham_get(dbp, txn, key, data, flags)
342         DB *dbp;
343         DB_TXN *txn;
344         DBT *key;
345         DBT *data;
346         int flags;
347 {
348         DB *ldbp;
349         DBC *cp;
350         HTAB *hashp;
351         HASH_CURSOR *hcp;
352         int ret, t_ret;
353
354         DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
355         if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
356                 return (ret);
357
358         ldbp = dbp;
359         if (F_ISSET(dbp, DB_AM_THREAD) &&
360             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
361                 return (ret);
362
363         hashp = (HTAB *)ldbp->internal;
364         SET_LOCKER(ldbp, txn);
365         GET_META(ldbp, hashp);
366         cp = TAILQ_FIRST(&ldbp->curs_queue);
367
368         hashp->hash_accesses++;
369         hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
370         if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
371                 if (F_ISSET(hcp, H_OK))
372                         ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
373                 else /* Key was not found */
374                         ret = DB_NOTFOUND;
375
376         if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
377                 ret = t_ret;
378         RELEASE_META(ldbp, hashp);
379         if (F_ISSET(dbp, DB_AM_THREAD))
380                 __db_puthandle(ldbp);
381         return (ret);
382 }
383
384 static int
385 __ham_put(dbp, txn, key, data, flags)
386         DB *dbp;
387         DB_TXN *txn;
388         DBT *key;
389         DBT *data;
390         int flags;
391 {
392         DB *ldbp;
393         HTAB *hashp;
394         HASH_CURSOR *hcp;
395         DBT tmp_val, *myval;
396         int ret, t_ret;
397         u_int32_t nbytes;
398
399         DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
400         if ((ret = __db_putchk(dbp, key, data,
401             flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
402                 return (ret);
403
404         ldbp = dbp;
405         if (F_ISSET(dbp, DB_AM_THREAD) &&
406             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
407                 return (ret);
408
409         hashp = (HTAB *)ldbp->internal;
410         SET_LOCKER(ldbp, txn);
411         GET_META(ldbp, hashp);
412         hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
413
414         nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
415             HKEYDATA_PSIZE(key->size)) +
416             (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
417             HKEYDATA_PSIZE(data->size));
418
419         hashp->hash_accesses++;
420         ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
421
422         if (ret == DB_NOTFOUND) {
423                 ret = 0;
424                 if (hcp->seek_found_page != PGNO_INVALID &&
425                     hcp->seek_found_page != hcp->pgno) {
426                         if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
427                                 goto out;
428                         hcp->pgno = hcp->seek_found_page;
429                         hcp->bndx = NDX_INVALID;
430                 }
431
432                 if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
433                         /*
434                          * Doing a partial put, but the key does not exist
435                          * and we are not beginning the write at 0.  We
436                          * must create a data item padded up to doff and
437                          * then write the new bytes represented by val.
438                          */
439                         ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
440                             &hcp->big_data, &hcp->big_datalen);
441                         if (ret == 0) {
442                                 memset(tmp_val.data, 0, data->doff);
443                                 memcpy((u_int8_t *)tmp_val.data + data->doff,
444                                     data->data, data->size);
445                                 myval = &tmp_val;
446                         }
447                 } else
448                         myval = (DBT *)data;
449
450                 if (ret == 0)
451                         ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
452         } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
453                 if (flags == DB_NOOVERWRITE)
454                         ret = DB_KEYEXIST;
455                 else if (F_ISSET(ldbp, DB_AM_DUP))
456                         ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
457                 else
458                         ret = __ham_overwrite(hashp, hcp, data);
459         }
460
461         /* Free up all the cursor pages. */
462         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
463                 ret = t_ret;
464         /* Now check if we have to grow. */
465 out:    if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
466                 ret = __ham_expand_table(hashp);
467                 F_CLR(hcp, H_EXPAND);
468         }
469
470         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
471                 ret = t_ret;
472         RELEASE_META(ldbp, hashp);
473         if (F_ISSET(dbp, DB_AM_THREAD))
474                 __db_puthandle(ldbp);
475         return (ret);
476 }
477
478 static int
479 __ham_cursor(dbp, txnid, dbcp)
480         DB *dbp;
481         DB_TXN *txnid;
482         DBC **dbcp;
483 {
484         int ret;
485
486         DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
487         if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
488                 return (ret);
489
490         DB_THREAD_LOCK(dbp);
491         TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
492         DB_THREAD_UNLOCK(dbp);
493         return (ret);
494 }
495
496 static int
497 __ham_c_init(dbp, txnid, dbcp)
498         DB *dbp;
499         DB_TXN *txnid;
500         DBC **dbcp;
501 {
502         DBC *db_curs;
503         HASH_CURSOR *new_curs;
504
505         if ((db_curs = (DBC *)calloc(sizeof(DBC), 1)) == NULL)
506                 return (ENOMEM);
507
508         if ((new_curs =
509             (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1)) == NULL) {
510                 FREE(db_curs, sizeof(DBC));
511                 return (ENOMEM);
512         }
513
514         db_curs->internal = new_curs;
515         db_curs->c_close = __ham_c_close;
516         db_curs->c_del = __ham_c_del;
517         db_curs->c_get = __ham_c_get;
518         db_curs->c_put = __ham_c_put;
519         db_curs->txn = txnid;
520         db_curs->dbp = dbp;
521
522         new_curs->db_cursor = db_curs;
523         __ham_item_init(new_curs);
524
525         if (dbcp != NULL)
526                 *dbcp = db_curs;
527         return (0);
528 }
529
530 static int
531 __ham_delete(dbp, txn, key, flags)
532         DB *dbp;
533         DB_TXN *txn;
534         DBT *key;
535         int flags;
536 {
537         DB *ldbp;
538         HTAB *hashp;
539         HASH_CURSOR *hcp;
540         int ret, t_ret;
541
542         DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
543         if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
544                 return (ret);
545
546         ldbp = dbp;
547         if (F_ISSET(dbp, DB_AM_THREAD) &&
548             (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
549                 return (ret);
550         hashp = (HTAB *)ldbp->internal;
551         SET_LOCKER(ldbp, txn);
552         GET_META(ldbp, hashp);
553         hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
554
555         hashp->hash_accesses++;
556         if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
557                 if (F_ISSET(hcp, H_OK))
558                         ret = __ham_del_pair(hashp, hcp);
559                 else
560                         ret = DB_NOTFOUND;
561
562         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
563                 ret = t_ret;
564         RELEASE_META(ldbp, hashp);
565         if (F_ISSET(dbp, DB_AM_THREAD))
566                 __db_puthandle(ldbp);
567         return (ret);
568 }
569
570 /* ****************** CURSORS ********************************** */
571 static int
572 __ham_c_close(cursor)
573         DBC *cursor;
574 {
575         DB  *ldbp;
576         int ret;
577
578         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
579         /*
580          * If the pagep, dpagep, and lock fields of the cursor are all NULL,
581          * then there really isn't a need to get a handle here.  However,
582          * the normal case is that at least one of those fields is non-NULL,
583          * and putting those checks in here would couple the ham_item_done
584          * functionality with cursor close which would be pretty disgusting.
585          * Instead, we pay the overhead here of always getting the handle.
586          */
587         ldbp = cursor->dbp;
588         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
589             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
590                 return (ret);
591
592         ret = __ham_c_iclose(ldbp, cursor);
593
594         if (F_ISSET(ldbp, DB_AM_THREAD))
595                 __db_puthandle(ldbp);
596         return (ret);
597 }
598 /*
599  * __ham_c_iclose --
600  *
601  * Internal cursor close routine; assumes it is being passed the correct
602  * handle, rather than getting and putting a handle.
603  *
604  * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
605  */
606 int
607 __ham_c_iclose(dbp, dbc)
608         DB *dbp;
609         DBC *dbc;
610 {
611         HASH_CURSOR *hcp;
612         HTAB *hashp;
613         int ret;
614
615         hashp = (HTAB *)dbp->internal;
616         hcp = (HASH_CURSOR *)dbc->internal;
617         ret = __ham_item_done(hashp, hcp, 0);
618
619         if (hcp->big_key)
620                 FREE(hcp->big_key, hcp->big_keylen);
621         if (hcp->big_data)
622                 FREE(hcp->big_data, hcp->big_datalen);
623
624         /*
625          * All cursors (except the default ones) are linked off the master.
626          * Therefore, when we close the cursor, we have to remove it from
627          * the master, not the local one.
628          * XXX I am always removing from the master; what about local cursors?
629          */
630         DB_THREAD_LOCK(dbc->dbp);
631         TAILQ_REMOVE(&dbc->dbp->curs_queue, dbc, links);
632         DB_THREAD_UNLOCK(dbc->dbp);
633
634         FREE(hcp, sizeof(HASH_CURSOR));
635         FREE(dbc, sizeof(DBC));
636
637         return (ret);
638 }
639
640 static int
641 __ham_c_del(cursor, flags)
642         DBC *cursor;
643         int flags;
644 {
645         DB *ldbp;
646         HTAB *hashp;
647         HASH_CURSOR *hcp;
648         HASH_CURSOR save_curs;
649         db_pgno_t ppgno, chg_pgno;
650         int ret, t_ret;
651
652         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
653         ldbp = cursor->dbp;
654         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
655             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
656                 return (ret);
657         hashp = (HTAB *)ldbp->internal;
658         hcp = (HASH_CURSOR *)cursor->internal;
659         save_curs = *hcp;
660         if ((ret = __db_cdelchk(ldbp, flags,
661             F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
662                 return (ret);
663         if (F_ISSET(hcp, H_DELETED))
664                 return (DB_NOTFOUND);
665
666         SET_LOCKER(hashp->dbp, cursor->txn);
667         GET_META(hashp->dbp, hashp);
668         hashp->hash_accesses++;
669         if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
670                 goto out;
671         if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
672                 ppgno = PREV_PGNO(hcp->dpagep);
673
674                 /* Remove item from duplicate page. */
675                 chg_pgno = hcp->dpgno;
676                 if ((ret = __db_drem(hashp->dbp,
677                     &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
678                         goto out;
679
680                 /*
681                  * There are 4 cases.
682                  * 1. We removed an item on a page, but nothing else changed.
683                  * 2. We removed the last item on a page, but there is a
684                  *    following page of duplicates.
685                  * 3. We removed the last item on a page, this page was the
686                  *    last page in a duplicate set, but there were dups before
687                  *    it.
688                  * 4. We removed the last item on a page, removing the last
689                  *    duplicate.
690                  * In case 1 hcp->dpagep is unchanged.
691                  * In case 2 hcp->dpagep comes back pointing to the next dup
692                  *     page.
693                  * In case 3 hcp->dpagep comes back NULL.
694                  * In case 4 hcp->dpagep comes back NULL.
695                  */
696                 if (hcp->dpagep == NULL) {
697                         if (ppgno != PGNO_INVALID) {            /* Case 3 */
698                                 hcp->dpgno = ppgno;
699                                 if ((ret = __ham_get_cpage(hashp, hcp,
700                                     DB_LOCK_READ)) != 0)
701                                         goto out;
702                                 hcp->dndx = NUM_ENT(hcp->dpagep);
703                                 F_SET(hcp, H_DELETED);
704                         } else {                                /* Case 4 */
705                                 ret = __ham_del_pair(hashp, hcp);
706                                 hcp->dpgno = PGNO_INVALID;
707                                 /*
708                                  * Delpair updated the cursor queue, so we
709                                  * don't have to do that here.
710                                  */
711                                 chg_pgno = PGNO_INVALID;
712                         }
713                 } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
714                         hcp->dndx = 0;                          /* Case 2 */
715                         hcp->dpgno = PGNO(hcp->dpagep);
716                         if (ppgno == PGNO_INVALID)
717                                 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
718                                     H_DATAINDEX(hcp->bndx))),
719                                     &hcp->dpgno, sizeof(db_pgno_t));
720                         F_SET(hcp, H_DELETED);
721                 } else                                  /* Case 1 */
722                         F_SET(hcp, H_DELETED);
723                 if (chg_pgno != PGNO_INVALID)
724                         __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1);
725         } else if (F_ISSET(hcp, H_ISDUP)) {                     /* on page */
726                 if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
727                     LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
728                         ret = __ham_del_pair(hashp, hcp);
729                 else {
730                         DBT repldbt;
731
732                         repldbt.flags = 0;
733                         F_SET(&repldbt, DB_DBT_PARTIAL);
734                         repldbt.doff = hcp->dup_off;
735                         repldbt.dlen = DUP_SIZE(hcp->dup_len);
736                         repldbt.size = 0;
737                         ret = __ham_replpair(hashp, hcp, &repldbt, 0);
738                         hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
739                         __ham_c_update(hashp, hcp, hcp->pgno,
740                             DUP_SIZE(hcp->dup_len), 0, 1);
741                         F_SET(hcp, H_DELETED);
742                 }
743
744         } else
745                 /* Not a duplicate */
746                 ret = __ham_del_pair(hashp, hcp);
747
748 out:    if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
749                 t_ret = ret;
750         if (ret != 0)
751                 *hcp = save_curs;
752         RELEASE_META(hashp->dbp, hashp);
753         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
754                 __db_puthandle(ldbp);
755         return (ret);
756 }
757
758 static int
759 __ham_c_get(cursor, key, data, flags)
760         DBC *cursor;
761         DBT *key;
762         DBT *data;
763         int flags;
764 {
765         DB *ldbp;
766         HTAB *hashp;
767         HASH_CURSOR *hcp, save_curs;
768         int get_key, ret, t_ret;
769
770         DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
771             flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
772             NULL, flags);
773         ldbp = cursor->dbp;
774         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
775             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
776                 return (ret);
777         hashp = (HTAB *)(ldbp->internal);
778         hcp = (HASH_CURSOR *)cursor->internal;
779         save_curs = *hcp;
780         if ((ret =
781             __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
782                 return (ret);
783
784         SET_LOCKER(hashp->dbp, cursor->txn);
785         GET_META(hashp->dbp, hashp);
786         hashp->hash_accesses++;
787
788         hcp->seek_size = 0;
789
790         ret = 0;
791         get_key = 1;
792         switch (flags) {
793         case DB_PREV:
794                 if (hcp->bucket != BUCKET_INVALID) {
795                         ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
796                         break;
797                 }
798                 /* FALL THROUGH */
799         case DB_LAST:
800                 ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
801                 break;
802         case DB_FIRST:
803                 ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
804                 break;
805         case DB_NEXT:
806                 if (hcp->bucket == BUCKET_INVALID)
807                         hcp->bucket = 0;
808                 ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
809                 break;
810         case DB_SET:
811         case DB_SET_RANGE:
812                 ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
813                 get_key = 0;
814                 break;
815         case DB_CURRENT:
816                 if (F_ISSET(hcp, H_DELETED)) {
817                         ret = DB_KEYEMPTY;
818                         goto out;
819                 }
820
821                 ret = __ham_item(hashp, hcp, DB_LOCK_READ);
822                 break;
823         }
824
825         /*
826          * Must always enter this loop to do error handling and
827          * check for big key/data pair.
828          */
829         while (1) {
830                 if (ret != 0 && ret != DB_NOTFOUND)
831                         goto out1;
832                 else if (F_ISSET(hcp, H_OK)) {
833                         /* Get the key. */
834                         if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
835                             H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
836                             &hcp->big_keylen)) != 0)
837                                 goto out1;
838
839                         ret = __ham_dup_return(hashp, hcp, data, flags);
840                         break;
841                 } else if (!F_ISSET(hcp, H_NOMORE)) {
842                         abort();
843                         break;
844                 }
845
846                 /*
847                  * Ran out of entries in a bucket; change buckets.
848                  */
849                 switch (flags) {
850                         case DB_LAST:
851                         case DB_PREV:
852                                 ret = __ham_item_done(hashp, hcp, 0);
853                                 if (hcp->bucket == 0) {
854                                         ret = DB_NOTFOUND;
855                                         goto out1;
856                                 }
857                                 hcp->bucket--;
858                                 hcp->bndx = NDX_INVALID;
859                                 if (ret == 0)
860                                         ret = __ham_item_prev(hashp,
861                                             hcp, DB_LOCK_READ);
862                                 break;
863                         case DB_FIRST:
864                         case DB_NEXT:
865                                 ret = __ham_item_done(hashp, hcp, 0);
866                                 hcp->bndx = NDX_INVALID;
867                                 hcp->bucket++;
868                                 hcp->pgno = PGNO_INVALID;
869                                 hcp->pagep = NULL;
870                                 if (hcp->bucket > hashp->hdr->max_bucket) {
871                                         ret = DB_NOTFOUND;
872                                         goto out1;
873                                 }
874                                 if (ret == 0)
875                                         ret = __ham_item_next(hashp,
876                                             hcp, DB_LOCK_READ);
877                                 break;
878                         case DB_SET:
879                         case DB_SET_RANGE:
880                                 /* Key not found. */
881                                 ret = DB_NOTFOUND;
882                                 goto out1;
883                 }
884         }
885 out1:   if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
886                 t_ret = ret;
887 out:    if (ret)
888                 *hcp = save_curs;
889         RELEASE_META(hashp->dbp, hashp);
890         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
891                 __db_puthandle(ldbp);
892         return (ret);
893 }
894
895 static int
896 __ham_c_put(cursor, key, data, flags)
897         DBC *cursor;
898         DBT *key;
899         DBT *data;
900         int flags;
901 {
902         DB *ldbp;
903         HTAB *hashp;
904         HASH_CURSOR *hcp, save_curs;
905         int ret, t_ret;
906         u_int32_t nbytes;
907
908         DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
909             flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
910             NULL, flags);
911         ldbp = cursor->dbp;
912         if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
913             (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
914                 return (ret);
915         hashp = (HTAB *)(ldbp->internal);
916         hcp = (HASH_CURSOR *)cursor->internal;
917         save_curs = *hcp;
918
919         if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
920             F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
921                 return (ret);
922         if (F_ISSET(hcp, H_DELETED))
923                 return (DB_NOTFOUND);
924
925         SET_LOCKER(hashp->dbp, cursor->txn);
926         GET_META(hashp->dbp, hashp);
927         ret = 0;
928
929         switch (flags) {
930         case DB_KEYLAST:
931         case DB_KEYFIRST:
932                 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
933                     HKEYDATA_PSIZE(key->size)) +
934                     (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
935                     HKEYDATA_PSIZE(data->size));
936                 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
937                 break;
938         case DB_BEFORE:
939         case DB_AFTER:
940         case DB_CURRENT:
941                 ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
942                 break;
943         }
944
945         if (ret == 0) {
946                 if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
947                         ret = __ham_overwrite(hashp, hcp, data);
948                 else
949                         ret = __ham_add_dup(hashp, hcp, data, flags);
950         }
951
952         if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
953                 ret = __ham_expand_table(hashp);
954                 F_CLR(hcp, H_EXPAND);
955         }
956
957         if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
958                 ret = t_ret;
959         if (ret != 0)
960                 *hcp = save_curs;
961         RELEASE_META(hashp->dbp, hashp);
962         if (F_ISSET(cursor->dbp, DB_AM_THREAD))
963                 __db_puthandle(ldbp);
964         return (ret);
965 }
966
967 /********************************* UTILITIES ************************/
968
969 /*
970  * __ham_expand_table --
971  *
972  * PUBLIC: int __ham_expand_table __P((HTAB *));
973  */
974 int
975 __ham_expand_table(hashp)
976         HTAB *hashp;
977 {
978         u_int32_t old_bucket, new_bucket;
979         u_int32_t spare_ndx;
980         int ret;
981
982         ret = 0;
983         DIRTY_META(hashp, ret);
984         if (ret)
985                 return (ret);
986
987         if (DB_LOGGING(hashp->dbp)) {
988                 DB_LSN new_lsn;
989
990                 if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
991                     (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
992                     hashp->dbp->log_fileid,
993                     hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
994                     hashp->hdr->spares[hashp->hdr->ovfl_point],
995                     &hashp->hdr->lsn)) != 0)
996                         return (ret);
997
998                 hashp->hdr->lsn = new_lsn;
999         }
1000
1001         hashp->hash_expansions++;
1002         new_bucket = ++hashp->hdr->max_bucket;
1003         old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
1004
1005         /*
1006          * If the split point is increasing (hdr.max_bucket's log base 2
1007          * increases), max sure that we have enough extra pages, then
1008          * copy the current contents of the spare split bucket to the
1009          * next bucket.
1010          */
1011         spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
1012         if (spare_ndx > hashp->hdr->ovfl_point) {
1013                 /*
1014                  * We are about to shift the split point.  Make sure that
1015                  * if the next doubling is going to be big (more than 8
1016                  * pages), we have some extra pages around.
1017                  */
1018                 if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 &&
1019                     new_bucket >= 8)
1020                         __ham_init_ovflpages(hashp);
1021
1022                 hashp->hdr->spares[spare_ndx] =
1023                     hashp->hdr->spares[hashp->hdr->ovfl_point];
1024                 hashp->hdr->ovfl_point = spare_ndx;
1025         }
1026
1027         if (new_bucket > hashp->hdr->high_mask) {
1028                 /* Starting a new doubling */
1029                 hashp->hdr->low_mask = hashp->hdr->high_mask;
1030                 hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
1031         }
1032
1033         if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
1034                 __db_err(hashp->dbp->dbenv,
1035                     "hash: Cannot allocate new bucket.  Pages exhausted.");
1036                 return (ENOSPC);
1037         }
1038
1039         /* Relocate records to the new bucket */
1040         return (__ham_split_page(hashp, old_bucket, new_bucket));
1041 }
1042
1043 /*
1044  * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1045  */
1046 u_int32_t
1047 __ham_call_hash(hashp, k, len)
1048         HTAB *hashp;
1049         u_int8_t *k;
1050         int32_t len;
1051 {
1052         u_int32_t n, bucket;
1053
1054         n = (u_int32_t)hashp->hash(k, len);
1055         bucket = n & hashp->hdr->high_mask;
1056         if (bucket > hashp->hdr->max_bucket)
1057                 bucket = bucket & hashp->hdr->low_mask;
1058         return (bucket);
1059 }
1060
1061 /*
1062  * Check for duplicates, and call __db_ret appropriately.  Release
1063  * everything held by the cursor.
1064  */
1065 static int
1066 __ham_dup_return(hashp, hcp, val, flags)
1067         HTAB *hashp;
1068         HASH_CURSOR *hcp;
1069         DBT *val;
1070         int flags;
1071 {
1072         PAGE *pp;
1073         DBT *myval, tmp_val;
1074         db_indx_t ndx;
1075         db_pgno_t pgno;
1076         u_int8_t *hk, type;
1077         int indx, ret;
1078         db_indx_t len;
1079
1080         /* Check for duplicate and return the first one. */
1081         ndx = H_DATAINDEX(hcp->bndx);
1082         type = HPAGE_TYPE(hcp->pagep, ndx);
1083         pp = hcp->pagep;
1084         myval = val;
1085
1086         /*
1087          * There are 3 cases:
1088          * 1. We are not in duplicate, simply call db_ret.
1089          * 2. We are looking at keys and stumbled onto a duplicate.
1090          * 3. We are in the middle of a duplicate set. (ISDUP set)
1091          */
1092
1093         /*
1094          * Here we check for the case where we just stumbled onto a
1095          * duplicate.  In this case, we do initialization and then
1096          * let the normal duplicate code handle it.
1097          */
1098         if (!F_ISSET(hcp, H_ISDUP))
1099                 if (type == H_DUPLICATE) {
1100                         F_SET(hcp, H_ISDUP);
1101                         hcp->dup_tlen = LEN_HDATA(hcp->pagep,
1102                             hashp->hdr->pagesize, hcp->bndx);
1103                         hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1104                         if (flags == DB_LAST || flags == DB_PREV) {
1105                                 hcp->dndx = 0;
1106                                 hcp->dup_off = 0;
1107                                 do {
1108                                         memcpy(&len,
1109                                             HKEYDATA_DATA(hk) + hcp->dup_off,
1110                                             sizeof(db_indx_t));
1111                                         hcp->dup_off += DUP_SIZE(len);
1112                                         hcp->dndx++;
1113                                 } while (hcp->dup_off < hcp->dup_tlen);
1114                                 hcp->dup_off -= DUP_SIZE(len);
1115                                 hcp->dndx--;
1116                         } else {
1117                                 memcpy(&len,
1118                                     HKEYDATA_DATA(hk), sizeof(db_indx_t));
1119                                 hcp->dup_off = 0;
1120                                 hcp->dndx = 0;
1121                         }
1122                         hcp->dup_len = len;
1123                 } else if (type == H_OFFDUP) {
1124                         F_SET(hcp, H_ISDUP);
1125                         memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
1126                             sizeof(db_pgno_t));
1127                         if (flags == DB_LAST || flags == DB_PREV) {
1128                                 indx = (int)hcp->dndx;
1129                                 if ((ret = __db_dend(hashp->dbp,
1130                                     pgno, &hcp->dpagep)) != 0)
1131                                         return (ret);
1132                                 hcp->dpgno = PGNO(hcp->dpagep);
1133                                 hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
1134                         } else if ((ret = __ham_next_cpage(hashp,
1135                             hcp, pgno, 0, H_ISDUP)) != 0)
1136                                 return (ret);
1137                 }
1138
1139
1140         /*
1141          * Now, everything is initialized, grab a duplicate if
1142          * necessary.
1143          */
1144         if (F_ISSET(hcp, H_ISDUP))
1145                 if (hcp->dpgno != PGNO_INVALID) {
1146                         pp = hcp->dpagep;
1147                         ndx = hcp->dndx;
1148                 } else {
1149                         /*
1150                          * Copy the DBT in case we are retrieving into
1151                          * user memory and we need the parameters for
1152                          * it.
1153                          */
1154                         memcpy(&tmp_val, val, sizeof(*val));
1155                         F_SET(&tmp_val, DB_DBT_PARTIAL);
1156                         tmp_val.dlen = hcp->dup_len;
1157                         tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1158                         myval = &tmp_val;
1159                 }
1160
1161
1162         /*
1163          * Finally, if we had a duplicate, pp, ndx, and myval should be
1164          * set appropriately.
1165          */
1166         if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
1167             &hcp->big_datalen)) != 0)
1168                 return (ret);
1169
1170         /*
1171          * In case we sent a temporary off to db_ret, set the real
1172          * return values.
1173          */
1174         val->data = myval->data;
1175         val->size = myval->size;
1176
1177         return (0);
1178 }
1179
1180 static int
1181 __ham_overwrite(hashp, hcp, nval)
1182         HTAB *hashp;
1183         HASH_CURSOR *hcp;
1184         DBT *nval;
1185 {
1186         DBT *myval, tmp_val;
1187         u_int8_t *hk;
1188
1189         if (F_ISSET(hashp->dbp, DB_AM_DUP))
1190                 return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
1191         else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1192                 /* Put/overwrite */
1193                 memcpy(&tmp_val, nval, sizeof(*nval));
1194                 F_SET(&tmp_val, DB_DBT_PARTIAL);
1195                 tmp_val.doff = 0;
1196                 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1197                 if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1198                         memcpy(&tmp_val.dlen,
1199                             HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1200                 else
1201                         tmp_val.dlen = LEN_HDATA(hcp->pagep,
1202                             hashp->hdr->pagesize,hcp->bndx);
1203                 myval = &tmp_val;
1204         } else /* Regular partial put */
1205                 myval = nval;
1206
1207         return (__ham_replpair(hashp, hcp, myval, 0));
1208 }
1209
1210 /*
1211  * Given a key and a cursor, sets the cursor to the page/ndx on which
1212  * the key resides.  If the key is found, the cursor H_OK flag is set
1213  * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1214  * If the key is not found, the H_OK flag is not set.  If the sought
1215  * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1216  * are set indicating where an add might take place.  If it is 0,
1217  * non of the cursor pointer field are valid.
1218  */
1219 static int
1220 __ham_lookup(hashp, hcp, key, sought, mode)
1221         HTAB *hashp;
1222         HASH_CURSOR *hcp;
1223         const DBT *key;
1224         u_int32_t sought;
1225         db_lockmode_t mode;
1226 {
1227         db_pgno_t pgno;
1228         u_int32_t tlen;
1229         int match, ret, t_ret;
1230         u_int8_t *hk;
1231
1232         /*
1233          * Set up cursor so that we're looking for space to add an item
1234          * as we cycle through the pages looking for the key.
1235          */
1236         if ((ret = __ham_item_reset(hashp, hcp)) != 0)
1237                 return (ret);
1238         hcp->seek_size = sought;
1239
1240         hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
1241         while (1) {
1242                 if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
1243                         return (ret);
1244
1245                 if (F_ISSET(hcp, H_NOMORE))
1246                         break;
1247
1248                 hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1249                 switch (HPAGE_PTYPE(hk)) {
1250                 case H_OFFPAGE:
1251                         memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1252                         if (tlen == key->size) {
1253                                 memcpy(&pgno,
1254                                     HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1255                                 match = __db_moff(hashp->dbp, key, pgno);
1256                                 if (match == 0) {
1257                                         F_SET(hcp, H_OK);
1258                                         return (0);
1259                                 }
1260                         }
1261                         break;
1262                 case H_KEYDATA:
1263                         if (key->size == LEN_HKEY(hcp->pagep,
1264                             hashp->hdr->pagesize, hcp->bndx) &&
1265                             memcmp(key->data,
1266                             HKEYDATA_DATA(hk), key->size) == 0) {
1267                                 F_SET(hcp, H_OK);
1268                                 return (0);
1269                         }
1270                         break;
1271                 case H_DUPLICATE:
1272                 case H_OFFDUP:
1273                         /*
1274                          * These are errors because keys are never
1275                          * duplicated, only data items are.
1276                          */
1277                         return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
1278                 }
1279                 hashp->hash_collisions++;
1280         }
1281
1282         /*
1283          * Item was not found, adjust cursor properly.
1284          */
1285
1286         if (sought != 0)
1287                 return (ret);
1288
1289         if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
1290                 ret = t_ret;
1291         return (ret);
1292 }
1293
1294 /*
1295  * Initialize a dbt using some possibly already allocated storage
1296  * for items.
1297  * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1298  */
1299 int
1300 __ham_init_dbt(dbt, size, bufp, sizep)
1301         DBT *dbt;
1302         u_int32_t size;
1303         void **bufp;
1304         u_int32_t *sizep;
1305 {
1306         memset(dbt, 0, sizeof(*dbt));
1307         if (*sizep < size) {
1308                 if ((*bufp = (void *)(*bufp == NULL ?
1309                     malloc(size) : realloc(*bufp, size))) == NULL) {
1310                         *sizep = 0;
1311                         return (ENOMEM);
1312                 }
1313                 *sizep = size;
1314         }
1315         dbt->data = *bufp;
1316         dbt->size = size;
1317         return (0);
1318 }
1319
1320 /*
1321  * Adjust the cursor after an insert or delete.  The cursor passed is
1322  * the one that was operated upon; we just need to check any of the
1323  * others.
1324  *
1325  * len indicates the length of the item added/deleted
1326  * add indicates if the item indicated by the cursor has just been
1327  * added (add == 1) or deleted (add == 0).
1328  * dup indicates if the addition occurred into a duplicate set.
1329  *
1330  * PUBLIC: void __ham_c_update __P((HTAB *,
1331  * PUBLIC:    HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1332  */
1333 void
1334 __ham_c_update(hashp, hcp, chg_pgno, len, add, dup)
1335         HTAB *hashp;
1336         HASH_CURSOR *hcp;
1337         db_pgno_t chg_pgno;
1338         u_int32_t len;
1339         int add;
1340         int dup;
1341 {
1342         DBC *cp;
1343         HTAB *hp;
1344         HASH_CURSOR *lcp;
1345         int page_deleted;
1346
1347         /*
1348          * Regular adds are always at the end of a given page,
1349          * so we never have to adjust anyone's cursor after
1350          * a regular add.
1351          */
1352         if (!dup && add)
1353                 return;
1354
1355         page_deleted = chg_pgno != PGNO_INVALID &&
1356             ((!dup && chg_pgno != hcp->pgno) ||
1357             (dup && chg_pgno != hcp->dpgno));
1358
1359         hp = hcp->db_cursor->dbp->master->internal;
1360         DB_THREAD_LOCK(hp->dbp);
1361
1362         for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
1363             cp = TAILQ_NEXT(cp, links)) {
1364                 if (cp->internal == hcp)
1365                         continue;
1366
1367                 lcp = (HASH_CURSOR *)cp->internal;
1368
1369                 if (!dup && lcp->pgno != chg_pgno)
1370                         continue;
1371
1372                 if (dup && F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1373                         continue;
1374
1375                 if (dup && !F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1376                         continue;
1377
1378                 if (page_deleted) {
1379                         if (dup) {
1380                                 lcp->dpgno = hcp->dpgno;
1381                                 lcp->dndx = hcp->dndx;
1382                         } else {
1383                                 lcp->pgno = hcp->pgno;
1384                                 lcp->bndx = hcp->bndx;
1385                                 lcp->bucket = hcp->bucket;
1386                         }
1387                         F_CLR(lcp, H_ISDUP);
1388                         continue;
1389                 }
1390
1391                 if (!dup && lcp->bndx > hcp->bndx)
1392                         lcp->bndx--;
1393                 else if (!dup && lcp->bndx == hcp->bndx)
1394                         F_SET(lcp, H_DELETED);
1395                 else if (dup && lcp->bndx == hcp->bndx) {
1396                         /* Assign dpgno in case there was page conversion. */
1397                         lcp->dpgno = hcp->dpgno;
1398                         if (add && lcp->dndx >= hcp->dndx )
1399                                 lcp->dndx++;
1400                         else if (!add && lcp->dndx > hcp->dndx)
1401                                 lcp->dndx--;
1402                         else if (!add && lcp->dndx == hcp->dndx)
1403                                 F_SET(lcp, H_DELETED);
1404
1405                         /* Now adjust on-page information. */
1406                         if (lcp->dpgno == PGNO_INVALID)
1407                                 if (add) {
1408                                         lcp->dup_tlen += len;
1409                                         if (lcp->dndx > hcp->dndx)
1410                                                 lcp->dup_off += len;
1411                                 } else {
1412                                         lcp->dup_tlen -= len;
1413                                         if (lcp->dndx > hcp->dndx)
1414                                                 lcp->dup_off -= len;
1415                                 }
1416                 }
1417         }
1418         DB_THREAD_UNLOCK(hp->dbp);
1419 }
1420
1421 /*
1422  * __ham_hdup --
1423  *      This function gets called when we create a duplicate handle for a
1424  *      threaded DB.  It should create the private part of the DB structure.
1425  * PUBLIC: int  __ham_hdup __P((DB *, DB *));
1426  */
1427 int
1428 __ham_hdup(orig, new)
1429         DB *orig, *new;
1430 {
1431         HTAB *hashp;
1432         DBC *curs;
1433         int ret;
1434
1435         if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL)
1436                 return (ENOMEM);
1437
1438         new->internal = hashp;
1439
1440         hashp->dbp = new;
1441         hashp->hlock = 0;
1442         hashp->hdr = NULL;
1443         hashp->hash = ((HTAB *)orig->internal)->hash;
1444         if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL)
1445                 return (ENOMEM);
1446         hashp->local_errno = 0;
1447         hashp->hash_accesses = 0;
1448         hashp->hash_collisions = 0;
1449         hashp->hash_expansions = 0;
1450         hashp->hash_overflows = 0;
1451         hashp->hash_bigpages = 0;
1452         /* Initialize the cursor queue. */
1453         ret = __ham_c_init(new, NULL, &curs);
1454         TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
1455         return (ret);
1456 }