db/0040755000076400007640000000000007055362607011733 5ustar cjwatsoncjwatsondb/Makefile0100644000076400007640000000437506675314607013405 0ustar cjwatsoncjwatson# Makefile for 4.4BSD db code in GNU C library. # This code is taken verbatim from the BSD db 1.85 package. Only this # Makefile and compat.h were written for GNU libc, and the header files # moved up to this directory. subdir = db subdir-dirs = btree db hash mpool recno vpath %.c $(subdir-dirs) extra-libs := libdb1 extra-libs-others := $(extra-libs) libdb1-routines := bt_close bt_conv bt_debug bt_delete bt_get \ bt_open bt_overflow bt_page bt_put bt_search \ bt_seq bt_split bt_utils \ db \ hash hash_bigkey hash_buf hash_func hash_log2 hash_page \ ndbm \ mpool \ rec_close rec_delete rec_get rec_open rec_put rec_search \ rec_seq rec_utils db1-headers := db.h mpool.h ndbm.h distribute := compat.h \ btree/btree.h btree/extern.h \ hash/extern.h hash/hash.h hash/page.h \ recno/extern.h recno/recno.h \ $(db1-headers) include ../Makeconfig install-others := $(db1-headers:%=$(inst_includedir)/db1/%) ifeq (yes,$(build-shared)) install-others += $(inst_slibdir)/libdb.so$(libdb1.so-version) endif $(inst_slibdir)/libdb.so$(libdb1.so-version): $(inst_slibdir)/libdb1-$(version).so $(+force) rm -f $@ $(LN_S) $( #include #include #include #include #include #include #include "btree.h" static int bt_meta __P((BTREE *)); /* * BT_CLOSE -- Close a btree. * * Parameters: * dbp: pointer to access method * * Returns: * RET_ERROR, RET_SUCCESS */ int __bt_close(dbp) DB *dbp; { BTREE *t; int fd; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* Sync the tree. */ if (__bt_sync(dbp, 0) == RET_ERROR) return (RET_ERROR); /* Close the memory pool. */ if (mpool_close(t->bt_mp) == RET_ERROR) return (RET_ERROR); /* Free random memory. */ if (t->bt_cursor.key.data != NULL) { free(t->bt_cursor.key.data); t->bt_cursor.key.size = 0; t->bt_cursor.key.data = NULL; } if (t->bt_rkey.data) { free(t->bt_rkey.data); t->bt_rkey.size = 0; t->bt_rkey.data = NULL; } if (t->bt_rdata.data) { free(t->bt_rdata.data); t->bt_rdata.size = 0; t->bt_rdata.data = NULL; } fd = t->bt_fd; free(t); free(dbp); return (close(fd) ? RET_ERROR : RET_SUCCESS); } /* * BT_SYNC -- sync the btree to disk. * * Parameters: * dbp: pointer to access method * * Returns: * RET_SUCCESS, RET_ERROR. */ int __bt_sync(dbp, flags) const DB *dbp; u_int flags; { BTREE *t; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* Sync doesn't currently take any flags. */ if (flags != 0) { errno = EINVAL; return (RET_ERROR); } if (F_ISSET(t, B_INMEM | B_RDONLY) || !F_ISSET(t, B_MODIFIED)) return (RET_SUCCESS); if (F_ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) return (RET_ERROR); if ((status = mpool_sync(t->bt_mp)) == RET_SUCCESS) F_CLR(t, B_MODIFIED); return (status); } /* * BT_META -- write the tree meta data to disk. * * Parameters: * t: tree * * Returns: * RET_ERROR, RET_SUCCESS */ static int bt_meta(t) BTREE *t; { BTMETA m; void *p; if ((p = mpool_get(t->bt_mp, P_META, 0)) == NULL) return (RET_ERROR); /* Fill in metadata. */ m.magic = BTREEMAGIC; m.version = BTREEVERSION; m.psize = t->bt_psize; m.free = t->bt_free; m.nrecs = t->bt_nrecs; m.flags = F_ISSET(t, SAVEMETA); memmove(p, &m, sizeof(BTMETA)); mpool_put(t->bt_mp, p, MPOOL_DIRTY); return (RET_SUCCESS); } db/btree/bt_conv.c0100644000076400007640000001243006072171167014624 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_conv.c 8.5 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include "btree.h" static void mswap __P((PAGE *)); /* * __BT_BPGIN, __BT_BPGOUT -- * Convert host-specific number layout to/from the host-independent * format stored on disk. * * Parameters: * t: tree * pg: page number * h: page to convert */ void __bt_pgin(t, pg, pp) void *t; pgno_t pg; void *pp; { PAGE *h; indx_t i, top; u_char flags; char *p; if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) return; if (pg == P_META) { mswap(pp); return; } h = pp; M_32_SWAP(h->pgno); M_32_SWAP(h->prevpg); M_32_SWAP(h->nextpg); M_32_SWAP(h->flags); M_16_SWAP(h->lower); M_16_SWAP(h->upper); top = NEXTINDEX(h); if ((h->flags & P_TYPE) == P_BINTERNAL) for (i = 0; i < top; i++) { M_16_SWAP(h->linp[i]); p = (char *)GETBINTERNAL(h, i); P_32_SWAP(p); p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); if (*(u_char *)p & P_BIGKEY) { p += sizeof(u_char); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); } } else if ((h->flags & P_TYPE) == P_BLEAF) for (i = 0; i < top; i++) { M_16_SWAP(h->linp[i]); p = (char *)GETBLEAF(h, i); P_32_SWAP(p); p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(u_int32_t); flags = *(u_char *)p; if (flags & (P_BIGKEY | P_BIGDATA)) { p += sizeof(u_char); if (flags & P_BIGKEY) { P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); } if (flags & P_BIGDATA) { p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); } } } } void __bt_pgout(t, pg, pp) void *t; pgno_t pg; void *pp; { PAGE *h; indx_t i, top; u_char flags; char *p; if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) return; if (pg == P_META) { mswap(pp); return; } h = pp; top = NEXTINDEX(h); if ((h->flags & P_TYPE) == P_BINTERNAL) for (i = 0; i < top; i++) { p = (char *)GETBINTERNAL(h, i); P_32_SWAP(p); p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); if (*(u_char *)p & P_BIGKEY) { p += sizeof(u_char); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); } M_16_SWAP(h->linp[i]); } else if ((h->flags & P_TYPE) == P_BLEAF) for (i = 0; i < top; i++) { p = (char *)GETBLEAF(h, i); P_32_SWAP(p); p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(u_int32_t); flags = *(u_char *)p; if (flags & (P_BIGKEY | P_BIGDATA)) { p += sizeof(u_char); if (flags & P_BIGKEY) { P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); } if (flags & P_BIGDATA) { p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); } } M_16_SWAP(h->linp[i]); } M_32_SWAP(h->pgno); M_32_SWAP(h->prevpg); M_32_SWAP(h->nextpg); M_32_SWAP(h->flags); M_16_SWAP(h->lower); M_16_SWAP(h->upper); } /* * MSWAP -- Actually swap the bytes on the meta page. * * Parameters: * p: page to convert */ static void mswap(pg) PAGE *pg; { char *p; p = (char *)pg; P_32_SWAP(p); /* magic */ p += sizeof(u_int32_t); P_32_SWAP(p); /* version */ p += sizeof(u_int32_t); P_32_SWAP(p); /* psize */ p += sizeof(u_int32_t); P_32_SWAP(p); /* free */ p += sizeof(u_int32_t); P_32_SWAP(p); /* nrecs */ p += sizeof(u_int32_t); P_32_SWAP(p); /* flags */ p += sizeof(u_int32_t); } db/btree/bt_debug.c0100644000076400007640000002074706072171170014751 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_debug.c 8.5 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "btree.h" #ifdef DEBUG /* * BT_DUMP -- Dump the tree * * Parameters: * dbp: pointer to the DB */ void __bt_dump(dbp) DB *dbp; { BTREE *t; PAGE *h; pgno_t i; char *sep; t = dbp->internal; (void)fprintf(stderr, "%s: pgsz %d", F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); if (F_ISSET(t, R_RECNO)) (void)fprintf(stderr, " keys %lu", t->bt_nrecs); #undef X #define X(flag, name) \ if (F_ISSET(t, flag)) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } if (t->flags != 0) { sep = " flags ("; X(R_FIXLEN, "FIXLEN"); X(B_INMEM, "INMEM"); X(B_NODUPS, "NODUPS"); X(B_RDONLY, "RDONLY"); X(R_RECNO, "RECNO"); X(B_METADIRTY,"METADIRTY"); (void)fprintf(stderr, ")\n"); } #undef X for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { __bt_dpage(h); (void)mpool_put(t->bt_mp, h, 0); } } /* * BT_DMPAGE -- Dump the meta page * * Parameters: * h: pointer to the PAGE */ void __bt_dmpage(h) PAGE *h; { BTMETA *m; char *sep; m = (BTMETA *)h; (void)fprintf(stderr, "magic %lx\n", m->magic); (void)fprintf(stderr, "version %lu\n", m->version); (void)fprintf(stderr, "psize %lu\n", m->psize); (void)fprintf(stderr, "free %lu\n", m->free); (void)fprintf(stderr, "nrecs %lu\n", m->nrecs); (void)fprintf(stderr, "flags %lu", m->flags); #undef X #define X(flag, name) \ if (m->flags & flag) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } if (m->flags) { sep = " ("; X(B_NODUPS, "NODUPS"); X(R_RECNO, "RECNO"); (void)fprintf(stderr, ")"); } } /* * BT_DNPAGE -- Dump the page * * Parameters: * n: page number to dump. */ void __bt_dnpage(dbp, pgno) DB *dbp; pgno_t pgno; { BTREE *t; PAGE *h; t = dbp->internal; if ((h = mpool_get(t->bt_mp, pgno, 0)) != NULL) { __bt_dpage(h); (void)mpool_put(t->bt_mp, h, 0); } } /* * BT_DPAGE -- Dump the page * * Parameters: * h: pointer to the PAGE */ void __bt_dpage(h) PAGE *h; { BINTERNAL *bi; BLEAF *bl; RINTERNAL *ri; RLEAF *rl; indx_t cur, top; char *sep; (void)fprintf(stderr, " page %d: (", h->pgno); #undef X #define X(flag, name) \ if (h->flags & flag) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } sep = ""; X(P_BINTERNAL, "BINTERNAL") /* types */ X(P_BLEAF, "BLEAF") X(P_RINTERNAL, "RINTERNAL") /* types */ X(P_RLEAF, "RLEAF") X(P_OVERFLOW, "OVERFLOW") X(P_PRESERVE, "PRESERVE"); (void)fprintf(stderr, ")\n"); #undef X (void)fprintf(stderr, "\tprev %2d next %2d", h->prevpg, h->nextpg); if (h->flags & P_OVERFLOW) return; top = NEXTINDEX(h); (void)fprintf(stderr, " lower %3d upper %3d nextind %d\n", h->lower, h->upper, top); for (cur = 0; cur < top; cur++) { (void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]); switch (h->flags & P_TYPE) { case P_BINTERNAL: bi = GETBINTERNAL(h, cur); (void)fprintf(stderr, "size %03d pgno %03d", bi->ksize, bi->pgno); if (bi->flags & P_BIGKEY) (void)fprintf(stderr, " (indirect)"); else if (bi->ksize) (void)fprintf(stderr, " {%.*s}", (int)bi->ksize, bi->bytes); break; case P_RINTERNAL: ri = GETRINTERNAL(h, cur); (void)fprintf(stderr, "entries %03d pgno %03d", ri->nrecs, ri->pgno); break; case P_BLEAF: bl = GETBLEAF(h, cur); if (bl->flags & P_BIGKEY) (void)fprintf(stderr, "big key page %lu size %u/", *(pgno_t *)bl->bytes, *(u_int32_t *)(bl->bytes + sizeof(pgno_t))); else if (bl->ksize) (void)fprintf(stderr, "%s/", bl->bytes); if (bl->flags & P_BIGDATA) (void)fprintf(stderr, "big data page %lu size %u", *(pgno_t *)(bl->bytes + bl->ksize), *(u_int32_t *)(bl->bytes + bl->ksize + sizeof(pgno_t))); else if (bl->dsize) (void)fprintf(stderr, "%.*s", (int)bl->dsize, bl->bytes + bl->ksize); break; case P_RLEAF: rl = GETRLEAF(h, cur); if (rl->flags & P_BIGDATA) (void)fprintf(stderr, "big data page %lu size %u", *(pgno_t *)rl->bytes, *(u_int32_t *)(rl->bytes + sizeof(pgno_t))); else if (rl->dsize) (void)fprintf(stderr, "%.*s", (int)rl->dsize, rl->bytes); break; } (void)fprintf(stderr, "\n"); } } #endif #ifdef STATISTICS /* * BT_STAT -- Gather/print the tree statistics * * Parameters: * dbp: pointer to the DB */ void __bt_stat(dbp) DB *dbp; { extern u_long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit; extern u_long bt_sortsplit, bt_split; BTREE *t; PAGE *h; pgno_t i, pcont, pinternal, pleaf; u_long ifree, lfree, nkeys; int levels; t = dbp->internal; pcont = pinternal = pleaf = 0; nkeys = ifree = lfree = 0; for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { switch (h->flags & P_TYPE) { case P_BINTERNAL: case P_RINTERNAL: ++pinternal; ifree += h->upper - h->lower; break; case P_BLEAF: case P_RLEAF: ++pleaf; lfree += h->upper - h->lower; nkeys += NEXTINDEX(h); break; case P_OVERFLOW: ++pcont; break; } (void)mpool_put(t->bt_mp, h, 0); } /* Count the levels of the tree. */ for (i = P_ROOT, levels = 0 ;; ++levels) { h = mpool_get(t->bt_mp, i, 0); if (h->flags & (P_BLEAF|P_RLEAF)) { if (levels == 0) levels = 1; (void)mpool_put(t->bt_mp, h, 0); break; } i = F_ISSET(t, R_RECNO) ? GETRINTERNAL(h, 0)->pgno : GETBINTERNAL(h, 0)->pgno; (void)mpool_put(t->bt_mp, h, 0); } (void)fprintf(stderr, "%d level%s with %ld keys", levels, levels == 1 ? "" : "s", nkeys); if (F_ISSET(t, R_RECNO)) (void)fprintf(stderr, " (%ld header count)", t->bt_nrecs); (void)fprintf(stderr, "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n", pinternal + pleaf + pcont, pleaf, pinternal, pcont); (void)fprintf(stderr, "%ld cache hits, %ld cache misses\n", bt_cache_hit, bt_cache_miss); (void)fprintf(stderr, "%ld splits (%ld root splits, %ld sort splits)\n", bt_split, bt_rootsplit, bt_sortsplit); pleaf *= t->bt_psize - BTDATAOFF; if (pleaf) (void)fprintf(stderr, "%.0f%% leaf fill (%ld bytes used, %ld bytes free)\n", ((double)(pleaf - lfree) / pleaf) * 100, pleaf - lfree, lfree); pinternal *= t->bt_psize - BTDATAOFF; if (pinternal) (void)fprintf(stderr, "%.0f%% internal fill (%ld bytes used, %ld bytes free\n", ((double)(pinternal - ifree) / pinternal) * 100, pinternal - ifree, ifree); if (bt_pfxsaved) (void)fprintf(stderr, "prefix checking removed %lu bytes.\n", bt_pfxsaved); } #endif db/btree/bt_delete.c0100644000076400007640000004021006603002376015111 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "btree.h" static int __bt_bdelete __P((BTREE *, const DBT *)); static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int)); static int __bt_pdelete __P((BTREE *, PAGE *)); static int __bt_relink __P((BTREE *, PAGE *)); static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *)); /* * __bt_delete * Delete the item(s) referenced by a key. * * Return RET_SPECIAL if the key is not found. */ int __bt_delete(dbp, key, flags) const DB *dbp; const DBT *key; u_int flags; { BTREE *t; CURSOR *c; PAGE *h; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* Check for change to a read-only tree. */ if (F_ISSET(t, B_RDONLY)) { errno = EPERM; return (RET_ERROR); } switch (flags) { case 0: status = __bt_bdelete(t, key); break; case R_CURSOR: /* * If flags is R_CURSOR, delete the cursor. Must already * have started a scan and not have already deleted it. */ c = &t->bt_cursor; if (F_ISSET(c, CURS_INIT)) { if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) return (RET_SPECIAL); if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) return (RET_ERROR); /* * If the page is about to be emptied, we'll need to * delete it, which means we have to acquire a stack. */ if (NEXTINDEX(h) == 1) if (__bt_stkacq(t, &h, &t->bt_cursor)) return (RET_ERROR); status = __bt_dleaf(t, NULL, h, c->pg.index); if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { if (__bt_pdelete(t, h)) return (RET_ERROR); } else mpool_put(t->bt_mp, h, status == RET_SUCCESS ? MPOOL_DIRTY : 0); break; } /* FALLTHROUGH */ default: errno = EINVAL; return (RET_ERROR); } if (status == RET_SUCCESS) F_SET(t, B_MODIFIED); return (status); } /* * __bt_stkacq -- * Acquire a stack so we can delete a cursor entry. * * Parameters: * t: tree * hp: pointer to current, pinned PAGE pointer * c: pointer to the cursor * * Returns: * 0 on success, 1 on failure */ static int __bt_stkacq(t, hp, c) BTREE *t; PAGE **hp; CURSOR *c; { BINTERNAL *bi; EPG *e; EPGNO *parent; PAGE *h; indx_t index; pgno_t pgno; recno_t nextpg, prevpg; int exact, level; /* * Find the first occurrence of the key in the tree. Toss the * currently locked page so we don't hit an already-locked page. */ h = *hp; mpool_put(t->bt_mp, h, 0); if ((e = __bt_search(t, &c->key, &exact)) == NULL) return (1); h = e->page; /* See if we got it in one shot. */ if (h->pgno == c->pg.pgno) goto ret; /* * Move right, looking for the page. At each move we have to move * up the stack until we don't have to move to the next page. If * we have to change pages at an internal level, we have to fix the * stack back up. */ while (h->pgno != c->pg.pgno) { if ((nextpg = h->nextpg) == P_INVALID) break; mpool_put(t->bt_mp, h, 0); /* Move up the stack. */ for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { /* Get the parent page. */ if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) return (1); /* Move to the next index. */ if (parent->index != NEXTINDEX(h) - 1) { index = parent->index + 1; BT_PUSH(t, h->pgno, index); break; } mpool_put(t->bt_mp, h, 0); } /* Restore the stack. */ while (level--) { /* Push the next level down onto the stack. */ bi = GETBINTERNAL(h, index); pgno = bi->pgno; BT_PUSH(t, pgno, 0); /* Lose the currently pinned page. */ mpool_put(t->bt_mp, h, 0); /* Get the next level down. */ if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) return (1); index = 0; } mpool_put(t->bt_mp, h, 0); if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) return (1); } if (h->pgno == c->pg.pgno) goto ret; /* Reacquire the original stack. */ mpool_put(t->bt_mp, h, 0); if ((e = __bt_search(t, &c->key, &exact)) == NULL) return (1); h = e->page; /* * Move left, looking for the page. At each move we have to move * up the stack until we don't have to change pages to move to the * next page. If we have to change pages at an internal level, we * have to fix the stack back up. */ while (h->pgno != c->pg.pgno) { if ((prevpg = h->prevpg) == P_INVALID) break; mpool_put(t->bt_mp, h, 0); /* Move up the stack. */ for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { /* Get the parent page. */ if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) return (1); /* Move to the next index. */ if (parent->index != 0) { index = parent->index - 1; BT_PUSH(t, h->pgno, index); break; } mpool_put(t->bt_mp, h, 0); } /* Restore the stack. */ while (level--) { /* Push the next level down onto the stack. */ bi = GETBINTERNAL(h, index); pgno = bi->pgno; /* Lose the currently pinned page. */ mpool_put(t->bt_mp, h, 0); /* Get the next level down. */ if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) return (1); index = NEXTINDEX(h) - 1; BT_PUSH(t, pgno, index); } mpool_put(t->bt_mp, h, 0); if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) return (1); } ret: mpool_put(t->bt_mp, h, 0); return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); } /* * __bt_bdelete -- * Delete all key/data pairs matching the specified key. * * Parameters: * t: tree * key: key to delete * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. */ static int __bt_bdelete(t, key) BTREE *t; const DBT *key; { EPG *e; PAGE *h; int deleted, exact, redo; deleted = 0; /* Find any matching record; __bt_search pins the page. */ loop: if ((e = __bt_search(t, key, &exact)) == NULL) return (deleted ? RET_SUCCESS : RET_ERROR); if (!exact) { mpool_put(t->bt_mp, e->page, 0); return (deleted ? RET_SUCCESS : RET_SPECIAL); } /* * Delete forward, then delete backward, from the found key. If * there are duplicates and we reach either side of the page, do * the key search again, so that we get them all. */ redo = 0; h = e->page; do { if (__bt_dleaf(t, key, h, e->index)) { mpool_put(t->bt_mp, h, 0); return (RET_ERROR); } if (F_ISSET(t, B_NODUPS)) { if (NEXTINDEX(h) == 0) { if (__bt_pdelete(t, h)) return (RET_ERROR); } else mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); } deleted = 1; } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); /* Check for right-hand edge of the page. */ if (e->index == NEXTINDEX(h)) redo = 1; /* Delete from the key to the beginning of the page. */ while (e->index-- > 0) { if (__bt_cmp(t, key, e) != 0) break; if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) { mpool_put(t->bt_mp, h, 0); return (RET_ERROR); } if (e->index == 0) redo = 1; } /* Check for an empty page. */ if (NEXTINDEX(h) == 0) { if (__bt_pdelete(t, h)) return (RET_ERROR); goto loop; } /* Put the page. */ mpool_put(t->bt_mp, h, MPOOL_DIRTY); if (redo) goto loop; return (RET_SUCCESS); } /* * __bt_pdelete -- * Delete a single page from the tree. * * Parameters: * t: tree * h: leaf page * * Returns: * RET_SUCCESS, RET_ERROR. * * Side-effects: * mpool_put's the page */ static int __bt_pdelete(t, h) BTREE *t; PAGE *h; { BINTERNAL *bi; PAGE *pg; EPGNO *parent; indx_t cnt, index, *ip, offset; u_int32_t nksize; char *from; /* * Walk the parent page stack -- a LIFO stack of the pages that were * traversed when we searched for the page where the delete occurred. * Each stack entry is a page number and a page index offset. The * offset is for the page traversed on the search. We've just deleted * a page, so we have to delete the key from the parent page. * * If the delete from the parent page makes it empty, this process may * continue all the way up the tree. We stop if we reach the root page * (which is never deleted, it's just not worth the effort) or if the * delete does not empty the page. */ while ((parent = BT_POP(t)) != NULL) { /* Get the parent page. */ if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) return (RET_ERROR); index = parent->index; bi = GETBINTERNAL(pg, index); /* Free any overflow pages. */ if (bi->flags & P_BIGKEY && __ovfl_delete(t, bi->bytes) == RET_ERROR) { mpool_put(t->bt_mp, pg, 0); return (RET_ERROR); } /* * Free the parent if it has only the one key and it's not the * root page. If it's the rootpage, turn it back into an empty * leaf page. */ if (NEXTINDEX(pg) == 1) { if (pg->pgno == P_ROOT) { pg->lower = BTDATAOFF; pg->upper = t->bt_psize; pg->flags = P_BLEAF; } else { if (__bt_relink(t, pg) || __bt_free(t, pg)) return (RET_ERROR); continue; } } else { /* Pack remaining key items at the end of the page. */ nksize = NBINTERNAL(bi->ksize); from = (char *)pg + pg->upper; memmove(from + nksize, from, (char *)bi - from); pg->upper += nksize; /* Adjust indices' offsets, shift the indices down. */ offset = pg->linp[index]; for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip) if (ip[0] < offset) ip[0] += nksize; for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip) ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; pg->lower -= sizeof(indx_t); } mpool_put(t->bt_mp, pg, MPOOL_DIRTY); break; } /* Free the leaf page, as long as it wasn't the root. */ if (h->pgno == P_ROOT) { mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); } return (__bt_relink(t, h) || __bt_free(t, h)); } /* * __bt_dleaf -- * Delete a single record from a leaf page. * * Parameters: * t: tree * key: referenced key * h: page * index: index on page to delete * * Returns: * RET_SUCCESS, RET_ERROR. */ int __bt_dleaf(t, key, h, index) BTREE *t; const DBT *key; PAGE *h; u_int index; { BLEAF *bl; indx_t cnt, *ip, offset; u_int32_t nbytes; void *to; char *from; /* If this record is referenced by the cursor, delete the cursor. */ if (F_ISSET(&t->bt_cursor, CURS_INIT) && !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index && __bt_curdel(t, key, h, index)) return (RET_ERROR); /* If the entry uses overflow pages, make them available for reuse. */ to = bl = GETBLEAF(h, index); if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) return (RET_ERROR); if (bl->flags & P_BIGDATA && __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) return (RET_ERROR); /* Pack the remaining key/data items at the end of the page. */ nbytes = NBLEAF(bl); from = (char *)h + h->upper; memmove(from + nbytes, from, (char *)to - from); h->upper += nbytes; /* Adjust the indices' offsets, shift the indices down. */ offset = h->linp[index]; for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) if (ip[0] < offset) ip[0] += nbytes; for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; h->lower -= sizeof(indx_t); /* If the cursor is on this page, adjust it as necessary. */ if (F_ISSET(&t->bt_cursor, CURS_INIT) && !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index) --t->bt_cursor.pg.index; return (RET_SUCCESS); } /* * __bt_curdel -- * Delete the cursor. * * Parameters: * t: tree * key: referenced key (or NULL) * h: page * index: index on page to delete * * Returns: * RET_SUCCESS, RET_ERROR. */ static int __bt_curdel(t, key, h, index) BTREE *t; const DBT *key; PAGE *h; u_int index; { CURSOR *c; EPG e; PAGE *pg; int curcopy, status; /* * If there are duplicates, move forward or backward to one. * Otherwise, copy the key into the cursor area. */ c = &t->bt_cursor; F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); curcopy = 0; if (!F_ISSET(t, B_NODUPS)) { /* * We're going to have to do comparisons. If we weren't * provided a copy of the key, i.e. the user is deleting * the current cursor position, get one. */ if (key == NULL) { e.page = h; e.index = index; if ((status = __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) return (status); curcopy = 1; key = &c->key; } /* Check previous key, if not at the beginning of the page. */ if (index > 0) { e.page = h; e.index = index - 1; if (__bt_cmp(t, key, &e) == 0) { F_SET(c, CURS_BEFORE); goto dup2; } } /* Check next key, if not at the end of the page. */ if (index < NEXTINDEX(h) - 1) { e.page = h; e.index = index + 1; if (__bt_cmp(t, key, &e) == 0) { F_SET(c, CURS_AFTER); goto dup2; } } /* Check previous key if at the beginning of the page. */ if (index == 0 && h->prevpg != P_INVALID) { if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) return (RET_ERROR); e.page = pg; e.index = NEXTINDEX(pg) - 1; if (__bt_cmp(t, key, &e) == 0) { F_SET(c, CURS_BEFORE); goto dup1; } mpool_put(t->bt_mp, pg, 0); } /* Check next key if at the end of the page. */ if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) { if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) return (RET_ERROR); e.page = pg; e.index = 0; if (__bt_cmp(t, key, &e) == 0) { F_SET(c, CURS_AFTER); dup1: mpool_put(t->bt_mp, pg, 0); dup2: c->pg.pgno = e.page->pgno; c->pg.index = e.index; return (RET_SUCCESS); } mpool_put(t->bt_mp, pg, 0); } } e.page = h; e.index = index; if (curcopy || (status = __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { F_SET(c, CURS_ACQUIRE); return (RET_SUCCESS); } return (status); } /* * __bt_relink -- * Link around a deleted page. * * Parameters: * t: tree * h: page to be deleted */ static int __bt_relink(t, h) BTREE *t; PAGE *h; { PAGE *pg; if (h->nextpg != P_INVALID) { if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) return (RET_ERROR); pg->prevpg = h->prevpg; mpool_put(t->bt_mp, pg, MPOOL_DIRTY); } if (h->prevpg != P_INVALID) { if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) return (RET_ERROR); pg->nextpg = h->nextpg; mpool_put(t->bt_mp, pg, MPOOL_DIRTY); } return (0); } db/btree/bt_get.c0100644000076400007640000000626506072171174014445 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_get.c 8.6 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "btree.h" /* * __BT_GET -- Get a record from the btree. * * Parameters: * dbp: pointer to access method * key: key to find * data: data to return * flag: currently unused * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. */ int __bt_get(dbp, key, data, flags) const DB *dbp; const DBT *key; DBT *data; u_int flags; { BTREE *t; EPG *e; int exact, status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* Get currently doesn't take any flags. */ if (flags) { errno = EINVAL; return (RET_ERROR); } if ((e = __bt_search(t, key, &exact)) == NULL) return (RET_ERROR); if (!exact) { mpool_put(t->bt_mp, e->page, 0); return (RET_SPECIAL); } status = __bt_ret(t, e, NULL, NULL, data, &t->bt_rdata, 0); /* * If the user is doing concurrent access, we copied the * key/data, toss the page. */ if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; return (status); } db/btree/bt_open.c0100644000076400007640000002625406602177360014631 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_open.c 8.10 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ /* * Implementation of btree access method for 4.4BSD. * * The design here was originally based on that of the btree access method * used in the Postgres database system at UC Berkeley. This implementation * is wholly independent of the Postgres code. */ #include #include #include #include #include #include #include #include #include #include #include #include "btree.h" #ifdef DEBUG #undef MINPSIZE #define MINPSIZE 128 #endif static int byteorder __P((void)); static int nroot __P((BTREE *)); static int tmp __P((void)); /* * __BT_OPEN -- Open a btree. * * Creates and fills a DB struct, and calls the routine that actually * opens the btree. * * Parameters: * fname: filename (NULL for in-memory trees) * flags: open flag bits * mode: open permission bits * b: BTREEINFO pointer * * Returns: * NULL on failure, pointer to DB on success. * */ DB * __bt_open(fname, flags, mode, openinfo, dflags) const char *fname; int flags, mode, dflags; const BTREEINFO *openinfo; { struct stat sb; BTMETA m; BTREE *t; BTREEINFO b; DB *dbp; pgno_t ncache; ssize_t nr; int machine_lorder; t = NULL; /* * Intention is to make sure all of the user's selections are okay * here and then use them without checking. Can't be complete, since * we don't know the right page size, lorder or flags until the backing * file is opened. Also, the file's page size can cause the cachesize * to change. */ machine_lorder = byteorder(); if (openinfo) { b = *openinfo; /* Flags: R_DUP. */ if (b.flags & ~(R_DUP)) goto einval; /* * Page size must be indx_t aligned and >= MINPSIZE. Default * page size is set farther on, based on the underlying file * transfer size. */ if (b.psize && (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 || b.psize & (sizeof(indx_t) - 1))) goto einval; /* Minimum number of keys per page; absolute minimum is 2. */ if (b.minkeypage) { if (b.minkeypage < 2) goto einval; } else b.minkeypage = DEFMINKEYPAGE; /* If no comparison, use default comparison and prefix. */ if (b.compare == NULL) { b.compare = __bt_defcmp; if (b.prefix == NULL) b.prefix = __bt_defpfx; } if (b.lorder == 0) b.lorder = machine_lorder; } else { b.compare = __bt_defcmp; b.cachesize = 0; b.flags = 0; b.lorder = machine_lorder; b.minkeypage = DEFMINKEYPAGE; b.prefix = __bt_defpfx; b.psize = 0; } /* Check for the ubiquitous PDP-11. */ if (b.lorder != BIG_ENDIAN && b.lorder != LITTLE_ENDIAN) goto einval; /* Allocate and initialize DB and BTREE structures. */ if ((t = (BTREE *)malloc(sizeof(BTREE))) == NULL) goto err; memset(t, 0, sizeof(BTREE)); t->bt_fd = -1; /* Don't close unopened fd on error. */ t->bt_lorder = b.lorder; t->bt_order = NOT; t->bt_cmp = b.compare; t->bt_pfx = b.prefix; t->bt_rfd = -1; if ((t->bt_dbp = dbp = (DB *)malloc(sizeof(DB))) == NULL) goto err; memset(t->bt_dbp, 0, sizeof(DB)); if (t->bt_lorder != machine_lorder) F_SET(t, B_NEEDSWAP); dbp->type = DB_BTREE; dbp->internal = t; dbp->close = __bt_close; dbp->del = __bt_delete; dbp->fd = __bt_fd; dbp->get = __bt_get; dbp->put = __bt_put; dbp->seq = __bt_seq; dbp->sync = __bt_sync; /* * If no file name was supplied, this is an in-memory btree and we * open a backing temporary file. Otherwise, it's a disk-based tree. */ if (fname) { switch (flags & O_ACCMODE) { case O_RDONLY: F_SET(t, B_RDONLY); break; case O_RDWR: break; case O_WRONLY: default: goto einval; } if ((t->bt_fd = open(fname, flags, mode)) < 0) goto err; } else { if ((flags & O_ACCMODE) != O_RDWR) goto einval; if ((t->bt_fd = tmp()) == -1) goto err; F_SET(t, B_INMEM); } if (fcntl(t->bt_fd, F_SETFD, 1) == -1) goto err; if (fstat(t->bt_fd, &sb)) goto err; if (sb.st_size) { if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0) goto err; if (nr != sizeof(BTMETA)) goto eftype; /* * Read in the meta-data. This can change the notion of what * the lorder, page size and flags are, and, when the page size * changes, the cachesize value can change too. If the user * specified the wrong byte order for an existing database, we * don't bother to return an error, we just clear the NEEDSWAP * bit. */ if (m.magic == BTREEMAGIC) F_CLR(t, B_NEEDSWAP); else { F_SET(t, B_NEEDSWAP); M_32_SWAP(m.magic); M_32_SWAP(m.version); M_32_SWAP(m.psize); M_32_SWAP(m.free); M_32_SWAP(m.nrecs); M_32_SWAP(m.flags); } if (m.magic != BTREEMAGIC || m.version != BTREEVERSION) goto eftype; if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 || m.psize & (sizeof(indx_t) - 1)) goto eftype; if (m.flags & ~SAVEMETA) goto eftype; b.psize = m.psize; F_SET(t, m.flags); t->bt_free = m.free; t->bt_nrecs = m.nrecs; } else { /* * Set the page size to the best value for I/O to this file. * Don't overflow the page offset type. */ if (b.psize == 0) { #ifdef _STATBUF_ST_BLKSIZE b.psize = sb.st_blksize; #endif if (b.psize < MINPSIZE) b.psize = MINPSIZE; if (b.psize > MAX_PAGE_OFFSET + 1) b.psize = MAX_PAGE_OFFSET + 1; } /* Set flag if duplicates permitted. */ if (!(b.flags & R_DUP)) F_SET(t, B_NODUPS); t->bt_free = P_INVALID; t->bt_nrecs = 0; F_SET(t, B_METADIRTY); } t->bt_psize = b.psize; /* Set the cache size; must be a multiple of the page size. */ if (b.cachesize && b.cachesize & (b.psize - 1)) b.cachesize += (~b.cachesize & (b.psize - 1)) + 1; if (b.cachesize < b.psize * MINCACHE) b.cachesize = b.psize * MINCACHE; /* Calculate number of pages to cache. */ ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize; /* * The btree data structure requires that at least two keys can fit on * a page, but other than that there's no fixed requirement. The user * specified a minimum number per page, and we translated that into the * number of bytes a key/data pair can use before being placed on an * overflow page. This calculation includes the page header, the size * of the index referencing the leaf item and the size of the leaf item * structure. Also, don't let the user specify a minkeypage such that * a key/data pair won't fit even if both key and data are on overflow * pages. */ t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage - (sizeof(indx_t) + NBLEAFDBT(0, 0)); if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t)) t->bt_ovflsize = NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t); /* Initialize the buffer pool. */ if ((t->bt_mp = mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL) goto err; if (!F_ISSET(t, B_INMEM)) mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t); /* Create a root page if new tree. */ if (nroot(t) == RET_ERROR) goto err; /* Global flags. */ if (dflags & DB_LOCK) F_SET(t, B_DB_LOCK); if (dflags & DB_SHMEM) F_SET(t, B_DB_SHMEM); if (dflags & DB_TXN) F_SET(t, B_DB_TXN); return (dbp); einval: errno = EINVAL; goto err; eftype: errno = EFTYPE; goto err; err: if (t) { if (t->bt_dbp) free(t->bt_dbp); if (t->bt_fd != -1) (void)close(t->bt_fd); free(t); } return (NULL); } /* * NROOT -- Create the root of a new tree. * * Parameters: * t: tree * * Returns: * RET_ERROR, RET_SUCCESS */ static int nroot(t) BTREE *t; { PAGE *meta, *root; pgno_t npg; if ((meta = mpool_get(t->bt_mp, 0, 0)) != NULL) { mpool_put(t->bt_mp, meta, 0); return (RET_SUCCESS); } if (errno != EINVAL) /* It's OK to not exist. */ return (RET_ERROR); errno = 0; if ((meta = mpool_new(t->bt_mp, &npg)) == NULL) return (RET_ERROR); if ((root = mpool_new(t->bt_mp, &npg)) == NULL) return (RET_ERROR); if (npg != P_ROOT) return (RET_ERROR); root->pgno = npg; root->prevpg = root->nextpg = P_INVALID; root->lower = BTDATAOFF; root->upper = t->bt_psize; root->flags = P_BLEAF; memset(meta, 0, t->bt_psize); mpool_put(t->bt_mp, meta, MPOOL_DIRTY); mpool_put(t->bt_mp, root, MPOOL_DIRTY); return (RET_SUCCESS); } static int tmp() { sigset_t set, oset; int fd; const char *envtmp; char *path; static const char fmt[] = "%s/bt.XXXXXX"; size_t n; envtmp = getenv("TMPDIR"); if (!envtmp) envtmp = "/tmp"; n = strlen (envtmp) + sizeof fmt; #ifdef __GNUC__ path = __builtin_alloca(n); #else path = malloc(n); #endif (void)snprintf(path, n, fmt, envtmp ? envtmp : "/tmp"); (void)sigfillset(&set); (void)sigprocmask(SIG_BLOCK, &set, &oset); if ((fd = mkstemp(path)) != -1) (void)unlink(path); (void)sigprocmask(SIG_SETMASK, &oset, NULL); #ifndef __GNUC__ free(path); #endif return(fd); } static int byteorder() { u_int32_t x; u_char *p; x = 0x01020304; p = (u_char *)&x; switch (*p) { case 1: return (BIG_ENDIAN); case 4: return (LITTLE_ENDIAN); default: return (0); } } int __bt_fd(dbp) const DB *dbp; { BTREE *t; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* In-memory database can't have a file descriptor. */ if (F_ISSET(t, B_INMEM)) { errno = ENOENT; return (-1); } return (t->bt_fd); } db/btree/bt_overflow.c0100644000076400007640000001347606072171200015521 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "btree.h" /* * Big key/data code. * * Big key and data entries are stored on linked lists of pages. The initial * reference is byte string stored with the key or data and is the page number * and size. The actual record is stored in a chain of pages linked by the * nextpg field of the PAGE header. * * The first page of the chain has a special property. If the record is used * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set * in the header. * * XXX * A single DBT is written to each chain, so a lot of space on the last page * is wasted. This is a fairly major bug for some data sets. */ /* * __OVFL_GET -- Get an overflow key/data item. * * Parameters: * t: tree * p: pointer to { pgno_t, u_int32_t } * buf: storage address * bufsz: storage size * * Returns: * RET_ERROR, RET_SUCCESS */ int __ovfl_get(t, p, ssz, buf, bufsz) BTREE *t; void *p; size_t *ssz; void **buf; size_t *bufsz; { PAGE *h; pgno_t pg; size_t nb, plen; u_int32_t sz; memmove(&pg, p, sizeof(pgno_t)); memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); *ssz = sz; #ifdef DEBUG if (pg == P_INVALID || sz == 0) abort(); #endif /* Make the buffer bigger as necessary. */ if (*bufsz < sz) { *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz)); if (*buf == NULL) return (RET_ERROR); *bufsz = sz; } /* * Step through the linked list of pages, copying the data on each one * into the buffer. Never copy more than the data's length. */ plen = t->bt_psize - BTDATAOFF; for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); nb = MIN(sz, plen); memmove(p, (char *)h + BTDATAOFF, nb); mpool_put(t->bt_mp, h, 0); if ((sz -= nb) == 0) break; } return (RET_SUCCESS); } /* * __OVFL_PUT -- Store an overflow key/data item. * * Parameters: * t: tree * data: DBT to store * pgno: storage page number * * Returns: * RET_ERROR, RET_SUCCESS */ int __ovfl_put(t, dbt, pg) BTREE *t; const DBT *dbt; pgno_t *pg; { PAGE *h, *last; void *p; pgno_t npg; size_t nb, plen; u_int32_t sz; /* * Allocate pages and copy the key/data record into them. Store the * number of the first page in the chain. */ plen = t->bt_psize - BTDATAOFF; for (last = NULL, p = dbt->data, sz = dbt->size;; p = (char *)p + plen, last = h) { if ((h = __bt_new(t, &npg)) == NULL) return (RET_ERROR); h->pgno = npg; h->nextpg = h->prevpg = P_INVALID; h->flags = P_OVERFLOW; h->lower = h->upper = 0; nb = MIN(sz, plen); memmove((char *)h + BTDATAOFF, p, nb); if (last) { last->nextpg = h->pgno; mpool_put(t->bt_mp, last, MPOOL_DIRTY); } else *pg = h->pgno; if ((sz -= nb) == 0) { mpool_put(t->bt_mp, h, MPOOL_DIRTY); break; } } return (RET_SUCCESS); } /* * __OVFL_DELETE -- Delete an overflow chain. * * Parameters: * t: tree * p: pointer to { pgno_t, u_int32_t } * * Returns: * RET_ERROR, RET_SUCCESS */ int __ovfl_delete(t, p) BTREE *t; void *p; { PAGE *h; pgno_t pg; size_t plen; u_int32_t sz; memmove(&pg, p, sizeof(pgno_t)); memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); #ifdef DEBUG if (pg == P_INVALID || sz == 0) abort(); #endif if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); /* Don't delete chains used by internal pages. */ if (h->flags & P_PRESERVE) { mpool_put(t->bt_mp, h, 0); return (RET_SUCCESS); } /* Step through the chain, calling the free routine for each page. */ for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) { pg = h->nextpg; __bt_free(t, h); if (sz <= plen) break; if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); } return (RET_SUCCESS); } db/btree/bt_page.c0100644000076400007640000000566606403660046014605 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_page.c 8.3 (Berkeley) 7/14/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include "btree.h" /* * __bt_free -- * Put a page on the freelist. * * Parameters: * t: tree * h: page to free * * Returns: * RET_ERROR, RET_SUCCESS * * Side-effect: * mpool_put's the page. */ int __bt_free(t, h) BTREE *t; PAGE *h; { /* Insert the page at the head of the free list. */ h->prevpg = P_INVALID; h->nextpg = t->bt_free; t->bt_free = h->pgno; F_SET(t, B_METADIRTY); /* Make sure the page gets written back. */ return (mpool_put(t->bt_mp, h, MPOOL_DIRTY)); } /* * __bt_new -- * Get a new page, preferably from the freelist. * * Parameters: * t: tree * npg: storage for page number. * * Returns: * Pointer to a page, NULL on error. */ PAGE * __bt_new(t, npg) BTREE *t; pgno_t *npg; { PAGE *h; if (t->bt_free != P_INVALID && (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) { *npg = t->bt_free; t->bt_free = h->nextpg; F_SET(t, B_METADIRTY); return (h); } return (mpool_new(t->bt_mp, npg)); } db/btree/bt_put.c0100644000076400007640000002100606602177361014467 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include "btree.h" static EPG *bt_fast __P((BTREE *, const DBT *, const DBT *, int *)); /* * __BT_PUT -- Add a btree item to the tree. * * Parameters: * dbp: pointer to access method * key: key * data: data * flag: R_NOOVERWRITE * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the * tree and R_NOOVERWRITE specified. */ int __bt_put(dbp, key, data, flags) const DB *dbp; DBT *key; const DBT *data; u_int flags; { BTREE *t; DBT tkey, tdata; EPG *e; PAGE *h; indx_t index, nxtindex; pgno_t pg; u_int32_t nbytes; int dflags, exact, status; char *dest, db[NOVFLSIZE], kb[NOVFLSIZE]; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* Check for change to a read-only tree. */ if (F_ISSET(t, B_RDONLY)) { errno = EPERM; return (RET_ERROR); } switch (flags) { case 0: case R_NOOVERWRITE: break; case R_CURSOR: /* * If flags is R_CURSOR, put the cursor. Must already * have started a scan and not have already deleted it. */ if (F_ISSET(&t->bt_cursor, CURS_INIT) && !F_ISSET(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) break; /* FALLTHROUGH */ default: errno = EINVAL; return (RET_ERROR); } /* * If the key/data pair won't fit on a page, store it on overflow * pages. Only put the key on the overflow page if the pair are * still too big after moving the data to an overflow page. * * XXX * If the insert fails later on, the overflow pages aren't recovered. */ dflags = 0; if (key->size + data->size > t->bt_ovflsize) { if (key->size > t->bt_ovflsize) { storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) return (RET_ERROR); tkey.data = kb; tkey.size = NOVFLSIZE; memmove(kb, &pg, sizeof(pgno_t)); memmove(kb + sizeof(pgno_t), &key->size, sizeof(u_int32_t)); dflags |= P_BIGKEY; key = &tkey; } if (key->size + data->size > t->bt_ovflsize) { if (__ovfl_put(t, data, &pg) == RET_ERROR) return (RET_ERROR); tdata.data = db; tdata.size = NOVFLSIZE; memmove(db, &pg, sizeof(pgno_t)); memmove(db + sizeof(pgno_t), &data->size, sizeof(u_int32_t)); dflags |= P_BIGDATA; data = &tdata; } if (key->size + data->size > t->bt_ovflsize) goto storekey; } /* Replace the cursor. */ if (flags == R_CURSOR) { if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL) return (RET_ERROR); index = t->bt_cursor.pg.index; goto delete; } /* * Find the key to delete, or, the location at which to insert. * Bt_fast and __bt_search both pin the returned page. */ if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL) if ((e = __bt_search(t, key, &exact)) == NULL) return (RET_ERROR); h = e->page; index = e->index; /* * Add the key/data pair to the tree. If an identical key is already * in the tree, and R_NOOVERWRITE is set, an error is returned. If * R_NOOVERWRITE is not set, the key is either added (if duplicates are * permitted) or an error is returned. */ switch (flags) { case R_NOOVERWRITE: if (!exact) break; mpool_put(t->bt_mp, h, 0); return (RET_SPECIAL); default: if (!exact || !F_ISSET(t, B_NODUPS)) break; /* * !!! * Note, the delete may empty the page, so we need to put a * new entry into the page immediately. */ delete: if (__bt_dleaf(t, key, h, index) == RET_ERROR) { mpool_put(t->bt_mp, h, 0); return (RET_ERROR); } break; } /* * If not enough room, or the user has put a ceiling on the number of * keys permitted in the page, split the page. The split code will * insert the key and data and unpin the current page. If inserting * into the offset array, shift the pointers up. */ nbytes = NBLEAFDBT(key->size, data->size); if ((u_int32_t) (h->upper - h->lower) < nbytes + sizeof(indx_t)) { if ((status = __bt_split(t, h, key, data, dflags, nbytes, index)) != RET_SUCCESS) return (status); goto success; } if (index < (nxtindex = NEXTINDEX(h))) memmove(h->linp + index + 1, h->linp + index, (nxtindex - index) * sizeof(indx_t)); h->lower += sizeof(indx_t); h->linp[index] = h->upper -= nbytes; dest = (char *)h + h->upper; WR_BLEAF(dest, key, data, dflags); /* If the cursor is on this page, adjust it as necessary. */ if (F_ISSET(&t->bt_cursor, CURS_INIT) && !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= index) ++t->bt_cursor.pg.index; if (t->bt_order == NOT) { if (h->nextpg == P_INVALID) { if (index == NEXTINDEX(h) - 1) { t->bt_order = FORWARD; t->bt_last.index = index; t->bt_last.pgno = h->pgno; } } else if (h->prevpg == P_INVALID) { if (index == 0) { t->bt_order = BACK; t->bt_last.index = 0; t->bt_last.pgno = h->pgno; } } } mpool_put(t->bt_mp, h, MPOOL_DIRTY); success: if (flags == R_SETCURSOR) __bt_setcur(t, e->page->pgno, e->index); F_SET(t, B_MODIFIED); return (RET_SUCCESS); } #ifdef STATISTICS u_long bt_cache_hit, bt_cache_miss; #endif /* * BT_FAST -- Do a quick check for sorted data. * * Parameters: * t: tree * key: key to insert * * Returns: * EPG for new record or NULL if not found. */ static EPG * bt_fast(t, key, data, exactp) BTREE *t; const DBT *key, *data; int *exactp; { PAGE *h; u_int32_t nbytes; int cmp; if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) { t->bt_order = NOT; return (NULL); } t->bt_cur.page = h; t->bt_cur.index = t->bt_last.index; /* * If won't fit in this page or have too many keys in this page, * have to search to get split stack. */ nbytes = NBLEAFDBT(key->size, data->size); if ((u_int32_t) (h->upper - h->lower) < nbytes + sizeof(indx_t)) goto miss; if (t->bt_order == FORWARD) { if (t->bt_cur.page->nextpg != P_INVALID) goto miss; if (t->bt_cur.index != NEXTINDEX(h) - 1) goto miss; if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0) goto miss; t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index; } else { if (t->bt_cur.page->prevpg != P_INVALID) goto miss; if (t->bt_cur.index != 0) goto miss; if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0) goto miss; t->bt_last.index = 0; } *exactp = cmp == 0; #ifdef STATISTICS ++bt_cache_hit; #endif return (&t->bt_cur); miss: #ifdef STATISTICS ++bt_cache_miss; #endif t->bt_order = NOT; mpool_put(t->bt_mp, h, 0); return (NULL); } db/btree/bt_search.c0100644000076400007640000001336606072171205015126 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_search.c 8.8 (Berkeley) 7/31/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include "btree.h" static int __bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); static int __bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); /* * __bt_search -- * Search a btree for a key. * * Parameters: * t: tree to search * key: key to find * exactp: pointer to exact match flag * * Returns: * The EPG for matching record, if any, or the EPG for the location * of the key, if it were inserted into the tree, is entered into * the bt_cur field of the tree. A pointer to the field is returned. */ EPG * __bt_search(t, key, exactp) BTREE *t; const DBT *key; int *exactp; { PAGE *h; indx_t base, index, lim; pgno_t pg; int cmp; BT_CLR(t); for (pg = P_ROOT;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (NULL); /* Do a binary search on the current page. */ t->bt_cur.page = h; for (base = 0, lim = NEXTINDEX(h); lim; lim >>= 1) { t->bt_cur.index = index = base + (lim >> 1); if ((cmp = __bt_cmp(t, key, &t->bt_cur)) == 0) { if (h->flags & P_BLEAF) { *exactp = 1; return (&t->bt_cur); } goto next; } if (cmp > 0) { base = index + 1; --lim; } } /* * If it's a leaf page, we're almost done. If no duplicates * are allowed, or we have an exact match, we're done. Else, * it's possible that there were matching keys on this page, * which later deleted, and we're on a page with no matches * while there are matches on other pages. If at the start or * end of a page, check the adjacent page. */ if (h->flags & P_BLEAF) { if (!F_ISSET(t, B_NODUPS)) { if (base == 0 && h->prevpg != P_INVALID && __bt_sprev(t, h, key, exactp)) return (&t->bt_cur); if (base == NEXTINDEX(h) && h->nextpg != P_INVALID && __bt_snext(t, h, key, exactp)) return (&t->bt_cur); } *exactp = 0; t->bt_cur.index = base; return (&t->bt_cur); } /* * No match found. Base is the smallest index greater than * key and may be zero or a last + 1 index. If it's non-zero, * decrement by one, and record the internal page which should * be a parent page for the key. If a split later occurs, the * inserted page will be to the right of the saved page. */ index = base ? base - 1 : base; next: BT_PUSH(t, h->pgno, index); pg = GETBINTERNAL(h, index)->pgno; mpool_put(t->bt_mp, h, 0); } } /* * __bt_snext -- * Check for an exact match after the key. * * Parameters: * t: tree * h: current page * key: key * exactp: pointer to exact match flag * * Returns: * If an exact match found. */ static int __bt_snext(t, h, key, exactp) BTREE *t; PAGE *h; const DBT *key; int *exactp; { EPG e; /* * Get the next page. The key is either an exact * match, or not as good as the one we already have. */ if ((e.page = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) return (0); e.index = 0; if (__bt_cmp(t, key, &e) == 0) { mpool_put(t->bt_mp, h, 0); t->bt_cur = e; *exactp = 1; return (1); } mpool_put(t->bt_mp, e.page, 0); return (0); } /* * __bt_sprev -- * Check for an exact match before the key. * * Parameters: * t: tree * h: current page * key: key * exactp: pointer to exact match flag * * Returns: * If an exact match found. */ static int __bt_sprev(t, h, key, exactp) BTREE *t; PAGE *h; const DBT *key; int *exactp; { EPG e; /* * Get the previous page. The key is either an exact * match, or not as good as the one we already have. */ if ((e.page = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) return (0); e.index = NEXTINDEX(e.page) - 1; if (__bt_cmp(t, key, &e) == 0) { mpool_put(t->bt_mp, h, 0); t->bt_cur = e; *exactp = 1; return (1); } mpool_put(t->bt_mp, e.page, 0); return (0); } db/btree/bt_seq.c0100644000076400007640000002661006256366206014460 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include "btree.h" static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); static int __bt_seqadv __P((BTREE *, EPG *, int)); static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); /* * Sequential scan support. * * The tree can be scanned sequentially, starting from either end of the * tree or from any specific key. A scan request before any scanning is * done is initialized as starting from the least node. */ /* * __bt_seq -- * Btree sequential scan interface. * * Parameters: * dbp: pointer to access method * key: key for positioning and return value * data: data return value * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. * * Returns: * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ int __bt_seq(dbp, key, data, flags) const DB *dbp; DBT *key, *data; u_int flags; { BTREE *t; EPG e; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* * If scan uninitialized as yet, or starting at a specific record, set * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin * the page the cursor references if they're successful. */ switch (flags) { case R_NEXT: case R_PREV: if (F_ISSET(&t->bt_cursor, CURS_INIT)) { status = __bt_seqadv(t, &e, flags); break; } /* FALLTHROUGH */ case R_FIRST: case R_LAST: case R_CURSOR: status = __bt_seqset(t, &e, key, flags); break; default: errno = EINVAL; return (RET_ERROR); } if (status == RET_SUCCESS) { __bt_setcur(t, e.page->pgno, e.index); status = __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); /* * If the user is doing concurrent access, we copied the * key/data, toss the page. */ if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e.page, 0); else t->bt_pinned = e.page; } return (status); } /* * __bt_seqset -- * Set the sequential scan to a specific key. * * Parameters: * t: tree * ep: storage for returned key * key: key for initial scan position * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV * * Side effects: * Pins the page the cursor references. * * Returns: * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ static int __bt_seqset(t, ep, key, flags) BTREE *t; EPG *ep; DBT *key; int flags; { PAGE *h; pgno_t pg; int exact; /* * Find the first, last or specific key in the tree and point the * cursor at it. The cursor may not be moved until a new key has * been found. */ switch (flags) { case R_CURSOR: /* Keyed scan. */ /* * Find the first instance of the key or the smallest key * which is greater than or equal to the specified key. */ if (key->data == NULL || key->size == 0) { errno = EINVAL; return (RET_ERROR); } return (__bt_first(t, key, ep, &exact)); case R_FIRST: /* First record. */ case R_NEXT: /* Walk down the left-hand side of the tree. */ for (pg = P_ROOT;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); /* Check for an empty tree. */ if (NEXTINDEX(h) == 0) { mpool_put(t->bt_mp, h, 0); return (RET_SPECIAL); } if (h->flags & (P_BLEAF | P_RLEAF)) break; pg = GETBINTERNAL(h, 0)->pgno; mpool_put(t->bt_mp, h, 0); } ep->page = h; ep->index = 0; break; case R_LAST: /* Last record. */ case R_PREV: /* Walk down the right-hand side of the tree. */ for (pg = P_ROOT;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); /* Check for an empty tree. */ if (NEXTINDEX(h) == 0) { mpool_put(t->bt_mp, h, 0); return (RET_SPECIAL); } if (h->flags & (P_BLEAF | P_RLEAF)) break; pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; mpool_put(t->bt_mp, h, 0); } ep->page = h; ep->index = NEXTINDEX(h) - 1; break; } return (RET_SUCCESS); } /* * __bt_seqadvance -- * Advance the sequential scan. * * Parameters: * t: tree * flags: R_NEXT, R_PREV * * Side effects: * Pins the page the new key/data record is on. * * Returns: * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ static int __bt_seqadv(t, ep, flags) BTREE *t; EPG *ep; int flags; { CURSOR *c; PAGE *h; indx_t index; pgno_t pg; int exact; /* * There are a couple of states that we can be in. The cursor has * been initialized by the time we get here, but that's all we know. */ c = &t->bt_cursor; /* * The cursor was deleted where there weren't any duplicate records, * so the key was saved. Find out where that key would go in the * current tree. It doesn't matter if the returned key is an exact * match or not -- if it's an exact match, the record was added after * the delete so we can just return it. If not, as long as there's * a record there, return it. */ if (F_ISSET(c, CURS_ACQUIRE)) return (__bt_first(t, &c->key, ep, &exact)); /* Get the page referenced by the cursor. */ if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) return (RET_ERROR); /* * Find the next/previous record in the tree and point the cursor at * it. The cursor may not be moved until a new key has been found. */ switch (flags) { case R_NEXT: /* Next record. */ /* * The cursor was deleted in duplicate records, and moved * forward to a record that has yet to be returned. Clear * that flag, and return the record. */ if (F_ISSET(c, CURS_AFTER)) goto usecurrent; index = c->pg.index; if (++index == NEXTINDEX(h)) { pg = h->nextpg; mpool_put(t->bt_mp, h, 0); if (pg == P_INVALID) return (RET_SPECIAL); if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); index = 0; } break; case R_PREV: /* Previous record. */ /* * The cursor was deleted in duplicate records, and moved * backward to a record that has yet to be returned. Clear * that flag, and return the record. */ if (F_ISSET(c, CURS_BEFORE)) { usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); ep->page = h; ep->index = c->pg.index; return (RET_SUCCESS); } index = c->pg.index; if (index == 0) { pg = h->prevpg; mpool_put(t->bt_mp, h, 0); if (pg == P_INVALID) return (RET_SPECIAL); if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); index = NEXTINDEX(h) - 1; } else --index; break; } ep->page = h; ep->index = index; return (RET_SUCCESS); } /* * __bt_first -- * Find the first entry. * * Parameters: * t: the tree * key: the key * erval: return EPG * exactp: pointer to exact match flag * * Returns: * The first entry in the tree greater than or equal to key, * or RET_SPECIAL if no such key exists. */ static int __bt_first(t, key, erval, exactp) BTREE *t; const DBT *key; EPG *erval; int *exactp; { PAGE *h; EPG *ep, save; pgno_t pg; /* * Find any matching record; __bt_search pins the page. * * If it's an exact match and duplicates are possible, walk backwards * in the tree until we find the first one. Otherwise, make sure it's * a valid key (__bt_search may return an index just past the end of a * page) and return it. */ if ((ep = __bt_search(t, key, exactp)) == NULL) return (RET_SPECIAL); if (*exactp) { if (F_ISSET(t, B_NODUPS)) { *erval = *ep; return (RET_SUCCESS); } /* * Walk backwards, as long as the entry matches and there are * keys left in the tree. Save a copy of each match in case * we go too far. */ save = *ep; h = ep->page; do { if (save.page->pgno != ep->page->pgno) { mpool_put(t->bt_mp, save.page, 0); save = *ep; } else save.index = ep->index; /* * Don't unpin the page the last (or original) match * was on, but make sure it's unpinned if an error * occurs. */ if (ep->index == 0) { if (h->prevpg == P_INVALID) break; if (h->pgno != save.page->pgno) mpool_put(t->bt_mp, h, 0); if ((h = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) { if (h->pgno == save.page->pgno) mpool_put(t->bt_mp, save.page, 0); return (RET_ERROR); } ep->page = h; ep->index = NEXTINDEX(h); } --ep->index; } while (__bt_cmp(t, key, ep) == 0); /* * Reach here with the last page that was looked at pinned, * which may or may not be the same as the last (or original) * match page. If it's not useful, release it. */ if (h->pgno != save.page->pgno) mpool_put(t->bt_mp, h, 0); *erval = save; return (RET_SUCCESS); } /* If at the end of a page, find the next entry. */ if (ep->index == NEXTINDEX(ep->page)) { h = ep->page; pg = h->nextpg; mpool_put(t->bt_mp, h, 0); if (pg == P_INVALID) return (RET_SPECIAL); if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); ep->index = 0; ep->page = h; } *erval = *ep; return (RET_SUCCESS); } /* * __bt_setcur -- * Set the cursor to an entry in the tree. * * Parameters: * t: the tree * pgno: page number * index: page index */ void __bt_setcur(t, pgno, index) BTREE *t; pgno_t pgno; u_int index; { /* Lose any already deleted key. */ if (t->bt_cursor.key.data != NULL) { free(t->bt_cursor.key.data); t->bt_cursor.key.size = 0; t->bt_cursor.key.data = NULL; } F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); /* Update the cursor. */ t->bt_cursor.pg.pgno = pgno; t->bt_cursor.pg.index = index; F_SET(&t->bt_cursor, CURS_INIT); } db/btree/bt_split.c0100644000076400007640000005343606602177362015027 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_split.c 8.9 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include "btree.h" static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); static PAGE *bt_page __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); static int bt_preserve __P((BTREE *, pgno_t)); static PAGE *bt_psplit __P((BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t)); static PAGE *bt_root __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); static recno_t rec_total __P((PAGE *)); #ifdef STATISTICS u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; #endif /* * __BT_SPLIT -- Split the tree. * * Parameters: * t: tree * sp: page to split * key: key to insert * data: data to insert * flags: BIGKEY/BIGDATA flags * ilen: insert length * skip: index to leave open * * Returns: * RET_ERROR, RET_SUCCESS */ int __bt_split(t, sp, key, data, flags, ilen, argskip) BTREE *t; PAGE *sp; const DBT *key, *data; int flags; size_t ilen; u_int32_t argskip; { BINTERNAL *bi; BLEAF *bl, *tbl; DBT a, b; EPGNO *parent; PAGE *h, *l, *r, *lchild, *rchild; indx_t nxtindex; u_int16_t skip; u_int32_t n, nbytes, nksize; int parentsplit; char *dest; /* * Split the page into two pages, l and r. The split routines return * a pointer to the page into which the key should be inserted and with * skip set to the offset which should be used. Additionally, l and r * are pinned. */ skip = argskip; h = sp->pgno == P_ROOT ? bt_root(t, sp, &l, &r, &skip, ilen) : bt_page(t, sp, &l, &r, &skip, ilen); if (h == NULL) return (RET_ERROR); /* * Insert the new key/data pair into the leaf page. (Key inserts * always cause a leaf page to split first.) */ h->linp[skip] = h->upper -= ilen; dest = (char *)h + h->upper; if (F_ISSET(t, R_RECNO)) WR_RLEAF(dest, data, flags) else WR_BLEAF(dest, key, data, flags) /* If the root page was split, make it look right. */ if (sp->pgno == P_ROOT && (F_ISSET(t, R_RECNO) ? bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) goto err2; /* * Now we walk the parent page stack -- a LIFO stack of the pages that * were traversed when we searched for the page that split. Each stack * entry is a page number and a page index offset. The offset is for * the page traversed on the search. We've just split a page, so we * have to insert a new key into the parent page. * * If the insert into the parent page causes it to split, may have to * continue splitting all the way up the tree. We stop if the root * splits or the page inserted into didn't have to split to hold the * new key. Some algorithms replace the key for the old page as well * as the new page. We don't, as there's no reason to believe that the * first key on the old page is any better than the key we have, and, * in the case of a key being placed at index 0 causing the split, the * key is unavailable. * * There are a maximum of 5 pages pinned at any time. We keep the left * and right pages pinned while working on the parent. The 5 are the * two children, left parent and right parent (when the parent splits) * and the root page or the overflow key page when calling bt_preserve. * This code must make sure that all pins are released other than the * root page or overflow page which is unlocked elsewhere. */ while ((parent = BT_POP(t)) != NULL) { lchild = l; rchild = r; /* Get the parent page. */ if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) goto err2; /* * The new key goes ONE AFTER the index, because the split * was to the right. */ skip = parent->index + 1; /* * Calculate the space needed on the parent page. * * Prefix trees: space hack when inserting into BINTERNAL * pages. Retain only what's needed to distinguish between * the new entry and the LAST entry on the page to its left. * If the keys compare equal, retain the entire key. Note, * we don't touch overflow keys, and the entire key must be * retained for the next-to-left most key on the leftmost * page of each level, or the search will fail. Applicable * ONLY to internal pages that have leaf pages as children. * Further reduction of the key between pairs of internal * pages loses too much information. */ switch (rchild->flags & P_TYPE) { case P_BINTERNAL: bi = GETBINTERNAL(rchild, 0); nbytes = NBINTERNAL(bi->ksize); break; case P_BLEAF: bl = GETBLEAF(rchild, 0); nbytes = NBINTERNAL(bl->ksize); if (t->bt_pfx && !(bl->flags & P_BIGKEY) && (h->prevpg != P_INVALID || skip > 1)) { tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); a.size = tbl->ksize; a.data = tbl->bytes; b.size = bl->ksize; b.data = bl->bytes; nksize = t->bt_pfx(&a, &b); n = NBINTERNAL(nksize); if (n < nbytes) { #ifdef STATISTICS bt_pfxsaved += nbytes - n; #endif nbytes = n; } else nksize = 0; } else nksize = 0; break; case P_RINTERNAL: case P_RLEAF: nbytes = NRINTERNAL; break; default: abort(); } /* Split the parent page if necessary or shift the indices. */ if ((u_int32_t) (h->upper - h->lower) < nbytes + sizeof(indx_t)) { sp = h; h = h->pgno == P_ROOT ? bt_root(t, h, &l, &r, &skip, nbytes) : bt_page(t, h, &l, &r, &skip, nbytes); if (h == NULL) goto err1; parentsplit = 1; } else { if (skip < (nxtindex = NEXTINDEX(h))) memmove(h->linp + skip + 1, h->linp + skip, (nxtindex - skip) * sizeof(indx_t)); h->lower += sizeof(indx_t); parentsplit = 0; } /* Insert the key into the parent page. */ switch (rchild->flags & P_TYPE) { case P_BINTERNAL: h->linp[skip] = h->upper -= nbytes; dest = (char *)h + h->linp[skip]; memmove(dest, bi, nbytes); ((BINTERNAL *)dest)->pgno = rchild->pgno; break; case P_BLEAF: h->linp[skip] = h->upper -= nbytes; dest = (char *)h + h->linp[skip]; WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, rchild->pgno, bl->flags & P_BIGKEY); memmove(dest, bl->bytes, nksize ? nksize : bl->ksize); if (bl->flags & P_BIGKEY && bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) goto err1; break; case P_RINTERNAL: /* * Update the left page count. If split * added at index 0, fix the correct page. */ if (skip > 0) dest = (char *)h + h->linp[skip - 1]; else dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; ((RINTERNAL *)dest)->nrecs = rec_total(lchild); ((RINTERNAL *)dest)->pgno = lchild->pgno; /* Update the right page count. */ h->linp[skip] = h->upper -= nbytes; dest = (char *)h + h->linp[skip]; ((RINTERNAL *)dest)->nrecs = rec_total(rchild); ((RINTERNAL *)dest)->pgno = rchild->pgno; break; case P_RLEAF: /* * Update the left page count. If split * added at index 0, fix the correct page. */ if (skip > 0) dest = (char *)h + h->linp[skip - 1]; else dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); ((RINTERNAL *)dest)->pgno = lchild->pgno; /* Update the right page count. */ h->linp[skip] = h->upper -= nbytes; dest = (char *)h + h->linp[skip]; ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); ((RINTERNAL *)dest)->pgno = rchild->pgno; break; default: abort(); } /* Unpin the held pages. */ if (!parentsplit) { mpool_put(t->bt_mp, h, MPOOL_DIRTY); break; } /* If the root page was split, make it look right. */ if (sp->pgno == P_ROOT && (F_ISSET(t, R_RECNO) ? bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) goto err1; mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); } /* Unpin the held pages. */ mpool_put(t->bt_mp, l, MPOOL_DIRTY); mpool_put(t->bt_mp, r, MPOOL_DIRTY); /* Clear any pages left on the stack. */ return (RET_SUCCESS); /* * If something fails in the above loop we were already walking back * up the tree and the tree is now inconsistent. Nothing much we can * do about it but release any memory we're holding. */ err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); err2: mpool_put(t->bt_mp, l, 0); mpool_put(t->bt_mp, r, 0); __dbpanic(t->bt_dbp); return (RET_ERROR); } /* * BT_PAGE -- Split a non-root page of a btree. * * Parameters: * t: tree * h: root page * lp: pointer to left page pointer * rp: pointer to right page pointer * skip: pointer to index to leave open * ilen: insert length * * Returns: * Pointer to page in which to insert or NULL on error. */ static PAGE * bt_page(t, h, lp, rp, skip, ilen) BTREE *t; PAGE *h, **lp, **rp; indx_t *skip; size_t ilen; { PAGE *l, *r, *tp; pgno_t npg; #ifdef STATISTICS ++bt_split; #endif /* Put the new right page for the split into place. */ if ((r = __bt_new(t, &npg)) == NULL) return (NULL); r->pgno = npg; r->lower = BTDATAOFF; r->upper = t->bt_psize; r->nextpg = h->nextpg; r->prevpg = h->pgno; r->flags = h->flags & P_TYPE; /* * If we're splitting the last page on a level because we're appending * a key to it (skip is NEXTINDEX()), it's likely that the data is * sorted. Adding an empty page on the side of the level is less work * and can push the fill factor much higher than normal. If we're * wrong it's no big deal, we'll just do the split the right way next * time. It may look like it's equally easy to do a similar hack for * reverse sorted data, that is, split the tree left, but it's not. * Don't even try. */ if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { #ifdef STATISTICS ++bt_sortsplit; #endif h->nextpg = r->pgno; r->lower = BTDATAOFF + sizeof(indx_t); *skip = 0; *lp = h; *rp = r; return (r); } /* Put the new left page for the split into place. */ if ((l = (PAGE *)malloc(t->bt_psize)) == NULL) { mpool_put(t->bt_mp, r, 0); return (NULL); } #ifdef PURIFY memset(l, 0xff, t->bt_psize); #endif l->pgno = h->pgno; l->nextpg = r->pgno; l->prevpg = h->prevpg; l->lower = BTDATAOFF; l->upper = t->bt_psize; l->flags = h->flags & P_TYPE; /* Fix up the previous pointer of the page after the split page. */ if (h->nextpg != P_INVALID) { if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { free(l); /* XXX mpool_free(t->bt_mp, r->pgno); */ return (NULL); } tp->prevpg = r->pgno; mpool_put(t->bt_mp, tp, MPOOL_DIRTY); } /* * Split right. The key/data pairs aren't sorted in the btree page so * it's simpler to copy the data from the split page onto two new pages * instead of copying half the data to the right page and compacting * the left page in place. Since the left page can't change, we have * to swap the original and the allocated left page after the split. */ tp = bt_psplit(t, h, l, r, skip, ilen); /* Move the new left page onto the old left page. */ memmove(h, l, t->bt_psize); if (tp == l) tp = h; free(l); *lp = h; *rp = r; return (tp); } /* * BT_ROOT -- Split the root page of a btree. * * Parameters: * t: tree * h: root page * lp: pointer to left page pointer * rp: pointer to right page pointer * skip: pointer to index to leave open * ilen: insert length * * Returns: * Pointer to page in which to insert or NULL on error. */ static PAGE * bt_root(t, h, lp, rp, skip, ilen) BTREE *t; PAGE *h, **lp, **rp; indx_t *skip; size_t ilen; { PAGE *l, *r, *tp; pgno_t lnpg, rnpg; #ifdef STATISTICS ++bt_split; ++bt_rootsplit; #endif /* Put the new left and right pages for the split into place. */ if ((l = __bt_new(t, &lnpg)) == NULL || (r = __bt_new(t, &rnpg)) == NULL) return (NULL); l->pgno = lnpg; r->pgno = rnpg; l->nextpg = r->pgno; r->prevpg = l->pgno; l->prevpg = r->nextpg = P_INVALID; l->lower = r->lower = BTDATAOFF; l->upper = r->upper = t->bt_psize; l->flags = r->flags = h->flags & P_TYPE; /* Split the root page. */ tp = bt_psplit(t, h, l, r, skip, ilen); *lp = l; *rp = r; return (tp); } /* * BT_RROOT -- Fix up the recno root page after it has been split. * * Parameters: * t: tree * h: root page * l: left page * r: right page * * Returns: * RET_ERROR, RET_SUCCESS */ static int bt_rroot(t, h, l, r) BTREE *t; PAGE *h, *l, *r; { char *dest; /* Insert the left and right keys, set the header information. */ h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; dest = (char *)h + h->upper; WR_RINTERNAL(dest, l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); h->linp[1] = h->upper -= NRINTERNAL; dest = (char *)h + h->upper; WR_RINTERNAL(dest, r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); h->lower = BTDATAOFF + 2 * sizeof(indx_t); /* Unpin the root page, set to recno internal page. */ h->flags &= ~P_TYPE; h->flags |= P_RINTERNAL; mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); } /* * BT_BROOT -- Fix up the btree root page after it has been split. * * Parameters: * t: tree * h: root page * l: left page * r: right page * * Returns: * RET_ERROR, RET_SUCCESS */ static int bt_broot(t, h, l, r) BTREE *t; PAGE *h, *l, *r; { BINTERNAL *bi; BLEAF *bl; u_int32_t nbytes; char *dest; /* * If the root page was a leaf page, change it into an internal page. * We copy the key we split on (but not the key's data, in the case of * a leaf page) to the new root page. * * The btree comparison code guarantees that the left-most key on any * level of the tree is never used, so it doesn't need to be filled in. */ nbytes = NBINTERNAL(0); h->linp[0] = h->upper = t->bt_psize - nbytes; dest = (char *)h + h->upper; WR_BINTERNAL(dest, 0, l->pgno, 0); switch (h->flags & P_TYPE) { case P_BLEAF: bl = GETBLEAF(r, 0); nbytes = NBINTERNAL(bl->ksize); h->linp[1] = h->upper -= nbytes; dest = (char *)h + h->upper; WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); memmove(dest, bl->bytes, bl->ksize); /* * If the key is on an overflow page, mark the overflow chain * so it isn't deleted when the leaf copy of the key is deleted. */ if (bl->flags & P_BIGKEY && bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) return (RET_ERROR); break; case P_BINTERNAL: bi = GETBINTERNAL(r, 0); nbytes = NBINTERNAL(bi->ksize); h->linp[1] = h->upper -= nbytes; dest = (char *)h + h->upper; memmove(dest, bi, nbytes); ((BINTERNAL *)dest)->pgno = r->pgno; break; default: abort(); } /* There are two keys on the page. */ h->lower = BTDATAOFF + 2 * sizeof(indx_t); /* Unpin the root page, set to btree internal page. */ h->flags &= ~P_TYPE; h->flags |= P_BINTERNAL; mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); } /* * BT_PSPLIT -- Do the real work of splitting the page. * * Parameters: * t: tree * h: page to be split * l: page to put lower half of data * r: page to put upper half of data * pskip: pointer to index to leave open * ilen: insert length * * Returns: * Pointer to page in which to insert. */ static PAGE * bt_psplit(t, h, l, r, pskip, ilen) BTREE *t; PAGE *h, *l, *r; indx_t *pskip; size_t ilen; { BINTERNAL *bi; BLEAF *bl; CURSOR *c; RLEAF *rl; PAGE *rval; void *src; indx_t full, half, nxt, off, skip, top, used; u_int32_t nbytes; int bigkeycnt, isbigkey; /* * Split the data to the left and right pages. Leave the skip index * open. Additionally, make some effort not to split on an overflow * key. This makes internal page processing faster and can save * space as overflow keys used by internal pages are never deleted. */ bigkeycnt = 0; skip = *pskip; full = t->bt_psize - BTDATAOFF; half = full / 2; used = 0; for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { if (skip == off) { nbytes = ilen; isbigkey = 0; /* XXX: not really known. */ } else switch (h->flags & P_TYPE) { case P_BINTERNAL: src = bi = GETBINTERNAL(h, nxt); nbytes = NBINTERNAL(bi->ksize); isbigkey = bi->flags & P_BIGKEY; break; case P_BLEAF: src = bl = GETBLEAF(h, nxt); nbytes = NBLEAF(bl); isbigkey = bl->flags & P_BIGKEY; break; case P_RINTERNAL: src = GETRINTERNAL(h, nxt); nbytes = NRINTERNAL; isbigkey = 0; break; case P_RLEAF: src = rl = GETRLEAF(h, nxt); nbytes = NRLEAF(rl); isbigkey = 0; break; default: abort(); } /* * If the key/data pairs are substantial fractions of the max * possible size for the page, it's possible to get situations * where we decide to try and copy too much onto the left page. * Make sure that doesn't happen. */ if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) || nxt == top - 1) { --off; break; } /* Copy the key/data pair, if not the skipped index. */ if (skip != off) { ++nxt; l->linp[off] = l->upper -= nbytes; memmove((char *)l + l->upper, src, nbytes); } used += nbytes + sizeof(indx_t); if (used >= half) { if (!isbigkey || bigkeycnt == 3) break; else ++bigkeycnt; } } /* * Off is the last offset that's valid for the left page. * Nxt is the first offset to be placed on the right page. */ l->lower += (off + 1) * sizeof(indx_t); /* * If splitting the page that the cursor was on, the cursor has to be * adjusted to point to the same record as before the split. If the * cursor is at or past the skipped slot, the cursor is incremented by * one. If the cursor is on the right page, it is decremented by the * number of records split to the left page. */ c = &t->bt_cursor; if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) { if (c->pg.index >= skip) ++c->pg.index; if (c->pg.index < nxt) /* Left page. */ c->pg.pgno = l->pgno; else { /* Right page. */ c->pg.pgno = r->pgno; c->pg.index -= nxt; } } /* * If the skipped index was on the left page, just return that page. * Otherwise, adjust the skip index to reflect the new position on * the right page. */ if (skip <= off) { skip = 0; rval = l; } else { rval = r; *pskip -= nxt; } for (off = 0; nxt < top; ++off) { if (skip == nxt) { ++off; skip = 0; } switch (h->flags & P_TYPE) { case P_BINTERNAL: src = bi = GETBINTERNAL(h, nxt); nbytes = NBINTERNAL(bi->ksize); break; case P_BLEAF: src = bl = GETBLEAF(h, nxt); nbytes = NBLEAF(bl); break; case P_RINTERNAL: src = GETRINTERNAL(h, nxt); nbytes = NRINTERNAL; break; case P_RLEAF: src = rl = GETRLEAF(h, nxt); nbytes = NRLEAF(rl); break; default: abort(); } ++nxt; r->linp[off] = r->upper -= nbytes; memmove((char *)r + r->upper, src, nbytes); } r->lower += off * sizeof(indx_t); /* If the key is being appended to the page, adjust the index. */ if (skip == top) r->lower += sizeof(indx_t); return (rval); } /* * BT_PRESERVE -- Mark a chain of pages as used by an internal node. * * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the * record that references them gets deleted. Chains pointed to by internal * pages never get deleted. This routine marks a chain as pointed to by an * internal page. * * Parameters: * t: tree * pg: page number of first page in the chain. * * Returns: * RET_SUCCESS, RET_ERROR. */ static int bt_preserve(t, pg) BTREE *t; pgno_t pg; { PAGE *h; if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); h->flags |= P_PRESERVE; mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); } /* * REC_TOTAL -- Return the number of recno entries below a page. * * Parameters: * h: page * * Returns: * The number of recno entries below a page. * * XXX * These values could be set by the bt_psplit routine. The problem is that the * entry has to be popped off of the stack etc. or the values have to be passed * all the way back to bt_split/bt_rroot and it's not very clean. */ static recno_t rec_total(h) PAGE *h; { recno_t recs; indx_t nxt, top; for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) recs += GETRINTERNAL(h, nxt)->nrecs; return (recs); } db/btree/bt_utils.c0100644000076400007640000001524006256366207015026 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)bt_utils.c 8.8 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "btree.h" /* * __bt_ret -- * Build return key/data pair. * * Parameters: * t: tree * e: key/data pair to be returned * key: user's key structure (NULL if not to be filled in) * rkey: memory area to hold key * data: user's data structure (NULL if not to be filled in) * rdata: memory area to hold data * copy: always copy the key/data item * * Returns: * RET_SUCCESS, RET_ERROR. */ int __bt_ret(t, e, key, rkey, data, rdata, copy) BTREE *t; EPG *e; DBT *key, *rkey, *data, *rdata; int copy; { BLEAF *bl; void *p; bl = GETBLEAF(e->page, e->index); /* * We must copy big keys/data to make them contiguous. Otherwise, * leave the page pinned and don't copy unless the user specified * concurrent access. */ if (key == NULL) goto dataonly; if (bl->flags & P_BIGKEY) { if (__ovfl_get(t, bl->bytes, &key->size, &rkey->data, &rkey->size)) return (RET_ERROR); key->data = rkey->data; } else if (copy || F_ISSET(t, B_DB_LOCK)) { if (bl->ksize > rkey->size) { p = (void *)(rkey->data == NULL ? malloc(bl->ksize) : realloc(rkey->data, bl->ksize)); if (p == NULL) return (RET_ERROR); rkey->data = p; rkey->size = bl->ksize; } memmove(rkey->data, bl->bytes, bl->ksize); key->size = bl->ksize; key->data = rkey->data; } else { key->size = bl->ksize; key->data = bl->bytes; } dataonly: if (data == NULL) return (RET_SUCCESS); if (bl->flags & P_BIGDATA) { if (__ovfl_get(t, bl->bytes + bl->ksize, &data->size, &rdata->data, &rdata->size)) return (RET_ERROR); data->data = rdata->data; } else if (copy || F_ISSET(t, B_DB_LOCK)) { /* Use +1 in case the first record retrieved is 0 length. */ if (bl->dsize + 1 > rdata->size) { p = (void *)(rdata->data == NULL ? malloc(bl->dsize + 1) : realloc(rdata->data, bl->dsize + 1)); if (p == NULL) return (RET_ERROR); rdata->data = p; rdata->size = bl->dsize + 1; } memmove(rdata->data, bl->bytes + bl->ksize, bl->dsize); data->size = bl->dsize; data->data = rdata->data; } else { data->size = bl->dsize; data->data = bl->bytes + bl->ksize; } return (RET_SUCCESS); } /* * __BT_CMP -- Compare a key to a given record. * * Parameters: * t: tree * k1: DBT pointer of first arg to comparison * e: pointer to EPG for comparison * * Returns: * < 0 if k1 is < record * = 0 if k1 is = record * > 0 if k1 is > record */ int __bt_cmp(t, k1, e) BTREE *t; const DBT *k1; EPG *e; { BINTERNAL *bi; BLEAF *bl; DBT k2; PAGE *h; void *bigkey; /* * The left-most key on internal pages, at any level of the tree, is * guaranteed by the following code to be less than any user key. * This saves us from having to update the leftmost key on an internal * page when the user inserts a new key in the tree smaller than * anything we've yet seen. */ h = e->page; if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & P_BLEAF)) return (1); bigkey = NULL; if (h->flags & P_BLEAF) { bl = GETBLEAF(h, e->index); if (bl->flags & P_BIGKEY) bigkey = bl->bytes; else { k2.data = bl->bytes; k2.size = bl->ksize; } } else { bi = GETBINTERNAL(h, e->index); if (bi->flags & P_BIGKEY) bigkey = bi->bytes; else { k2.data = bi->bytes; k2.size = bi->ksize; } } if (bigkey) { if (__ovfl_get(t, bigkey, &k2.size, &t->bt_rdata.data, &t->bt_rdata.size)) return (RET_ERROR); k2.data = t->bt_rdata.data; } return ((*t->bt_cmp)(k1, &k2)); } /* * __BT_DEFCMP -- Default comparison routine. * * Parameters: * a: DBT #1 * b: DBT #2 * * Returns: * < 0 if a is < b * = 0 if a is = b * > 0 if a is > b */ int __bt_defcmp(a, b) const DBT *a, *b; { register size_t len; register u_char *p1, *p2; /* * XXX * If a size_t doesn't fit in an int, this routine can lose. * What we need is a integral type which is guaranteed to be * larger than a size_t, and there is no such thing. */ len = MIN(a->size, b->size); for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2) if (*p1 != *p2) return ((int)*p1 - (int)*p2); return ((int)a->size - (int)b->size); } /* * __BT_DEFPFX -- Default prefix routine. * * Parameters: * a: DBT #1 * b: DBT #2 * * Returns: * Number of bytes needed to distinguish b from a. */ size_t __bt_defpfx(a, b) const DBT *a, *b; { register u_char *p1, *p2; register size_t cnt, len; cnt = 1; len = MIN(a->size, b->size); for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt) if (*p1 != *p2) return (cnt); /* a->size must be <= b->size, or they wouldn't be in this order. */ return (a->size < b->size ? a->size + 1 : a->size); } db/btree/btree.h0100644000076400007640000003513506323402115014274 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)btree.h 8.11 (Berkeley) 8/17/94 */ /* Macros to set/clear/test flags. */ #define F_SET(p, f) (p)->flags |= (f) #define F_CLR(p, f) (p)->flags &= ~(f) #define F_ISSET(p, f) ((p)->flags & (f)) #include #ifdef _LIBC /* In the GNU C library we must not pollute the namespace because libdb is needed by libnss_db. */ #define mpool_open __mpool_open #define mpool_filter __mpool_filter #define mpool_new __mpool_new #define mpool_get __mpool_get #define mpool_put __mpool_put #define mpool_sync __mpool_sync #define mpool_close __mpool_close #endif #define DEFMINKEYPAGE (2) /* Minimum keys per page */ #define MINCACHE (5) /* Minimum cached pages */ #define MINPSIZE (512) /* Minimum page size */ /* * Page 0 of a btree file contains a copy of the meta-data. This page is also * used as an out-of-band page, i.e. page pointers that point to nowhere point * to page 0. Page 1 is the root of the btree. */ #define P_INVALID 0 /* Invalid tree page number. */ #define P_META 0 /* Tree metadata page number. */ #define P_ROOT 1 /* Tree root page number. */ /* * There are five page layouts in the btree: btree internal pages (BINTERNAL), * btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages * (RLEAF) and overflow pages. All five page types have a page header (PAGE). * This implementation requires that values within structures NOT be padded. * (ANSI C permits random padding.) If your compiler pads randomly you'll have * to do some work to get this package to run. */ typedef struct _page { pgno_t pgno; /* this page's page number */ pgno_t prevpg; /* left sibling */ pgno_t nextpg; /* right sibling */ #define P_BINTERNAL 0x01 /* btree internal page */ #define P_BLEAF 0x02 /* leaf page */ #define P_OVERFLOW 0x04 /* overflow page */ #define P_RINTERNAL 0x08 /* recno internal page */ #define P_RLEAF 0x10 /* leaf page */ #define P_TYPE 0x1f /* type mask */ #define P_PRESERVE 0x20 /* never delete this chain of pages */ u_int32_t flags; indx_t lower; /* lower bound of free space on page */ indx_t upper; /* upper bound of free space on page */ indx_t linp[1]; /* indx_t-aligned VAR. LENGTH DATA */ } PAGE; /* First and next index. */ #define BTDATAOFF \ (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \ sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) #define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t)) /* * For pages other than overflow pages, there is an array of offsets into the * rest of the page immediately following the page header. Each offset is to * an item which is unique to the type of page. The h_lower offset is just * past the last filled-in index. The h_upper offset is the first item on the * page. Offsets are from the beginning of the page. * * If an item is too big to store on a single page, a flag is set and the item * is a { page, size } pair such that the page is the first page of an overflow * chain with size bytes of item. Overflow pages are simply bytes without any * external structure. * * The page number and size fields in the items are pgno_t-aligned so they can * be manipulated without copying. (This presumes that 32 bit items can be * manipulated on this system.) */ #define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1)) #define NOVFLSIZE (sizeof(pgno_t) + sizeof(u_int32_t)) /* * For the btree internal pages, the item is a key. BINTERNALs are {key, pgno} * pairs, such that the key compares less than or equal to all of the records * on that page. For a tree without duplicate keys, an internal page with two * consecutive keys, a and b, will have all records greater than or equal to a * and less than b stored on the page associated with a. Duplicate keys are * somewhat special and can cause duplicate internal and leaf page records and * some minor modifications of the above rule. */ typedef struct _binternal { u_int32_t ksize; /* key size */ pgno_t pgno; /* page number stored on */ #define P_BIGDATA 0x01 /* overflow data */ #define P_BIGKEY 0x02 /* overflow key */ u_char flags; char bytes[1]; /* data */ } BINTERNAL; /* Get the page's BINTERNAL structure at index indx. */ #define GETBINTERNAL(pg, indx) \ ((BINTERNAL *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NBINTERNAL(len) \ LALIGN(sizeof(u_int32_t) + sizeof(pgno_t) + sizeof(u_char) + (len)) /* Copy a BINTERNAL entry to the page. */ #define WR_BINTERNAL(p, size, pgno, flags) { \ *(u_int32_t *)p = size; \ p += sizeof(u_int32_t); \ *(pgno_t *)p = pgno; \ p += sizeof(pgno_t); \ *(u_char *)p = flags; \ p += sizeof(u_char); \ } /* * For the recno internal pages, the item is a page number with the number of * keys found on that page and below. */ typedef struct _rinternal { recno_t nrecs; /* number of records */ pgno_t pgno; /* page number stored below */ } RINTERNAL; /* Get the page's RINTERNAL structure at index indx. */ #define GETRINTERNAL(pg, indx) \ ((RINTERNAL *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NRINTERNAL \ LALIGN(sizeof(recno_t) + sizeof(pgno_t)) /* Copy a RINTERNAL entry to the page. */ #define WR_RINTERNAL(p, nrecs, pgno) { \ *(recno_t *)p = nrecs; \ p += sizeof(recno_t); \ *(pgno_t *)p = pgno; \ } /* For the btree leaf pages, the item is a key and data pair. */ typedef struct _bleaf { u_int32_t ksize; /* size of key */ u_int32_t dsize; /* size of data */ u_char flags; /* P_BIGDATA, P_BIGKEY */ char bytes[1]; /* data */ } BLEAF; /* Get the page's BLEAF structure at index indx. */ #define GETBLEAF(pg, indx) \ ((BLEAF *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize) /* Get the number of bytes in the user's key/data pair. */ #define NBLEAFDBT(ksize, dsize) \ LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \ (ksize) + (dsize)) /* Copy a BLEAF entry to the page. */ #define WR_BLEAF(p, key, data, flags) { \ *(u_int32_t *)p = key->size; \ p += sizeof(u_int32_t); \ *(u_int32_t *)p = data->size; \ p += sizeof(u_int32_t); \ *(u_char *)p = flags; \ p += sizeof(u_char); \ memmove(p, key->data, key->size); \ p += key->size; \ memmove(p, data->data, data->size); \ } /* For the recno leaf pages, the item is a data entry. */ typedef struct _rleaf { u_int32_t dsize; /* size of data */ u_char flags; /* P_BIGDATA */ char bytes[1]; } RLEAF; /* Get the page's RLEAF structure at index indx. */ #define GETRLEAF(pg, indx) \ ((RLEAF *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NRLEAF(p) NRLEAFDBT((p)->dsize) /* Get the number of bytes from the user's data. */ #define NRLEAFDBT(dsize) \ LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize)) /* Copy a RLEAF entry to the page. */ #define WR_RLEAF(p, data, flags) { \ *(u_int32_t *)p = data->size; \ p += sizeof(u_int32_t); \ *(u_char *)p = flags; \ p += sizeof(u_char); \ memmove(p, data->data, data->size); \ } /* * A record in the tree is either a pointer to a page and an index in the page * or a page number and an index. These structures are used as a cursor, stack * entry and search returns as well as to pass records to other routines. * * One comment about searches. Internal page searches must find the largest * record less than key in the tree so that descents work. Leaf page searches * must find the smallest record greater than key so that the returned index * is the record's correct position for insertion. */ typedef struct _epgno { pgno_t pgno; /* the page number */ indx_t index; /* the index on the page */ } EPGNO; typedef struct _epg { PAGE *page; /* the (pinned) page */ indx_t index; /* the index on the page */ } EPG; /* * About cursors. The cursor (and the page that contained the key/data pair * that it referenced) can be deleted, which makes things a bit tricky. If * there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set * or there simply aren't any duplicates of the key) we copy the key that it * referenced when it's deleted, and reacquire a new cursor key if the cursor * is used again. If there are duplicates keys, we move to the next/previous * key, and set a flag so that we know what happened. NOTE: if duplicate (to * the cursor) keys are added to the tree during this process, it is undefined * if they will be returned or not in a cursor scan. * * The flags determine the possible states of the cursor: * * CURS_INIT The cursor references *something*. * CURS_ACQUIRE The cursor was deleted, and a key has been saved so that * we can reacquire the right position in the tree. * CURS_AFTER, CURS_BEFORE * The cursor was deleted, and now references a key/data pair * that has not yet been returned, either before or after the * deleted key/data pair. * XXX * This structure is broken out so that we can eventually offer multiple * cursors as part of the DB interface. */ typedef struct _cursor { EPGNO pg; /* B: Saved tree reference. */ DBT key; /* B: Saved key, or key.data == NULL. */ recno_t rcursor; /* R: recno cursor (1-based) */ #define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */ #define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */ #define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */ #define CURS_INIT 0x08 /* RB: Cursor initialized. */ u_int8_t flags; } CURSOR; /* * The metadata of the tree. The nrecs field is used only by the RECNO code. * This is because the btree doesn't really need it and it requires that every * put or delete call modify the metadata. */ typedef struct _btmeta { u_int32_t magic; /* magic number */ u_int32_t version; /* version */ u_int32_t psize; /* page size */ u_int32_t free; /* page number of first free page */ u_int32_t nrecs; /* R: number of records */ #define SAVEMETA (B_NODUPS | R_RECNO) u_int32_t flags; /* bt_flags & SAVEMETA */ } BTMETA; /* The in-memory btree/recno data structure. */ typedef struct _btree { MPOOL *bt_mp; /* memory pool cookie */ DB *bt_dbp; /* pointer to enclosing DB */ EPG bt_cur; /* current (pinned) page */ PAGE *bt_pinned; /* page pinned across calls */ CURSOR bt_cursor; /* cursor */ #define BT_PUSH(t, p, i) { \ t->bt_sp->pgno = p; \ t->bt_sp->index = i; \ ++t->bt_sp; \ } #define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp) #define BT_CLR(t) (t->bt_sp = t->bt_stack) EPGNO bt_stack[50]; /* stack of parent pages */ EPGNO *bt_sp; /* current stack pointer */ DBT bt_rkey; /* returned key */ DBT bt_rdata; /* returned data */ int bt_fd; /* tree file descriptor */ pgno_t bt_free; /* next free page */ u_int32_t bt_psize; /* page size */ indx_t bt_ovflsize; /* cut-off for key/data overflow */ int bt_lorder; /* byte order */ /* sorted order */ enum { NOT, BACK, FORWARD } bt_order; EPGNO bt_last; /* last insert */ /* B: key comparison function */ int (*bt_cmp) __P((const DBT *, const DBT *)); /* B: prefix comparison function */ size_t (*bt_pfx) __P((const DBT *, const DBT *)); /* R: recno input function */ int (*bt_irec) __P((struct _btree *, recno_t)); FILE *bt_rfp; /* R: record FILE pointer */ int bt_rfd; /* R: record file descriptor */ caddr_t bt_cmap; /* R: current point in mapped space */ caddr_t bt_smap; /* R: start of mapped space */ caddr_t bt_emap; /* R: end of mapped space */ size_t bt_msize; /* R: size of mapped region. */ recno_t bt_nrecs; /* R: number of records */ size_t bt_reclen; /* R: fixed record length */ u_char bt_bval; /* R: delimiting byte/pad character */ /* * NB: * B_NODUPS and R_RECNO are stored on disk, and may not be changed. */ #define B_INMEM 0x00001 /* in-memory tree */ #define B_METADIRTY 0x00002 /* need to write metadata */ #define B_MODIFIED 0x00004 /* tree modified */ #define B_NEEDSWAP 0x00008 /* if byte order requires swapping */ #define B_RDONLY 0x00010 /* read-only tree */ #define B_NODUPS 0x00020 /* no duplicate keys permitted */ #define R_RECNO 0x00080 /* record oriented tree */ #define R_CLOSEFP 0x00040 /* opened a file pointer */ #define R_EOF 0x00100 /* end of input file reached. */ #define R_FIXLEN 0x00200 /* fixed length records */ #define R_MEMMAPPED 0x00400 /* memory mapped file. */ #define R_INMEM 0x00800 /* in-memory file */ #define R_MODIFIED 0x01000 /* modified file */ #define R_RDONLY 0x02000 /* read-only file */ #define B_DB_LOCK 0x04000 /* DB_LOCK specified. */ #define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */ #define B_DB_TXN 0x10000 /* DB_TXN specified. */ u_int32_t flags; } BTREE; #include "extern.h" db/btree/extern.h0100644000076400007640000000634606072171217014511 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)extern.h 8.10 (Berkeley) 7/20/94 */ int __bt_close __P((DB *)); int __bt_cmp __P((BTREE *, const DBT *, EPG *)); int __bt_crsrdel __P((BTREE *, EPGNO *)); int __bt_defcmp __P((const DBT *, const DBT *)); size_t __bt_defpfx __P((const DBT *, const DBT *)); int __bt_delete __P((const DB *, const DBT *, u_int)); int __bt_dleaf __P((BTREE *, const DBT *, PAGE *, u_int)); int __bt_fd __P((const DB *)); int __bt_free __P((BTREE *, PAGE *)); int __bt_get __P((const DB *, const DBT *, DBT *, u_int)); PAGE *__bt_new __P((BTREE *, pgno_t *)); void __bt_pgin __P((void *, pgno_t, void *)); void __bt_pgout __P((void *, pgno_t, void *)); int __bt_push __P((BTREE *, pgno_t, int)); int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int)); int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int)); EPG *__bt_search __P((BTREE *, const DBT *, int *)); int __bt_seq __P((const DB *, DBT *, DBT *, u_int)); void __bt_setcur __P((BTREE *, pgno_t, u_int)); int __bt_split __P((BTREE *, PAGE *, const DBT *, const DBT *, int, size_t, u_int32_t)); int __bt_sync __P((const DB *, u_int)); int __ovfl_delete __P((BTREE *, void *)); int __ovfl_get __P((BTREE *, void *, size_t *, void **, size_t *)); int __ovfl_put __P((BTREE *, const DBT *, pgno_t *)); #ifdef DEBUG void __bt_dnpage __P((DB *, pgno_t)); void __bt_dpage __P((PAGE *)); void __bt_dump __P((DB *)); #endif #ifdef STATISTICS void __bt_stat __P((DB *)); #endif db/compat.h0100644000076400007640000000172206072171147013361 0ustar cjwatsoncjwatson/* Values for building 4.4 BSD db routines in the GNU C library. */ #ifndef _compat_h_ #define _compat_h_ #include /* * If you can't provide lock values in the open(2) call. Note, this * allows races to happen. */ #ifndef O_EXLOCK /* 4.4BSD extension. */ #define O_EXLOCK 0 #endif #ifndef O_SHLOCK /* 4.4BSD extension. */ #define O_SHLOCK 0 #endif #include #ifndef EFTYPE #define EFTYPE EINVAL /* POSIX 1003.1 format errno. */ #endif #include #include #ifndef _POSIX_VDISABLE /* POSIX 1003.1 disabling char. */ #define _POSIX_VDISABLE 0 /* Some systems used 0. */ #endif #include #ifndef TCSASOFT /* 4.4BSD extension. */ #define TCSASOFT 0 #endif #include #ifndef MAX /* Usually found in . */ #define MAX(_a,_b) ((_a)<(_b)?(_b):(_a)) #endif #ifndef MIN /* Usually found in . */ #define MIN(_a,_b) ((_a)<(_b)?(_a):(_b)) #endif #endif /* compat.h */ db/db/0040755000076400007640000000000007055362607012320 5ustar cjwatsoncjwatsondb/db/db.c0100644000076400007640000000722006323402133013031 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)db.c 8.4 (Berkeley) 2/21/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #ifdef _LIBC /* In the GNU C library we must not pollute the namespace, because libdb is needed by libnss_db. */ #define dbopen __dbopen #endif DB * dbopen(fname, flags, mode, type, openinfo) const char *fname; int flags, mode; DBTYPE type; const void *openinfo; { #define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN) #define USE_OPEN_FLAGS \ (O_CREAT | O_EXCL | O_EXLOCK | O_NONBLOCK | O_RDONLY | \ O_RDWR | O_SHLOCK | O_TRUNC) if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0) switch (type) { case DB_BTREE: return (__bt_open(fname, flags & USE_OPEN_FLAGS, mode, openinfo, flags & DB_FLAGS)); case DB_HASH: return (__hash_open(fname, flags & USE_OPEN_FLAGS, mode, openinfo, flags & DB_FLAGS)); case DB_RECNO: return (__rec_open(fname, flags & USE_OPEN_FLAGS, mode, openinfo, flags & DB_FLAGS)); } errno = EINVAL; return (NULL); } #ifdef _LIBC #undef dbopen weak_alias (__dbopen, dbopen) #endif static int __dberr __P((void)) { return (RET_ERROR); } /* * __DBPANIC -- Stop. * * Parameters: * dbp: pointer to the DB structure. */ void __dbpanic(dbp) DB *dbp; { /* The only thing that can succeed is a close. */ dbp->del = (int (*)__P((const struct __db *, const DBT *, u_int))) __dberr; dbp->get = (int (*)__P((const struct __db *, const DBT *, DBT *, u_int))) __dberr; dbp->put = (int (*)__P((const struct __db *, DBT *, const DBT *, u_int))) __dberr; dbp->seq = (int (*)__P((const struct __db *, DBT *, DBT *, u_int))) __dberr; dbp->sync = (int (*)__P((const struct __db *, u_int))) __dberr; dbp->fd = (int (*)__P((const struct __db *))) __dberr; } db/db.h0100644000076400007640000002015206614102773012462 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)db.h 8.7 (Berkeley) 6/16/94 */ #ifndef _DB_H #define _DB_H 1 #include #include #include #ifdef __DBINTERFACE_PRIVATE #include #endif #define RET_ERROR -1 /* Return values. */ #define RET_SUCCESS 0 #define RET_SPECIAL 1 #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ typedef __signed char int8_t; typedef unsigned char u_int8_t; typedef short int16_t; typedef unsigned short u_int16_t; typedef int int32_t; typedef unsigned int u_int32_t; #ifdef WE_DONT_NEED_QUADS typedef long long int64_t; typedef unsigned long long u_int64_t; #endif #endif #define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */ typedef u_int32_t pgno_t; #define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */ typedef u_int16_t indx_t; #define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */ typedef u_int32_t recno_t; /* Key/data structure -- a Data-Base Thang. */ typedef struct { void *data; /* data */ size_t size; /* data length */ } DBT; /* Routine flags. */ #define R_CURSOR 1 /* del, put, seq */ #define __R_UNUSED 2 /* UNUSED */ #define R_FIRST 3 /* seq */ #define R_IAFTER 4 /* put (RECNO) */ #define R_IBEFORE 5 /* put (RECNO) */ #define R_LAST 6 /* seq (BTREE, RECNO) */ #define R_NEXT 7 /* seq */ #define R_NOOVERWRITE 8 /* put */ #define R_PREV 9 /* seq (BTREE, RECNO) */ #define R_SETCURSOR 10 /* put (RECNO) */ #define R_RECNOSYNC 11 /* sync (RECNO) */ typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; /* * !!! * The following flags are included in the dbopen(3) call as part of the * open(2) flags. In order to avoid conflicts with the open flags, start * at the top of the 16 or 32-bit number space and work our way down. If * the open flags were significantly expanded in the future, it could be * a problem. Wish I'd left another flags word in the dbopen call. * * !!! * None of this stuff is implemented yet. The only reason that it's here * is so that the access methods can skip copying the key/data pair when * the DB_LOCK flag isn't set. */ #if UINT_MAX > 65535 #define DB_LOCK 0x20000000 /* Do locking. */ #define DB_SHMEM 0x40000000 /* Use shared memory. */ #define DB_TXN 0x80000000 /* Do transactions. */ #else #define DB_LOCK 0x2000 /* Do locking. */ #define DB_SHMEM 0x4000 /* Use shared memory. */ #define DB_TXN 0x8000 /* Do transactions. */ #endif /* Access method description structure. */ typedef struct __db { DBTYPE type; /* Underlying db type. */ int (*close) __PMT((struct __db *)); int (*del) __PMT((const struct __db *, const DBT *, u_int)); int (*get) __PMT((const struct __db *, const DBT *, DBT *, u_int)); int (*put) __PMT((const struct __db *, DBT *, const DBT *, u_int)); int (*seq) __PMT((const struct __db *, DBT *, DBT *, u_int)); int (*sync) __PMT((const struct __db *, u_int)); void *internal; /* Access method private. */ int (*fd) __PMT((const struct __db *)); } DB; #define BTREEMAGIC 0x053162 #define BTREEVERSION 3 /* Structure used to pass parameters to the btree routines. */ typedef struct { #define R_DUP 0x01 /* duplicate keys */ u_long flags; u_int cachesize; /* bytes to cache */ int maxkeypage; /* maximum keys per page */ int minkeypage; /* minimum keys per page */ u_int psize; /* page size */ int (*compare) /* comparison function */ __PMT((const DBT *, const DBT *)); size_t (*prefix) /* prefix function */ __PMT((const DBT *, const DBT *)); int lorder; /* byte order */ } BTREEINFO; #define HASHMAGIC 0x061561 #define HASHVERSION 2 /* Structure used to pass parameters to the hashing routines. */ typedef struct { u_int bsize; /* bucket size */ u_int ffactor; /* fill factor */ u_int nelem; /* number of elements */ u_int cachesize; /* bytes to cache */ u_int32_t /* hash function */ (*hash) __PMT((const void *, size_t)); int lorder; /* byte order */ } HASHINFO; /* Structure used to pass parameters to the record routines. */ typedef struct { #define R_FIXEDLEN 0x01 /* fixed-length records */ #define R_NOKEY 0x02 /* key not required */ #define R_SNAPSHOT 0x04 /* snapshot the input */ u_long flags; u_int cachesize; /* bytes to cache */ u_int psize; /* page size */ int lorder; /* byte order */ size_t reclen; /* record length (fixed-length records) */ u_char bval; /* delimiting byte (variable-length records */ char *bfname; /* btree file name */ } RECNOINFO; #ifdef __DBINTERFACE_PRIVATE /* * Little endian <==> big endian 32-bit swap macros. * M_32_SWAP swap a memory location * P_32_SWAP swap a referenced memory location * P_32_COPY swap from one location to another */ #define M_32_SWAP(a) { \ u_int32_t _tmp = a; \ ((char *)&a)[0] = ((char *)&_tmp)[3]; \ ((char *)&a)[1] = ((char *)&_tmp)[2]; \ ((char *)&a)[2] = ((char *)&_tmp)[1]; \ ((char *)&a)[3] = ((char *)&_tmp)[0]; \ } #define P_32_SWAP(a) { \ u_int32_t _tmp = *(u_int32_t *)a; \ ((char *)a)[0] = ((char *)&_tmp)[3]; \ ((char *)a)[1] = ((char *)&_tmp)[2]; \ ((char *)a)[2] = ((char *)&_tmp)[1]; \ ((char *)a)[3] = ((char *)&_tmp)[0]; \ } #define P_32_COPY(a, b) { \ ((char *)&(b))[0] = ((char *)&(a))[3]; \ ((char *)&(b))[1] = ((char *)&(a))[2]; \ ((char *)&(b))[2] = ((char *)&(a))[1]; \ ((char *)&(b))[3] = ((char *)&(a))[0]; \ } /* * Little endian <==> big endian 16-bit swap macros. * M_16_SWAP swap a memory location * P_16_SWAP swap a referenced memory location * P_16_COPY swap from one location to another */ #define M_16_SWAP(a) { \ u_int16_t _tmp = a; \ ((char *)&a)[0] = ((char *)&_tmp)[1]; \ ((char *)&a)[1] = ((char *)&_tmp)[0]; \ } #define P_16_SWAP(a) { \ u_int16_t _tmp = *(u_int16_t *)a; \ ((char *)a)[0] = ((char *)&_tmp)[1]; \ ((char *)a)[1] = ((char *)&_tmp)[0]; \ } #define P_16_COPY(a, b) { \ ((char *)&(b))[0] = ((char *)&(a))[1]; \ ((char *)&(b))[1] = ((char *)&(a))[0]; \ } #endif __BEGIN_DECLS DB *__dbopen __P((const char *, int, int, DBTYPE, const void *)); DB *dbopen __P((const char *, int, int, DBTYPE, const void *)); #ifdef __DBINTERFACE_PRIVATE DB *__bt_open __P((const char *, int, int, const BTREEINFO *, int)); DB *__hash_open __P((const char *, int, int, const HASHINFO *, int)); DB *__rec_open __P((const char *, int, int, const RECNOINFO *, int)); void __dbpanic __P((DB *dbp)); #endif __END_DECLS #endif /* db.h */ db/hash/0040755000076400007640000000000007055362607012656 5ustar cjwatsoncjwatsondb/hash/hash.c0100644000076400007640000005730606602177365013757 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash.c 8.9 (Berkeley) 6/16/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include #include #ifdef DEBUG #include #endif #include #include "hash.h" #include "page.h" #include "extern.h" static int alloc_segs __P((HTAB *, int)); static int flush_meta __P((HTAB *)); static int hash_access __P((HTAB *, ACTION, DBT *, DBT *)); static int hash_close __P((DB *)); static int hash_delete __P((const DB *, const DBT *, u_int32_t)); static int hash_fd __P((const DB *)); static int hash_get __P((const DB *, const DBT *, DBT *, u_int32_t)); static int hash_put __P((const DB *, DBT *, const DBT *, u_int32_t)); static void *hash_realloc __P((SEGMENT **, int, int)); static int hash_seq __P((const DB *, DBT *, DBT *, u_int32_t)); static int hash_sync __P((const DB *, u_int32_t)); static int hdestroy __P((HTAB *)); static HTAB *init_hash __P((HTAB *, const char *, HASHINFO *)); static int init_htab __P((HTAB *, int)); #if BYTE_ORDER == LITTLE_ENDIAN static void swap_header __P((HTAB *)); static void swap_header_copy __P((HASHHDR *, HASHHDR *)); #endif /* Fast arithmetic, relying on powers of 2, */ #define MOD(x, y) ((x) & ((y) - 1)) #define RETURN_ERROR(ERR, LOC) { save_errno = ERR; goto LOC; } /* Return values */ #define SUCCESS (0) #define ERROR (-1) #define ABNORMAL (1) #ifdef HASH_STATISTICS int hash_accesses, hash_collisions, hash_expansions, hash_overflows; #endif /************************** INTERFACE ROUTINES ***************************/ /* OPEN/CLOSE */ extern DB * __hash_open(file, flags, mode, info, dflags) const char *file; int flags, mode, dflags; const HASHINFO *info; /* Special directives for create */ { HTAB *hashp; struct stat statbuf; DB *dbp; int bpages, hdrsize, new_table, nsegs, save_errno; if ((flags & O_ACCMODE) == O_WRONLY) { errno = EINVAL; return (NULL); } if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB)))) return (NULL); hashp->fp = -1; /* * Even if user wants write only, we need to be able to read * the actual file, so we need to open it read/write. But, the * field in the hashp structure needs to be accurate so that * we can check accesses. */ hashp->flags = flags; new_table = 0; if (!file || (flags & O_TRUNC) || (stat(file, &statbuf) && (errno == ENOENT))) { if (errno == ENOENT) errno = 0; /* Just in case someone looks at errno */ new_table = 1; } if (file) { if ((hashp->fp = open(file, flags, mode)) == -1) RETURN_ERROR(errno, error0); (void)fcntl(hashp->fp, F_SETFD, 1); } if (new_table) { if (!(hashp = init_hash(hashp, file, (HASHINFO *)info))) RETURN_ERROR(errno, error1); } else { /* Table already exists */ if (info && info->hash) hashp->hash = info->hash; else hashp->hash = __default_hash; hdrsize = read(hashp->fp, &hashp->hdr, sizeof(HASHHDR)); #if BYTE_ORDER == LITTLE_ENDIAN swap_header(hashp); #endif if (hdrsize == -1) RETURN_ERROR(errno, error1); if (hdrsize != sizeof(HASHHDR)) RETURN_ERROR(EFTYPE, error1); /* Verify file type, versions and hash function */ if (hashp->MAGIC != HASHMAGIC) RETURN_ERROR(EFTYPE, error1); #define OLDHASHVERSION 1 if (hashp->VERSION != HASHVERSION && hashp->VERSION != OLDHASHVERSION) RETURN_ERROR(EFTYPE, error1); if (hashp->hash(CHARKEY, sizeof(CHARKEY)) != (u_int32_t) hashp->H_CHARKEY) RETURN_ERROR(EFTYPE, error1); /* * Figure out how many segments we need. Max_Bucket is the * maximum bucket number, so the number of buckets is * max_bucket + 1. */ nsegs = (hashp->MAX_BUCKET + 1 + hashp->SGSIZE - 1) / hashp->SGSIZE; hashp->nsegs = 0; if (alloc_segs(hashp, nsegs)) /* * If alloc_segs fails, table will have been destroyed * and errno will have been set. */ return (NULL); /* Read in bitmaps */ bpages = (hashp->SPARES[hashp->OVFL_POINT] + (hashp->BSIZE << BYTE_SHIFT) - 1) >> (hashp->BSHIFT + BYTE_SHIFT); hashp->nmaps = bpages; (void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *)); } /* Initialize Buffer Manager */ if (info && info->cachesize) __buf_init(hashp, info->cachesize); else __buf_init(hashp, DEF_BUFSIZE); hashp->new_file = new_table; hashp->save_file = file && (hashp->flags & O_ACCMODE) != O_RDONLY; hashp->cbucket = -1; if (!(dbp = (DB *)malloc(sizeof(DB)))) { save_errno = errno; hdestroy(hashp); errno = save_errno; return (NULL); } dbp->internal = hashp; dbp->close = hash_close; dbp->del = hash_delete; dbp->fd = hash_fd; dbp->get = hash_get; dbp->put = hash_put; dbp->seq = hash_seq; dbp->sync = hash_sync; dbp->type = DB_HASH; #ifdef DEBUG (void)fprintf(stderr, "%s\n%s%x\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n", "init_htab:", "TABLE POINTER ", hashp, "BUCKET SIZE ", hashp->BSIZE, "BUCKET SHIFT ", hashp->BSHIFT, "DIRECTORY SIZE ", hashp->DSIZE, "SEGMENT SIZE ", hashp->SGSIZE, "SEGMENT SHIFT ", hashp->SSHIFT, "FILL FACTOR ", hashp->FFACTOR, "MAX BUCKET ", hashp->MAX_BUCKET, "OVFL POINT ", hashp->OVFL_POINT, "LAST FREED ", hashp->LAST_FREED, "HIGH MASK ", hashp->HIGH_MASK, "LOW MASK ", hashp->LOW_MASK, "NSEGS ", hashp->nsegs, "NKEYS ", hashp->NKEYS); #endif #ifdef HASH_STATISTICS hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0; #endif return (dbp); error1: if (hashp != NULL) (void)close(hashp->fp); error0: free(hashp); errno = save_errno; return (NULL); } static int hash_close(dbp) DB *dbp; { HTAB *hashp; int retval; if (!dbp) return (ERROR); hashp = (HTAB *)dbp->internal; retval = hdestroy(hashp); free(dbp); return (retval); } static int hash_fd(dbp) const DB *dbp; { HTAB *hashp; if (!dbp) return (ERROR); hashp = (HTAB *)dbp->internal; if (hashp->fp == -1) { errno = ENOENT; return (-1); } return (hashp->fp); } /************************** LOCAL CREATION ROUTINES **********************/ static HTAB * init_hash(hashp, file, info) HTAB *hashp; const char *file; HASHINFO *info; { #ifdef _STATBUF_ST_BLKSIZE struct stat statbuf; #endif int nelem; nelem = 1; hashp->NKEYS = 0; hashp->LORDER = BYTE_ORDER; hashp->BSIZE = DEF_BUCKET_SIZE; hashp->BSHIFT = DEF_BUCKET_SHIFT; hashp->SGSIZE = DEF_SEGSIZE; hashp->SSHIFT = DEF_SEGSIZE_SHIFT; hashp->DSIZE = DEF_DIRSIZE; hashp->FFACTOR = DEF_FFACTOR; hashp->hash = __default_hash; memset(hashp->SPARES, 0, sizeof(hashp->SPARES)); memset(hashp->BITMAPS, 0, sizeof (hashp->BITMAPS)); /* Fix bucket size to be optimal for file system */ #ifdef _STATBUF_ST_BLKSIZE if (file != NULL) { if (stat(file, &statbuf)) return (NULL); hashp->BSIZE = statbuf.st_blksize; hashp->BSHIFT = __hash_log2(hashp->BSIZE); } #endif if (info) { if (info->bsize) { /* Round pagesize up to power of 2 */ hashp->BSHIFT = __hash_log2(info->bsize); hashp->BSIZE = 1 << hashp->BSHIFT; if (hashp->BSIZE > MAX_BSIZE) { errno = EINVAL; return (NULL); } } if (info->ffactor) hashp->FFACTOR = info->ffactor; if (info->hash) hashp->hash = info->hash; if (info->nelem) nelem = info->nelem; if (info->lorder) { if (info->lorder != BIG_ENDIAN && info->lorder != LITTLE_ENDIAN) { errno = EINVAL; return (NULL); } hashp->LORDER = info->lorder; } } /* init_htab should destroy the table and set errno if it fails */ if (init_htab(hashp, nelem)) return (NULL); else return (hashp); } /* * This calls alloc_segs which may run out of memory. Alloc_segs will destroy * the table and set errno, so we just pass the error information along. * * Returns 0 on No Error */ static int init_htab(hashp, nelem) HTAB *hashp; int nelem; { register int nbuckets, nsegs; int l2; /* * Divide number of elements by the fill factor and determine a * desired number of buckets. Allocate space for the next greater * power of two number of buckets. */ nelem = (nelem - 1) / hashp->FFACTOR + 1; l2 = __hash_log2(MAX(nelem, 2)); nbuckets = 1 << l2; hashp->SPARES[l2] = l2 + 1; hashp->SPARES[l2 + 1] = l2 + 1; hashp->OVFL_POINT = l2; hashp->LAST_FREED = 2; /* First bitmap page is at: splitpoint l2 page offset 1 */ if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0)) return (-1); hashp->MAX_BUCKET = hashp->LOW_MASK = nbuckets - 1; hashp->HIGH_MASK = (nbuckets << 1) - 1; hashp->HDRPAGES = ((MAX(sizeof(HASHHDR), MINHDRSIZE) - 1) >> hashp->BSHIFT) + 1; nsegs = (nbuckets - 1) / hashp->SGSIZE + 1; nsegs = 1 << __hash_log2(nsegs); if (nsegs > hashp->DSIZE) hashp->DSIZE = nsegs; return (alloc_segs(hashp, nsegs)); } /********************** DESTROY/CLOSE ROUTINES ************************/ /* * Flushes any changes to the file if necessary and destroys the hashp * structure, freeing all allocated space. */ static int hdestroy(hashp) HTAB *hashp; { int i, save_errno; save_errno = 0; #ifdef HASH_STATISTICS (void)fprintf(stderr, "hdestroy: accesses %ld collisions %ld\n", hash_accesses, hash_collisions); (void)fprintf(stderr, "hdestroy: expansions %ld\n", hash_expansions); (void)fprintf(stderr, "hdestroy: overflows %ld\n", hash_overflows); (void)fprintf(stderr, "keys %ld maxp %d segmentcount %d\n", hashp->NKEYS, hashp->MAX_BUCKET, hashp->nsegs); for (i = 0; i < NCACHED; i++) (void)fprintf(stderr, "spares[%d] = %d\n", i, hashp->SPARES[i]); #endif /* * Call on buffer manager to free buffers, and if required, * write them to disk. */ if (__buf_free(hashp, 1, hashp->save_file)) save_errno = errno; if (hashp->dir) { free(*hashp->dir); /* Free initial segments */ /* Free extra segments */ while (hashp->exsegs--) free(hashp->dir[--hashp->nsegs]); free(hashp->dir); } if (flush_meta(hashp) && !save_errno) save_errno = errno; /* Free Bigmaps */ for (i = 0; i < hashp->nmaps; i++) if (hashp->mapp[i]) free(hashp->mapp[i]); if (hashp->fp != -1) (void)close(hashp->fp); free(hashp); if (save_errno) { errno = save_errno; return (ERROR); } return (SUCCESS); } /* * Write modified pages to disk * * Returns: * 0 == OK * -1 ERROR */ static int hash_sync(dbp, flags) const DB *dbp; u_int32_t flags; { HTAB *hashp; if (flags != 0) { errno = EINVAL; return (ERROR); } if (!dbp) return (ERROR); hashp = (HTAB *)dbp->internal; if (!hashp->save_file) return (0); if (__buf_free(hashp, 0, 1) || flush_meta(hashp)) return (ERROR); hashp->new_file = 0; return (0); } /* * Returns: * 0 == OK * -1 indicates that errno should be set */ static int flush_meta(hashp) HTAB *hashp; { HASHHDR *whdrp; #if BYTE_ORDER == LITTLE_ENDIAN HASHHDR whdr; #endif int fp, i, wsize; if (!hashp->save_file) return (0); hashp->MAGIC = HASHMAGIC; hashp->VERSION = HASHVERSION; hashp->H_CHARKEY = hashp->hash(CHARKEY, sizeof(CHARKEY)); fp = hashp->fp; whdrp = &hashp->hdr; #if BYTE_ORDER == LITTLE_ENDIAN whdrp = &whdr; swap_header_copy(&hashp->hdr, whdrp); #endif if ((lseek(fp, (off_t)0, SEEK_SET) == -1) || ((wsize = write(fp, whdrp, sizeof(HASHHDR))) == -1)) return (-1); else if (wsize != sizeof(HASHHDR)) { errno = EFTYPE; hashp->errnum = errno; return (-1); } for (i = 0; i < NCACHED; i++) if (hashp->mapp[i]) if (__put_page(hashp, (char *)hashp->mapp[i], hashp->BITMAPS[i], 0, 1)) return (-1); return (0); } /*******************************SEARCH ROUTINES *****************************/ /* * All the access routines return * * Returns: * 0 on SUCCESS * 1 to indicate an external ERROR (i.e. key not found, etc) * -1 to indicate an internal ERROR (i.e. out of memory, etc) */ static int hash_get(dbp, key, data, flag) const DB *dbp; const DBT *key; DBT *data; u_int32_t flag; { HTAB *hashp; hashp = (HTAB *)dbp->internal; if (flag) { hashp->errnum = errno = EINVAL; return (ERROR); } return (hash_access(hashp, HASH_GET, (DBT *)key, data)); } static int hash_put(dbp, key, data, flag) const DB *dbp; DBT *key; const DBT *data; u_int32_t flag; { HTAB *hashp; hashp = (HTAB *)dbp->internal; if (flag && flag != R_NOOVERWRITE) { hashp->errnum = errno = EINVAL; return (ERROR); } if ((hashp->flags & O_ACCMODE) == O_RDONLY) { hashp->errnum = errno = EPERM; return (ERROR); } return (hash_access(hashp, flag == R_NOOVERWRITE ? HASH_PUTNEW : HASH_PUT, (DBT *)key, (DBT *)data)); } static int hash_delete(dbp, key, flag) const DB *dbp; const DBT *key; u_int32_t flag; /* Ignored */ { HTAB *hashp; hashp = (HTAB *)dbp->internal; if (flag && flag != R_CURSOR) { hashp->errnum = errno = EINVAL; return (ERROR); } if ((hashp->flags & O_ACCMODE) == O_RDONLY) { hashp->errnum = errno = EPERM; return (ERROR); } return (hash_access(hashp, HASH_DELETE, (DBT *)key, NULL)); } /* * Assume that hashp has been set in wrapper routine. */ static int hash_access(hashp, action, key, val) HTAB *hashp; ACTION action; DBT *key, *val; { register BUFHEAD *rbufp; BUFHEAD *bufp, *save_bufp; register u_int16_t *bp; register int n, ndx, off, size; register char *kp; u_int16_t pageno; #ifdef HASH_STATISTICS hash_accesses++; #endif off = hashp->BSIZE; size = key->size; kp = (char *)key->data; rbufp = __get_buf(hashp, __call_hash(hashp, kp, size), NULL, 0); if (!rbufp) return (ERROR); save_bufp = rbufp; /* Pin the bucket chain */ rbufp->flags |= BUF_PIN; for (bp = (u_int16_t *)rbufp->page, n = *bp++, ndx = 1; ndx < n;) if (bp[1] >= REAL_KEY) { /* Real key/data pair */ if (size == off - *bp && memcmp(kp, rbufp->page + *bp, size) == 0) goto found; off = bp[1]; #ifdef HASH_STATISTICS hash_collisions++; #endif bp += 2; ndx += 2; } else if (bp[1] == OVFLPAGE) { rbufp = __get_buf(hashp, *bp, rbufp, 0); if (!rbufp) { save_bufp->flags &= ~BUF_PIN; return (ERROR); } /* FOR LOOP INIT */ bp = (u_int16_t *)rbufp->page; n = *bp++; ndx = 1; off = hashp->BSIZE; } else if (bp[1] < REAL_KEY) { if ((ndx = __find_bigpair(hashp, rbufp, ndx, kp, size)) > 0) goto found; if (ndx == -2) { bufp = rbufp; if (!(pageno = __find_last_page(hashp, &bufp))) { ndx = 0; rbufp = bufp; break; /* FOR */ } rbufp = __get_buf(hashp, pageno, bufp, 0); if (!rbufp) { save_bufp->flags &= ~BUF_PIN; return (ERROR); } /* FOR LOOP INIT */ bp = (u_int16_t *)rbufp->page; n = *bp++; ndx = 1; off = hashp->BSIZE; } else { save_bufp->flags &= ~BUF_PIN; return (ERROR); } } /* Not found */ switch (action) { case HASH_PUT: case HASH_PUTNEW: if (__addel(hashp, rbufp, key, val)) { save_bufp->flags &= ~BUF_PIN; return (ERROR); } else { save_bufp->flags &= ~BUF_PIN; return (SUCCESS); } case HASH_GET: case HASH_DELETE: default: save_bufp->flags &= ~BUF_PIN; return (ABNORMAL); } found: switch (action) { case HASH_PUTNEW: save_bufp->flags &= ~BUF_PIN; return (ABNORMAL); case HASH_GET: bp = (u_int16_t *)rbufp->page; if (bp[ndx + 1] < REAL_KEY) { if (__big_return(hashp, rbufp, ndx, val, 0)) return (ERROR); } else { val->data = (u_char *)rbufp->page + (int)bp[ndx + 1]; val->size = bp[ndx] - bp[ndx + 1]; } break; case HASH_PUT: if ((__delpair(hashp, rbufp, ndx)) || (__addel(hashp, rbufp, key, val))) { save_bufp->flags &= ~BUF_PIN; return (ERROR); } break; case HASH_DELETE: if (__delpair(hashp, rbufp, ndx)) return (ERROR); break; default: abort(); } save_bufp->flags &= ~BUF_PIN; return (SUCCESS); } static int hash_seq(dbp, key, data, flag) const DB *dbp; DBT *key, *data; u_int32_t flag; { register u_int32_t bucket; register BUFHEAD *bufp; HTAB *hashp; u_int16_t *bp, ndx; hashp = (HTAB *)dbp->internal; if (flag && flag != R_FIRST && flag != R_NEXT) { hashp->errnum = errno = EINVAL; return (ERROR); } #ifdef HASH_STATISTICS hash_accesses++; #endif if ((hashp->cbucket < 0) || (flag == R_FIRST)) { hashp->cbucket = 0; hashp->cndx = 1; hashp->cpage = NULL; } for (bp = NULL; !bp || !bp[0]; ) { if (!(bufp = hashp->cpage)) { for (bucket = hashp->cbucket; bucket <= (u_int32_t) hashp->MAX_BUCKET; bucket++, hashp->cndx = 1) { bufp = __get_buf(hashp, bucket, NULL, 0); if (!bufp) return (ERROR); hashp->cpage = bufp; bp = (u_int16_t *)bufp->page; if (bp[0]) break; } hashp->cbucket = bucket; if (hashp->cbucket > hashp->MAX_BUCKET) { hashp->cbucket = -1; return (ABNORMAL); } } else bp = (u_int16_t *)hashp->cpage->page; #ifdef DEBUG assert(bp); assert(bufp); #endif while (bp[hashp->cndx + 1] == OVFLPAGE) { bufp = hashp->cpage = __get_buf(hashp, bp[hashp->cndx], bufp, 0); if (!bufp) return (ERROR); bp = (u_int16_t *)(bufp->page); hashp->cndx = 1; } if (!bp[0]) { hashp->cpage = NULL; ++hashp->cbucket; } } ndx = hashp->cndx; if (bp[ndx + 1] < REAL_KEY) { if (__big_keydata(hashp, bufp, key, data, 1)) return (ERROR); } else { key->data = (u_char *)hashp->cpage->page + bp[ndx]; key->size = (ndx > 1 ? bp[ndx - 1] : hashp->BSIZE) - bp[ndx]; data->data = (u_char *)hashp->cpage->page + bp[ndx + 1]; data->size = bp[ndx] - bp[ndx + 1]; ndx += 2; if (ndx > bp[0]) { hashp->cpage = NULL; hashp->cbucket++; hashp->cndx = 1; } else hashp->cndx = ndx; } return (SUCCESS); } /********************************* UTILITIES ************************/ /* * Returns: * 0 ==> OK * -1 ==> Error */ extern int __expand_table(hashp) HTAB *hashp; { u_int32_t old_bucket, new_bucket; int dirsize, new_segnum, spare_ndx; #ifdef HASH_STATISTICS hash_expansions++; #endif new_bucket = ++hashp->MAX_BUCKET; old_bucket = (hashp->MAX_BUCKET & hashp->LOW_MASK); new_segnum = new_bucket >> hashp->SSHIFT; /* Check if we need a new segment */ if (new_segnum >= hashp->nsegs) { /* Check if we need to expand directory */ if (new_segnum >= hashp->DSIZE) { /* Reallocate directory */ dirsize = hashp->DSIZE * sizeof(SEGMENT *); if (!hash_realloc(&hashp->dir, dirsize, dirsize << 1)) return (-1); hashp->DSIZE = dirsize << 1; } if ((hashp->dir[new_segnum] = (SEGMENT)calloc(hashp->SGSIZE, sizeof(SEGMENT))) == NULL) return (-1); hashp->exsegs++; hashp->nsegs++; } /* * If the split point is increasing (MAX_BUCKET's log base 2 * * increases), we need to copy the current contents of the spare * split bucket to the next bucket. */ spare_ndx = __hash_log2(hashp->MAX_BUCKET + 1); if (spare_ndx > hashp->OVFL_POINT) { hashp->SPARES[spare_ndx] = hashp->SPARES[hashp->OVFL_POINT]; hashp->OVFL_POINT = spare_ndx; } if (new_bucket > (u_int32_t) hashp->HIGH_MASK) { /* Starting a new doubling */ hashp->LOW_MASK = hashp->HIGH_MASK; hashp->HIGH_MASK = new_bucket | hashp->LOW_MASK; } /* Relocate records to the new bucket */ return (__split_page(hashp, old_bucket, new_bucket)); } /* * If realloc guarantees that the pointer is not destroyed if the realloc * fails, then this routine can go away. */ static void * hash_realloc(p_ptr, oldsize, newsize) SEGMENT **p_ptr; int oldsize, newsize; { register void *p; if ((p = malloc(newsize))) { memmove(p, *p_ptr, oldsize); memset((char *)p + oldsize, 0, newsize - oldsize); free(*p_ptr); *p_ptr = p; } return (p); } extern u_int32_t __call_hash(hashp, k, len) HTAB *hashp; char *k; int len; { int n, bucket; n = hashp->hash(k, len); bucket = n & hashp->HIGH_MASK; if (bucket > hashp->MAX_BUCKET) bucket = bucket & hashp->LOW_MASK; return (bucket); } /* * Allocate segment table. On error, destroy the table and set errno. * * Returns 0 on success */ static int alloc_segs(hashp, nsegs) HTAB *hashp; int nsegs; { register int i; register SEGMENT store; int save_errno; if ((hashp->dir = (SEGMENT *)calloc(hashp->DSIZE, sizeof(SEGMENT *))) == NULL) { save_errno = errno; (void)hdestroy(hashp); errno = save_errno; return (-1); } /* Allocate segments */ if ((store = (SEGMENT)calloc(nsegs << hashp->SSHIFT, sizeof(SEGMENT))) == NULL) { save_errno = errno; (void)hdestroy(hashp); errno = save_errno; return (-1); } for (i = 0; i < nsegs; i++, hashp->nsegs++) hashp->dir[i] = &store[i << hashp->SSHIFT]; return (0); } #if BYTE_ORDER == LITTLE_ENDIAN /* * Hashp->hdr needs to be byteswapped. */ static void swap_header_copy(srcp, destp) HASHHDR *srcp, *destp; { int i; P_32_COPY(srcp->magic, destp->magic); P_32_COPY(srcp->version, destp->version); P_32_COPY(srcp->lorder, destp->lorder); P_32_COPY(srcp->bsize, destp->bsize); P_32_COPY(srcp->bshift, destp->bshift); P_32_COPY(srcp->dsize, destp->dsize); P_32_COPY(srcp->ssize, destp->ssize); P_32_COPY(srcp->sshift, destp->sshift); P_32_COPY(srcp->ovfl_point, destp->ovfl_point); P_32_COPY(srcp->last_freed, destp->last_freed); P_32_COPY(srcp->max_bucket, destp->max_bucket); P_32_COPY(srcp->high_mask, destp->high_mask); P_32_COPY(srcp->low_mask, destp->low_mask); P_32_COPY(srcp->ffactor, destp->ffactor); P_32_COPY(srcp->nkeys, destp->nkeys); P_32_COPY(srcp->hdrpages, destp->hdrpages); P_32_COPY(srcp->h_charkey, destp->h_charkey); for (i = 0; i < NCACHED; i++) { P_32_COPY(srcp->spares[i], destp->spares[i]); P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]); } } static void swap_header(hashp) HTAB *hashp; { HASHHDR *hdrp; int i; hdrp = &hashp->hdr; M_32_SWAP(hdrp->magic); M_32_SWAP(hdrp->version); M_32_SWAP(hdrp->lorder); M_32_SWAP(hdrp->bsize); M_32_SWAP(hdrp->bshift); M_32_SWAP(hdrp->dsize); M_32_SWAP(hdrp->ssize); M_32_SWAP(hdrp->sshift); M_32_SWAP(hdrp->ovfl_point); M_32_SWAP(hdrp->last_freed); M_32_SWAP(hdrp->max_bucket); M_32_SWAP(hdrp->high_mask); M_32_SWAP(hdrp->low_mask); M_32_SWAP(hdrp->ffactor); M_32_SWAP(hdrp->nkeys); M_32_SWAP(hdrp->hdrpages); M_32_SWAP(hdrp->h_charkey); for (i = 0; i < NCACHED; i++) { M_32_SWAP(hdrp->spares[i]); M_16_SWAP(hdrp->bitmaps[i]); } } #endif db/hash/extern.h0100644000076400007640000000632406343257365014341 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)extern.h 8.4 (Berkeley) 6/16/94 */ BUFHEAD *__add_ovflpage __P((HTAB *, BUFHEAD *)); int __addel __P((HTAB *, BUFHEAD *, const DBT *, const DBT *)); int __big_delete __P((HTAB *, BUFHEAD *)); int __big_insert __P((HTAB *, BUFHEAD *, const DBT *, const DBT *)); int __big_keydata __P((HTAB *, BUFHEAD *, DBT *, DBT *, int)); int __big_return __P((HTAB *, BUFHEAD *, int, DBT *, int)); int __big_split __P((HTAB *, BUFHEAD *, BUFHEAD *, BUFHEAD *, int, u_int32_t, SPLIT_RETURN *)); int __buf_free __P((HTAB *, int, int)); void __buf_init __P((HTAB *, int)); u_int32_t __call_hash __P((HTAB *, char *, int)); int __delpair __P((HTAB *, BUFHEAD *, int)); int __expand_table __P((HTAB *)); int __find_bigpair __P((HTAB *, BUFHEAD *, int, char *, int)); u_int16_t __find_last_page __P((HTAB *, BUFHEAD **)); void __free_ovflpage __P((HTAB *, BUFHEAD *)); BUFHEAD *__get_buf __P((HTAB *, u_int32_t, BUFHEAD *, int)); int __get_page __P((HTAB *, char *, u_int32_t, int, int, int)); int __ibitmap __P((HTAB *, int, int, int)); u_int32_t __hash_log2 __P((u_int32_t)); int __put_page __P((HTAB *, char *, u_int32_t, int, int)); void __reclaim_buf __P((HTAB *, BUFHEAD *)); int __split_page __P((HTAB *, u_int32_t, u_int32_t)); /* Default hash routine. */ extern u_int32_t (*__default_hash) __P((const void *, size_t)); #ifdef HASH_STATISTICS extern int hash_accesses, hash_collisions, hash_expansions, hash_overflows; #endif db/hash/hash.h0100644000076400007640000002374506343257371013762 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)hash.h 8.3 (Berkeley) 5/31/94 */ /* Operations */ typedef enum { HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT } ACTION; /* Buffer Management structures */ typedef struct _bufhead BUFHEAD; struct _bufhead { BUFHEAD *prev; /* LRU links */ BUFHEAD *next; /* LRU links */ BUFHEAD *ovfl; /* Overflow page buffer header */ u_int32_t addr; /* Address of this page */ char *page; /* Actual page data */ char flags; #define BUF_MOD 0x0001 #define BUF_DISK 0x0002 #define BUF_BUCKET 0x0004 #define BUF_PIN 0x0008 }; #define IS_BUCKET(X) ((X) & BUF_BUCKET) typedef BUFHEAD **SEGMENT; /* Hash Table Information */ typedef struct hashhdr { /* Disk resident portion */ int magic; /* Magic NO for hash tables */ int version; /* Version ID */ u_int32_t lorder; /* Byte Order */ int bsize; /* Bucket/Page Size */ int bshift; /* Bucket shift */ int dsize; /* Directory Size */ int ssize; /* Segment Size */ int sshift; /* Segment shift */ int ovfl_point; /* Where overflow pages are being * allocated */ int last_freed; /* Last overflow page freed */ int max_bucket; /* ID of Maximum bucket in use */ int high_mask; /* Mask to modulo into entire table */ int low_mask; /* Mask to modulo into lower half of * table */ int ffactor; /* Fill factor */ int nkeys; /* Number of keys in hash table */ int hdrpages; /* Size of table header */ int h_charkey; /* value of hash(CHARKEY) */ #define NCACHED 32 /* number of bit maps and spare * points */ int spares[NCACHED];/* spare pages for overflow */ u_int16_t bitmaps[NCACHED]; /* address of overflow page * bitmaps */ } HASHHDR; typedef struct htab { /* Memory resident data structure */ HASHHDR hdr; /* Header */ int nsegs; /* Number of allocated segments */ int exsegs; /* Number of extra allocated * segments */ u_int32_t /* Hash function */ (*hash)__P((const void *, size_t)); int flags; /* Flag values */ int fp; /* File pointer */ char *tmp_buf; /* Temporary Buffer for BIG data */ char *tmp_key; /* Temporary Buffer for BIG keys */ BUFHEAD *cpage; /* Current page */ int cbucket; /* Current bucket */ int cndx; /* Index of next item on cpage */ int errnum; /* Error Number -- for DBM * compatibility */ int new_file; /* Indicates if fd is backing store * or no */ int save_file; /* Indicates whether we need to flush * file at * exit */ u_int32_t *mapp[NCACHED]; /* Pointers to page maps */ int nmaps; /* Initial number of bitmaps */ int nbufs; /* Number of buffers left to * allocate */ BUFHEAD bufhead; /* Header of buffer lru list */ SEGMENT *dir; /* Hash Bucket directory */ } HTAB; /* * Constants */ #define MAX_BSIZE 65536 /* 2^16 */ #define MIN_BUFFERS 6 #define MINHDRSIZE 512 #define DEF_BUFSIZE 65536 /* 64 K */ #define DEF_BUCKET_SIZE 4096 #define DEF_BUCKET_SHIFT 12 /* log2(BUCKET) */ #define DEF_SEGSIZE 256 #define DEF_SEGSIZE_SHIFT 8 /* log2(SEGSIZE) */ #define DEF_DIRSIZE 256 #define DEF_FFACTOR 65536 #define MIN_FFACTOR 4 #define SPLTMAX 8 #define CHARKEY "%$sniglet^&" #define NUMKEY 1038583 #define BYTE_SHIFT 3 #define INT_TO_BYTE 2 #define INT_BYTE_SHIFT 5 #define ALL_SET ((u_int32_t)0xFFFFFFFF) #define ALL_CLEAR 0 #define PTROF(X) ((BUFHEAD *)((ptrdiff_t)(X)&~0x3)) #define ISMOD(X) ((u_int32_t)(ptrdiff_t)(X)&0x1) #define DOMOD(X) ((X) = (char *)((ptrdiff_t)(X)|0x1)) #define ISDISK(X) ((u_int32_t)(ptrdiff_t)(X)&0x2) #define DODISK(X) ((X) = (char *)((ptrdiff_t)(X)|0x2)) #define BITS_PER_MAP 32 /* Given the address of the beginning of a big map, clear/set the nth bit */ #define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP))) #define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP))) #define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP))) /* Overflow management */ /* * Overflow page numbers are allocated per split point. At each doubling of * the table, we can allocate extra pages. So, an overflow page number has * the top 5 bits indicate which split point and the lower 11 bits indicate * which page at that split point is indicated (pages within split points are * numberered starting with 1). */ #define SPLITSHIFT 11 #define SPLITMASK 0x7FF #define SPLITNUM(N) (((u_int32_t)(N)) >> SPLITSHIFT) #define OPAGENUM(N) ((N) & SPLITMASK) #define OADDR_OF(S,O) ((u_int32_t)((u_int32_t)(S) << SPLITSHIFT) + (O)) #define BUCKET_TO_PAGE(B) \ (B) + hashp->HDRPAGES + ((B) ? hashp->SPARES[__hash_log2((B)+1)-1] : 0) #define OADDR_TO_PAGE(B) \ BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B)); /* * page.h contains a detailed description of the page format. * * Normally, keys and data are accessed from offset tables in the top of * each page which point to the beginning of the key and data. There are * four flag values which may be stored in these offset tables which indicate * the following: * * * OVFLPAGE Rather than a key data pair, this pair contains * the address of an overflow page. The format of * the pair is: * OVERFLOW_PAGE_NUMBER OVFLPAGE * * PARTIAL_KEY This must be the first key/data pair on a page * and implies that page contains only a partial key. * That is, the key is too big to fit on a single page * so it starts on this page and continues on the next. * The format of the page is: * KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE * * KEY_OFF -- offset of the beginning of the key * PARTIAL_KEY -- 1 * OVFL_PAGENO - page number of the next overflow page * OVFLPAGE -- 0 * * FULL_KEY This must be the first key/data pair on the page. It * is used in two cases. * * Case 1: * There is a complete key on the page but no data * (because it wouldn't fit). The next page contains * the data. * * Page format it: * KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE * * KEY_OFF -- offset of the beginning of the key * FULL_KEY -- 2 * OVFL_PAGENO - page number of the next overflow page * OVFLPAGE -- 0 * * Case 2: * This page contains no key, but part of a large * data field, which is continued on the next page. * * Page format it: * DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE * * KEY_OFF -- offset of the beginning of the data on * this page * FULL_KEY -- 2 * OVFL_PAGENO - page number of the next overflow page * OVFLPAGE -- 0 * * FULL_KEY_DATA * This must be the first key/data pair on the page. * There are two cases: * * Case 1: * This page contains a key and the beginning of the * data field, but the data field is continued on the * next page. * * Page format is: * KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF * * KEY_OFF -- offset of the beginning of the key * FULL_KEY_DATA -- 3 * OVFL_PAGENO - page number of the next overflow page * DATA_OFF -- offset of the beginning of the data * * Case 2: * This page contains the last page of a big data pair. * There is no key, only the tail end of the data * on this page. * * Page format is: * DATA_OFF FULL_KEY_DATA * * DATA_OFF -- offset of the beginning of the data on * this page * FULL_KEY_DATA -- 3 * OVFL_PAGENO - page number of the next overflow page * OVFLPAGE -- 0 * * OVFL_PAGENO and OVFLPAGE are optional (they are * not present if there is no next page). */ #define OVFLPAGE 0 #define PARTIAL_KEY 1 #define FULL_KEY 2 #define FULL_KEY_DATA 3 #define REAL_KEY 4 /* Short hands for accessing structure */ #define BSIZE hdr.bsize #define BSHIFT hdr.bshift #define DSIZE hdr.dsize #define SGSIZE hdr.ssize #define SSHIFT hdr.sshift #define LORDER hdr.lorder #define OVFL_POINT hdr.ovfl_point #define LAST_FREED hdr.last_freed #define MAX_BUCKET hdr.max_bucket #define FFACTOR hdr.ffactor #define HIGH_MASK hdr.high_mask #define LOW_MASK hdr.low_mask #define NKEYS hdr.nkeys #define HDRPAGES hdr.hdrpages #define SPARES hdr.spares #define BITMAPS hdr.bitmaps #define VERSION hdr.version #define MAGIC hdr.magic #define NEXT_FREE hdr.next_free #define H_CHARKEY hdr.h_charkey db/hash/page.h0100644000076400007640000000705506072171255013742 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)page.h 8.2 (Berkeley) 5/31/94 */ /* * Definitions for hashing page file format. */ /* * routines dealing with a data page * * page format: * +------------------------------+ * p | n | keyoff | datoff | keyoff | * +------------+--------+--------+ * | datoff | free | ptr | --> | * +--------+---------------------+ * | F R E E A R E A | * +--------------+---------------+ * | <---- - - - | data | * +--------+-----+----+----------+ * | key | data | key | * +--------+----------+----------+ * * Pointer to the free space is always: p[p[0] + 2] * Amount of free space on the page is: p[p[0] + 1] */ /* * How many bytes required for this pair? * 2 shorts in the table at the top of the page + room for the * key and room for the data * * We prohibit entering a pair on a page unless there is also room to append * an overflow page. The reason for this it that you can get in a situation * where a single key/data pair fits on a page, but you can't append an * overflow page and later you'd have to split the key/data and handle like * a big pair. * You might as well do this up front. */ #define PAIRSIZE(K,D) (2*sizeof(u_int16_t) + (K)->size + (D)->size) #define BIGOVERHEAD (4*sizeof(u_int16_t)) #define KEYSIZE(K) (4*sizeof(u_int16_t) + (K)->size); #define OVFLSIZE (2*sizeof(u_int16_t)) #define FREESPACE(P) ((P)[(P)[0]+1]) #define OFFSET(P) ((P)[(P)[0]+2]) #define PAIRFITS(P,K,D) \ (((P)[2] >= REAL_KEY) && \ (PAIRSIZE((K),(D)) + OVFLSIZE) <= FREESPACE((P))) #define PAGE_META(N) (((N)+3) * sizeof(u_int16_t)) typedef struct { BUFHEAD *newp; BUFHEAD *oldp; BUFHEAD *nextp; u_int16_t next_addr; } SPLIT_RETURN; db/hash/hash_bigkey.c0100644000076400007640000004000706602177366015300 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; #endif /* LIBC_SCCS and not lint */ /* * PACKAGE: hash * DESCRIPTION: * Big key/data handling for the hashing package. * * ROUTINES: * External * __big_keydata * __big_split * __big_insert * __big_return * __big_delete * __find_last_page * Internal * collect_key * collect_data */ #include #include #include #include #include #ifdef DEBUG #include #endif #include #include "hash.h" #include "page.h" #include "extern.h" static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int)); static int collect_data __P((HTAB *, BUFHEAD *, int, int)); /* * Big_insert * * You need to do an insert and the key/data pair is too big * * Returns: * 0 ==> OK *-1 ==> ERROR */ extern int __big_insert(hashp, bufp, key, val) HTAB *hashp; BUFHEAD *bufp; const DBT *key, *val; { register u_int16_t *p; int key_size, n, val_size; u_int16_t space, move_bytes, off; char *cp, *key_data, *val_data; cp = bufp->page; /* Character pointer of p. */ p = (u_int16_t *)cp; key_data = (char *)key->data; key_size = key->size; val_data = (char *)val->data; val_size = val->size; /* First move the Key */ for (space = FREESPACE(p) - BIGOVERHEAD; key_size; space = FREESPACE(p) - BIGOVERHEAD) { move_bytes = MIN(space, key_size); off = OFFSET(p) - move_bytes; memmove(cp + off, key_data, move_bytes); key_size -= move_bytes; key_data += move_bytes; n = p[0]; p[++n] = off; p[0] = ++n; FREESPACE(p) = off - PAGE_META(n); OFFSET(p) = off; p[n] = PARTIAL_KEY; bufp = __add_ovflpage(hashp, bufp); if (!bufp) return (-1); n = p[0]; if (!key_size) { if (FREESPACE(p)) { move_bytes = MIN(FREESPACE(p), val_size); off = OFFSET(p) - move_bytes; p[n] = off; memmove(cp + off, val_data, move_bytes); val_data += move_bytes; val_size -= move_bytes; p[n - 2] = FULL_KEY_DATA; FREESPACE(p) = FREESPACE(p) - move_bytes; OFFSET(p) = off; } else p[n - 2] = FULL_KEY; } p = (u_int16_t *)bufp->page; cp = bufp->page; bufp->flags |= BUF_MOD; } /* Now move the data */ for (space = FREESPACE(p) - BIGOVERHEAD; val_size; space = FREESPACE(p) - BIGOVERHEAD) { move_bytes = MIN(space, val_size); /* * Here's the hack to make sure that if the data ends on the * same page as the key ends, FREESPACE is at least one. */ if ((int) space == val_size && (size_t) val_size == val->size) move_bytes--; off = OFFSET(p) - move_bytes; memmove(cp + off, val_data, move_bytes); val_size -= move_bytes; val_data += move_bytes; n = p[0]; p[++n] = off; p[0] = ++n; FREESPACE(p) = off - PAGE_META(n); OFFSET(p) = off; if (val_size) { p[n] = FULL_KEY; bufp = __add_ovflpage(hashp, bufp); if (!bufp) return (-1); cp = bufp->page; p = (u_int16_t *)cp; } else p[n] = FULL_KEY_DATA; bufp->flags |= BUF_MOD; } return (0); } /* * Called when bufp's page contains a partial key (index should be 1) * * All pages in the big key/data pair except bufp are freed. We cannot * free bufp because the page pointing to it is lost and we can't get rid * of its pointer. * * Returns: * 0 => OK *-1 => ERROR */ extern int __big_delete(hashp, bufp) HTAB *hashp; BUFHEAD *bufp; { register BUFHEAD *last_bfp, *rbufp; u_int16_t *bp, pageno; int key_done, n; rbufp = bufp; last_bfp = NULL; bp = (u_int16_t *)bufp->page; pageno = 0; key_done = 0; while (!key_done || (bp[2] != FULL_KEY_DATA)) { if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) key_done = 1; /* * If there is freespace left on a FULL_KEY_DATA page, then * the data is short and fits entirely on this page, and this * is the last page. */ if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) break; pageno = bp[bp[0] - 1]; rbufp->flags |= BUF_MOD; rbufp = __get_buf(hashp, pageno, rbufp, 0); if (last_bfp) __free_ovflpage(hashp, last_bfp); last_bfp = rbufp; if (!rbufp) return (-1); /* Error. */ bp = (u_int16_t *)rbufp->page; } /* * If we get here then rbufp points to the last page of the big * key/data pair. Bufp points to the first one -- it should now be * empty pointing to the next page after this pair. Can't free it * because we don't have the page pointing to it. */ /* This is information from the last page of the pair. */ n = bp[0]; pageno = bp[n - 1]; /* Now, bp is the first page of the pair. */ bp = (u_int16_t *)bufp->page; if (n > 2) { /* There is an overflow page. */ bp[1] = pageno; bp[2] = OVFLPAGE; bufp->ovfl = rbufp->ovfl; } else /* This is the last page. */ bufp->ovfl = NULL; n -= 2; bp[0] = n; FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); OFFSET(bp) = hashp->BSIZE - 1; bufp->flags |= BUF_MOD; if (rbufp) __free_ovflpage(hashp, rbufp); if (last_bfp && last_bfp != rbufp) __free_ovflpage(hashp, last_bfp); hashp->NKEYS--; return (0); } /* * Returns: * 0 = key not found * -1 = get next overflow page * -2 means key not found and this is big key/data * -3 error */ extern int __find_bigpair(hashp, bufp, ndx, key, size) HTAB *hashp; BUFHEAD *bufp; int ndx; char *key; int size; { register u_int16_t *bp; register char *p; int ksize; u_int16_t bytes; char *kkey; bp = (u_int16_t *)bufp->page; p = bufp->page; ksize = size; kkey = key; for (bytes = hashp->BSIZE - bp[ndx]; bytes <= size && bp[ndx + 1] == PARTIAL_KEY; bytes = hashp->BSIZE - bp[ndx]) { if (memcmp(p + bp[ndx], kkey, bytes)) return (-2); kkey += bytes; ksize -= bytes; bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); if (!bufp) return (-3); p = bufp->page; bp = (u_int16_t *)p; ndx = 1; } if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { #ifdef HASH_STATISTICS ++hash_collisions; #endif return (-2); } else return (ndx); } /* * Given the buffer pointer of the first overflow page of a big pair, * find the end of the big pair * * This will set bpp to the buffer header of the last page of the big pair. * It will return the pageno of the overflow page following the last page * of the pair; 0 if there isn't any (i.e. big pair is the last key in the * bucket) */ extern u_int16_t __find_last_page(hashp, bpp) HTAB *hashp; BUFHEAD **bpp; { BUFHEAD *bufp; u_int16_t *bp, pageno; int n; bufp = *bpp; bp = (u_int16_t *)bufp->page; for (;;) { n = bp[0]; /* * This is the last page if: the tag is FULL_KEY_DATA and * either only 2 entries OVFLPAGE marker is explicit there * is freespace on the page. */ if (bp[2] == FULL_KEY_DATA && ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) break; pageno = bp[n - 1]; bufp = __get_buf(hashp, pageno, bufp, 0); if (!bufp) return (0); /* Need to indicate an error! */ bp = (u_int16_t *)bufp->page; } *bpp = bufp; if (bp[0] > 2) return (bp[3]); else return (0); } /* * Return the data for the key/data pair that begins on this page at this * index (index should always be 1). */ extern int __big_return(hashp, bufp, ndx, val, set_current) HTAB *hashp; BUFHEAD *bufp; int ndx; DBT *val; int set_current; { BUFHEAD *save_p; u_int16_t *bp, len, off, save_addr; char *tp; bp = (u_int16_t *)bufp->page; while (bp[ndx + 1] == PARTIAL_KEY) { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (u_int16_t *)bufp->page; ndx = 1; } if (bp[ndx + 1] == FULL_KEY) { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (u_int16_t *)bufp->page; save_p = bufp; save_addr = save_p->addr; off = bp[1]; len = 0; } else if (!FREESPACE(bp)) { /* * This is a hack. We can't distinguish between * FULL_KEY_DATA that contains complete data or * incomplete data, so we require that if the data * is complete, there is at least 1 byte of free * space left. */ off = bp[bp[0]]; len = bp[1] - off; save_p = bufp; save_addr = bufp->addr; bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (u_int16_t *)bufp->page; } else { /* The data is all on one page. */ tp = (char *)bp; off = bp[bp[0]]; val->data = (u_char *)tp + off; val->size = bp[1] - off; if (set_current) { if (bp[0] == 2) { /* No more buckets in * chain */ hashp->cpage = NULL; hashp->cbucket++; hashp->cndx = 1; } else { hashp->cpage = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!hashp->cpage) return (-1); hashp->cndx = 1; if (!((u_int16_t *) hashp->cpage->page)[0]) { hashp->cbucket++; hashp->cpage = NULL; } } } return (0); } val->size = collect_data(hashp, bufp, (int)len, set_current); if (val->size == (size_t) -1) return (-1); if (save_p->addr != save_addr) { /* We are pretty short on buffers. */ errno = EINVAL; /* OUT OF BUFFERS */ return (-1); } memmove(hashp->tmp_buf, (save_p->page) + off, len); val->data = (u_char *)hashp->tmp_buf; return (0); } /* * Count how big the total datasize is by recursing through the pages. Then * allocate a buffer and copy the data as you recurse up. */ static int collect_data(hashp, bufp, len, set) HTAB *hashp; BUFHEAD *bufp; int len, set; { register u_int16_t *bp; register char *p; BUFHEAD *xbp; u_int16_t save_addr; int mylen, totlen; p = bufp->page; bp = (u_int16_t *)p; mylen = hashp->BSIZE - bp[1]; save_addr = bufp->addr; if (bp[2] == FULL_KEY_DATA) { /* End of Data */ totlen = len + mylen; if (hashp->tmp_buf) free(hashp->tmp_buf); if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) return (-1); if (set) { hashp->cndx = 1; if (bp[0] == 2) { /* No more buckets in chain */ hashp->cpage = NULL; hashp->cbucket++; } else { hashp->cpage = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!hashp->cpage) return (-1); else if (!((u_int16_t *)hashp->cpage->page)[0]) { hashp->cbucket++; hashp->cpage = NULL; } } } } else { xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!xbp || ((totlen = collect_data(hashp, xbp, len + mylen, set)) < 1)) return (-1); } if (bufp->addr != save_addr) { errno = EINVAL; /* Out of buffers. */ return (-1); } memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); return (totlen); } /* * Fill in the key and data for this big pair. */ extern int __big_keydata(hashp, bufp, key, val, set) HTAB *hashp; BUFHEAD *bufp; DBT *key, *val; int set; { key->size = collect_key(hashp, bufp, 0, val, set); if (key->size == (size_t) -1) return (-1); key->data = (u_char *)hashp->tmp_key; return (0); } /* * Count how big the total key size is by recursing through the pages. Then * collect the data, allocate a buffer and copy the key as you recurse up. */ static int collect_key(hashp, bufp, len, val, set) HTAB *hashp; BUFHEAD *bufp; int len; DBT *val; int set; { BUFHEAD *xbp; char *p; int mylen, totlen; u_int16_t *bp, save_addr; p = bufp->page; bp = (u_int16_t *)p; mylen = hashp->BSIZE - bp[1]; save_addr = bufp->addr; totlen = len + mylen; if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ if (hashp->tmp_key != NULL) free(hashp->tmp_key); if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) return (-1); if (__big_return(hashp, bufp, 1, val, set)) return (-1); } else { xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!xbp || ((totlen = collect_key(hashp, xbp, totlen, val, set)) < 1)) return (-1); } if (bufp->addr != save_addr) { errno = EINVAL; /* MIS -- OUT OF BUFFERS */ return (-1); } memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); return (totlen); } /* * Returns: * 0 => OK * -1 => error */ extern int __big_split(hashp, op, np, big_keyp, addr, obucket, ret) HTAB *hashp; BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ BUFHEAD *np; /* Pointer to new bucket page */ /* Pointer to first page containing the big key/data */ BUFHEAD *big_keyp; int addr; /* Address of big_keyp */ u_int32_t obucket;/* Old Bucket */ SPLIT_RETURN *ret; { register BUFHEAD *tmpp; register u_int16_t *tp; BUFHEAD *bp; DBT key, val; u_int32_t change; u_int16_t free_space, n, off; bp = big_keyp; /* Now figure out where the big key/data goes */ if (__big_keydata(hashp, big_keyp, &key, &val, 0)) return (-1); change = (__call_hash(hashp, key.data, key.size) != obucket); if ((ret->next_addr = __find_last_page(hashp, &big_keyp))) { if (!(ret->nextp = __get_buf(hashp, ret->next_addr, big_keyp, 0))) return (-1);; } else ret->nextp = NULL; /* Now make one of np/op point to the big key/data pair */ #ifdef DEBUG assert(np->ovfl == NULL); #endif if (change) tmpp = np; else tmpp = op; tmpp->flags |= BUF_MOD; #ifdef DEBUG1 (void)fprintf(stderr, "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); #endif tmpp->ovfl = bp; /* one of op/np point to big_keyp */ tp = (u_int16_t *)tmpp->page; #ifdef DEBUG assert(FREESPACE(tp) >= OVFLSIZE); #endif n = tp[0]; off = OFFSET(tp); free_space = FREESPACE(tp); tp[++n] = (u_int16_t)addr; tp[++n] = OVFLPAGE; tp[0] = n; OFFSET(tp) = off; FREESPACE(tp) = free_space - OVFLSIZE; /* * Finally, set the new and old return values. BIG_KEYP contains a * pointer to the last page of the big key_data pair. Make sure that * big_keyp has no following page (2 elements) or create an empty * following page. */ ret->newp = np; ret->oldp = op; tp = (u_int16_t *)big_keyp->page; big_keyp->flags |= BUF_MOD; if (tp[0] > 2) { /* * There may be either one or two offsets on this page. If * there is one, then the overflow page is linked on normally * and tp[4] is OVFLPAGE. If there are two, tp[4] contains * the second offset and needs to get stuffed in after the * next overflow page is added. */ n = tp[4]; free_space = FREESPACE(tp); off = OFFSET(tp); tp[0] -= 2; FREESPACE(tp) = free_space + OVFLSIZE; OFFSET(tp) = off; tmpp = __add_ovflpage(hashp, big_keyp); if (!tmpp) return (-1); tp[4] = n; } else tmpp = big_keyp; if (change) ret->newp = tmpp; else ret->oldp = tmpp; return (0); } db/hash/hash_buf.c0100644000076400007640000002204206072171243014566 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash_buf.c 8.5 (Berkeley) 7/15/94"; #endif /* LIBC_SCCS and not lint */ /* * PACKAGE: hash * * DESCRIPTION: * Contains buffer management * * ROUTINES: * External * __buf_init * __get_buf * __buf_free * __reclaim_buf * Internal * newbuf */ #include #include #include #include #include #ifdef DEBUG #include #endif #include #include "hash.h" #include "page.h" #include "extern.h" static BUFHEAD *newbuf __P((HTAB *, u_int32_t, BUFHEAD *)); /* Unlink B from its place in the lru */ #define BUF_REMOVE(B) { \ (B)->prev->next = (B)->next; \ (B)->next->prev = (B)->prev; \ } /* Insert B after P */ #define BUF_INSERT(B, P) { \ (B)->next = (P)->next; \ (B)->prev = (P); \ (P)->next = (B); \ (B)->next->prev = (B); \ } #define MRU hashp->bufhead.next #define LRU hashp->bufhead.prev #define MRU_INSERT(B) BUF_INSERT((B), &hashp->bufhead) #define LRU_INSERT(B) BUF_INSERT((B), LRU) /* * We are looking for a buffer with address "addr". If prev_bp is NULL, then * address is a bucket index. If prev_bp is not NULL, then it points to the * page previous to an overflow page that we are trying to find. * * CAVEAT: The buffer header accessed via prev_bp's ovfl field may no longer * be valid. Therefore, you must always verify that its address matches the * address you are seeking. */ extern BUFHEAD * __get_buf(hashp, addr, prev_bp, newpage) HTAB *hashp; u_int32_t addr; BUFHEAD *prev_bp; int newpage; /* If prev_bp set, indicates a new overflow page. */ { register BUFHEAD *bp; register u_int32_t is_disk_mask; register int is_disk, segment_ndx; SEGMENT segp; is_disk = 0; is_disk_mask = 0; if (prev_bp) { bp = prev_bp->ovfl; if (!bp || (bp->addr != addr)) bp = NULL; if (!newpage) is_disk = BUF_DISK; } else { /* Grab buffer out of directory */ segment_ndx = addr & (hashp->SGSIZE - 1); /* valid segment ensured by __call_hash() */ segp = hashp->dir[addr >> hashp->SSHIFT]; #ifdef DEBUG assert(segp != NULL); #endif bp = PTROF(segp[segment_ndx]); is_disk_mask = ISDISK(segp[segment_ndx]); is_disk = is_disk_mask || !hashp->new_file; } if (!bp) { bp = newbuf(hashp, addr, prev_bp); if (!bp || __get_page(hashp, bp->page, addr, !prev_bp, is_disk, 0)) return (NULL); if (!prev_bp) segp[segment_ndx] = (BUFHEAD *)((ptrdiff_t)bp | is_disk_mask); } else { BUF_REMOVE(bp); MRU_INSERT(bp); } return (bp); } /* * We need a buffer for this page. Either allocate one, or evict a resident * one (if we have as many buffers as we're allowed) and put this one in. * * If newbuf finds an error (returning NULL), it also sets errno. */ static BUFHEAD * newbuf(hashp, addr, prev_bp) HTAB *hashp; u_int32_t addr; BUFHEAD *prev_bp; { register BUFHEAD *bp; /* The buffer we're going to use */ register BUFHEAD *xbp; /* Temp pointer */ register BUFHEAD *next_xbp; SEGMENT segp; int segment_ndx; u_int16_t oaddr, *shortp; oaddr = 0; bp = LRU; /* * If LRU buffer is pinned, the buffer pool is too small. We need to * allocate more buffers. */ if (hashp->nbufs || (bp->flags & BUF_PIN)) { /* Allocate a new one */ if ((bp = (BUFHEAD *)malloc(sizeof(BUFHEAD))) == NULL) return (NULL); #ifdef PURIFY memset(bp, 0xff, sizeof(BUFHEAD)); #endif if ((bp->page = (char *)malloc(hashp->BSIZE)) == NULL) { free(bp); return (NULL); } #ifdef PURIFY memset(bp->page, 0xff, hashp->BSIZE); #endif if (hashp->nbufs) hashp->nbufs--; } else { /* Kick someone out */ BUF_REMOVE(bp); /* * If this is an overflow page with addr 0, it's already been * flushed back in an overflow chain and initialized. */ if ((bp->addr != 0) || (bp->flags & BUF_BUCKET)) { /* * Set oaddr before __put_page so that you get it * before bytes are swapped. */ shortp = (u_int16_t *)bp->page; if (shortp[0]) oaddr = shortp[shortp[0] - 1]; if ((bp->flags & BUF_MOD) && __put_page(hashp, bp->page, bp->addr, (int)IS_BUCKET(bp->flags), 0)) return (NULL); /* * Update the pointer to this page (i.e. invalidate it). * * If this is a new file (i.e. we created it at open * time), make sure that we mark pages which have been * written to disk so we retrieve them from disk later, * rather than allocating new pages. */ if (IS_BUCKET(bp->flags)) { segment_ndx = bp->addr & (hashp->SGSIZE - 1); segp = hashp->dir[bp->addr >> hashp->SSHIFT]; #ifdef DEBUG assert(segp != NULL); #endif if (hashp->new_file && ((bp->flags & BUF_MOD) || ISDISK(segp[segment_ndx]))) segp[segment_ndx] = (BUFHEAD *)BUF_DISK; else segp[segment_ndx] = NULL; } /* * Since overflow pages can only be access by means of * their bucket, free overflow pages associated with * this bucket. */ for (xbp = bp; xbp->ovfl;) { next_xbp = xbp->ovfl; xbp->ovfl = 0; xbp = next_xbp; /* Check that ovfl pointer is up date. */ if (IS_BUCKET(xbp->flags) || (oaddr != xbp->addr)) break; shortp = (u_int16_t *)xbp->page; if (shortp[0]) /* set before __put_page */ oaddr = shortp[shortp[0] - 1]; if ((xbp->flags & BUF_MOD) && __put_page(hashp, xbp->page, xbp->addr, 0, 0)) return (NULL); xbp->addr = 0; xbp->flags = 0; BUF_REMOVE(xbp); LRU_INSERT(xbp); } } } /* Now assign this buffer */ bp->addr = addr; #ifdef DEBUG1 (void)fprintf(stderr, "NEWBUF1: %d->ovfl was %d is now %d\n", bp->addr, (bp->ovfl ? bp->ovfl->addr : 0), 0); #endif bp->ovfl = NULL; if (prev_bp) { /* * If prev_bp is set, this is an overflow page, hook it in to * the buffer overflow links. */ #ifdef DEBUG1 (void)fprintf(stderr, "NEWBUF2: %d->ovfl was %d is now %d\n", prev_bp->addr, (prev_bp->ovfl ? bp->ovfl->addr : 0), (bp ? bp->addr : 0)); #endif prev_bp->ovfl = bp; bp->flags = 0; } else bp->flags = BUF_BUCKET; MRU_INSERT(bp); return (bp); } extern void __buf_init(hashp, nbytes) HTAB *hashp; int nbytes; { BUFHEAD *bfp; int npages; bfp = &(hashp->bufhead); npages = (nbytes + hashp->BSIZE - 1) >> hashp->BSHIFT; npages = MAX(npages, MIN_BUFFERS); hashp->nbufs = npages; bfp->next = bfp; bfp->prev = bfp; /* * This space is calloc'd so these are already null. * * bfp->ovfl = NULL; * bfp->flags = 0; * bfp->page = NULL; * bfp->addr = 0; */ } extern int __buf_free(hashp, do_free, to_disk) HTAB *hashp; int do_free, to_disk; { BUFHEAD *bp; /* Need to make sure that buffer manager has been initialized */ if (!LRU) return (0); for (bp = LRU; bp != &hashp->bufhead;) { /* Check that the buffer is valid */ if (bp->addr || IS_BUCKET(bp->flags)) { if (to_disk && (bp->flags & BUF_MOD) && __put_page(hashp, bp->page, bp->addr, IS_BUCKET(bp->flags), 0)) return (-1); } /* Check if we are freeing stuff */ if (do_free) { if (bp->page) free(bp->page); BUF_REMOVE(bp); free(bp); bp = LRU; } else bp = bp->prev; } return (0); } extern void __reclaim_buf(hashp, bp) HTAB *hashp; BUFHEAD *bp; { bp->ovfl = 0; bp->addr = 0; bp->flags = 0; BUF_REMOVE(bp); LRU_INSERT(bp); } db/hash/hash_func.c0100644000076400007640000001216706072171245014756 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash_func.c 8.2 (Berkeley) 2/21/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include "hash.h" #include "page.h" #include "extern.h" static u_int32_t hash1 __P((const void *, size_t)); static u_int32_t hash2 __P((const void *, size_t)); static u_int32_t hash3 __P((const void *, size_t)); static u_int32_t hash4 __P((const void *, size_t)); /* Global default hash function */ u_int32_t (*__default_hash) __P((const void *, size_t)) = hash4; /* * HASH FUNCTIONS * * Assume that we've already split the bucket to which this key hashes, * calculate that bucket, and check that in fact we did already split it. * * This came from ejb's hsearch. */ #define PRIME1 37 #define PRIME2 1048583 static u_int32_t hash1(keyarg, len) const void *keyarg; register size_t len; { register const u_char *key; register u_int32_t h; /* Convert string to integer */ for (key = keyarg, h = 0; len--;) h = h * PRIME1 ^ (*key++ - ' '); h %= PRIME2; return (h); } /* * Phong's linear congruential hash */ #define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) static u_int32_t hash2(keyarg, len) const void *keyarg; size_t len; { register const u_char *e, *key; register u_int32_t h; register u_char c; key = keyarg; e = key + len; for (h = 0; key != e;) { c = *key++; if (!c && key > e) break; dcharhash(h, c); } return (h); } /* * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte * units. On the first time through the loop we get the "leftover bytes" * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If * this routine is heavily used enough, it's worth the ugly coding. * * OZ's original sdbm hash */ static u_int32_t hash3(keyarg, len) const void *keyarg; register size_t len; { register const u_char *key; register size_t loop; register u_int32_t h; #define HASHC h = *key++ + 65599 * h h = 0; key = keyarg; if (len > 0) { loop = (len + 8 - 1) >> 3; switch (len & (8 - 1)) { case 0: do { HASHC; /* FALLTHROUGH */ case 7: HASHC; /* FALLTHROUGH */ case 6: HASHC; /* FALLTHROUGH */ case 5: HASHC; /* FALLTHROUGH */ case 4: HASHC; /* FALLTHROUGH */ case 3: HASHC; /* FALLTHROUGH */ case 2: HASHC; /* FALLTHROUGH */ case 1: HASHC; } while (--loop); } } return (h); } /* Hash function from Chris Torek. */ static u_int32_t hash4(keyarg, len) const void *keyarg; register size_t len; { register const u_char *key; register size_t loop; register u_int32_t h; #define HASH4a h = (h << 5) - h + *key++; #define HASH4b h = (h << 5) + h + *key++; #define HASH4 HASH4b h = 0; key = keyarg; if (len > 0) { loop = (len + 8 - 1) >> 3; switch (len & (8 - 1)) { case 0: do { HASH4; /* FALLTHROUGH */ case 7: HASH4; /* FALLTHROUGH */ case 6: HASH4; /* FALLTHROUGH */ case 5: HASH4; /* FALLTHROUGH */ case 4: HASH4; /* FALLTHROUGH */ case 3: HASH4; /* FALLTHROUGH */ case 2: HASH4; /* FALLTHROUGH */ case 1: HASH4; } while (--loop); } } return (h); } db/hash/hash_log2.c0100644000076400007640000000440106343257373014666 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash_log2.c 8.2 (Berkeley) 5/31/94"; #endif /* LIBC_SCCS and not lint */ #include #include u_int32_t __hash_log2 __P((u_int32_t)); u_int32_t __hash_log2(num) u_int32_t num; { register u_int32_t i, limit; limit = 1; for (i = 0; limit < num; limit = limit << 1, i++); return (i); } db/hash/hash_page.c0100644000076400007640000005561406072171250014737 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash_page.c 8.7 (Berkeley) 8/16/94"; #endif /* LIBC_SCCS and not lint */ /* * PACKAGE: hashing * * DESCRIPTION: * Page manipulation for hashing package. * * ROUTINES: * * External * __get_page * __add_ovflpage * Internal * overflow_page * open_temp */ #include #include #include #include #include #include #include #include #ifdef DEBUG #include #endif #include #include "hash.h" #include "page.h" #include "extern.h" static u_int32_t *fetch_bitmap __P((HTAB *, int)); static u_int32_t first_free __P((u_int32_t)); static int open_temp __P((HTAB *)); static u_int16_t overflow_page __P((HTAB *)); static void putpair __P((char *, const DBT *, const DBT *)); static void squeeze_key __P((u_int16_t *, const DBT *, const DBT *)); static int ugly_split __P((HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int)); #define PAGE_INIT(P) { \ ((u_int16_t *)(P))[0] = 0; \ ((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \ ((u_int16_t *)(P))[2] = hashp->BSIZE; \ } /* * This is called AFTER we have verified that there is room on the page for * the pair (PAIRFITS has returned true) so we go right ahead and start moving * stuff on. */ static void putpair(p, key, val) char *p; const DBT *key, *val; { register u_int16_t *bp, n, off; bp = (u_int16_t *)p; /* Enter the key first. */ n = bp[0]; off = OFFSET(bp) - key->size; memmove(p + off, key->data, key->size); bp[++n] = off; /* Now the data. */ off -= val->size; memmove(p + off, val->data, val->size); bp[++n] = off; /* Adjust page info. */ bp[0] = n; bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t)); bp[n + 2] = off; } /* * Returns: * 0 OK * -1 error */ extern int __delpair(hashp, bufp, ndx) HTAB *hashp; BUFHEAD *bufp; register int ndx; { register u_int16_t *bp, newoff; register int n; u_int16_t pairlen; bp = (u_int16_t *)bufp->page; n = bp[0]; if (bp[ndx + 1] < REAL_KEY) return (__big_delete(hashp, bufp)); if (ndx != 1) newoff = bp[ndx - 1]; else newoff = hashp->BSIZE; pairlen = newoff - bp[ndx + 1]; if (ndx != (n - 1)) { /* Hard Case -- need to shuffle keys */ register int i; register char *src = bufp->page + (int)OFFSET(bp); register char *dst = src + (int)pairlen; memmove(dst, src, bp[ndx + 1] - OFFSET(bp)); /* Now adjust the pointers */ for (i = ndx + 2; i <= n; i += 2) { if (bp[i + 1] == OVFLPAGE) { bp[i - 2] = bp[i]; bp[i - 1] = bp[i + 1]; } else { bp[i - 2] = bp[i] + pairlen; bp[i - 1] = bp[i + 1] + pairlen; } } } /* Finally adjust the page data */ bp[n] = OFFSET(bp) + pairlen; bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t); bp[0] = n - 2; hashp->NKEYS--; bufp->flags |= BUF_MOD; return (0); } /* * Returns: * 0 ==> OK * -1 ==> Error */ extern int __split_page(hashp, obucket, nbucket) HTAB *hashp; u_int32_t obucket, nbucket; { register BUFHEAD *new_bufp, *old_bufp; register u_int16_t *ino; register char *np; DBT key, val; int n, ndx, retval; u_int16_t copyto, diff, off, moved; char *op; copyto = (u_int16_t)hashp->BSIZE; off = (u_int16_t)hashp->BSIZE; old_bufp = __get_buf(hashp, obucket, NULL, 0); if (old_bufp == NULL) return (-1); new_bufp = __get_buf(hashp, nbucket, NULL, 0); if (new_bufp == NULL) return (-1); old_bufp->flags |= (BUF_MOD | BUF_PIN); new_bufp->flags |= (BUF_MOD | BUF_PIN); ino = (u_int16_t *)(op = old_bufp->page); np = new_bufp->page; moved = 0; for (n = 1, ndx = 1; n < ino[0]; n += 2) { if (ino[n + 1] < REAL_KEY) { retval = ugly_split(hashp, obucket, old_bufp, new_bufp, (int)copyto, (int)moved); old_bufp->flags &= ~BUF_PIN; new_bufp->flags &= ~BUF_PIN; return (retval); } key.data = (u_char *)op + ino[n]; key.size = off - ino[n]; if (__call_hash(hashp, key.data, key.size) == obucket) { /* Don't switch page */ diff = copyto - off; if (diff) { copyto = ino[n + 1] + diff; memmove(op + copyto, op + ino[n + 1], off - ino[n + 1]); ino[ndx] = copyto + ino[n] - ino[n + 1]; ino[ndx + 1] = copyto; } else copyto = ino[n + 1]; ndx += 2; } else { /* Switch page */ val.data = (u_char *)op + ino[n + 1]; val.size = ino[n] - ino[n + 1]; putpair(np, &key, &val); moved += 2; } off = ino[n + 1]; } /* Now clean up the page */ ino[0] -= moved; FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3); OFFSET(ino) = copyto; #ifdef DEBUG3 (void)fprintf(stderr, "split %d/%d\n", ((u_int16_t *)np)[0] / 2, ((u_int16_t *)op)[0] / 2); #endif /* unpin both pages */ old_bufp->flags &= ~BUF_PIN; new_bufp->flags &= ~BUF_PIN; return (0); } /* * Called when we encounter an overflow or big key/data page during split * handling. This is special cased since we have to begin checking whether * the key/data pairs fit on their respective pages and because we may need * overflow pages for both the old and new pages. * * The first page might be a page with regular key/data pairs in which case * we have a regular overflow condition and just need to go on to the next * page or it might be a big key/data pair in which case we need to fix the * big key/data pair. * * Returns: * 0 ==> success * -1 ==> failure */ static int ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved) HTAB *hashp; u_int32_t obucket; /* Same as __split_page. */ BUFHEAD *old_bufp, *new_bufp; int copyto; /* First byte on page which contains key/data values. */ int moved; /* Number of pairs moved to new page. */ { register BUFHEAD *bufp; /* Buffer header for ino */ register u_int16_t *ino; /* Page keys come off of */ register u_int16_t *np; /* New page */ register u_int16_t *op; /* Page keys go on to if they aren't moving */ BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */ DBT key, val; SPLIT_RETURN ret; u_int16_t n, off, ov_addr, scopyto; char *cino; /* Character value of ino */ bufp = old_bufp; ino = (u_int16_t *)old_bufp->page; np = (u_int16_t *)new_bufp->page; op = (u_int16_t *)old_bufp->page; last_bfp = NULL; scopyto = (u_int16_t)copyto; /* ANSI */ n = ino[0] - 1; while (n < ino[0]) { if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) { if (__big_split(hashp, old_bufp, new_bufp, bufp, bufp->addr, obucket, &ret)) return (-1); old_bufp = ret.oldp; if (!old_bufp) return (-1); op = (u_int16_t *)old_bufp->page; new_bufp = ret.newp; if (!new_bufp) return (-1); np = (u_int16_t *)new_bufp->page; bufp = ret.nextp; if (!bufp) return (0); cino = (char *)bufp->page; ino = (u_int16_t *)cino; last_bfp = ret.nextp; } else if (ino[n + 1] == OVFLPAGE) { ov_addr = ino[n]; /* * Fix up the old page -- the extra 2 are the fields * which contained the overflow information. */ ino[0] -= (moved + 2); FREESPACE(ino) = scopyto - sizeof(u_int16_t) * (ino[0] + 3); OFFSET(ino) = scopyto; bufp = __get_buf(hashp, ov_addr, bufp, 0); if (!bufp) return (-1); ino = (u_int16_t *)bufp->page; n = 1; scopyto = hashp->BSIZE; moved = 0; if (last_bfp) __free_ovflpage(hashp, last_bfp); last_bfp = bufp; } /* Move regular sized pairs of there are any */ off = hashp->BSIZE; for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) { cino = (char *)ino; key.data = (u_char *)cino + ino[n]; key.size = off - ino[n]; val.data = (u_char *)cino + ino[n + 1]; val.size = ino[n] - ino[n + 1]; off = ino[n + 1]; if (__call_hash(hashp, key.data, key.size) == obucket) { /* Keep on old page */ if (PAIRFITS(op, (&key), (&val))) putpair((char *)op, &key, &val); else { old_bufp = __add_ovflpage(hashp, old_bufp); if (!old_bufp) return (-1); op = (u_int16_t *)old_bufp->page; putpair((char *)op, &key, &val); } old_bufp->flags |= BUF_MOD; } else { /* Move to new page */ if (PAIRFITS(np, (&key), (&val))) putpair((char *)np, &key, &val); else { new_bufp = __add_ovflpage(hashp, new_bufp); if (!new_bufp) return (-1); np = (u_int16_t *)new_bufp->page; putpair((char *)np, &key, &val); } new_bufp->flags |= BUF_MOD; } } } if (last_bfp) __free_ovflpage(hashp, last_bfp); return (0); } /* * Add the given pair to the page * * Returns: * 0 ==> OK * 1 ==> failure */ extern int __addel(hashp, bufp, key, val) HTAB *hashp; BUFHEAD *bufp; const DBT *key, *val; { register u_int16_t *bp, *sop; int do_expand; bp = (u_int16_t *)bufp->page; do_expand = 0; while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY)) /* Exception case */ if (bp[2] == FULL_KEY_DATA && bp[0] == 2) /* This is the last page of a big key/data pair and we need to add another page */ break; else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (u_int16_t *)bufp->page; } else /* Try to squeeze key on this page */ if (FREESPACE(bp) > PAIRSIZE(key, val)) { squeeze_key(bp, key, val); return (0); } else { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (u_int16_t *)bufp->page; } if (PAIRFITS(bp, key, val)) putpair(bufp->page, key, val); else { do_expand = 1; bufp = __add_ovflpage(hashp, bufp); if (!bufp) return (-1); sop = (u_int16_t *)bufp->page; if (PAIRFITS(sop, key, val)) putpair((char *)sop, key, val); else if (__big_insert(hashp, bufp, key, val)) return (-1); } bufp->flags |= BUF_MOD; /* * If the average number of keys per bucket exceeds the fill factor, * expand the table. */ hashp->NKEYS++; if (do_expand || (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR)) return (__expand_table(hashp)); return (0); } /* * * Returns: * pointer on success * NULL on error */ extern BUFHEAD * __add_ovflpage(hashp, bufp) HTAB *hashp; BUFHEAD *bufp; { register u_int16_t *sp; u_int16_t ndx, ovfl_num; #ifdef DEBUG1 int tmp1, tmp2; #endif sp = (u_int16_t *)bufp->page; /* Check if we are dynamically determining the fill factor */ if (hashp->FFACTOR == DEF_FFACTOR) { hashp->FFACTOR = sp[0] >> 1; if (hashp->FFACTOR < MIN_FFACTOR) hashp->FFACTOR = MIN_FFACTOR; } bufp->flags |= BUF_MOD; ovfl_num = overflow_page(hashp); #ifdef DEBUG1 tmp1 = bufp->addr; tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0; #endif if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1))) return (NULL); bufp->ovfl->flags |= BUF_MOD; #ifdef DEBUG1 (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n", tmp1, tmp2, bufp->ovfl->addr); #endif ndx = sp[0]; /* * Since a pair is allocated on a page only if there's room to add * an overflow page, we know that the OVFL information will fit on * the page. */ sp[ndx + 4] = OFFSET(sp); sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE; sp[ndx + 1] = ovfl_num; sp[ndx + 2] = OVFLPAGE; sp[0] = ndx + 2; #ifdef HASH_STATISTICS hash_overflows++; #endif return (bufp->ovfl); } /* * Returns: * 0 indicates SUCCESS * -1 indicates FAILURE */ extern int __get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap) HTAB *hashp; char *p; u_int32_t bucket; int is_bucket, is_disk, is_bitmap; { register int fd, page, size; int rsize; u_int16_t *bp; fd = hashp->fp; size = hashp->BSIZE; if ((fd == -1) || !is_disk) { PAGE_INIT(p); return (0); } if (is_bucket) page = BUCKET_TO_PAGE(bucket); else page = OADDR_TO_PAGE(bucket); if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) || ((rsize = read(fd, p, size)) == -1)) return (-1); bp = (u_int16_t *)p; if (!rsize) bp[0] = 0; /* We hit the EOF, so initialize a new page */ else if (rsize != size) { errno = EFTYPE; return (-1); } if (!is_bitmap && !bp[0]) { PAGE_INIT(p); } else if (hashp->LORDER != BYTE_ORDER) { register int i, max; if (is_bitmap) { max = hashp->BSIZE >> 2; /* divide by 4 */ for (i = 0; i < max; i++) M_32_SWAP(((int *)p)[i]); } else { M_16_SWAP(bp[0]); max = bp[0] + 2; for (i = 1; i <= max; i++) M_16_SWAP(bp[i]); } } return (0); } /* * Write page p to disk * * Returns: * 0 ==> OK * -1 ==>failure */ extern int __put_page(hashp, p, bucket, is_bucket, is_bitmap) HTAB *hashp; char *p; u_int32_t bucket; int is_bucket, is_bitmap; { register int fd, page, size; int wsize; size = hashp->BSIZE; if ((hashp->fp == -1) && open_temp(hashp)) return (-1); fd = hashp->fp; if (hashp->LORDER != BYTE_ORDER) { register int i; register int max; if (is_bitmap) { max = hashp->BSIZE >> 2; /* divide by 4 */ for (i = 0; i < max; i++) M_32_SWAP(((int *)p)[i]); } else { max = ((u_int16_t *)p)[0] + 2; for (i = 0; i <= max; i++) M_16_SWAP(((u_int16_t *)p)[i]); } } if (is_bucket) page = BUCKET_TO_PAGE(bucket); else page = OADDR_TO_PAGE(bucket); if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) || ((wsize = write(fd, p, size)) == -1)) /* Errno is set */ return (-1); if (wsize != size) { errno = EFTYPE; return (-1); } return (0); } #define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1) /* * Initialize a new bitmap page. Bitmap pages are left in memory * once they are read in. */ extern int __ibitmap(hashp, pnum, nbits, ndx) HTAB *hashp; int pnum, nbits, ndx; { u_int32_t *ip; int clearbytes, clearints; if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) return (1); hashp->nmaps++; clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1; clearbytes = clearints << INT_TO_BYTE; (void)memset((char *)ip, 0, clearbytes); (void)memset(((char *)ip) + clearbytes, 0xFF, hashp->BSIZE - clearbytes); ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK); SETBIT(ip, 0); hashp->BITMAPS[ndx] = (u_int16_t)pnum; hashp->mapp[ndx] = ip; return (0); } static u_int32_t first_free(map) u_int32_t map; { register u_int32_t i, mask; mask = 0x1; for (i = 0; i < BITS_PER_MAP; i++) { if (!(mask & map)) return (i); mask = mask << 1; } return (i); } static u_int16_t overflow_page(hashp) HTAB *hashp; { register u_int32_t *freep; register int max_free, offset, splitnum; u_int16_t addr; int bit, first_page, free_bit, free_page, i, in_use_bits, j; #ifdef DEBUG2 int tmp1, tmp2; #endif splitnum = hashp->OVFL_POINT; max_free = hashp->SPARES[splitnum]; free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT); free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1); /* Look through all the free maps to find the first free block */ first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT); for ( i = first_page; i <= free_page; i++ ) { if (!(freep = (u_int32_t *)hashp->mapp[i]) && !(freep = fetch_bitmap(hashp, i))) return (0); if (i == free_page) in_use_bits = free_bit; else in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1; if (i == first_page) { bit = hashp->LAST_FREED & ((hashp->BSIZE << BYTE_SHIFT) - 1); j = bit / BITS_PER_MAP; bit = bit & ~(BITS_PER_MAP - 1); } else { bit = 0; j = 0; } for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) if (freep[j] != ALL_SET) goto found; } /* No Free Page Found */ hashp->LAST_FREED = hashp->SPARES[splitnum]; hashp->SPARES[splitnum]++; offset = hashp->SPARES[splitnum] - (splitnum ? hashp->SPARES[splitnum - 1] : 0); #define OVMSG "HASH: Out of overflow pages. Increase page size\n" if (offset > SPLITMASK) { if (++splitnum >= NCACHED) { (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); return (0); } hashp->OVFL_POINT = splitnum; hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; hashp->SPARES[splitnum-1]--; offset = 1; } /* Check if we need to allocate a new bitmap page */ if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) { free_page++; if (free_page >= NCACHED) { (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); return (0); } /* * This is tricky. The 1 indicates that you want the new page * allocated with 1 clear bit. Actually, you are going to * allocate 2 pages from this map. The first is going to be * the map page, the second is the overflow page we were * looking for. The init_bitmap routine automatically, sets * the first bit of itself to indicate that the bitmap itself * is in use. We would explicitly set the second bit, but * don't have to if we tell init_bitmap not to leave it clear * in the first place. */ if (__ibitmap(hashp, (int)OADDR_OF(splitnum, offset), 1, free_page)) return (0); hashp->SPARES[splitnum]++; #ifdef DEBUG2 free_bit = 2; #endif offset++; if (offset > SPLITMASK) { if (++splitnum >= NCACHED) { (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); return (0); } hashp->OVFL_POINT = splitnum; hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; hashp->SPARES[splitnum-1]--; offset = 0; } } else { /* * Free_bit addresses the last used bit. Bump it to address * the first available bit. */ free_bit++; SETBIT(freep, free_bit); } /* Calculate address of the new overflow page */ addr = OADDR_OF(splitnum, offset); #ifdef DEBUG2 (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", addr, free_bit, free_page); #endif return (addr); found: bit = bit + first_free(freep[j]); SETBIT(freep, bit); #ifdef DEBUG2 tmp1 = bit; tmp2 = i; #endif /* * Bits are addressed starting with 0, but overflow pages are addressed * beginning at 1. Bit is a bit addressnumber, so we need to increment * it to convert it to a page number. */ bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT)); if (bit >= hashp->LAST_FREED) hashp->LAST_FREED = bit - 1; /* Calculate the split number for this page */ for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++); offset = (i ? bit - hashp->SPARES[i - 1] : bit); if (offset >= SPLITMASK) return (0); /* Out of overflow pages */ addr = OADDR_OF(i, offset); #ifdef DEBUG2 (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", addr, tmp1, tmp2); #endif /* Allocate and return the overflow page */ return (addr); } /* * Mark this overflow page as free. */ extern void __free_ovflpage(hashp, obufp) HTAB *hashp; BUFHEAD *obufp; { register u_int16_t addr; u_int32_t *freep; int bit_address, free_page, free_bit; u_int16_t ndx; addr = obufp->addr; #ifdef DEBUG1 (void)fprintf(stderr, "Freeing %d\n", addr); #endif ndx = (((u_int16_t)addr) >> SPLITSHIFT); bit_address = (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1; if (bit_address < hashp->LAST_FREED) hashp->LAST_FREED = bit_address; free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT)); free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1); if (!(freep = hashp->mapp[free_page])) freep = fetch_bitmap(hashp, free_page); #ifdef DEBUG /* * This had better never happen. It means we tried to read a bitmap * that has already had overflow pages allocated off it, and we * failed to read it from the file. */ if (!freep) assert(0); #endif CLRBIT(freep, free_bit); #ifdef DEBUG2 (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n", obufp->addr, free_bit, free_page); #endif __reclaim_buf(hashp, obufp); } /* * Returns: * 0 success * -1 failure */ static int open_temp(hashp) HTAB *hashp; { sigset_t set, oset; static char namestr[] = "_hashXXXXXX"; /* Block signals; make sure file goes away at process exit. */ (void)sigfillset(&set); (void)sigprocmask(SIG_BLOCK, &set, &oset); if ((hashp->fp = mkstemp(namestr)) != -1) { (void)unlink(namestr); (void)fcntl(hashp->fp, F_SETFD, 1); } (void)sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL); return (hashp->fp != -1 ? 0 : -1); } /* * We have to know that the key will fit, but the last entry on the page is * an overflow pair, so we need to shift things. */ static void squeeze_key(sp, key, val) u_int16_t *sp; const DBT *key, *val; { register char *p; u_int16_t free_space, n, off, pageno; p = (char *)sp; n = sp[0]; free_space = FREESPACE(sp); off = OFFSET(sp); pageno = sp[n - 1]; off -= key->size; sp[n - 1] = off; memmove(p + off, key->data, key->size); off -= val->size; sp[n] = off; memmove(p + off, val->data, val->size); sp[0] = n + 2; sp[n + 1] = pageno; sp[n + 2] = OVFLPAGE; FREESPACE(sp) = free_space - PAIRSIZE(key, val); OFFSET(sp) = off; } static u_int32_t * fetch_bitmap(hashp, ndx) HTAB *hashp; int ndx; { if (ndx >= hashp->nmaps) return (NULL); if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) return (NULL); if (__get_page(hashp, (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) { free(hashp->mapp[ndx]); return (NULL); } return (hashp->mapp[ndx]); } #ifdef DEBUG4 int print_chain(addr) int addr; { BUFHEAD *bufp; short *bp, oaddr; (void)fprintf(stderr, "%d ", addr); bufp = __get_buf(hashp, addr, NULL, 0); bp = (short *)bufp->page; while (bp[0] && ((bp[bp[0]] == OVFLPAGE) || ((bp[0] > 2) && bp[2] < REAL_KEY))) { oaddr = bp[bp[0] - 1]; (void)fprintf(stderr, "%d ", (int)oaddr); bufp = __get_buf(hashp, (int)oaddr, bufp, 0); bp = (short *)bufp->page; } (void)fprintf(stderr, "\n"); } #endif db/hash/ndbm.c0100644000076400007640000001071106103177550013731 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)ndbm.c 8.4 (Berkeley) 7/21/94"; #endif /* LIBC_SCCS and not lint */ /* * This package provides a dbm compatible interface to the new hashing * package described in db(3). */ #include #include #include #include #include #include "hash.h" /* * Returns: * *DBM on success * NULL on failure */ extern DBM * dbm_open(file, flags, mode) const char *file; int flags, mode; { DBM *db; HASHINFO info; const size_t len = strlen(file) + sizeof (DBM_SUFFIX); #ifdef __GNUC__ char path[len]; #else char *path = malloc(len); if (path == NULL) return NULL; #endif info.bsize = 4096; info.ffactor = 40; info.nelem = 1; info.cachesize = 0; info.hash = NULL; info.lorder = 0; (void)strcpy(path, file); (void)strcat(path, DBM_SUFFIX); db = (DBM *)__hash_open(path, flags, mode, &info, 0); #ifndef __GNUC__ free(path); #endif return db; } extern void dbm_close(db) DBM *db; { (void)(db->close)(db); } /* * Returns: * DATUM on success * NULL on failure */ extern datum dbm_fetch(db, key) DBM *db; datum key; { datum retval; int status; status = (db->get)(db, (DBT *)&key, (DBT *)&retval, 0); if (status) { retval.dptr = NULL; retval.dsize = 0; } return (retval); } /* * Returns: * DATUM on success * NULL on failure */ extern datum dbm_firstkey(db) DBM *db; { int status; datum retdata, retkey; status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_FIRST); if (status) retkey.dptr = NULL; return (retkey); } /* * Returns: * DATUM on success * NULL on failure */ extern datum dbm_nextkey(db) DBM *db; { int status; datum retdata, retkey; status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_NEXT); if (status) retkey.dptr = NULL; return (retkey); } /* * Returns: * 0 on success * <0 failure */ extern int dbm_delete(db, key) DBM *db; datum key; { int status; status = (db->del)(db, (DBT *)&key, 0); if (status) return (-1); else return (0); } /* * Returns: * 0 on success * <0 failure * 1 if DBM_INSERT and entry exists */ extern int dbm_store(db, key, content, flags) DBM *db; datum key, content; int flags; { return ((db->put)(db, (DBT *)&key, (DBT *)&content, (flags == DBM_INSERT) ? R_NOOVERWRITE : 0)); } extern int dbm_error(db) DBM *db; { HTAB *hp; hp = (HTAB *)db->internal; return (hp->errnum); } extern int dbm_clearerr(db) DBM *db; { HTAB *hp; hp = (HTAB *)db->internal; hp->errnum = 0; return (0); } extern int dbm_dirfno(db) DBM *db; { return(((HTAB *)db->internal)->fp); } db/mpool/0040755000076400007640000000000007055362607013061 5ustar cjwatsoncjwatsondb/mpool/mpool.c0100644000076400007640000002614006323402146014341 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include #include #include #define __MPOOLINTERFACE_PRIVATE #include #ifdef _LIBC /* In the GNU C library we must not pollute the namespace because libdb is needed by libnss_db. */ #define mpool_open __mpool_open #define mpool_filter __mpool_filter #define mpool_new __mpool_new #define mpool_get __mpool_get #define mpool_put __mpool_put #define mpool_sync __mpool_sync #define mpool_close __mpool_close #endif static BKT *mpool_bkt __P((MPOOL *)); static BKT *mpool_look __P((MPOOL *, pgno_t)); static int mpool_write __P((MPOOL *, BKT *)); /* * mpool_open -- * Initialize a memory pool. */ MPOOL * mpool_open(key, fd, pagesize, maxcache) void *key; int fd; pgno_t pagesize, maxcache; { struct stat sb; MPOOL *mp; int entry; /* * Get information about the file. * * XXX * We don't currently handle pipes, although we should. */ if (fstat(fd, &sb)) return (NULL); if (!S_ISREG(sb.st_mode)) { errno = ESPIPE; return (NULL); } /* Allocate and initialize the MPOOL cookie. */ if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL) return (NULL); CIRCLEQ_INIT(&mp->lqh); for (entry = 0; entry < HASHSIZE; ++entry) CIRCLEQ_INIT(&mp->hqh[entry]); mp->maxcache = maxcache; mp->npages = sb.st_size / pagesize; mp->pagesize = pagesize; mp->fd = fd; return (mp); } /* * mpool_filter -- * Initialize input/output filters. */ void mpool_filter(mp, pgin, pgout, pgcookie) MPOOL *mp; void (*pgin) __P((void *, pgno_t, void *)); void (*pgout) __P((void *, pgno_t, void *)); void *pgcookie; { mp->pgin = pgin; mp->pgout = pgout; mp->pgcookie = pgcookie; } /* * mpool_new -- * Get a new page of memory. */ void * mpool_new(mp, pgnoaddr) MPOOL *mp; pgno_t *pgnoaddr; { struct _hqh *head; BKT *bp; if (mp->npages == MAX_PAGE_NUMBER) { (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); abort(); } #ifdef STATISTICS ++mp->pagenew; #endif /* * Get a BKT from the cache. Assign a new page number, attach * it to the head of the hash chain, the tail of the lru chain, * and return. */ if ((bp = mpool_bkt(mp)) == NULL) return (NULL); *pgnoaddr = bp->pgno = mp->npages++; bp->flags = MPOOL_PINNED; head = &mp->hqh[HASHKEY(bp->pgno)]; CIRCLEQ_INSERT_HEAD(head, bp, hq); CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); return (bp->page); } /* * mpool_get * Get a page. */ void * mpool_get(mp, pgno, flags) MPOOL *mp; pgno_t pgno; u_int flags; /* XXX not used? */ { struct _hqh *head; BKT *bp; off_t off; int nr; /* Check for attempt to retrieve a non-existent page. */ if (pgno >= mp->npages) { errno = EINVAL; return (NULL); } #ifdef STATISTICS ++mp->pageget; #endif /* Check for a page that is cached. */ if ((bp = mpool_look(mp, pgno)) != NULL) { #ifdef DEBUG if (bp->flags & MPOOL_PINNED) { (void)fprintf(stderr, "mpool_get: page %d already pinned\n", bp->pgno); abort(); } #endif /* * Move the page to the head of the hash chain and the tail * of the lru chain. */ head = &mp->hqh[HASHKEY(bp->pgno)]; CIRCLEQ_REMOVE(head, bp, hq); CIRCLEQ_INSERT_HEAD(head, bp, hq); CIRCLEQ_REMOVE(&mp->lqh, bp, q); CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); /* Return a pinned page. */ bp->flags |= MPOOL_PINNED; return (bp->page); } /* Get a page from the cache. */ if ((bp = mpool_bkt(mp)) == NULL) return (NULL); /* Read in the contents. */ #ifdef STATISTICS ++mp->pageread; #endif off = mp->pagesize * pgno; if (lseek(mp->fd, off, SEEK_SET) != off) return (NULL); if ((u_long) (nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) { if (nr >= 0) errno = EFTYPE; return (NULL); } /* Set the page number, pin the page. */ bp->pgno = pgno; bp->flags = MPOOL_PINNED; /* * Add the page to the head of the hash chain and the tail * of the lru chain. */ head = &mp->hqh[HASHKEY(bp->pgno)]; CIRCLEQ_INSERT_HEAD(head, bp, hq); CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); /* Run through the user's filter. */ if (mp->pgin != NULL) (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); return (bp->page); } /* * mpool_put * Return a page. */ int mpool_put(mp, page, flags) MPOOL *mp; void *page; u_int flags; { BKT *bp; #ifdef STATISTICS ++mp->pageput; #endif bp = (BKT *)((char *)page - sizeof(BKT)); #ifdef DEBUG if (!(bp->flags & MPOOL_PINNED)) { (void)fprintf(stderr, "mpool_put: page %d not pinned\n", bp->pgno); abort(); } #endif bp->flags &= ~MPOOL_PINNED; bp->flags |= flags & MPOOL_DIRTY; return (RET_SUCCESS); } /* * mpool_close * Close the buffer pool. */ int mpool_close(mp) MPOOL *mp; { BKT *bp; /* Free up any space allocated to the lru pages. */ while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) { CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q); free(bp); } /* Free the MPOOL cookie. */ free(mp); return (RET_SUCCESS); } /* * mpool_sync * Sync the pool to disk. */ int mpool_sync(mp) MPOOL *mp; { BKT *bp; /* Walk the lru chain, flushing any dirty pages to disk. */ for (bp = mp->lqh.cqh_first; bp != (void *)&mp->lqh; bp = bp->q.cqe_next) if (bp->flags & MPOOL_DIRTY && mpool_write(mp, bp) == RET_ERROR) return (RET_ERROR); /* Sync the file descriptor. */ return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); } #ifdef _LIBC #undef mpool_open #undef mpool_filter #undef mpool_new #undef mpool_get #undef mpool_put #undef mpool_close #undef mpool_sync weak_alias (__mpool_open, mpool_open) weak_alias (__mpool_filter, mpool_filter) weak_alias (__mpool_new, mpool_new) weak_alias (__mpool_get, mpool_get) weak_alias (__mpool_put, mpool_put) weak_alias (__mpool_close, mpool_close) weak_alias (__mpool_sync, mpool_sync) #endif /* * mpool_bkt * Get a page from the cache (or create one). */ static BKT * mpool_bkt(mp) MPOOL *mp; { struct _hqh *head; BKT *bp; /* If under the max cached, always create a new page. */ if (mp->curcache < mp->maxcache) goto new; /* * If the cache is max'd out, walk the lru list for a buffer we * can flush. If we find one, write it (if necessary) and take it * off any lists. If we don't find anything we grow the cache anyway. * The cache never shrinks. */ for (bp = mp->lqh.cqh_first; bp != (void *)&mp->lqh; bp = bp->q.cqe_next) if (!(bp->flags & MPOOL_PINNED)) { /* Flush if dirty. */ if (bp->flags & MPOOL_DIRTY && mpool_write(mp, bp) == RET_ERROR) return (NULL); #ifdef STATISTICS ++mp->pageflush; #endif /* Remove from the hash and lru queues. */ head = &mp->hqh[HASHKEY(bp->pgno)]; CIRCLEQ_REMOVE(head, bp, hq); CIRCLEQ_REMOVE(&mp->lqh, bp, q); #ifdef DEBUG { void *spage; spage = bp->page; memset(bp, 0xff, sizeof(BKT) + mp->pagesize); bp->page = spage; } #endif return (bp); } new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) return (NULL); #ifdef STATISTICS ++mp->pagealloc; #endif #if defined(DEBUG) || defined(PURIFY) memset(bp, 0xff, sizeof(BKT) + mp->pagesize); #endif bp->page = (char *)bp + sizeof(BKT); ++mp->curcache; return (bp); } /* * mpool_write * Write a page to disk. */ static int mpool_write(mp, bp) MPOOL *mp; BKT *bp; { off_t off; #ifdef STATISTICS ++mp->pagewrite; #endif /* Run through the user's filter. */ if (mp->pgout) (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); off = mp->pagesize * bp->pgno; if (lseek(mp->fd, off, SEEK_SET) != off) return (RET_ERROR); if ((u_long) write(mp->fd, bp->page, mp->pagesize) != mp->pagesize) return (RET_ERROR); bp->flags &= ~MPOOL_DIRTY; return (RET_SUCCESS); } /* * mpool_look * Lookup a page in the cache. */ static BKT * mpool_look(mp, pgno) MPOOL *mp; pgno_t pgno; { struct _hqh *head; BKT *bp; head = &mp->hqh[HASHKEY(pgno)]; for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next) if (bp->pgno == pgno) { #ifdef STATISTICS ++mp->cachehit; #endif return (bp); } #ifdef STATISTICS ++mp->cachemiss; #endif return (NULL); } #ifdef STATISTICS /* * mpool_stat * Print out cache statistics. */ void mpool_stat(mp) MPOOL *mp; { BKT *bp; int cnt; char *sep; (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); (void)fprintf(stderr, "page size %lu, cacheing %lu pages of %lu page max cache\n", mp->pagesize, mp->curcache, mp->maxcache); (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", mp->pageput, mp->pageget, mp->pagenew); (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", mp->pagealloc, mp->pageflush); if (mp->cachehit + mp->cachemiss) (void)fprintf(stderr, "%.0f%% cache hit rate (%lu hits, %lu misses)\n", ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) * 100, mp->cachehit, mp->cachemiss); (void)fprintf(stderr, "%lu page reads, %lu page writes\n", mp->pageread, mp->pagewrite); sep = ""; cnt = 0; for (bp = mp->lqh.cqh_first; bp != (void *)&mp->lqh; bp = bp->q.cqe_next) { (void)fprintf(stderr, "%s%d", sep, bp->pgno); if (bp->flags & MPOOL_DIRTY) (void)fprintf(stderr, "d"); if (bp->flags & MPOOL_PINNED) (void)fprintf(stderr, "P"); if (++cnt == 10) { sep = "\n"; cnt = 0; } else sep = ", "; } (void)fprintf(stderr, "\n"); } #endif db/mpool.h0100644000076400007640000001055006614102774013225 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mpool.h 8.2 (Berkeley) 7/14/94 */ #ifndef _MPOOL_H #define _MPOOL_H 1 #include /* * The memory pool scheme is a simple one. Each in-memory page is referenced * by a bucket which is threaded in up to two of three ways. All active pages * are threaded on a hash chain (hashed by page number) and an lru chain. * Inactive pages are threaded on a free chain. Each reference to a memory * pool is handed an opaque MPOOL cookie which stores all of this information. */ #define HASHSIZE 128 #define HASHKEY(pgno) ((pgno - 1) % HASHSIZE) /* The BKT structures are the elements of the queues. */ typedef struct _bkt { CIRCLEQ_ENTRY(_bkt) hq; /* hash queue */ CIRCLEQ_ENTRY(_bkt) q; /* lru queue */ void *page; /* page */ pgno_t pgno; /* page number */ #define MPOOL_DIRTY 0x01 /* page needs to be written */ #define MPOOL_PINNED 0x02 /* page is pinned into memory */ u_int8_t flags; /* flags */ } BKT; typedef struct MPOOL { CIRCLEQ_HEAD(_lqh, _bkt) lqh; /* lru queue head */ /* hash queue array */ CIRCLEQ_HEAD(_hqh, _bkt) hqh[HASHSIZE]; pgno_t curcache; /* current number of cached pages */ pgno_t maxcache; /* max number of cached pages */ pgno_t npages; /* number of pages in the file */ u_long pagesize; /* file page size */ int fd; /* file descriptor */ /* page in conversion routine */ void (*pgin) __PMT((void *, pgno_t, void *)); /* page out conversion routine */ void (*pgout) __PMT((void *, pgno_t, void *)); void *pgcookie; /* cookie for page in/out routines */ #ifdef STATISTICS u_long cachehit; u_long cachemiss; u_long pagealloc; u_long pageflush; u_long pageget; u_long pagenew; u_long pageput; u_long pageread; u_long pagewrite; #endif } MPOOL; __BEGIN_DECLS MPOOL *__mpool_open __P((void *, int, pgno_t, pgno_t)); MPOOL *mpool_open __P((void *, int, pgno_t, pgno_t)); void __mpool_filter __P((MPOOL *, void (*)(void *, pgno_t, void *), void (*)(void *, pgno_t, void *), void *)); void mpool_filter __P((MPOOL *, void (*)(void *, pgno_t, void *), void (*)(void *, pgno_t, void *), void *)); void *__mpool_new __P((MPOOL *, pgno_t *)); void *mpool_new __P((MPOOL *, pgno_t *)); void *__mpool_get __P((MPOOL *, pgno_t, u_int)); void *mpool_get __P((MPOOL *, pgno_t, u_int)); int __mpool_put __P((MPOOL *, void *, u_int)); int mpool_put __P((MPOOL *, void *, u_int)); int __mpool_sync __P((MPOOL *)); int mpool_sync __P((MPOOL *)); int __mpool_close __P((MPOOL *)); int mpool_close __P((MPOOL *)); #ifdef STATISTICS void mpool_stat __P((MPOOL *)); #endif __END_DECLS #endif /* mpool.h */ db/ndbm.h0100644000076400007640000000547506352623324013027 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ndbm.h 8.1 (Berkeley) 6/2/93 */ #ifndef _NDBM_H #define _NDBM_H 1 #include /* Map dbm interface onto db(3). */ #define DBM_RDONLY O_RDONLY /* Flags to dbm_store(). */ #define DBM_INSERT 0 #define DBM_REPLACE 1 /* * The db(3) support for ndbm(3) always appends this suffix to the * file name to avoid overwriting the user's original database. */ #define DBM_SUFFIX ".db" typedef struct { char *dptr; int dsize; } datum; typedef DB DBM; #define dbm_pagfno(a) DBM_PAGFNO_NOT_AVAILABLE __BEGIN_DECLS void dbm_close __P((DBM *)); int dbm_delete __P((DBM *, datum)); datum dbm_fetch __P((DBM *, datum)); datum dbm_firstkey __P((DBM *)); long dbm_forder __P((DBM *, datum)); datum dbm_nextkey __P((DBM *)); DBM *dbm_open __P((const char *, int, int)); int dbm_store __P((DBM *, datum, datum, int)); int dbm_dirfno __P((DBM *)); int dbm_error __P((DBM *)); int dbm_clearerr __P((DBM *)); __END_DECLS #endif /* ndbm.h */ db/recno/0040755000076400007640000000000007055362607013041 5ustar cjwatsoncjwatsondb/recno/rec_close.c0100644000076400007640000001160606602177371015143 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_close.c 8.6 (Berkeley) 8/18/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include #include #include "recno.h" /* * __REC_CLOSE -- Close a recno tree. * * Parameters: * dbp: pointer to access method * * Returns: * RET_ERROR, RET_SUCCESS */ int __rec_close(dbp) DB *dbp; { BTREE *t; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } if (__rec_sync(dbp, 0) == RET_ERROR) return (RET_ERROR); /* Committed to closing. */ status = RET_SUCCESS; if (F_ISSET(t, R_MEMMAPPED) && munmap(t->bt_smap, t->bt_msize)) status = RET_ERROR; if (!F_ISSET(t, R_INMEM)) { if (F_ISSET(t, R_CLOSEFP)) { if (fclose(t->bt_rfp)) status = RET_ERROR; } else if (close(t->bt_rfd)) status = RET_ERROR; } if (__bt_close(dbp) == RET_ERROR) status = RET_ERROR; return (status); } /* * __REC_SYNC -- sync the recno tree to disk. * * Parameters: * dbp: pointer to access method * * Returns: * RET_SUCCESS, RET_ERROR. */ int __rec_sync(dbp, flags) const DB *dbp; u_int flags; { struct iovec iov[2]; BTREE *t; DBT data, key; off_t off; recno_t scursor, trec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } if (flags == R_RECNOSYNC) return (__bt_sync(dbp, 0)); if (F_ISSET(t, R_RDONLY | R_INMEM) || !F_ISSET(t, R_MODIFIED)) return (RET_SUCCESS); /* Read any remaining records into the tree. */ if (!F_ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) return (RET_ERROR); /* Rewind the file descriptor. */ if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0) return (RET_ERROR); /* Save the cursor. */ scursor = t->bt_cursor.rcursor; key.size = sizeof(recno_t); key.data = &trec; if (F_ISSET(t, R_FIXLEN)) { /* * We assume that fixed length records are all fixed length. * Any that aren't are either EINVAL'd or corrected by the * record put code. */ status = (dbp->seq)(dbp, &key, &data, R_FIRST); while (status == RET_SUCCESS) { if ((size_t) write(t->bt_rfd, data.data, data.size) != data.size) return (RET_ERROR); status = (dbp->seq)(dbp, &key, &data, R_NEXT); } } else { iov[1].iov_base = &t->bt_bval; iov[1].iov_len = 1; status = (dbp->seq)(dbp, &key, &data, R_FIRST); while (status == RET_SUCCESS) { iov[0].iov_base = data.data; iov[0].iov_len = data.size; if ((size_t) writev(t->bt_rfd, iov, 2) != data.size + 1) return (RET_ERROR); status = (dbp->seq)(dbp, &key, &data, R_NEXT); } } /* Restore the cursor. */ t->bt_cursor.rcursor = scursor; if (status == RET_ERROR) return (RET_ERROR); if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1) return (RET_ERROR); if (ftruncate(t->bt_rfd, off)) return (RET_ERROR); F_CLR(t, R_MODIFIED); return (RET_SUCCESS); } db/recno/rec_delete.c0100644000076400007640000001260606072171275015277 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_delete.c 8.7 (Berkeley) 7/14/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "recno.h" static int rec_rdelete __P((BTREE *, recno_t)); /* * __REC_DELETE -- Delete the item(s) referenced by a key. * * Parameters: * dbp: pointer to access method * key: key to delete * flags: R_CURSOR if deleting what the cursor references * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. */ int __rec_delete(dbp, key, flags) const DB *dbp; const DBT *key; u_int flags; { BTREE *t; recno_t nrec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } switch(flags) { case 0: if ((nrec = *(recno_t *)key->data) == 0) goto einval; if (nrec > t->bt_nrecs) return (RET_SPECIAL); --nrec; status = rec_rdelete(t, nrec); break; case R_CURSOR: if (!F_ISSET(&t->bt_cursor, CURS_INIT)) goto einval; if (t->bt_nrecs == 0) return (RET_SPECIAL); status = rec_rdelete(t, t->bt_cursor.rcursor - 1); if (status == RET_SUCCESS) --t->bt_cursor.rcursor; break; default: einval: errno = EINVAL; return (RET_ERROR); } if (status == RET_SUCCESS) F_SET(t, B_MODIFIED | R_MODIFIED); return (status); } /* * REC_RDELETE -- Delete the data matching the specified key. * * Parameters: * tree: tree * nrec: record to delete * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. */ static int rec_rdelete(t, nrec) BTREE *t; recno_t nrec; { EPG *e; PAGE *h; int status; /* Find the record; __rec_search pins the page. */ if ((e = __rec_search(t, nrec, SDELETE)) == NULL) return (RET_ERROR); /* Delete the record. */ h = e->page; status = __rec_dleaf(t, h, e->index); if (status != RET_SUCCESS) { mpool_put(t->bt_mp, h, 0); return (status); } mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); } /* * __REC_DLEAF -- Delete a single record from a recno leaf page. * * Parameters: * t: tree * index: index on current page to delete * * Returns: * RET_SUCCESS, RET_ERROR. */ int __rec_dleaf(t, h, index) BTREE *t; PAGE *h; u_int32_t index; { RLEAF *rl; indx_t *ip, cnt, offset; u_int32_t nbytes; char *from; void *to; /* * Delete a record from a recno leaf page. Internal records are never * deleted from internal pages, regardless of the records that caused * them to be added being deleted. Pages made empty by deletion are * not reclaimed. They are, however, made available for reuse. * * Pack the remaining entries at the end of the page, shift the indices * down, overwriting the deleted record and its index. If the record * uses overflow pages, make them available for reuse. */ to = rl = GETRLEAF(h, index); if (rl->flags & P_BIGDATA && __ovfl_delete(t, rl->bytes) == RET_ERROR) return (RET_ERROR); nbytes = NRLEAF(rl); /* * Compress the key/data pairs. Compress and adjust the [BR]LEAF * offsets. Reset the headers. */ from = (char *)h + h->upper; memmove(from + nbytes, from, (char *)to - from); h->upper += nbytes; offset = h->linp[index]; for (cnt = &h->linp[index] - (ip = &h->linp[0]); cnt--; ++ip) if (ip[0] < offset) ip[0] += nbytes; for (cnt = &h->linp[NEXTINDEX(h)] - ip; --cnt; ++ip) ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; h->lower -= sizeof(indx_t); --t->bt_nrecs; return (RET_SUCCESS); } db/recno/rec_get.c0100644000076400007640000001626506072171277014623 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_get.c 8.9 (Berkeley) 8/18/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include #include #include "recno.h" /* * __REC_GET -- Get a record from the btree. * * Parameters: * dbp: pointer to access method * key: key to find * data: data to return * flag: currently unused * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. */ int __rec_get(dbp, key, data, flags) const DB *dbp; const DBT *key; DBT *data; u_int flags; { BTREE *t; EPG *e; recno_t nrec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* Get currently doesn't take any flags, and keys of 0 are illegal. */ if (flags || (nrec = *(recno_t *)key->data) == 0) { errno = EINVAL; return (RET_ERROR); } /* * If we haven't seen this record yet, try to find it in the * original file. */ if (nrec > t->bt_nrecs) { if (F_ISSET(t, R_EOF | R_INMEM)) return (RET_SPECIAL); if ((status = t->bt_irec(t, nrec)) != RET_SUCCESS) return (status); } --nrec; if ((e = __rec_search(t, nrec, SEARCH)) == NULL) return (RET_ERROR); status = __rec_ret(t, e, 0, NULL, data); if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; return (status); } /* * __REC_FPIPE -- Get fixed length records from a pipe. * * Parameters: * t: tree * cnt: records to read * * Returns: * RET_ERROR, RET_SUCCESS */ int __rec_fpipe(t, top) BTREE *t; recno_t top; { DBT data; recno_t nrec; size_t len; int ch; u_char *p; if (t->bt_rdata.size < t->bt_reclen) { t->bt_rdata.data = t->bt_rdata.data == NULL ? malloc(t->bt_reclen) : realloc(t->bt_rdata.data, t->bt_reclen); if (t->bt_rdata.data == NULL) return (RET_ERROR); t->bt_rdata.size = t->bt_reclen; } data.data = t->bt_rdata.data; data.size = t->bt_reclen; for (nrec = t->bt_nrecs; nrec < top;) { len = t->bt_reclen; for (p = t->bt_rdata.data;; *p++ = ch) if ((ch = getc(t->bt_rfp)) == EOF || !--len) { if (ch != EOF) *p = ch; if (len != 0) memset(p, t->bt_bval, len); if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) return (RET_ERROR); ++nrec; break; } if (ch == EOF) break; } if (nrec < top) { F_SET(t, R_EOF); return (RET_SPECIAL); } return (RET_SUCCESS); } /* * __REC_VPIPE -- Get variable length records from a pipe. * * Parameters: * t: tree * cnt: records to read * * Returns: * RET_ERROR, RET_SUCCESS */ int __rec_vpipe(t, top) BTREE *t; recno_t top; { DBT data; recno_t nrec; indx_t len; size_t sz; int bval, ch; u_char *p; bval = t->bt_bval; for (nrec = t->bt_nrecs; nrec < top; ++nrec) { for (p = t->bt_rdata.data, sz = t->bt_rdata.size;; *p++ = ch, --sz) { if ((ch = getc(t->bt_rfp)) == EOF || ch == bval) { data.data = t->bt_rdata.data; data.size = p - (u_char *)t->bt_rdata.data; if (ch == EOF && data.size == 0) break; if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) return (RET_ERROR); break; } if (sz == 0) { len = p - (u_char *)t->bt_rdata.data; t->bt_rdata.size += (sz = 256); t->bt_rdata.data = t->bt_rdata.data == NULL ? malloc(t->bt_rdata.size) : realloc(t->bt_rdata.data, t->bt_rdata.size); if (t->bt_rdata.data == NULL) return (RET_ERROR); p = (u_char *)t->bt_rdata.data + len; } } if (ch == EOF) break; } if (nrec < top) { F_SET(t, R_EOF); return (RET_SPECIAL); } return (RET_SUCCESS); } /* * __REC_FMAP -- Get fixed length records from a file. * * Parameters: * t: tree * cnt: records to read * * Returns: * RET_ERROR, RET_SUCCESS */ int __rec_fmap(t, top) BTREE *t; recno_t top; { DBT data; recno_t nrec; u_char *sp, *ep, *p; size_t len; if (t->bt_rdata.size < t->bt_reclen) { t->bt_rdata.data = t->bt_rdata.data == NULL ? malloc(t->bt_reclen) : realloc(t->bt_rdata.data, t->bt_reclen); if (t->bt_rdata.data == NULL) return (RET_ERROR); t->bt_rdata.size = t->bt_reclen; } data.data = t->bt_rdata.data; data.size = t->bt_reclen; sp = (u_char *)t->bt_cmap; ep = (u_char *)t->bt_emap; for (nrec = t->bt_nrecs; nrec < top; ++nrec) { if (sp >= ep) { F_SET(t, R_EOF); return (RET_SPECIAL); } len = t->bt_reclen; for (p = t->bt_rdata.data; sp < ep && len > 0; *p++ = *sp++, --len); if (len != 0) memset(p, t->bt_bval, len); if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) return (RET_ERROR); } t->bt_cmap = (caddr_t)sp; return (RET_SUCCESS); } /* * __REC_VMAP -- Get variable length records from a file. * * Parameters: * t: tree * cnt: records to read * * Returns: * RET_ERROR, RET_SUCCESS */ int __rec_vmap(t, top) BTREE *t; recno_t top; { DBT data; u_char *sp, *ep; recno_t nrec; int bval; sp = (u_char *)t->bt_cmap; ep = (u_char *)t->bt_emap; bval = t->bt_bval; for (nrec = t->bt_nrecs; nrec < top; ++nrec) { if (sp >= ep) { F_SET(t, R_EOF); return (RET_SPECIAL); } for (data.data = sp; sp < ep && *sp != bval; ++sp); data.size = sp - (u_char *)data.data; if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) return (RET_ERROR); ++sp; } t->bt_cmap = (caddr_t)sp; return (RET_SUCCESS); } db/recno/rec_open.c0100644000076400007640000001520206072171300014756 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Olson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_open.c 8.10 (Berkeley) 9/1/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include #include #include #include #include "recno.h" DB * __rec_open(fname, flags, mode, openinfo, dflags) const char *fname; int flags, mode, dflags; const RECNOINFO *openinfo; { BTREE *t; BTREEINFO btopeninfo; DB *dbp; PAGE *h; struct stat sb; int rfd, sverrno; /* Open the user's file -- if this fails, we're done. */ if (fname != NULL && (rfd = open(fname, flags, mode)) < 0) return (NULL); /* Create a btree in memory (backed by disk). */ dbp = NULL; if (openinfo) { if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT)) goto einval; btopeninfo.flags = 0; btopeninfo.cachesize = openinfo->cachesize; btopeninfo.maxkeypage = 0; btopeninfo.minkeypage = 0; btopeninfo.psize = openinfo->psize; btopeninfo.compare = NULL; btopeninfo.prefix = NULL; btopeninfo.lorder = openinfo->lorder; dbp = __bt_open(openinfo->bfname, O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags); } else dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags); if (dbp == NULL) goto err; /* * Some fields in the tree structure are recno specific. Fill them * in and make the btree structure look like a recno structure. We * don't change the bt_ovflsize value, it's close enough and slightly * bigger. */ t = dbp->internal; if (openinfo) { if (openinfo->flags & R_FIXEDLEN) { F_SET(t, R_FIXLEN); t->bt_reclen = openinfo->reclen; if (t->bt_reclen == 0) goto einval; } t->bt_bval = openinfo->bval; } else t->bt_bval = '\n'; F_SET(t, R_RECNO); if (fname == NULL) F_SET(t, R_EOF | R_INMEM); else t->bt_rfd = rfd; if (fname != NULL) { /* * In 4.4BSD, stat(2) returns true for ISSOCK on pipes. * Unfortunately, that's not portable, so we use lseek * and check the errno values. */ errno = 0; if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) { switch (flags & O_ACCMODE) { case O_RDONLY: F_SET(t, R_RDONLY); break; default: goto einval; } slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) goto err; F_SET(t, R_CLOSEFP); t->bt_irec = F_ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe; } else { switch (flags & O_ACCMODE) { case O_RDONLY: F_SET(t, R_RDONLY); break; case O_RDWR: break; default: goto einval; } if (fstat(rfd, &sb)) goto err; /* * Kluge -- we'd like to test to see if the file is too * big to mmap. Since, we don't know what size or type * off_t's or size_t's are, what the largest unsigned * integral type is, or what random insanity the local * C compiler will perpetrate, doing the comparison in * a portable way is flatly impossible. Hope that mmap * fails if the file is too large. */ if (sb.st_size == 0) F_SET(t, R_EOF); else { #ifdef MMAP_NOT_AVAILABLE /* * XXX * Mmap doesn't work correctly on many current * systems. In particular, it can fail subtly, * with cache coherency problems. Don't use it * for now. */ t->bt_msize = sb.st_size; if ((t->bt_smap = mmap(NULL, t->bt_msize, PROT_READ, MAP_PRIVATE, rfd, (off_t)0)) == (caddr_t)-1) goto slow; t->bt_cmap = t->bt_smap; t->bt_emap = t->bt_smap + sb.st_size; t->bt_irec = F_ISSET(t, R_FIXLEN) ? __rec_fmap : __rec_vmap; F_SET(t, R_MEMMAPPED); #else goto slow; #endif } } } /* Use the recno routines. */ dbp->close = __rec_close; dbp->del = __rec_delete; dbp->fd = __rec_fd; dbp->get = __rec_get; dbp->put = __rec_put; dbp->seq = __rec_seq; dbp->sync = __rec_sync; /* If the root page was created, reset the flags. */ if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL) goto err; if ((h->flags & P_TYPE) == P_BLEAF) { F_CLR(h, P_TYPE); F_SET(h, P_RLEAF); mpool_put(t->bt_mp, h, MPOOL_DIRTY); } else mpool_put(t->bt_mp, h, 0); if (openinfo && openinfo->flags & R_SNAPSHOT && !F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) goto err; return (dbp); einval: errno = EINVAL; err: sverrno = errno; if (dbp != NULL) (void)__bt_close(dbp); if (fname != NULL) (void)close(rfd); errno = sverrno; return (NULL); } int __rec_fd(dbp) const DB *dbp; { BTREE *t; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* In-memory database can't have a file descriptor. */ if (F_ISSET(t, R_INMEM)) { errno = ENOENT; return (-1); } return (t->bt_rfd); } db/recno/rec_put.c0100644000076400007640000001642406242764377014661 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_put.c 8.7 (Berkeley) 8/18/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include "recno.h" /* * __REC_PUT -- Add a recno item to the tree. * * Parameters: * dbp: pointer to access method * key: key * data: data * flag: R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is * already in the tree and R_NOOVERWRITE specified. */ int __rec_put(dbp, key, data, flags) const DB *dbp; DBT *key; const DBT *data; u_int flags; { BTREE *t; DBT fdata, tdata; recno_t nrec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* * If using fixed-length records, and the record is long, return * EINVAL. If it's short, pad it out. Use the record data return * memory, it's only short-term. */ if (F_ISSET(t, R_FIXLEN) && data->size != t->bt_reclen) { if (data->size > t->bt_reclen) goto einval; if (t->bt_rdata.size < t->bt_reclen) { t->bt_rdata.data = t->bt_rdata.data == NULL ? malloc(t->bt_reclen) : realloc(t->bt_rdata.data, t->bt_reclen); if (t->bt_rdata.data == NULL) return (RET_ERROR); t->bt_rdata.size = t->bt_reclen; } memmove(t->bt_rdata.data, data->data, data->size); memset((char *)t->bt_rdata.data + data->size, t->bt_bval, t->bt_reclen - data->size); fdata.data = t->bt_rdata.data; fdata.size = t->bt_reclen; } else { fdata.data = data->data; fdata.size = data->size; } switch (flags) { case R_CURSOR: if (!F_ISSET(&t->bt_cursor, CURS_INIT)) goto einval; nrec = t->bt_cursor.rcursor; break; case R_SETCURSOR: if ((nrec = *(recno_t *)key->data) == 0) goto einval; break; case R_IAFTER: if ((nrec = *(recno_t *)key->data) == 0) { nrec = 1; flags = R_IBEFORE; } break; case 0: case R_IBEFORE: if ((nrec = *(recno_t *)key->data) == 0) goto einval; break; case R_NOOVERWRITE: if ((nrec = *(recno_t *)key->data) == 0) goto einval; if (nrec <= t->bt_nrecs) return (RET_SPECIAL); break; default: einval: errno = EINVAL; return (RET_ERROR); } /* * Make sure that records up to and including the put record are * already in the database. If skipping records, create empty ones. */ if (nrec > t->bt_nrecs) { if (!F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, nrec) == RET_ERROR) return (RET_ERROR); if (nrec > t->bt_nrecs + 1) { if (F_ISSET(t, R_FIXLEN)) { if ((tdata.data = (void *)malloc(t->bt_reclen)) == NULL) return (RET_ERROR); tdata.size = t->bt_reclen; memset(tdata.data, t->bt_bval, tdata.size); } else { tdata.data = NULL; tdata.size = 0; } while (nrec > t->bt_nrecs + 1) if (__rec_iput(t, t->bt_nrecs, &tdata, 0) != RET_SUCCESS) return (RET_ERROR); if (F_ISSET(t, R_FIXLEN)) free(tdata.data); } } if ((status = __rec_iput(t, nrec - 1, &fdata, flags)) != RET_SUCCESS) return (status); if (flags == R_SETCURSOR) t->bt_cursor.rcursor = nrec; F_SET(t, R_MODIFIED); return (__rec_ret(t, NULL, nrec, key, NULL)); } /* * __REC_IPUT -- Add a recno item to the tree. * * Parameters: * t: tree * nrec: record number * data: data * * Returns: * RET_ERROR, RET_SUCCESS */ int __rec_iput(t, nrec, data, flags) BTREE *t; recno_t nrec; const DBT *data; u_int flags; { DBT tdata; EPG *e; PAGE *h; indx_t index, nxtindex; pgno_t pg; u_int32_t nbytes; int dflags, status; char *dest, db[NOVFLSIZE]; /* * If the data won't fit on a page, store it on indirect pages. * * XXX * If the insert fails later on, these pages aren't recovered. */ if (data->size > t->bt_ovflsize) { if (__ovfl_put(t, data, &pg) == RET_ERROR) return (RET_ERROR); tdata.data = db; tdata.size = NOVFLSIZE; *(pgno_t *)db = pg; *(u_int32_t *)(db + sizeof(pgno_t)) = data->size; dflags = P_BIGDATA; data = &tdata; } else dflags = 0; /* __rec_search pins the returned page. */ if ((e = __rec_search(t, nrec, nrec > t->bt_nrecs || flags == R_IAFTER || flags == R_IBEFORE ? SINSERT : SEARCH)) == NULL) return (RET_ERROR); h = e->page; index = e->index; /* * Add the specified key/data pair to the tree. The R_IAFTER and * R_IBEFORE flags insert the key after/before the specified key. * * Pages are split as required. */ switch (flags) { case R_IAFTER: ++index; break; case R_IBEFORE: break; default: if (nrec < t->bt_nrecs && __rec_dleaf(t, h, index) == RET_ERROR) { mpool_put(t->bt_mp, h, 0); return (RET_ERROR); } break; } /* * If not enough room, split the page. The split code will insert * the key and data and unpin the current page. If inserting into * the offset array, shift the pointers up. */ nbytes = NRLEAFDBT(data->size); if ((u_int32_t) (h->upper - h->lower) < nbytes + sizeof(indx_t)) { status = __bt_split(t, h, NULL, data, dflags, nbytes, index); if (status == RET_SUCCESS) ++t->bt_nrecs; return (status); } if (index < (nxtindex = NEXTINDEX(h))) memmove(h->linp + index + 1, h->linp + index, (nxtindex - index) * sizeof(indx_t)); h->lower += sizeof(indx_t); h->linp[index] = h->upper -= nbytes; dest = (char *)h + h->upper; WR_RLEAF(dest, data, dflags); ++t->bt_nrecs; F_SET(t, B_MODIFIED); mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); } db/recno/rec_search.c0100644000076400007640000000751706072171305015301 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_search.c 8.4 (Berkeley) 7/14/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include "recno.h" /* * __REC_SEARCH -- Search a btree for a key. * * Parameters: * t: tree to search * recno: key to find * op: search operation * * Returns: * EPG for matching record, if any, or the EPG for the location of the * key, if it were inserted into the tree. * * Returns: * The EPG for matching record, if any, or the EPG for the location * of the key, if it were inserted into the tree, is entered into * the bt_cur field of the tree. A pointer to the field is returned. */ EPG * __rec_search(t, recno, op) BTREE *t; recno_t recno; enum SRCHOP op; { register indx_t index; register PAGE *h; EPGNO *parent; RINTERNAL *r; pgno_t pg; indx_t top; recno_t total; int sverrno; BT_CLR(t); for (pg = P_ROOT, total = 0;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) goto err; if (h->flags & P_RLEAF) { t->bt_cur.page = h; t->bt_cur.index = recno - total; return (&t->bt_cur); } for (index = 0, top = NEXTINDEX(h);;) { r = GETRINTERNAL(h, index); if (++index == top || total + r->nrecs > recno) break; total += r->nrecs; } BT_PUSH(t, pg, index - 1); pg = r->pgno; switch (op) { case SDELETE: --GETRINTERNAL(h, (index - 1))->nrecs; mpool_put(t->bt_mp, h, MPOOL_DIRTY); break; case SINSERT: ++GETRINTERNAL(h, (index - 1))->nrecs; mpool_put(t->bt_mp, h, MPOOL_DIRTY); break; case SEARCH: mpool_put(t->bt_mp, h, 0); break; } } /* Try and recover the tree. */ err: sverrno = errno; if (op != SEARCH) while ((parent = BT_POP(t)) != NULL) { if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) break; if (op == SINSERT) --GETRINTERNAL(h, parent->index)->nrecs; else ++GETRINTERNAL(h, parent->index)->nrecs; mpool_put(t->bt_mp, h, MPOOL_DIRTY); } errno = sverrno; return (NULL); } db/recno/rec_seq.c0100644000076400007640000000746406150746672014641 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_seq.c 8.3 (Berkeley) 7/14/94"; #endif /* not lint */ #include #include #include #include #include #include #include "recno.h" /* * __REC_SEQ -- Recno sequential scan interface. * * Parameters: * dbp: pointer to access method * key: key for positioning and return value * data: data return value * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. * * Returns: * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ int __rec_seq(dbp, key, data, flags) const DB *dbp; DBT *key, *data; u_int flags; { BTREE *t; EPG *e; recno_t nrec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } switch(flags) { case R_CURSOR: if ((nrec = *(recno_t *)key->data) == 0) goto einval; break; case R_NEXT: if (F_ISSET(&t->bt_cursor, CURS_INIT)) { nrec = t->bt_cursor.rcursor + 1; break; } /* FALLTHROUGH */ case R_FIRST: nrec = 1; break; case R_PREV: if (F_ISSET(&t->bt_cursor, CURS_INIT)) { if ((nrec = t->bt_cursor.rcursor - 1) == 0) return (RET_SPECIAL); break; } /* FALLTHROUGH */ case R_LAST: if (!F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) return (RET_ERROR); nrec = t->bt_nrecs; break; default: einval: errno = EINVAL; return (RET_ERROR); } if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) { if (!F_ISSET(t, R_EOF | R_INMEM) && (status = t->bt_irec(t, nrec)) != RET_SUCCESS) return (status); if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) return (RET_SPECIAL); } if ((e = __rec_search(t, nrec - 1, SEARCH)) == NULL) return (RET_ERROR); F_SET(&t->bt_cursor, CURS_INIT); t->bt_cursor.rcursor = nrec; status = __rec_ret(t, e, nrec, key, data); if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; return (status); } db/recno/rec_utils.c0100644000076400007640000000753306256366211015200 0ustar cjwatsoncjwatson/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rec_utils.c 8.6 (Berkeley) 7/16/94"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "recno.h" /* * __rec_ret -- * Build return data. * * Parameters: * t: tree * e: key/data pair to be returned * nrec: record number * key: user's key structure * data: user's data structure * * Returns: * RET_SUCCESS, RET_ERROR. */ int __rec_ret(t, e, nrec, key, data) BTREE *t; EPG *e; recno_t nrec; DBT *key, *data; { RLEAF *rl; void *p; if (key == NULL) goto dataonly; /* We have to copy the key, it's not on the page. */ if (sizeof(recno_t) > t->bt_rkey.size) { p = (void *)(t->bt_rkey.data == NULL ? malloc(sizeof(recno_t)) : realloc(t->bt_rkey.data, sizeof(recno_t))); if (p == NULL) return (RET_ERROR); t->bt_rkey.data = p; t->bt_rkey.size = sizeof(recno_t); } memmove(t->bt_rkey.data, &nrec, sizeof(recno_t)); key->size = sizeof(recno_t); key->data = t->bt_rkey.data; dataonly: if (data == NULL) return (RET_SUCCESS); /* * We must copy big keys/data to make them contiguous. Otherwise, * leave the page pinned and don't copy unless the user specified * concurrent access. */ rl = GETRLEAF(e->page, e->index); if (rl->flags & P_BIGDATA) { if (__ovfl_get(t, rl->bytes, &data->size, &t->bt_rdata.data, &t->bt_rdata.size)) return (RET_ERROR); data->data = t->bt_rdata.data; } else if (F_ISSET(t, B_DB_LOCK)) { /* Use +1 in case the first record retrieved is 0 length. */ if (rl->dsize + 1 > t->bt_rdata.size) { p = (void *)(t->bt_rdata.data == NULL ? malloc(rl->dsize + 1) : realloc(t->bt_rdata.data, rl->dsize + 1)); if (p == NULL) return (RET_ERROR); t->bt_rdata.data = p; t->bt_rdata.size = rl->dsize + 1; } memmove(t->bt_rdata.data, rl->bytes, rl->dsize); data->size = rl->dsize; data->data = t->bt_rdata.data; } else { data->size = rl->dsize; data->data = rl->bytes; } return (RET_SUCCESS); } db/recno/extern.h0100644000076400007640000000522206072171272014507 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)extern.h 8.3 (Berkeley) 6/4/94 */ #include "../btree/extern.h" int __rec_close __P((DB *)); int __rec_delete __P((const DB *, const DBT *, u_int)); int __rec_dleaf __P((BTREE *, PAGE *, u_int32_t)); int __rec_fd __P((const DB *)); int __rec_fmap __P((BTREE *, recno_t)); int __rec_fout __P((BTREE *)); int __rec_fpipe __P((BTREE *, recno_t)); int __rec_get __P((const DB *, const DBT *, DBT *, u_int)); int __rec_iput __P((BTREE *, recno_t, const DBT *, u_int)); int __rec_put __P((const DB *dbp, DBT *, const DBT *, u_int)); int __rec_ret __P((BTREE *, EPG *, recno_t, DBT *, DBT *)); EPG *__rec_search __P((BTREE *, recno_t, enum SRCHOP)); int __rec_seq __P((const DB *, DBT *, DBT *, u_int)); int __rec_sync __P((const DB *, u_int)); int __rec_vmap __P((BTREE *, recno_t)); int __rec_vout __P((BTREE *)); int __rec_vpipe __P((BTREE *, recno_t)); db/recno/recno.h0100644000076400007640000000370706072171313014312 0ustar cjwatsoncjwatson/*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)recno.h 8.1 (Berkeley) 6/4/93 */ enum SRCHOP { SDELETE, SINSERT, SEARCH}; /* Rec_search operation. */ #include "../btree/btree.h" #include "extern.h"