CDB_File-0.97/0000755000076500000240000000000011546652653012014 5ustar toddrstaffCDB_File-0.97/ACKNOWLEDGE0000644000076500000240000000173411545432375013463 0ustar toddrstaffThe help of these people is gratefully acknowledged. AK Andreas Koenig BD Bert Driehuis CMC Chris Chalfant DB Dan Bernstein FvL Felix von Leitner FL Frederik Lindberg GT Gene Titus IP Ian Phillipps IW Ira Woodhead JB Jos Backus JH John Horne JPB Joao Bordalo MdlR Michael de la Rue MJP M J Pomraning MP Mark Powell NMS Nickolay Saukh RDM Raul Miller RDW Rich Williams SB Stephen Beckstrom-Sternberg Tim Goodwin 2001-12-18 CDB_File-0.97/CDB_File.pm0000644000076500000240000002414311546652537013706 0ustar toddrstaffpackage CDB_File; use strict; use Carp; use vars qw($VERSION @ISA @EXPORT_OK); use DynaLoader (); use Exporter (); @ISA = qw(Exporter DynaLoader); @EXPORT_OK = qw(create); $VERSION = '0.97'; =head1 NAME CDB_File - Perl extension for access to cdb databases =head1 SYNOPSIS use CDB_File; $c = tie %h, 'CDB_File', 'file.cdb' or die "tie failed: $!\n"; $fh = $c->handle; sysseek $fh, $c->datapos, 0 or die ...; sysread $fh, $x, $c->datalen; undef $c; untie %h; $t = new CDB_File ('t.cdb', "t.$$") or die ...; $t->insert('key', 'value'); $t->finish; CDB_File::create %t, $file, "$file.$$"; or use CDB_File 'create'; create %t, $file, "$file.$$"; =head1 DESCRIPTION B is a module which provides a Perl interface to Dan Berstein's B package: cdb is a fast, reliable, lightweight package for creating and reading constant databases. =head2 Reading from a cdb After the C shown above, accesses to C<%h> will refer to the B file C, as described in L. Low level access to the database is provided by the three methods C, C, and C. To use them, you must remember the C object returned by the C call: C<$c> in the example above. The C and C methods return the file offset position and length respectively of the most recently visited key (for example, via C). Beware that if you create an extra reference to the C object (like C<$c> in the example above) you must destroy it (with C) before calling C on the hash. This ensures that the object's C method is called. Note that C will check this for you; see L for further details. =head2 Creating a cdb A B file is created in three steps. First call C, where C<$final> is the name of the database to be created, and C<$tmp> is the name of a temporary file which can be atomically renamed to C<$final>. Secondly, call the C method once for each (I, I) pair. Finally, call the C method to complete the creation and renaming of the B file. Alternatively, call the C method with multiple key/value pairs. This can be significantly faster because there is less crossing over the bridge from perl to C code. One simple way to do this is to pass in an entire hash, as in: C<< $cdbmaker->insert(%hash); >>. A simpler interface to B file creation is provided by C. This creates a B file named C<$final> containing the contents of C<%t>. As before, C<$tmp> must name a temporary file which can be atomically renamed to C<$final>. C may be imported. =head1 EXAMPLES These are all complete programs. 1. Convert a Berkeley DB (B-tree) database to B format. use CDB_File; use DB_File; tie %h, DB_File, $ARGV[0], O_RDONLY, undef, $DB_BTREE or die "$0: can't tie to $ARGV[0]: $!\n"; CDB_File::create %h, $ARGV[1], "$ARGV[1].$$" or die "$0: can't create cdb: $!\n"; 2. Convert a flat file to B format. In this example, the flat file consists of one key per line, separated by a colon from the value. Blank lines and lines beginning with B<#> are skipped. use CDB_File; $cdb = new CDB_File("data.cdb", "data.$$") or die "$0: new CDB_File failed: $!\n"; while (<>) { next if /^$/ or /^#/; chop; ($k, $v) = split /:/, $_, 2; if (defined $v) { $cdb->insert($k, $v); } else { warn "bogus line: $_\n"; } } $cdb->finish or die "$0: CDB_File finish failed: $!\n"; 3. Perl version of B. use CDB_File; tie %data, 'CDB_File', $ARGV[0] or die "$0: can't tie to $ARGV[0]: $!\n"; while (($k, $v) = each %data) { print '+', length $k, ',', length $v, ":$k->$v\n"; } print "\n"; 4. For really enormous data values, you can use C, C, and C, in combination with C and C, to avoid reading the values into memory. Here is the script F, which can extract uncompressed files and directories from a B file. use CDB_File; sub unnetstrings { my($netstrings) = @_; my @result; while ($netstrings =~ s/^([0-9]+)://) { push @result, substr($netstrings, 0, $1, ''); $netstrings =~ s/^,//; } return @result; } my $chunk = 8192; sub extract { my($file, $t, $b) = @_; my $head = $$b{"H$file"}; my ($code, $type) = $head =~ m/^([0-9]+)(.)/; if ($type eq "/") { mkdir $file, 0777; } elsif ($type eq "_") { my ($total, $now, $got, $x); open OUT, ">$file" or die "open for output: $!\n"; exists $$b{"D$code"} or die "corrupt bun file\n"; my $fh = $t->handle; sysseek $fh, $t->datapos, 0; $total = $t->datalen; while ($total) { $now = ($total > $chunk) ? $chunk : $total; $got = sysread $fh, $x, $now; if (not $got) { die "read error\n"; } $total -= $got; print OUT $x; } close OUT; } else { print STDERR "warning: skipping unknown file type\n"; } } die "usage\n" if @ARGV != 1; my (%b, $t); $t = tie %b, 'CDB_File', $ARGV[0] or die "tie: $!\n"; map { extract $_, $t, \%b } unnetstrings $b{""}; 5. Although a B file is constant, you can simulate updating it in Perl. This is an expensive operation, as you have to create a new database, and copy into it everything that's unchanged from the old database. (As compensation, the update does not affect database readers. The old database is available for them, till the moment the new one is Ced.) use CDB_File; $file = 'data.cdb'; $new = new CDB_File($file, "$file.$$") or die "$0: new CDB_File failed: $!\n"; # Add the new values; remember which keys we've seen. while (<>) { chop; ($k, $v) = split; $new->insert($k, $v); $seen{$k} = 1; } # Add any old values that haven't been replaced. tie %old, 'CDB_File', $file or die "$0: can't tie to $file: $!\n"; while (($k, $v) = each %old) { $new->insert($k, $v) unless $seen{$k}; } $new->finish or die "$0: CDB_File finish failed: $!\n"; =head1 REPEATED KEYS Most users can ignore this section. A B file can contain repeated keys. If the C method is called more than once with the same key during the creation of a B file, that key will be repeated. Here's an example. $cdb = new CDB_File ("$file.cdb", "$file.$$") or die ...; $cdb->insert('cat', 'gato'); $cdb->insert('cat', 'chat'); $cdb->finish; Normally, any attempt to access a key retrieves the first value stored under that key. This code snippet always prints B. $catref = tie %catalogue, CDB_File, "$file.cdb" or die ...; print "$catalogue{cat}"; However, all the usual ways of iterating over a hash---C, C, and C---do the Right Thing, even in the presence of repeated keys. This code snippet prints B. print join(' ', keys %catalogue, values %catalogue); And these two both print B, although the second is more efficient. foreach $key (keys %catalogue) { print "$key:$catalogue{$key} "; } while (($key, $val) = each %catalogue) { print "$key:$val "; } The C method retrieves all the values associated with a key. It returns a reference to an array containing all the values. This code prints B. print "@{$catref->multi_get('cat')}"; C always returns an array reference. If the key was not found in the database, it will be a reference to an empty array. To test whether the key was found, you must test the array, and not the reference. $x = $catref->multiget($key); warn "$key not found\n" unless $x; # WRONG; message never printed warn "$key not found\n" unless @$x; # Correct =head1 RETURN VALUES The routines C, C, and C return B if the attempted operation failed; C<$!> contains the reason for failure. =head1 DIAGNOSTICS The following fatal errors may occur. (See L if you want to trap them.) =over 4 =item Modification of a CDB_File attempted You attempted to modify a hash tied to a B. =item CDB database too large You attempted to create a B file larger than 4 gigabytes. =item [ Write to | Read of | Seek in ] CDB_File failed: If B is B, you tried to C to access something that isn't a B file. Otherwise a serious OS level problem occurred, for example, you have run out of disk space. =back =head1 PERFORMANCE Sometimes you need to get the most performance possible out of a library. Rumour has it that perl's tie() interface is slow. In order to get around that you can use CDB_File in an object oriented fashion, rather than via tie(). my $cdb = CDB_File->TIEHASH('/path/to/cdbfile.cdb'); if ($cdb->EXISTS('key')) { print "Key is: ", $cdb->FETCH('key'), "\n"; } For more information on the methods available on tied hashes see L. =head1 BUGS The C interface could be done with C. =head1 SEE ALSO cdb(3). =head1 AUTHOR Tim Goodwin, . B began on 1997-01-08. Now maintained by Matt Sergeant, =cut bootstrap CDB_File $VERSION; sub CLEAR { croak "Modification of a CDB_File attempted" } sub DELETE { &CLEAR } sub STORE { &CLEAR } # Must be preloaded for the prototype. sub create(\%$$) { my($RHdata, $fn, $fntemp) = @_; my $cdb = new CDB_File($fn, $fntemp) or return undef; my($k, $v); $cdb->insert(%$RHdata); $cdb->finish; return 1; } 1; CDB_File-0.97/CDB_File.xs0000644000076500000240000004043111545432375013715 0ustar toddrstaff/* Most of this is reasonably straightforward. The complications arise when we are "iterating" over the CDB file, that is to say, using `keys' or `values' or `each' to retrieve all the data in the file in order. This interface stores extra data to allow us to track iterations: end is a pointer to the end of data in the CDB file, and also a flag which indicates whether we are iterating or not (note that the end of data occurs at a position >= 2048); curkey is a copy of the current key; curpos is the file offset of curkey; and fetch_advance is 0 for FIRSTKEY, fetch, NEXTKEY, fetch, NEXTKEY, fetch, ... but 1 for FIRSTKEY, NEXTKEY, NEXTKEY, ..., fetch, fetch, fetch, ... Don't tell the OO Police, but there are actually two different objects called CDB_File. One is created by TIEHASH, and accessed by the usual tied hash methods (FETCH, FIRSTKEY, etc.). The other is created by new, and accessed by insert and finish. In both cases, the object is a blessed reference to a scalar. The scalar contains either a struct cdbobj or a struct cdbmakeobj. It gets a little messy in DESTROY: since this method will automatically be called for both sorts of object, it distinguishes them by their different sizes. */ #ifdef __cplusplus extern "C" { #endif #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #include #include #include #include #include #include #ifdef WIN32 #define fsync _commit #endif #ifdef HASMMAP #include #endif /* We need to whistle up an error number for a file that is not a CDB file. The BSDish EFTYPE probably gives the most useful error message; failing that we'll settle for the Single Unix Specification v2 EPROTO; and finally the rather inappropriate, but universally(?) implemented, EINVAL. */ #ifdef EFTYPE #else #ifdef EPROTO #define EFTYPE EPROTO #else #define EFTYPE EINVAL #endif #endif #ifdef __cplusplus } #endif struct t_cdb { PerlIO *fh; /* */ #ifdef HASMMAP char *map; #endif U32 end; /* If non zero, the file offset of the first byte of hash tables. */ SV *curkey; /* While iterating: a copy of the current key; */ U32 curpos; /* the file offset of the current record. */ int fetch_advance; /* the kludge */ U32 size; /* initialized if map is nonzero */ U32 loop; /* number of hash slots searched under this key */ U32 khash; /* initialized if loop is nonzero */ U32 kpos; /* initialized if loop is nonzero */ U32 hpos; /* initialized if loop is nonzero */ U32 hslots; /* initialized if loop is nonzero */ U32 dpos; /* initialized if cdb_findnext() returns 1 */ U32 dlen; /* initialized if cdb_findnext() returns 1 */ } ; typedef struct t_cdb cdb; #define CDB_HPLIST 1000 struct cdb_hp { U32 h; U32 p; } ; struct cdb_hplist { struct cdb_hp hp[CDB_HPLIST]; struct cdb_hplist *next; int num; } ; struct t_cdb_make { PerlIO *f; /* Handle of file being created. */ char *fn; /* Final name of file. */ char *fntemp; /* Temporary name of file. */ char final[2048]; char bspace[1024]; U32 count[256]; U32 start[256]; struct cdb_hplist *head; struct cdb_hp *split; /* includes space for hash */ struct cdb_hp *hash; U32 numentries; U32 pos; int fd; } ; typedef struct t_cdb_make cdb_make; static void writeerror() { croak("Write to CDB_File failed: %s", Strerror(errno)); } static void readerror() { croak("Read of CDB_File failed: %s", Strerror(errno)); } static void seekerror() { croak("Seek in CDB_File failed: %s", Strerror(errno)); } static void nomem() { croak("Out of memory!"); } static int cdb_make_start(cdb_make *c) { c->head = 0; c->split = 0; c->hash = 0; c->numentries = 0; c->pos = sizeof c->final; return PerlIO_seek(c->f, c->pos, SEEK_SET); } static int posplus(cdb_make *c, U32 len) { U32 newpos = c->pos + len; if (newpos < len) { errno = ENOMEM; return -1; } c->pos = newpos; return 0; } static int cdb_make_addend(cdb_make *c, unsigned int keylen, unsigned int datalen, U32 h) { struct cdb_hplist *head; head = c->head; if (!head || (head->num >= CDB_HPLIST)) { New(0xCDB, head, 1, struct cdb_hplist); head->num = 0; head->next = c->head; c->head = head; } head->hp[head->num].h = h; head->hp[head->num].p = c->pos; ++head->num; ++c->numentries; if (posplus(c, 8) == -1) return -1; if (posplus(c, keylen) == -1) return -1; if (posplus(c, datalen) == -1) return -1; return 0; } #define CDB_HASHSTART 5381 static U32 cdb_hashadd(U32 h, unsigned char c) { h += (h << 5); return h ^ c; } static U32 cdb_hash(char *buf, unsigned int len) { U32 h; h = CDB_HASHSTART; while (len) { h = cdb_hashadd(h,*buf++); --len; } return h; } static void uint32_pack(char s[4], U32 u) { s[0] = u & 255; u >>= 8; s[1] = u & 255; u >>= 8; s[2] = u & 255; s[3] = u >> 8; } static void uint32_unpack(char s[4], U32 *u) { U32 result; result = (unsigned char) s[3]; result <<= 8; result += (unsigned char) s[2]; result <<= 8; result += (unsigned char) s[1]; result <<= 8; result += (unsigned char) s[0]; *u = result; } static void cdb_findstart(cdb *c) { c->loop = 0; } static int cdb_read(cdb *c, char *buf, unsigned int len, U32 pos) { #ifdef HASMMAP if (c->map) { if ((pos > c->size) || (c->size - pos < len)) { errno = EFTYPE; return -1; } memcpy(buf, c->map + pos, len); return 0; } #endif if (PerlIO_seek(c->fh, pos, SEEK_SET) == -1) return -1; while (len > 0) { int r; do r = PerlIO_read(c->fh, buf, len); while ((r == -1) && (errno == EINTR)); if (r == -1) return -1; if (r == 0) { errno = EFTYPE; return -1; } buf += r; len -= r; } return 0; } static int match(cdb *c,char *key,unsigned int len, U32 pos) { char buf[32]; int n; while (len > 0) { n = sizeof buf; if (n > len) n = len; if (cdb_read(c, buf, n, pos) == -1) return -1; if (memcmp(buf, key, n)) return 0; pos += n; key += n; len -= n; } return 1; } static int cdb_findnext(cdb *c,char *key,unsigned int len) { char buf[8]; U32 pos; U32 u; /* Matt: reset these so if a search fails they are zero'd */ c->dpos = 0; c->dlen = 0; if (!c->loop) { u = cdb_hash(key,len); if (cdb_read(c,buf,8,(u << 3) & 2047) == -1) return -1; uint32_unpack(buf + 4,&c->hslots); if (!c->hslots) return 0; uint32_unpack(buf,&c->hpos); c->khash = u; u >>= 8; u %= c->hslots; u <<= 3; c->kpos = c->hpos + u; } while (c->loop < c->hslots) { if (cdb_read(c,buf,8,c->kpos) == -1) return -1; uint32_unpack(buf + 4,&pos); if (!pos) return 0; c->loop += 1; c->kpos += 8; if (c->kpos == c->hpos + (c->hslots << 3)) c->kpos = c->hpos; uint32_unpack(buf,&u); if (u == c->khash) { if (cdb_read(c,buf,8,pos) == -1) return -1; uint32_unpack(buf,&u); if (u == len) switch(match(c,key,len,pos + 8)) { case -1: return -1; case 1: uint32_unpack(buf + 4,&c->dlen); c->dpos = pos + 8 + len; return 1; } } } return 0; } static int cdb_find(cdb *c, char *key, unsigned int len) { cdb_findstart(c); return cdb_findnext(c,key,len); } static void iter_start(cdb *c) { char buf[4]; c->curpos = 2048; if (cdb_read(c, buf, 4, 0) == -1) readerror(); uint32_unpack(buf, &c->end); c->curkey = NEWSV(0xcdb, 1); c->fetch_advance = 0; } static int iter_key(cdb *c) { char buf[8]; U32 klen; if (c->curpos < c->end) { if (cdb_read(c, buf, 8, c->curpos) == -1) readerror(); uint32_unpack(buf, &klen); (void)SvPOK_only(c->curkey); SvGROW(c->curkey, klen); SvCUR_set(c->curkey, klen); if (cdb_read(c, SvPVX(c->curkey), klen, c->curpos + 8) == -1) readerror(); return 1; } return 0; } static void iter_advance(cdb *c) { char buf[8]; U32 klen, dlen; if (cdb_read(c, buf, 8, c->curpos) == -1) readerror(); uint32_unpack(buf, &klen); uint32_unpack(buf + 4, &dlen); c->curpos += 8 + klen + dlen; } static void iter_end(cdb *c) { if (c->end != 0) { c->end = 0; SvREFCNT_dec(c->curkey); } } #define cdb_datapos(c) ((c)->dpos) #define cdb_datalen(c) ((c)->dlen) typedef PerlIO * InputStream; MODULE = CDB_File PACKAGE = CDB_File PREFIX = cdb_ PROTOTYPES: DISABLED # Some accessor methods. # WARNING: I don't really understand enough about Perl's guts (file # handles / globs, etc.) to write this code. I think this is right, and # it seems to work, but input from anybody with a deeper # understanding would be most welcome. # Additional: fixed by someone with a deeper understanding ;-) (Matt Sergeant) InputStream cdb_handle(this) cdb * this PREINIT: GV *gv; char *packname; CODE: /* here we dup the filehandle, because perl space will try and close it when it goes out of scope */ RETVAL = PerlIO_fdopen(PerlIO_fileno(this->fh), "r"); OUTPUT: RETVAL U32 cdb_datalen(db) cdb * db CODE: RETVAL = cdb_datalen(db); OUTPUT: RETVAL U32 cdb_datapos(db) cdb * db CODE: RETVAL = cdb_datapos(db); OUTPUT: RETVAL cdb * cdb_TIEHASH(CLASS, filename) char * CLASS char * filename PREINIT: PerlIO *f; IO *io; SV *cdbp; CODE: New(0, RETVAL, 1, cdb); RETVAL->fh = f = PerlIO_open(filename, "rb"); if (!f) XSRETURN_NO; RETVAL->end = 0; #ifdef HASMMAP { struct stat st; int fd = PerlIO_fileno(f); RETVAL->map = 0; if (fstat(fd, &st) == 0) { if (st.st_size <= 0xffffffff) { char *x; x = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0); if (x != (char *)-1) { RETVAL->size = st.st_size; RETVAL->map = x; } } } } #endif OUTPUT: RETVAL SV * cdb_FETCH(this, k) cdb * this SV * k PREINIT: PerlIO *f; char buf[8]; int found; off_t pos; STRLEN klen, x; U32 klen0; char *kp; CODE: if (!SvOK(k)) { XSRETURN_UNDEF; } kp = SvPV(k, klen); if (this->end && sv_eq(this->curkey, k)) { if (cdb_read(this, buf, 8, this->curpos) == -1) readerror(); uint32_unpack(buf + 4, &this->dlen); this->dpos = this->curpos + 8 + klen; if (this->fetch_advance) { iter_advance(this); if (!iter_key(this)) iter_end(this); } found = 1; } else { cdb_findstart(this); found = cdb_findnext(this, kp, klen); if ((found != 0) && (found != 1)) readerror(); } ST(0) = sv_newmortal(); if (found) { U32 dlen; SvUPGRADE(ST(0), SVt_PV); dlen = cdb_datalen(this); (void)SvPOK_only(ST(0)); SvGROW(ST(0), dlen + 1); SvCUR_set(ST(0), dlen); if (cdb_read(this, SvPVX(ST(0)), dlen, cdb_datapos(this)) == -1) readerror(); SvPV(ST(0), PL_na)[dlen] = '\0'; } AV * cdb_multi_get(this, k) cdb * this SV * k PREINIT: PerlIO *f; char buf[8]; int found; off_t pos; STRLEN klen; U32 dlen, klen0; char *kp; SV *x; CODE: if (!SvOK(k)) { XSRETURN_UNDEF; } cdb_findstart(this); RETVAL = newAV(); sv_2mortal((SV *)RETVAL); kp = SvPV(k, klen); for (;;) { found = cdb_findnext(this, kp, klen); if ((found != 0) && (found != 1)) readerror(); if (!found) break; x = newSVpvn("", 0); dlen = cdb_datalen(this); SvGROW(x, dlen + 1); SvCUR_set(x, dlen); if (cdb_read(this, SvPVX(x), dlen, cdb_datapos(this)) == -1) readerror(); SvPV(x, PL_na)[dlen] = '\0'; av_push(RETVAL, x); } OUTPUT: RETVAL int cdb_EXISTS(this, k) cdb * this SV * k PREINIT: STRLEN klen; char *kp; CODE: if (!SvOK(k)) { XSRETURN_NO; } kp = SvPV(k, klen); RETVAL = cdb_find(this, kp, klen); if (RETVAL != 0 && RETVAL != 1) readerror(); OUTPUT: RETVAL void cdb_DESTROY(db) SV * db PREINIT: cdb * this; IO *io; CODE: if (sv_isobject(db) && (SvTYPE(SvRV(db)) == SVt_PVMG) ) { this = (cdb*)SvIV(SvRV(db)); iter_end(this); #ifdef HASMMAP if (this->map) { munmap(this->map, this->size); this->map = 0; } #endif PerlIO_close(this->fh); /* close() on O_RDONLY cannot fail */ Safefree(this); } SV * cdb_FIRSTKEY(this) cdb * this PREINIT: char buf[8]; U32 klen; CODE: iter_start(this); if (iter_key(this)) ST(0) = sv_mortalcopy(this->curkey); else XSRETURN_UNDEF; /* empty database */ SV * cdb_NEXTKEY(this, k) cdb * this SV * k PREINIT: char buf[8], *kp; int found; off_t pos; U32 dlen, klen0; STRLEN klen1; CODE: if (!SvOK(k)) { XSRETURN_UNDEF; } /* Sometimes NEXTKEY gets called before FIRSTKEY if the hash * gets re-tied so we call iter_start() anyway here */ if (this->end == 0 || !sv_eq(this->curkey, k)) iter_start(this); iter_advance(this); if (iter_key(this)) ST(0) = sv_mortalcopy(this->curkey); else { iter_start(this); (void)iter_key(this); /* prepare curkey for FETCH */ this->fetch_advance = 1; XSRETURN_UNDEF; } cdb_make * cdb_new(CLASS, fn, fntemp) char * CLASS char * fn char * fntemp PREINIT: cdb_make *cdbmake; int i; CODE: New(0, cdbmake, 1, cdb_make); cdbmake->f = PerlIO_open(fntemp, "wb"); if (!cdbmake->f) XSRETURN_UNDEF; if (cdb_make_start(cdbmake) < 0) XSRETURN_UNDEF; /* Oh, for referential transparency. */ New(0, cdbmake->fn, strlen(fn) + 1, char); New(0, cdbmake->fntemp, strlen(fntemp) + 1, char); strncpy(cdbmake->fn, fn, strlen(fn) + 1); strncpy(cdbmake->fntemp, fntemp, strlen(fntemp) + 1); CLASS = "CDB_File::Maker"; /* OK, so this is a hack */ RETVAL = cdbmake; OUTPUT: RETVAL MODULE = CDB_File PACKAGE = CDB_File::Maker PREFIX = cdbmaker_ void cdbmaker_DESTROY(sv) SV * sv PREINIT: cdb_make * this; CODE: if (sv_isobject(sv) && (SvTYPE(SvRV(sv)) == SVt_PVMG) ) { this = (cdb_make*)SvIV(SvRV(sv)); Safefree(this); } void cdbmaker_insert(this, ...) cdb_make * this PREINIT: char *kp, *vp, packbuf[8]; int c, i, x; STRLEN klen, vlen; U32 h; SV *k; SV *v; PPCODE: for (x = 1; x < items; x += 2) { k = ST(x); v = ST(x+1); kp = SvPV(k, klen); vp = SvPV(v, vlen); uint32_pack(packbuf, klen); uint32_pack(packbuf + 4, vlen); if (PerlIO_write(this->f, packbuf, 8) < 8) writeerror(); h = cdb_hash(kp, klen); if (PerlIO_write(this->f, kp, klen) < klen) writeerror(); if (PerlIO_write(this->f, vp, vlen) < vlen) writeerror(); if (cdb_make_addend(this, klen, vlen, h) == -1) nomem(); } int cdbmaker_finish(this) cdb_make * this PREINIT: char buf[8]; int i; U32 len, u; U32 count, memsize, where; struct cdb_hplist *x, *prev; struct cdb_hp *hp; CODE: for (i = 0; i < 256; ++i) this->count[i] = 0; for (x = this->head; x; x = x->next) { i = x->num; while (i--) ++this->count[255 & x->hp[i].h]; } memsize = 1; for (i = 0; i < 256; ++i) { u = this->count[i] * 2; if (u > memsize) memsize = u; } memsize += this->numentries; /* no overflow possible up to now */ u = (U32) 0 - (U32) 1; u /= sizeof(struct cdb_hp); if (memsize > u) { errno = ENOMEM; XSRETURN_UNDEF; } New(0xCDB, this->split, memsize, struct cdb_hp); this->hash = this->split + this->numentries; u = 0; for (i = 0; i < 256; ++i) { u += this->count[i]; /* bounded by numentries, so no overflow */ this->start[i] = u; } prev = 0; for (x = this->head; x; x = x->next) { i = x->num; while (i--) this->split[--this->start[255 & x->hp[i].h]] = x->hp[i]; if (prev) Safefree(prev); prev = x; } if (prev) Safefree(prev); for (i = 0; i < 256; ++i) { count = this->count[i]; len = count + count; /* no overflow possible */ uint32_pack(this->final + 8 * i, this->pos); uint32_pack(this->final + 8 * i + 4, len); for (u = 0; u < len; ++u) this->hash[u].h = this->hash[u].p = 0; hp = this->split + this->start[i]; for (u = 0; u < count; ++u) { where = (hp->h >> 8) % len; while (this->hash[where].p) if (++where == len) where = 0; this->hash[where] = *hp++; } for (u = 0; u < len; ++u) { uint32_pack(buf, this->hash[u].h); uint32_pack(buf + 4, this->hash[u].p); if (PerlIO_write(this->f, buf, 8) == -1) XSRETURN_UNDEF; if (posplus(this, 8) == -1) XSRETURN_UNDEF; } } Safefree(this->split); if (PerlIO_flush(this->f) == EOF) writeerror(); PerlIO_rewind(this->f); if (PerlIO_write(this->f, this->final, sizeof this->final) < sizeof this->final) writeerror(); if (PerlIO_flush(this->f) == EOF) writeerror(); if (fsync(PerlIO_fileno(this->f)) == -1) XSRETURN_NO; if (PerlIO_close(this->f) == EOF) XSRETURN_NO; if (rename(this->fntemp, this->fn)) XSRETURN_NO; Safefree(this->fn); Safefree(this->fntemp); RETVAL = 1; OUTPUT: RETVAL CDB_File-0.97/CHANGES0000644000076500000240000000730611546652606013013 0ustar toddrstaffRevision history for Perl extension CDB_File. 0.97 - Todd Rinald 2011-04-05 - CPAN testers looks relativley clean. Publishing a stable version. 0.96_02 - Todd Rinald 2011-04-03 - Perl 5.6 doesn't warn about $x{undef}. Skip this test for 5.6 0.96_01 - Todd Rinald 2011-04-01 - Remove ppport.h from distro on reccommendation from ppport.h - Remove C99 style code from CDB_File to allow GCC 2.95 compilers to work - Modernize Makefile.PL and require Test::More - Update tests to use Test::More - Fix makefile - {'d_mmap'} isn't always defined - Cleanup temp files during testing - Untie before file removal so windows doesn't block the removal 0.96 - Fix for compiling under stricter compilers 0.95 - Support passing multiple key/value pairs to ->insert() for performance when building CDBs. 0.94 - Made work on perl 5.8.1 - Added some notes about performance 0.93 - Switched to allocating memory on the heap. I have no idea how the previous scheme worked at all. It should have fallen over everywhere. - Fixed a bug where you re-tie the same hash and CDB_File complains about calling NEXT before calling FIRST. 0.92 - Fixed major set of leaks in both memory and filehandles - Change of ownership to Matt Sergeant 0.91 2001-12-18 - fix memory leak (thanks MJP) - document and test that multi_get returns ref to empty hash - beta release 0.86 2001-05-25 - add handle, datalen, and datapos methods for low level access - simplify multi_get, and remove a memory leak - document need to destroy extra references - open files in binary mode (thanks IW) - use mmap() (thanks RDW) - beta release 0.85 2001-02-06 - multi_get now works during each (thanks MdlR) - move multi_get to CDB_File.xs, remove dumb O(n*n), and fix bug - don't make the database files read-only (thanks FL) - beta release 0.84 2000-11-21 - backwards compatibility with perl-5.005 (thanks BD) - EPROTO not available everywhere (thanks BD); EFTYPE preferred - beta release 0.83 2000-11-03 - fix stupid typo - beta release 0.82 2000-05-30 - fix bug in `each', introduced in 0.81 - beta release 0.81 2000-05-12 - port to perl 5.6.0 - cdb code derived from cdb-0.75 - cdb code incorporated into CDB_File.xs - multi_get works even for non-adjacent keys - fetching values in order from previously obtained keys array works - use perlapio 0.8 1999-09-08 - fix bug with undefined keys / values (thanks CMC, JPB) - beta release 0.7 1997-10-20 - use Perl's Strerror instead of strerror - fix bogus warning in multi_get (thanks MdlR) - fix bug with empty values (thanks RDM) - don't fail test 6 if run as root (thanks MP, JB) - alpha release 0.6 1997-03-25 - fix unsigned off_t bug - fix version number confusion - propagate Perl's idea of CC and LD to cdb (thanks IP, SB) - use safe cdb_bread() in preference to read() (thanks MdlR) - object is now a scalar again, containing struct cdbobj - support repeated keys (thanks MdlR) - split create into new, insert, finish - optimize FETCH and NEXTKEY - support building as a static extension - PERLIO_NOT_STDIO so it works with useperlio defined (thanks AK, NMS) - add multi_get method (thanks MdlR) - fix some core dumps (thanks MdlR) - make cdb object read only (thanks MdlR) - alpha release 0.5 1997-02-12 - fix order of @ISA, so imports work - alpha release 0.4 1997-02-06 - iteration (FIRSTKEY, NEXTKEY) added - "pre-alpha" release 0.3 1997-01-28 - no longer dependent on cdbmake - CDB_File::cdbm removed - temporary file name no longer optional - "pre-alpha" release 0.2 1997-01-14 - first "pre-alpha" release 0.1 1997-01-08 - original version; created by h2xs 1.16 CDB_File-0.97/COPYRIGHT0000644000076500000240000000020311545432375013276 0ustar toddrstaffThe files in this directory are Copyright 1997 - 2001 Tim Goodwin. You may redistribute them under the same terms as Perl itself. CDB_File-0.97/INSTALL0000644000076500000240000000120511545432375013037 0ustar toddrstaffYou need Perl 5.005 or later. 1. Create a Makefile. perl Makefile.PL 2. Build the CDB_File extension. make 3. Test it (please don't omit this step). make test You should see `ok 1' through to `ok 38'. If any tests fail, please get in touch so we can sort out the problem. 4. Install the extension. If you have built CDB_File as a dynamic extension, it's as simple as this. make install If you have built CDB_File as a static extension, follow the instructions given during the build process. 5. If you have any problems, questions, or ideas for future enhancements, please contact the author (see perldoc CDB_File). CDB_File-0.97/MANIFEST0000644000076500000240000000032011545432375013134 0ustar toddrstaffACKNOWLEDGE CDB_File.pm CDB_File.xs COPYRIGHT CHANGES INSTALL MANIFEST Makefile.PL README bun-x.pl t/01main.t t/02last.t typemap META.yml Module meta-data (added by MakeMaker) CDB_File-0.97/META.yml0000644000076500000240000000137211546652653013270 0ustar toddrstaff--- #YAML:1.0 name: CDB_File version: 0.97 abstract: Perl extension for access to cdb databases author: - Todd E Rinaldo license: perl distribution_type: module configure_requires: ExtUtils::MakeMaker: 0 build_requires: ExtUtils::MakeMaker: 0 Test::More: 0 requires: Test::More: 0 resources: homepage: http://wiki.github.com/toddr/CDB_File license: http://dev.perl.org/licenses/ repository: https://github.com/toddr/CDB_File no_index: directory: - t - inc generated_by: ExtUtils::MakeMaker version 6.56 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 recommends: {} CDB_File-0.97/Makefile.PL0000644000076500000240000000221211545432375013757 0ustar toddrstaffuse strict; use warnings; use Config; use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'CDB_File', 'AUTHOR' => 'Todd E Rinaldo ', 'VERSION_FROM' => 'CDB_File.pm', 'ABSTRACT_FROM' => 'CDB_File.pm', 'PL_FILES' => {}, ($ExtUtils::MakeMaker::VERSION >= 6.3002 ? ('LICENSE' => 'perl', ) : ()), (($Config{'d_mmap'} && $Config{'d_mmap'} eq 'define') ? ('DEFINE' => '-DHASMMAP') : ()), PREREQ_PM => { 'Test::More' => 0, # For testing }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'CDB_File-*' }, META_MERGE => { recommends => { }, build_requires => { 'Test::More' => 0, # For testing }, resources => { license => 'http://dev.perl.org/licenses/', homepage => 'http://wiki.github.com/toddr/CDB_File', # bugtracker => 'https://rt.cpan.org/Dist/Display.html?Queue=CDB_File', repository => 'https://github.com/toddr/CDB_File', # MailingList => '', }, }, ); CDB_File-0.97/README0000644000076500000240000000052411545432375012671 0ustar toddrstaffREADME for CDB_File ------------------- See INSTALL for installation instructions. CDB_File is a module which provides a Perl interface to Dan Berstein's cdb package: cdb is a fast, reliable, lightweight package for creating and reading constant databases. See http://cr.yp.to/cdb.html for the latest information about cdb. CDB_File-0.97/bun-x.pl0000755000076500000240000000204611545432375013403 0ustar toddrstaff#! /usr/bin/perl use CDB_File; use strict; sub unnetstrings { my($netstrings) = @_; my @result; while ($netstrings =~ s/^([0-9]+)://) { push @result, substr($netstrings, 0, $1, ''); $netstrings =~ s/^,//; } return @result; } my $chunk = 8192; sub extract { my($file, $t, $b) = @_; my $head = $$b{"H$file"}; my ($code, $type) = $head =~ m/^([0-9]+)(.)/; if ($type eq "/") { mkdir $file, 0777; } elsif ($type eq "_") { my ($total, $now, $got, $x); open OUT, ">$file" or die "open for output: $!\n"; exists $$b{"D$code"} or die "corrupt bun file\n"; my $fh = $t->handle; sysseek $fh, $t->datapos, 0; $total = $t->datalen; while ($total) { $now = ($total > $chunk) ? $chunk : $total; $got = sysread $fh, $x, $now; if (not $got) { die "read error\n"; } $total -= $got; print OUT $x; } close OUT; } else { print STDERR "warning: skipping unknown file type\n"; } } die "usage\n" if @ARGV != 1; my (%b, $t); $t = tie %b, 'CDB_File', $ARGV[0] or die "tie: $!\n"; map { extract $_, $t, \%b } unnetstrings $b{""}; CDB_File-0.97/t/0000755000076500000240000000000011546652653012257 5ustar toddrstaffCDB_File-0.97/t/01main.t0000644000076500000240000001611111546136463013525 0ustar toddrstaffuse strict; use warnings; use Test::More tests => 128; use CDB_File; my $good_file_db = 'good.cdb'; my $good_file_temp = 'good.tmp'; my %h; ok(!(tie(%h, "CDB_File", 'nonesuch.cdb')), "Tie non-existant file"); open OUT, '> bad.cdb'; close OUT; ok((tie(%h, "CDB_File", 'bad.cdb')), "Load blank cdb file (invalid file, but loading it works)"); eval { print $h{'one'} }; like($@, qr/^Read of CDB_File failed:/, "Test that attempt to read incorrect file fails"); untie %h; cleanup_cdb('bad'); my %a = qw(one Hello two Goodbye); eval { CDB_File::create(%a, $good_file_db, $good_file_temp) or die "Failed to create cdb: $!" }; is("$@", '', "Create cdb"); # Test that good file works. tie(%h, "CDB_File", $good_file_db) and pass("Test that good file works"); my $t = tied %h; isa_ok($t, "CDB_File" ); is($t->FETCH('one'), 'Hello', "Test that good file FETCHes right results"); is($h{'one'}, 'Hello', "Test that good file hash access gets right results"); ok(!defined($h{'1'}), "Check defined() non-existant entry works"); ok(exists($h{'two'}), "Check exists() on a real entry works"); ok(!exists($h{'three'}), "Check exists() on non-existant entry works"); # Test low level access. my $fh = $t->handle; my $x; exists($h{'one'}); # go to this entry print "# Datapos: ", $t->datapos, ", Datalen: ", $t->datalen, "\n"; sysseek($fh, $t->datapos, 0); sysread($fh, $x, $t->datalen); is($x, 'Hello', "Check low level access read worked"); exists($h{'two'}); print "# Datapos: ", $t->datapos, ", Datalen: ", $t->datalen, "\n"; sysseek($fh, $t->datapos, 0); sysread($fh, $x, $t->datalen); is($x, 'Goodbye', "Check low level access read worked"); exists($h{'three'}); print "# Datapos: ", $t->datapos, ", Datalen: ", $t->datalen, "\n"; is($t->datapos, 0, "Low level access on no-exist entry"); is($t->datalen, 0, "Low level access on no-exist entry"); my @h = sort keys %h; is(scalar @h, 2, "keys length == 2"); is($h[0], 'one', "first key right"); is($h[1], 'two', "second key right"); eval { $h{'four'} = 'foo' }; like($@, qr/Modification of a CDB_File attempted/, "Check modifying throws exception"); eval { delete $h{'five'} }; like($@, qr/Modification of a CDB_File attempted/, "Check modifying throws exception"); close $fh; # Duped file handle must be closed. undef $t; untie %h; # Release the tie so the file closes and we can remove it. cleanup_cdb('good'); # Test empty file. %a = (); eval { CDB_File::create(%a, 'empty.cdb', 'empty.tmp') || die "CDB create failed" }; is(!$@, 1, "No errors creating cdb"); ok((tie(%h, "CDB_File", 'empty.cdb')), "Tie new empty cdb"); @h = keys %h; is(scalar @h, 0, "Empty cdb has no keys"); untie %h; cleanup_cdb('empty'); # Test failing new. ok(!CDB_File->new('..', '.'), "Creating cdb with dirs fails"); # Test file with repeated keys. my $tmp = 'repeat.tmp'; my $cdbm = CDB_File->new('repeat.cdb', $tmp); isa_ok($cdbm, 'CDB_File::Maker'); $cdbm->insert('dog', 'perro'); $cdbm->insert('cat', 'gato'); $cdbm->insert('cat', 'chat'); $cdbm->insert('dog', 'chien'); $cdbm->insert('rabbit', 'conejo'); $tmp = 'ERROR!'; # Test that name was stashed correctly. $cdbm->finish; undef $cdbm; $t = tie %h, "CDB_File", 'repeat.cdb'; isa_ok($t, 'CDB_File'); eval { $t->NEXTKEY('dog') }; # ok($@, qr/^Use CDB_File::FIRSTKEY before CDB_File::NEXTKEY/, "Test that NEXTKEY can't be used immediately after TIEHASH"); is($@, '', "Test that NEXTKEY can be used immediately after TIEHASH"); # Check keys/values works my @k = keys %h; my @v = values %h; is($k[0], 'dog'); is($v[0], 'perro'); is($k[1], 'cat'); is($v[1], 'gato'); is($k[2], 'cat'); is($v[2], 'chat'); is($k[3], 'dog'); is($v[3], 'chien'); is($k[4], 'rabbit'); is($v[4], 'conejo'); @k = (); @v = (); # Check each works while (my ($k, $v) = each %h) { push @k, $k; push @v, $v; } is($k[0], 'dog'); is($v[0], 'perro'); is($k[1], 'cat'); is($v[1], 'gato'); is($k[2], 'cat'); is($v[2], 'chat'); is($k[3], 'dog'); is($v[3], 'chien'); is($k[4], 'rabbit'); is($v[4], 'conejo'); my $v = $t->multi_get('cat'); is(@$v, 2, "multi_get returned 2 entries"); is($v->[0], 'gato'); is($v->[1], 'chat'); $v = $t->multi_get('dog'); is(@$v, 2, "multi_get returned 2 entries"); is($v->[0], 'perro'); is($v->[1], 'chien'); $v = $t->multi_get('rabbit'); is(@$v, 1, "multi_get returned 1 entry"); is($v->[0], 'conejo'); $v = $t->multi_get('foo'); is(ref($v), 'ARRAY', "multi_get on non-existant entry works"); is(@$v, 0); while (my ($k, $v) = each %h) { $v = $t->multi_get($k); ok($v->[0] eq 'gato' and $v->[1] eq 'chat') if $k eq 'cat'; ok($v->[0] eq 'perro' and $v->[1] eq 'chien') if $k eq 'dog'; ok($v->[0] eq 'conejo') if $k eq 'rabbit'; } # Test undefined keys. { my $warned = 0; local $SIG{__WARN__} = sub { $warned = 1 if $_[0] =~ /^Use of uninitialized value/ }; local $^W = 1; my $x; ok(! defined $h{$x}); SKIP: { skip 'Perl 5.6 does not warn about $x{undef}', 1 unless $] > 5.007; ok($warned); } $warned = 0; ok(!exists $h{$x}); SKIP: { skip 'Perl 5.6 does not warn about $x{undef}', 1 unless $] > 5.007; ok($warned); } $warned = 0; my $v = $t->multi_get('rabbit'); ok($v); ok(! $warned); } # Check that object is readonly. eval { $$t = 'foo' }; like($@, qr/^Modification of a read-only value/, "Check object (\$t) is read only"); is($h{'cat'}, 'gato'); undef $t; untie %h; cleanup_cdb('repeat'); # Regression test - dumps core in 0.6. %a = ('one', ''); ok((CDB_File::create(%a, $good_file_db, $good_file_temp)), "Create good.cdb"); ok((tie(%h, "CDB_File", $good_file_db)), "Tie good.cdb"); ok(!exists $h{'zero'}, "missing key test"); ok(defined($h{'one'}), "one is found and defined"); is($h{'one'}, '', "one is empty"); untie %h; # Release the tie so the file closes and we can remove it. cleanup_cdb('good'); # Test numeric data (broken before 0.8) my $h = CDB_File->new('t.cdb', 't.tmp'); isa_ok($h, 'CDB_File::Maker'); $h->insert(1, 1 * 23); ok($h->finish); ok(tie(%h, "CDB_File", 't.cdb')); is($h{1}, 23, "Numeric comparison works"); untie %h; cleanup_cdb('t'); # Test zero value with multi_get (broken before 0.85) $h = CDB_File->new('t.cdb', 't.tmp'); isa_ok($h, 'CDB_File::Maker'); $h->insert('x', 0); $h->insert('x', 1); ok($h->finish); $t = tie(%h, "CDB_File", 't.cdb'); isa_ok($t, 'CDB_File'); $x = $t->multi_get('x'); is(@$x, 2); is($x->[0], 0); is($x->[1], 1); undef $t; untie %h; cleanup_cdb('t'); $h = CDB_File->new('t.cdb', 't.tmp'); isa_ok($h, 'CDB_File::Maker'); for (my $i = 0; $i < 10; ++$i) { $h->insert($i, $i); } ok($h->finish); undef $h; $t = tie(%h, "CDB_File", 't.cdb'); isa_ok($t, 'CDB_File'); for (my $i = 0; $i < 10; ++$i) { my ($k, $v) = each %h; if ($k == 2) { ok(exists($h{4})); } if ($k == 5) { ok(!exists($h{23})); } if ($k == 7) { my $m = $t->multi_get(3); is(@$m, 1); is($m->[0], 3); } is($k, $i, "$k eq $i"); is($v, $i, "$v eq $i"); } undef $t; untie %h; cleanup_cdb('t'); sub cleanup_cdb { my $file = shift; local $Test::Builder::Level = $Test::Builder::Level + 1; unlink "$file.cdb", "$file.tmp"; ok(!-e $_, "Remove $_") foreach("$file.cdb", "$file.tmp"); } CDB_File-0.97/t/02last.t0000644000076500000240000000135311545432375013547 0ustar toddrstaffuse strict; use warnings; use Test::More tests => 44; use CDB_File; my $c = CDB_File->new('last.cdb', 'last.tmp'); isa_ok($c, 'CDB_File::Maker'); for (1..10) { $c->insert("Key$_" => "Val$_"); } is($c->finish, 1, "Finish writes out"); my %h; tie(%h, "CDB_File", "last.cdb"); isa_ok(tied(%h), 'CDB_File'); my $count = 0; foreach my $k (keys %h) { $k =~ m/^Key(\d+)$/ or die; my $n = $1; ok($n <= 10 && $n > 0, "Expected key ($n) is found") or diag($k); is($h{$k}, "Val$n", "Val$n matches"); } tie(%h, "CDB_File", "last.cdb"); isa_ok(tied(%h), 'CDB_File'); while (my ($k, $v) = each(%h)) { ok($k, "verify k in re-tied hash ($k)"); ok($v, "verify v in re-tied hash ($v)"); } END { unlink 'last.cdb' } CDB_File-0.97/typemap0000644000076500000240000000130311545432375013407 0ustar toddrstaffTYPEMAP cdb * O_OBJECT cdb_make * O_OBJECT ###################################################################### OUTPUT # The Perl object is blessed into 'CLASS', which should be a # char* having the name of the package for the blessing. O_OBJECT sv_setref_pv( $arg, CLASS, (void*)$var ); SvREADONLY_on( SvRV( $arg ) ); ###################################################################### INPUT O_OBJECT if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) $var = ($type)SvIV((SV*)SvRV( $arg )); else{ warn( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); XSRETURN_UNDEF; }